In [209]:
import pickle
import torch
import torch.nn as nn
import torch_geometric.nn as geom_nn
import torch_geometric.data as geom_data
import torch.utils.data as data
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
import os
from torch_geometric.loader import DataLoader
from torch_geometric.data import Dataset
from sklearn.model_selection import train_test_split

torch_data_ast = pickle.load(open("models/data_lists/torch_data_ast.pkl", "rb"))

In [6]:
tot_num_nodes = 0
i = 0
for td in torch_data_ast:
    tot_num_nodes += td.num_nodes
    i += 1
tot_num_nodes / i

39.70207399789417

In [8]:
tot_num_nodes = 0
i = 0
for td in torch_data_ast:
    tot_num_nodes += td.num_nodes
    i += 1
tot_num_nodes / i

39.70207399789417

In [9]:
tot_num_edges = 0
i = 0
for td in torch_data_ast:
    tot_num_nodes += td.num_edges
    i += 1
tot_num_edges / i

0.0

In [None]:
torch_data[0]

In [None]:
import torchmetrics

In [None]:
device = "cuda"

b_size = 100
train, split1 = train_test_split(torch_data, test_size=.25)
test, validate = train_test_split(split1, test_size=.2)
train_batches = DataLoader(train, batch_size=b_size, num_workers=4)
test_batches = DataLoader(test, batch_size=b_size, num_workers=4)
validate_batches = DataLoader(validate, batch_size=b_size, num_workers=4)

# for tb in train_batches:
#     tb = tb.to(device)
#     #print(tb.x)
# for tb in test_batches:
#     tb = tb.to(device)

In [None]:
gnn_layer_by_name = {
    "GCN": geom_nn.GCNConv,
    "GAT": geom_nn.GATConv,
    "GraphConv": geom_nn.GraphConv
}

In [None]:
class GNNModel(nn.Module):
    
    def __init__(self, c_in, c_hidden, c_out, num_layers=2, layer_name="GCN", dp_rate=0.1, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features. Usually number of classes in classification
            num_layers - Number of "hidden" graph layers
            layer_name - String of the graph layer to use
            dp_rate - Dropout rate to apply throughout the network
            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT)
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]
        
        layers = []
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers - 1):
            layers += [
                gnn_layer(in_channels=in_channels, out_channels=out_channels, **kwargs), nn.ReLU(inplace=True), 
                nn.Dropout(dp_rate)
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels, out_channels=c_out, **kwargs)]
        self.layers = nn.ModuleList(layers)
        
    def forward(self, x, edge_index):
        """
        Inputs:
            x - Input features per node
            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
        """
        for l in self.layers:
            # For graph layers, we need to add the "edge_index" tensor as additional input
            # All PyTorch Geometric graph layer inherit the class "MessagePassing", hence
            # we can simply check the class type.
            if isinstance(l, geom_nn.MessagePassing):
                x = l(x, edge_index)
            else:
                x = l(x)
        return x


In [None]:
class GraphGNNModel(nn.Module):

    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of output features (usually number of classes)
            dp_rate_linear - Dropout rate before the linear layer (usually much higher than inside the GNN)
            kwargs - Additional arguments for the GNNModel object
        """
        super().__init__()
        self.GNN = GNNModel(c_in=c_in,
                            c_hidden=c_hidden,
                            c_out=c_hidden, # Not our prediction output yet!
                            **kwargs)
        self.head = nn.Sequential(
            nn.Dropout(dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    def forward(self, x, edge_index, batch_idx):
        """
        Inputs:
            x - Input features per node
            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
            batch_idx - Index of batch element for each node
        """
        x = self.GNN(x, edge_index)
        x = geom_nn.global_mean_pool(x, batch_idx) # Average pooling
        x = self.head(x)
        return x


In [None]:
class GraphLevelGNN(pl.LightningModule):

    def __init__(self, **model_kwargs):
        super().__init__()
        # Saving hyperparameters
        self.save_hyperparameters()

        self.model = GraphGNNModel(**model_kwargs)
        self.loss_module = nn.BCEWithLogitsLoss() if self.hparams.c_out == 1 else nn.CrossEntropyLoss()

    def forward(self, data, mode="train"):
        x, edge_index, batch_idx = data.x, data.edge_index, data.batch
        x = self.model(x, edge_index, batch_idx)
        x = x.squeeze(dim=-1)

        if self.hparams.c_out == 1:
            preds = (x > 0).float()
            data.y = data.y.float()
        else:
            preds = x.argmax(dim=-1)
        loss = self.loss_module(x[0], data.y[0])
        #acc = (preds == data.y).sum().float() / preds.shape[0]
        acc = torchmetrics.functional.accuracy(preds.int(), data.y.int())
        f1 = torchmetrics.functional.f1_score(preds.int(), data.y.int())
        print(acc, f1)
        #auc = torchmetrics.functional.auc(x, data.y)
        return loss, acc, f1

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=1e-2, weight_decay=0.0) # High lr because of small dataset and small model
        return optimizer

    def training_step(self, batch, batch_idx):
        loss, acc, _ = self.forward(batch, mode="train")
        self.log('train_loss', loss)
        self.log('train_acc', acc)
        #self.log('train_f1', f1)
        #self.log('batch_size', torch.Tensor(b_size).to(torch.float32))
        return loss

    def validation_step(self, batch, batch_idx):
        _, acc, _ = self.forward(batch, mode="val")
        self.log('val_acc', acc)
        #self.log('val_f1', f1)
        #self.log('batch_size', torch.Tensor(b_size).to(torch.float32))

    def test_step(self, batch, batch_idx):
        _, acc, f1 = self.forward(batch, mode="test")
        self.log('test_acc', acc)
        self.log('test_f1', f1)
        #self.log('test_f1', f1)
        #self.log('batch_size', torch.Tensor(b_size).to(torch.float32))

In [None]:
def train_graph_classifier(model_name, **model_kwargs):
    pl.seed_everything(42)

    # Create a PyTorch Lightning trainer with the generation callback
    root_dir = os.path.join("test", "GraphLevel" + model_name)
    os.makedirs(root_dir, exist_ok=True)
    trainer = pl.Trainer(default_root_dir=root_dir,
                         callbacks=[ModelCheckpoint(save_weights_only=True, mode="max", monitor="val_acc")],
                         gpus=1 if str(device).startswith("cuda") else 0,
                         max_epochs=5,
                         progress_bar_refresh_rate=0)
    trainer.logger._default_hp_metric = None # Optional logging argument that we don't need

    # Check whether pretrained model exists. If yes, load it and skip training
    pretrained_filename = os.path.join("test", f"GraphLevel{model_name}.ckpt")
    if os.path.isfile(pretrained_filename):
        print("Found pretrained model, loading...")
        model = GraphLevelGNN.load_from_checkpoint(pretrained_filename)
    else:
        pl.seed_everything(42)
        model = GraphLevelGNN(c_in=100,
                              c_out=1,
                              **model_kwargs)
        trainer.fit(model, train_batches, validate_batches)
        model = GraphLevelGNN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
    # Test best model on validation and test set
    train_result = trainer.test(model, dataloaders=validate_batches, verbose=True)
    test_result = trainer.test(model, dataloaders=test_batches, verbose=True)
    #result = {"test": test_result[0]['test_acc'], "train": train_result[0]['test_acc']}
    return model, train_result, test_result

In [None]:
model, train_result, test_result = train_graph_classifier(model_name="GraphGCN",
                                       c_hidden=256,
                                       layer_name="GCN",
                                       num_layers=3,
                                       dp_rate_linear=0.5,
                                       dp_rate=0.0)

In [None]:
test_result

In [100]:
import pickle
import torch
import torch.nn as nn
import torch_geometric.nn as geom_nn
import torch_geometric.data as geom_data
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
import os
from torch_geometric.loader import DataLoader
from torch_geometric.data import Dataset
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

torch_data_ast = pickle.load(open("models/data_lists/torch_data_ast.pkl", "rb"))

In [101]:
for i in range(len(torch_data_ast)):
    #torch_data_ast[i].y = torch.Tensor([1, torch_data_ast[i].y])
    torch_data_ast[i] = torch_data_ast[i].to("cuda")

In [102]:
import torch_geometric.loader as loader
train_loader = loader.DataLoader(torch_data_ast, shuffle=True, batch_size=25)

In [103]:
class GraphGNNModel(nn.Module):
    
    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of output features (usually number of classes)
            dp_rate_linear - Dropout rate before the linear layer (usually much higher than inside the GNN)
            kwargs - Additional arguments for the GNNModel object
        """
        super().__init__()
        self.GNN = GNNModel(c_in=c_in, 
                            c_hidden=c_hidden, 
                            c_out=c_hidden, # Not our prediction output yet!
                            **kwargs)
        self.head = nn.Sequential(
            nn.Dropout(dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    def forward(self, x, edge_index, batch_idx):
        """
        Inputs:
            x - Input features per node
            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
            batch_idx - Index of batch element for each node
        """
        x = self.GNN(x, edge_index)
        x = geom_nn.global_mean_pool(x, batch_idx) # Average pooling
        x = self.head(x)
        return x

In [182]:
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = geom_nn.GCNConv(100, 256)
        self.conv2 = geom_nn.GCNConv(256, 200)
        self.linear1 = nn.Linear(200, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = self.linear1(x)
        x = F.log_softmax(x, dim=1)
        x = x[torch.argmax(x)]
        
        return x

In [185]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.BCEWithLogitsLoss()
all_preds = []
all_labels = []

for epoch in range(10):
    model.train()
    for data in torch_data_ast:
        # Reset gradients
        optimizer.zero_grad() 
        pred = model(data)
        real_y = torch.Tensor([data.y]).to(device)
        loss = criterion(pred, real_y)
        loss.backward()
        optimizer.step()
#         all_preds.append(np.rint(torch.sigmoid(pred).cpu().detach().numpy()))
#         all_labels.append(batch.y.cpu().detach().numpy())
        
        
#     out = model(data)
#     print(data.y)
#     loss = F.cross_entropy(out, torch.Tensor(data.y))
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()

KeyboardInterrupt: 

In [None]:
model.eval()
pred = model(data).argmax(dm=1)

In [155]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')

In [70]:
dataset[0].to(device)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [137]:
import torch
import torch.nn.functional as F 
from torch.nn import Linear, BatchNorm1d, ModuleList
from torch_geometric.nn import TransformerConv, TopKPooling 
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
torch.manual_seed(42)

class GNN(torch.nn.Module):
    def __init__(self, feature_size, model_params):
        super().__init__()
        embedding_size = 100
        dense_neurons = 256
        self.conv1 = geom_nn.GCNConv(100, 16)
        self.conv2 = geom_nn.GCNConv(16, 1)
        # Linear layers
        self.linear1 = Linear(embedding_size*2, dense_neurons)
        self.linear2 = Linear(dense_neurons, int(dense_neurons/2))  
        self.linear3 = Linear(int(dense_neurons/2), 1)  

    def forward(self, x, batch_index):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index, batch_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, batch_index)

        # Holds the intermediate graph representations
        global_representation = []

        for i in range(self.n_layers):
            x = self.conv_layers[i](x, edge_index, edge_attr)
            x = torch.relu(self.transf_layers[i](x))
            x = self.bn_layers[i](x)
            # Always aggregate last layer
            if i % self.top_k_every_n == 0 or i == self.n_layers:
                x , edge_index, edge_attr, batch_index, _, _ = self.pooling_layers[int(i/self.top_k_every_n)](
                    x, edge_index, edge_attr, batch_index
                    )
                # Add current representation
                global_representation.append(torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1))
    
        x = sum(global_representation)

        # Output block
        x = torch.relu(self.linear1(x))
        x = F.dropout(x, p=0.8, training=self.training)
        x = torch.relu(self.linear2(x))
        x = F.dropout(x, p=0.8, training=self.training)
        x = self.linear3(x)

        return x



In [138]:
def train_one_epoch(epoch, model, train_loader, optimizer, loss_fn):
    # Enumerate over the data
    all_preds = []
    all_labels = []
    running_loss = 0.0
    step = 0
    for _, batch in enumerate(tqdm(train_loader)):
        # Use GPU
        batch.to(device)  
        # Reset gradients
        optimizer.zero_grad() 
        # Passing the node features and the connection info
        pred = model(batch.x.float(), 
                                batch.edge_attr.float(),
                                batch.edge_index, 
                                batch.batch) 
        # Calculating the loss and gradients
        loss = loss_fn(torch.squeeze(pred), batch.y.float())
        loss.backward()  
        optimizer.step()  
        # Update tracking
        running_loss += loss.item()
        step += 1
        all_preds.append(np.rint(torch.sigmoid(pred).cpu().detach().numpy()))
        all_labels.append(batch.y.cpu().detach().numpy())
    all_preds = np.concatenate(all_preds).ravel()
    all_labels = np.concatenate(all_labels).ravel()
    calculate_metrics(all_preds, all_labels, epoch, "train")
    return running_loss/step

<torch_geometric.loader.dataloader.DataLoader at 0x7efe930fba30>

In [611]:
# FILE model.py

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.nn.conv import GatedGraphConv


def get_conv_mp_out_size(in_size, last_layer, mps):
    size = in_size

    for mp in mps:
        size = round((size - mp["kernel_size"]) / mp["stride"] + 1)

    size = size + 1 if size % 2 != 0 else size

    return int(size * last_layer["out_channels"])


def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv1d:
        torch.nn.init.xavier_uniform_(m.weight)


class Conv(nn.Module):

    def __init__(self, conv1d_1, conv1d_2, maxpool1d_1, maxpool1d_2, fc_1_size, fc_2_size):
        super(Conv, self).__init__()
        self.conv1d_1_args = conv1d_1
        self.conv1d_1 = nn.Conv1d(**conv1d_1)
        self.conv1d_2 = nn.Conv1d(**conv1d_2)

        fc1_size = get_conv_mp_out_size(fc_1_size, conv1d_2, [maxpool1d_1, maxpool1d_2])
        fc2_size = get_conv_mp_out_size(fc_2_size, conv1d_2, [maxpool1d_1, maxpool1d_2])

        # Dense layers
        self.fc1 = nn.Linear(fc1_size, 1)
        self.fc2 = nn.Linear(fc2_size, 1)

        # Dropout
        self.drop = nn.Dropout(p=0.2)

        self.mp_1 = nn.MaxPool1d(**maxpool1d_1)
        self.mp_2 = nn.MaxPool1d(**maxpool1d_2)

    def forward(self, hidden, x):
        concat = torch.cat([hidden, x], 1)
        print(x.shape)
        concat_size = hidden.shape[1] + x.shape[1]
        print('sizes', self.conv1d_1_args["in_channels"], concat_size)
        print(concat.size())
        print(concat)
        concat = concat.view(-1, self.conv1d_1_args["in_channels"], concat_size)
        #concat = concat.view(-1, self.conv1d_1_args["in_channels"], concat_size)

        Z = self.mp_1(F.relu(self.conv1d_1(concat)))
        Z = self.mp_2(self.conv1d_2(Z))
        
        hidden = hidden.view(-1, self.conv1d_1_args["in_channels"], hidden.shape[1])

        Y = self.mp_1(F.relu(self.conv1d_1(hidden)))
        Y = self.mp_2(self.conv1d_2(Y))

        Z_flatten_size = int(Z.shape[1] * Z.shape[-1])
        Y_flatten_size = int(Y.shape[1] * Y.shape[-1])

        Z = Z.view(-1, Z_flatten_size)
        Y = Y.view(-1, Y_flatten_size)
        res = self.fc1(Z) * self.fc2(Y)
        res = self.drop(res)
        # res = res.mean(1)
        # print(res, mean)
        sig = torch.sigmoid(torch.flatten(res))
        return sig


class Net(nn.Module):

    def __init__(self, gated_graph_conv_args, conv_args, emb_size, device):
        super(Net, self).__init__()
        self.ggc = GatedGraphConv(**gated_graph_conv_args).to(device)
        self.conv = Conv(**conv_args,
                         fc_1_size=gated_graph_conv_args["out_channels"],
                         fc_2_size=gated_graph_conv_args["out_channels"]).to(device)
        # self.conv.apply(init_weights)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.ggc(x, edge_index)
        x = self.conv(x, data.x)

        return x

    def save(self, path):
        torch.save(self.state_dict(), path)

    def load(self, path):
        self.load_state_dict(torch.load(path))

In [612]:
# FILE metrics.py

import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn import metrics


class Metrics:
    def __init__(self, outs, labels):
        self.scores = outs
        self.labels = labels
        self.transform()
        print(self.predicts)

    def transform(self):
        self.series = pd.Series(self.scores)
        self.predicts = self.series.apply(lambda x: 1 if x >= 0.5 else 0)
        self.predicts.reset_index(drop=True, inplace=True)

    def __str__(self):
        confusion = confusion_matrix(y_true=self.labels, y_pred=self.predicts)
        tn, fp, fn, tp = confusion.ravel()
        string = f"\nConfusion matrix: \n"
        string += f"{confusion}\n"
        string += f"TP: {tp}, FP: {fp}, TN: {tn}, FN: {fn}\n"
        string += '\n'.join([name + ": " + str(metric) for name, metric in self().items()])
        return string

    def __call__(self):
        _metrics = {"Accuracy": metrics.accuracy_score(y_true=self.labels, y_pred=self.predicts),
                    "Precision": metrics.precision_score(y_true=self.labels, y_pred=self.predicts),
                    "Recall": metrics.recall_score(y_true=self.labels, y_pred=self.predicts),
                    "F-measure": metrics.f1_score(y_true=self.labels, y_pred=self.predicts),
                    "Precision-Recall AUC": metrics.average_precision_score(y_true=self.labels, y_score=self.scores),
                    "AUC": metrics.roc_auc_score(y_true=self.labels, y_score=self.scores),
                    "MCC": metrics.matthews_corrcoef(y_true=self.labels, y_pred=self.predicts),
                    "Error": self.error()}

        return _metrics

    def log(self):
        excluded = ["Precision-Recall AUC", "AUC"]
        _metrics = self()
        print(_metrics)
#         msg = ' - '.join(
#             [f"({name[:3]} {round(metric, 3)})" for name, metric in _metrics.items() if name not in excluded])

#         print('metrics', msg)

    def error(self):
        errors = [(abs(score - (1 if score >= 0.5 else 0))/score)*100 for score, label in zip(self.scores, self.labels)]

        return sum(errors)/len(errors)


In [613]:
# FILE modeling.py

import torch
import time

class Train(object):
    def __init__(self, step, epochs, verbose=True):
        self.epochs = epochs
        self.step = step
        self.history = History()
        self.verbose = verbose

    def __call__(self, train_loader_step, val_loader_step=None, early_stopping=None):
        for epoch in range(self.epochs):
            print('training')
            self.step.train()
            train_stats = train_loader_step(self.step)
            self.history(train_stats, epoch + 1)

            if val_loader_step is not None:
                with torch.no_grad():
                    self.step.eval()
                    val_stats = val_loader_step(self.step)
                    self.history(val_stats, epoch + 1)

                print(self.history)

                if early_stopping is not None:
                    valid_loss = val_stats.loss()
                    # early_stopping needs the validation loss to check if it has decreased,
                    # and if it has, it will make a checkpoint of the current model
                    if early_stopping(valid_loss):
                        self.history.log()
                        return
            else:
                print(self.history)
        self.history.log()


def predict(step, test_loader_step):
    print(f"Testing")
    with torch.no_grad():
        step.eval()
        stats = test_loader_step(step)
        metrics = Metrics(outs(), labels())
        print(metrics)
        metrics.log()
    return metrics()["Accuracy"]


class History:
    def __init__(self):
        self.history = {}
        self.epoch = 0
        self.timer = time.time()

    def __call__(self, stats, epoch):
        self.epoch = epoch

        if epoch in self.history:
            self.history[epoch].append(stats)
        else:
            self.history[epoch] = [stats]

    def __str__(self):
        epoch = f"\nEpoch {self.epoch};"
        stats = ' - '.join([f"{res}" for res in self.current()])
        timer = f"Time: {(time.time() - self.timer)}"

        return f"{epoch} - {stats} - {timer}"

    def current(self):
        return self.history[self.epoch]

    def log(self):
        msg = f"(Epoch: {self.epoch}) {' - '.join([f'({res})' for res in self.current()])}"
        print("history", msg)


In [614]:
# FILE stats.py

import dataclasses
from dataclasses import dataclass
from typing import List


class Stat:
    def __init__(self, outs=None, loss=0.0, acc=0.0, labels=None):
        if labels is None:
            labels = []
        if outs is None:
            outs = []
        self.outs = outs
        self.labels = labels
        self.loss = loss
        self.acc = acc

    def __add__(self, other):
        return Stat(self.outs + other.outs, self.loss + other.loss, self.acc + other.acc, self.labels + other.labels)

    def __str__(self):
        return f"Loss: {round(self.loss, 4)}; Acc: {round(self.acc, 4)};"


@dataclass
class Stats:
    name: str
    results: List[Stat] = dataclasses.field(default_factory=list)
    total: Stat = Stat()

    def __call__(self, stat):
        self.total += stat
        self.results.append(stat)

    def __str__(self):
        return f"{self.name} {self.mean()}"

    def __len__(self):
        return len(self.results)

    def mean(self):
        res = Stat()
        res += self.total
        res.loss /= len(self)
        res.acc /= len(self)

        return res

    def loss(self):
        return self.mean().loss

    def acc(self):
        return self.mean().acc

    def outs(self):
        return self.total.outs

    def labels(self):
        return self.total.labels


In [615]:
# FILE step.py
import torch

def softmax_accuracy(probs, all_labels):
    acc = (torch.argmax(probs) == all_labels).sum()
    acc = torch.div(acc, len(all_labels) + 0.0)
    return acc


class Step:
    # Performs a step on the loader and returns the result
    def __init__(self, model, loss_function, optimizer):
        self.model = model
        self.criterion = loss_function
        self.optimizer = optimizer

    def __call__(self, i, x, y):
        out = self.model(x)
        loss = self.criterion(out, y.float())
        acc = softmax_accuracy(out, y.float())

        if self.model.training:
            # calculates the gradient
            loss.backward()
            # and performs a parameter update based on it
            self.optimizer.step()
            # clears old gradients from the last step
            self.optimizer.zero_grad()

        # print(f"\tBatch: {i}; Loss: {round(loss.item(), 4)}", end="")
        return Stat(out.tolist(), loss.item(), acc.item(), y.tolist())

    def train(self):
        self.model.train()

    def eval(self):
        self.model.eval()


In [616]:
# FILE devign.py

import torch.optim as optim
import torch.nn.functional as F

class Devign(Step):
    def __init__(self,
                 path: str,
                 device: str,
                 model: dict,
                 learning_rate: float,
                 weight_decay: float,
                 loss_lambda: float):
        self.path = path
        self.lr = learning_rate
        self.wd = weight_decay
        self.ll = loss_lambda
        print('devign', f"LR: {self.lr}; WD: {self.wd}; LL: {self.ll};")
        _model = Net(**model, device=device)
        super().__init__(model=_model,
                         loss_function=lambda o, t: F.binary_cross_entropy(o, t) + F.l1_loss(o, t) * self.ll,
                         optimizer=optim.Adam(_model.parameters(), lr=self.lr, weight_decay=self.wd),
                         )

        self.count_parameters()

    def load(self):
        self.model.load(self.path)

    def save(self):
        self.model.save(self.path)

    def count_parameters(self):
        count = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        print(f"The model has {count:,} trainable parameters")


In [617]:
# FILE LoaderStep.py

class LoaderStep:
    def __init__(self, name, data_loader, device):
        self.name = name
        self.loader = data_loader
        self.size = len(data_loader)
        self.device = device

    def __call__(self, step):
        self.stats = Stats(self.name)

        for i, batch in enumerate(self.loader):
            batch.to(self.device)
            stat: Stat = step(i, batch, batch.y)
            self.stats(stat)

        return self.stats


In [618]:
model_kwargs = {
    "gated_graph_conv_args": {"out_channels" : 200, "num_layers" : 6, "aggr" : "add", "bias": True},
    "conv_args": {
        "conv1d_1" : {"in_channels": 300, "out_channels": 50, "kernel_size": 3, "padding" : 1},
        "conv1d_2" : {"in_channels": 50, "out_channels": 20, "kernel_size": 1, "padding" : 1},
        "maxpool1d_1" : {"kernel_size" : 3, "stride" : 2},
        "maxpool1d_2" : {"kernel_size" : 2, "stride" : 2}
    },
    "emb_size" : 101
}

In [619]:
model = Devign(path="test/devign", device='cuda', model=model_kwargs, learning_rate=1e-4,
              weight_decay=1.3e-6, loss_lambda=1.3e-6)

devign LR: 0.0001; WD: 1.3e-06; LL: 1.3e-06;
The model has 529,272 trainable parameters


In [620]:
trainer = Train(model, 10)
b_size = 1
train, split1 = train_test_split(torch_data_ast, test_size=.25)
test, validate = train_test_split(split1, test_size=.2)
train_loader = DataLoader(train, batch_size=b_size, num_workers=4)
test_loader = DataLoader(test, batch_size=b_size, num_workers=4)
val_loader = DataLoader(validate, batch_size=b_size, num_workers=4)

In [621]:
train_loader_step = LoaderStep("Train", train_loader, 'cuda')
val_loader_step = LoaderStep("Validation", val_loader, 'cuda')
test_loader_step = LoaderStep("Test", test_loader, 'cuda')

trainer(train_loader_step, val_loader_step)

training
torch.Size([26, 100])
sizes 300 300
torch.Size([26, 300])
tensor([[-0.0566, -0.1202,  0.0139,  ...,  0.2977,  1.2501,  0.0073],
        [-0.0800, -0.1284,  0.0105,  ...,  0.3040,  0.6941, -0.4551],
        [-0.0632, -0.1252,  0.0095,  ..., -0.0045,  0.6536, -0.3779],
        ...,
        [-0.0528, -0.1172,  0.0185,  ...,  0.5362,  0.1098, -0.0150],
        [-0.0446, -0.1083,  0.0115,  ...,  0.9393,  0.7493, -0.3109],
        [-0.0739, -0.0803,  0.0144,  ..., -1.0864,  0.3944,  0.5768]],
       device='cuda:0', grad_fn=<CatBackward0>)


RuntimeError: Given groups=1, weight of size [50, 300, 3], expected input[1, 26, 300] to have 300 channels, but got 26 channels instead