## pipeline

In [1]:
import networkx as nx
import numpy as np
import torch

import torch.nn as nn

from statistics import mean

from func.run_pipeline_super_vox import get_outlayer_of_a_3d_shape, get_crop_by_pixel_val
from func.ultis import load_obj

from func.graph_learning import SuperVoxToNxGraph, VoxelGraphDataset


Using backend: pytorch


In [2]:
# load graphs
from func.ultis import load_obj

graphs = load_obj("graphs_dataset_train")


In [3]:
dataset = VoxelGraphDataset(graphs)

g = dataset[0]


# TODO probably should normalize features!!!!

In [4]:
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(dataset)
num_train = int(num_examples)

train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))

train_dataloader = GraphDataLoader(
    dataset, sampler=train_sampler, batch_size=5, drop_last=False)

In [5]:
from func.graph_models import GCN, GCN_2
import torch.nn.functional as F
import random

model = GCN(1, num_classes=2)
CELoss = nn.CrossEntropyLoss()

# training
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
best_val_acc = 0



# features = g.ndata['feat']
# labels = g.ndata['label']
# train_mask = g.ndata['train_mask']
# val_mask = g.ndata['val_mask']

# calculate weights for loss
"""
pos_weights = []
neg_weights = []
for graph_number in range(len(dataset)):
    sample_graph = dataset[graph_number]
    labels = sample_graph.ndata['label']
    # create class weights
    number_positives = torch.count_nonzero(labels)
    positive_weight = 1 - (number_positives / len(labels))
    negative_weight = 1 - positive_weight

    pos_weights.append(positive_weight.item())
    neg_weights.append(negative_weight.item())
weights = torch.tensor([mean(neg_weights), mean(pos_weights)])
print(f"weights: {weights}")
"""
from torchmetrics import F1Score

f1 = F1Score(num_classes=2, average='weighted')

epoch_loss = []
epoch_accuracy = []

epoch_f1score = []
epoch_f1score_val = []

epoch_accuracy_val = []
# best_val_acc = 0
best_f1_score = 0

for e in range(500):
    # get random elements for batch
    #graphs_numbers_list = range(0, len(dataset))
    #rand_graph_numbers = random.sample(graphs_numbers_list, len(dataset))
    for graph_number in range(len(dataset)):
    #for graph_number in range(1):
    #for graph_number in rand_graph_numbers:
        # Forward
        model.train()
        sample_graph = dataset[graph_number]
        features = sample_graph.ndata['feat']
        labels = sample_graph.ndata['label']
        train_mask = sample_graph.ndata['train_mask']
        val_mask = sample_graph.ndata['val_mask']

        # create class weights
        number_positives = torch.count_nonzero(labels)
        percentage_positives = number_positives / len(labels)
        percentage_negatives = 1 - percentage_positives

        weights = torch.tensor([1 - percentage_negatives, 1 - percentage_positives])
        #weights = torch.tensor([0.95, 0.05])
        #print(weights)

        CELoss = nn.CrossEntropyLoss(weight=weights)
        train_mask = sample_graph.ndata['train_mask']
        val_mask = sample_graph.ndata['val_mask']
        logits = model(sample_graph, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that you should only compute the losses of the nodes in the training set.
        loss = CELoss(logits[train_mask], labels[train_mask])
        epoch_loss.append(loss.item())
        #print(loss)
        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()

        train_f1_score = f1(pred[train_mask], labels[train_mask])
        val_f1_score = f1(pred[val_mask], labels[val_mask])


        epoch_accuracy.append(train_acc.item())
        epoch_accuracy_val.append(val_acc.item())

        epoch_f1score.append(train_f1_score.item())
        epoch_f1score_val.append(val_f1_score.item())


        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if e % 5 == 0:
        print('In epoch {}, loss: {:.7f}, accuracy: {:.3f}, val accuracy: {:.3f}, f1score: {:.3f}, val f1score: {:.3f}'.format(
            e, mean(epoch_loss), mean(epoch_accuracy), mean(epoch_accuracy_val), mean(epoch_f1score), mean(epoch_f1score_val)))

        #if mean(epoch_accuracy_val) >= best_val_acc:
        if mean(epoch_f1score_val) >= best_f1_score:
            print("new best val f1 score")
            torch.save(model.state_dict(), "output/graph_model.pt")
            best_f1_score = mean(epoch_f1score_val)
        epoch_loss = []
        epoch_accuracy = []

        epoch_accuracy_val = []
        epoch_f1score_val = []




In epoch 0, loss: 0.6996461, accuracy: 0.594, val accuracy: 0.600, f1score: 0.685, val f1score: 0.676
new best val f1 score
In epoch 5, loss: 0.6928060, accuracy: 0.894, val accuracy: 0.874, f1score: 0.850, val f1score: 0.852
new best val f1 score
In epoch 10, loss: 0.6923294, accuracy: 0.909, val accuracy: 0.889, f1score: 0.868, val f1score: 0.860
new best val f1 score
In epoch 15, loss: 0.6923858, accuracy: 0.929, val accuracy: 0.907, f1score: 0.878, val f1score: 0.868
new best val f1 score
In epoch 20, loss: 0.6923076, accuracy: 0.919, val accuracy: 0.897, f1score: 0.882, val f1score: 0.863
In epoch 25, loss: 0.6923977, accuracy: 0.925, val accuracy: 0.903, f1score: 0.885, val f1score: 0.866
In epoch 30, loss: 0.6923272, accuracy: 0.928, val accuracy: 0.906, f1score: 0.887, val f1score: 0.867
In epoch 35, loss: 0.6923639, accuracy: 0.911, val accuracy: 0.893, f1score: 0.888, val f1score: 0.862
In epoch 40, loss: 0.6923070, accuracy: 0.928, val accuracy: 0.905, f1score: 0.889, val f1

from func.graph_models import GCN, GCN_2
import torch.nn.functional as F
import random
import dgl

model = GCN(1, num_classes=2)
CELoss = nn.CrossEntropyLoss()

# training
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
best_val_acc = 0



# features = g.ndata['feat']
# labels = g.ndata['label']
# train_mask = g.ndata['train_mask']
# val_mask = g.ndata['val_mask']

# calculate weights for loss
"""
pos_weights = []
neg_weights = []
for graph_number in range(len(dataset)):
    sample_graph = dataset[graph_number]
    labels = sample_graph.ndata['label']
    # create class weights
    number_positives = torch.count_nonzero(labels)
    positive_weight = 1 - (number_positives / len(labels))
    negative_weight = 1 - positive_weight

    pos_weights.append(positive_weight.item())
    neg_weights.append(negative_weight.item())
weights = torch.tensor([mean(neg_weights), mean(pos_weights)])
print(f"weights: {weights}")
"""

# build one big graph
graphs_list = []
for i in range(len(dataset)):
    graphs_list.append(dataset[i])

large_g = dgl.batch(graphs_list)

epoch_loss = []
epoch_accuracy = []
for e in range(1000):
    # get random elements for batch
    #graphs_numbers_list = range(0, len(dataset))
    #rand_graph_numbers = random.sample(graphs_numbers_list, len(dataset))
    sample_graph = large_g
    features = sample_graph.ndata['feat']
    labels = sample_graph.ndata['label']

    # create class weights
    number_positives = torch.count_nonzero(labels)
    percentage_positives = number_positives / len(labels)
    percentage_negatives = 1 - percentage_positives

    weights = torch.tensor([1 - percentage_negatives, 1 - percentage_positives])
    #weights = torch.tensor([0.95, 0.05])
    #print(weights)

    CELoss = nn.CrossEntropyLoss(weight=weights)
    #train_mask = sample_graph.ndata['train_mask']
    #val_mask = sample_graph.ndata['val_mask']
    logits = model(sample_graph, features)

    # Compute prediction
    pred = logits.argmax(1)

    # Compute loss
    # Note that you should only compute the losses of the nodes in the training set.
    loss = CELoss(logits, labels)
    epoch_loss.append(loss.item())
    #print(loss)
    # Compute accuracy on training/validation/test
    train_acc = (pred == labels).float().mean()
    epoch_accuracy.append(train_acc.item())
    # Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % 20 == 0:
        print('In epoch {}, loss: {:.3f}, accuracy: {:.3f}'.format(
            e, mean(epoch_loss), mean(epoch_accuracy)))
        epoch_loss = []
        epoch_accuracy = []




# Train on GPU
g = g.to('cuda')
model = GCN(1, 16, dataset.num_classes).to('cuda')
train(g, model)

In [6]:
g

Graph(num_nodes=1405, num_edges=5327,
      ndata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={'weight': Scheme(shape=(1,), dtype=torch.float64)})

In [7]:

g.ndata['feat'].shape

torch.Size([1405, 1])

In [8]:

predictions = model(g, g.ndata['feat']).argmax(1).numpy()

In [9]:
np.unique(predictions)

array([0, 1])

In [10]:
labels = g.ndata['label']
np.unique(labels)

array([0, 1])

In [11]:
len(labels[labels==1])

1315

In [12]:
len(labels[labels==0])

90

In [13]:
len(predictions[predictions==1])

1051

In [14]:
len(predictions[predictions==0])

354

In [15]:
graph_model = GCN(1, num_classes=2)
checkpoint_graph = 'output/graph_model.pt'
graph_model.load_state_dict(torch.load(checkpoint_graph))
graph_model.eval()

GCN(
  (conv1): GraphConv(in=1, out=4, normalization=both, activation=None)
  (dropout): Dropout(p=0.5, inplace=False)
  (conv2): GraphConv(in=4, out=2, normalization=both, activation=None)
)

In [16]:
predictions = graph_model(g, g.ndata['feat']).argmax(1).numpy()


In [17]:

len(predictions[predictions==1])

1405

In [18]:
len(predictions[predictions==0])

0