In [1]:
import preprocessing.preprocessing as pp
import sampling
import random
import torch
from models import graphClassifier
from torch_geometric.loader import DataLoader
import pandas as pd

ModuleNotFoundError: No module named 'preprocessing'

In [None]:

# Node and edge tables of the nerve network.
nodesFileNerve = "~/Documents/Intestine/nerve-mask/nodes_nerve_bs2_fh.csv"
edgesFileNerve = "~/Documents/Intestine/nerve-mask/edges_nerve_bs2_fh.csv"

# Node and edge tables of the lymph network.
nodesFileLymph = "~/Documents/Intestine/lymph-mask/nodes_lymph_bs2_fh.csv"
edgesFileLymph = "~/Documents/Intestine/lymph-mask/edges_lymph_bs2_fh.csv"

# All four tables are read the same way: ";" as delimiter and the "id"
# column as row index.
nodes_n, edges_n, nodes_l, edges_l = (
    pd.read_csv(csv_path, sep=";", index_col="id")
    for csv_path in (nodesFileNerve, edgesFileNerve, nodesFileLymph, edgesFileLymph)
)

# Rescale the node positions with the factors provided by Luciano.
position_factors = (1.65, 1.65, 6)
nodes_l = pp.scale_position(nodes_l, position_factors)
nodes_n = pp.scale_position(nodes_n, position_factors)

# Build one graph per network; the two calls differ only in their tables
# and in the index_addon ("n" for nerve, "l" for lymph).
G_nerve = pp.create_graph(nodes_n, edges_n, index_addon="n")
G_lymph = pp.create_graph(nodes_l, edges_l, index_addon="l")

# Short description of each graph (nerve first, then lymph).
for network_graph in (G_nerve, G_lymph):
    pp.graph_summary(network_graph)

# Simplified graphs: self-loops, multi edges and isolated nodes removed.
G_nerve_einf = pp.to_einfach(G_nerve)
G_lymph_einf = pp.to_einfach(G_lymph)

# Enrich the node attributes with information from the incident edges
# (lymph first, then nerve). The return value is not used.
for simple_graph in (G_lymph_einf, G_nerve_einf):
    pp.enrich_node_attrs(simple_graph)


***************
Number of Nodes: 1861
Number of Edges: 2413
Number of Connected Components: 14
Number of Self Loops: 9
Number of Isolated Nodes: 0
Average Node Degree: 2.5932294465341212
***************
***************
Number of Nodes: 2152
Number of Edges: 2710
Number of Connected Components: 33
Number of Self Loops: 3
Number of Isolated Nodes: 0
Average Node Degree: 2.5185873605947955
***************


# Graph Classification Using Subsets of the Lymph and Nerve Networks for Training and Testing

In [None]:

# Seed Python's RNG so that random.shuffle below -- and therefore the
# train/test split -- is identical on every "Restart & Run All".
# NOTE(review): whether the subgraph sampling itself becomes deterministic
# depends on which RNG sampling.randomGeomSubgraphs uses internally -- TODO confirm.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

graph_node_num = 100    # passed to the sampler as node_sample_size
num_starts = 100        # passed to the sampler as starts
train_fraction = 0.8    # share of the shuffled graphs used for training

# create random samples; lymph subgraphs get label 1, nerve subgraphs label 0
randomSampleLymphNx, randomSampleLymph = sampling.randomGeomSubgraphs(
    G_lymph_einf, label=1, starts=num_starts, node_sample_size=graph_node_num, mode="rnn"
)
randomSampleNerveNx, randomSampleNerve = sampling.randomGeomSubgraphs(
    G_nerve_einf, label=0, starts=num_starts, node_sample_size=graph_node_num, mode="rnn"
)

# combine the graphs of both classes and shuffle them (in place)
allGraphs = randomSampleLymph + randomSampleNerve
random.shuffle(allGraphs)

# split into training and test set: the first train_fraction of the
# shuffled list is used for training, the remainder for testing
breaker = int(len(allGraphs) * train_fraction)
train_dataset = allGraphs[:breaker]
test_dataset = allGraphs[breaker:]

Creating subgraphs using random node neighbor selection.: 100%|██████████| 100/100 [00:00<00:00, 375.15it/s]
Creating subgraphs using random node neighbor selection.: 100%|██████████| 100/100 [00:00<00:00, 516.26it/s]


In [None]:
# Seed torch's global RNG before the model is built so that the weight
# initialisation and the shuffling order of train_loader are repeatable
# across kernel restarts.
MODEL_SEED = 42
torch.manual_seed(MODEL_SEED)

# selection of the features to use (column indices applied to data.x)
feat_slice = [1, 3, 7, 8]

# create the model; its input width follows the number of selected features
model = graphClassifier.GCN_GC(hidden_channels=32, in_features=len(feat_slice), classes=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# create batch data loaders for training and test set; only the training
# loader reshuffles its graphs
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



def train_GC():
    """Run one optimisation pass over every mini-batch of ``train_loader``.

    Works on the module-level ``model``, ``criterion``, ``optimizer`` and
    ``feat_slice``. Nothing is returned.
    """
    model.train()

    for batch in train_loader:
        # Keep only the selected feature columns of the node feature matrix.
        selected_feats = batch.x[:, feat_slice]
        # Forward pass.
        logits = model(selected_feats, batch.edge_index, batch.batch)
        batch_loss = criterion(logits, batch.y)
        # Backward pass and parameter update.
        batch_loss.backward()
        optimizer.step()
        # Reset the gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()



def test_GC(loader):
    """Return the accuracy of the module-level ``model`` on ``loader``.

    Args:
        loader: iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; it must also provide ``loader.dataset`` with a length.

    Returns:
        Number of correctly predicted graphs divided by ``len(loader.dataset)``.

    Raises:
        ZeroDivisionError: if ``loader.dataset`` is empty.
    """
    model.eval()

    correct = 0
    # model.eval() only switches layer behaviour; it does not turn off
    # autograd. no_grad() avoids building a graph for every evaluation batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x[:, feat_slice], data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest score.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


# Train for a fixed number of epochs; after each one, report the accuracy
# on the training and on the test split.
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    train_GC()
    train_acc, test_acc = test_GC(train_loader), test_GC(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')



Epoch: 001, Train Acc: 0.5166, Test Acc: 0.4737
Epoch: 002, Train Acc: 0.5166, Test Acc: 0.4737
Epoch: 003, Train Acc: 0.5166, Test Acc: 0.4737
Epoch: 004, Train Acc: 0.5430, Test Acc: 0.4737
Epoch: 005, Train Acc: 0.5828, Test Acc: 0.5000
Epoch: 006, Train Acc: 0.6623, Test Acc: 0.5526
Epoch: 007, Train Acc: 0.5563, Test Acc: 0.4737
Epoch: 008, Train Acc: 0.5298, Test Acc: 0.4737
Epoch: 009, Train Acc: 0.7351, Test Acc: 0.8421
Epoch: 010, Train Acc: 0.8079, Test Acc: 0.7368
Epoch: 011, Train Acc: 0.5695, Test Acc: 0.4737
Epoch: 012, Train Acc: 0.7285, Test Acc: 0.5789
Epoch: 013, Train Acc: 0.8212, Test Acc: 0.8158
Epoch: 014, Train Acc: 0.8146, Test Acc: 0.8158
Epoch: 015, Train Acc: 0.8146, Test Acc: 0.7632
Epoch: 016, Train Acc: 0.8146, Test Acc: 0.7632
Epoch: 017, Train Acc: 0.8079, Test Acc: 0.7632
Epoch: 018, Train Acc: 0.8212, Test Acc: 0.8158
Epoch: 019, Train Acc: 0.8212, Test Acc: 0.7632
Epoch: 020, Train Acc: 0.7682, Test Acc: 0.6053
Epoch: 021, Train Acc: 0.8146, Test Acc: