In [1]:
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv, GATConv, SAGEConv
from torch_geometric.data import Data, DataLoader
import matplotlib.pyplot as plt


import numpy as np
import torch.nn.functional as F

In [2]:
# Read the pickled dataset from disk and show its first entry.
# NOTE(review): pickle.load can execute arbitrary code - only open files from a trusted source.
with open('graph-slow75.pkl', 'rb') as pickle_file:
    loaded_data = pickle.load(pickle_file)

print(loaded_data[0])

Data(x=[468, 1], edge_index=[2, 46971], edge_attr=[46971], y=[468])


In [3]:
# The first `num_train_samples` entries are used for training; everything after them is held out for testing
num_train_samples = 3744

train_data, test_data = loaded_data[:num_train_samples], loaded_data[num_train_samples:]

In [4]:
# Wrap the train and test splits in mini-batch loaders (order is kept as-is: shuffle=False for both)
batch_size = 16
loader_options = dict(batch_size=batch_size, shuffle=False)
train_loader = DataLoader(train_data, **loader_options)
test_loader = DataLoader(test_data, **loader_options)



In [5]:
# Run on the first CUDA device when one is available; fall back to the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [7]:
# Two-layer GCN model
class GCN(nn.Module):
    """Two stacked GCNConv layers, each followed by its own dropout module.

    Args:
        input_dim: feature size passed to the first GCNConv.
        hidden_dim: output size of the first GCNConv and input size of the second.
        output_dim: output size of the second GCNConv.
        dropout_prob: probability handed to both nn.Dropout modules.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_prob):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.dropout1 = nn.Dropout(dropout_prob)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout2 = nn.Dropout(dropout_prob)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> dropout1 -> conv2 -> dropout2 and return the result."""
        hidden = self.conv1(x, edge_index)
        hidden = self.dropout1(torch.relu(hidden))
        # NOTE(review): dropout2 acts directly on the conv2 output (there is no activation
        # in between). If that output is used as the final logits, a non-zero dropout_prob
        # will randomly zero predictions in train mode - confirm this is intended.
        return self.dropout2(self.conv2(hidden, edge_index))

In [None]:
weight = torch.tensor([4.0])  # Weight for positive class

# Initialize the GCN model, optimizer, and loss function
model = GCN(input_dim=1, hidden_dim=256, output_dim=1, dropout_prob=0).to(device)  # Adjust dimensions as needed
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.BCEWithLogitsLoss(pos_weight=weight).to(device)  # Binary classification loss on raw logits

# Training loop
model.train()
num_epochs = 50  # Adjust as needed

# Per-epoch loss and accuracy values, kept for plotting
loss_curve = []
accuracy_curve = []

for epoch in range(num_epochs):
    total_loss = 0
    TP_train = 0
    TN_train = 0
    FP_train = 0
    FN_train = 0

    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.x.float(), batch.edge_index)
        # squeeze(-1) removes only the trailing output dimension, so a batch that
        # happens to contain a single row is not collapsed to a scalar
        logits = out.squeeze(-1)
        loss = criterion(logits, batch.y.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Confusion-matrix counts for this batch (a logit above 0 is predicted positive)
        predicted = logits > 0
        is_positive = batch.y == 1
        is_negative = batch.y == 0

        TP_train += torch.sum(predicted & is_positive).item()
        TN_train += torch.sum(~predicted & is_negative).item()
        FP_train += torch.sum(predicted & is_negative).item()
        FN_train += torch.sum(~predicted & is_positive).item()

    average_loss = total_loss / len(train_loader)

    # Every ratio falls back to 0.0 when its denominator is zero (for example when no
    # positive is predicted or a class is absent) instead of raising ZeroDivisionError
    n_scored = TP_train + TN_train + FP_train + FN_train
    train_accuracy = (TP_train + TN_train) / n_scored if n_scored else 0.0
    train_sensitivity = TP_train / (TP_train + FN_train) if (TP_train + FN_train) else 0.0
    train_specificity = TN_train / (TN_train + FP_train) if (TN_train + FP_train) else 0.0
    train_precision = TP_train / (TP_train + FP_train) if (TP_train + FP_train) else 0.0

    # F1-score over the training set. With zero true positives both precision and
    # sensitivity are 0, so report 0 rather than dividing by their sum.
    if train_precision + train_sensitivity > 0:
        train_f1_score = 2 * (train_precision * train_sensitivity) / (train_precision + train_sensitivity)
    else:
        train_f1_score = 0.0

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Accuracy: {train_accuracy:.4f}, Sensitivity: {train_sensitivity:.4f}, Specificity: {train_specificity:.4f}, F1score: {train_f1_score:.4f}')

    # Append loss and accuracy to the curves
    loss_curve.append(average_loss)
    accuracy_curve.append(train_accuracy)

In [None]:
# Create a figure with two subplots stacked vertically
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

# Build each x-axis from the recorded curve itself rather than from `num_epochs`:
# `num_epochs` is reassigned by later training cells, and a training run that was
# interrupted leaves fewer points than `num_epochs`, which would make plot() fail
# on mismatched x/y lengths.
loss_epochs = range(1, len(loss_curve) + 1)
accuracy_epochs = range(1, len(accuracy_curve) + 1)

# Plot the loss curve in the first subplot
ax1.plot(loss_epochs, loss_curve, label='Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.set_title('Loss Curve')

# Plot the accuracy curve (as a percentage) in the second subplot
ax2.plot(accuracy_epochs, [acc * 100 for acc in accuracy_curve], label='Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy (%)')
ax2.set_title('Accuracy Curve')

# Adjust spacing between subplots
plt.tight_layout()

# Save the image with 600 DPI
plt.savefig('run15_training.png', dpi=600)

# Display the plot
plt.show()


In [None]:
# Evaluation loop (on the test set)
model.eval()

# Decision threshold applied to the raw logit. NOTE(review): this is -0.1
# (sigmoid(-0.1) ~= 0.475), not the 0 used for the training-time metrics -
# confirm the offset is intentional and was not tuned on the test set.
logit_threshold = -0.1

TP_test = 0
TN_test = 0
FP_test = 0
FN_test = 0

with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        out = model(batch.x.float(), batch.edge_index)
        # squeeze(-1) removes only the trailing output dimension so the prediction
        # mask lines up element-wise with batch.y
        predicted = out.squeeze(-1) > logit_threshold
        is_positive = batch.y == 1
        is_negative = batch.y == 0

        # Accumulate TP, TN, FP, FN
        TP_test += torch.sum(predicted & is_positive).item()
        TN_test += torch.sum(~predicted & is_negative).item()
        FP_test += torch.sum(predicted & is_negative).item()
        FN_test += torch.sum(~predicted & is_positive).item()

# Every ratio below falls back to 0.0 when its denominator is zero (for example when
# the model predicts no positives) instead of raising ZeroDivisionError.

# Calculate Sensitivity (Recall)
test_sensitivity = TP_test / (TP_test + FN_test) if (TP_test + FN_test) else 0.0

# Calculate Specificity
test_specificity = TN_test / (TN_test + FP_test) if (TN_test + FP_test) else 0.0

# Calculate F1-Score for the whole test set
test_precision = TP_test / (TP_test + FP_test) if (TP_test + FP_test) else 0.0
if test_precision + test_sensitivity > 0:
    test_f1_score = 2 * (test_precision * test_sensitivity) / (test_precision + test_sensitivity)
else:
    test_f1_score = 0.0

# Calculate Accuracy
n_scored = TP_test + TN_test + FP_test + FN_test
test_accuracy = (TP_test + TN_test) / n_scored if n_scored else 0.0

print(f'GCN: Test Accuracy: {test_accuracy * 100:.2f}%')
print(f"Sensitivity (Recall): {test_sensitivity:.4f}")
print(f"Specificity: {test_specificity:.4f}")
print(f"F1-Score: {test_f1_score:.4f}")

In [7]:
# Two-layer GAT model
class GAT(nn.Module):
    """Two stacked GATConv layers with a ReLU between them.

    Args:
        input_dim: feature size passed to the first GATConv.
        hidden_dim: per-head output size of the first GATConv.
        output_dim: output size of the second GATConv.
        num_heads: number of attention heads used by the first GATConv.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_heads):
        super().__init__()
        # conv1 runs `num_heads` heads, so conv2 is sized for hidden_dim * num_heads inputs
        self.conv1 = GATConv(input_dim, hidden_dim, heads=num_heads)
        self.conv2 = GATConv(hidden_dim * num_heads, output_dim, heads=1)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> conv2 and return the conv2 output unchanged."""
        hidden = torch.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

In [8]:
# Initialize the GAT model, optimizer, and loss function
gat_model = GAT(input_dim=1, hidden_dim=256, output_dim=1, num_heads=4).to(device)  # Adjust dimensions and num_heads as needed
gat_optimizer = optim.Adam(gat_model.parameters(), lr=0.01)
weight = torch.tensor([4.0])  # Weight for positive class
gat_criterion = nn.BCEWithLogitsLoss(pos_weight=weight).to(device)  # Binary classification loss on raw logits

# Training loop for GAT
gat_model.train()
num_epochs = 20  # Adjust as needed

# Per-epoch loss and accuracy values, kept for plotting
loss_curve = []
accuracy_curve = []

for epoch in range(num_epochs):
    total_loss = 0
    TP_train = 0
    TN_train = 0
    FP_train = 0
    FN_train = 0

    for batch in train_loader:
        batch = batch.to(device)
        gat_optimizer.zero_grad()
        out = gat_model(batch.x.float(), batch.edge_index)
        # squeeze(-1) removes only the trailing output dimension, so a batch that
        # happens to contain a single row is not collapsed to a scalar
        logits = out.squeeze(-1)
        loss = gat_criterion(logits, batch.y.float())
        loss.backward()
        gat_optimizer.step()
        total_loss += loss.item()

        # Confusion-matrix counts for this batch (a logit above 0 is predicted positive)
        predicted = logits > 0
        is_positive = batch.y == 1
        is_negative = batch.y == 0

        TP_train += torch.sum(predicted & is_positive).item()
        TN_train += torch.sum(~predicted & is_negative).item()
        FP_train += torch.sum(predicted & is_negative).item()
        FN_train += torch.sum(~predicted & is_positive).item()

    average_loss = total_loss / len(train_loader)

    # Every ratio falls back to 0.0 when its denominator is zero (for example when no
    # positive is predicted or a class is absent) instead of raising ZeroDivisionError
    n_scored = TP_train + TN_train + FP_train + FN_train
    train_accuracy = (TP_train + TN_train) / n_scored if n_scored else 0.0
    train_sensitivity = TP_train / (TP_train + FN_train) if (TP_train + FN_train) else 0.0
    train_specificity = TN_train / (TN_train + FP_train) if (TN_train + FP_train) else 0.0
    train_precision = TP_train / (TP_train + FP_train) if (TP_train + FP_train) else 0.0

    # F1-score over the training set. With zero true positives both precision and
    # sensitivity are 0, so report 0 rather than dividing by their sum.
    if train_precision + train_sensitivity > 0:
        train_f1_score = 2 * (train_precision * train_sensitivity) / (train_precision + train_sensitivity)
    else:
        train_f1_score = 0.0

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Accuracy: {train_accuracy:.4f}, Sensitivity: {train_sensitivity:.4f}, Specificity: {train_specificity:.4f}, F1score: {train_f1_score:.4f}')

    # Append loss and accuracy to the curves
    loss_curve.append(average_loss)
    accuracy_curve.append(train_accuracy)

Epoch [1/20], Loss: 0.5564, Accuracy: 0.8218, Sensitivity: 0.9181, Specificity: 0.7890, F1score: 0.7241
Epoch [2/20], Loss: 0.4409, Accuracy: 0.9228, Sensitivity: 0.9372, Specificity: 0.9178, F1score: 0.8607
Epoch [3/20], Loss: 0.4913, Accuracy: 0.8607, Sensitivity: 0.9261, Specificity: 0.8384, F1score: 0.7720
Epoch [4/20], Loss: 0.5774, Accuracy: 0.8426, Sensitivity: 0.8950, Specificity: 0.8247, F1score: 0.7433
Epoch [5/20], Loss: 0.5218, Accuracy: 0.8704, Sensitivity: 0.8755, Specificity: 0.8686, F1score: 0.7748
Epoch [6/20], Loss: 0.4328, Accuracy: 0.9031, Sensitivity: 0.8841, Specificity: 0.9095, F1score: 0.8229
Epoch [7/20], Loss: 0.4086, Accuracy: 0.9135, Sensitivity: 0.8824, Specificity: 0.9241, F1score: 0.8385
Epoch [8/20], Loss: 0.6528, Accuracy: 0.7442, Sensitivity: 0.9120, Specificity: 0.6869, F1score: 0.6449
Epoch [9/20], Loss: 0.6829, Accuracy: 0.7096, Sensitivity: 0.9126, Specificity: 0.6403, F1score: 0.6155
Epoch [10/20], Loss: 0.6787, Accuracy: 0.7106, Sensitivity: 0.91

In [9]:
# Evaluation loop (on the test set) for GAT
gat_model.eval()

# Decision threshold applied to the raw logit. NOTE(review): this is -0.1
# (sigmoid(-0.1) ~= 0.475), not the 0 used for the training-time metrics -
# confirm the offset is intentional and was not tuned on the test set.
logit_threshold = -0.1

TP_test = 0
TN_test = 0
FP_test = 0
FN_test = 0

with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        out = gat_model(batch.x.float(), batch.edge_index)
        # squeeze(-1) removes only the trailing output dimension so the prediction
        # mask lines up element-wise with batch.y
        predicted = out.squeeze(-1) > logit_threshold
        is_positive = batch.y == 1
        is_negative = batch.y == 0

        # Accumulate TP, TN, FP, FN
        TP_test += torch.sum(predicted & is_positive).item()
        TN_test += torch.sum(~predicted & is_negative).item()
        FP_test += torch.sum(predicted & is_negative).item()
        FN_test += torch.sum(~predicted & is_positive).item()

# Every ratio below falls back to 0.0 when its denominator is zero (for example when
# the model predicts no positives) instead of raising ZeroDivisionError.

# Calculate Sensitivity (Recall)
test_sensitivity = TP_test / (TP_test + FN_test) if (TP_test + FN_test) else 0.0

# Calculate Specificity
test_specificity = TN_test / (TN_test + FP_test) if (TN_test + FP_test) else 0.0

# Calculate F1-Score for the whole test set
test_precision = TP_test / (TP_test + FP_test) if (TP_test + FP_test) else 0.0
if test_precision + test_sensitivity > 0:
    test_f1_score = 2 * (test_precision * test_sensitivity) / (test_precision + test_sensitivity)
else:
    test_f1_score = 0.0

# Calculate Accuracy
n_scored = TP_test + TN_test + FP_test + FN_test
test_accuracy = (TP_test + TN_test) / n_scored if n_scored else 0.0

print(f'GAT: Test Accuracy: {test_accuracy * 100:.2f}%')
print(f"Sensitivity (Recall): {test_sensitivity:.4f}")
print(f"Specificity: {test_specificity:.4f}")
print(f"F1-Score: {test_f1_score:.4f}")

print(test_accuracy, test_sensitivity, test_specificity, test_f1_score)

GAT: Test Accuracy: 69.39%
Sensitivity (Recall): 0.7423
Specificity: 0.6828
F1-Score: 0.4752
0.6938879985754985 0.7423212937576986 0.6827681837671165 0.47522989456011594


In [6]:
# Define your GraphSAGE model
class GraphSAGE(nn.Module):
    """Two stacked SAGEConv layers (mean aggregation) with a ReLU between them.

    Args:
        input_dim: feature size passed to the first SAGEConv.
        hidden_dim: output size of the first SAGEConv and input size of the second.
        output_dim: output size of the second SAGEConv.
        num_samples: accepted for backward compatibility and otherwise unused.
            SAGEConv has no neighbor-sampling options; every edge present in
            `edge_index` is aggregated. Sub-sampling neighbors has to be done
            when the batches are built, not inside the layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_samples=None):
        super().__init__()
        # The aggregation keyword of SAGEConv is `aggr`. The previous `aggregator`,
        # `neighbor_sampler` and `num_samples` keywords are not SAGEConv parameters:
        # depending on the installed PyG version they are silently dropped or raise
        # a TypeError, so they are no longer forwarded.
        self.conv1 = SAGEConv(input_dim, hidden_dim, aggr='mean')
        self.conv2 = SAGEConv(hidden_dim, output_dim, aggr='mean')

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> conv2 and return the conv2 output unchanged."""
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [7]:
# Initialize the GraphSAGE model, optimizer, and loss function
graphsage_model = GraphSAGE(input_dim=1, hidden_dim=16, output_dim=1, num_samples=1).to(device)  # Adjust dimensions and num_samples as needed
graphsage_optimizer = optim.Adam(graphsage_model.parameters(), lr=0.01)
weight = torch.tensor([1.0])  # Weight for positive class (1.0 leaves both classes equally weighted)
graphsage_criterion = nn.BCEWithLogitsLoss(pos_weight=weight).to(device)  # Binary classification loss on raw logits


# Training loop for GraphSAGE
graphsage_model.train()
num_epochs = 20  # Adjust as needed

# Per-epoch loss and accuracy values, kept for plotting
loss_curve = []
accuracy_curve = []

for epoch in range(num_epochs):
    total_loss = 0
    TP_train = 0
    TN_train = 0
    FP_train = 0
    FN_train = 0

    for batch in train_loader:
        batch = batch.to(device)
        graphsage_optimizer.zero_grad()
        out = graphsage_model(batch.x.float(), batch.edge_index)
        # squeeze(-1) removes only the trailing output dimension, so a batch that
        # happens to contain a single row is not collapsed to a scalar
        logits = out.squeeze(-1)
        loss = graphsage_criterion(logits, batch.y.float())
        loss.backward()
        graphsage_optimizer.step()
        total_loss += loss.item()

        # Confusion-matrix counts for this batch (a logit above 0 is predicted positive)
        predicted = logits > 0
        is_positive = batch.y == 1
        is_negative = batch.y == 0

        TP_train += torch.sum(predicted & is_positive).item()
        TN_train += torch.sum(~predicted & is_negative).item()
        FP_train += torch.sum(predicted & is_negative).item()
        FN_train += torch.sum(~predicted & is_positive).item()

    average_loss = total_loss / len(train_loader)

    # Every ratio falls back to 0.0 when its denominator is zero (for example when no
    # positive is predicted or a class is absent) instead of raising ZeroDivisionError
    n_scored = TP_train + TN_train + FP_train + FN_train
    train_accuracy = (TP_train + TN_train) / n_scored if n_scored else 0.0
    train_sensitivity = TP_train / (TP_train + FN_train) if (TP_train + FN_train) else 0.0
    train_specificity = TN_train / (TN_train + FP_train) if (TN_train + FP_train) else 0.0
    train_precision = TP_train / (TP_train + FP_train) if (TP_train + FP_train) else 0.0

    # F1-score over the training set. With zero true positives both precision and
    # sensitivity are 0, so report 0 rather than dividing by their sum.
    if train_precision + train_sensitivity > 0:
        train_f1_score = 2 * (train_precision * train_sensitivity) / (train_precision + train_sensitivity)
    else:
        train_f1_score = 0.0

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Accuracy: {train_accuracy:.4f}, Sensitivity: {train_sensitivity:.4f}, Specificity: {train_specificity:.4f}, F1score: {train_f1_score:.4f}')

    # Append loss and accuracy to the curves
    loss_curve.append(average_loss)
    accuracy_curve.append(train_accuracy)

Epoch [1/20], Loss: 0.1277, Accuracy: 0.9613, Sensitivity: 0.8989, Specificity: 0.9826, F1score: 0.9220
Epoch [2/20], Loss: 0.0037, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [3/20], Loss: 0.0033, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [4/20], Loss: 0.0032, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [5/20], Loss: 0.0031, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [6/20], Loss: 0.0031, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [7/20], Loss: 0.0031, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [8/20], Loss: 0.0031, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [9/20], Loss: 0.0031, Accuracy: 0.9997, Sensitivity: 0.9992, Specificity: 0.9998, F1score: 0.9993
Epoch [10/20], Loss: 0.0031, Accuracy: 0.9997, Sensitivity: 0.99

In [9]:
# Evaluation loop (on the test set) for GraphSAGE
graphsage_model.eval()

# Decision threshold applied to the raw logit. NOTE(review): this is -0.1
# (sigmoid(-0.1) ~= 0.475), not the 0 used for the training-time metrics -
# confirm the offset is intentional and was not tuned on the test set.
logit_threshold = -0.1

TP_test = 0
TN_test = 0
FP_test = 0
FN_test = 0

with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        out = graphsage_model(batch.x.float(), batch.edge_index)
        # squeeze(-1) removes only the trailing output dimension so the prediction
        # mask lines up element-wise with batch.y
        predicted = out.squeeze(-1) > logit_threshold
        is_positive = batch.y == 1
        is_negative = batch.y == 0

        # Accumulate TP, TN, FP, FN
        TP_test += torch.sum(predicted & is_positive).item()
        TN_test += torch.sum(~predicted & is_negative).item()
        FP_test += torch.sum(predicted & is_negative).item()
        FN_test += torch.sum(~predicted & is_positive).item()

# Every ratio below falls back to 0.0 when its denominator is zero (for example when
# the model predicts no positives) instead of raising ZeroDivisionError.

# Calculate Sensitivity (Recall)
test_sensitivity = TP_test / (TP_test + FN_test) if (TP_test + FN_test) else 0.0

# Calculate Specificity
test_specificity = TN_test / (TN_test + FP_test) if (TN_test + FP_test) else 0.0

# Calculate F1-Score for the whole test set
test_precision = TP_test / (TP_test + FP_test) if (TP_test + FP_test) else 0.0
if test_precision + test_sensitivity > 0:
    test_f1_score = 2 * (test_precision * test_sensitivity) / (test_precision + test_sensitivity)
else:
    test_f1_score = 0.0

# Calculate Accuracy
n_scored = TP_test + TN_test + FP_test + FN_test
test_accuracy = (TP_test + TN_test) / n_scored if n_scored else 0.0

# The report label names the model evaluated in this cell (it previously said "GAT")
print(f'GraphSAGE: Test Accuracy: {test_accuracy * 100:.2f}%')
print(f"Sensitivity (Recall): {test_sensitivity:.4f}")
print(f"Specificity: {test_specificity:.4f}")
print(f"F1-Score: {test_f1_score:.4f}")

print(test_accuracy, test_sensitivity, test_specificity, test_f1_score)

GAT: Test Accuracy: 99.97%
Sensitivity (Recall): 0.9996
Specificity: 0.9998
F1-Score: 0.9993
0.9997403252611586 0.9996423888425319 0.9997628105129679 0.9993048797441957
