In [None]:
!pip install torch_geometric torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html


Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html
Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.2%2Bpt20cpu-cp311-cp311-linux_x86_64.whl (494 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.0/494.0 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.18%2Bpt20cpu-cp311-cp311-linux_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_cluster-1.6.3%2Bpt20cpu-cp311-cp311-linux_x86_64.whl (750 kB)
[2K     [9

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder



In [None]:
# Load dataset
# CSV of extracted CNN features; the path below points at the Colab upload
# location and has to be adjusted when running elsewhere.
file_path = "/content/cnn_features (1).csv"  # Update for Google Colab
df = pd.read_csv(filepath_or_buffer=file_path)


In [None]:
# Split features and labels
# Rows with missing values are dropped into a new frame (df itself is left
# untouched). StandardScaler passes NaN through unchanged, and a single NaN
# feature is enough to turn a loss into NaN.
# NOTE(review): the recorded Keras run shows `val_loss: nan`, which is the
# symptom this guards against - confirm by checking df.isna().sum().
df_clean = df.dropna()

# Select the label by name on both sides so that features and labels stay
# consistent even if 'label' is not the last column of the CSV.
X = df_clean.drop(columns=['label']).values  # every column except 'label'
y = df_clean['label'].values                 # the 'label' column

In [None]:
# Encode labels
# LabelEncoder maps each distinct label value to an integer id; the fitted
# encoder is kept in `le` so the ids can be mapped back to the original labels.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

In [None]:
# Split dataset, then standardize features
# The split happens first and the scaler is fitted on the training rows only,
# so no statistics of the held-out rows leak into preprocessing. The held-out
# rows are transformed with the mean/variance learned from the training rows.
# X itself is left unscaled; only X_train / X_test are standardized.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()  # rescales each feature to zero mean and unit variance
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Keras Model (Enhanced)
# A fully connected classifier on the feature vectors (no graph structure is
# used in this model): three hidden stages of
# Dense(ReLU) -> BatchNormalization -> Dropout(0.3), then a softmax output
# with one unit per encoded class.
input_layer = Input(shape=(X_train.shape[1],))  # one input per feature column

x = input_layer
for units in (256, 128, 64):
    x = Dense(units, activation='relu')(x)
    x = BatchNormalization()(x)  # normalizes this stage's activations during training
    x = Dropout(0.3)(x)          # drops 30% of activations during training to curb overfitting

num_classes = len(set(y))
out_layer = Dense(num_classes, activation='softmax')(x)  # class probabilities

model = Model(inputs=input_layer, outputs=out_layer)
# The sparse cross-entropy loss takes integer class ids, which is what
# LabelEncoder produced for y.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train Keras Model
# 50 epochs in mini-batches of 64; the held-out split is passed as validation
# data, so it is scored at the end of every epoch.
# NOTE(review): the recorded run reports `val_loss: nan` - check the
# validation features for missing values.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=50,
    validation_data=(X_test, y_test),
)


Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.1795 - loss: 2.4893 - val_accuracy: 0.4940 - val_loss: nan
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2636 - loss: 2.0398 - val_accuracy: 0.5498 - val_loss: nan
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.2996 - loss: 1.8010 - val_accuracy: 0.6773 - val_loss: nan
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.3550 - loss: 1.6330 - val_accuracy: 0.7610 - val_loss: nan
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4464 - loss: 1.4564 - val_accuracy: 0.8088 - val_loss: nan
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.5511 - loss: 1.2130 - val_accuracy: 0.8367 - val_loss: nan
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [None]:
# PyTorch GNN Model (Enhanced)
class GCN(nn.Module):
    """Three-layer graph convolutional node classifier.

    Stacks GCNConv layers (num_features -> 256 -> 128 -> 64), applies ReLU
    after each one, and maps the resulting 64-dimensional node embedding to
    ``num_classes`` logits with a linear layer.
    """

    def __init__(self, num_features, num_classes):
        super().__init__()
        self.conv1 = GCNConv(num_features, 256)
        self.conv2 = GCNConv(256, 128)
        self.conv3 = GCNConv(128, 64)
        self.fc = nn.Linear(64, num_classes)  # classification head

    def forward(self, x, edge_index):
        """Return one row of raw class logits per node.

        ``x`` holds the node features and ``edge_index`` the graph
        connectivity; both are handed to every GCNConv layer.
        """
        for conv in (self.conv1, self.conv2, self.conv3):
            # Each convolution aggregates over the node's neighbours, so
            # stacking them widens the neighbourhood a node can draw from.
            x = torch.relu(conv(x, edge_index))
        return self.fc(x)

In [None]:
# Convert to PyTorch tensors
# Seed torch first: both the random graph below and the model's initial
# weights are drawn from torch's RNG, so without a seed every run trains a
# different model on a different graph.
torch.manual_seed(42)

X_tensor = torch.tensor(X_train, dtype=torch.float)
y_tensor = torch.tensor(y_train, dtype=torch.long)

# Random graph: as many edges as there are nodes, each edge a uniformly drawn
# (source, target) pair. The connectivity is arbitrary and unrelated to the
# features.
num_nodes = X_tensor.shape[0]
edge_index = torch.randint(0, num_nodes, (2, num_nodes))

# Bundle features, edges and labels into one graph object.
data = Data(x=X_tensor, edge_index=edge_index, y=y_tensor)

# Define model, loss and optimizer, and move model and graph to the device.
# NOTE: this rebinds `model`, replacing the Keras model defined earlier.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(num_features=X_train.shape[1], num_classes=len(set(y))).to(device)
data = data.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()  # expects raw logits and integer class ids


In [None]:
# Train PyTorch Model
# Full-batch training: every epoch runs one forward/backward pass over the
# whole graph and takes a single optimizer step.
n_epochs, log_every = 50, 5

model.train()  # switch the module to training mode
for epoch in range(n_epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    out = model(data.x, data.edge_index)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % log_every:
        continue  # only report every `log_every`-th epoch
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 5, Loss: 0.6199939250946045
Epoch 10, Loss: 0.430783748626709
Epoch 15, Loss: 0.3638676404953003
Epoch 20, Loss: 0.3070162236690521
Epoch 25, Loss: 0.26104387640953064
Epoch 30, Loss: 0.2136014699935913
Epoch 35, Loss: 0.17219750583171844
Epoch 40, Loss: 0.1357276737689972
Epoch 45, Loss: 0.1016382947564125
Epoch 50, Loss: 0.07265602052211761


In [None]:
# Evaluate PyTorch Model
# `data` holds only the training nodes, so scoring on it yields training
# accuracy. The held-out split (X_test / y_test) is scored as well, and the
# two numbers are reported under separate labels.
model.eval()  # switch the module to evaluation mode
with torch.no_grad():
    # Accuracy on the graph the model was fitted on.
    train_preds = model(data.x, data.edge_index).argmax(dim=1)
    train_accuracy = (train_preds == data.y).float().mean().item()

    # The held-out nodes get their own graph, built with the same recipe as
    # the training graph (as many uniformly random edges as nodes). A
    # dedicated seeded generator keeps the result reproducible.
    X_test_tensor = torch.tensor(X_test, dtype=torch.float).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)
    num_test_nodes = X_test_tensor.shape[0]
    test_edge_index = torch.randint(
        0,
        num_test_nodes,
        (2, num_test_nodes),
        generator=torch.Generator().manual_seed(42),
    ).to(device)

    out = model(X_test_tensor, test_edge_index)
    preds = out.argmax(dim=1)
    accuracy = (preds == y_test_tensor).float().mean().item()  # held-out accuracy

print(f'PyTorch GNN Train Accuracy: {train_accuracy * 100:.2f}%')
print(f'PyTorch GNN Test Accuracy: {accuracy * 100:.2f}%')

PyTorch GNN Model Accuracy: 98.01%
