<a href="https://colab.research.google.com/github/mojtabaSefidi/Machine-Learning-with-Graphs/blob/main/MLG_Ex3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Essential Packages

In [None]:
# Install PyTorch Geometric and its sparse/scatter companion packages quietly (-q).
!pip install -q torch_geometric
# torch-sparse is pinned to 0.6.13; the other two packages are left unpinned.
!pip install -q torch-sparse==0.6.13
!pip install -q torch_scatter

[K     |████████████████████████████████| 564 kB 26.4 MB/s 
[K     |████████████████████████████████| 280 kB 73.3 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 48 kB 5.7 MB/s 
[?25h  Building wheel for torch-sparse (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 106 kB 32.9 MB/s 


## Import Essential Libraries

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import torch
import networkx as nx
import torch_geometric
from torch_geometric.utils import to_networkx
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.loader import DataLoader
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GraphConv, SAGEConv

from torch_geometric.nn import global_mean_pool, global_max_pool, global_add_pool

In [None]:
def plot_history(list_train_loss, list_train_acc, list_val_loss, list_val_acc, n_epochs, title):
    """Plot per-epoch accuracy and loss curves for the train and held-out sets.

    Note the argument order: losses come before accuracies for each split.

    Args:
        list_train_loss: Training loss, one value per epoch.
        list_train_acc: Training accuracy, one value per epoch.
        list_val_loss: Held-out loss, one value per epoch.
        list_val_acc: Held-out accuracy, one value per epoch.
        n_epochs: Number of epochs; the x axis runs from 1 to ``n_epochs``.
        title: Text used as the figure's super-title.
    """
    plt.figure(figsize=(18, 8), linewidth=7, edgecolor="whitesmoke")

    # One shared x axis (1-based epoch numbers) for all four curves.
    epochs = list(range(1, n_epochs + 1))

    # Training curves carry point markers; held-out curves are plain lines.
    plt.plot(epochs, list_train_acc, color='orange', marker=".")
    plt.plot(epochs, list_train_loss, 'b', marker=".")
    plt.plot(epochs, list_val_acc, 'r')
    plt.plot(epochs, list_val_loss, 'g')

    # Legend entries follow the order in which the curves were drawn above.
    plt.legend(['Train Accuracy', 'Train Loss', 'Test Accuracy', 'Test Loss'])
    plt.grid(True)

    plt.xlabel("Number of Epochs")
    plt.ylabel("Value")
    plt.suptitle(title, size=16, y=0.927)
    plt.show()

## Read the Dataset

In [None]:
# Load the MUTAG graph-classification dataset (stored under /tmp/MUTAG);
# NormalizeFeatures is applied as the dataset transform.
dataset = TUDataset(
    root='/tmp/MUTAG',
    name='MUTAG',
    transform=NormalizeFeatures(),
)

## Split the Dataset

In [None]:
def split_data(dataset, train_split_percentage):
    """Shuffle ``dataset`` and cut it into a train part and a test part.

    Args:
        dataset: Object supporting ``len()``, ``.shuffle()`` (returning the
            shuffled dataset) and slicing.
        train_split_percentage: Fraction of the samples assigned to the
            training part; the cut index is truncated towards zero.

    Returns:
        ``(train_part, test_part)`` slices of the shuffled dataset.
    """
    n_train = int(train_split_percentage * len(dataset))
    shuffled = dataset.shuffle()
    train_part = shuffled[:n_train]
    test_part = shuffled[n_train:]
    return train_part, test_part

In [None]:
# 80 % of the graphs go to training, the rest to testing.
train_dataset, test_dataset = split_data(dataset, train_split_percentage=0.8)
for split_name, split in (('Train', train_dataset), ('Test', test_dataset)):
    print(f'Number of graphs in the {split_name} Dataset: {len(split)}')

## Batch Generator

In [None]:
def batch_generator(train_dataset, test_dataset, batch_size, shuffle=True):
    """Build mini-batch loaders for the train and test datasets.

    Both loaders receive the same ``batch_size`` and ``shuffle`` settings,
    so the test loader is shuffled too unless ``shuffle=False`` is passed.

    Returns:
        ``(train_data_loader, test_data_loader)``.
    """
    loader_options = dict(batch_size=batch_size, shuffle=shuffle)
    return (
        DataLoader(train_dataset, **loader_options),
        DataLoader(test_dataset, **loader_options),
    )

In [None]:
train_data_loader, test_data_loader = batch_generator(
    train_dataset,
    test_dataset,
    batch_size=64,
)

# Preview every training mini-batch: its 1-based number, graph count and contents.
for batch_number, data in enumerate(train_data_loader, start=1):
    print(f'Batch {batch_number}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()


In [None]:
class GCN_Add_Pooling(torch.nn.Module):
    """Four-layer GCN graph classifier with sum (add) graph-level pooling.

    Input and output sizes are read from the module-level ``dataset``
    (``num_node_features`` / ``num_classes``).

    Args:
        hidden_channels: Base width of the hidden layers.
        aggregation: Neighbourhood aggregation passed to every ``GCNConv``
            as ``aggr``. Either a single value (e.g. ``'add'``) or a
            list/tuple of aggregator names. With several aggregators each
            successive layer is ``len(aggregation)`` times wider than the
            previous one, and the convolutions are built with
            ``bias=False``.
    """

    def __init__(self, hidden_channels, aggregation):
        super().__init__()
        # Fixed seed so that the weight initialisation is repeatable.
        torch.manual_seed(12345)
        if isinstance(aggregation, (list, tuple)):
            # Tuples are accepted as well as lists; previously a tuple fell
            # through to the single-aggregation branch with the wrong widths.
            aggregation = list(aggregation)
            factor = len(aggregation)
            # widths[k] == hidden_channels * factor ** (k + 1).
            # NOTE(review): this growth is consistent with the aggregator
            # outputs being concatenated after every layer — confirm against
            # the installed torch_geometric version.
            widths = [hidden_channels * factor ** power for power in range(1, 6)]
            self.conv1 = GCNConv(dataset.num_node_features, widths[0], aggr=aggregation, bias=False)
            self.conv2 = GCNConv(widths[1], widths[1], aggr=aggregation, bias=False)
            self.conv3 = GCNConv(widths[2], widths[2], aggr=aggregation, bias=False)
            self.conv4 = GCNConv(widths[3], widths[3], aggr=aggregation, bias=False)
            self.lin = Linear(widths[4], dataset.num_classes)
        else:
            self.conv1 = GCNConv(dataset.num_node_features, hidden_channels, aggr=aggregation)
            self.conv2 = GCNConv(hidden_channels, hidden_channels, aggr=aggregation)
            self.conv3 = GCNConv(hidden_channels, hidden_channels, aggr=aggregation)
            self.conv4 = GCNConv(hidden_channels, hidden_channels, aggr=aggregation)
            self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return per-graph class scores for a mini-batch of graphs.

        ``batch`` is handed to ``global_add_pool`` together with the node
        embeddings to produce one vector per graph.
        """
        # Node embedding: ReLU after the first three convolutions only.
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        x = x.relu()
        x = self.conv4(x, edge_index)

        # Readout: sum the node embeddings of every graph.
        x = global_add_pool(x, batch)

        # Classifier head; dropout is active only in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x


In [None]:
class Graph_Sage(torch.nn.Module):
    """Three-layer ``SAGEConv`` graph classifier with sum (add) pooling.

    Input and output sizes are read from the module-level ``dataset``.
    ReLU follows the first two convolutions; the third feeds the pooling
    step directly.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Fixed seed so that the weight initialisation is repeatable.
        torch.manual_seed(12345)
        self.conv1 = SAGEConv(dataset.num_node_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return per-graph class scores for a mini-batch of graphs."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.relu(self.conv2(hidden, edge_index))
        hidden = self.conv3(hidden, edge_index)

        # Readout: sum the node embeddings of every graph.
        pooled = global_add_pool(hidden, batch)

        # Dropout is active only in training mode.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

In [None]:
class Graph_Conv(torch.nn.Module):
    """Three-layer ``GraphConv`` graph classifier with sum (add) pooling.

    Input and output sizes are read from the module-level ``dataset``.
    ReLU follows each of the three convolutions.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Fixed seed so that the weight initialisation is repeatable.
        torch.manual_seed(12345)
        self.conv1 = GraphConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return per-graph class scores for a mini-batch of graphs."""
        hidden = x
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = conv(hidden, edge_index).relu()

        # Readout: sum the node embeddings of every graph.
        pooled = global_add_pool(hidden, batch)

        # Dropout is active only in training mode.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

In [None]:
class Leaning_Evaluation(torch.nn.Module):
    """Training and evaluation driver for a graph-classification model.

    Wraps ``model`` with an Adam optimizer and a cross-entropy criterion
    and keeps track of the best epoch seen so far.

    NOTE(review): ``train`` below takes a data loader and therefore shadows
    ``torch.nn.Module.train(mode)``. Calling ``.eval()`` or ``.train(False)``
    on this wrapper itself will presumably not behave like the stock
    ``nn.Module`` methods; switch modes on ``self.model`` instead.

    Args:
        model: Module called as ``model(x, edge_index, batch)`` that returns
            one row of class scores per label in ``data.y``.
        learning_rate: Learning rate handed to ``torch.optim.Adam``.
        best_results: Optional initial ``[epoch, train_accuracy,
            test_accuracy]`` triple. It is copied, so neither the caller's
            list nor any other instance is affected when the best epoch is
            updated. Defaults to ``[0, 0, 0]``.
    """

    def __init__(
        self,
        model,
        learning_rate=0.006,
        best_results=None,
        ):
        super().__init__()
        self.model = model
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = torch.nn.CrossEntropyLoss()
        # A fresh list per instance: the old ``[0, 0, 0]`` default was a
        # single shared object that train_and_evaluate mutated in place.
        self.best_results = [0, 0, 0] if best_results is None else list(best_results)

    def train(self, data_loader):
        """Run one optimisation pass over every mini-batch of ``data_loader``."""
        self.model.train()

        for data in data_loader:
            # Clear first so stale gradients can never leak into this step.
            self.optimizer.zero_grad()
            out = self.model(data.x, data.edge_index, data.batch)
            loss = self.criterion(out, data.y)
            loss.backward()
            self.optimizer.step()

    def evaluate(self, data_loader):
        """Compute accuracy and mean per-sample loss over ``data_loader``.

        Returns:
            ``(accuracy, loss)`` where ``accuracy`` is a Python float and
            ``loss`` is a 0-dim tensor. Both are normalised by
            ``len(data_loader.dataset)``.
        """
        self.model.eval()

        correct = 0
        total_loss = torch.tensor(0.0)
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data in data_loader:
                out = self.model(data.x, data.edge_index, data.batch)
                # The criterion returns the batch *mean*; weight it by the
                # batch size so the final division yields a true per-sample
                # mean rather than (sum of batch means) / n_samples.
                total_loss = total_loss + self.criterion(out, data.y) * data.y.size(0)
                pred = out.argmax(dim=1)
                correct += int((pred == data.y).sum())

        n_samples = len(data_loader.dataset)
        return correct / n_samples, total_loss / n_samples

    def train_and_evaluate(self, train_data_loader, test_data_loader, n_epochs=200):
        """Train for ``n_epochs`` epochs, evaluating on both loaders each epoch.

        ``self.best_results`` is updated whenever the sum of train and test
        accuracy exceeds the stored best sum.

        Returns:
            ``(list_train_acc, list_train_loss, list_test_acc,
            list_test_loss)``, each with one float per epoch.
        """
        list_train_acc, list_train_loss, list_test_acc, list_test_loss = [], [], [], []
        print('Train and Evaluation started...')
        for epoch in range(1, n_epochs + 1):
            self.train(train_data_loader)

            train_accuracy, train_loss = self.evaluate(train_data_loader)
            list_train_acc.append(train_accuracy)
            list_train_loss.append(float(train_loss))

            test_accuracy, test_loss = self.evaluate(test_data_loader)
            if self.best_results[-1] + self.best_results[1] < test_accuracy + train_accuracy:
                self.best_results[0], self.best_results[1], self.best_results[-1] = epoch, train_accuracy, test_accuracy

            list_test_acc.append(test_accuracy)
            list_test_loss.append(float(test_loss))

            print(f'Epoch: {epoch:03d}, Train Accuracy: {train_accuracy:.4f}, Train Loss: {train_loss:.4f}, Test Accuracy: {test_accuracy:.4f}, Test Loss: {test_loss:.4f}')

        print('---------------------------------------------------')
        print('Train and Evaluation finished...')
        print(f'Best Results of the model : Epoch: {self.best_results[0]:03d}, Train Accuracy: {self.best_results[1]:.4f}, Test Accuracy: {self.best_results[-1]:.4f}')
        return list_train_acc, list_train_loss, list_test_acc, list_test_loss
      

### Aggregation Function = add

In [None]:
# GCN whose convolutions use 'add' neighbourhood aggregation.
model = GCN_Add_Pooling(
    hidden_channels=64,
    aggregation='add',
)
print(model)

In [None]:
# Train the current `model` for 100 epochs, starting from a fresh best-result record.
evaluate_GCN_Mean = Leaning_Evaluation(model=model, best_results=[0, 0, 0])
(
    list_train_acc,
    list_train_loss,
    list_test_acc,
    list_test_loss,
) = evaluate_GCN_Mean.train_and_evaluate(
    train_data_loader,
    test_data_loader,
    n_epochs=100,
)

In [None]:
# n_epochs must match the number of epochs used for training above.
plot_history(
    list_train_loss,
    list_train_acc,
    list_test_loss,
    list_test_acc,
    n_epochs=100,
    title='GCN (Aggregation=add)',
)

### Aggregation Function = Max

In [None]:
# GCN whose convolutions use 'max' neighbourhood aggregation.
model = GCN_Add_Pooling(
    hidden_channels=64,
    aggregation='max',
)
print(model)

In [None]:
# Train the current `model` for 100 epochs, starting from a fresh best-result record.
evaluate_GCN_max = Leaning_Evaluation(model=model, best_results=[0, 0, 0])
(
    list_train_acc,
    list_train_loss,
    list_test_acc,
    list_test_loss,
) = evaluate_GCN_max.train_and_evaluate(
    train_data_loader,
    test_data_loader,
    n_epochs=100,
)

In [None]:
# n_epochs must match the number of epochs used for training above.
plot_history(
    list_train_loss,
    list_train_acc,
    list_test_loss,
    list_test_acc,
    n_epochs=100,
    title='GCN (Aggregation=Max)',
)

### Aggregation Function = Mean

In [None]:
# GCN whose convolutions use 'mean' neighbourhood aggregation.
model = GCN_Add_Pooling(
    hidden_channels=64,
    aggregation='mean',
)
print(model)

In [None]:
# Train the current `model` for 100 epochs, starting from a fresh best-result record.
evaluate_GCN_min = Leaning_Evaluation(model=model, best_results=[0, 0, 0])
(
    list_train_acc,
    list_train_loss,
    list_test_acc,
    list_test_loss,
) = evaluate_GCN_min.train_and_evaluate(
    train_data_loader,
    test_data_loader,
    n_epochs=100,
)

In [None]:
# n_epochs must match the number of epochs used for training above.
plot_history(
    list_train_loss,
    list_train_acc,
    list_test_loss,
    list_test_acc,
    n_epochs=100,
    title='GCN (Aggregation=mean)',
)

### Aggregation Function = [Add, Mean, Max]

In [None]:
# GCN combining three aggregators; a smaller base width (8) is used here
# because the layer widths grow with the number of aggregators.
model = GCN_Add_Pooling(
    hidden_channels=8,
    aggregation=['add', 'mean', 'max'],
)
print(model)

In [None]:
# Train the multi-aggregation GCN for 100 epochs.
# best_results is passed explicitly, as in the other experiments, so this run
# starts from a fresh record instead of relying on the constructor's default.
evaluate_GCN_min = Leaning_Evaluation(
    model=model,
    best_results=[0, 0, 0],
)
(
    list_train_acc,
    list_train_loss,
    list_test_acc,
    list_test_loss,
) = evaluate_GCN_min.train_and_evaluate(
    train_data_loader,
    test_data_loader,
    n_epochs=100,
)

In [None]:
# n_epochs must match the number of epochs used for training above.
plot_history(
    list_train_loss,
    list_train_acc,
    list_test_loss,
    list_test_acc,
    n_epochs=100,
    title='GCN (Aggregation=[add, mean, max])',
)

### GraphSage

In [None]:
# GraphSAGE-based classifier with 64 hidden channels.
hidden_width = 64
model = Graph_Sage(hidden_channels=hidden_width)
print(model)

In [None]:
# Train the current `model` for 150 epochs, starting from a fresh best-result record.
evaluate_GraphSage = Leaning_Evaluation(
    model=model,
    learning_rate=0.006,
    best_results=[0, 0, 0],
)
(
    list_train_acc,
    list_train_loss,
    list_test_acc,
    list_test_loss,
) = evaluate_GraphSage.train_and_evaluate(
    train_data_loader,
    test_data_loader,
    n_epochs=150,
)

In [None]:
# n_epochs must match the number of epochs used for training above.
plot_history(
    list_train_loss,
    list_train_acc,
    list_test_loss,
    list_test_acc,
    n_epochs=150,
    title='GraphSage',
)

### GraphConv

In [None]:
# GraphConv-based classifier with 64 hidden channels.
hidden_width = 64
model = Graph_Conv(hidden_channels=hidden_width)
print(model)

In [None]:
# Train the current `model` for 150 epochs with a lower learning rate (0.001),
# starting from a fresh best-result record.
evaluate_GraphConv = Leaning_Evaluation(
    model=model,
    learning_rate=0.001,
    best_results=[0, 0, 0],
)
(
    list_train_acc,
    list_train_loss,
    list_test_acc,
    list_test_loss,
) = evaluate_GraphConv.train_and_evaluate(
    train_data_loader,
    test_data_loader,
    n_epochs=150,
)

In [None]:
# n_epochs must match the number of epochs used for training above.
plot_history(
    list_train_loss,
    list_train_acc,
    list_test_loss,
    list_test_acc,
    n_epochs=150,
    title='GraphConv',
)

In [None]:
import pandas as pd

conclusion = pd.DataFrame([['GCN (Mean polling)' ,0.8933, 0.7368, 165],
              ['GCN (Max polling)',0.8933, 0.7368, 165],
              ['GCN (Add polling)',0.8867, 0.7895, 165],
              ['GraphSage (Add polling)',0.8667, 0.8421, 182],
              ['GraphConv (Add polling)',0.9533, 0.8947, 192]],
              columns=["Model Details","Train Accuracy","Test Accuracy",'Epoch with best result'])
conclusion = conclusion.set_index('Model Details')
conclusion.style.background_gradient(cmap="YlOrRd")

