<a href="https://colab.research.google.com/github/sherax139/DNA-To-Protein-Python-code/blob/main/Graph%20Neural%20Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import networkx as nx
import numpy as np
import torch 
from sklearn.preprocessing import StandardScaler

In [None]:
graph = nx.karate_club_graph()

In [None]:
graph.edges

In [None]:
labels = np.asarray([graph.nodes[i]['club'] != 'Mr. Hi' for i in graph.nodes]).astype(np.int64)

In [None]:
labels.dtype

In [None]:
adj = nx.to_scipy_sparse_matrix(graph).tocoo()

In [None]:
adj.shape

In [None]:
row = torch.from_numpy(adj.row.astype(np.int64)).to(torch.long)

In [None]:
row

In [None]:
labels.shape

In [None]:
col = torch.from_numpy(adj.col.astype(np.int64)).to(torch.long)

In [None]:
col

In [None]:
edge_index = torch.stack([row, col], dim= 0)

In [None]:
edge_index


In [None]:
edge_index.shape

In [None]:
dict(graph.degree())

In [None]:
embending = np.array(list(dict(graph.degree()).values()))

In [None]:
embending

In [None]:
list(graph.degree([0, 1, 2]))

In [None]:
embending

In [None]:
scale = StandardScaler()

In [None]:
embending = scale.fit_transform(embending.reshape(-1, 1))

In [None]:
embending

In [None]:
pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cpu.html

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

In [None]:
import pandas as pd
from torch_geometric.data import InMemoryDataset, Data

In [None]:
from sklearn.model_selection import train_test_split
import torch_geometric.transforms as T

# custom dataset
class KarateDataset(InMemoryDataset):
    """In-memory dataset holding the single karate-club graph.

    The constructor builds one ``Data`` object from the module-level
    ``graph``, ``edge_index``, ``embending`` and ``labels`` objects and
    attaches boolean ``train_mask`` / ``test_mask`` node masks obtained from
    a 70/30 ``train_test_split`` with ``random_state=42``.
    """

    def __init__(self, transform=None):
        super().__init__('.', transform, None, None)

        node_count = graph.number_of_nodes()

        sample = Data(edge_index=edge_index)
        sample.num_nodes = node_count

        # Node features (the scaled degree column) as float32.
        sample.x = torch.from_numpy(embending).type(torch.float32)

        # Node labels as an int64 tensor that does not share memory with
        # the NumPy `labels` array.
        sample.y = torch.from_numpy(labels).type(torch.long).clone().detach()

        sample.num_classes = 2

        # Only the node split is needed; the label halves are discarded.
        train_nodes, test_nodes, _, _ = train_test_split(
            pd.Series(list(graph.nodes())),
            pd.Series(labels),
            test_size=0.30,
            random_state=42,
        )

        # Build one boolean mask per split, flagging the positions that the
        # split's Series index points at.
        for mask_name, members in (('train_mask', train_nodes),
                                   ('test_mask', test_nodes)):
            mask = torch.zeros(node_count, dtype=torch.bool)
            mask[members.index] = True
            sample[mask_name] = mask

        self.data, self.slices = self.collate([sample])

    def _download(self):
        """Do nothing: the graph is built in memory."""
        return None

    def _process(self):
        """Do nothing: the data is assembled in ``__init__``."""
        return None

    def __repr__(self):
        return f'{type(self).__name__}()'
    
# Build the dataset and pull out its only graph; `data` is read as a global
# by the model and by the train/test functions below.
dataset = KarateDataset()
data = dataset[0]

In [None]:
#GCN model with 2 layers 
class Network(torch.nn.Module):
  """Two-layer GCN for node classification on the global ``data`` graph.

  Layer sizes are taken from ``data``: ``data.num_features`` inputs, 16
  hidden channels, and ``data.num_classes`` outputs.
  """

  def __init__(self):
    super(Network, self).__init__()
    self.conv1 = GCNConv(data.num_features, 16)
    self.conv2 = GCNConv(16, int(data.num_classes))

  def forward(self):
    """Run the network on the global ``data`` graph.

    Takes no arguments; the node features and edge index are read from the
    module-level ``data`` object.

    Returns:
      The log-softmax (over dim 1) of the second convolution's output,
      one row per node.
    """
    x, edge_index = data.x, data.edge_index
    # The first convolution must be called through `self`; the previous
    # `F.relu(self, conv1(...))` referenced an undefined name `conv1`.
    x = F.relu(self.conv1(x, edge_index))
    # Dropout is only active while the module is in training mode.
    x = F.dropout(x, training=self.training)
    x = self.conv2(x, edge_index)
    return F.log_softmax(x, dim=1)


# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

# Move the graph to the chosen device before building the model, because the
# model's constructor reads its layer sizes from `data`.
data = data.to(device)
model = Network().to(device)

In [None]:
# Seed PyTorch's global RNG.
# NOTE(review): `model` is created in the previous cell, before this seed is
# set, so its weight initialisation is presumably not covered by it - confirm.
torch.manual_seed(42)

# Training hyper-parameters.
epochs = 200
lr = 1e-1
optimizer_name = "Adam"

# Look the optimizer class up by name in `torch.optim` and bind it to the
# model's parameters.
optimizer = getattr(torch.optim, optimizer_name)(
    model.parameters(),
    lr=lr,
)

def train():
  """Run one optimisation step using only the training nodes.

  Uses the module-level ``model``, ``optimizer`` and ``data``; returns nothing.
  """
  model.train()
  optimizer.zero_grad()

  # Forward pass over the whole graph, then restrict the loss to the nodes
  # flagged in the training mask.
  log_probs = model()
  train_nodes = data.train_mask
  loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

  loss.backward()
  optimizer.step()

@torch.no_grad()
def test():
  """Evaluate the module-level ``model`` on both node splits.

  Returns:
    A ``(train_accuracy, test_accuracy)`` tuple, each the fraction of nodes
    in the corresponding mask whose predicted class equals ``data.y``.
  """
  model.eval()
  scores = model()

  def accuracy(split_mask):
    # Predicted class = index of the largest score in each row.
    predicted = scores[split_mask].max(dim=1).indices
    hits = predicted.eq(data.y[split_mask]).sum().item()
    return hits / split_mask.sum().item()

  return accuracy(data['train_mask']), accuracy(data['test_mask'])

# Run exactly `epochs` optimisation steps. The previous `range(1, epochs)`
# stopped one step short (199 steps for epochs = 200).
for epoch in range(1, epochs + 1):
  train()

# Evaluate once, after training has finished.
train_acc, test_acc = test()

print('#' * 70)
print('Train Accuracy: %s' % train_acc)
print('Test Accuracy: %s' % test_acc)
print('#' * 70)