In [1]:
import torch
import torch_geometric
from torch_geometric.data import Data
import networkx as nx
import numpy as np

# Define the edge index (PyTorch Geometric format)
edge_index = torch.tensor([[0, 1], [1, 2]], dtype=torch.long)  # (From, To)

# Define node features (each node has 2 features)
x = torch.tensor([[0.5, 1.2], [0.3, 0.8], [0.9, 1.5]], dtype=torch.float)

# Create the PyTorch Geometric data object
graph_data = Data(x=x, edge_index=edge_index.t())

print("Graph Data Object:")
print(graph_data)

Graph Data Object:
Data(x=[3, 2], edge_index=[2, 2])


In [2]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

# Define a simple GNN
class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)  # Activation function
        x = self.conv2(x, edge_index)
        return x

# Create dummy graph data (from previous step)
edge_index = torch.tensor([[0, 1], [1, 2]], dtype=torch.long).t()
x = torch.tensor([[0.5, 1.2], [0.3, 0.8], [0.9, 1.5]], dtype=torch.float)
graph_data = Data(x=x, edge_index=edge_index)

# Initialize GNN
model = GNN(in_channels=2, hidden_channels=4, out_channels=2)

# Forward pass
output = model(graph_data)
print("GNN Output:\n", output)

GNN Output:
 tensor([[ 0.8543, -0.1073],
        [ 1.0399, -0.1280],
        [ 0.8982, -0.1204]], grad_fn=<AddBackward0>)


In [3]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

# Define the GNN model
class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)  # Activation function
        x = self.conv2(x, edge_index)
        return x

# Create dummy graph data (same as before)
edge_index = torch.tensor([[0, 1], [1, 2]], dtype=torch.long).t()
x = torch.tensor([[0.5, 1.2], [0.3, 0.8], [0.9, 1.5]], dtype=torch.float)
y = torch.tensor([0, 1, 0], dtype=torch.long)  # Labels for each node

graph_data = Data(x=x, edge_index=edge_index, y=y)

# Initialize model, loss function, and optimizer
model = GNN(in_channels=2, hidden_channels=4, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    optimizer.zero_grad()  # Reset gradients
    out = model(graph_data)  # Forward pass
    loss = criterion(out, graph_data.y)  # Compute loss
    loss.backward()  # Backpropagation
    optimizer.step()  # Update weights

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# Final output
print("Final GNN Predictions:\n", out)


Epoch 0, Loss: 0.6682
Epoch 10, Loss: 0.6558
Epoch 20, Loss: 0.6459
Epoch 30, Loss: 0.6355
Epoch 40, Loss: 0.6249
Epoch 50, Loss: 0.6137
Epoch 60, Loss: 0.6018
Epoch 70, Loss: 0.5891
Epoch 80, Loss: 0.5752
Epoch 90, Loss: 0.5601
Final GNN Predictions:
 tensor([[-0.0942, -0.6532],
        [-0.3520, -0.5780],
        [ 0.0641, -0.7356]], grad_fn=<AddBackward0>)


In [7]:
import pandas as pd

df = pd.read_csv("metadata.csv")
print(df.info())
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1380190 entries, 0 to 1380189
Data columns (total 7 columns):
 #   Column            Non-Null Count    Dtype 
---  ------            --------------    ----- 
 0   Accession         1380190 non-null  object
 1   Release_Date      1380190 non-null  object
 2   Nuc_Completeness  1380190 non-null  object
 3   Segment           1319653 non-null  object
 4   Country           1320180 non-null  object
 5   Host              1291358 non-null  object
 6   Collection_Date   1294213 non-null  object
dtypes: object(7)
memory usage: 73.7+ MB
None
  Accession Release_Date Nuc_Completeness Segment Country                Host  \
0  CY231567   2017-05-15          partial       3     USA        Homo sapiens   
1  CY077076   2011-11-08          partial       4   China  Anas platyrhynchos   
2  OP476923   2023-01-17          partial       6   India                 NaN   
3  OP213790   2022-08-17          partial       8     USA        Homo sapiens   
4  MT