In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import GCNConv, global_mean_pool
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import streamlit as st
from rdkit import Chem
from rdkit.Chem import Draw


In [3]:
class GCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin = nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        x = global_mean_pool(x, batch)
        return self.lin(x)


In [5]:
def train(model, loader, optimizer, criterion):
    model.train()
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()

def test(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)

from torch_geometric.datasets import TUDataset

# Load graph-based molecular dataset
dataset = TUDataset(root='data/MUTAG', name='MUTAG')

# Proceed with KFold splitting
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
    train_dataset = dataset[train_idx.tolist()]
    test_dataset = dataset[test_idx.tolist()]
    # Continue training...

    train_loader = DataLoader(train_dataset, batch_size=32)
    test_loader = DataLoader(test_dataset, batch_size=32)

    model = GCN(in_channels=dataset.num_features, hidden_channels=64, out_channels=2)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1, 51):
        train(model, train_loader, optimizer, criterion)

    acc = test(model, test_loader)
    accuracies.append(acc)
    print(f'Fold {fold+1}, Accuracy: {acc:.4f}')


Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
Done!


Fold 1, Accuracy: 0.7895
Fold 2, Accuracy: 0.7368
Fold 3, Accuracy: 0.5789
Fold 4, Accuracy: 0.7568
Fold 5, Accuracy: 0.7027


In [6]:
st.title("AI-Powered Drug Discovery Dashboard")

st.markdown("### Cross-Validation Accuracies")
st.line_chart(accuracies)

st.markdown("### Sample Molecule Visualization")
smiles = st.text_input("Enter SMILES:", "CCO")
mol = Chem.MolFromSmiles(smiles)
st.image(Draw.MolToImage(mol), caption="Molecule Structure")


2025-07-27 12:20:14.963 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-27 12:20:15.603 Session state does not function when running a script without `streamlit run`


DeltaGenerator()