In [None]:
import torch
import torch_geometric
import gemmi
import Bio
# Report the installed version of every core dependency, one per line, so the
# environment behind a run is visible in the notebook output.
print(
    *(
        f"{label} {module.__version__}"
        for label, module in (
            ("PyTorch:", torch),
            ("PyTorch Geometric:", torch_geometric),
            ("GEMMI:", gemmi),
            ("Biopython:", Bio),
        )
    ),
    sep="\n",
)


: 

In [3]:
# %pip install torch_geometric torch_cluster

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class ProteinDNA_GNN(torch.nn.Module):
    """Graph-level regressor: three GCN layers, mean pooling, linear head.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_dim: Width shared by all three graph-convolution layers.
        output_dim: Number of values predicted per graph (defaults to 1).
    """

    def __init__(self, in_channels, hidden_dim, output_dim=1):
        super().__init__()

        # Message-passing stack: the first layer lifts the raw node features to
        # hidden_dim, the following two keep that width.
        self.conv1 = GCNConv(in_channels, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.conv3 = GCNConv(hidden_dim, hidden_dim)

        # Linear regression head applied to the pooled graph embedding.
        self.fc = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index, batch):
        """Predict one output vector per graph (the binding affinity).

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to every GCN layer.
            batch: Per-node graph assignment used by the mean pooling.

        Returns:
            The output of the linear head on the mean-pooled node embeddings.
        """
        hidden = x
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = F.relu(conv(hidden, edge_index))

        # Collapse node embeddings into a single embedding per graph.
        graph_embedding = global_mean_pool(hidden, batch)

        return self.fc(graph_embedding)




OSError: libc10_cuda.so: cannot open shared object file: No such file or directory

In [4]:
import gemmi
import numpy as np
from torch_geometric.data import Data

def parse_cif(file_path, radius=5.0, max_num_neighbors=32):
    """Parse a structure file into an atom-level radius graph.

    Every atom of every model/chain/residue in the file becomes one node
    (multi-model files therefore contribute the atoms of all models). The
    single node feature is the atom's atomic number; `pos` holds its 3D
    coordinates.

    Args:
        file_path: Path to the .cif file, passed to `gemmi.read_structure`.
        radius: Distance cutoff for connecting two atoms (defaults to 5.0,
            the value previously hard-coded).
        max_num_neighbors: Forwarded to `torch_cluster.radius_graph`, which
            caps the number of neighbours kept per node. The default of 32 is
            that function's own default, so behaviour is unchanged; raise it
            if every pair within `radius` must be connected in dense regions.

    Returns:
        A `torch_geometric.data.Data` with `x` of shape (num_atoms, 1),
        `pos` of shape (num_atoms, 3) and the radius-graph `edge_index`.

    Raises:
        ValueError: If the structure contains no atoms.
    """
    structure = gemmi.read_structure(file_path)

    atomic_numbers, coordinates = [], []
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    atomic_numbers.append(atom.element.atomic_number)
                    coordinates.append(atom.pos.tolist())

    # An atom-less structure cannot form a usable graph; fail with a clear
    # message instead of an obscure shape error further down the pipeline.
    if not atomic_numbers:
        raise ValueError(f"No atoms found in structure file: {file_path!r}")

    # Convert lists to tensors with explicit (N, 1) / (N, 3) shapes.
    x = torch.tensor(atomic_numbers, dtype=torch.float).view(-1, 1)
    pos = torch.tensor(coordinates, dtype=torch.float).view(-1, 3)

    # Function-scope import: torch_cluster is only needed for edge construction.
    from torch_cluster import radius_graph

    # Connect atoms closer than `radius`, subject to the per-node neighbour cap.
    edge_index = radius_graph(pos, r=radius, max_num_neighbors=max_num_neighbors)

    return Data(x=x, edge_index=edge_index, pos=pos)


In [None]:
from torch_geometric.loader import DataLoader

# Structure files to load; each one becomes a single graph.
cif_files = ["data_complex.cif"]

# Experimental binding affinity values (pKd, pIC50, etc.), example values,
# listed in the same order as cif_files.
binding_affinities = [7.2]

# Fail fast, before any parsing, if files and labels do not line up one-to-one;
# otherwise a mismatch would either raise late or silently drop labels.
if len(cif_files) != len(binding_affinities):
    raise ValueError(
        f"Expected one binding affinity per structure file, got "
        f"{len(cif_files)} files and {len(binding_affinities)} affinities"
    )

# Load dataset
dataset = [parse_cif(f) for f in cif_files]

# Attach each label to its graph as a 1-element float tensor.
for data, affinity in zip(dataset, binding_affinities):
    data.y = torch.tensor([affinity], dtype=torch.float)

# Create DataLoader
train_loader = DataLoader(dataset, batch_size=2, shuffle=True)


In [None]:
# Training hyperparameters.
NUM_EPOCHS = 100
LEARNING_RATE = 0.001
LOG_EVERY = 10  # print the loss every LOG_EVERY epochs

model = ProteinDNA_GNN(in_channels=1, hidden_dim=64)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = torch.nn.MSELoss()

# Training loop
model.train()
for epoch in range(NUM_EPOCHS):
    total_loss, num_graphs = 0.0, 0

    for data in train_loader:
        optimizer.zero_grad()

        # The model emits one row per graph with a trailing dimension of 1,
        # while the batched labels are a flat vector. Flatten both so MSELoss
        # compares them element-wise instead of broadcasting to a
        # (num_graphs, num_graphs) grid.
        out = model(data.x, data.edge_index, data.batch).view(-1)
        target = data.y.view(-1)

        loss = criterion(out, target)
        loss.backward()
        optimizer.step()

        # Accumulate a per-graph weighted sum so the logged value is the mean
        # over the whole epoch rather than whatever the last batch produced.
        total_loss += loss.item() * data.num_graphs
        num_graphs += data.num_graphs

    # Skip logging when the loader yielded nothing (avoids division by zero).
    if epoch % LOG_EVERY == 0 and num_graphs:
        print(f"Epoch {epoch}, Loss: {total_loss / num_graphs:.4f}")
