In [None]:

import pandas as pd
sider_df = pd.read_csv('meddra_all_se.tsv', sep='\t', header=None, names=['stitch_id', 'umls_id', 'side_effect'])
print(sider_df.head())

In [72]:
import time
import random
from pubchempy import get_compounds

def fetch_pubchem_data(drug_name, retries=5, delay=5):
    for attempt in range(retries):
        try:
            print(f"🔍 Fetching PubChem data for {drug_name} (Attempt {attempt+1}/{retries})...")
            compounds = get_compounds(drug_name, 'name')
            
            if not compounds:
                print(f"⚠️ No PubChem data found for {drug_name}. Trying lowercase version...")
                compounds = get_compounds(drug_name.lower(), 'name')
            
            if not compounds:
                print(f"❌ Still no data for {drug_name}. Trying synonyms...")
                synonyms = {
                    "gamma-aminobutyric": "GABA",
                    "5-aminolevulinic": "5-ALA",
                    "Oestrogen": "Estrogen"
                }
                if drug_name in synonyms:
                    compounds = get_compounds(synonyms[drug_name], 'name')
                    if compounds:
                        print(f"✅ Found {synonyms[drug_name]} instead of {drug_name}")
                        return compounds[0].isomeric_smiles
                    else:
                        print(f"❌ No data for synonym {synonyms[drug_name]}")

                return None  # No valid PubChem data

            return compounds[0].isomeric_smiles  # Return SMILES if found

        except Exception as e:
            if "PUGREST.ServerBusy" in str(e) or "Remote end closed" in str(e):
                print(f"⚠️ Server busy, retrying {drug_name} in {delay} seconds... (Attempt {attempt+1}/{retries})")
                time.sleep(delay + random.uniform(0, 3))  # Add random delay to avoid API blocks
            else:
                print(f"⚠️ Error fetching {drug_name}: {e}")
                return None  # Other errors won't retry

    print(f"❌ Failed after {retries} attempts: {drug_name}")
    return None  # Failed after all retries

In [None]:
print(fetch_pubchem_data("Aspirin"))

In [None]:
import pandas as pd

# Load drug names data
drug_names = pd.read_csv('drug_names.tsv', sep='\t', header=None, names=['stitch_id', 'drug_name'])

# Verify the data
print(drug_names.head())
merged_df = sider_df.merge(drug_names, on='stitch_id', how='inner')

In [None]:
merged_df = sider_df.merge(drug_names, on='stitch_id', how='inner')
merged_df

In [76]:
from rdkit import Chem
from rdkit.Chem import rdmolops
import torch
from torch_geometric.data import Data

def generate_molecular_graph(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    adj = rdmolops.GetAdjacencyMatrix(mol)
    edge_index = torch.tensor(adj.nonzero(), dtype=torch.long)
    x = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()], dtype=torch.float).view(-1, 1)
    return Data(x=x, edge_index=edge_index)

In [None]:
print(generate_molecular_graph(fetch_pubchem_data("Aspirin")))

In [78]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class DrugGNN(torch.nn.Module):
    def __init__(self):
        super(DrugGNN, self).__init__()
        self.conv1 = GCNConv(1, 64)
        self.conv2 = GCNConv(64, 32)
        self.fc = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        return torch.sigmoid(self.fc(x))

In [None]:
def preprocess_data(sider_df, drug_names_path):
 
    drug_names = pd.read_csv(drug_names_path, sep='\t', header=None, names=['stitch_id', 'drug_name'])
    data_list = []

    for _, row in drug_names.iterrows():
        try:
            smiles = fetch_pubchem_data(row['drug_name'])
            if smiles is None:
                continue
            side_effects = sider_df[sider_df['stitch_id'] == row['stitch_id']]['side_effect'].values
            graph = generate_molecular_graph(smiles)
            graph.y = torch.tensor([len(side_effects)], dtype=torch.float)
            data_list.append(graph)
        except Exception as e:
            print(f"⚠️ Error processing {row['drug_name']}: {e}")

    print(f"✅ Preprocessing Completed: {len(data_list)} graphs created.")
    return data_list
import pandas as pd
import torch

# Load SIDER Data First
sider_df = pd.read_csv('meddra_all_se.tsv', sep='\t', header=None, names=['stitch_id', 'umls_id', 'side_effect'])

# Now Call preprocess_data with sider_df
data_list = preprocess_data(sider_df, 'drug_names.tsv')

print(f"✅ Final Data List Count: {len(data_list)}")

In [28]:
from pubchempy import get_compounds

def fetch_pubchem_data(drug_name):
    try:
        # Try fetching data normally
        compounds = get_compounds(drug_name, 'name')
        if not compounds:
            print(f"⚠️ No PubChem data found for {drug_name}. Trying lowercase version...")
            
            # Try lowercase version
            compounds = get_compounds(drug_name.lower(), 'name')
            if not compounds:
                print(f"❌ Still no data for {drug_name}. Trying synonyms...")

                # If synonyms exist, try alternative search
                synonyms = {
                    "gamma-aminobutyric": "GABA",
                    "5-aminolevulinic": "5-ALA",
                    "Oestrogen": "Estrogen"
                }
                
                if drug_name in synonyms:
                    compounds = get_compounds(synonyms[drug_name], 'name')
                    if compounds:
                        print(f"✅ Found {synonyms[drug_name]} instead of {drug_name}")
                        return compounds[0].isomeric_smiles
                    else:
                        print(f"❌ No data for synonym {synonyms[drug_name]}")

                return None  # If all attempts fail
        return compounds[0].isomeric_smiles  # Return valid SMILES if found
    except Exception as e:
        print(f"⚠️ Error fetching {drug_name}: {e}")
        return None

In [None]:
missing_drugs = []
for _, row in drug_names.iterrows():
    smiles = fetch_pubchem_data(row['drug_name'])
    if smiles is None:
        missing_drugs.append(row['drug_name'])

print(f"⚠️ Missing PubChem Data for {len(missing_drugs)} drugs: {missing_drugs[:10]}")  # Show first 10

In [None]:
print(sider_df.head())  
print(drug_names.head())

In [None]:
print(f"Number of graphs in data_list: {len(data_list)}")
if len(data_list) == 0:
    print("❌ No data found! Check preprocessing step.")

In [None]:
import pandas as pd

drug_names = pd.read_csv('drug_names.tsv', sep='\\t', header=None, names=['stitch_id', 'drug_name'])
# print(drug_names.head())  # Display the first few rows
drug_names.columns = drug_names.columns.str.strip()

from pubchempy import get_compounds

compound = get_compounds('Aspirin', 'name')[0]
print(compound.to_dict())

In [57]:
missing_drugs = []

def preprocess_data(sider_df, drug_names_path):
    drug_names = pd.read_csv(drug_names_path, sep='\t', header=None, names=['stitch_id', 'drug_name'])
    data_list = []

    for _, row in drug_names.iterrows():
        try:
            smiles = fetch_pubchem_data(row['drug_name'])
            if smiles is None:
                missing_drugs.append(row['drug_name'])
                continue
            side_effects = sider_df[sider_df['stitch_id'] == row['stitch_id']]['side_effect'].values
            graph = generate_molecular_graph(smiles)
            graph.y = torch.tensor([len(side_effects)], dtype=torch.float)
            data_list.append(graph)
        except Exception as e:
            print(f"⚠️ Error processing {row['drug_name']}: {e}")

    print(f"✅ Preprocessing Completed: {len(data_list)} graphs created.")
    print(f"⚠️ {len(missing_drugs)} drugs missing data: {missing_drugs[:10]} (Showing first 10)")
    return data_list

In [None]:
drug_names = pd.read_csv('drug_names.tsv', sep='\t', header=None, names=['stitch_id', 'drug_name'])
print("✅ Drug Names Loaded:")
print(drug_names.head())  # Ensure it has correct data

In [None]:
for _, row in drug_names.iterrows():
    print(f"🔍 Fetching data for: {row['drug_name']}")
    smiles = fetch_pubchem_data(row['drug_name'])

    if smiles is None:
        print(f"❌ No SMILES found for {row['drug_name']}")
        continue
    print(f"✅ SMILES found: {smiles}")

In [None]:
test_smiles = fetch_pubchem_data("Aspirin")
print(f"✅ Aspirin SMILES: {test_smiles}")

In [None]:
# Ensure SIDER data is loaded first
sider_df = pd.read_csv('meddra_all_se.tsv', sep='\t', header=None, names=['stitch_id', 'umls_id', 'side_effect'])

# Process data and create molecular graphs
data_list = preprocess_data(sider_df, 'drug_names.tsv')

# Check if data_list is populated
if 'data_list' in locals():
    print(data_list[:5])
    # print(f"✅ Total Processed Drugs: {len(data_list)}")
    print("🔍 Sample Data:")
    # print(data_list[:5])  # Show first few processed molecular graphs
else:
    print("❌ `data_list` is not defined. Check if `preprocess_data()` is running correctly.")

In [None]:
print(f"✅ Total Processed Drugs: {len(data_list)}")
print("🔍 Sample Data:")
for data in data_list[:5]:  # Show first 5 molecular graphs
    print(f"📌 Graph - x: {data.x.shape}, edge_index: {data.edge_index.shape}, y: {data.y}")

In [None]:
test_smiles = "CC(C(=O)O)N"  # Example: Alanine
test_graph = generate_molecular_graph(test_smiles)

print(f"✅ Test Graph - x: {test_graph.x.shape}, edge_index: {test_graph.edge_index.shape}")

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import DataLoader

# Define Graph Neural Network Model
class DrugGNN(torch.nn.Module):
    def __init__(self):
        super(DrugGNN, self).__init__()
        self.conv1 = GCNConv(1, 64)  # Input: Atomic number
        self.conv2 = GCNConv(64, 32)
        self.fc = torch.nn.Linear(32, 1)  # Predict side effect severity

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        return torch.sigmoid(self.fc(x))  # Binary output (Side effect likelihood)

# ✅ Move `train_model()` Outside the Class
def train_model(data_list, epochs=20, batch_size=32, learning_rate=0.001):
    print("🚀 Starting Training...")

    # Initialize model, optimizer, and loss function
    model = DrugGNN()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loader = DataLoader(data_list, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        total_loss = 0
        for batch in loader:
            optimizer.zero_grad()
            out = model(batch.x, batch.edge_index)

            # Fix shape mismatch
            out = out.mean(dim=0, keepdim=True)
            y = batch.y.view(-1, 1).mean(dim=0, keepdim=True)

            loss = F.mse_loss(out, y)  # Mean Squared Error Loss
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"✅ Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(loader)}")

    print("🎉 Training Complete!")

# Run Training
train_model(data_list)

In [None]:
def evaluate_model(model, test_data):
    model.eval()  # Set model to evaluation mode
    total_loss = 0
    with torch.no_grad():  # No gradients needed during evaluation
        for data in test_data:
            out = model(data.x, data.edge_index)
            out = out.mean(dim=0, keepdim=True)  # Ensure output matches y
            y = data.y.view(-1, 1).mean(dim=0, keepdim=True)
            loss = F.mse_loss(out, y)
            total_loss += loss.item()

    avg_loss = total_loss / len(test_data)
    print(f"🔍 Model Evaluation Loss: {avg_loss}")

# Prepare test dataset (use a subset of data_list as a quick check)
test_data = data_list[:10]  # Use 10 samples for testing
evaluate_model(DrugGNN(), test_data)

In [None]:
def predict_side_effects(drug_name, model):
    smiles = fetch_pubchem_data(drug_name)
    if smiles is None:
        print(f"❌ No PubChem data found for {drug_name}")
        return None

    graph = generate_molecular_graph(smiles)
    model.eval()
    with torch.no_grad():
        prediction = model(graph.x, graph.edge_index).mean().item()
    
    print(f"🔬 Predicted Side Effect Severity for {drug_name}: {prediction:.4f}")
    return prediction

# Try predicting for Aspirin
trained_model = DrugGNN()
predict_side_effects("Aspirin", trained_model)

In [None]:
test_drugs = ["Ibuprofen", "Paracetamol", "Hydrocortisone", "Morphine"]
for drug in test_drugs:
    predict_side_effects(drug, trained_model)

In [None]:
test_data = data_list[:20]  # Test on 20 samples

evaluate_model(trained_model, test_data)

In [83]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

class DrugInteractionGNN(torch.nn.Module):
    def __init__(self):
        super(DrugInteractionGNN, self).__init__()
        self.conv1 = GCNConv(1, 128)
        self.conv2 = GCNConv(128, 64)
        self.fc = torch.nn.Linear(64 * 2, 3)  # 3 classes: Major, Moderate, Minor

    def forward(self, x1, edge_index1, x2, edge_index2):
        x1 = F.relu(self.conv1(x1, edge_index1))
        x1 = F.relu(self.conv2(x1, edge_index1))
        x2 = F.relu(self.conv1(x2, edge_index2))
        x2 = F.relu(self.conv2(x2, edge_index2))
        
        combined = torch.cat([x1.mean(dim=0), x2.mean(dim=0)], dim=0)
        return F.log_softmax(self.fc(combined), dim=-1)  # Classification output

In [89]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdmolops
import torch
import numpy as np

# Load drug interaction dataset
df_ddi = pd.read_csv('drug_interactions.csv')

# Function to convert a drug into a molecular graph
def generate_molecular_graph(drug_name):
    smiles = fetch_pubchem_data(drug_name)
    if smiles is None:
        return None

    mol = Chem.MolFromSmiles(smiles)
    adj = rdmolops.GetAdjacencyMatrix(mol)
    edge_index = torch.from_numpy(np.array(adj.nonzero(), dtype=np.int64))
    x = torch.tensor([atom.GetAtomicNum() for atom in mol.GetAtoms()], dtype=torch.float).view(-1, 1)
    
    return Data(x=x, edge_index=edge_index)

# Convert all drug pairs into graph data
ddi_graphs = []
for _, row in df_ddi.iterrows():
    graph_A = generate_molecular_graph(row['Drug_A'])
    graph_B = generate_molecular_graph(row['Drug_B'])
    
    if graph_A and graph_B:
        label = {'Major': 0, 'Moderate': 1, 'Minor': 2}[row['Level']]
        ddi_graphs.append((graph_A, graph_B, label))

print(f"✅ Processed {len(ddi_graphs)} drug interactions.")

🔍 Fetching PubChem data for Naltrexone (Attempt 1/5)...
🔍 Fetching PubChem data for Abacavir (Attempt 1/5)...
🔍 Fetching PubChem data for Abacavir (Attempt 1/5)...
🔍 Fetching PubChem data for Orlistat (Attempt 1/5)...
🔍 Fetching PubChem data for Aluminum hydroxide (Attempt 1/5)...
🔍 Fetching PubChem data for Dolutegravir (Attempt 1/5)...
🔍 Fetching PubChem data for Aprepitant (Attempt 1/5)...
🔍 Fetching PubChem data for Dolutegravir (Attempt 1/5)...
🔍 Fetching PubChem data for Attapulgite (Attempt 1/5)...
⚠️ No PubChem data found for Attapulgite. Trying lowercase version...
❌ Still no data for Attapulgite. Trying synonyms...
🔍 Fetching PubChem data for Dolutegravir (Attempt 1/5)...
🔍 Fetching PubChem data for Calcium acetate (Attempt 1/5)...
🔍 Fetching PubChem data for Dolutegravir (Attempt 1/5)...
🔍 Fetching PubChem data for Calcium carbonate (Attempt 1/5)...
🔍 Fetching PubChem data for Dolutegravir (Attempt 1/5)...
⚠️ Server busy, retrying Dolutegravir in 5 seconds... (Attempt 1/5)
🔍

KeyboardInterrupt: 