In [1]:
import sys
print(sys.executable)  # Should show path to your conda env

c:\Users\rashm\AppData\Local\Programs\Python\Python313\python.exe


In [2]:
# Import libraries
import torch
import torch_geometric
from rdkit import Chem
from rdkit.Chem import Draw
from torch_geometric.datasets import MoleculeNet
from torch_geometric.loader import DataLoader

# Load dataset
dataset = MoleculeNet(root='.', name='Tox21')

# Get the first graph to inspect dimensions
sample_data = dataset[0]
print(f"Number of node features: {sample_data.num_features}")
print(f"Number of tasks: {sample_data.y.shape[1]}")  # Tox21 has 12 tasks

# Define GNN model
class GNN(torch.nn.Module):
    def __init__(self, num_features, num_tasks):
        super().__init__()
        self.conv1 = torch_geometric.nn.GCNConv(num_features, 32)
        self.conv2 = torch_geometric.nn.GCNConv(32, 16)
        self.lin = torch.nn.Linear(16, num_tasks)
    
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return self.lin(x)

# Initialize model with correct dimensions
model = GNN(
    num_features=sample_data.num_features,
    num_tasks=sample_data.y.shape[1]  # 12 for Tox21
)

# Training setup
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

# Create data loader
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
for epoch in range(100):
    total_loss = 0
    for batch in loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch}, Loss: {total_loss/len(loader):.4f}')

# Save model
torch.save(model.state_dict(), 'gnn_tox21.pth')

Number of node features: 9
Number of tasks: 12


RuntimeError: result type Float can't be cast to the desired output type Long

In [1]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from rdkit import Chem
from rdkit.Chem import Draw
import base64
import io

# Initialize the app
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Molecular Property Predictor", style={'textAlign': 'center'}),
    dcc.Upload(
        id='upload-data',
        children=html.Button('Upload SDF/MOL File'),
        style={'margin': '20px'}
    ),
    html.Div(id='molecule-image'),
    html.Div(id='prediction-output', style={'margin': '20px', 'fontSize': 20}),
    dcc.Graph(id='feature-importance')
])

@app.callback(
    [Output('molecule-image', 'children'),
     Output('prediction-output', 'children')],
    Input('upload-data', 'contents')
)
def update_output(contents):
    if contents is None:
        return [html.Div("No file uploaded"), "Upload a file to see predictions"]
    
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    
    try:
        # For SDF files
        suppl = Chem.SDMolSupplier(io.BytesIO(decoded))
        mol = next(suppl)
    except:
        # For MOL files
        mol = Chem.MolFromMolBlock(decoded)
    
    if mol is None:
        return [html.Div("Invalid file format"), "Error: Could not read molecule"]
    
    img = Draw.MolToImage(mol)
    return [
        html.Img(src=img, style={'height': '300px'}),
        f"Prediction: Active (Confidence: 0.87)"
    ]

if __name__ == '__main__':
    app.run(debug=True)  # Changed from app.run_server()
