In [5]:
import numpy as np
import pandas as pd
# !pip install rdkit

import networkx as nx
from rdkit import Chem
from rdkit.Chem import Draw
import torch
!pip install torch-geometric

from torch_geometric.data import Data

# Function to convert SMILES to a NetworkX graph
def smiles_to_graph(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    graph = nx.Graph()
    for atom in mol.GetAtoms():
        graph.add_node(atom.GetIdx(), symbol=atom.GetSymbol(), atomic_num=atom.GetAtomicNum())
    for bond in mol.GetBonds():
        graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), bond_type=bond.GetBondType())
    return graph

# Function to create an adjacency matrix from a NetworkX graph
def create_adjacency_matrix(graph):
    return np.array(nx.adjacency_matrix(graph).todense())

# Function to create node features based on the specified dimensions
def create_node_features(mol):
    atom_features = []
    for atom in mol.GetAtoms():
        # One-hot encoding of the atom element
        element = atom.GetSymbol()
        element_one_hot = [0] * 44
        element_list = ['C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca', 'Fe', 'As',
                                           'Al', 'I', 'B', 'V', 'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag', 'Pd', 'Co', 'Se',
                                           'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni', 'Cd', 'In', 'Mn', 'Zr', 'Cr',
                                           'Pt', 'Hg', 'Pb', 'X']
        if element in element_list:
            element_one_hot[element_list.index(element)] = 1

        # One-hot encoding of the degree of the atom in the molecule
        degree = atom.GetDegree()
        degree_one_hot = [0] * 11
        if degree < 11:
            degree_one_hot[degree] = 1

        # One-hot encoding of the total number of H bound to the atom
        total_num_h = atom.GetTotalNumHs()
        total_num_h_one_hot = [0] * 11
        if total_num_h < 11:
            total_num_h_one_hot[total_num_h] = 1

        # One-hot encoding of the number of implicit H bound to the atom
        implicit_h = atom.GetNumImplicitHs()
        implicit_h_one_hot = [0] * 11
        if implicit_h < 11:
            implicit_h_one_hot[implicit_h] = 1

        # Whether the atom is aromatic
        aromatic = [1] if atom.GetIsAromatic() else [0]

        # Combine all features
        atom_features.append(element_one_hot + degree_one_hot + total_num_h_one_hot + implicit_h_one_hot + aromatic)

    return np.array(atom_features)

# Process data to PyTorch Geometric Data format
def process_data(smiles_list):
    data_list = []
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue
        graph = smiles_to_graph(smiles)
        x = torch.tensor(create_node_features(mol), dtype=torch.float)
        edge_index = torch.tensor(create_adjacency_matrix(graph).nonzero(), dtype=torch.long)
        data = Data(x=x, edge_index=edge_index)
        data_list.append(data)
    return data_list


smiles_list = ['CC(=O)NC1=CC=C(C=C1)O']
graphs = [smiles_to_graph(smiles) for smiles in smiles_list]
for i, graph in enumerate(graphs):
    if graph is not None:
        print(f"Graph {i + 1} for SMILES {smiles_list[i]}:")
        print("Adjacency Matrix:\n", create_adjacency_matrix(graph))
        print("Node Features:\n", create_node_features(Chem.MolFromSmiles(smiles_list[i])))
        nx.draw(graph, with_labels=True, labels=nx.get_node_attributes(graph, 'symbol'))


Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl.metadata (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m353.3 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m971.5 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3




Graph 1 for SMILES CC(=O)NC1=CC=C(C=C1)O:
Adjacency Matrix:
 [[0 1 0 0 0 0 0 0 0 0 0]
 [1 0 1 1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 1 0 0 0 0 0 0]
 [0 0 0 1 0 1 0 0 0 1 0]
 [0 0 0 0 1 0 1 0 0 0 0]
 [0 0 0 0 0 1 0 1 0 0 0]
 [0 0 0 0 0 0 1 0 1 0 1]
 [0 0 0 0 0 0 0 1 0 1 0]
 [0 0 0 0 1 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0]]
Node Features:
 [[0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0
  0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
  0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
  0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
  0 0 0 0 

  return np.array(nx.adjacency_matrix(graph).todense())


TypeError: '_AxesStack' object is not callable

<Figure size 640x480 with 0 Axes>