# In [None]:  (Jupyter notebook cell prompt left over from export)
import dgl
import numpy as np
import rdkit
import torch
from dgl import DGLGraph
from rdkit.Chem import AllChem

def smiles_to_molgraph(smiles_list):
    """
    Convert a list of SMILES strings to a list of DGLGraphs.

    Each SMILES string is parsed with RDKit and explicit hydrogens are added,
    so hydrogen atoms become graph nodes as well. Every bond contributes
    exactly one directed edge, pointing from the atom with the higher index
    to the atom with the lower index. No self-loops are added.

    Node features (26 values per atom) are stored in ``ndata['x']`` and bond
    features (6 values per bond) in ``edata['w']``, both as float tensors.

    Args:
        smiles_list (list): List of SMILES strings.

    Returns:
        list: List of DGLGraphs representing the molecules, in input order.

    Raises:
        ValueError: If RDKit cannot parse one of the SMILES strings.
    """
    def one_of_k_encoding_unk(x, allowable_set):
        """
        One-hot encode x against allowable_set, mapping unknown values to the last entry.

        Args:
            x: The value to encode.
            allowable_set (list): The set of allowed values; its last element
                doubles as the bucket for every value not in the set.

        Returns:
            list: A list of booleans with one entry per element of allowable_set.
        """
        if x not in allowable_set:
            x = allowable_set[-1]
        return [x == candidate for candidate in allowable_set]

    def get_atom_features(atom):
        """
        Get the features of an atom.

        The vector concatenates one-hot encodings of the element symbol (9),
        implicit valence (2), radical electron count (2), degree (7),
        formal charge (2) and hybridization (4).

        Args:
            atom (rdkit.Chem.rdchem.Atom): The atom object.

        Returns:
            np.ndarray: The boolean feature vector of the atom (length 26).
        """
        # 'DU' is the catch-all bucket for every other element.
        # NOTE(review): hydrogens (added via AddHs) are not listed, so they
        # are encoded as 'DU' -- confirm this is intended.
        possible_atom = ['C', 'N', 'O', 'F', 'P', 'Cl', 'Br', 'I', 'DU']
        hybridization = rdkit.Chem.rdchem.HybridizationType

        atom_features = one_of_k_encoding_unk(atom.GetSymbol(), possible_atom)
        atom_features += one_of_k_encoding_unk(atom.GetImplicitValence(), [0, 1])
        atom_features += one_of_k_encoding_unk(atom.GetNumRadicalElectrons(), [0, 1])
        atom_features += one_of_k_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5, 6])
        # NOTE(review): a neutral atom (charge 0) is not in [-1, 1] and thus
        # falls into the last bucket (+1). Kept as-is to preserve the feature
        # layout; confirm whether a dedicated 0 slot is wanted.
        atom_features += one_of_k_encoding_unk(atom.GetFormalCharge(), [-1, 1])
        atom_features += one_of_k_encoding_unk(
            atom.GetHybridization(),
            [hybridization.SP, hybridization.SP2, hybridization.SP3, hybridization.SP3D],
        )
        return np.array(atom_features)

    def get_bond_features(bond):
        """
        Get the features of a bond.

        Args:
            bond (rdkit.Chem.rdchem.Bond): The bond object.

        Returns:
            np.ndarray: Boolean vector [single, double, triple, aromatic,
            conjugated, in-ring].
        """
        bond_kinds = rdkit.Chem.rdchem.BondType
        bond_type = bond.GetBondType()
        bond_feats = [
            bond_type == bond_kinds.SINGLE,
            bond_type == bond_kinds.DOUBLE,
            bond_type == bond_kinds.TRIPLE,
            bond_type == bond_kinds.AROMATIC,
            bond.GetIsConjugated(),
            bond.IsInRing(),
        ]
        return np.array(bond_feats)

    molgraph = []
    for molecule_smiles in smiles_list:
        molecule = rdkit.Chem.MolFromSmiles(molecule_smiles)
        if molecule is None:
            # MolFromSmiles signals a parse failure by returning None; fail
            # with a clear message instead of an opaque error inside AddHs.
            raise ValueError(f"Invalid SMILES string: {molecule_smiles!r}")
        molecule = rdkit.Chem.AddHs(molecule)

        graph = DGLGraph()
        graph.add_nodes(molecule.GetNumAtoms())

        # GetAtoms() yields atoms in index order, so row i describes node i.
        node_features = [get_atom_features(atom) for atom in molecule.GetAtoms()]

        # One directed edge per bond, oriented (higher index -> lower index)
        # and sorted by (source, destination). This walks the bonds directly
        # rather than probing every atom pair, while yielding the same edge
        # order as a nested "for i: for j < i" scan.
        bond_pairs = sorted(
            (max(begin, end), min(begin, end))
            for begin, end in (
                (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
                for bond in molecule.GetBonds()
            )
        )
        edge_features = [
            get_bond_features(molecule.GetBondBetweenAtoms(src, dst))
            for src, dst in bond_pairs
        ]
        if bond_pairs:
            sources, destinations = zip(*bond_pairs)
            graph.add_edges(list(sources), list(destinations))

        graph.ndata['x'] = torch.from_numpy(np.array(node_features)).float()  # atom/node features
        graph.edata['w'] = torch.from_numpy(np.array(edge_features)).float()  # bond/edge features
        molgraph.append(graph)
    return molgraph

if __name__ == "__main__":
    # Small demo: build graphs for cyclohexane and acetic acid, then print
    # each resulting DGLGraph on its own line.
    data_smiles = ['C1CCCCC1', 'CC(=O)O']
    molgraphs = smiles_to_molgraph(data_smiles)
    print(*molgraphs, sep="\n")