In [13]:
import pandas as pd
from libchebipy import ChebiEntity


def get_smiles_from_chebi(chebi_id):
    """Look up the name, SMILES string and definition of a ChEBI entry.

    Args:
        chebi_id: ChEBI identifier, passed unchanged to ``ChebiEntity``.

    Returns:
        A ``(name, smiles, definition)`` tuple taken from the entity's
        ``get_name()``, ``get_smiles()`` and ``get_definition()`` calls.
        If any step of the lookup raises, the failure is printed and
        ``(None, None, None)`` is returned instead, so callers can always
        unpack exactly three values.
    """
    try:
        entity = ChebiEntity(chebi_id)
        name = entity.get_name()
        smiles = entity.get_smiles()
        definition = entity.get_definition()
        return name, smiles, definition
    except Exception as e:
        # Best-effort lookup: report the failure and hand back placeholders
        # with the same shape as a successful result. Returning a bare None
        # here would make `name, smiles, definition = ...` raise TypeError
        # and abort a whole batch on the first bad identifier.
        print(f"Failed to retrieve SMILES for {chebi_id}: {e}")
        return None, None, None


# Read the table of unique ChEBI identifiers
input_csv = '/data_link/servilla/SPOT2/data/Uniprot/Unique_CHEBI_IDs.csv'
df = pd.read_csv(input_csv)

# The identifiers are expected in the 'molecule ID' column
chebi_ids = df['molecule ID']

# Collect one record per identifier, in input order
results = []

for chebi_id in chebi_ids:
    name, smiles, definition = get_smiles_from_chebi(chebi_id)
    record = {
        'ChEBI ID': chebi_id,
        'Name': name,
        'SMILES': smiles,
        'Definition': definition,
    }
    results.append(record)

results_df = pd.DataFrame(results)

# Write the collected records out as CSV (without the index column)
output_csv = '/data_link/servilla/SPOT2/data/Uniprot/chebi_smiles.csv'
results_df.to_csv(output_csv, index=False)

print(results_df)

        ChEBI ID                                     Name  \
0    CHEBI:30616                                  ATP(4-)   
1    CHEBI:64837  N(pros)-phosphonato-L-histidine residue   
2    CHEBI:58245   2'-deoxyadenosine 5'-monophosphate(2-)   
3    CHEBI:57673   2'-deoxyguanosine 5'-monophosphate(2-)   
4    CHEBI:58115           guanosine 5'-monophosphate(2-)   
..           ...                                      ...   
208  CHEBI:16708                                  adenine   
209  CHEBI:16040                                 cytosine   
210  CHEBI:17712                              9H-xanthine   
211  CHEBI:57947                      creatine zwitterion   
212  CHEBI:33542             trioxidosulfanidosulfate(1-)   

                                                SMILES  \
0    Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP([O-])(=O)OP(...   
1        C(*)(=O)[C@@H](N*)CC=1N(C=NC1)P([O-])(=O)[O-]   
2    Nc1ncnc2n(cnc12)[C@H]1C[C@H](O)[C@@H](COP([O-]...   
3    Nc1nc2n(cnc2c(=O)[nH]1)[C@H]1C