In [30]:
from rdkit.Chem import MolFromSmiles, MolToSmiles, Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from langchain.agents import Tool
import pubchempy

In [32]:
def calculate_descriptors(compound: str, descriptors: list) -> list:
    '''
    Given a SMILES string and chosen descriptor(s),
    return the value for each descriptor

    parameters:
        compound: a chemical compound in SMILES format
        descriptors: a list of RDKit-compatible descriptor names
            (e.g. 'ExactMolWt', 'HeavyAtomCount'); a single name given
            as a plain string is treated as a one-element list

    returns:
        list of computed properties, one entry per requested descriptor

    raises:
        ValueError: if `compound` cannot be parsed as SMILES, or if any
            requested name is not an attribute of rdkit.Chem.Descriptors
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(descriptors, str):
        descriptors = [descriptors]

    # MolFromSmiles signals a parse failure by returning None, not by raising.
    mol = MolFromSmiles(compound)
    if mol is None:
        raise ValueError(f"could not parse SMILES string: {compound!r}")

    # Check names up front so a typo surfaces as an error instead of being
    # passed through to the calculator, which does not validate them here.
    unknown = [name for name in descriptors if not hasattr(Descriptors, name)]
    if unknown:
        raise ValueError(f"unknown RDKit descriptor(s): {unknown}")

    calc = MoleculeDescriptors.MolecularDescriptorCalculator(descriptors)
    return list(calc.CalcDescriptors(mol)) # CalcDescriptors yields a tuple; convert to list form

# Expose calculate_descriptors to a LangChain agent under the same name.
# NOTE(review): langchain's Tool appears to hand `func` a single string, while
# calculate_descriptors takes two arguments — confirm that whatever drives this
# tool unpacks the JSON described below before calling the function.
Calculate_Descriptors = Tool(
    func=calculate_descriptors,
    name="calculate_descriptors",
    # Braces are doubled in the text below; presumably so a prompt template
    # renders them as literal braces — verify against the agent's prompt.
    description="""calculates chosen descriptors for a smiles. input should be json in the following format: `{{"compound":'<compound_smiles>', "descriptors":'[<descriptor_list>]'}}`""",
)

# Demo: compute two descriptors for atorvastatin from its isomeric SMILES.
# The function defined in this notebook is `calculate_descriptors`; the previous
# call to `calculate_properties` only worked if that name was left over in the kernel.
x = calculate_descriptors('CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O',
                          ['ExactMolWt','HeavyAtomCount'])
print(f"Atorvastatin has {round(x[0],2)} ExactMolWt and {round(x[1],2)} HeavyAtomCount")

Atorvastatin has 558.25 ExactMolWt and 41 HeavyAtomCount


In [31]:
def convert_names(input_name: str, input_type: str, desired_type: str) -> str:
    '''
    Given any compatible name (anything from pubchem lowercase and using _ for space),
    return a different, chosen name.

    parameters:
        input_name: Any compatible name for a chemical compound
        input_type: The type of the inputted name; 'CID' loads the record
            directly by its id, any other value is passed to pubchempy as
            the lookup namespace (e.g. 'name')
        desired_type: The type of name for the output, given as the
            attribute to read from the compound (e.g. 'canonical_smiles')

    returns:
        a string with the desired chemical name

    raises:
        IndexError: if the lookup returns no compound for `input_name`
        ValueError: if `input_type` is 'CID' and `input_name` is not an integer
        AttributeError: if the compound has no attribute named `desired_type`
    '''
    if input_type == 'CID':
        # directly instantiate from the numeric id
        compound = pubchempy.Compound.from_cid(int(input_name))
    else:
        matches = pubchempy.get_compounds(input_name, input_type)
        if not matches:
            # Still an IndexError (what indexing the empty result raised before),
            # so existing handlers keep working — but it now names the lookup.
            raise IndexError(
                f"no PubChem compound found for {input_type}={input_name!r}"
            )
        compound = matches[0] # get first entry
    return getattr(compound, desired_type) # equivalent to compound.canonical_smiles or other type

# Expose convert_names to a LangChain agent under the same name.
# NOTE(review): langchain's Tool appears to hand `func` a single string, while
# convert_names takes three arguments — confirm that whatever drives this tool
# unpacks the JSON described below before calling the function.
Convert_Names = Tool(
    func=convert_names,
    name="convert_names",
    # Braces are doubled in the text below; presumably so a prompt template
    # renders them as literal braces — verify against the agent's prompt.
    description="""converts between the different names for a compound. input should be json in the following format: `{{"input_name":'<compound_name>', "input_type":'<name_type>', "desired_type":'<new_name_type>'}}`""",
)

# Demo: look atorvastatin up by its common name and read back its canonical SMILES.
x = convert_names('Atorvastatin', 'name', 'canonical_smiles')
print(f"Atorvastatin in SMILES format is {x}")

Atorvastatin in SMILES format is CC(C)C1=C(C(=C(N1CCC(CC(CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4
