# Molecular structure analysis agent
1. Retrieve the SMILES string of the compound from PubChem by its name.
2. Check compliance with Lipinski's rule of five using RDKit.
3. Return the answer to the user.

In [2]:
import os
from crewai import LLM

# LLM handle shared by the agents below; it targets the model served at the
# given base URL (an Ollama endpoint on localhost:11434).
ollama = LLM(
    model="ollama/qwen3:0.6b",
    base_url="http://localhost:11434",
)

In [3]:
from crewai.tools import BaseTool
from crewai import Agent, Task, Crew, Process

import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.Lipinski import NumHAcceptors, NumHDonors

class retrieve_smiles_from_pubchem(BaseTool):
    """Tool that searches PubChem by compound name and returns a SMILES string."""

    name: str = "Tool to access pubchem"
    description: str = "Retrieve smiles code of the compound with the name."

    def _run(self, chemical: str) -> str:
        """Return the SMILES of the first PubChem hit for ``chemical``.

        Args:
            chemical: Compound name used for the PubChem name search.

        Returns:
            The ``smiles`` attribute of the first match, or a message saying
            the compound was not found when the search returns no results.
        """
        matches = pcp.get_compounds(chemical, 'name')

        # Guard clause: an empty result list means the name was not resolved.
        if not matches:
            return f'{chemical} is not found in PubChem.'

        # Only the first (top) hit is used.
        return matches[0].smiles

class check_lipinski_ro5(BaseTool):
    """Tool that evaluates Lipinski's Rule of Five for a SMILES string with RDKit."""

    name: str ="Tool to check compliance of lipinski's rule of 5 using rdkit based on smiles code"
    # NOTE: this text is what the LLM reads to decide how to use the tool, so
    # the thresholds listed here must match the checks performed in `_run`.
    description: str = ("""
    Check compliance with Lipinski's Rule of Five.

    *Abbreviation*
    mw: molecular weight
    logp: octanol/water partition coefficient
    hbd: hydrogen bonding donor
    hba: hydrogen bonding acceptor

    1) smiles code is needed to get rdkit mol, essential for computation of Lipinski's rule of five.
    2) calculate mw, logp, hbd, hba using rdkit library.
    3) check conditions (mw<=500, logp<=5, hbd<=5, hba<=10).
    4) Three out of four conditions should be satisifed to comply with the Lipinski's rule of five.
    5) Return the dictionary in which each condition to check compliance with Lipinski's rule of five.
    """)

    def _run(self, smiles: str) -> dict:
        """Compute the four Lipinski descriptors and the overall compliance flag.

        Args:
            smiles: SMILES string of the molecule to evaluate.

        Returns:
            An empty dict when RDKit cannot parse ``smiles``; otherwise a dict
            with keys ``"MW"`` and ``"LogP"`` (rounded to 2 decimals),
            ``"HBD"``, ``"HBA"`` and the boolean ``"RO5_Compliant"``.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return {}  # Invalid SMILES

        # Calculate properties
        # NOTE(review): ExactMolWt is the monoisotopic mass; Lipinski's rule is
        # usually stated for the average molecular weight (Descriptors.MolWt).
        # Kept as-is to preserve existing results -- confirm which is intended.
        mw = Descriptors.ExactMolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = NumHDonors(mol)
        hba = NumHAcceptors(mol)

        # Check thresholds
        conditions = [
            mw <= 500,
            logp <= 5,
            hbd <= 5,
            hba <= 10,
        ]

        # Allow at most one violation: at least 3 of the 4 conditions must hold
        # (booleans sum as 0/1).
        ro5_compliant = sum(conditions) >= 3
        return {
            "MW": round(mw, 2),
            "LogP": round(logp, 2),
            "HBD": hbd,
            "HBA": hba,
            "RO5_Compliant": ro5_compliant,
        }

In [4]:
# Instantiate the PubChem lookup tool and call it directly (no agent involved)
# as a quick manual check; the bare expression displays the returned SMILES.
pubchem_tool = retrieve_smiles_from_pubchem()
pubchem_tool._run('acetaminophen')

'CC(=O)NC1=CC=C(C=C1)O'

In [5]:
# Instantiate the Rule-of-Five tool and feed it the SMILES returned by the
# PubChem tool, chaining both tools by hand without any agent.
ro5_tool = check_lipinski_ro5()
ro5_tool._run(pubchem_tool._run('acetaminophen'))

{'MW': 151.06, 'LogP': 1.35, 'HBD': 2, 'HBA': 2, 'RO5_Compliant': True}

In [None]:
# Compound name supplied by the user; it fills the {request} placeholder
# in the agent goal and task description below.
usr_query = "acetaminophen"

# --- Agents ---------------------------------------------------------------

# Resolves a compound name to its SMILES code via the PubChem tool.
pubchem_agent = Agent(
    role="PubChem Agent",
    goal="Get smiles code with the name of compound: {request}",
    backstory="You should use pubchem_tool to get smiles code based on the name of compound.",
    tools=[pubchem_tool],
    llm=ollama,
)

# Evaluates Lipinski's rule of five from a SMILES code via the RO5 tool.
ro5_agent = Agent(
    role="RO5 Agent",
    goal="Use smiles code to check lipinski's rule of five based on rdkit",
    backstory="You should use ro5_tool with smiles code from PubChem Agent",
    tools=[ro5_tool],
    llm=ollama,
)

# --- Tasks ----------------------------------------------------------------

# Task for the PubChem agent: name -> SMILES.
pubchem_task = Task(
    description="Get smiles code using pubchempy with the name of compound: {request}",
    expected_output="Smiles code",
    agent=pubchem_agent,
    tools=[pubchem_tool],  # tool instance made available for this task
)

# Task for the RO5 agent: SMILES -> rule-of-five verdict.
ro5_task = Task(
    description="Check Lipinski's rule of five with the given smiles code.",
    expected_output="Final answer based on lipinski's rule of 5 compliance (MW, logP, HBD, HBA)",
    agent=ro5_agent,
    tools=[ro5_tool],  # tool instance made available for this task
)

# --- Crew -----------------------------------------------------------------

# Assemble both agents and their tasks using the sequential process.
crew = Crew(
    agents=[pubchem_agent, ro5_agent],
    tasks=[pubchem_task, ro5_task],
    process=Process.sequential,
)

# Run the crew with the user's compound name and show the result.
result = crew.kickoff(inputs={"request": usr_query})
print(result)

In [None]:
import re

# Raw text produced by the last task in the crew run.
raw_answer = result.tasks_output[-1].raw

# Keep only the text after the last closing </think> tag (presumably the
# model's reasoning section -- confirm); if the tag is absent the whole text
# is kept. Surrounding whitespace is stripped.
final_output = re.split(r'<\/think>', raw_answer)[-1].strip()
final_output