In [None]:
!pip install -q chembl_webresource_client

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.2/55.2 kB[0m [31m458.4 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.4/69.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from chembl_webresource_client.new_client import new_client
import pandas as pd
from typing import List, Union

def get_similar_chembl_compounds(smiles_list: List[str], threshold: int = 75, return_combined: bool = True) -> Union[pd.DataFrame, dict]:
    """
    Search ChEMBL for compounds similar to each query SMILES (similarity ≥ threshold).

    Parameters:
        smiles_list (list of str): Query SMILES strings.
        threshold (int): Similarity threshold in percent, passed straight to the
            ChEMBL similarity endpoint (default 75).
        return_combined (bool): If True, return one combined DataFrame.
            If False, return a dict of DataFrames keyed by query SMILES.

    Returns:
        pd.DataFrame or dict: Each DataFrame has the columns ``Query_SMILES``,
        ``ChEMBL_ID``, ``Similarity`` (float, percent) and ``SMILES``, sorted by
        ``Similarity`` in descending order per query. Queries that fail or
        produce no usable hits are skipped (a message is printed). If nothing
        is found for any query, an empty DataFrame / empty dict is returned.
    """
    results = {}

    for smi in smiles_list:
        print(f"🔍 Searching similar to: {smi} (≥{threshold}%)")
        try:
            # The client evaluates queries lazily, so the request only happens when the
            # result is consumed. Materialise it here so that request/parsing errors are
            # caught by this handler instead of escaping from the loop below.
            hits = list(new_client.similarity.filter(smiles=smi, similarity=threshold))
        except Exception as e:
            print(f"⚠️ Error for {smi}: {e}")
            continue

        if not hits:
            print(f"⚠️ No similar compounds found for {smi}")
            continue

        data = []
        for mol in hits:
            # `molecule_structures` may be missing or None; fall back to an empty dict.
            struct = mol.get("molecule_structures") or {}
            smiles_hit = struct.get("canonical_smiles", "N/A")

            # The similarity score arrives as text; convert it to a number so that the
            # sort below is numeric ("100.0" must rank above "60.5"). Missing or
            # unparsable scores are skipped.
            try:
                similarity_score = float(mol.get("similarity", None))
            except (TypeError, ValueError):
                continue

            data.append({
                "Query_SMILES": smi,
                "ChEMBL_ID": mol.get("molecule_chembl_id"),
                "Similarity": similarity_score,
                "SMILES": smiles_hit
            })

        if not data:
            print(f"⚠️ No valid results for {smi}")
            continue

        df = pd.DataFrame(data).sort_values("Similarity", ascending=False)
        results[smi] = df
        print(f"✅ Found {len(df)} compounds for {smi}.")

    if not results:
        print("❌ No valid results for any SMILES.")
        return pd.DataFrame() if return_combined else {}

    return pd.concat(results.values(), ignore_index=True) if return_combined else results

In [None]:
# Query compound: 1,8-naphthyridine-3-carboxamide
smiles_batch = [
    "C1=CC2=CC(=CN=C2N=C1)C(=O)N",
]

# Similarity search at a 60% threshold, merged into a single table.
df = get_similar_chembl_compounds(
    smiles_list=smiles_batch,
    threshold=60,
    return_combined=True,
)

print(df)

# Persist the hits next to the notebook.
df.to_csv(path_or_buf='similar_compounds_1,8-Naphthyridine-3-carboxamide.csv')

🔍 Searching similar to: C1=CC2=CC(=CN=C2N=C1)C(=O)N (≥60%)
✅ Found 1 compounds for C1=CC2=CC(=CN=C2N=C1)C(=O)N.
                  Query_SMILES     ChEMBL_ID                Similarity  \
0  C1=CC2=CC(=CN=C2N=C1)C(=O)N  CHEMBL216226  60.000002384185791015625   

                 SMILES  
0  NC(=O)c1cnc2ccccc2c1  


In [None]:
# Query compound: carnosic acid
smiles_batch = [
    "CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C)C(=O)O)O)O",
]

# Similarity search at a 60% threshold, merged into a single table.
df = get_similar_chembl_compounds(
    smiles_list=smiles_batch,
    threshold=60,
    return_combined=True,
)

print(df)

# Persist the hits next to the notebook.
df.to_csv(path_or_buf='similar_compounds_carnosic_acid.csv')

🔍 Searching similar to: CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C)C(=O)O)O)O (≥60%)
✅ Found 21 compounds for CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C)C(=O)O)O)O.
                                         Query_SMILES      ChEMBL_ID  \
0   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4868012   
1   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL1096627   
2   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4471445   
3   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL2333537   
4   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4519804   
5   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4515503   
6   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4574206   
7   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4451825   
8   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4471914   
9   CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[C@@]2(CCCC3(C)C...  CHEMBL4447764   
10  CC(C)C1=C(C(=C2C(=C1)CC[C@@H]3[

In [None]:
# Query compound: carnosol
smiles_batch = [
    "CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCCC4(C)C)C(=O)O3)O)O",
]

# Similarity search at a 60% threshold, merged into a single table.
df = get_similar_chembl_compounds(
    smiles_list=smiles_batch,
    threshold=60,
    return_combined=True,
)

# Show the resulting table.
print(df)

# Persist the hits next to the notebook.
df.to_csv(path_or_buf='similar_compounds_carnesol.csv')

🔍 Searching similar to: CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCCC4(C)C)C(=O)O3)O)O (≥60%)
✅ Found 15 compounds for CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCCC4(C)C)C(=O)O3)O)O.
                                         Query_SMILES      ChEMBL_ID  \
0   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...   CHEMBL483017   
1   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...   CHEMBL491307   
2   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...   CHEMBL478933   
3   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...  CHEMBL1079367   
4   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...  CHEMBL2376097   
5   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...  CHEMBL2333536   
6   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...   CHEMBL507166   
7   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...   CHEMBL494659   
8   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...  CHEMBL1081338   
9   CC(C)C1=C(C(=C2C(=C1)[C@@H]3C[C@@H]4[C@@]2(CCC...   CHEMBL464376   
10  CC(C)C1=C(C(=C2C(