# Taxonomy search of predicted

Given a known compound (SMILES) from LOTUS, we like to see the predicted compounds. 

In [3]:
%%time

# Cell 1: Import pymongo and connect to MongoDB
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client['lotus_mines_enzymatic']

# Cell 2: Function to retrieve taxonomy with field name
def get_taxonomy_with_field_name(document):
    taxonomy_fields = [
        "organism_taxonomy_10varietas",
        "organism_taxonomy_09species",
        "organism_taxonomy_08genus",
        "organism_taxonomy_07tribe",
        "organism_taxonomy_06family",
        "organism_taxonomy_05order",
        "organism_taxonomy_04class",
        "organism_taxonomy_03phylum",
        "organism_taxonomy_02kingdom",
        "organism_taxonomy_01domain"
    ]
    
    for field in taxonomy_fields:
        if document.get(field):
            return field, document[field]
    return None, None
    

# Cell 3: Function to search for SMILES or InChIKey and retrieve compounds
def search_smiles_or_inchikey(search_term):
    lotus_doc = db.lotus.find_one({
        "$or": [
            {"structure_smiles": search_term},
            {"structure_inchikey": search_term}
        ]
    })
    
    if lotus_doc:
        taxonomy_field, taxonomy_value = get_taxonomy_with_field_name(lotus_doc)
        print(f"Found in lotus with ID: {lotus_doc['_id']}")
        if taxonomy_field:
            print(f"Organism Taxonomy ({taxonomy_field}): {taxonomy_value}")
        else:
            print("No taxonomy information available.")
        
        # Count the predicted compounds
        predicted_count = db.compounds.count_documents({"Type": "Predicted"})
        
        print(f"\nNumber of Predicted Compounds: {predicted_count}")
        print("\nPredicted Compounds (_id only):")
        # If you want to list them as well
        predicted_compounds = db.compounds.find({"Type": "Predicted"})
        for compound in predicted_compounds:
            print(compound["_id"])
    else:
        print("No matching SMILES or InChIKey found in the lotus collection.")




# Cell 4: Example usage
search_term = "C=C(CC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C)C(C)C"  # SMILES or InChIKey
search_smiles_or_inchikey(search_term)

Found in lotus with ID: Cdc669418053eef4dc9d8c113ee6322de9393ef00
No taxonomy information available.

Number of Predicted Compounds: 3284418

Predicted Compounds (_id):
C57a73b796ef9de341670ad4f895779c4ce0d4623
Cb969d7dca7f60bdb827dc3efb4fc22789592ef6e
Cc65bdd68f5ca4038b80b1eb2be0d5434f75156dc
C43ffd86541b5ae7a5030f1a1189cabe23c5049d6
C170060f77450253147a47686d71cd2c64daa7765
Cb619d2c65c6b84ad29fc524e1b73e9fa11734e9c
C2dd093b20e3ed07be9b29f17802ac12aaf4fab2a
C2ce60fa02d4fb754b9b17bba6a61e57657a1ce89
C878f017efe6de2805a953d0ca9b8491274a29290
C54130f1c76aaa5380fa631a6a659121284978c5d
Cfa5e885b86c8c37a465cad5238ed62672498a45d
C11a038df994aced6401a2e09ebf260f4f894c7ca
Cb6b15f65b0ef41a4986fbeb37749b1bea3e26164
C8e1b680b68eec30be34c6b4857d630e2c245759d
C06298570e94bc87e59a16459ad11a157259d5521
C00eb38722ea74e3671d815c444f48e0db6315491
Cc6b93e1b698518a73c653b506578d229c000a6b4
Cceca72da2195f029e035132a30c3ae5b2f5b68b0
Cac3b1724a01566dcd8d1bccda78cd2c398c31c1a
C1969c6f42cd8b6130b8f3ff4d8588602

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Ca65fc26c2132c50c360743f50ed60d705080eb1c
C7a2864fd50d986701f524024abc66ed0445cec35
C737ea92c7769adf4a1a8f8e30eb610f7de42abe7
Cb29fde6b3541cf3cc8e8c973c4aafe14c153d22d
C6811c0f47bf86a7e51f5906c9aa5fb5552690e61
C670e87694d9745e9b17999c1f4e779bf9b96b8a7
Cf82b94c1ee2df0347be26a62791d30551e22fe0a
C6712b15c3a0745b29181c2e9f4a27a1eb31eddf1
C1122717cb894acd7ae20dfabb2d298717539a3af
C50fcd277d6d5dde05d3479c71a9b287ef22afe25
Cbaced6a7452160a18a70dd284341419ad0d5a06d
Cfe3937e61ca6516dc79ee68b7adde978120c1a2d
C1a733726b99e28610cd89f56596c7d658e6f4eae
C2e95ad054709f319a0da15ff039e167a377cefad
C84d9943103584e42fe58f621dcb95afdf0efc4ab
C97763aba9a075c7e6ee7e298c0d00a36d4e51dcc
Cecbb9d8d5dfc41fab45c57a680073508a681a24b
Cc1a8cc476bb2b5df48b221b79ce52513e66a681e
C765f6787cbf41bef03350f3e500fb4b51e53777b
Ca198f83d2484d9de8e87866b2df80904d1d169b1
C646d70996accbf98d9fa127f4beb580675e585e5
C892c489a6bfacb0f3e84b27a168ed7b50eac4541
C42da7e04fafcca75ef190e8e0dd4e09c6c6f7ec7
C3c56350a67363b26b5c1f7b5c00fe797a