In [1]:
import pandas as pd

# Relative locations of the two input tables (resolved against the current
# working directory when the cell runs).
ANTIDIABETIC_CSV = "../data/antidiabetic/antidiabetic_molecules_smiles.csv"
NON_ANTIDIABETIC_CSV = "../data/non_antidiabetic/non_antidiabetic_molecules_smiles.csv"

# Load each table into its own DataFrame.
df_a = pd.read_csv(ANTIDIABETIC_CSV)
df_non_a = pd.read_csv(NON_ANTIDIABETIC_CSV)

In [2]:
import requests
import json
import time

def getClassyFire(SMILES:str, ID:str=None, max_retries:int=5, retry_delay:float=61, timeout:float=30):
    """Submit a structure to the ClassyFire web service and return the query record.

    The function POSTs the SMILES string to the ClassyFire ``/queries`` endpoint,
    reads the query ``id`` from the JSON reply and immediately GETs
    ``/queries/<id>.json``. It does not poll: the returned record reflects the
    state of the query at that moment, so its ``classification_status`` may
    still be ``"In Queue"`` with an empty ``entities`` list.

    When the service answers the POST with ``{"error": "Limit exceeded"}`` the
    submission is repeated after ``retry_delay`` seconds, at most ``max_retries``
    times, and the result of the successful attempt is returned to the caller.

    Args:
        SMILES: Structure to classify; sent as ``query_input``.
        ID: Optional label for the query. ``"MyQueryLabel"`` is used when it is
            empty or None.
        max_retries: Maximum number of resubmissions after a "Limit exceeded"
            reply (negative values are treated as 0).
        retry_delay: Seconds to wait before each resubmission.
        timeout: Timeout in seconds passed to each ``requests`` call so a
            stalled server cannot block the notebook indefinitely.

    Returns:
        The body of the GET response as a string, or None when a request
        failed, the POST reply was not valid JSON, the service reported an
        error other than "Limit exceeded", or the retry budget was exhausted.
        Failures are reported with ``print`` rather than raised.
    """
    # Endpoint that accepts new classification queries.
    url = "http://classyfire.wishartlab.com/queries"

    # Request body: query label, the structure itself and the query type.
    payload = {
        "label": ID if ID else "MyQueryLabel",
        "query_input": SMILES,
        "query_type": "STRUCTURE"
    }

    # Send and accept JSON.
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json"
    }

    body = json.dumps(payload)
    error = None

    for attempt in range(max(0, max_retries) + 1):
        if attempt:
            # Previous attempt hit the rate limit; wait before resubmitting.
            time.sleep(retry_delay)

        try:
            response = requests.post(url, data=body, headers=headers, timeout=timeout)
            data_dict = json.loads(response.text)
            if isinstance(data_dict, dict) and 'id' in data_dict:
                # Query accepted: fetch its current record and hand it back.
                result_url = url + '/' + str(data_dict['id']) + '.json'
                return requests.get(result_url, timeout=timeout).text
        except (requests.exceptions.RequestException, ValueError) as e:
            # Network failure or a reply that is not JSON: report and give up.
            print("Ошибка при выполнении запроса:", e)
            return None

        # No 'id' in the reply: only the rate-limit error is worth retrying.
        error = data_dict.get('error') if isinstance(data_dict, dict) else data_dict
        if error != 'Limit exceeded':
            break

    print("Ошибка при выполнении запроса:", error)
    return None

In [3]:
# Submit one example structure; the bare name on the last line makes the
# notebook display the raw JSON text that came back.
example_smiles = 'F[C@H](CC(N1CCn2c(nnc2C(F)(F)F)C1)=O)N'
response_text = getClassyFire(example_smiles)
response_text

'{"id":12590253,"label":"MyQueryLabel","classification_status":"In Queue","number_of_elements":0,"number_of_pages":0,"invalid_entities":[],"entities":[]}'

'{"id":12133074,"label":"MyQueryLabel","classification_status":"Done","number_of_elements":1,"number_of_pages":1,"invalid_entities":[],"entities":[{"identifier":"Q12133074-1","smiles":"C[C@H](O)C1=CC=CC=C1","inchikey":"InChIKey=WAPNOHKVXSQRPX-ZETCQYMHSA-N","kingdom":{"name":"Organic compounds","description":"Compounds that contain at least one carbon atom, excluding isocyanide/cyanide and their non-hydrocarbyl derivatives, thiophosgene, carbon diselenide, carbon monosulfide, carbon disulfide, carbon subsulfide, carbon monoxide, carbon dioxide, Carbon suboxide, and dicarbon monoxide.","chemont_id":"CHEMONTID:0000000","url":"http://classyfire.wishartlab.com/tax_nodes/C0000000"},"superclass":{"name":"Benzenoids","description":"Aromatic compounds containing one or more benzene rings.","chemont_id":"CHEMONTID:0002448","url":"http://classyfire.wishartlab.com/tax_nodes/C0002448"},"class":{"name":"Benzene and substituted derivatives","description":"Aromatic compounds containing one monocyclic ring system consisting of benzene.","chemont_id":"CHEMONTID:0002279","url":"http://classyfire.wishartlab.com/tax_nodes/C0002279"},"subclass":null,"intermediate_nodes":[],"direct_parent":{"name":"Benzene and substituted derivatives","description":"Aromatic compounds containing one monocyclic ring system consisting of benzene.","chemont_id":"CHEMONTID:0002279","url":"http://classyfire.wishartlab.com/tax_nodes/C0002279"},"alternative_parents":[{"name":"Secondary alcohols","description":"Compounds containing a secondary alcohol functional group, with the general structure HOC(R)(R\') (R,R\'=alkyl, aryl).","chemont_id":"CHEMONTID:0001661","url":"http://classyfire.wishartlab.com/tax_nodes/C0001661"},{"name":"Hydrocarbon derivatives","description":"Derivatives of hydrocarbons obtained by substituting one or more carbon atoms by an heteroatom. 
They contain at least one carbon atom and heteroatom.","chemont_id":"CHEMONTID:0004150","url":"http://classyfire.wishartlab.com/tax_nodes/C0004150"},{"name":"Aromatic alcohols","description":"Compounds containing an alcohol group attached to an aromatic carbon.","chemont_id":"CHEMONTID:0003073","url":"http://classyfire.wishartlab.com/tax_nodes/C0003073"}],"molecular_framework":"Aromatic homomonocyclic compounds","substituents":["Monocyclic benzene moiety","Secondary alcohol","Organic oxygen compound","Hydrocarbon derivative","Aromatic alcohol","Organooxygen compound","Alcohol","Aromatic homomonocyclic compound"],"description":"This compound belongs to the class of organic compounds known as benzene and substituted derivatives. These are aromatic compounds containing one monocyclic ring system consisting of benzene.","external_descriptors":[{"source":"CHEBI","source_id":"CHEBI:16346","annotations":["1-phenylethanol"]}],"ancestors":["Alcohols and polyols","Aromatic alcohols","Benzene and substituted derivatives","Benzenoids","Chemical entities","Hydrocarbon derivatives","Organic compounds","Organic oxygen compounds","Organooxygen compounds","Secondary alcohols"],"predicted_chebi_terms":["secondary alcohol (CHEBI:35681)","organic molecule (CHEBI:72695)","aromatic alcohol (CHEBI:33854)","benzenes (CHEBI:22712)","chemical entity (CHEBI:24431)","oxygen molecular entity (CHEBI:25806)","organic molecular entity (CHEBI:50860)","organooxygen compound (CHEBI:36963)","polyol (CHEBI:26191)","organic hydroxy compound (CHEBI:33822)","alcohol (CHEBI:30879)","benzenoid aromatic compound (CHEBI:33836)"],"predicted_lipidmaps_terms":[],"classification_version":"2.1"}]}'

In [None]:
import json
from typing import Dict, Any, List

def parse_classyfire_ids(response_text: str) -> Dict[str, str]:
    """
    Extract identifier -> name pairs from a ClassyFire query record.

    Only the first element of ``entities`` is inspected. Two sources are read:

    * ``predicted_chebi_terms``: strings of the form ``"name (CHEBI:id)"``.
      The identifier is taken from the LAST parenthesised group, so names that
      contain parentheses themselves are handled. Terms that do not end in
      ``" (...)"`` are skipped.
    * the taxonomy nodes ``kingdom``, ``superclass``, ``class``, ``subclass``,
      ``direct_parent`` plus every item of ``alternative_parents`` and
      ``intermediate_nodes``: each node that has both ``chemont_id`` and
      ``name`` contributes one pair.

    Fields that are missing or ``null`` in the JSON are treated as empty.

    Args:
        response_text: JSON text of a ClassyFire query record. None or an
            empty string is accepted and yields an empty result.

    Returns:
        Dict[str, str]: identifiers mapped to names; empty when there is no
        input, no entity, or the text is not valid JSON. Parsing problems are
        reported with ``print`` and are not raised.
    """
    result: Dict[str, str] = {}

    # Nothing to parse (e.g. the upstream request failed and produced None).
    if not response_text:
        return result

    try:
        data = json.loads(response_text)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error processing data: {e}")
        return result

    # Work on the first entity only; bail out if there is none.
    entities = data.get('entities') if isinstance(data, dict) else None
    if not isinstance(entities, list) or not entities or not isinstance(entities[0], dict):
        return result
    entity = entities[0]

    # "name (CHEBI:id)" -> {"CHEBI:id": "name"}. `or []` guards against JSON null.
    for term in entity.get('predicted_chebi_terms') or []:
        if not isinstance(term, str):
            continue
        # Split at the last " (" so parentheses inside the name stay in the name.
        name, separator, tail = term.rpartition(' (')
        if separator and tail.endswith(')'):
            result[tail.rstrip(')')] = name

    # Single taxonomy nodes first, then the list-valued ones, in a fixed order.
    nodes_to_check = [
        entity.get(key)
        for key in ('kingdom', 'superclass', 'class', 'subclass', 'direct_parent')
    ]
    nodes_to_check.extend(entity.get('alternative_parents') or [])
    nodes_to_check.extend(entity.get('intermediate_nodes') or [])

    for node in nodes_to_check:
        # Levels such as 'subclass' can be null; skip anything that is not a node.
        if isinstance(node, dict) and 'chemont_id' in node and 'name' in node:
            result[node['chemont_id']] = node['name']

    return result

In [None]:
# Turn the raw reply into an identifier -> name mapping and display it.
classyfire_ids = parse_classyfire_ids(response_text)
classyfire_ids

{'CHEBI:35681': 'secondary alcohol',
 'CHEBI:72695': 'organic molecule',
 'CHEBI:33854': 'aromatic alcohol',
 'CHEBI:22712': 'benzenes',
 'CHEBI:24431': 'chemical entity',
 'CHEBI:25806': 'oxygen molecular entity',
 'CHEBI:50860': 'organic molecular entity',
 'CHEBI:36963': 'organooxygen compound',
 'CHEBI:26191': 'polyol',
 'CHEBI:33822': 'organic hydroxy compound',
 'CHEBI:30879': 'alcohol',
 'CHEBI:33836': 'benzenoid aromatic compound',
 'CHEMONTID:0000000': 'Organic compounds',
 'CHEMONTID:0002448': 'Benzenoids',
 'CHEMONTID:0002279': 'Benzene and substituted derivatives',
 'CHEMONTID:0001661': 'Secondary alcohols',
 'CHEMONTID:0004150': 'Hydrocarbon derivatives',
 'CHEMONTID:0003073': 'Aromatic alcohols'}