In [None]:
from categories import print_tests, all_tests
from rdkit import Chem

: 

In [None]:
def humanBuiltQuery(function = None, query_words = None):
    """Build a Query object by hand from a plain description dictionary.

    This automates building queries for the hard-coded tests below so that
    print_tree functionality should still work with these newer test types.

    Parameters
    ----------
    function : callable or None
        The query function, as built below. It is stored on the returned
        object as ``.query``. Nested subqueries are always built with ``None``.
    query_words : dict or None
        Description of the query. It MUST contain the key 'qtype' with value
        'b:StructureQuery', 'b:ParameterQuery', 'b:ExclusionQuery', or
        'LogicalQuery'. Other keys carry the information print_tree needs for
        that qtype. When omitted, a fresh dictionary with 'qtype', 'smart',
        'prop', 'operand', 'logic' and 'subqueries' all set to None is used.

    Required keys by qtype:
    - Structure or Exclusion Query - 'smart'
    - Parameter Query - 'prop', 'operand', 'value'
    - Logical Query - 'logic', 'subqueries': provided as a list of dictionaries
      obeying input rules for this function

    Returns
    -------
    Query
        ``Query(query_words)`` with ``.query`` and ``.type`` set, plus the
        attributes named after the required keys of its qtype. For a Logical
        Query, ``.subqueries`` is a list of objects built recursively by this
        function. Any other qtype (including None) gets only ``.query`` and
        ``.type``.

    Raises
    ------
    KeyError
        If 'qtype' is absent, or a key required by the given qtype is missing.
    """
    if query_words is None:
        # Built per call: a dict literal in the signature would be a single
        # object shared by every call that omits the argument.
        query_words = {'qtype':None, 'smart':None, 'prop':None, 'operand':None, 'logic': None, 'subqueries':None}

    def require(*keys):
        # Fail with one message naming every absent key, rather than a bare
        # KeyError for whichever lookup happens to run first.
        missing = [key for key in keys if key not in query_words]
        if missing:
            raise KeyError("query_words for qtype {!r} is missing required key(s): {}".format(qtype, missing))

    new_query = Query(query_words)
    new_query.query = function
    qtype = query_words['qtype']
    new_query.type = qtype

    if qtype in ['b:StructureQuery', 'b:ExclusionQuery']:
        require('smart')
        new_query.smart = query_words['smart']
    elif qtype == 'b:ParameterQuery':
        require('prop', 'operand', 'value')
        new_query.prop = query_words['prop']
        new_query.operand = query_words['operand']
        new_query.value = query_words['value']
    elif qtype == 'LogicalQuery':
        require('logic', 'subqueries')
        new_query.logic = query_words['logic']
        # Subqueries are descriptive only: each is built without a function.
        new_query.subqueries = [humanBuiltQuery(None, sub_words) for sub_words in query_words['subqueries']]

    return new_query

: 

In [26]:
# NOTE(review): leftover debugging cell. In the cells visible here, `qtype` is
# only assigned as a local inside humanBuiltQuery, so on a fresh kernel this
# would presumably raise NameError — confirm and remove if so.
qtype

'LogicalQuery'

In [None]:
from categories import Query
def create_test():
    """Build the hand-written membership test for the 'Aliphatic Amines' category.

    The three amine SMARTS patterns are compiled once here and captured by the
    returned closure.

    Returns
    -------
    callable
        ``test(x)``, where ``x`` is a chemical record providing 'mol',
        'smiles' and 'mol_weight'. It is truthy only when the SMILES string
        contains no 'c' and no '1', the weight is below 1000, and the molecule
        matches at least one of the amine patterns.
    """
    amine_smarts = (
        '[NX3;H2;!$(NC=[O,N,S]);!$(NCN)][CX3]',     # primary amine
        '[NX3;H1;!$(NC=[O,N,S]);!$(NCN)](C)[CX3]',  # secondary amine
        '[N;!$(NC=[O,N,S]);!$(NCN)](C)(C)[CX3]',    # tertiary amine
    )
    amine_patterns = [Chem.MolFromSmarts(smarts) for smarts in amine_smarts]

    def test(x):
        mol = x['mol']
        smiles = x['smiles']
        mw = x['mol_weight']
        # Cheap string/number screens run first; substructure matching is
        # only attempted when all of them pass.
        return (
            'c' not in smiles
            and mw < 1000
            and '1' not in smiles
            and any(mol.HasSubstructMatch(pattern) for pattern in amine_patterns)
        )

    return test

# Description of the hand-written Aliphatic Amines test, in the dictionary form
# humanBuiltQuery expects, so the query can be shown by print_tree.
# NOTE(review): test() in create_test also requires "'1' not in smiles"; that
# condition has no entry below — confirm whether the omission is intended.
aa_words = {
    'qtype': 'LogicalQuery', 'logic': 'And',
    'subqueries': [
        {'qtype': 'b:ExclusionQuery', 'smart': 'c'},
        {'qtype': 'b:ParameterQuery', 'prop': 'Molecular Weight', 'operand': 'LessThan', 'value': 1000},
        # NOTE(review): 'or' is lower-case while the outer logic is spelled
        # 'And' — confirm which spelling print_tree expects.
        {'qtype': 'LogicalQuery', 'logic': 'or',
         'subqueries': [
             {'qtype': 'b:StructureQuery', 'smart': '[NX3;H2;!$(NC=[O,N,S]);!$(NCN)][CX3]'},
             {'qtype': 'b:StructureQuery', 'smart': '[NX3;H1;!$(NC=[O,N,S]);!$(NCN)](C)[CX3]'},
             {'qtype': 'b:StructureQuery', 'smart': '[N;!$(NC=[O,N,S]);!$(NCN)](C)(C)[CX3]'},
         ]},
    ],
}
# create_test is a zero-argument factory; call it so that the stored query
# function is the per-chemical test(x) it returns, not the factory itself.
all_tests['Aliphatic Amines'] = humanBuiltQuery(create_test(), aa_words)

: 

In [None]:
# Print the query tree of the rebuilt 'Aliphatic Amines' entry, passing None
# in place of a chemical record.
all_tests['Aliphatic Amines'].print_tree(None)

: 

In [None]:
# Print the 'Aliphatic Amines' query tree for test_chem.
# NOTE(review): in the cells visible here, test_chem is assigned in a later
# cell, so this would presumably fail on Restart & Run All — confirm cell order.
all_tests['Aliphatic Amines'].print_tree(test_chem)

In [17]:
# NOTE(review): print_tree is referenced here without being called, so the
# cell only shows the bound-method repr — add an argument list if the tree
# itself was intended.
all_tests['Acid Chlorides'].print_tree

<bound method Query.print_tree of <categories.Query object at 0x000001BABE8ABE00>>

In [None]:
# Define a test chemical as chemicals are stored in the GenRA database.
# The record is a plain dict holding an identifier, the SMILES string, logp,
# ws, mol_weight, and an RDKit Mol parsed from that same SMILES string.
test_chem = {'dsstox_sid': 'DTXSID7020009',
 'smiles': 'CC#N',
 'logp': -0.33971,
 'ws': 12.6417,
 'mol_weight': 41.053,
 'mol': Chem.MolFromSmiles('CC#N')}

: 

In [None]:
from categories import queryAll

In [None]:
# Show print_tests as imported from categories. In the recorded output it maps
# each category name to a two-item list whose second item is the query type.
print_tests

{'Acid Chlorides': ['32', 'LogicalQuery'],
 'Acrylamides': ['51', 'LogicalQuery'],
 'Acrylates/Methacrylates (Acute toxicity)': ['70', 'LogicalQuery'],
 'Aldehydes (Acute toxicity)': ['89', 'LogicalQuery'],
 'Aliphatic Amines': ['121', 'LogicalQuery'],
 'Aluminum Compounds': ['140', 'LogicalQuery'],
 'Anilines (Acute toxicity)': ['159', 'LogicalQuery'],
 'Azides (Acute toxicity)': ['184', 'LogicalQuery'],
 'Benzotriazoles (Acute toxicity)': ['203', 'LogicalQuery'],
 'Benzotriazole-hindered phenols': ['214', 'b:StructureQuery'],
 'Boron Compounds': ['230', 'LogicalQuery'],
 'Cationic (quaternary ammonium) surfactants': ['241', 'b:StructureQuery'],
 'Cobalt': ['263', 'LogicalQuery'],
 'Diazoniums (Acute toxicity)': ['282', 'LogicalQuery'],
 'Epoxides': ['298', 'LogicalQuery'],
 'Esters (Acute toxicity)': ['317', 'LogicalQuery'],
 'Hydrazines and Related Compounds': ['336', 'LogicalQuery'],
 'Hindered Amines': ['350', 'b:StructureQuery'],
 'Imides (Acute toxicity)': ['369', 'LogicalQuery'

In [None]:
# Five sample chemical records used by the cells below. Each record has the
# same keys as a single test chemical: 'dsstox_sid', 'smiles', 'logp', 'ws',
# 'mol_weight', and 'mol' (an RDKit Mol parsed from the record's own SMILES).
new_test_chems = [{'dsstox_sid': 'DTXSID3060164',
  'smiles': 'C1=CC=CC=C1C(C1C=CC=CC=1)C1C=CC=CC=1',
  'logp': 5.76,
  'ws': 4.07380277804113e-07,
  'mol_weight': 244.125200512,
  'mol': Chem.MolFromSmiles('C1=CC=CC=C1C(C1C=CC=CC=1)C1C=CC=CC=1')},
 {'dsstox_sid': 'DTXSID7060837',
  'smiles': 'ICCCI',
  'logp': 3.02,
  'ws': 0.0007413102413009177,
  'mol_weight': 295.855896192,
  'mol': Chem.MolFromSmiles('ICCCI')},
 {'dsstox_sid': 'DTXSID9025879',
  'smiles': 'OC(=O)C=CC1C=CC(C=CC(O)=O)=CC=1',
  'logp': 1.99,
  'ws': 0.009120108393559097,
  'mol_weight': 218.0579088,
  'mol': Chem.MolFromSmiles('OC(=O)C=CC1C=CC(C=CC(O)=O)=CC=1')},
 {'dsstox_sid': 'DTXSID2026282',
  'smiles': 'O=C(NC1=CC2=C(NC3C4=C(C=CC2=3)C(=O)C2C(=CC=CC=2C4=O)NC(=O)C2C=CC=CC=2)C2=C1C(=O)C1C=CC=CC=1C2=O)C1C=CC=CC=1',
  'logp': 3.11,
  'ws': 2.454708915685029e-08,
  'mol_weight': 667.174335520001,
  'mol': Chem.MolFromSmiles('O=C(NC1=CC2=C(NC3C4=C(C=CC2=3)C(=O)C2C(=CC=CC=2C4=O)NC(=O)C2C=CC=CC=2)C2=C1C(=O)C1C=CC=CC=1C2=O)C1C=CC=CC=1')},
 {'dsstox_sid': 'DTXSID4052188',
  'smiles': 'CC1(C)COC(C)(OC1)C1C=CC=CC=1',
  'logp': 2.72,
  'ws': 0.001148153621496883,
  'mol_weight': 206.130679816,
  'mol': Chem.MolFromSmiles('CC1(C)COC(C)(OC1)C1C=CC=CC=1')}]

In [None]:
# Run every category test on the first sample chemical, then print the name of
# each category whose entry in the result table compares equal to True.
df = queryAll(new_test_chems[0])
for category in all_tests.keys():
    for chem in df.index:
        if not (df.loc[chem, category] == True):
            continue
        print(category)

Neutral Organics


In [None]:
# Inspect the direct subqueries of the 'Acid Chlorides' query; the recorded
# output lists three Query objects.
all_tests['Acid Chlorides'].subqueries

[<categories.Query at 0x1a16ba0fd70>,
 <categories.Query at 0x1a16bad8a10>,
 <categories.Query at 0x1a16ba9bc20>]

In [None]:
# Deliberately malformed record for exercising input validation: it has no
# 'logp' or 'ws' entry, and 'mol_weight' is the string 'A' instead of a number.
error_test = {'dsstox_sid': 'DTXSID3060164',
  'smiles': 'C1=CC=CC=C1C(C1C=CC=CC=1)C1C=CC=CC=1',
  'mol_weight': 'A',
  'mol': Chem.MolFromSmiles('C1=CC=CC=C1C(C1C=CC=CC=1)C1C=CC=CC=1')}

In [None]:
# Demonstrates input validation: error_test is deliberately malformed, and in
# the recorded run this call ends in a KeyError. Catch that expected error and
# show it, so that Restart & Run All can continue past this cell.
try:
    queryAll(error_test)
except KeyError as err:
    print('KeyError: {}'.format(err))
else:
    print('queryAll accepted error_test; a KeyError was expected')

KeyError: logp must be provided
KeyError: ws must be provided
TypeError: mol_weight must be provided as float64. Please adjust the input accordingly.


KeyError: 'One or more attributes is missing. See printed statement(s).'

In [None]:
# Run every category test on all five sample chemicals. The recorded output is
# a table with one row per chemical and one column per category.
queryAll(new_test_chems)

Unnamed: 0,chemicals,Acid Chlorides,Acrylamides,Acrylates/Methacrylates (Acute toxicity),Aldehydes (Acute toxicity),Aliphatic Amines,Aluminum Compounds,Anilines (Acute toxicity),Azides (Acute toxicity),Benzotriazoles (Acute toxicity),...,Organotins (Chronic toxicity),Phenols (Chronic toxicity),Phosphinate Esters (Chronic toxicity),Polynitroaromatics (Chronic toxicity),Substituted Triazines (Chronic toxicity),Thiols (Chronic toxicity),Vinyl Esters (Chronic toxicity),Diazoniums (Chronic toxicity),Ethylene Glycol Ethers,Benzotriazoles
0,DTXSID3060164,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,DTXSID7060837,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,DTXSID9025879,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,DTXSID2026282,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,DTXSID4052188,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Round-trip one record through pandas: take the first row of a DataFrame
# built from new_test_chems and turn it back into a plain dict.
import pandas as pd
mini_test_chem = dict(pd.DataFrame(new_test_chems).iloc[0])

# Pass the record through the categories helpers in turn.
from categories import checkForAttributes, normalizeChemicals, listCategories
mini_test_chem = normalizeChemicals(mini_test_chem)
checkForAttributes(mini_test_chem)
# The next two results are not assigned, and a cell displays only its last
# expression, so neither is shown.
queryAll(mini_test_chem)

dict(mini_test_chem)
# Displayed result of the cell: the categories reported for this chemical.
listCategories(mini_test_chem)

['Neutral Organics']

In [None]:
# Inspect the children of the fourth top-level subquery of 'Aliphatic Amines'.
# NOTE(review): aa_words defines only three top-level subqueries (indices
# 0-2), so index 3 resolving in the recorded run suggests the object held
# state from somewhere else — verify.
all_tests['Aliphatic Amines'].subqueries[3].subqueries


[]

In [None]:
# NOTE(review): `selfy` is not assigned in any cell visible here; this looks
# like leftover kernel state from a debugging session and would presumably
# raise NameError on a fresh kernel — confirm and remove if so.
selfy.subqueries[1].subqueries

[]

In [None]:
# Print the 'Aliphatic Amines' query tree again, with None in place of a chemical.
# NOTE(review): the recorded output lists four 'b:ExclusionQuery' children,
# which does not match the three differently-typed subqueries in aa_words —
# verify how the subqueries list is being populated.
all_tests['Aliphatic Amines'].print_tree(None)

(None, 'LogicalQuery', 'And', 'does not process')
	(None, 'b:ExclusionQuery', 'does not process')
	(None, 'b:ExclusionQuery', 'does not process')
	(None, 'b:ExclusionQuery', 'does not process')
	(None, 'b:ExclusionQuery', 'does not process')
