In [1]:
import lwreg
from lwreg import standardization_lib
import psycopg2
import numpy as np

# Mark external test set 500-700 in all solvents

In [2]:
# Read the database password from a local key file so it is never stored in the notebook.
with open("/home/kpaul/.lwreg_key","r") as key_file:
    key = key_file.read().strip()

# Start from lwreg's default configuration and override only what this project needs.
config = lwreg.utils.defaultConfig()
config.update(
    {
        # Molecules are registered exactly as given (no standardization, hydrogens kept).
        "standardization": standardization_lib.NoStandardization(),
        "dbname": "solvent_forces",
        "dbtype": "postgresql",
        "removeHs": 0,
        # Conformers are registered alongside the molecules.
        "registerConformers": True,
        "hashConformer": 0,  # set to 0
        # NOTE(review): presumably the number of coordinate digits lwreg uses when
        # hashing conformers -- TODO confirm against the lwreg documentation.
        "numConformerDigits": 3,
        "host": "scotland",
        "user": "kpaul_lwreg",
        "password": key,
    }
)

## Find test molecules

In [3]:
SMILES_DIR = "../Simulation/simulation_smiles"
SMILES_FILES = (
    "MB.txt",
    "dimethoxy.txt",
    "intra_molecular_hbond2.txt",
    "conformational_ensemble_smiles.txt",
    "platinum_diverse_selection.txt",
)


def load_smiles(path):
    """Read one SMILES string per line from ``path`` into a 1-D string array.

    ``comments=None`` is required because ``#`` is a valid SMILES character
    (triple bond, e.g. ``C#N``); with numpy's default ``comments='#'`` such a
    SMILES would be silently truncated at the ``#``.
    ``ndmin=1`` makes a file with a single line come back as shape ``(1,)``
    instead of a 0-d array, so every file can be concatenated the same way.

    NOTE(review): this assumes none of the files contain ``#`` comment lines --
    confirm for the files that were previously read with the default setting.
    """
    return np.loadtxt(path, dtype=str, comments=None, ndmin=1)


# All molecules used in prospective simulations, gathered from the individual SMILES lists.
test_molecules = np.concatenate(
    [load_smiles(f"{SMILES_DIR}/{file_name}") for file_name in SMILES_FILES]
)
test_molecules

array(['Fc1ccc(N(C=O)Cc2ccccc2N)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2Nc2ccccc2)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2Nc2ccccc(C#N)2)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2Nc2ccccc(OC)2)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2N)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2Nc2ccccc2)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2Nc2ccccc(C#N)2)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2Nc2ccccc(OC)2)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)C)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)C(F)(F)F)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)C(C)(C)C)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)c2ccccc2)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)c2cc(OC)ccc2)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)c2ccc(F)cn2)cc1',
       'Fc1ccc(N(C=O)Cc2ccccc2NC(=O)c2ccccn2)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2NC(=O)C)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2NC(=O)C(F)(F)F)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2NC(=O)C(C)(C)C)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2NC(=O)c2ccccc2)cc1',
       'Fc1ccc(N(C=O)CCc2ccccc2NC(=O)c2cc(OC)ccc2)cc1',
       'Fc1c

In [4]:
# For every simulation SMILES that is registered in the database, collect what
# lwreg returns for its conformers. Matching uses the NO_STEREO_SMILES hash layer
# only, so the lookup ignores stereochemistry.
confids_to_set_to_test = []
for smiles in test_molecules:
    ids = lwreg.query(config,smiles=smiles,layers=lwreg.utils.HashLayer.NO_STEREO_SMILES)
    if len(ids) == 0:
        # SMILES is not registered in the database: nothing to flag.
        continue
    print(smiles)
    # Second query: look up the conformer entries that belong to the matched ids.
    confids_to_set_to_test.append(lwreg.query(config,ids=ids))

COCCOC
COCCO
[H]c1c([H])c([H])c(N([H])C(=O)[C@@]2([H])C(=O)C([H])([H])C([H])([H])C([H])([H])C2([H])[H])c(C(=O)OC([H])([H])[H])c1[H]


In [5]:
# Flatten the per-molecule query results and keep the first field of every entry
# (presumably the molregno of a (molregno, conf_id) pair -- TODO confirm with lwreg.query).
sim_molregnos = [
    entry[0]
    for query_result in confids_to_set_to_test
    for entry in query_result
]

In [6]:
# De-duplicate the molregnos; np.unique already returns its result in sorted order.
sim_molregnos = np.unique(np.asarray(sim_molregnos))

### Collect the external test set (molecules already flagged `test` in tip3p)

In [7]:
# Fetch the molregno of every conformer whose tip3p calculation is flagged 'test'.
cn = lwreg.utils.connect(config) # Connection to the database
try:
    curs = cn.cursor() # Command line cursor in postgresql
    curs.execute("select molregno from public.conformers inner join solvent_tip3p.explicit_calculations on public.conformers.conf_id=solvent_tip3p.explicit_calculations.conf_id where solvent_tip3p.explicit_calculations.usage_flag='test'")
    df = curs.fetchall()
    cn.commit()
finally:
    # Always release the connection, even if the query fails.
    cn.close()

# One row per conformer, so molregnos repeat; np.unique de-duplicates and sorts.
testmolregnos = np.unique(np.array([m[0] for m in df]))


In [8]:
def conf_ids_for_molregnos(molregnos):
    """Return the sorted, unique conf_ids of all conformers of the given molecules.

    Parameters
    ----------
    molregnos : array-like of int
        Molecule registry numbers to look up in ``public.conformers``.

    Returns
    -------
    numpy.ndarray
        Sorted array of distinct ``conf_id`` values (empty if nothing matches).

    The query uses ``= any(%s)`` with a Python list rather than ``in %s`` with a
    tuple: psycopg2 renders an empty tuple as ``IN ()``, which is a SQL syntax
    error, whereas an empty list becomes an empty array and simply matches nothing.
    """
    cn = psycopg2.connect(
        database=config.get("dbname", None),
        host=config.get("host", None),
        user=config.get("user", None),
        password=config.get("password", None),
    )
    try:
        with cn.cursor() as curs:
            curs.execute(
                "select conf_id from public.conformers where public.conformers.molregno = any(%s)",
                (np.asarray(molregnos).tolist(),),
            )
            rows = curs.fetchall()
    finally:
        # Read-only query: nothing to commit, just release the connection.
        cn.close()
    # np.unique both de-duplicates and sorts.
    return np.unique([row[0] for row in rows])


# Conformers of the prospective-simulation molecules and of the external test set.
sim_conf_ids = conf_ids_for_molregnos(sim_molregnos)
test_conf_ids = conf_ids_for_molregnos(testmolregnos)

### Set the usage flags in all solvents

In [9]:
import yaml

# Load the solvent mapping; its keys are the names used in the
# solvent_<name> database schemas below.
# NOTE(review): yaml.load with FullLoader is kept as in the original; if the file
# only contains plain mappings/strings, yaml.safe_load would be the safer choice.
with open("../Simulation/solvents.yml") as solvents_file:
    solvent_dict = yaml.load(solvents_file, Loader=yaml.FullLoader)[
        "solvent_mapping_dict"
    ]

In [12]:
def flag_conformers(curs, solvent, conf_ids, usage_flag):
    """Set ``usage_flag`` on the given conformers in ``solvent_<solvent>.explicit_calculations``.

    Parameters
    ----------
    curs : psycopg2 cursor
        Open cursor; the caller is responsible for committing.
    solvent : str
        Solvent name; it is interpolated *unquoted* into the schema name, so
        PostgreSQL case-folds it (e.g. ``solvent_Chloroform`` -> ``solvent_chloroform``).
        Do not switch to a quoted identifier without lower-casing the name.
        The names come from the local solvents.yml, not from untrusted input.
    conf_ids : list of int
        Conformer ids to update. An empty list is fine: ``= any(%s)`` with an
        empty array matches no rows, whereas ``in %s`` with an empty tuple
        would be a SQL syntax error.
    usage_flag : str
        Value written to the ``usage_flag`` column.
    """
    curs.execute(
        "update solvent_" + solvent + ".explicit_calculations set usage_flag=%s where conf_id = any(%s)",
        (usage_flag, conf_ids),
    )


# Loop-invariant lookup sets, built once instead of once per solvent.
test_conf_id_set = set(test_conf_ids)
sim_conf_id_set = set(sim_conf_ids)

# One connection for the whole loop, closed in all cases.
cn = psycopg2.connect(
    database=config.get("dbname", None),
    host=config.get("host", None),
    user=config.get("user", None),
    password=config.get("password", None),
)
try:
    with cn.cursor() as curs:
        for solvent in solvent_dict.keys():
            print("Working on solvent: ", solvent)
            curs.execute(
                "select conf_id from solvent_%s.explicit_calculations"
                % solvent
            )
            solvent_conf_ids = [row[0] for row in curs.fetchall()]

            # Set test flag
            test_ids = [cid for cid in solvent_conf_ids if cid in test_conf_id_set]
            print((len(test_ids),))
            flag_conformers(curs, solvent, test_ids, "test")
            cn.commit()

            # Set simulation flag. This runs after the test flag on purpose:
            # a conformer in both sets ends up as 'prospective_simulations'.
            sim_ids = [cid for cid in solvent_conf_ids if cid in sim_conf_id_set]
            print((len(sim_ids),))
            flag_conformers(curs, solvent, sim_ids, "prospective_simulations")
            cn.commit()
finally:
    cn.close()



Working on solvent:  tip3p
(8985,)
(27,)
Working on solvent:  Chloroform
(2847,)
(9,)
Working on solvent:  Methanol
(2895,)
(9,)
Working on solvent:  DMSO
(2898,)
(9,)
Working on solvent:  DMPU
(973,)
(3,)
Working on solvent:  Diethylether
(962,)
(3,)
Working on solvent:  Ethanol
(956,)
(3,)
Working on solvent:  DMF
(966,)
(3,)
Working on solvent:  DCM
(975,)
(3,)
Working on solvent:  Toluol
(970,)
(3,)
Working on solvent:  Benzol
(975,)
(3,)
Working on solvent:  Hexan
(966,)
(3,)
Working on solvent:  acetonitrile
(967,)
(2,)
Working on solvent:  acetone
(946,)
(3,)
Working on solvent:  aceticacid
(919,)
(3,)
Working on solvent:  14dioxane
(912,)
(3,)
Working on solvent:  nitrobenzol
(960,)
(3,)
Working on solvent:  HMPA
(964,)
(3,)
Working on solvent:  MTBE
(930,)
(3,)
Working on solvent:  IPA
(972,)
(3,)
Working on solvent:  Hexafluorobenzene
(950,)
(3,)
Working on solvent:  pyridine
(953,)
(3,)
Working on solvent:  THF
(968,)
(3,)
Working on solvent:  Ethylacetate
(975,)
(3,)
Workin