In [1]:
import sys
import numpy as np
import tqdm

sys.path.append("../")
from lwreg import standardization_lib
import lwreg
from rdkit import Chem

import yaml
# Load the solvent mapping from the project's YAML file. The file is opened in
# a context manager so the handle is closed as soon as parsing is done instead
# of being left to the garbage collector.
# NOTE(review): yaml.load with FullLoader is kept to preserve behaviour; if the
# file only contains plain mappings/strings, yaml.safe_load is the safer choice.
with open("../Simulation/solvents.yml") as solvents_file:
    solvent_dict = yaml.load(solvents_file, Loader=yaml.FullLoader)[
        "solvent_mapping_dict"
    ]

# The database password is read from a key file so that it never appears in
# the notebook itself.
with open("/home/kpaul/.lwreg_key", "r") as f:
    key = f.read().strip()

# Start from lwreg's default configuration and override what this export needs.
config = lwreg.utils.defaultConfig()
# Structures are not standardised by lwreg for this database.
config["standardization"] = standardization_lib.NoStandardization()
config["dbname"] = "solvent_forces"
config["dbtype"] = "postgresql"
config["removeHs"] = 0
config["registerConformers"] = True
config["hashConformer"] = 0  # presumably disables conformer hashing -- TODO confirm against lwreg docs
config["numConformerDigits"] = 3  # presumably coordinate digits used by lwreg for conformers -- TODO confirm
config["host"] = "scotland"
config["user"] = "kpaul_lwreg"
config["password"] = key

In [2]:
# Export one SDF file per solvent containing every conformer flagged as
# 'train' or 'test', with its forces, usage flag and SMILES stored as SD
# properties. TIP3P is skipped.
save_folder = "/fileserver/pine/pine8/kpaul/multi_solvent_pub/data/Extracted_forces_SDFs/"
for solvent in tqdm.tqdm(solvent_dict.keys()):
    print(f"Processing {solvent}")
    if solvent == "tip3p": # TIP3P is already published
        print("Skipping TIP3P")
        continue

    sdf_filename = f"{save_folder}dataset_{solvent}.sdf"

    # One connection per solvent; it is closed in the ``finally`` below so the
    # long-running loop does not accumulate open database sessions.
    cn = lwreg.utils.connect(config)
    try:
        curs = cn.cursor()
        # NOTE(review): the schema name is interpolated into the SQL text. The
        # value comes from the local solvents YAML file, not from user input;
        # do not reuse this pattern with untrusted names.
        curs.execute(
            "select canonical_smiles, hashes.molregno, calcs.conf_id, calcs.positions, calcs.forces, calcs.usage_flag from solvent_%s.explicit_calculations calcs join conformers using (conf_id) join hashes using (molregno) where usage_flag='train' or usage_flag='test'" %solvent
        )

        with Chem.SDWriter(sdf_filename) as w:

            for smiles, molregno, conf_id, positions, forces, usage_flag in tqdm.tqdm(curs):

                # Fetch the registered structure for this (molregno, conf_id)
                # pair; the first element of the returned entry is parsed as a
                # mol block.
                results = lwreg.retrieve(config,id=(molregno,conf_id),as_submitted=True)
                mol = Chem.MolFromMolBlock(results[(molregno,conf_id)][0],sanitize=False)

                # Sanity check: the coordinates stored with the calculation
                # must match the registered conformer within 1e-5.
                diff = mol.GetConformer().GetPositions() - positions
                assert np.isclose(diff,0.0,atol=1e-5).all(), (
                    f"positions mismatch for molregno={molregno}, conf_id={conf_id} in solvent {solvent}"
                )

                # The first column of ``forces`` is dropped before serialising
                # (presumably an index column -- TODO confirm).
                # ``threshold=sys.maxsize`` stops numpy from abbreviating
                # arrays with more than 1000 elements to "...", which would
                # silently write incomplete forces to the SDF.
                mol.SetProp(
                    "forces",
                    np.array2string(
                        np.array(forces)[:,1:], separator=",", threshold=sys.maxsize
                    ),
                )
                mol.SetProp("usage_flag",usage_flag)
                mol.SetProp("SMILES",smiles)

                w.write(mol)
        cn.commit()
    finally:
        cn.close()

  0%|          | 0/39 [00:00<?, ?it/s]

Processing tip3p
Skipping TIP3P
Processing Chloroform


1089696it [47:10, 384.96it/s]
  5%|▌         | 2/39 [56:02<17:16:54, 1681.47s/it]

Processing Methanol


1107512it [48:00, 384.49it/s]
  8%|▊         | 3/39 [1:54:37<24:27:16, 2445.46s/it]

Processing DMSO


1105447it [50:36, 364.09it/s]
 10%|█         | 4/39 [2:54:43<27:58:59, 2878.27s/it]

Processing DMPU


369745it [17:13, 357.77it/s]
 13%|█▎        | 5/39 [3:15:03<21:44:15, 2301.63s/it]

Processing Diethylether


364052it [17:02, 355.91it/s]
 15%|█▌        | 6/39 [3:35:04<17:44:59, 1936.34s/it]

Processing Ethanol


360079it [16:56, 354.37it/s]
 18%|█▊        | 7/39 [3:54:57<15:05:14, 1697.34s/it]

Processing DMF


367528it [17:25, 351.39it/s]
 21%|██        | 8/39 [4:15:22<13:20:12, 1548.79s/it]

Processing DCM


367312it [16:38, 368.00it/s]
 23%|██▎       | 9/39 [4:35:01<11:56:56, 1433.88s/it]

Processing Toluol


361517it [15:44, 382.83it/s]
 26%|██▌       | 10/39 [4:53:41<10:46:29, 1337.58s/it]

Processing Benzol


364541it [15:46, 385.28it/s]
 28%|██▊       | 11/39 [5:12:25<9:53:49, 1272.47s/it] 

Processing Hexan


361073it [16:27, 365.79it/s]
 31%|███       | 12/39 [5:31:49<9:17:49, 1239.62s/it]

Processing acetonitrile


365464it [16:56, 359.47it/s]
 33%|███▎      | 13/39 [5:51:45<8:51:26, 1226.40s/it]

Processing acetone


364183it [16:20, 371.57it/s]
 36%|███▌      | 14/39 [6:11:03<8:22:25, 1205.81s/it]

Processing aceticacid


359122it [16:31, 362.25it/s]
 38%|███▊      | 15/39 [6:30:32<7:57:52, 1194.68s/it]

Processing 14dioxane


364334it [16:42, 363.40it/s]
 41%|████      | 16/39 [6:50:12<7:36:11, 1190.08s/it]

Processing nitrobenzol


363863it [16:37, 364.84it/s]
 44%|████▎     | 17/39 [7:09:48<7:14:53, 1186.08s/it]

Processing HMPA


367365it [16:58, 360.53it/s]
 46%|████▌     | 18/39 [7:29:47<6:56:26, 1189.82s/it]

Processing MTBE


359136it [16:22, 365.46it/s]
 49%|████▊     | 19/39 [7:49:06<6:33:32, 1180.60s/it]

Processing IPA


370158it [17:54, 344.40it/s]
 51%|█████▏    | 20/39 [8:10:02<6:21:02, 1203.27s/it]

Processing Hexafluorobenzene


366831it [17:35, 347.69it/s]
 54%|█████▍    | 21/39 [8:30:37<6:03:51, 1212.89s/it]

Processing pyridine


366067it [17:43, 344.37it/s]
 56%|█████▋    | 22/39 [8:51:20<5:46:09, 1221.75s/it]

Processing THF


361535it [17:28, 344.69it/s]
 59%|█████▉    | 23/39 [9:11:44<5:25:58, 1222.40s/it]

Processing Ethylacetate


369272it [17:36, 349.57it/s]
 62%|██████▏   | 24/39 [9:32:20<5:06:39, 1226.64s/it]

Processing Sulfolane


370280it [17:31, 352.05it/s]
 64%|██████▍   | 25/39 [9:52:56<4:46:49, 1229.24s/it]

Processing nitromethane


370304it [17:51, 345.56it/s]
 67%|██████▋   | 26/39 [10:13:50<4:27:57, 1236.76s/it]

Processing Butylformate


370327it [18:08, 340.36it/s]
 69%|██████▉   | 27/39 [10:34:59<4:09:19, 1246.59s/it]

Processing NMP


370315it [17:56, 343.97it/s]
 72%|███████▏  | 28/39 [10:55:59<3:49:15, 1250.51s/it]

Processing Octanol


369453it [18:25, 334.04it/s]
 74%|███████▍  | 29/39 [11:17:28<3:30:19, 1261.91s/it]

Processing cyclohexane


365364it [17:09, 354.75it/s]
 77%|███████▋  | 30/39 [11:37:38<3:06:59, 1246.59s/it]

Processing glycerin


368570it [17:46, 345.47it/s]
 79%|███████▉  | 31/39 [11:58:27<2:46:18, 1247.29s/it]

Processing carbontetrachloride


365678it [17:54, 340.19it/s]
 82%|████████▏ | 32/39 [12:19:22<2:25:46, 1249.51s/it]

Processing DME


308870it [14:50, 347.00it/s]
 85%|████████▍ | 33/39 [12:36:48<1:58:51, 1188.58s/it]

Processing 2Nitropropane


359196it [17:07, 349.53it/s]
 87%|████████▋ | 34/39 [12:56:53<1:39:26, 1193.31s/it]

Processing Trifluorotoluene


363727it [17:36, 344.22it/s]
 90%|████████▉ | 35/39 [13:17:30<1:20:25, 1206.32s/it]

Processing hexafluroacetone


367490it [17:42, 345.85it/s]
 92%|█████████▏| 36/39 [13:38:14<1:00:53, 1217.79s/it]

Processing Propionitrile


366828it [17:33, 348.05it/s]
 95%|█████████▍| 37/39 [13:58:49<40:45, 1222.97s/it]  

Processing Benzonitrile


367008it [17:33, 348.45it/s]
 97%|█████████▋| 38/39 [14:19:23<20:26, 1226.31s/it]

Processing oxylol


358606it [17:02, 350.65it/s]
100%|██████████| 39/39 [14:39:22<00:00, 1352.88s/it]
