### Exploring the methods to obtain melting points for our basis set

The most straightforward way is to use the PubChem PUG REST services, as we have before: the melting point data can be conveniently retrieved from the compound record. The drawback is that, from what I have seen, PubChem does not have a lot of melting point data. First we will use this method on the basis set to see how much data we can actually get, and then move on to another method if it is not sufficient.

In [1]:
import pubchempy as pcp
import pandas as pd
import numpy as np
import requests
import json

In [2]:
# Load the HBA compounds and their properties from CSV.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which looks
# like a previously saved index; consider index_col=0 if nothing downstream
# relies on that column - TODO confirm.
hba = pd.read_csv('hba_w_properties.csv')

In [3]:
# Display the HBA table for inspection.
hba

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBA_InChIKey,HBA_MolecularFormula,HBA_MolecularWeight
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True,"['H302', 'H314', 'H301', 'H318', 'H400', 'H410...",500,275,RUPBZQFQVRMKDG-UHFFFAOYSA-M,C22H48ClN,362.10
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H314', 'H400', 'H311', 'H315', 'H318...",295,200,WOWHHFRSBJGXCM-UHFFFAOYSA-M,C19H42ClN,320.00
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True,"['H226', 'H301', 'H302', 'H310', 'H314', 'H318...",275,100,FARBQUXLIQOIDY-UHFFFAOYSA-M,C18H40ClN,306.00
3,5946,CC[N+](CC)(CC)CC.[Cl-],True,"['H302', 'H315', 'H319', 'H335']",120,0,YMBCJWGVCUEGHA-UHFFFAOYSA-M,C8H20ClN,165.70
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True,"['H302', 'H315', 'H319', 'H335', 'H411', 'H412...",145,125,JRMUNVKIHCOMHV-UHFFFAOYSA-M,C16H36BrN,322.37
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True,"['H301', 'H315', 'H318', 'H319', 'H400', 'H410...",195,200,XKBGEWXEAPTVCK-UHFFFAOYSA-M,C25H54ClN,404.20
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H315', 'H319', 'H335']",70,0,CEYYIKYYFSTQRU-UHFFFAOYSA-M,C17H38ClN,291.90
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H311', 'H312', 'H314', 'H318', 'H400...",225,200,VBIIFPGSPJYLRR-UHFFFAOYSA-M,C21H46ClN,348.00
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True,"['H318', 'H400', 'H410']",25,200,REZZEXDLIUJMMS-UHFFFAOYSA-M,C38H80ClN,586.50
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True,"['H302', 'H315', 'H318', 'H319', 'H335']",145,0,DPKBAXPHAYBPRL-UHFFFAOYSA-M,C16H36IN,369.37


In [4]:
# Load the HBD compounds and their properties from CSV.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which looks
# like a previously saved index; consider index_col=0 if nothing downstream
# relies on that column - TODO confirm.
hbd = pd.read_csv('hbd_w_properties.csv')

In [5]:
# Display the HBD table for inspection.
hbd

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBD_InChIKey,HBD_MolecularFormula,HBD_MolecularWeight
0,1176,C(=O)(N)N,True,Not classified as a hazardous substance,0,0,XSQUKJJJFZCRTK-UHFFFAOYSA-N,CH4N2O,60.056
1,1030,CC(CO)O,True,Not classified as a hazardous substance,0,0,DNIAPMSPPWPWGF-UHFFFAOYSA-N,C3H8O2,76.090
2,7896,CC(CCO)O,True,['H226'],0,0,PUPZLCDOIYMWBV-UHFFFAOYSA-N,C4H10O2,90.120
3,753,C(C(CO)O)O,True,Not classified as a hazardous substance,0,0,PEDCQBHIVMGVHV-UHFFFAOYSA-N,C3H8O3,92.090
4,222285,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-ZXZARUISSA-N,C4H10O4,122.120
5,11164,C1C(O1)CO,True,"['H302', 'H312', 'H315', 'H319', 'H331', 'H335...",820,0,CTKINSOISVBQLD-UHFFFAOYSA-N,C3H6O2,74.080
6,10442,C(CO)CO,True,['H315'],10,0,YPFDHNVEDLHUCE-UHFFFAOYSA-N,C3H8O2,76.090
7,262,CC(C(C)O)O,True,['H227'],0,0,OWBTYPJTUOEWEK-UHFFFAOYSA-N,C4H10O2,90.120
8,14846,CC(CO)OC,True,"['H226', 'H315', 'H318', 'H335', 'H360D', 'H360']",185,0,YTTFFPATQICAQN-UHFFFAOYSA-N,C4H10O2,90.120
9,169019,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-QWWZWVQMSA-N,C4H10O4,122.120


In [7]:
# Prototype on a single hard-coded compound (CID 2244).
# TODO: once the parsing is settled, loop over the basis set instead
# (e.g. iterate over hba['HBA_cid'] and build the URL per CID).

# PUG View compound record, restricted to the "Melting Point" heading.
request_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/2244/JSON?heading=Melting+Point"

# The timeout keeps the cell from hanging indefinitely on a stalled connection.
request = requests.get(request_url, timeout=30)
# Surface HTTP errors here rather than as an opaque KeyError when the
# response is indexed later on.
request.raise_for_status()
request_json = request.json()
    

In [47]:
# Collect the first display string of every melting-point entry in the response.
# Each string is wrapped in its own one-element list, so final_list is a list of
# single-item lists.

# Resolve the deeply nested path once instead of re-walking it on every iteration.
melting_point_entries = request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information']

# NOTE(review): assumes every entry carries Value -> StringWithMarkup; entries
# shaped differently would raise KeyError - TODO confirm for other compounds.
final_list = [
    [entry['Value']['StringWithMarkup'][0]['String']]
    for entry in melting_point_entries
]


In [48]:
# Display the collected raw melting-point strings.
final_list

[['275 °F (NTP, 1992)'],
 ['138-140'],
 ['135.0 °C'],
 ['135 °C (rapid heating)'],
 ['135°C'],
 ['135 °C'],
 ['275°F'],
 ['275°F']]

In [None]:
# TODO: the body of this loop was never written (presumably it is meant to
# process the raw melting-point strings - confirm intent). A loop header with no
# body is a syntax error, so `pass` keeps the cell runnable until then.
for i in final_list:
    pass

In [43]:
# Spot-check a single entry (index 3) of the nested Information list: show the
# first display string of its value.
request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'][3]['Value']['StringWithMarkup'][0]['String']

'135 °C (rapid heating)'