### Exploring the methods to obtain melting points for our basis set

The most straightforward way is to use the PubChem PUG REST services as we have before: you can conveniently retrieve the melting point data from the compound record. The drawback is that, from what I've seen, PubChem does not have a lot of melting point data. First we will use this method on the basis set to see how much data we can actually get, and then move on to another method if that is not sufficient.

In [1]:
import pubchempy as pcp
import pandas as pd
import numpy as np
import requests
import json

In [2]:
# Load the HBA compound table (PubChem CIDs in 'HBA_cid', plus SMILES, hazard
# scores and other properties) that the melting-point lookup below will extend.
hba = pd.read_csv('hba_w_properties.csv')

In [3]:
hba

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBA_InChIKey,HBA_MolecularFormula,HBA_MolecularWeight
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True,"['H302', 'H314', 'H301', 'H318', 'H400', 'H410...",500,275,RUPBZQFQVRMKDG-UHFFFAOYSA-M,C22H48ClN,362.10
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H314', 'H400', 'H311', 'H315', 'H318...",295,200,WOWHHFRSBJGXCM-UHFFFAOYSA-M,C19H42ClN,320.00
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True,"['H226', 'H301', 'H302', 'H310', 'H314', 'H318...",275,100,FARBQUXLIQOIDY-UHFFFAOYSA-M,C18H40ClN,306.00
3,5946,CC[N+](CC)(CC)CC.[Cl-],True,"['H302', 'H315', 'H319', 'H335']",120,0,YMBCJWGVCUEGHA-UHFFFAOYSA-M,C8H20ClN,165.70
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True,"['H302', 'H315', 'H319', 'H335', 'H411', 'H412...",145,125,JRMUNVKIHCOMHV-UHFFFAOYSA-M,C16H36BrN,322.37
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True,"['H301', 'H315', 'H318', 'H319', 'H400', 'H410...",195,200,XKBGEWXEAPTVCK-UHFFFAOYSA-M,C25H54ClN,404.20
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H315', 'H319', 'H335']",70,0,CEYYIKYYFSTQRU-UHFFFAOYSA-M,C17H38ClN,291.90
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H311', 'H312', 'H314', 'H318', 'H400...",225,200,VBIIFPGSPJYLRR-UHFFFAOYSA-M,C21H46ClN,348.00
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True,"['H318', 'H400', 'H410']",25,200,REZZEXDLIUJMMS-UHFFFAOYSA-M,C38H80ClN,586.50
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True,"['H302', 'H315', 'H318', 'H319', 'H335']",145,0,DPKBAXPHAYBPRL-UHFFFAOYSA-M,C16H36IN,369.37


In [4]:
# Load the HBD compound table (PubChem CIDs in 'HBD_cid', plus SMILES, hazard
# scores and other properties) that the melting-point lookup below will extend.
hbd = pd.read_csv('hbd_w_properties.csv')

In [5]:
hbd

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBD_InChIKey,HBD_MolecularFormula,HBD_MolecularWeight
0,1176,C(=O)(N)N,True,Not classified as a hazardous substance,0,0,XSQUKJJJFZCRTK-UHFFFAOYSA-N,CH4N2O,60.056
1,1030,CC(CO)O,True,Not classified as a hazardous substance,0,0,DNIAPMSPPWPWGF-UHFFFAOYSA-N,C3H8O2,76.090
2,7896,CC(CCO)O,True,['H226'],0,0,PUPZLCDOIYMWBV-UHFFFAOYSA-N,C4H10O2,90.120
3,753,C(C(CO)O)O,True,Not classified as a hazardous substance,0,0,PEDCQBHIVMGVHV-UHFFFAOYSA-N,C3H8O3,92.090
4,222285,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-ZXZARUISSA-N,C4H10O4,122.120
5,11164,C1C(O1)CO,True,"['H302', 'H312', 'H315', 'H319', 'H331', 'H335...",820,0,CTKINSOISVBQLD-UHFFFAOYSA-N,C3H6O2,74.080
6,10442,C(CO)CO,True,['H315'],10,0,YPFDHNVEDLHUCE-UHFFFAOYSA-N,C3H8O2,76.090
7,262,CC(C(C)O)O,True,['H227'],0,0,OWBTYPJTUOEWEK-UHFFFAOYSA-N,C4H10O2,90.120
8,14846,CC(CO)OC,True,"['H226', 'H315', 'H318', 'H335', 'H360D', 'H360']",185,0,YTTFFPATQICAQN-UHFFFAOYSA-N,C4H10O2,90.120
9,169019,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-QWWZWVQMSA-N,C4H10O4,122.120


In [98]:
import json
import re

import pandas as pd
import pubchempy as pcp
import requests

# PUG View endpoint that returns only the "Melting Point" section of a compound record.
_MP_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/%s/JSON?heading=Melting+Point"
_DEGREE = u'\N{DEGREE SIGN}'
# First (optionally negative, optionally decimal) number in a string.
_MP_NUMBER_RE = re.compile(r'-?\d+(?:\.\d+)?')


def _fetch_mp_json(cid):
    """Download the "Melting Point" section of the PubChem record for *cid* as parsed JSON."""
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(_MP_URL % str(cid), timeout=30)
    return response.json()


def _mp_strings(request_json):
    """Return one 'value + unit' string per melting-point entry, or None if the section is absent."""
    try:
        information = request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information']
    except (KeyError, IndexError, TypeError):
        return None

    strings = []
    for entry in information:
        value = entry.get('Value', {})
        numbers = value.get('Number')
        if numbers:
            # Numeric entries without an explicit unit are taken to be Celsius,
            # which is what the original code intended for this case.
            strings.append(str(numbers[0]) + value.get('Unit', _DEGREE + 'C'))
        else:
            markup = value.get('StringWithMarkup')
            if markup:
                strings.append(markup[0].get('String', ''))
    return strings


def _first_number(text):
    """Return the first number found in *text* as a float, or None when there is none."""
    match = _MP_NUMBER_RE.search(text)
    return float(match.group()) if match else None


def _lowest_mp(request_json):
    """Pick the lowest reported melting point (in Celsius) from one PUG View response."""
    if 'Fault' in request_json:  # PubChem answers with a Fault when it has no melting point data
        return 'no mp data'

    strings = _mp_strings(request_json)
    if strings is None:
        return 'no mp data'

    celsius = []     # text preceding the degree sign, kept verbatim (e.g. '119-123')
    fahrenheit = []  # values already converted to Celsius
    for text in strings:
        head, sign, tail = text.partition(_DEGREE)
        if not sign:
            continue
        # Classify by the unit that directly follows the first degree sign so
        # that '275 °F (135 °C)' is not mistaken for 275 °C.
        if tail.startswith('C'):
            celsius.append(head)
        elif tail.startswith('F'):
            number = _first_number(head)  # handles '-40.0', '< -58', '100-120', '486 '
            if number is not None:
                fahrenheit.append((number - 32) * 5 / 9)

    if celsius:
        # Order numerically by the first number in each string; a plain string
        # sort would put '10' before '9'. Strings without a number sort last.
        def numeric_key(text):
            number = _first_number(text)
            return (float('inf') if number is None else number, text)
        return min(celsius, key=numeric_key)

    if fahrenheit:  # Fahrenheit is only used when no Celsius value was reported
        return min(fahrenheit)

    return 'Units uncertain'  # neither a Celsius nor a Fahrenheit value was found


def get_mp(dataframe, source_column, new_column_name, fetch_json=None):
    """Add a column of melting points looked up on PubChem.

    For every compound id in ``dataframe[source_column]`` the "Melting Point"
    section of the PubChem PUG View record is fetched and the lowest reported
    value is stored, since sources may disagree on the melting point.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to extend; it is modified in place.
    source_column : str
        Name of the column holding the PubChem CIDs.
    new_column_name : str
        Name of the column that receives the melting points.
    fetch_json : callable, optional
        ``fetch_json(cid)`` must return the parsed JSON response for one CID.
        Defaults to an HTTP request against PubChem; pass your own callable to
        reuse cached responses or to run without network access.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object with ``new_column_name`` added. Each
        entry is one of:

        * the text preceding the degree sign of the lowest Celsius entry,
          unchanged (a string such as ``'132'`` or ``'119-123'``);
        * a float in Celsius, when only Fahrenheit values were reported;
        * ``'no mp data'`` when PubChem has no melting point section;
        * ``'Units uncertain'`` when no entry names Celsius or Fahrenheit.

    Raises
    ------
    requests.RequestException
        If the default fetcher cannot reach PubChem.
    ValueError
        If the default fetcher receives a response body that is not JSON.
    """
    if fetch_json is None:
        fetch_json = _fetch_mp_json

    final_mp_list = []
    # Iterate the column itself rather than whole rows so the CID keeps its dtype.
    for cid in dataframe[source_column]:
        print(cid)  # progress indicator, one line per compound
        final_mp_list.append(_lowest_mp(fetch_json(cid)))

    dataframe[new_column_name] = final_mp_list

    return dataframe











In [28]:
# Look up a melting point for every CID in hba['HBA_cid']. get_mp prints each
# CID as it is processed, adds the 'HBA_mp' column to hba in place and returns
# the same frame (which is what the notebook echoes below).
get_mp(hba, 'HBA_cid', 'HBA_mp')

23558
8154
62581
5946
74236
21218
20708
8155
7879
67553
2724141
91822
78667
74745
70681
61906
24952
15743
12429
3014969
78073
76521
18843
2735155
82489
80021
79880
78026
77071
75056
70086
17248
134813759
20316921
14029864
11996614
11748636
11746670
11726816
10891295
10062191
71309260
23500186
23500184
23500174
16739405
16212273
15859728
3017238
16213617
8755
8753
31204
5963
31202
23705
66133
13762
29563
20619
177843
21449
13740
15636465
101569
76668
22833367
2724282
159952
122756
78872
40887
33917
23274630
16211866
11062816
10157558
9805153
6097067
3014549
165294
103275
103099
88091
53442637
16211989
13047371
12767855
10992774
3034700
3015147
103101
45050597
16212312
13813470
6285
82326
6225
69582
16211205
11790998
11085061
3083778
16211652
20749
6454543
6452152
112264
11009532
10825627
10059492
75854
75853
13552200
158451
10921
5974
31280
14250
8152
16388
14249
2734117
18669
16957
9880197
74964
74750
70708
77293
74014
3017419
3014876
3014871
24951
44630344
16218603
13743279
12073128
1

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBA_InChIKey,HBA_MolecularFormula,HBA_MolecularWeight,HBA_mp
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True,"['H302', 'H314', 'H301', 'H318', 'H400', 'H410...",500,275,RUPBZQFQVRMKDG-UHFFFAOYSA-M,C22H48ClN,362.10,94-100
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H314', 'H400', 'H311', 'H315', 'H318...",295,200,WOWHHFRSBJGXCM-UHFFFAOYSA-M,C19H42ClN,320.00,no mp data
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True,"['H226', 'H301', 'H302', 'H310', 'H314', 'H318...",275,100,FARBQUXLIQOIDY-UHFFFAOYSA-M,C18H40ClN,306.00,no mp data
3,5946,CC[N+](CC)(CC)CC.[Cl-],True,"['H302', 'H315', 'H319', 'H335']",120,0,YMBCJWGVCUEGHA-UHFFFAOYSA-M,C8H20ClN,165.70,no mp data
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True,"['H302', 'H315', 'H319', 'H335', 'H411', 'H412...",145,125,JRMUNVKIHCOMHV-UHFFFAOYSA-M,C16H36BrN,322.37,no mp data
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True,"['H301', 'H315', 'H318', 'H319', 'H400', 'H410...",195,200,XKBGEWXEAPTVCK-UHFFFAOYSA-M,C25H54ClN,404.20,no mp data
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H315', 'H319', 'H335']",70,0,CEYYIKYYFSTQRU-UHFFFAOYSA-M,C17H38ClN,291.90,no mp data
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H311', 'H312', 'H314', 'H318', 'H400...",225,200,VBIIFPGSPJYLRR-UHFFFAOYSA-M,C21H46ClN,348.00,no mp data
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True,"['H318', 'H400', 'H410']",25,200,REZZEXDLIUJMMS-UHFFFAOYSA-M,C38H80ClN,586.50,no mp data
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True,"['H302', 'H315', 'H318', 'H319', 'H335']",145,0,DPKBAXPHAYBPRL-UHFFFAOYSA-M,C16H36IN,369.37,no mp data


In [29]:
hba

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBA_InChIKey,HBA_MolecularFormula,HBA_MolecularWeight,HBA_mp
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True,"['H302', 'H314', 'H301', 'H318', 'H400', 'H410...",500,275,RUPBZQFQVRMKDG-UHFFFAOYSA-M,C22H48ClN,362.10,94-100
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H314', 'H400', 'H311', 'H315', 'H318...",295,200,WOWHHFRSBJGXCM-UHFFFAOYSA-M,C19H42ClN,320.00,no mp data
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True,"['H226', 'H301', 'H302', 'H310', 'H314', 'H318...",275,100,FARBQUXLIQOIDY-UHFFFAOYSA-M,C18H40ClN,306.00,no mp data
3,5946,CC[N+](CC)(CC)CC.[Cl-],True,"['H302', 'H315', 'H319', 'H335']",120,0,YMBCJWGVCUEGHA-UHFFFAOYSA-M,C8H20ClN,165.70,no mp data
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True,"['H302', 'H315', 'H319', 'H335', 'H411', 'H412...",145,125,JRMUNVKIHCOMHV-UHFFFAOYSA-M,C16H36BrN,322.37,no mp data
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True,"['H301', 'H315', 'H318', 'H319', 'H400', 'H410...",195,200,XKBGEWXEAPTVCK-UHFFFAOYSA-M,C25H54ClN,404.20,no mp data
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H315', 'H319', 'H335']",70,0,CEYYIKYYFSTQRU-UHFFFAOYSA-M,C17H38ClN,291.90,no mp data
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"['H302', 'H311', 'H312', 'H314', 'H318', 'H400...",225,200,VBIIFPGSPJYLRR-UHFFFAOYSA-M,C21H46ClN,348.00,no mp data
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True,"['H318', 'H400', 'H410']",25,200,REZZEXDLIUJMMS-UHFFFAOYSA-M,C38H80ClN,586.50,no mp data
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True,"['H302', 'H315', 'H318', 'H319', 'H335']",145,0,DPKBAXPHAYBPRL-UHFFFAOYSA-M,C16H36IN,369.37,no mp data


In [99]:
# Look up a melting point for every CID in hbd['HBD_cid']. get_mp prints each
# CID as it is processed, adds the 'HBD_mp' column to hbd in place and returns
# the same frame (which is what the notebook echoes below).
get_mp(hbd, 'HBD_cid', 'HBD_mp')

1176
1030
7896
753
222285
11164
10442
262
14846
169019
18302
12190
11429
8998
637497
446973
439888
439846
259994
42953
6994279
6973630
5460455
445969
94215
20497
1657
12486323
11170931
6993189
641012
71309239
642402
87931346
12486324
867
487
68152
11686
538366
18365
10820511
123087
16213460
11332425
71309289
8134
8117
8172
8146
8019
8200
8133
8076
8923
8190
8178
62551
17756
17472
7996
90263
15286
8087
4867
11355992
73791
15287
81313
62411
521158
79734
19846
77596
14765037
9920467
2723953
81639
9771
9763
71309724
53412760
16213380
16212251
11819432
71309368
21183450
16213024
174
3014186
2733139
2733137
16213434
10986148
6269
6101
62738
6924
7371
4784
25457
8420
7811
5017140
78318
77703
9965
6637
75905
15303
13066
6645
6638
6630
6628
5353431
74697
69035
22547
7402
7397
6640
6639
44457221
222870
79202
77279
76562
69539
68056
39497
13046
11868
8485
6938
598985
562845
222872
154187
153296
136743
98916
96276
87910
79776
76950
74740
69434
69109
18521
16673
13722
8623
6925
14763059
4463273
419

11924
11892
11782
10210
9837
50898347
20313173
5352902
4983912
3151587
2780090
2756959
2735609
2733302
2724622
1712193
808194
778517
700620
602761
575777
518900
446626
282064
253131
237413
225855
223595
220324
220005
150866
139037
138552
118342
110729
99862
99818
99174
98577
98283
98014
96732
95459
94852
92959
92733
88337
85728
80634
79695
79676
77823
74653
73914
70357
69553
68992
68474
67695
67537
66167
38111
29788
26549
24123
23269
16046
14098
12854
12439
11470
9899
12332876
11424295
11204982
10176891
9361742
4962173
3870220
3870217
3588736
3390511
3292215
2778994
2733848
2729903
2725061
2064039
2063862
2063421
904938
854054
785330
747832
736146
697959
689096
644449
520373
520273
520062
370590
348720
348329
347383
344812
316783
308772
303571
298877
296013
295961
287322
277927
277664
259740
252732
246682
244162
238995
236577
232036
228104
227256
226294
182114
177164
140324
139032
137806
136696
136302
123464
104196
102525
101370
101359
101234
101121
98950
98421
98068
97942
96501
96468


17016
14855
14759
12111
11742
10698
7311
7267
2374
637542
31405
21648
20087
17004
10436
7253
6937
6923
6610
5280457
637541
70507
12902
12059
11843
10346
8455
8375
8372
7453
7147
7112
340
5281717
608116
86583
69560
68313
68146
62530
26548
21685
16678
15884
15731
14116
12785
10377
9958
7309
6749
6620
5319562
5318169
796857
641301
99016
84677
79717
76013
75294
72303
70775
18048
17927
16498
12769
11907
11381
3054
254
135408751
15541180
5356585
637516
443135
205912
172321
97790
95779
93189
92970
81259
79192
75576
75575
75490
75386
74457
74181
70825
70761
68258
61153
33637
24582
18597
16919
16896
16497
15118
14327
11450
10333
7379
7039
873
11171903
10171348
5315696
2734675
1549106
155892
94763
82200
79983
79706
78153
76883
75875
75370
74865
69224
69165
69154
23109
21260
15808
15523
14853
14305
12775
11351
10335
8735
8396
6898
135484217
12483497
12444627
11602828
11439236
11160528
9882905
7019274
5375953
1382789
919205
688057
519333
254958
248475
247477
242468
228537
220119
164362
97466
95873

12444418
10013216
6365527
133556427
15929269
12444420
3016911
4093363
246854
121440691
284
12220501
11115974
123092
11263466
31347
1128
96232
70483
11709
138400
79119
2772263
21719919
520509
2733922
8086
8112
8016
8880
78361
8072
110301
21088
8132
7994
85382
78179
225935
2383
98429
86049
78211
61929
16070
20545753
12695786
3016164
3015059
174509
108950
98535
98177
19100566
15105174
12695787
12695785
10887881
2777768
546919
415283
63624378
28727773
6451912
67818
67821
67542
9555
9554
77222
67640
67545
15243
6434
2733270
67824
67550
2776093
2775491
2775476
2733269
2733268
167547
165053
106027
67822
14922999
10803537
2782473
2776124
2776041
2776030
2769340
2759976
547885
2776058


Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBD_InChIKey,HBD_MolecularFormula,HBD_MolecularWeight,HBD_mp
0,1176,C(=O)(N)N,True,Not classified as a hazardous substance,0,0,XSQUKJJJFZCRTK-UHFFFAOYSA-N,CH4N2O,60.056,132
1,1030,CC(CO)O,True,Not classified as a hazardous substance,0,0,DNIAPMSPPWPWGF-UHFFFAOYSA-N,C3H8O2,76.090,-59
2,7896,CC(CCO)O,True,['H226'],0,0,PUPZLCDOIYMWBV-UHFFFAOYSA-N,C4H10O2,90.120,< -50
3,753,C(C(CO)O)O,True,Not classified as a hazardous substance,0,0,PEDCQBHIVMGVHV-UHFFFAOYSA-N,C3H8O3,92.090,18
4,222285,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-ZXZARUISSA-N,C4H10O4,122.120,119-123
5,11164,C1C(O1)CO,True,"['H302', 'H312', 'H315', 'H319', 'H331', 'H335...",820,0,CTKINSOISVBQLD-UHFFFAOYSA-N,C3H6O2,74.080,-45
6,10442,C(CO)CO,True,['H315'],10,0,YPFDHNVEDLHUCE-UHFFFAOYSA-N,C3H8O2,76.090,-26.7
7,262,CC(C(C)O)O,True,['H227'],0,0,OWBTYPJTUOEWEK-UHFFFAOYSA-N,C4H10O2,90.120,16.3
8,14846,CC(CO)OC,True,"['H226', 'H315', 'H318', 'H335', 'H360D', 'H360']",185,0,YTTFFPATQICAQN-UHFFFAOYSA-N,C4H10O2,90.120,no mp data
9,169019,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-QWWZWVQMSA-N,C4H10O4,122.120,119-123


In [100]:
hbd

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status,GHS_info,Health Score,Environmental Score,HBD_InChIKey,HBD_MolecularFormula,HBD_MolecularWeight,HBD_mp
0,1176,C(=O)(N)N,True,Not classified as a hazardous substance,0,0,XSQUKJJJFZCRTK-UHFFFAOYSA-N,CH4N2O,60.056,132
1,1030,CC(CO)O,True,Not classified as a hazardous substance,0,0,DNIAPMSPPWPWGF-UHFFFAOYSA-N,C3H8O2,76.090,-59
2,7896,CC(CCO)O,True,['H226'],0,0,PUPZLCDOIYMWBV-UHFFFAOYSA-N,C4H10O2,90.120,< -50
3,753,C(C(CO)O)O,True,Not classified as a hazardous substance,0,0,PEDCQBHIVMGVHV-UHFFFAOYSA-N,C3H8O3,92.090,18
4,222285,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-ZXZARUISSA-N,C4H10O4,122.120,119-123
5,11164,C1C(O1)CO,True,"['H302', 'H312', 'H315', 'H319', 'H331', 'H335...",820,0,CTKINSOISVBQLD-UHFFFAOYSA-N,C3H6O2,74.080,-45
6,10442,C(CO)CO,True,['H315'],10,0,YPFDHNVEDLHUCE-UHFFFAOYSA-N,C3H8O2,76.090,-26.7
7,262,CC(C(C)O)O,True,['H227'],0,0,OWBTYPJTUOEWEK-UHFFFAOYSA-N,C4H10O2,90.120,16.3
8,14846,CC(CO)OC,True,"['H226', 'H315', 'H318', 'H335', 'H360D', 'H360']",185,0,YTTFFPATQICAQN-UHFFFAOYSA-N,C4H10O2,90.120,no mp data
9,169019,C(C(C(CO)O)O)O,True,"['H315', 'H319', 'H335']",70,0,UNXHWFMMPAWVPI-QWWZWVQMSA-N,C4H10O4,122.120,119-123


In [101]:
# Save the HBA table including the new 'HBA_mp' column. index=False stops
# pandas from writing the row index as an extra column.
hba.to_csv('hba_w_mp.csv', index = False)

In [102]:
# Save the HBD table including the new 'HBD_mp' column. index=False stops
# pandas from writing the row index as an extra column.
hbd.to_csv('hbd_w_mp.csv', index = False)

In [77]:


# Scratch cell: fetch the "Melting Point" section for a single compound
# (CID 487) so the JSON layout can be inspected by hand in the cells below.
request_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/487/JSON?heading=Melting+Point" 

request = requests.get(request_url)
request_json = request.json()  # parsed response body as nested dicts/lists
    

In [93]:
# Scratch check: does the first melting-point entry carry both a numeric value
# and a unit? Each key needs its own membership test: `'Number' and 'Unit' in d`
# evaluates as `'Unit' in d` because the non-empty string 'Number' is always truthy.
first_value = request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'][0]['Value']
if 'Number' in first_value and 'Unit' in first_value:
    print ('yes')
    


yes


In [87]:
# Inspect the 'Value' payload of the second melting-point entry (index 1) in the response.
request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'][1]['Value']

{'StringWithMarkup': [{'String': '135°C'}]}

In [15]:
# parsed_list = [] #list that contains the mp info from the json file

# for i in range(len(request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'])):
#     temp_list = []
#     temp_list.append(request_json['Record']['Section'][0]['Section'][0]['Section']
#                     [0]['Information'][i]['Value']['StringWithMarkup'][0]['String'])
#     parsed_list.append(temp_list)

In [16]:
# parsed_list

[['486 °F (NTP, 1992)']]

In [17]:
# mp_list = [] #list that will only contain the melting points in Celsius from the parsed list

# for i in parsed_list:
#     if u'\N{DEGREE SIGN}C' in i[0]: #checking if mp is in Celsius
#         mp = i[0].split(u'\N{DEGREE SIGN}')[0] #splits the string on the degree symbol and grabs the melting point
#         mp_list.append(mp)

In [18]:
# mp_list

[]

In [22]:
# if len(mp_list) == 0: #if list len is zero, only melting point in F available
   
#     temp_list_2 = [] #stores temp mp value in farenheit
#     for i in parsed_list:
#         # taking mp in farenheit
#         if u'\N{DEGREE SIGN}F' in i[0]:
#             # splits the string on the degree symbol and grabs the melting point
#             mp = i[0].split(u'\N{DEGREE SIGN}')[0]
#             temp_list_2.append(mp)

#     for j in temp_list_2:
#         mp_c = (float(j) - 32)*(5/9)
#         mp_list.append(mp_c)