### Here we will further reduce the basis set by excluding candidates that are not commercially available, and then retrieve the hazard scores, molecular weights (MW), and melting points used for ranking.

In [1]:
import pubchempy as pcp
import pandas as pd
import numpy as np
import requests
import json
import sys

In [2]:
# Load the HBA candidates together with their vendor-availability flag.
# NOTE(review): the file carries a leftover 'Unnamed: 0' index column (see the
# preview of this table) — consider reading with index_col=0 if nothing downstream needs it.
hba_vendor = pd.read_csv('hba_vendor.csv')

In [3]:
hba_vendor

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True
3,5946,CC[N+](CC)(CC)CC.[Cl-],True
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True


In [4]:
# Keep only the HBA rows whose 'Vendor Status' flag is True
filtered_hba_vendor = hba_vendor.loc[hba_vendor['Vendor Status'] == True]

In [5]:
# Renumber the remaining rows from 0; drop=True discards the old index instead of keeping it as a column
filtered_hba_vendor = filtered_hba_vendor.reset_index(drop = True)

### So we now have reduced the HBA candidates from 4,705 to 898

In [6]:
filtered_hba_vendor

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True
3,5946,CC[N+](CC)(CC)CC.[Cl-],True
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True


### We will do the same for the HBD

In [7]:
# Load the HBD candidates together with their vendor-availability flag.
# NOTE(review): the file carries a leftover 'Unnamed: 0' index column (see the
# preview of this table) — consider reading with index_col=0 if nothing downstream needs it.
hbd_vendor = pd.read_csv('hbd_vendor.csv')

In [8]:
hbd_vendor

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status
0,1176,C(=O)(N)N,True
1,11457650,C(=O)(N)N,True
2,636363,C(=O)(N)N,True
3,10197611,C(=O)(N)N,True
4,2723980,C(=O)(N)N,True
5,22120529,C(=O)(N)NS,True
6,11367429,C(=O)(N)NI,False
7,131953165,C(=O)(N)N,True
8,71309436,C(=O)(N)N,True
9,71309326,C(=O)(N)N,True


In [9]:
# Keep only the HBD rows whose 'Vendor Status' flag is True
filtered_hbd_vendor = hbd_vendor.loc[hbd_vendor['Vendor Status'] == True]

In [10]:
# Renumber the remaining rows from 0; drop=True discards the old index instead of keeping it as a column
filtered_hbd_vendor = filtered_hbd_vendor.reset_index(drop = True)

### Similarly, we have reduced the HBD candidates from over 68,000 to 24,455

In [11]:
filtered_hbd_vendor

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status
0,1176,C(=O)(N)N,True
1,11457650,C(=O)(N)N,True
2,636363,C(=O)(N)N,True
3,10197611,C(=O)(N)N,True
4,2723980,C(=O)(N)N,True
5,22120529,C(=O)(N)NS,True
6,131953165,C(=O)(N)N,True
7,71309436,C(=O)(N)N,True
8,71309326,C(=O)(N)N,True
9,71308962,C(=O)(N)N,True


### Now we will use the functions created previously to get the safety information and scores for the candidates. (There was a bug in the original code, so it was fixed here in the notebook; the changes should now also be reflected in the safety.py module for further use.)

In [93]:
def check_GHS_data(request_json):
    """Report whether a PubChem response carries GHS safety information.

    Returns 'No GHS data available' when the response contains a 'Fault'
    key, and 'GHS data available' otherwise.
    """
    # When the GHS heading is missing, the JSON payload is keyed by 'Fault'.
    has_fault = 'Fault' in request_json
    return 'No GHS data available' if has_fault else 'GHS data available'


def hazard_classification(request_json):
    """Classify a substance from its PubChem GHS response.

    Returns 'No GHS data available' when no GHS data was retrieved,
    'Not classified as a hazardous substance' when the GHS section holds
    exactly one information entry, and 'Hazardous substance' otherwise.
    """
    availability = check_GHS_data(request_json)

    # Nothing to parse when the GHS lookup itself came back empty.
    if availability == 'No GHS data available':
        return availability

    # Information entries of the innermost GHS section of the record.
    ghs_entries = request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information']
    if len(ghs_entries) == 1:
        return 'Not classified as a hazardous substance'
    return 'Hazardous substance'


def get_hazard_codes(cid, timeout=30):
    """Retrieve the GHS hazard codes PubChem lists for a compound.

    This is the main wrapper function for retrieving GHS hazard codes.

    Parameters
    ----------
    cid : int or str
        PubChem compound identifier.
    timeout : float, optional
        Seconds to wait for the PubChem server before giving up (default 30).
        Without a timeout, a single stalled connection would block a loop
        over many compounds indefinitely.

    Returns
    -------
    list of str or str
        The unique hazard codes (e.g. 'H302'), in order of first appearance,
        when the substance is classified as hazardous. Otherwise the status
        string 'No GHS data available' or
        'Not classified as a hazardous substance'.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or times out.
    ValueError
        If the response body is not valid JSON.
    """
    safety_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/%s/JSON?heading=GHS+Classification" % str(cid)

    response = requests.get(safety_url, timeout=timeout)
    request_json = response.json()

    hazard_status = hazard_classification(request_json)

    # The two non-hazardous outcomes are reported to the caller unchanged.
    if hazard_status != 'Hazardous substance':
        return hazard_status

    ghs_entries = request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information']

    hazard_codes = []  # unique hazard codes, in order of first appearance
    for entry in ghs_entries:
        # Only the 'GHS Hazard Statements' entries contain the H-codes.
        if entry['Name'] != 'GHS Hazard Statements':
            continue

        for markup in entry['Value']['StringWithMarkup']:
            statement = markup['String']

            # Skips empty strings as well as sentences that are not H-statements.
            if not statement.startswith('H'):
                continue

            # The code is the first whitespace-delimited token of the statement.
            code = statement.split(' ', 1)[0]

            # Some codes carry a trailing colon from the JSON text; drop it.
            if code.endswith(':'):
                code = code[:-1]

            if code not in hazard_codes:
                hazard_codes.append(code)

    return hazard_codes
                    


### This snippet of code uses the get_hazard_codes function defined above (the corrected version of the one in safety.py) to retrieve all of the hazard codes for the candidates

In [94]:
# Query the GHS hazard codes for every HBA candidate, in row order
hazard_codes = []
for cid in filtered_hba_vendor['HBA_cid']:
    print(cid)  # echo the CID currently being queried
    values = get_hazard_codes(cid)
    hazard_codes.append(values)

# One entry per row: a list of codes or a status string
filtered_hba_vendor['GHS_info'] = hazard_codes

23558
8154
62581
5946
74236
21218
20708
8155
7879
67553
2724141
91822
78667
74745
70681
61906
24952
15743
12429
11769095
3014969
78073
76521
18843
2735155
104201
82489
80021
79880
78026
77071
75056
70086
21541
17248
134813759
60196394
23225441
20316921
14029864
11996614
11748636
11746670
11726816
10891295
10062191
84293
81601
78814
61505
71404253
71309260
23500186
23500184
23500174
23469415
20537283
20537282
19369153
19043781
18971006
16739405
16212273
15859728
15818177
15664046
15461386
15461354
14718300
14009109
13726195
13508502
11288938
10888803
9923738
9846387
3017238
83308
77852
77067
23516
19179
71398586
71370914
71362436
71342334
71338506
71335164
71318846
53436407
53431297
23500181
23018947
23018946
22556203
22506378
22367840
22311748
22311747
22269555
22117807
21924947
20063106
19844217
19604628
18627212
18185861
16213617
16211673
15839034
15708907
14598532
14475739
14389983
14009101
14009098
13879751
13879587
13879585
13860112
13797566
13783603
11783457
11507979
9835338
3015

In [95]:
filtered_hba_vendor

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status,GHS_info
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True,"[H302, H314, H301, H318, H400, H410, H411, H31..."
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"[H302, H314, H400, H311, H315, H318, H335, H41..."
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True,"[H226, H301, H302, H310, H314, H318, H400]"
3,5946,CC[N+](CC)(CC)CC.[Cl-],True,"[H302, H315, H319, H335]"
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True,"[H302, H315, H319, H335, H411, H412, H260, H314]"
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True,"[H301, H315, H318, H319, H400, H410, H302, H314]"
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"[H315, H319, H335]"
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"[H302, H311, H312, H314, H318, H400, H410]"
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True,"[H318, H400, H410]"
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True,"[H302, H315, H318, H319, H335]"


### Now we will use the scoring.py file to get scores for each of the candidates based on their hazard codes

In [100]:
# Add the folder that contains the scoring functions/dictionary to the import search path
sys.path.insert(0, '../safety/')

In [101]:
# Import the scoring function and load the table of hazard codes and their scores
from scoring import get_hazard_scores
# Use the same relative '../safety/' folder that scoring.py is imported from, so the
# notebook does not depend on one user's absolute home-directory path.
# NOTE(review): assumes hazard_score.csv sits next to scoring.py — confirm.
scoring_table = pd.read_csv('../safety/hazard_score.csv')

In [102]:
# One GHS_info entry per HBA candidate, as a plain Python list in row order
GHS_list = list(filtered_hba_vendor['GHS_info'])

# Dictionary form of the scoring table, from which the hazard scores are looked up
scoring_table_dict = scoring_table.to_dict(orient='dict')

In [103]:
# Health and environmental score lists that get_hazard_scores is handed to fill, one call per candidate
health_list, env_list = [], []

for ghs_entry in GHS_list:
    get_hazard_scores(ghs_entry, health_list, env_list, scoring_table_dict)

In [104]:
health_list

[500,
 295,
 275,
 120,
 145,
 195,
 70,
 225,
 25,
 145,
 100,
 170,
 50,
 0,
 70,
 75,
 110,
 160,
 70,
 100,
 145,
 70,
 70,
 70,
 120,
 100,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 100,
 70,
 85,
 100,
 100,
 70,
 70,
 70,
 500,
 20,
 50,
 25,
 70,
 100,
 100,
 100,
 100,
 100,
 70,
 20,
 95,
 20,
 100,
 100,
 100,
 100,
 100,
 100,
 50,
 70,
 70,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 70,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 70,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 450,
 375,
 110,
 470,
 400,
 220

In [105]:
env_list

[275,
 200,
 100,
 0,
 125,
 200,
 0,
 200,
 200,
 0,
 0,
 225,
 0,
 0,
 0,
 100,
 275,
 200,
 0,
 100,
 200,
 0,
 0,
 0,
 0,
 100,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 100,
 0,
 175,
 100,
 100,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 100,
 100,
 100,
 100,
 100,
 0,
 0,
 0,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 0,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 200,
 200,
 200,
 50,
 200,
 100,
 50,
 100,
 100,
 200,
 0,
 100,
 0,


In [106]:
# Attach the scores as new columns; the lists are matched to rows by position,
# so each must have exactly one value per row of the table.
filtered_hba_vendor['Health Score'] = health_list
filtered_hba_vendor['Environmental Score'] = env_list
# Display the table with the two score columns added
filtered_hba_vendor

Unnamed: 0,HBA_cid,HBA_smiles,Vendor Status,GHS_info,Health Score,Environmental Score
0,23558,CCCCCCCCCC[N+](C)(C)CCCCCCCCCC.[Cl-],True,"[H302, H314, H301, H318, H400, H410, H411, H31...",500,275
1,8154,CCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"[H302, H314, H400, H311, H315, H318, H335, H41...",295,200
2,62581,CCCCCCCC[N+](C)(C)CCCCCCCC.[Cl-],True,"[H226, H301, H302, H310, H314, H318, H400]",275,100
3,5946,CC[N+](CC)(CC)CC.[Cl-],True,"[H302, H315, H319, H335]",120,0
4,74236,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],True,"[H302, H315, H319, H335, H411, H412, H260, H314]",145,125
5,21218,CCCCCCCC[N+](C)(CCCCCCCC)CCCCCCCC.[Cl-],True,"[H301, H315, H318, H319, H400, H410, H302, H314]",195,200
6,20708,CCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"[H315, H319, H335]",70,0
7,8155,CCCCCCCCCCCCCCCCCC[N+](C)(C)C.[Cl-],True,"[H302, H311, H312, H314, H318, H400, H410]",225,200
8,7879,CCCCCCCCCCCCCCCCCC[N+](C)(C)CCCCCCCCCCCCCCCCCC...,True,"[H318, H400, H410]",25,200
9,67553,CCCC[N+](CCCC)(CCCC)CCCC.[I-],True,"[H302, H315, H318, H319, H335]",145,0


In [107]:
# Save the HBA table with its GHS codes and scores; index=False keeps the row index out of the file
filtered_hba_vendor.to_csv('filtered_hba_w_safety.csv', index = False)

### We can now repeat this process for the HBD

In [110]:
# Query the GHS hazard codes for every HBD candidate, in row order
hazard_codes = []
for cid in filtered_hbd_vendor['HBD_cid']:
    print(cid)  # echo the CID currently being queried
    values = get_hazard_codes(cid)
    hazard_codes.append(values)

# One entry per row: a list of codes or a status string
filtered_hbd_vendor['GHS_info'] = hazard_codes

1176
11457650
636363
10197611
2723980
22120529
131953165
71309436
71309326
71308962
16213489
71309437
124202878
1030
7896
753
222285
11164
10442
262
14846
169019
18302
12190
11429
8998
92127
637497
446973
439888
439846
259994
92822
42953
6994279
6973630
5460455
445969
225936
220010
94215
20497
1657
12486323
11170931
6993189
99037
20195342
12592183
12241264
6398419
641012
640997
547790
449470
345901
156179
139799
123505
71309239
40457949
21627884
20162113
17835330
16213271
14819038
14120372
12239075
12203561
11768502
10630886
10176094
7014878
642402
253154
240392
214676
208702
87008
85759
71309005
57882501
57127623
54145031
45479327
44148235
44118659
22709006
22286133
22002688
21891976
21758503
18728334
16213502
16213438
15720951
14229398
13801163
13536814
13273098
13273090
12825070
12592182
12498146
12498145
12203549
11275204
11159299
11137112
6999759
2733588
547785
370185
155528
90853312
89781510
87931346
87561753
87153038
71309105
71309096
71308889
59596539
59026256
57056659
57035454

12868674
12834645
12828790
12815427
12801436
12761608
12661295
12644542
12608697
12582786
12582781
12574631
12568537
12565079
12556457
12548799
12548721
12532953
12509526
12495404
12481560
12470281
12429631
12424899
12424897
12424894
12424890
12422128
12418997
12415691
12412826
12412825
12411578
12378424
12346086
12293070
12258736
12249504
12214484
11961356
11834887
11831099
11791354
11770614
11770613
11770515
11631509
11426070
11367403
11345140
11301299
11299453
11299102
11275445
11141895
11140252
11127044
11107052
11097066
11075999
11074111
11066835
11046968
11031937
11019581
11009249
11009139
10998810
10988399
10955266
10922886
10909927
10901532
10900803
10824198
10773982
10680897
10659442
10517333
10334615
10197977
9948898
9877914
9837522
9835718
9816353
9583262
7315423
7168054
7019372
6454295
5704776
5463520
5463061
5461213
5387485
5382420
5240407
5235626
5006204
4914688
4737704
4737336
4622178
4566459
4466166
4341660
4188202
4118232
4102123
3912199
3804276
3776048
3557556
3547167

57497344
57497239
57416308
56737673
54758882
54510174
54469653
54398252
54397313
54016583
53766065
53439936
53425696
53338929
53216311
52911287
52180817
51341958
46911832
46835330
46739471
46739401
46220688
45357613
45083068
45082971
45052200
44459222
44119342
24971982
23424012
23336964
22988871
22909852
22630182
22485360
22352372
22253769
22221624
22173428
22020621
21999478
21989491
21967051
21954678
21923724
21917370
21740077
21708183
21707933
21386914
20872625
20872624
20355594
19883183
19859691
19772420
19436620
19350187
19100356
19083293
19017919
19006344
18978546
18947834
18690409
18617005
18616914
18541828
18521346
18457306
18421185
18353580
17969972
17869445
17810346
17796921
17761054
17750950
16213360
15852925
15817998
15812128
15744079
15688147
15664638
15631663
15621358
15576478
15505783
15210942
15170325
15034753
14695255
14669322
14597426
14440973
14228603
14151430
14048374
13954531
13684366
13610242
13610234
13494253
13405905
13324203
13318953
13304842
13031555
12969896
1

361944
244872
235711
181572
167627
138052
135309
119908
98310
97783
94251
84204
83499
79879
78727
77255
75502
74409
74408
69802
69527
69376
69339
66340
39742
36599
33970
33635
33002
31039
26613
23524
21813
21006
20245
18379
17812
16064
15609
15596
15016
12978
12204
9542
1781
1560
56935801
49770727
20601675
16217011
15569773
14632790
14371787
12346696
10468473
9972843
5312872
5312869
5312863
5312799
5312749
5312700
5312662
5312346
5312335
5312289
5282911
5282910
5282900
5282899
5282685
5282601
5282597
4998669
4383090
4378574
4169198
4052642
3250790
3083765
3083221
3017629
3013895
2783376
2776244
1747486
656737
620397
566760
552003
548221
543634
543502
522487
459849
381109
378339
361947
361946
361943
361942
318731
292984
292983
280190
269945
259794
244084
232555
231998
222518
212838
204478
182089
160576
152196
142712
139065
138016
118383
113342
111470
111278
109095
109035
103882
102894
102607
101877
99723
98008
97917
91425
91169
91168
91166
86827
85708
85548
78360
78321
78009
77076
75546

4147679
4112876
4095614
3736504
3457651
3367086
3289080
3080906
3045043
3036054
3034389
3025406
2735569
2733156
1713018
1713016
1713014
1713012
568482
566758
560435
553176
552715
552600
544240
539108
537012
525065
488116
488115
472328
472326
468668
458097
450509
380475
347361
318732
271909
267589
256374
254536
254535
246850
234509
211036
208864
196822
191731
185095
164708
152435
151273
143180
135772
135771
134554
132442
131558
126407
124486
109692
108962
106234
102886
102445
92184
91787
89899
87374
85410
74961
35727
33503
30942
29865
18812
17363
10011
9990
123133459
123133458
121217472
121009172
118946891
118946670
118946594
118178585
118168933
118168837
117763113
102601131
102044916
93831137
92278166
91864452
90390249
90390096
90390003
89990097
89985408
89542680
89385925
89311728
89061897
88241560
88207782
88206854
87815458
87745016
87646451
87402150
2723790
16213485
16212162
12201089
11400723
11105299
12201087
176
1032
264
6590
6417
1060
8856
10004
65136
9898
59152704
12216724
122162

54270211
53942894
53908420
53770431
53447123
53418992
53329547
53316651
53275959
53216260
46938719
46739825
46739666
46218596
45925729
45357836
44332017
44151790
43810276
42552891
40492826
40424080
29942612
29940514
27282475
25023574
24821994
24820460
24726987
24726957
23583341
23447195
23423017
23295060
23295038
23035894
22988428
22906572
22480132
22406871
22323937
22270652
22051836
21964731
21939462
21893381
21878961
21714191
21572620
21514676
21398367
21190503
21123439
21121355
20586615
20575447
20572731
20496664
20493834
20439316
20322124
20163803
20026176
19966045
19841725
19840115
19107746
19040135
19010931
19010181
18913514
18424721
18417671
18417578
18417120
18381036
17846679
17813988
17788324
17750675
16795674
16769973
16769729
16769509
16769017
16768964
16766584
16228268
16225509
16217358
16038256
15775088
15712209
15672492
15670518
15667418
15649118
15631622
15616459
15582761
15525375
15497666
15487791
15409573
15284809
15222234
15209341
15168495
15125093
15105202
15103325
1

23456934
23456931
23456915
23456908
23456880
23423363
23423245
23422987
23338325
23302150
23295065
23294844
23294784
23293615
23293442
23271185
23165290
23159480
23149500
23083861
23062863
23005219
22917261
22904615
22742526
22722156
22722124
22660131
22598446
22590978
22590975
22569863
22482974
22475873
22471927
22459033
22457007
22397301
22397035
22350209
22344590
22280181
22270748
22268708
22262265
22247029
22243083
22238520
22236963
22211655
22171361
22163365
22157599
22119033
22116405
22115892
22069809
22051227
22015237
21986118
21967221
21964516
21962343
21955332
21955307
21952877
21939567
21939529
21939481
21939416
21939370
21939356
21938646
21937109
21927967
21867819
21864674
21864659
21864656
21864654
21714237
21706477
21700134
21615787
21576626
21576624
21524248
21512296
21493195
21465099
21454895
21433062
21433059
21405824
21405816
21404355
21403420
21336820
21313488
21304904
21287201
21273359
21266083
21251027
21186282
21152552
21118764
21115459
21103209
21044014
21043985
2

816741
777377
754723
747800
747795
747614
744445
737433
737125
736144
736133
716482
716481
694654
694648
641302
640205
609773
602563
596249
591387
583723
583652
582532
582378
578303
578089
577954
576868
576244
576164
576149
573248
573193
572145
570682
569922
569015
568839
568752
568614
563108
563089
563065
562790
562662
562570
562367
562345
562019
561998
560433
521458
520774
488138
488137
488112
447149
444512
409397
382780
350847
348133
347424
344848
343886
339074
331481
318271
317532
316527
313346
299968
298924
296649
291894
283912
281641
281262
277923
277921
277915
277914
275621
274369
274364
273901
271106
269299
269283
265275
263375
263216
263120
261484
259202
256028
253024
249510
246660
243248
242679
242441
241948
240851
240463
238809
237711
236378
235781
232032
231978
231683
231606
230534
230457
230422
229569
228408
227869
226206
226173
226153
226052
223057
222930
222471
221424
219156
218067
216656
213715
213714
206205
205979
199031
199030
195566
195128
194664
192348
187058
185681

11137921
11788454
12303007
12303008
12303010
12306855
12567683
13174312
13661111
13991037
14058958
14159513
14168320
14557096
14557097
15266931
15284009
15559153
18327342
21296524
25209456
42544028
45084666
53802795
54324770
55300310
55300746
55302534
55302699
55303630
55303824
57489064
66039301
66039304
66039539
66039541
71309907
71352216
71355565
71749880
74429831
76380168
79367206
83478565
86033124
87734465
88603411
89127539
89643869
89822422
90254631
90478228
90482843
92158757
92158758
92158759
92158760
92158761
92158762
92159049
92159662
92159683
92163027
92171524
92172059
92172881
92229735
92229736
92229737
92277412
92287087
92469120
92532712
96013041
97546572
97546573
98147182
98147183
98147184
101429369
101570648
101805087
101995931
101995932
102150672
105429169
118327209
118856807
118988855
119095667
119096099
119098396
121493653
121493654
122643701
124300982
124300983
124490638
124506936
124507732
124507733
124507734
124559710
124559711
124559712
124583163
124583164
124583165

11744801
11636827
11480992
11469647
11423590
11378619
11378571
11367018
11356360
11355603
11344095
11321328
11286496
11252863
11217465
11194378
11171004
11170965
11126659
11094598
11052145
10975884
10964596
10953487
10920765
10909623
10899150
10888044
10887894
10866937
10856112
10855456
10752445
10726051
10702577
10535415
10419976
10396982
10103270
10103251
10103228
10012608
10011909
9833973
7567692
7537667
6933329
5960044
5474210
5357789
5288316
5288269
4739186
4739184
4682007
4619932
4374264
4346259
4131255
3820373
3779043
3710250
3705229
3022729
3014190
2733894
2049778
1549988
940410
914902
914901
871785
822499
793100
758262
605479
574153
573264
566338
565211
563117
558940
552686
327926
321330
269681
255154
227183
224826
223843
177463
157983
144745
143576
115881
110718
109435
99220
80236
61165
58140
13453
12169
10876
136610987
135690264
129157024
124661939
124119646
119084886
119084547
118862615
102505472
102106991
97034714
91884051
91411652
90970312
90380116
90380053
89820669
89804

247319
245424
234856
221310
98027
375792
378251
378261
380619
403206
534488
565207
565241
565611
565612
565613
566145
566214
571000
574086
574850
579129
580565
639855
640447
640490
642113
671795
690375
690378
695340
699490
720725
736824
736825
768978
768981
774945
778855
781341
784916
784917
796905
820524
820525
936560
936561
1385362
1385364
1549251
1549987
1609647
1609651
1712615
1712947
1712951
1712954
1713815
1714491
1753659
1916109
1916111
1918713
1918715
1918717
1980950
2189357
2249226
2530479
2530489
2740486
2760292
2771927
2780148
3271033
3311498
3412824
4082350
4584635
4673422
4693307
4739159
4739183
4739194
4739199
4739200
4739201
4739202
4739203
4739204
4875523
4875524
4912963
4912964
5311605
5324102
5325138
5375655
5708283
5708287
5708291
5926351
5926455
5967383
6187803
6376789
6512306
7016625
7036447
7157349
7160905
7200762
7200764
7200765
7471826
9793802
10080632
10241492
10261316
10352313
10375973
10397269
10441880
10442250
10511656
10582836
10583141
10583989
10608697
691

130759834
130762459
130762553
130762574
130767006
130771794
130771855
130773357
130824411
130830241
130841329
130841660
130843047
130856252
130875054
130875252
130875706
130885951
130904163
130904238
130904873
130918088
130918694
130918699
130928555
130929205
130929878
130944890
130948056
130966414
130977417
130990901
131000812
131003444
131004393
131004576
131007454
131019248
131037385
131038266
131039031
131041853
131042187
131043930
131045585
131064043
131080699
131107073
131107081
131132539
131136398
131156024
131175146
131177594
131179304
131199984
131201868
131215288
131230720
131231342
131637981
131846094
132987885
133617458
133618193
133618731
133673612
133675618
133677880
133678421
133687546
133687549
137553927
137553928
137553937
157355
5793
493591
88735
439357
64947
6036
3034828
439764
439195
135191
64689
18950
134129635
4454759
444200
439353
94214
79025
76935
17106
2108
2724705
448388
444863
441481
439804
439710
439650
25310
6902
6027
3871
16048618
11869260
9828112
656941
4

53655245
53640233
53639018
53639017
46874252
46783484
46783455
46782224
46780441
45358320
45358319
45266785
45109814
45040370
45038839
45038826
45038823
44717700
44629923
44354842
44328785
44321795
44149764
40617445
29927361
26470594
26470514
26280917
24802564
24802505
24802344
24802303
24802302
24802292
24802291
24802290
24802163
24802149
24802068
24801912
24801910
23724695
23644578
23543215
23543214
23432884
22997012
22960898
22887295
22887279
22887271
22887245
22887244
22867790
22859546
22859545
22852473
22845217
22842016
22837929
22816498
22811341
22555907
22495206
22415851
22381778
22214407
22214154
22094536
21969694
21858375
21723221
21723220
21720294
21646006
21635043
21633031
21633016
21632987
21604003
21602351
21600129
21600128
21596889
21521808
21308539
20768265
20768264
20764116
20598616
20596796
20584087
20584086
20078710
19361256
18974249
18665788
18654015
18646092
18641494
18602649
18417383
17973953
17926291
17751026
16663983
16217663
16211988
16211987
15978236
15858348
1

3381990
3086532
3083133
3082905
3082806
3081297
3081217
3034998
2802069
2724778
261147
236622
197522
197415
195681
195678
195676
195663
193406
192430
190888
146373
132613
131404
131122
131046
130732
128164
118162
110671
87233
18468
1144
134692182
134129496
133688160
133611951
126735860
126731036
124196217
122451653
102173123
97293609
92449963
91857885
91857527
91855269
91623337
90125707
6643
448537
7017
667476
192197
31404
16043
10364
8814
7771
7005
6989
3610
244
66166
12053
11335
10687
7948
7409
7393
7249
7103
6623
4943
637540
72300
17016
14855
14759
12111
11742
10698
7311
7267
3606
2374
637542
31405
21648
20087
17004
10436
7253
6937
6923
6610
5280457
637541
70507
12902
12059
11843
10346
8455
8375
8372
7453
7147
7112
340
5281717
608116
86583
69560
68313
68146
62530
26548
21685
16678
15884
15731
14116
12785
10377
9958
7309
6749
6620
13259919
5319562
5318169
796857
641301
234907
188287
99016
84677
79717
76013
75294
72303
70775
18048
17927
16498
12769
11907
11381
3054
3049
254
135408751


11356203
11356048
11355640
11320974
11314052
11310258
11298405
11287852
11286977
11286912
11286794
11275229
11264749
11252727
11228871
11217851
11206198
11171430
11138426
11128076
11126435
11116606
11116238
11106206
11105746
11083932
11040884
11032503
10997099
10994035
10987648
10986277
10964856
10953671
10923275
10922843
10911001
10910647
10900865
10890246
10888236
10882130
10866399
10797135
10726467
10725670
10704873
10683868
10645332
10582824
10564578
10486892
10376542
10376477
10351871
10309709
10261921
10236210
10214726
10192734
10176498
10130551
10103868
9964584
9856534
9839024
9813060
9795379
9794200
9794036
6999842
6850856
6613417
6504586
6454223
6454044
6453934
6452778
6429339
6365359
6019831
5960620
5463755
5463753
5376033
5370692
5325941
5324666
5314408
5171345
5169710
5127431
5020317
4611298
4535586
4454822
4186462
4157428
4091386
3912608
3854867
3848879
3348569
3106702
3106037
3083760
3080691
3058303
3057224
3057208
3035633
3025046
3024064
3017640
3017523
3017362
3016913
3

14388655
14372665
14339292
14325412
14315726
14274819
14274496
14258206
14258205
14251859
14227518
14220040
14209863
14204944
14202829
14115991
14086665
14086159
14071763
14066940
14066936
14044820
14029518
14018450
14009024
14009002
14008514
13981642
13981000
13980668
13929048
13878465
13877096
13868473
13862957
13848399
13847909
13847907
13847901
13838774
13826248
13807217
13796738
13796216
13783521
13778508
13766249
13751987
13732723
13722497
13699757
13684853
13668735
13660470
13633795
13627680
13624841
13591064
13591061
13580789
13542887
13541921
13541579
13541364
13540682
13530916
13485869
13462558
13459778
13438942
13438939
13433934
13405064
13404899
13395187
13392513
13358356
13346144
13342591
13330127
13328339
13324205
13319286
13302007
13277867
13255796
13201015
13197611
13193501
13172931
13140161
13139396
13139394
13130216
13128056
13116082
13086199
13047688
13040158
13004448
12992331
12981674
12970067
12958403
12921787
12872970
12858548
12858519
12830123
12815574
12788186
1

10200771
9942267
6444306
6021858
5385045
5371146
5281795
5067808
4631015
4535080
4524918
4189750
4078577
3537878
3353399
3017967
2793907
1550356
1237122
767641
754769
641440
592578
546966
523264
519420
445939
385826
349172
312803
211028
188222
185968
171452
156514
140904
134788
127642
118646
118414
112192
104571
104567
89313
88477
78660
77788
68520
29358
136823670
136622888
136610218
136156395
135797589
135702308
135681997
135629232
135478731
135477883
129164786
126752658
126752642
126539174
125501570
125501522
125485763
124355530
124164647
124004566
123788338
123155087
122664967
122235351
121513959
121420250
121417940
121415614
121342617
121342614
119031990
119019869
119001063
118899107
118899098
118899044
118835335
9838022
644019
8456
4534
15942731
125835
71398
11305
6929
6815
12788231
10209
10205
6932
9996032
21387
11492
11397
10229
8601
7091
11601669
5353609
442027
104895
76617
75547
68463
14529
11318
10723
10288
6439522
4412255
3083585
442700
347577
181700
174862
161858
155461
119

6454047
6454042
6432270
5328759
5314426
5239155
5216234
5103527
4834325
4575996
4527621
4383325
4286764
4213114
3644904
3629334
3609462
3413668
3255344
3079583
3060519
3056662
3024821
3023308
3023305
3023304
3022871
3022777
3022775
3022568
3022567
3021055
3020610
3019466
3019354
3017982
3017981
3017552
3017550
3017521
3017206
3017203
3017200
3016582
3015955
3015792
3015545
3015242
2775194
2775049
1572796
1512675
1271820
706609
688036
622986
598868
590422
577766
522278
519561
466442
350405
316181
285886
282982
278495
268784
262681
251510
235618
230018
221613
214946
190466
188496
187461
179973
160109
154193
152135
148983
145777
145762
133896
133190
118964
118161
104575
104573
104572
104570
104569
104568
103816
103461
100926
97040
90248
87924
137796284
8425
21013
8449
7271
6914
11334
7258
27582
6899
66
18909
11859
26173
16496
301
12008
65
67299
1731
19928
19859
12167
10104483
79238
22630180
4559010
2773710
94950
94554
88801
88366
80331
78759
69670
21394
19877
17750691
11400891
10988569
70

49786991
46782165
46738762
45266890
45158693
45090716
44754831
44178761
39236211
29926298
24770286
24745085
24745084
24745083
24744926
24253863
23508303
23414473
23373822
23373816
23318306
23266458
23157253
23045284
22988570
22761328
22728263
22667259
22616539
22615171
22485537
22460126
22460118
22280894
22147746
22146233
22146226
22051950
22036327
22033480
21890808
21678610
21553949
21504029
21417497
21407893
21392546
21331683
21270587
21248434
21248430
21087751
20735265
20501040
20498216
20447459
20329942
20274112
20257083
20249276
20234196
20223900
19875622
19849468
19738364
19372171
19369449
19261743
19001154
18989358
18979404
18795796
18683611
18521001
18518863
18517276
18392772
18377669
18351671
18336120
17833065
17620899
16664826
16282361
16216996
16216995
16064952
15877487
15719483
15695098
15662578
15652627
15570277
15520802
15471313
15462916
15382571
15323515
15219061
15171821
15058658
14856743
14856733
14676898
14636490
14636489
14472553
14440994
14386780
14257601
14252192
1

76721
60046
42848
8498
287929
32064
17916
357051
95655
46782564
21971977
20513531
12249371
3034149
342520
293482
249874
239819
237041
236171
231939
86719910
54569113
46209411
45791167
22642441
22218551
22056833
21652161
18702095
18003851
17990389
17796806
17770113
15894840
14344578
13635720
13635719
13397348
10313070
261287
231181
172363
92498
17383
130126311
89127751
87906532
87243796
85774326
83815048
71356353
60161167
60111371
59143868
59143861
58592132
57988531
57694919
57694918
54594038
54512402
53794716
45087581
39869974
23423714
23277870
23263412
22713030
22642434
22218487
22030569
22024138
21119478
20696631
20480697
18322620
17796805
15217356
13586016
13583389
12212439
10909681
343878
231183
130480784
117951351
92043528
89467522
89275850
89001860
52146250
45039965
44815308
22024137
16697458
6951553
6951551
10011870
10329357
18935904
22688893
28064844
28064846
44521768
45088758
53424121
53434844
60685741
60686282
61287134
61287345
62020806
62021319
62021588
62310400
65804602
713

56916147
57466509
57494152
60664469
60669427
60684113
60684296
60684997
60685191
60685215
60685317
60686254
60687291
60690647
60694628
60695295
60695337
60719562
60723737
60966952
60968513
61061185
61083639
61085657
61086261
61098379
61324497
61326611
61387078
61387184
61405436
61462908
61463001
61482333
61482335
61482448
61492961
61493115
61493122
61493147
61493148
61493221
61493296
61493297
61493392
61494716
61494717
61494819
61495040
61495041
61557588
61557639
61557660
61557689
61557690
61557723
61557832
61557868
61557898
61558057
61558089
61558122
61558150
61558178
61558222
61558228
61558229
61558231
61558235
61558254
61558255
61558271
61558301
61558308
61558348
61558355
61558383
61558385
61558387
61558393
61558432
61558434
61558435
61558437
61560963
61561068
61561166
61561200
61561201
61561238
61561354
61561357
61566254
61566279
61571635
61571664
61687947
61688079
61691031
61691582
61693329
61697981
61751814
61752614
61752792
61753181
61778837
61854135
61881133
61881135
61881507
6

193477
39177
38755
22734
53401778
22352249
22168201
20260989
19366384
18541668
15174615
15003010
13725909
12827948
11320951
11287381
11126290
9793740
98165
47796
71394040
23290254
22763528
21987994
21572685
20837854
20281315
20274072
17775181
17750133
15842001
15100129
14740453
13834050
13683897
13544387
13214070
13214069
12937416
12729170
12600623
12430065
12017059
11815878
11228813
11018913
10725529
9794029
6999778
6994463
5312732
348271
94851143
88135627
87229012
71420028
71351507
71309257
71309229
71309162
71309160
71309003
64299449
64299240
59029785
59007849
57909727
57425242
57026066
57021572
54295823
54259317
53418167
23073707
22827795
20648317
20518111
19380617
18477236
17946079
14057564
13977921
13706728
13706726
12953792
12195557
11309528
10986394
10511030
7021685
129882720
129847364
94851145
93517168
83815296
81357860
71388526
60142184
55299634
55298785
53378100
45934158
13011403
12358993
12209362
11789729
8030498
10441675
11460000
12204308
12222844
12227347
12356390
1243006

145711288
6288
5951
5950
71077
602
71080
68841
617
99289
90624
100714
106
160555
69435
205
164701
99478
2723731
439747
416960
324007
88251
29
54549710
6399258
5288119
2734894
2724186
416257
146933
128057
88250
37164
27136
189
49866897
17757249
16213421
16213420
16213419
12982259
12205393
12205391
12205389
12205373
12003152
11815285
11217230
10749140
7009640
4371770
3086470
3003204
1550058
443424
439775
410488
150895
127838
97963
93242
90387
71308944
57328746
57223026
54515073
54089883
51397019
20057082
19937323
17998969
17967377
16217531
16213543
16213447
16213446
16213340
16213291
15284008
15002895
13882986
13783258
12219644
12205386
12003150
11830115
11768865
11768555
11116109
10975471
10887953
10034628
9964068
7408681
7349652
7037260
4571063
554048
360170
273349
185742
134039
100312
130737289
124389900
118103348
102601306
91544677
87506612
86709694
71310240
71309922
71309512
71309147
71308936
59697643
59100101
58618368
57539232
57071418
55287061
55286882
55281349
54515072
54258654
5

71435834
71359220
71355668
71311580
71308949
71308948
71308866
71308865
69293675
66244097
65364832
63652146
61320483
60210739
59888065
59819005
58681528
58599198
57892873
57352420
56607010
55285076
55277570
54488946
54345159
54339897
54338124
54256943
54237759
54186523
53978327
53704867
53701995
53251240
46186615
45122419
45086748
44293662
44293654
44293653
44293630
40565898
40563232
36688841
29895719
28204809
28187071
25043519
25043518
25021758
24937910
24820243
24704842
24693615
23439269
23154418
22907712
22863195
22816605
22390002
22253789
22221647
21988755
21866593
21446333
21334611
21295218
21295147
21295075
21201343
21133009
21131866
21120973
20699825
20624765
20495584
20495553
20495428
20236948
20218117
19959629
19767433
19608669
19431769
18683023
18506725
18395020
17920483
17799945
16773668
16771282
16217582
15858713
15556502
15556498
15556489
15462342
14801596
14729013
14676650
14071073
14071072
13894881
13890967
13605571
13194809
13192753
13018911
12148533
11958144
11693958
1

16078
99479
2543
2116
6560141
1742129
92094
14986
12359166
1548900
40634
5282350
5282349
5282180
638026
92729
5282347
2117
9943542
9844470
6918505
6857447
425400
93147
86052
71406
2977
9913401
5282348
133098
119193
74812
8334
2978
58078320
53315106
9909519
6541354
5311171
688071
586537
313134
167577
131243
129128
97820
45356845
24779742
15729798
10868658
10740654
10455173
10062026
9823230
6538275
447681
175815
138501
134740
124963
92161
36276
8989
53798831
15122338
14484959
14484953
14484952
13782100
12444418
11807084
11746632
11464420
10924069
10215604
10013216
9854614
6442395
6365527
644087
297375
293175
177039
173955
151903
151899
150948
129597
124494
103763
103737
92771
86720
34180
133556427
71332125
57348538
23815440
23089681
18473196
15929269
15222757
14311399
14311396
13794954
13389492
12564279
12444420
11130957
11098757
11052973
11021753
10924668
10523333
10242888
9992969
9881728
5317868
3080777
3016911
3016009
1797807
759445
688656
622545
613238
601765
345123
278656
195099
174

In [111]:
#Displaying the HBD table (it includes the GHS_info column at this point)
filtered_hbd_vendor

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status,GHS_info
0,1176,C(=O)(N)N,True,Not classified as a hazardous substance
1,11457650,C(=O)(N)N,True,No GHS data available
2,636363,C(=O)(N)N,True,No GHS data available
3,10197611,C(=O)(N)N,True,No GHS data available
4,2723980,C(=O)(N)N,True,No GHS data available
5,22120529,C(=O)(N)NS,True,No GHS data available
6,131953165,C(=O)(N)N,True,No GHS data available
7,71309436,C(=O)(N)N,True,No GHS data available
8,71309326,C(=O)(N)N,True,No GHS data available
9,71308962,C(=O)(N)N,True,No GHS data available


In [112]:
#Extract the GHS_info column of the data table into a plain Python list
hbd_GHS_list = list(filtered_hbd_vendor['GHS_info'])

In [115]:
#Start from empty health and environmental score lists; get_hazard_scores appends the scores to them
hbd_health_list, hbd_env_list = [], []

for ghs_entry in hbd_GHS_list:
    get_hazard_scores(ghs_entry, hbd_health_list, hbd_env_list, scoring_table_dict)

In [116]:
#Displaying the health scores collected by get_hazard_scores
hbd_health_list

[0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 0,
 0,
 70,
 820,
 10,
 0,
 185,
 70,
 70,
 50,
 10,
 70,
 100,
 70,
 70,
 70,
 10,
 10,
 100,
 70,
 70,
 410,
 870,
 70,
 100,
 100,
 70,
 120,
 295,
 70,
 70,
 10,
 100,
 100,
 100,
 100,
 100,
 10,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 10,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 70,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 70,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 10

In [117]:
#Displaying the environmental scores collected by get_hazard_scores
hbd_env_list

[0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 100,
 0,
 0,
 0,
 0,
 0,
 100,
 0,
 0,
 0,
 0,
 0,
 100,
 100,
 0,
 0,
 0,
 0,
 0,
 0,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 0,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 1

In [118]:
#Attach the health and environmental score lists to the table as columns, then display it
filtered_hbd_vendor['Health Score'], filtered_hbd_vendor['Environmental Score'] = (
    hbd_health_list,
    hbd_env_list,
)
filtered_hbd_vendor

Unnamed: 0,HBD_cid,HBD_smiles,Vendor Status,GHS_info,Health Score,Environmental Score
0,1176,C(=O)(N)N,True,Not classified as a hazardous substance,0,0
1,11457650,C(=O)(N)N,True,No GHS data available,100,100
2,636363,C(=O)(N)N,True,No GHS data available,100,100
3,10197611,C(=O)(N)N,True,No GHS data available,100,100
4,2723980,C(=O)(N)N,True,No GHS data available,100,100
5,22120529,C(=O)(N)NS,True,No GHS data available,100,100
6,131953165,C(=O)(N)N,True,No GHS data available,100,100
7,71309436,C(=O)(N)N,True,No GHS data available,100,100
8,71309326,C(=O)(N)N,True,No GHS data available,100,100
9,71308962,C(=O)(N)N,True,No GHS data available,100,100


In [119]:
#Saving the dataset with its hazard scores to CSV (index = False leaves the row index out of the file)
filtered_hbd_vendor.to_csv('filtered_hbd_w_safety.csv', index = False)

### The cells below were used to fix a bug in the `get_hazard_codes` function (they are now commented out)

In [96]:
#safety_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/62581/JSON?heading=GHS+Classification" 

#request = requests.get(safety_url)
#request_json = request.json()

In [97]:
#request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information']#[2]['Value']['StringWithMarkup'][1]['String']

In [98]:
# GHS_information_list = []
# for i in range(len(request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'])):
#     if request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'][i]['Name'] == 'GHS Hazard Statements':
        
#         for j in range(len(request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'][i]['Value']['StringWithMarkup'])):
#             temp_list = [] #temporary list each sentence gets added to before appending to GHS list
#             temp_list.append(request_json['Record']['Section'][0]['Section'][0]['Section'][0]['Information'][i]['Value']['StringWithMarkup'][j]['String'])
#             GHS_information_list.append(temp_list)

# #this portion checks for lists with empty string '' that will break the code if not removed
# for item in GHS_information_list:
#     if '' not in item: 
#         pass

#     elif '' in item:
#         index = GHS_information_list.index(item)
#         GHS_information_list[index].remove('')


# hazard_description_list = [] #list that will contain the hazard codes and their descriptions. 

# for item in GHS_information_list:
#     temp_haz = [idx for idx in item if idx[0] == 'H'] #list comprehension, keeps strings that start with H, i.e. the hazard statements
#     hazard_description_list.append(temp_haz)
#     #There will be empty lists so this step removes them
#     for item in hazard_description_list:
#         if len(item) == 0:
#             hazard_description_list.remove(item)

# hazard_code_list = [] #list that contains all of the hazard codes for the chemical

# for item in hazard_description_list: #grabs only the hazard code from each statement in the list
#     string = item[0]
#     hazard = string.split(' ', 1)[0]
#     hazard_code_list.append(hazard)

# cleaned_hazard_code_list = []

# for code in hazard_code_list:
#     # some of the codes end with a colon after being extracted from the JSON. Remove it here if present.
#     if code.endswith(':'):
#         # removes the last character from the code, which will be the colon.
#         code = code[:-1]
#         cleaned_hazard_code_list.append(code)
        
#     else:
#         cleaned_hazard_code_list.append(code)

# filtered_hazard_code_list = [] 
# #list comprehension to remove duplicates from the cleaned hazard codes list
# [filtered_hazard_code_list.append(x) for x in cleaned_hazard_code_list if x not in filtered_hazard_code_list] 


# print(filtered_hazard_code_list)


In [99]:
# test_list = []
# for code in hazard_code_list:
#     # some of the codes end with a colon after being extracted from the JSON. Remove it here if present.
#     if code.endswith(':'):
#         # removes the last character from the code, which will be the colon.
#         code = code[:-1]
#         test_list.append(code)
        
# test_list_2 = [] 
# [test_list_2.append(x) for x in test_list if x not in test_list_2] 
        
# print(test_list_2)