In [4]:
import pandas as pd
import requests
import json
import pubchempy as pcp
from safety import get_hazard_codes
# Load the HBA chemical list. NOTE: this is an absolute, machine-specific path,
# so it must be edited before the notebook can run on another computer.
data = pd.read_csv('/Users/Jaime/Downloads/HBA_list.csv') #change file path if necessary
# Load the hazard-code scoring table (relative path: resolved against the
# current working directory).
scoring_table = pd.read_csv('hazard_score.csv')

### Recap: in the previous notebooks, we took a list of chemicals and determined whether GHS data was available and, if so, whether each chemical was hazardous or non-hazardous. If hazardous, the specific hazard codes were returned. We also took a table of all possible GHS hazard codes and used a paper (Verslycke et al., "The Chemistry Scoring Index"), which ranks these hazards by how detrimental they are, to assign a score to each code.

### Below, we demonstrate the `get_hazard_codes` function, which is defined in the safety.py file.

In [5]:
# Look up the GHS information for every chemical from its CID.
# The 'HBA_cid' column is iterated directly rather than via data.iterrows():
# iterrows() builds a Series per row and does not preserve column dtypes, so an
# integer CID could be upcast if the frame ever contains a float column.
hazard_codes = [get_hazard_codes(cid) for cid in data['HBA_cid']]
# One result per row, in row order, so it can be attached as a new column.
data['GHS_info'] = hazard_codes


In [6]:
# Display the table, which now includes the GHS_info column.
data

Unnamed: 0,HBA,HBA_cid,GHS_info
0,choline chloride,6209,"[H315, H319, H335]"
1,choline acetate,187,No GHS data available
2,choline bromide,74724,"[H315, H319, H335]"
3,choline fluoride,22134097,No GHS data available
4,choline nitrate,13646546,No GHS data available
5,(2-chloroethyl)trimethylammonium chloride,13836,"[H302:, H312:]"
6,ethyl(2-hydroxyethyl)dimethylammonium chloride,87940,No GHS data available
7,benzyl(2-hydroxyethyl)dimethylammonium chloride,3014549,"[H315, H319, H335]"
8,acetylcholine chloride,6060,"[H315, H319, H335]"
9,tetramethylammonium chloride,6379,"[H300, H301, H311, H315, H319, H335, H370, H41..."


### And here is what the scoring table looks like

In [7]:
# Display the hazard-code scoring table loaded from hazard_score.csv.
scoring_table

Unnamed: 0,Code,Hazard Score
0,H200,100
1,H201,100
2,H202,100
3,H203,100
4,H204,100
5,H205,100
6,H206,100
7,H207,100
8,H207,100
9,H208,100


### Now the objective is to assign a final hazard score to each chemical in the data table as the sum of the scores of all its hazard codes. Ideally, this could be split into a health score and an environmental score, since the hazard codes for these are numbered in the 300s (H3xx) and 400s (H4xx), respectively. We will also weight health scores higher than environmental scores when the chemicals are finally ranked against all other properties (melting point, cost, etc.).


### There could be a few ways to do this, but it might be beneficial to do this work outside the dataframe and then append the result back to it. The reason is that the hazard codes are stored as lists inside the dataframe; extracting that column as a list gives a list of lists, so the hazard codes for each chemical can easily be iterated over. The hazard scoring table could be kept as a dataframe or converted into a dictionary instead.

In [8]:
# Pull the GHS_info column out of the data table as a plain Python list so the
# functions below can be tested on it.
GHS_list = list(data['GHS_info'])

In [13]:
_NO_ARG = object()  # sentinel: distinguishes "no argument passed" from any real value


def hazard_check(ghs_info=_NO_ARG):
    """Decide how a chemical's GHS entry should be turned into a safety score.

    Parameters
    ----------
    ghs_info : str or list, optional
        The GHS entry for one chemical: either one of the status strings
        compared below or a list of hazard codes. If omitted, the global
        variable ``item`` is used instead, which keeps existing no-argument
        ``hazard_check()`` calls made inside a ``for item in ...`` loop working.
        Passing the value explicitly is preferred.

    Returns
    -------
    str
        'Minimum Penalty' if the entry says the substance is not classified
        as hazardous, 'Maximum Penalty' if no GHS data is available, and
        'Calculate Score' for anything else.

    Raises
    ------
    NameError
        If called without an argument while no global ``item`` is defined.
    """
    if ghs_info is _NO_ARG:
        # Backward-compatible fallback: read the caller's global loop variable.
        ghs_info = item

    if ghs_info == 'Not classified as a hazardous substance':
        return 'Minimum Penalty'

    if ghs_info == 'No GHS data available':
        return 'Maximum Penalty'

    # Any other value (e.g. a list of hazard codes) needs a computed score.
    return 'Calculate Score'


In [14]:
# Classify every entry of GHS_list.
# NOTE: the loop variable must be named `item`, because hazard_check() is
# called without arguments and reads the global `item`. Do not turn this into
# a list comprehension: its loop variable would not be visible as a global.
check_list = []
for item in GHS_list:
    checks = hazard_check()
    check_list.append(checks)



In [15]:
# Show the decision recorded for each entry of GHS_list.
check_list

['Calculate Score',
 'Maximum Penalty',
 'Calculate Score',
 'Maximum Penalty',
 'Maximum Penalty',
 'Calculate Score',
 'Maximum Penalty',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Maximum Penalty',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Minimum Penalty',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score',
 'Calculate Score']