# Code snippets for testing the functions



In [113]:
import pandas as pd

In [114]:
# Load the data in a dataframe structure
df = pd.read_excel('solventSelectionTool_table.xlsx', sheet = 0, header = 2)
# Drop first row as it is empty
df = df[1:]

In [117]:
# Add extra columns with calculated parameters
df['Hansen coordinates']= [np.array([df['dD - Dispersion'].iloc[i], df['dP - Polarity'].iloc[i], df['dH - Hydrogen bonding'].iloc[i]]) for i in range(df.shape[0])]
list_GSK_scores = df.columns.values[7:18]
df['Waste'] = (df['Incineration']*df['Recycling']*df['Biotreatment']*df['VOC Emissions'])**0.25
df['Environment']  =(df['Aquatic Impact']*df['Air Impact'])**0.5
df['Health'] = (df['Health Hazard']*df['Exposure Potential'])**0.5
df['Safety'] = (df['Flammability and Explosion']*df['Reactivity and Stability'])**0.5
df['Greenness'] = round((df['Waste']*df['Environment']*df['Health']*df['Safety'])**0.25,2)
df['Ra'] = np.nan
df.set_index('Solvent Name', inplace=True, drop=False)

In [411]:
 options = [{'label': name, 'value': True} for name in df.columns[8:17]]

In [412]:
options

[{'label': 'Recycling', 'value': True},
 {'label': 'Biotreatment', 'value': True},
 {'label': 'VOC Emissions', 'value': True},
 {'label': 'Aquatic Impact', 'value': True},
 {'label': 'Air Impact', 'value': True},
 {'label': 'Health Hazard', 'value': True},
 {'label': 'Exposure Potential', 'value': True},
 {'label': 'Flammability and Explosion', 'value': True},
 {'label': 'Reactivity and Stability', 'value': True}]

In [201]:
# Load the data in a dataframe structure
df3 = pd.read_excel('solventSelectionTool_table.xlsx', sheet = 0, header = 2)
# Drop first row as it is empty
df3 = df3[1:]
# Add extra columns with calculated parameters
df3['Hansen coordinates']= [np.array([df3['dD - Dispersion'].iloc[i], df3['dP - Polarity'].iloc[i], df3['dH - Hydrogen bonding'].iloc[i]]) for i in range(df.shape[0])]
list_GSK_scores = df.columns.values[7:18]

df3['Waste'] = [np.array([df3['Incineration'].iloc[i], df3['Recycling'].iloc[i], df3['Biotreatment'].iloc[i], df3['VOC Emissions'].iloc[i]]) for i in range(df.shape[0])]
df3['Environment'] = [np.array([df3['Aquatic Impact'].iloc[i], df3['Air Impact'].iloc[i]]) for i in range(df.shape[0])]
df3['Health'] = [np.array([df3['Health Hazard'].iloc[i], df3['Exposure Potential'].iloc[i]]) for i in range(df.shape[0])]
df3['Safety'] = [np.array([df3['Flammability and Explosion'].iloc[i], df3['Reactivity and Stability'].iloc[i]]) for i in range(df.shape[0])]
# df3['Greenness'] = round((df['Waste']*df['Environment']*df['Health']*df['Safety'])**0.25,2)


In [405]:
def mid_composite_score_calculator(scores, score_filter = None):
    # Count the length of the vector
    N = scores[0].shape[0]
    if score_filter is None:
        n = N
        score_filter = [True] * N
    else:
        # Count the number of Trues in the filter
        n = score_filter.count(True)
        if n > 0:
            gscore = np.array([np.power(vector[score_filter].prod(),1/n) for vector in scores])
        else:
            gscore =  None
    return gscore
def final_composite_score_calculator(dataframe,\
                                     waste = 4*[True],\
                                     health = 2*[True],\
                                     environment = 2*[True],\
                                     safety = 2*[True]):
    a = mid_composite_score_calculator(dataframe['Waste'].values, waste)
    b = mid_composite_score_calculator(dataframe['Health'].values, health)
    c = mid_composite_score_calculator(dataframe['Environment'].values, environment)
    d = mid_composite_score_calculator(dataframe['Safety'].values, safety)
    data = []
    for i in [a,b,c,d]:
        if i is not None:
            data.append(i)
    data = np.vstack([*data]).T
    n = 1/data.shape[1]
    gmean = np.power(data.prod(axis = 1),n).round(2)
    return gmean

In [406]:
print(final_composite_score_calculator(df3))

[5.42 5.41 4.3  4.12 4.85 8.02 7.51 4.09 6.67 7.97 7.1  7.4  8.1  6.62
 3.02 6.53 7.89 7.62 4.28 4.45 7.04 6.41 6.48 6.77 5.69 5.75 5.88 5.81
 7.45 3.73 7.68 3.1  3.27 4.14 5.43 4.4  5.87 5.3  7.54 7.24 7.43 4.88
 5.45 6.22 5.13 7.2  4.15  nan  nan  nan  nan 7.8   nan  nan 7.95 7.16
 4.49 5.14 4.95 5.42 7.64 6.67 5.02 6.96 6.4  5.82 5.55 6.63 7.22 6.66
 6.13 6.42 5.34 8.03 8.05 7.97 4.38 5.52 5.94 6.63 7.51 5.58 4.75 7.26
 7.52 6.93 7.84 5.4  6.92 6.82 5.44 6.14 5.83 6.   5.14 7.22 5.71 4.96
 5.41 5.91 6.49 7.48 4.11 5.18 5.49 6.7  4.95 7.68 3.81 5.41 6.43 7.1
 8.76 7.77 5.92 4.99 5.61 6.55 6.33 6.27 5.23 4.79 4.9  4.49 5.96 8.73
 4.43 4.74 4.08 7.91 3.88 7.29]


In [399]:
a = mid_composite_score_calculator(df3['Waste'].values, [True, True, True, True])
b = mid_composite_score_calculator(df3['Health'].values, [True, True])
c = mid_composite_score_calculator(df3['Environment'].values, [True, True])
d = mid_composite_score_calculator(df3['Safety'].values, [True, True])
data  = []
for i in [a,b,c,d]:
    if i is None:
        pass
    else:
        data.append(i)
data = np.vstack([*data]).T
gmean = np.power(data.prod(axis = 1),1/data.shape[1])
# data = np.vstack([a,b,c,d]).T
# print(np.isnan(c))
# final_composite_score_calculator([a,b], df3.shape[0])
# a  = df3['Waste'].values

In [400]:
gmean

array([5.42056262, 5.414848  , 4.30449117, 4.12164526, 4.84877912,
       8.01768925, 7.51224522, 4.08931993, 6.6671983 , 7.96681539,
       7.09962966, 7.3951711 , 8.09922516, 6.61822058, 3.0151347 ,
       6.53162964, 7.8928308 , 7.62154319, 4.28466534, 4.44721086,
       7.03670567, 6.40892835, 6.48372399, 6.77284454, 5.69194229,
       5.74815846, 5.87522432, 5.81163209, 7.44937316, 3.73410456,
       7.68327726, 3.10415438, 3.2713392 , 4.14060896, 5.42716983,
       4.40152521, 5.87374183, 5.30100106, 7.54479701, 7.23714388,
       7.42506093, 4.87925848, 5.44677651, 6.22296396, 5.13157324,
       7.20442175, 4.14594843,        nan,        nan,        nan,
              nan, 7.80491243,        nan,        nan, 7.94884113,
       7.15713626, 4.48572994, 5.1369889 , 4.94682597, 5.42477282,
       7.64408147, 6.67229838, 5.02116817, 6.95891392, 6.40386284,
       5.82058033, 5.54559599, 6.6317294 , 7.22473427, 6.65724474,
       6.13372254, 6.41648155, 5.34400165, 8.02802835, 8.05422

In [398]:
df

Unnamed: 0_level_0,Solvent Name,CAS Number,dD - Dispersion,dP - Polarity,dH - Hydrogen bonding,Melting Point (°C),Boiling Point (°C),Incineration,Recycling,Biotreatment,...,Life Cycle Analysis,Hazard Labels,Precautionary Labels,Hansen coordinates,Waste,Environment,Health,Safety,Greenness,Ra
Solvent Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"1,2,4-Trichlorobenzene","1,2,4-Trichlorobenzene",120-82-1,20.2,4.2,3.2,17.0,214.0,3.0,7.0,7.0,...,8.0,H302 H315 H331 H400 H410,P261 P264 P270 P271 P273 P280 P301+P312 P302+P...,"[20.2, 4.2, 3.2]",6.191977,3.000000,4.898979,9.486833,5.42,
"1,2-Dichlorobenzene (o-DCB)","1,2-Dichlorobenzene (o-DCB)",95-50-1,19.2,6.3,3.3,-17.0,180.0,4.0,8.0,6.0,...,8.0,H302 H315 H317 H319 H332 H335 H400 H410,P261 P264 P270 P271 P272 P273 P280 P301+P312 P...,"[19.2, 6.3, 3.3]",6.054800,2.449490,6.480741,8.944272,5.41,
"1,2-Dichloroethane (DCE)","1,2-Dichloroethane (DCE)",107-06-2,18.0,7.4,4.1,-36.0,84.0,2.0,7.0,5.0,...,7.0,H225 H302 H315 H319 H331 H335 H350,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[18.0, 7.4, 4.1]",4.325308,7.937254,1.414214,7.071068,4.30,
"1,2-Dimethoxyethane (Glyme)","1,2-Dimethoxyethane (Glyme)",110-71-4,15.4,6.3,6.0,-58.0,85.0,4.0,4.0,3.0,...,7.0,H225 H315 H332 H351 H360,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[15.4, 6.3, 6.0]",3.935979,7.483315,2.000000,4.898979,4.12,
"1,3-Dioxolane","1,3-Dioxolane",646-06-0,18.1,6.6,9.3,-95.0,75.0,4.0,4.0,3.0,...,,H225 H319,P210 P233 P240 P241 P242 P243 P264 P280 P303+P...,"[18.1, 6.6, 9.3]",3.722419,5.916080,5.916080,4.242641,4.85,
"1,3-Propanediol","1,3-Propanediol",504-63-2,16.8,13.5,23.2,-27.0,214.0,4.0,5.0,5.0,...,3.0,Not Hazardous,Not Hazardous,"[16.8, 13.5, 23.2]",5.623413,7.745967,9.486833,10.000000,8.02,
"1,4-Butanediol","1,4-Butanediol",110-63-4,16.6,11.0,20.9,20.0,235.0,4.0,5.0,4.0,...,4.0,H302 H336,P261 P264 P270 P271 P301+P312 P304+P340 P312 P...,"[16.6, 11.0, 20.9]",5.180040,7.745967,7.937254,10.000000,7.51,
"1,4-Dioxane","1,4-Dioxane",123-91-1,17.5,1.8,9.0,12.0,102.0,4.0,1.0,3.0,...,6.0,H225 H319 H335 H351,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[17.5, 1.8, 9.0]",2.912951,5.656854,3.464102,4.898979,4.09,
1-Butanol,1-Butanol,71-36-3,16.0,5.7,15.8,-89.0,118.0,6.0,7.0,5.0,...,5.0,H226 H302 H315 H318 H335 H336,P210 P233 P240 P241 P242 P243 P261 P264 P270 P...,"[16.0, 5.7, 15.8]",6.402172,5.196152,7.000000,8.485281,6.67,
1-Heptanol,1-Heptanol,111-70-6,16.0,5.3,11.7,-34.0,176.0,9.0,8.0,10.0,...,,H319,P264 P280 P305+P351+P338 P337+P313,"[16.0, 5.3, 11.7]",8.972093,5.656854,8.366600,9.486833,7.97,


In [282]:
print(x[~np.isnan(x).any(axis=1)])

Unnamed: 0,Solvent Name,CAS Number,dD - Dispersion,dP - Polarity,dH - Hydrogen bonding,Melting Point (°C),Boiling Point (°C),Incineration,Recycling,Biotreatment,...,Flammability and Explosion,Reactivity and Stability,Life Cycle Analysis,Hazard Labels,Precautionary Labels,Hansen coordinates,Waste,Environment,Health,Safety
1,"1,2,4-Trichlorobenzene",120-82-1,20.2,4.2,3.2,17.0,214.0,3.0,7.0,7.0,...,9.0,10.0,8.0,H302 H315 H331 H400 H410,P261 P264 P270 P271 P273 P280 P301+P312 P302+P...,"[20.2, 4.2, 3.2]","[3.0, 7.0, 7.0, 10.0]","[1.0, 9.0]","[4.0, 6.0]","[9.0, 10.0]"
2,"1,2-Dichlorobenzene (o-DCB)",95-50-1,19.2,6.3,3.3,-17.0,180.0,4.0,8.0,6.0,...,8.0,10.0,8.0,H302 H315 H317 H319 H332 H335 H400 H410,P261 P264 P270 P271 P272 P273 P280 P301+P312 P...,"[19.2, 6.3, 3.3]","[4.0, 8.0, 6.0, 7.0]","[1.0, 6.0]","[7.0, 6.0]","[8.0, 10.0]"
3,"1,2-Dichloroethane (DCE)",107-06-2,18.0,7.4,4.1,-36.0,84.0,2.0,7.0,5.0,...,5.0,10.0,7.0,H225 H302 H315 H319 H331 H335 H350,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[18.0, 7.4, 4.1]","[2.0, 7.0, 5.0, 5.0]","[9.0, 7.0]","[1.0, 2.0]","[5.0, 10.0]"
4,"1,2-Dimethoxyethane (Glyme)",110-71-4,15.4,6.3,6.0,-58.0,85.0,4.0,4.0,3.0,...,4.0,6.0,7.0,H225 H315 H332 H351 H360,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[15.4, 6.3, 6.0]","[4.0, 4.0, 3.0, 5.0]","[8.0, 7.0]","[1.0, 4.0]","[4.0, 6.0]"
5,"1,3-Dioxolane",646-06-0,18.1,6.6,9.3,-95.0,75.0,4.0,4.0,3.0,...,2.0,9.0,,H225 H319,P210 P233 P240 P241 P242 P243 P264 P280 P303+P...,"[18.1, 6.6, 9.3]","[4.0, 4.0, 3.0, 4.0]","[7.0, 5.0]","[7.0, 5.0]","[2.0, 9.0]"
6,"1,3-Propanediol",504-63-2,16.8,13.5,23.2,-27.0,214.0,4.0,5.0,5.0,...,10.0,10.0,3.0,Not Hazardous,Not Hazardous,"[16.8, 13.5, 23.2]","[4.0, 5.0, 5.0, 10.0]","[10.0, 6.0]","[10.0, 9.0]","[10.0, 10.0]"
7,"1,4-Butanediol",110-63-4,16.6,11.0,20.9,20.0,235.0,4.0,5.0,4.0,...,10.0,10.0,4.0,H302 H336,P261 P264 P270 P271 P301+P312 P304+P340 P312 P...,"[16.6, 11.0, 20.9]","[4.0, 5.0, 4.0, 9.0]","[10.0, 6.0]","[7.0, 9.0]","[10.0, 10.0]"
8,"1,4-Dioxane",123-91-1,17.5,1.8,9.0,12.0,102.0,4.0,1.0,3.0,...,4.0,6.0,6.0,H225 H319 H335 H351,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[17.5, 1.8, 9.0]","[4.0, 1.0, 3.0, 6.0]","[8.0, 4.0]","[4.0, 3.0]","[4.0, 6.0]"
9,1-Butanol,71-36-3,16.0,5.7,15.8,-89.0,118.0,6.0,7.0,5.0,...,8.0,9.0,5.0,H226 H302 H315 H318 H335 H336,P210 P233 P240 P241 P242 P243 P261 P264 P270 P...,"[16.0, 5.7, 15.8]","[6.0, 7.0, 5.0, 8.0]","[9.0, 3.0]","[7.0, 7.0]","[8.0, 9.0]"
10,1-Heptanol,111-70-6,16.0,5.3,11.7,-34.0,176.0,9.0,8.0,10.0,...,9.0,10.0,,H319,P264 P280 P305+P351+P338 P337+P313,"[16.0, 5.3, 11.7]","[9.0, 8.0, 10.0, 9.0]","[8.0, 4.0]","[10.0, 7.0]","[9.0, 10.0]"


In [239]:
a = [True, True, False]
N = len(np.array(a)[a])
print(N)

2


In [247]:
a = np.array([1,2,3,4])
a.prod()

24

In [98]:
def update_Ra(coordinates, reference):
    distance = coordinates - reference
    Ra = [np.sqrt(4*d[0]**2 + d[1]**2 + d[2]**2) for d in distance]
    return Ra


In [96]:
def update_Ra2(coordinates, reference):
    Ra = []
    for value in coordinates:
        # Target vector minus the reference vector
        d = [value[i] - reference[i] for i in range(3)]
        # Square the values to get the hansen index and update the Distance column
        Ra.append(np.sqrt(4*d[0]**2 + d[1]**2 + d[2]**2))
    return Ra

In [99]:
Href = [18,1.5,2]
df['Ra'] = update_Ra(df['Hansen coordinates'], Href)
df['Ra'].head()


1    5.300000
2    5.521775
3    6.262587
4    8.128961
5    8.907300
Name: Ra, dtype: float64

In [114]:
class Solvent(object):
    def __init__(self, parameters):
        # Initalize all the properties of the object from the input information (parameters, a DataFrame structure)
        self.name = parameters['Solvent Name']
        self.CAS = parameters['CAS Number']
        self.melting_point = parameters['Melting Point (°C)']
        self.boiling_point = parameters['Boiling Point (°C)']
        self.dD = parameters['dD - Dispersion']
        self.dP = parameters['dP - Polarity']
        self.dH = parameters['dH - Hydrogen bonding']
        self.hansen_coordinates = [self.dD,self.dP, self.dH]
        self.GSK_scores = dict(Incineration = parameters['Incineration'],\
                              Recycling = parameters['Recycling'])
                                

In [56]:
a = Solvent(df.iloc[0])
a.name
a.CAS
a.hansen_coordinates

[20.2, 4.2, 3.2]

In [47]:
df.iloc[0]

Solvent Name                                             1,2,4-Trichlorobenzene
CAS Number                                                             120-82-1
dD - Dispersion                                                            20.2
dP - Polarity                                                               4.2
dH - Hydrogen bonding                                                       3.2
Melting Point (°C)                                                           17
Boiling Point (°C)                                                          214
Incineration                                                                  3
Recycling                                                                     7
Biotreatment                                                                  7
VOC Emissions                                                                10
Aquatic Impact                                                                1
Air Impact                              

# Plotting

In [131]:
import plotly.graph_objs as go

filt = [True]*df.shape[0]
filt = [df['Greenness'] < 5]
x = df['dD - Dispersion']
y = df['dP - Polarity']
z = df['dH - Hydrogen bonding']

trace = go.Scatter3d(x = x, y = y, z = z, mode='markers', marker=dict(size=8,\
                                                        color = df['Greenness'],\
                                                        colorscale = 'RdYlGn',\
                                                        opacity=0.8,\
                                                        showscale = True),\
                    hovertemplate = '<b>%{text}</b><br>' +\
                                     '%{hovertext}<br>' +\
                                     'dD = %{x:.2f}<br>dP = %{y:.2f}<br>dH = %{z:.2f}',
                    text = df['Solvent Name'],\
                    hovertext = [f'Greenness  = {value:.2f}' for value in df['Greenness']]) 

plot_layout = go.Layout(height=600, title = None,
                paper_bgcolor='white',
                scene={"aspectmode": "cube",
                       "xaxis": {"title": 'dD - Dispersion', },
                       "yaxis": {"title": 'dP - Polarity', },
                       "zaxis": {"title":'dH - Hydrogen bonding' }})

go.Figure(data = trace, layout = layout)

In [103]:
a =(df[['Solvent Name', 'Greenness', 'Ra']]).to_dict('records')

In [116]:
i = 1
for element in a:
    if element['Solvent Name'] == '1-Heptanol': break
    i += 1
df.loc[i]

Solvent Name                                          1-Heptanol
CAS Number                                              111-70-6
dD - Dispersion                                               16
dP - Polarity                                                5.3
dH - Hydrogen bonding                                       11.7
Melting Point (°C)                                           -34
Boiling Point (°C)                                           176
Incineration                                                   9
Recycling                                                      8
Biotreatment                                                  10
VOC Emissions                                                  9
Aquatic Impact                                                 8
Air Impact                                                     4
Health Hazard                                                 10
Exposure Potential                                             7
Flammability and Explosio

In [128]:
if len([1,2]):
    print(1)

1


In [132]:
if -1:
    print(4)

4


In [162]:
['{:s}: {:.1f} '.format(label, data.loc[label]) for label in data.index[8:18]])

SyntaxError: invalid syntax (<ipython-input-162-08a37755a74d>, line 1)

In [37]:
df2 = pd.read_excel('solventSelectionTool_table.xlsx', sheet_name = 1, header = 0, usecols=(0,1))
df2.set_index('Statements', inplace=True, drop=False)

In [70]:
a = df['Hazard Labels']

In [72]:
a[25]

'H226 H314 H318'

In [70]:
data = df.loc[10]
print(1, data['Precautionary Labels'])
precaution_labels = data['Precautionary Labels'].split(' ')
print(2, precaution_labels)
for precaution in precaution_labels:
    if precaution == 'Data'
    print(3, precaution)
    splitted_precaution = precaution.split('+')
    text = ''
    for s_precaution in splitted_precaution:
        
        text_precaution = df2.Fulltext[df2['Statements'] == s_precaution].values[0]
        text += text_precaution
    print(precaution, text)

1 P264 P280 P305+P351+P338 P337+P313
2 ['P264', 'P280', 'P305+P351+P338', 'P337+P313']
3 P264
P264 Wash ... thoroughly after handling.
3 P280
P280 Wear protective gloves/protective clothing/eye protection/face protection.
3 P305+P351+P338
P305+P351+P338 IF IN EYES:Rinse cautiously with water for several minutes.Remove contact lenses, if present and easy to do. Continue rinsing.
3 P337+P313
P337+P313 If eye irritation persists:Get medical advice/attention.


In [246]:
hazard_labels = data['Pre'].split(' ')
solvent_filter = np.ones((df.shape[0]), dtype = bool)
for hazard in hazard_labels:
    text_hazard = df2.Fulltext[df2['Statements'] == hazard].values[0]
    print(value, text_hazard)

['H226', 'H314', 'H318']

In [240]:
hazard = df2.Fulltext[df2['Statements'] == 'H290'].values[0]

In [86]:
df[df.columns[[0,-2,-1]]][df['Greenness']>= 5].shape

(96, 3)

In [55]:
solvent_filter = np.ones((df.shape[0]), dtype = bool)

In [56]:
solvent_filter

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [237]:
df2['Statements'] == 'H290'

0      False
1      False
2      False
3      False
4      False
5       True
6      False
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24     False
25     False
26     False
27     False
28     False
29     False
       ...  
121    False
122    False
123    False
124    False
125    False
126    False
127    False
128    False
129    False
130    False
131    False
132    False
133    False
134    False
135    False
136    False
137    False
138    False
139    False
140    False
141    False
142    False
143    False
144    False
145    False
146    False
147    False
148    False
149    False
150    False
Name: Statements, Length: 151, dtype: bool

In [90]:
type(df[['Solvent Name', 'Waste']])

pandas.core.frame.DataFrame

In [95]:
type(df.index[['Solvent Name', 'Waste']])

  result = getitem(key)


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [98]:
def filter_by_hazard(hazards_to_remove, data_hazards):
    hazards_filter = np.ones((data_hazards.shape[0]), dtype = bool)
    for hazard in hazards_to_remove:
        print(hazard)
        for i, solvent in enumerate(data_hazards):
#             print(solvent)
            for solvent_hazard in solvent.split(' '):
                if solvent_hazard == hazard:
#                     print('Hazard found')
                    hazards_filter[i] = False
                    break
    return hazards_filter

In [100]:
a = filter_by_hazard(['H302'], df['Hazard Labels'])
print(a)

H302
[False False False  True  True  True False  True False  True False False
  True False  True  True  True  True False False  True False  True  True
  True False  True False  True  True False  True  True  True  True False
  True  True False False  True False  True  True False  True False  True
  True False  True  True False False False  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True False  True
  True False False  True  True  True False  True  True  True  True  True
 False  True  True  True  True  True  True  True  True  True False  True
  True  True  True  True  True  True False False  True  True  True  True
  True  True  True  True  True  True  True False False  True  True  True
  True False  True False  True  True  True  True False  True False  True]


In [92]:
dff = df[['Solvent Name', 'Greenness', 'Ra']][a]

In [93]:
dff.shape

(94, 3)

In [102]:
b = df['Greenness'] > 5

In [112]:
df2[2:49]

Unnamed: 0_level_0,Fulltext
id,Unnamed: 1_level_1
H224,Extremely flammable liquid and vapor
H225,Highly Flammable liquid and vapor
H226,Flammable liquid and vapor
H290,May be corrosive to metals
H300,Fatal if swallowed
H301,Toxic if swallowed
H302,Harmful if swallowed
H303,May be harmful if swallowed
H304,May be fatal if swallowed and enters airways
H305,May be fatal if swallowed and enters airways


In [413]:
WASTE = ['Incineration','Recycling','Biotreatment','VOC Emissions']
HEALTH = ['Health Hazard', 'Exposure Potential']
ENVIRONMENT = ['Aquatic Impact', 'Air Impact']
SAFETY = ['Flammability and Explosion', 'Reactivity and Stability']

In [414]:
a = ['Incineration']

In [416]:
ff = []
for el in WASTE:
    if el in a:
        ff.append(True)
    else:
        ff.append(False)

In [438]:
def GSK_calculator(dataframe,\
                                     waste = 4*[True],\
                                     health = 2*[True],\
                                     environment = 2*[True],\
                                     safety = 2*[True]):
#        WASTE = ['Incineration','Recycling','Biotreatment','VOC Emissions']
#    HEALTH = ['Health Hazard', 'Exposure Potential']
#    ENVIRONMENT = ['Aquatic Impact', 'Air Impact']
#    SAFETY = ['Flammability and Explosion', 'Reactivity and Stability']
#    indicators = {'waste' = WASTE, 'health' : HEALTH,  'environment' : ENVIRONMENT, 'safety' : SAFETY]
#                    
#    ff = []
#    for key, value in indicators.items():
#        if value in a:
#            ff.append(True)
#        else:
#            ff.append(False)
#    
    a = mid_composite_score_calculator(dataframe['Waste'].values, waste)
    print(a)
    b = mid_composite_score_calculator(dataframe['Health'].values, health)
    c = mid_composite_score_calculator(dataframe['Environment'].values, environment)
    d = mid_composite_score_calculator(dataframe['Safety'].values, safety)
    data = []
    for i in [a,b,c,d]:
        if i is not None:
            data.append(i)
    data = np.vstack([*data]).T
    n = 1/data.shape[1]
    gmean = np.power(data.prod(axis = 1),n).round(2)
    return gmean

In [451]:
def GSK_calculator(df, scores):
    k = 0
    gmean = 1
    for element in scores:
        if len(element):
            gmean *= ((df[element]).prod(axis =1)).pow(1/len(element))
            k += 1
    return np.power(gmean,1/k).round(2)

In [452]:
GSK_calculator(df, [WASTE, ENVIRONMENT, SAFETY])

Solvent Name
1,2,4-Trichlorobenzene         5.61
1,2-Dichlorobenzene (o-DCB)    5.10
1,2-Dichloroethane (DCE)       6.24
1,2-Dimethoxyethane (Glyme)    5.25
1,3-Dioxolane                  4.54
1,3-Propanediol                7.58
1,4-Butanediol                 7.38
1,4-Dioxane                    4.32
1-Butanol                      6.56
1-Heptanol                     7.84
1-Hexanol                      6.84
1-Octanol                      7.10
1-Pentanol                     7.68
1-Propanol                     6.12
2,2,2-Trifluoroethanol         4.36
2-Butanol                      6.01
2-Ethyl hexanol                7.74
2-Ethylhexyl acetate           7.39
2-Methoxyethanol               6.20
2-Methyltetrahydrofuran        4.83
2-Pentanol                     7.05
2-Pentanone                    5.86
2-Propanol (IPA)               6.11
3-Pentanone                    6.31
Acetic acid                    5.62
Acetic anhydride               6.49
Acetone                        5.36
Acetonitrile   

In [441]:
((df3[WASTE]).prod(axis =1)).pow(1/len(WASTE))

1      6.191977
2      6.054800
3      4.325308
4      3.935979
5      3.722419
6      5.623413
7      5.180040
8      2.912951
9      6.402172
10     8.972093
11     5.334838
12     8.425732
13     7.968565
14     4.053600
15     2.514867
16     4.053600
17     8.711754
18     8.677523
19     4.864599
20     4.355877
21     6.879581
22     5.009970
23     4.400559
24     5.009970
25     4.527019
26     5.264296
27     3.309751
28     2.783158
29     8.000000
30     6.000000
         ...   
103    2.632148
104    4.161791
105    4.355877
106    8.677523
107    4.472136
108    7.737346
109    4.472136
110    3.309751
111    5.264296
112    5.421612
113    5.885662
114    5.623413
115    7.841027
116    3.984283
117    5.233176
118    4.738137
119    4.400559
120    5.634626
121    4.600653
122    3.499636
123    4.756828
124    4.053600
125    7.363543
126    6.467844
127    3.223710
128    5.029734
129    2.990698
130    5.623413
131    2.514867
132    3.722419
Length: 132, dtype: floa

In [440]:
a = GSK_calculator(df3)

[6.19197699 6.05480021 4.32530773 3.93597934 3.72241944 5.62341325
 5.18004013 2.91295063 6.40217175 8.97209269 5.33483823 8.42573186
 7.96856521 4.05360046 2.51486686 4.05360046 8.71175435 8.67752283
 4.86459856 4.35587717 6.87958126 5.00997014 4.40055868 5.00997014
 4.52701906 5.26429605 3.30975092 2.78315768 8.         6.
 7.08516772 3.86867284 4.52701906 3.46410162 6.4807407  4.48604634
 8.18124698 5.88566191 7.17211856 7.20041149 7.4155855  5.56631537
 7.84102701 8.18124698 2.99069756 7.93725393 3.48200455        nan
        nan        nan        nan 8.1491            nan        nan
 5.62341325 5.02973372 5.02973372 5.80300926 3.08007029 4.69525374
 7.95270729 4.16179145 4.55901411 5.88566191 4.55901411 8.67752283
 8.42573186 4.16179145 8.73885189 4.949232   4.22948505 5.26429605
 5.2436108  5.2331757  5.62341325 6.51355562 4.86459856 4.22948505
 5.56631537 7.73734568 5.62341325 5.88566191 5.56631537 8.48528137
 7.20041149 5.00997014 7.4155855  5.88566191 5.6924251  5.47722558
 8.

In [437]:
a

array([5.42, 5.41, 4.3 , 4.12, 4.85, 8.02, 7.51, 4.09, 6.67, 7.97, 7.1 ,
       7.4 , 8.1 , 6.62, 3.02, 6.53, 7.89, 7.62, 4.28, 4.45, 7.04, 6.41,
       6.48, 6.77, 5.69, 5.75, 5.88, 5.81, 7.45, 3.73, 7.68, 3.1 , 3.27,
       4.14, 5.43, 4.4 , 5.87, 5.3 , 7.54, 7.24, 7.43, 4.88, 5.45, 6.22,
       5.13, 7.2 , 4.15,  nan,  nan,  nan,  nan, 7.8 ,  nan,  nan, 7.95,
       7.16, 4.49, 5.14, 4.95, 5.42, 7.64, 6.67, 5.02, 6.96, 6.4 , 5.82,
       5.55, 6.63, 7.22, 6.66, 6.13, 6.42, 5.34, 8.03, 8.05, 7.97, 4.38,
       5.52, 5.94, 6.63, 7.51, 5.58, 4.75, 7.26, 7.52, 6.93, 7.84, 5.4 ,
       6.92, 6.82, 5.44, 6.14, 5.83, 6.  , 5.14, 7.22, 5.71, 4.96, 5.41,
       5.91, 6.49, 7.48, 4.11, 5.18, 5.49, 6.7 , 4.95, 7.68, 3.81, 5.41,
       6.43, 7.1 , 8.76, 7.77, 5.92, 4.99, 5.61, 6.55, 6.33, 6.27, 5.23,
       4.79, 4.9 , 4.49, 5.96, 8.73, 4.43, 4.74, 4.08, 7.91, 3.88, 7.29])