# Code snippets for testing the functions



In [1]:
import pandas as pd

In [2]:
# Load the data in a dataframe structure
df = pd.read_excel('solventSelectionTool_table.xlsx', sheet = 0, header = 2)
# Drop first row as it is empty
df = df[1:]

In [36]:
# Add extra columns with calculated parameters
df['Hansen coordinates']= [np.array([df['dD - Dispersion'].iloc[i], df['dP - Polarity'].iloc[i], df['dH - Hydrogen bonding'].iloc[i]]) for i in range(df.shape[0])]
list_GSK_scores = df.columns.values[7:18]
df['Waste'] = (df['Incineration']*df['Recycling']*df['Biotreatment']*df['VOC Emissions'])**0.25
df['Environment']  =(df['Aquatic Impact']*df['Air Impact'])**0.5
df['Health'] = (df['Health Hazard']*df['Exposure Potential'])**0.5
df['Safety'] = (df['Flammability and Explosion']*df['Reactivity and Stability'])**0.5
df['Greenness'] = round((df['Waste']*df['Environment']*df['Health']*df['Safety'])**0.25,2)
df['Ra'] = np.nan

In [130]:
df.head()

Unnamed: 0,Solvent Name,CAS Number,dD - Dispersion,dP - Polarity,dH - Hydrogen bonding,Melting Point (°C),Boiling Point (°C),Incineration,Recycling,Biotreatment,...,Life Cycle Analysis,Hazard Labels,Precautionary Labels,Hansen coordinates,Waste,Environment,Health,Safety,Greenness,Ra
1,"1,2,4-Trichlorobenzene",120-82-1,20.2,4.2,3.2,17.0,214.0,3.0,7.0,7.0,...,8.0,H302 H315 H331 H400 H410,P261 P264 P270 P271 P273 P280 P301+P312 P302+P...,"[20.2, 4.2, 3.2]",6.191977,3.0,4.898979,9.486833,5.42,
2,"1,2-Dichlorobenzene (o-DCB)",95-50-1,19.2,6.3,3.3,-17.0,180.0,4.0,8.0,6.0,...,8.0,H302 H315 H317 H319 H332 H335 H400 H410,P261 P264 P270 P271 P272 P273 P280 P301+P312 P...,"[19.2, 6.3, 3.3]",6.0548,2.44949,6.480741,8.944272,5.41,
3,"1,2-Dichloroethane (DCE)",107-06-2,18.0,7.4,4.1,-36.0,84.0,2.0,7.0,5.0,...,7.0,H225 H302 H315 H319 H331 H335 H350,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[18.0, 7.4, 4.1]",4.325308,7.937254,1.414214,7.071068,4.3,
4,"1,2-Dimethoxyethane (Glyme)",110-71-4,15.4,6.3,6.0,-58.0,85.0,4.0,4.0,3.0,...,7.0,H225 H315 H332 H351 H360,P201 P202 P210 P233 P240 P241 P242 P243 P261 P...,"[15.4, 6.3, 6.0]",3.935979,7.483315,2.0,4.898979,4.12,
5,"1,3-Dioxolane",646-06-0,18.1,6.6,9.3,-95.0,75.0,4.0,4.0,3.0,...,,H225 H319,P210 P233 P240 P241 P242 P243 P264 P280 P303+P...,"[18.1, 6.6, 9.3]",3.722419,5.91608,5.91608,4.242641,4.85,


In [98]:
def update_Ra(coordinates, reference):
    distance = coordinates - reference
    Ra = [np.sqrt(4*d[0]**2 + d[1]**2 + d[2]**2) for d in distance]
    return Ra


In [96]:
def update_Ra2(coordinates, reference):
    Ra = []
    for value in coordinates:
        # Target vector minus the reference vector
        d = [value[i] - reference[i] for i in range(3)]
        # Square the values to get the hansen index and update the Distance column
        Ra.append(np.sqrt(4*d[0]**2 + d[1]**2 + d[2]**2))
    return Ra

In [99]:
Href = [18,1.5,2]
df['Ra'] = update_Ra(df['Hansen coordinates'], Href)
df['Ra'].head()


1    5.300000
2    5.521775
3    6.262587
4    8.128961
5    8.907300
Name: Ra, dtype: float64

In [114]:
class Solvent(object):
    def __init__(self, parameters):
        # Initalize all the properties of the object from the input information (parameters, a DataFrame structure)
        self.name = parameters['Solvent Name']
        self.CAS = parameters['CAS Number']
        self.melting_point = parameters['Melting Point (°C)']
        self.boiling_point = parameters['Boiling Point (°C)']
        self.dD = parameters['dD - Dispersion']
        self.dP = parameters['dP - Polarity']
        self.dH = parameters['dH - Hydrogen bonding']
        self.hansen_coordinates = [self.dD,self.dP, self.dH]
        self.GSK_scores = dict(Incineration = parameters['Incineration'],\
                              Recycling = parameters['Recycling'])
                                

In [56]:
a = Solvent(df.iloc[0])
a.name
a.CAS
a.hansen_coordinates

[20.2, 4.2, 3.2]

In [47]:
df.iloc[0]

Solvent Name                                             1,2,4-Trichlorobenzene
CAS Number                                                             120-82-1
dD - Dispersion                                                            20.2
dP - Polarity                                                               4.2
dH - Hydrogen bonding                                                       3.2
Melting Point (°C)                                                           17
Boiling Point (°C)                                                          214
Incineration                                                                  3
Recycling                                                                     7
Biotreatment                                                                  7
VOC Emissions                                                                10
Aquatic Impact                                                                1
Air Impact                              

# Plotting

In [131]:
import plotly.graph_objs as go

filt = [True]*df.shape[0]
filt = [df['Greenness'] < 5]
x = df['dD - Dispersion']
y = df['dP - Polarity']
z = df['dH - Hydrogen bonding']

trace = go.Scatter3d(x = x, y = y, z = z, mode='markers', marker=dict(size=8,\
                                                        color = df['Greenness'],\
                                                        colorscale = 'RdYlGn',\
                                                        opacity=0.8,\
                                                        showscale = True),\
                    hovertemplate = '<b>%{text}</b><br>' +\
                                     '%{hovertext}<br>' +\
                                     'dD = %{x:.2f}<br>dP = %{y:.2f}<br>dH = %{z:.2f}',
                    text = df['Solvent Name'],\
                    hovertext = [f'Greenness  = {value:.2f}' for value in df['Greenness']]) 

plot_layout = go.Layout(height=600, title = None,
                paper_bgcolor='white',
                scene={"aspectmode": "cube",
                       "xaxis": {"title": 'dD - Dispersion', },
                       "yaxis": {"title": 'dP - Polarity', },
                       "zaxis": {"title":'dH - Hydrogen bonding' }})

go.Figure(data = trace, layout = layout)

In [105]:
(df[['Solvent Name', 'Greenness', 'Ra']]).sort_values('Ra').to_dict('records')

[{'Solvent Name': 'Toluene', 'Greenness': 5.96, 'Ra': 0.10000000000000009},
 {'Solvent Name': 'Cumene', 'Greenness': 5.87, 'Ra': 0.877496438739213},
 {'Solvent Name': 'p-Xylene', 'Greenness': 5.92, 'Ra': 1.2727922061357853},
 {'Solvent Name': 'Mesitylene', 'Greenness': 6.14, 'Ra': 1.6643316977093237},
 {'Solvent Name': 'Benzene', 'Greenness': 3.73, 'Ra': 1.6999999999999986},
 {'Solvent Name': 'Decahydronaphthalene (Decalin)',
  'Greenness': 5.45,
  'Ra': 2.6248809496813363},
 {'Solvent Name': 'L-Limonene', 'Greenness': 5.44, 'Ra': 2.8178005607210745},
 {'Solvent Name': 'D-Limonene', 'Greenness': 5.55, 'Ra': 2.8178005607210745},
 {'Solvent Name': 'Cyclohexane', 'Greenness': 5.3, 'Ra': 3.354101966249684},
 {'Solvent Name': 'Chlorobenzene',
  'Greenness': 5.43,
  'Ra': 3.4409301068170506},
 {'Solvent Name': 'Ethoxybenzene', 'Greenness': 7.22, 'Ra': 3.693237062523877},
 {'Solvent Name': 'Diphenyl ether',
  'Greenness': 5.82,
  'Ra': 3.930648801406707},
 {'Solvent Name': 'Chloroform', 'Gree

In [134]:
print(df.loc[1].index[0])

Solvent Name


In [262]:
data = df.loc[25]

In [162]:
['{:s}: {:.1f} '.format(label, data.loc[label]) for label in data.index[8:18]])

SyntaxError: invalid syntax (<ipython-input-162-08a37755a74d>, line 1)

In [261]:
df2 = pd.read_excel('solventSelectionTool_table.xlsx', sheet_name = 1, header = 0, usecols=(0,1))

Unnamed: 0,Statements,Fulltext
0,No Data,No data available.
1,Not Hazardous,Not classified as hazardous.
2,H224,Extremely flammable liquid and vapor
3,H225,Highly Flammable liquid and vapor
4,H226,Flammable liquid and vapor
5,H290,May be corrosive to metals
6,H300,Fatal if swallowed
7,H301,Toxic if swallowed
8,H302,Harmful if swallowed
9,H303,May be harmful if swallowed


In [265]:
print(1, data['Precautionary Labels'])
precaution_labels = data['Precautionary Labels'].split(' ')
print(2, precaution_labels)
for precaution in precaution_labels:
    print(3, precaution)
    splitted_precaution = precaution.split('+')
    text = ''
    for s_precaution in splitted_precaution:
        
        text_precaution = df2.Fulltext[df2['Statements'] == s_precaution].values[0]
        text += text_precaution
    print(precaution, text)

1 P210 P233 P240 P241 P242 P243 P260 P264 P280 P301+P330+P331 P303+P361+P353 P304+P340 P305+P351+P338 P310 P321 P363 P370+P378 P403+P235 P405 P501
2 ['P210', 'P233', 'P240', 'P241', 'P242', 'P243', 'P260', 'P264', 'P280', 'P301+P330+P331', 'P303+P361+P353', 'P304+P340', 'P305+P351+P338', 'P310', 'P321', 'P363', 'P370+P378', 'P403+P235', 'P405', 'P501']
3 P210
P210 Keep away from heat, hot surface, sparks, open flames and other ignition sources. - No smoking.
3 P233
P233 Keep container tightly closed.
3 P240
P240 Ground/bond container and receiving equipment.
3 P241
P241 Use explosion-proof [electrical/ventilating/lighting/.../] equipment.
3 P242
P242 Use only non-sparking tools.
3 P243
P243 Take precautionary measures against static discharge.
3 P260
P260 Do not breathe dust/fume/gas/mist/vapors/spray.
3 P264
P264 Wash ... thoroughly after handling.
3 P280
P280 Wear protective gloves/protective clothing/eye protection/face protection.
3 P301+P330+P331
P301+P330+P331 IF SWALLOWED:Rinse 

In [246]:
hazard_labels = data['Pre'].split(' ')
for hazard in hazard_labels:
    text_hazard = df2.Fulltext[df2['Statements'] == hazard].values[0]
    print(value, text_hazard)

['H226', 'H314', 'H318']

In [240]:
hazard = df2.Fulltext[df2['Statements'] == 'H290'].values[0]

In [221]:
 type(hazard['Fulltext'])

pandas.core.series.Series

In [241]:
hazard

'May be corrosive to metals'

In [231]:
hazard.Fulltext.values[0]

'May be corrosive to metals'

In [237]:
df2['Statements'] == 'H290'

0      False
1      False
2      False
3      False
4      False
5       True
6      False
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24     False
25     False
26     False
27     False
28     False
29     False
       ...  
121    False
122    False
123    False
124    False
125    False
126    False
127    False
128    False
129    False
130    False
131    False
132    False
133    False
134    False
135    False
136    False
137    False
138    False
139    False
140    False
141    False
142    False
143    False
144    False
145    False
146    False
147    False
148    False
149    False
150    False
Name: Statements, Length: 151, dtype: bool