# Machine Learning solution - PHP

ML model to predict 'Thermal Resistance' and 'Temperature Drop' in PHP

## Data Loading and Pre-Processing

In [1]:
# loading ML module
from ml_solution_module import MachineLearning

In [2]:
# Create the MachineLearning helper with "data/" as its argument; the message it prints
# below reports data/ml_result as the location where ML results are stored.
ml = MachineLearning("data/")

data/ml_result already exists and ML reuslts will be stored here.


In [3]:
# Load gfe_combined.csv for each sample and save the result as a csv in data/ml_result/.
# Each entry is (source csv, sample label, fr value) and is processed in the order listed.
sample_runs = [
    ('data/di_water_exp/40_FR/gfe_combined.csv', 'DI Water 40', 40),
    ('data/di_water_exp/60_FR/gfe_combined.csv', 'DI Water 60', 60),
    ('data/al2o3_diwater_exp/40_FR/gfe_combined.csv', 'Al2O3 DI Water 40', 40),
    ('data/al2o3_diwater_exp/60_FR/gfe_combined.csv', 'Al2O3 DI Water 60', 60),
]

df_w_40, df_w_60, df_a_40, df_a_60 = [
    ml.data_prep(csv_path, sample=sample_label, fr=fr_value)
    for csv_path, sample_label, fr_value in sample_runs
]


Compiled data stored at data/ml_result/all_combined_data_DI Water 40_40.csv
Compiled data stored at data/ml_result/all_combined_data_DI Water 60_60.csv
Compiled data stored at data/ml_result/all_combined_data_Al2O3 DI Water 40_40.csv
Compiled data stored at data/ml_result/all_combined_data_Al2O3 DI Water 60_60.csv


In [4]:
# Data compile
# Merge the per-sample files into one combined dataset; per the message printed below,
# it is saved as super_combined_data.csv under data/ml_result.

df_combined = ml.data_compile()

All data compiled in a single csv file and saved at: data/ml_result as super_combined_data.csv


In [5]:
# Data cleaning
# Filter the combined data by dG: datapoints with a positive dG value are removed
# (the filtering itself is implemented in ml.data_filter_dG).

df_clean = ml.data_filter_dG(df_combined)

In [6]:
# Display the filtered frame (pandas renders a truncated head/tail preview)
df_clean

Unnamed: 0.1,Unnamed: 0,t(min),Te[K],Tc[K],dT[K],P[bar],TR[K/W],GFE [KJ/mol],GFE_Tc [KJ/mol],dG[KJ/mol],Fluid,FR
0,0,0.0,298.150000,296.65,1.500000,0.413299,0.018750,-2190.243835,-2179.224665,-11.019171,DI Water 60,60
1,1,0.5,297.816667,296.65,1.166667,0.413299,0.014583,-2187.795131,-2179.224665,-8.570466,DI Water 60,60
2,2,1.0,298.150000,296.40,1.750000,0.413299,0.021875,-2190.243835,-2177.388136,-12.855699,DI Water 60,60
3,3,1.5,300.150000,296.40,3.750000,0.413299,0.046875,-2204.936063,-2177.388136,-27.547927,DI Water 60,60
4,4,2.0,302.150000,296.40,5.750000,0.413299,0.071875,-2219.628291,-2177.388136,-42.240154,DI Water 60,60
...,...,...,...,...,...,...,...,...,...,...,...,...
1746,392,24.0,360.150000,340.40,19.750000,0.913258,0.246875,-271.692723,-256.793566,-14.899157,Al2O3 DI Water 60,60
1747,393,24.5,360.816667,341.15,19.666667,0.946588,0.245833,-164.663355,-155.688217,-8.975138,Al2O3 DI Water 60,60
1748,394,25.0,360.816667,341.15,19.666667,0.946588,0.245833,-164.663355,-155.688217,-8.975138,Al2O3 DI Water 60,60
1749,395,25.5,360.816667,342.40,18.416667,0.979919,0.230208,-60.852658,-57.746640,-3.106018,Al2O3 DI Water 60,60


## EDA
Detailed EDA (data visualisation) is conducted in ml_eda_*.ipynb

In [7]:
# List the distinct labels present in the Fluid column of the filtered data
df_clean.Fluid.unique()

array(['DI Water 60', 'Al2O3 DI Water 40', 'DI Water 40',
       'Al2O3 DI Water 60'], dtype=object)

In [8]:
# List the column names of the filtered data
df_clean.columns

Index(['Unnamed: 0', 't(min)', 'Te[K]', 'Tc[K]', 'dT[K]', 'P[bar]', 'TR[K/W]',
       'GFE [KJ/mol]', 'GFE_Tc [KJ/mol]', 'dG[KJ/mol]', 'Fluid', 'FR'],
      dtype='object')

In [9]:
# Select the columns used in the rest of the notebook; the leftover 'Unnamed: 0' index
# column, t(min) and the two GFE columns are left out.
selected_columns = [
    'Te[K]',
    'Tc[K]',
    'dT[K]',
    'P[bar]',
    'TR[K/W]',
    'dG[KJ/mol]',
    'Fluid',
    'FR',
]
df_sd = df_clean[selected_columns]

In [10]:
# Display the selected-column frame (pandas renders a truncated head/tail preview)
df_sd

Unnamed: 0,Te[K],Tc[K],dT[K],P[bar],TR[K/W],dG[KJ/mol],Fluid,FR
0,298.150000,296.65,1.500000,0.413299,0.018750,-11.019171,DI Water 60,60
1,297.816667,296.65,1.166667,0.413299,0.014583,-8.570466,DI Water 60,60
2,298.150000,296.40,1.750000,0.413299,0.021875,-12.855699,DI Water 60,60
3,300.150000,296.40,3.750000,0.413299,0.046875,-27.547927,DI Water 60,60
4,302.150000,296.40,5.750000,0.413299,0.071875,-42.240154,DI Water 60,60
...,...,...,...,...,...,...,...,...
1746,360.150000,340.40,19.750000,0.913258,0.246875,-14.899157,Al2O3 DI Water 60,60
1747,360.816667,341.15,19.666667,0.946588,0.245833,-8.975138,Al2O3 DI Water 60,60
1748,360.816667,341.15,19.666667,0.946588,0.245833,-8.975138,Al2O3 DI Water 60,60
1749,360.816667,342.40,18.416667,0.979919,0.230208,-3.106018,Al2O3 DI Water 60,60


In [11]:
# Correlation
# df_sd still contains the text column 'Fluid', so restrict the correlation to numeric
# columns explicitly: relying on the implicit default emits a pandas FutureWarning
# (pandas 1.5) and raises an error from pandas 2.0 onward.
corr_matrix = df_sd.corr(numeric_only=True)
# Coefficients against dG[KJ/mol], sorted from lowest to highest
corr_matrix['dG[KJ/mol]'].sort_values(ascending=True)

  corr_matrix = df_sd.corr()


TR[K/W]      -0.490117
dT[K]        -0.490015
Te[K]         0.228414
FR            0.257672
Tc[K]         0.502954
P[bar]        0.697498
dG[KJ/mol]    1.000000
Name: dG[KJ/mol], dtype: float64

dG[KJ/mol] correlates most strongly with P[bar] (r ≈ 0.70).

In [12]:
# Correlation coefficients against Tc[K], sorted from lowest to highest
corr_matrix['Tc[K]'].sort_values(ascending=True)

FR           -0.051822
TR[K/W]       0.191281
dT[K]         0.191412
dG[KJ/mol]    0.502954
P[bar]        0.886474
Te[K]         0.918503
Tc[K]         1.000000
Name: Tc[K], dtype: float64

Tc[K] is strongly correlated with Te[K] (r ≈ 0.92) and P[bar] (r ≈ 0.89).

In [13]:
# Mutual information between the numeric features and Tc[K]
from sklearn.feature_selection import mutual_info_regression
import pandas as pd

# Drop the text column 'Fluid', the target 'Tc[K]' itself and 'dG[KJ/mol]' before scoring.
df_int = df_sd.drop(columns=['Fluid', 'Tc[K]', 'dG[KJ/mol]'])
# The estimator is randomised (it adds small noise to continuous features), so fix
# the seed to make the scores reproducible when the notebook is re-run.
mutual_info = mutual_info_regression(df_int, df_sd['Tc[K]'], random_state=42)

In [14]:
# Label each mutual-information score with its feature name, then show the
# scores from highest to lowest.
mutual_info = pd.Series(mutual_info, index=df_int.columns)
mutual_info.sort_values(ascending=False)

Te[K]      1.580934
P[bar]     1.141626
TR[K/W]    1.056068
dT[K]      1.054095
FR         0.062686
dtype: float64

In [16]:
# Data splitting: ml.data_split returns four objects, unpacked here as the train/test
# inputs (x_*) and targets (y_*).
# NOTE(review): split ratio, shuffling and seed are decided inside ml.data_split — confirm there.
x_train, x_test, y_train, y_test = ml.data_split(df_sd)

In [18]:
# checking mutual information on the training split (call left disabled; not run in this notebook)
#ml.mutual_info(x_train, y_train['Tc[K]'])