**We will create a base model of 5X5 and then we will try variations in topology to figure out best possible model out of certain models**

In [0]:
from tensorflow import keras
import pandas as pd
import numpy as np
from keras import Sequential
from keras.layers import Dense,Activation
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import statistics as st
from sklearn.model_selection import GridSearchCV

In [0]:
data=pd.read_csv(r"/content/Data.csv")

In [3]:
data.head(2)

Unnamed: 0,log(shear rate) in s^-1,Polymer conc(wt%),NaCl concentration(wt%),Ca+2 concentration(wt%),Temperature(in celsius),log(viscosity) in cP
0,0.010415,0.3,0.1,0.0,25.0,2309.56
1,0.022561,0.3,0.1,0.0,25.0,2298.77


In [0]:

# independent variables (X):shear rate, Polymer conc, NaCl concentration, Ca+2 concentration, Temperature
# dependent variable (Y): Viscosity

X=data[data.columns[0:5]]
Y=data[['log(viscosity) in cP']]

In [5]:
X.shape

(654, 5)

In [6]:
X.head(2)

Unnamed: 0,log(shear rate) in s^-1,Polymer conc(wt%),NaCl concentration(wt%),Ca+2 concentration(wt%),Temperature(in celsius)
0,0.010415,0.3,0.1,0.0,25.0
1,0.022561,0.3,0.1,0.0,25.0


In [7]:
Y[0:2]

Unnamed: 0,log(viscosity) in cP
0,2309.56
1,2298.77


In [0]:
### standardize the dependent variables

scaler_1=MinMaxScaler()
X=pd.DataFrame(scaler_1.fit_transform(X),columns=X.columns)

### scaling viscosity values between 0 to 1

scaler_2=StandardScaler()
Y=pd.DataFrame(scaler_2.fit_transform(Y),columns=Y.columns)


In [0]:
#creating a base model with two hidden layers and 5 neurons each
def build_model():
  model=Sequential()
  model.add(Dense(5,input_dim=5,kernel_initializer='normal',activation='relu'))
  model.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model.add(Dense(1,kernel_initializer='normal'))

  model.compile(loss='mean_squared_error',optimizer='adam')
  return model

In [0]:
estimator=KerasRegressor( build_fn=build_model,epochs=10,batch_size=2,verbose=0)

In [0]:
score_1=[]
cv= KFold(n_splits=10, random_state=0, shuffle=True)
for train_index, test_index in cv.split(X):
  X_train, X_test, Y_train, Y_test = X.iloc[train_index], X.iloc[test_index], Y.iloc[train_index], Y.iloc[test_index]
  estimator.fit(X_train,Y_train)
  pred_1=estimator.predict(X_test)
  err_1=np.mean(np.abs((Y_test['log(viscosity) in cP']-pred_1)/Y_test['log(viscosity) in cP'])*100)
  score_1.append(err_1)


In [37]:
score_1

[96.93842500700039,
 174.01752842330262,
 244.1997867628992,
 101.48681269367555,
 53.66420941901988,
 243.72270412546936,
 94.81816592549097,
 127.65353671431068,
 114.37972217865955,
 121.11780958607274]

In [38]:
st.mean(score_1)

137.1998700835901

**Model is performing really bad**

**Let's try to deepen the network**

In [0]:
def build_deep():
  model_2=Sequential()
  model_2.add(Dense(5,input_dim=5,kernel_initializer='normal',activation='relu'))
  model_2.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_2.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_2.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_2.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_2.add(Dense(1,kernel_initializer='normal'))

  model_2.compile(loss='mean_squared_error',optimizer='adam')
  return model_2

In [0]:
estimator_2=KerasRegressor( build_fn=build_deep,epochs=10,batch_size=2,verbose=0)
score_2=[]
cv= KFold(n_splits=10, random_state=0, shuffle=True)
for train_index, test_index in cv.split(X):
  X_train, X_test, Y_train, Y_test = X.iloc[train_index], X.iloc[test_index], Y.iloc[train_index], Y.iloc[test_index]
  estimator_2.fit(X_train,Y_train)
  pred_2=estimator_2.predict(X_test)
  err_2=np.mean(np.abs((Y_test['log(viscosity) in cP']-pred_2)/Y_test['log(viscosity) in cP'])*100)
  score_2.append(err_2)

In [41]:
score_2

[94.65953429411475,
 97.68577631842949,
 113.11068802115533,
 80.37638669364439,
 107.47123886767321,
 110.0580071124131,
 67.03639563628825,
 106.54316076911218,
 112.26459876277134,
 117.13395660659054]

In [42]:
st.mean(score_2)

100.63397430821925

**incresing hidden layers is somehow making the predictions more robust as for every batch eroor is almost similar, I tested on 3 hidden layers then 4 hidden layers after that it appears that adding further layes is not contributing much to accuracy so we will keep 4 layers**

**Let's try a wider topology with 4 hiddene layers deep**

In [0]:
def build_wide():
  model_3=Sequential()
  model_3.add(Dense(5,input_dim=5,kernel_initializer='normal',activation='relu'))
  model_3.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_3.add(Dense(6,kernel_initializer='normal',activation='relu'))
  model_3.add(Dense(7,kernel_initializer='normal',activation='relu'))
  model_3.add(Dense(8,kernel_initializer='normal',activation='relu'))
  model_3.add(Dense(1,kernel_initializer='normal'))

  model_3.compile(loss='mean_squared_error',optimizer='adam')
  return model_3

In [0]:
estimator_3=KerasRegressor( build_fn=build_wide,epochs=10,batch_size=2,verbose=0)
score_3=[]
cv= KFold(n_splits=10, random_state=0, shuffle=True)
for train_index, test_index in cv.split(X):
  X_train, X_test, Y_train, Y_test = X.iloc[train_index], X.iloc[test_index], Y.iloc[train_index], Y.iloc[test_index]
  estimator_3.fit(X_train,Y_train)
  pred_3=estimator_3.predict(X_test)
  err_3=np.mean(np.abs((Y_test['log(viscosity) in cP']-pred_3)/Y_test['log(viscosity) in cP'])*100)
  score_3.append(err_3)

In [58]:
score_3

[71.68124087982777,
 97.31884307185774,
 107.69417806680296,
 95.06079807544911,
 107.37028838743373,
 112.07468038418438,
 95.52625470974519,
 105.62949507083431,
 111.01101500540194,
 119.72571609255776]

In [59]:
st.mean(score_3)

102.3092509744095

** Trying various possibilities **

In [0]:
def random_models(layers,activation):
  rand_model=Sequential()
  rand_model.add(Dense(5,input_dim=5,kernel_initializer='normal',activation='relu'))
  rand_model.add(Dense(5,kernel_initializer='normal',activation='relu'))

  for i,nodes in enumerate(layers):
    rand_model.add(Dense(nodes))
    rand_model.add(Activation(activation))

  rand_model.add(Dense(1,kernel_initializer='normal'))
  rand_model.compile(loss='mean_squared_error',optimizer='adam')
  return rand_model

In [0]:
estimator_random=KerasRegressor( build_fn=random_models,verbose=0)

layers=[(4,5,6),(5,6,7),(5,6,7,8)]
activations=['relu','sigmoid']

param_grid=dict(layers=layers,activation=activations,batch_size=[2,4],epochs=[10,15])

grid=GridSearchCV(estimator=estimator_random,param_grid=param_grid,cv=6)

In [0]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.15)

In [0]:
grid_result=grid.fit(X_train,Y_train)

In [88]:
print(grid_result.best_score_,grid_result.best_params_)

-0.3869687296755377 {'activation': 'relu', 'batch_size': 4, 'epochs': 15, 'layers': (5, 6, 7)}


In [0]:
def build_final():
  model_final=Sequential()
  model_final.add(Dense(5,input_dim=5,kernel_initializer='normal',activation='relu'))
  model_final.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_final.add(Dense(5,kernel_initializer='normal',activation='relu'))
  model_final.add(Dense(6,kernel_initializer='normal',activation='relu'))
  model_final.add(Dense(7,kernel_initializer='normal',activation='relu'))
  model_final.add(Dense(1,kernel_initializer='normal'))

  model_final.compile(loss='mean_squared_error',optimizer='adam')
  return model_final

In [0]:
estimator_final=KerasRegressor( build_fn=build_final,epochs=15,batch_size=4,verbose=0)
score_final=[]
cv= KFold(n_splits=10, random_state=0, shuffle=True)
for train_index, test_index in cv.split(X):
  X_train, X_test, Y_train, Y_test = X.iloc[train_index], X.iloc[test_index], Y.iloc[train_index], Y.iloc[test_index]
  estimator_final.fit(X_train,Y_train)
  pred_final=estimator_final.predict(X_test)
  err_final=np.mean(np.abs((Y_test['log(viscosity) in cP']-pred_final)/Y_test['log(viscosity) in cP'])*100)
  score_final.append(err_final)

In [92]:
score_final

[96.95819359081825,
 97.29408459801262,
 113.84219842977242,
 50.753406871238006,
 107.4146615329846,
 109.05318852701373,
 96.51761173949087,
 105.53118438123053,
 117.08984100423199,
 117.93844251597228]

In [91]:
st.mean(score_final)

101.23928131907653