<a href="https://colab.research.google.com/github/shahilchaudhari/Data-Science/blob/main/MLmodel_nanofluid_properties.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score,StratifiedKFold
from sklearn.linear_model import LinearRegression
import sklearn.gaussian_process as gp
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,learning_curve
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.gaussian_process.kernels import RBF, DotProduct
import statsmodels.api as sn
import seaborn as sns
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.preprocessing import StandardScaler
from scipy.stats import loguniform
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
import xgboost as xbg
from sklearn.pipeline import make_pipeline
from prettytable import PrettyTable

import statsmodels.api as sm

# Viscosity

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/viscosity_pred.csv")

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
len(df.columns)

In [None]:
# Multiply the base-fluid and mixture columns by 1000 in place.
# NOTE(review): presumably a unit conversion — confirm against the CSV's units.
# The cell is not idempotent: re-running it scales the columns again.
for scaled_col in ("basefluid", "mixture"):
    df[scaled_col] = 1000 * df[scaled_col]

In [None]:
df.sample(5)

In [None]:
# One-hot encode categorical columns (first level dropped to avoid collinearity).
# dtype=float keeps the dummies numeric: pandas >= 2.0 defaults to bool, which
# makes the mixed frame object-dtype when statsmodels converts it for OLS.
df = pd.get_dummies(df, dummy_na=False, drop_first=True, dtype=float)

In [None]:
df.sample(5)

In [None]:
df.shape[0]

In [None]:
df.describe(include="all")

In [None]:
# Set the figure size BEFORE drawing: rcParams only affect figures created
# afterwards, so setting it after the heatmap left this plot at the old size.
sns.set(rc={'figure.figsize':(25,25)})

# Pairwise correlation of all (now numeric) columns, annotated with the values.
corr=df.corr()
sns.heatmap(corr,annot=True)

In [None]:
X = df.drop(["mixture","mass_frac"],axis=1)
X

In [None]:
y = df["mixture"]
y

# OLS model for testing

In [None]:
# Add an intercept column and fit a statsmodels OLS on the full data set as a
# quick sanity check; the summary reports coefficients and fit statistics.
# NOTE: X is overwritten here, so the added constant column is also part of the
# train/test split and of every model fitted further down in this section.
X = sm.add_constant(X)
result = sm.OLS(y, X).fit()
print(result.summary())

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=9)
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

In [None]:
# Baseline: plain least-squares fit on the training split, scored on the test split.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

baseline_mae = mean_absolute_error(y_test, y_pred)
baseline_mse = mean_squared_error(y_test, y_pred)
baseline_r2 = r2_score(y_test, y_pred)
print("MAE= ", baseline_mae, "MSE=", baseline_mse, "R2", baseline_r2)

In [None]:
# Learning curve: R2 of the scaled linear model as the training set grows.
# X_train still carries the constant column added for the OLS fit, hence
# with_mean=False (keep that column intact) and fit_intercept=False.
pipeline = make_pipeline(StandardScaler(with_mean=False),LinearRegression(fit_intercept=False,n_jobs=1))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the 10 CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
# The default scorer of a regressor is R2, and the "test" scores come from the
# CV validation folds, so label the curves accordingly.
plt.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training R2')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='blue', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

In [None]:
# Only fit_intercept changes the fitted model. n_jobs merely controls
# parallelism, so searching over it multiplied the number of fits by six
# without ever changing a score.
parameters = {"fit_intercept": [True, False]}

# 10-fold CV repeated 3 times; the default scorer for a regressor is R2.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(estimator=lr, param_grid = parameters, cv = cv, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_lr = grid.predict(X_test)

In [None]:
# Test-set metrics for the tuned linear regression.
r_sq = r2_score(y_test, y_pred_lr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of samples
# R2 was computed on (the test set) and p the number of predictors. The 'const'
# column from sm.add_constant is the intercept, not a predictor.
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_lr = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_lr),"MSE=",mean_squared_error(y_test,y_pred_lr),"R2",r2_score(y_test, y_pred_lr),"   adj_R2",adj_r2_lr)
# best_score_ is the mean cross-validated R2 (default scorer), not an MAE.
print('Best CV R2: %.3f' % grid.best_score_)
print('Config: %s' % grid.best_params_)

# Gaussian Process Regression

In [None]:
# Baseline Gaussian process: constant * RBF kernel, both with bounded hyper-parameters.
amplitude_kernel = gp.kernels.ConstantKernel(1.0, (1e-1, 1e3))
rbf_kernel = gp.kernels.RBF(10.0, (1e-3, 1e3))
kernel = amplitude_kernel * rbf_kernel

# 100 optimizer restarts make this fit slow.
model = gp.GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=100,
    alpha=0.1,
    normalize_y=True,
).fit(X_train, y_train)

# Predict the mean and the per-sample standard deviation on the test split.
y_pred, std = model.predict(X_test, return_std=True)
print("MAE= ", mean_absolute_error(y_test, y_pred), "MSE=", mean_squared_error(y_test, y_pred), "R2", r2_score(y_test, y_pred))

In [None]:
# Grid for the Gaussian process: two kernel families, each tried with three
# log-spaced initial hyper-parameter values (0.1, 1, 10) and two alpha values.
gp_alphas = [1e-2, 1e-3]
gp_scales = np.logspace(-1, 1, 3)
param_grid = [
    {"alpha": gp_alphas, "kernel": [RBF(scale) for scale in gp_scales]},
    {"alpha": gp_alphas, "kernel": [DotProduct(scale) for scale in gp_scales]},
]

# Reuses the `cv` splitter defined in an earlier cell; candidates are ranked by R2.
clf = GridSearchCV(estimator=gp.GaussianProcessRegressor(), param_grid=param_grid, cv=cv, scoring='r2')
clf.fit(X_train, y_train)
y_pred_gp = clf.predict(X_test)

In [None]:
# Test-set metrics for the tuned Gaussian process.
r_sq = r2_score(y_test, y_pred_gp)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_gp = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_gp),"MSE=",mean_squared_error(y_test,y_pred_gp),"R2",r2_score(y_test, y_pred_gp),"  Adj R2",adj_r2_gp)
# The search was scored with 'r2', so best_score_ is a mean CV R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

In [None]:
# Learning curve: R2 of a scaled Gaussian process (fixed alpha, RBF kernel)
# as the training set grows.
pipeline = make_pipeline(StandardScaler(with_mean=False),gp.GaussianProcessRegressor(alpha=0.01,kernel=RBF(length_scale=1)))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the 10 CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
# The default scorer of a regressor is R2, and the "test" scores come from the
# CV validation folds, so label the curves accordingly.
plt.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training R2')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='blue', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

# Random forest

In [None]:
# From here on y_test is a plain NumPy array instead of a pandas Series.
y_test = np.array(y_test)
from sklearn.ensemble import RandomForestRegressor

# Baseline forest: 100 trees with a fixed seed.
rf = RandomForestRegressor(n_estimators=100, random_state=6666).fit(X_train, y_train)
y_pred = rf.predict(X_test)

print("MAE= ", mean_absolute_error(y_test, y_pred), "MSE=", mean_squared_error(y_test, y_pred), "R2", r2_score(y_test, y_pred))

In [None]:
# Search space for the random-forest RandomizedSearchCV in the next cell.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split.
# 1.0 (all features) is what 'auto' meant for regressors; the 'auto' string was
# deprecated in scikit-learn 1.1 and removed in 1.3, where it raises an error.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree (None = grow until leaves are pure)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
# Seed the forest as well as the search: RandomizedSearchCV's random_state only
# fixes which candidates are sampled, so an unseeded forest gave different
# scores (and possibly a different winner) on every run.
rf = RandomForestRegressor(random_state=6666)
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 15, cv = cv, verbose=2, random_state=42, n_jobs = -1)
rf_random.fit(X_train,y_train)
y_pred_rf = rf_random.predict(X_test)

In [None]:
# Test-set metrics for the tuned random forest.
r_sq = r2_score(y_test, y_pred_rf)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_rf = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_rf),"MSE=",mean_squared_error(y_test,y_pred_rf),"R2",r2_score(y_test, y_pred_rf),"  Adj_r2  ",adj_r2_rf)
print('Best score',rf_random.best_score_)
print("best params ",rf_random.best_params_)

# Support Vector Machines

In [None]:
# Standardise with statistics learned from the training split only.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Baseline RBF support-vector regressor on the scaled features.
svr = SVR(kernel="rbf", C=500).fit(X_train_std, y_train)
y_test_pred = svr.predict(X_test_std)

print("MAE= ", mean_absolute_error(y_test, y_test_pred), "MSE=", mean_squared_error(y_test, y_test_pred), "R2", r2_score(y_test, y_test_pred))

In [None]:

# Grid-search the SVR on standardised features.
parameters = {'kernel': ('linear', 'rbf','poly'), 'C':[1.5, 10],'gamma': [1e-7, 1e-4],'epsilon':[0.1,0.2,0.5,0.3]}

clf = GridSearchCV(SVR(), parameters)

# Fit the scaler on the training rows only and reuse it for the test rows;
# refitting it on X_test would scale the two sets inconsistently.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

# Train and predict on the scaled matrices. Previously the unscaled X_train /
# X_test were passed here, so the scaled copies were never used.
clf.fit(X_train_sc,y_train)
y_pred_svr = clf.predict(X_test_sc)

In [None]:
# Test-set metrics for the tuned SVR.
r_sq = r2_score(y_test, y_pred_svr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_svr = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_svr),"MSE=",mean_squared_error(y_test,y_pred_svr),"R2",r2_score(y_test, y_pred_svr)," Adj_r2",adj_r2_svr)
# No scoring was passed to the search, so best_score_ is the default R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

# Ridge Regression

In [None]:
# RidgeCV and Lasso are first brought into scope here; Ridge, RepeatedKFold
# and GridSearchCV repeat imports already made at the top of the notebook.
from sklearn.linear_model import RidgeCV,Ridge,Lasso
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV

# Candidate regularisation strengths: 100 values log-spaced from 1e0 to 1e5.
r_alphas = np.logspace(0, 5, 100)


# RidgeCV selects the alpha from r_alphas using R2 as the selection score.
ridge_model = RidgeCV(alphas=r_alphas, scoring='r2')

ridge_model = ridge_model.fit(X_train, y_train)

# Predictions on the held-out split (metrics are printed in the next cell).
y_pred = ridge_model.predict(X_test)

In [None]:
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Ridge with a fixed penalty of alpha=1, as a reference point for the grid search.
ridge_model = Ridge(alpha=1).fit(X_train, y_train)
y_pred = ridge_model.predict(X_test)

print("MAE= ", mean_absolute_error(y_test, y_pred), "MSE=", mean_squared_error(y_test, y_pred), "R2", r2_score(y_test, y_pred))

In [None]:
# Tune Ridge's alpha over 0.00 .. 0.99 in steps of 0.01, ranked by (negated) MAE
# under 6-fold CV repeated 4 times.
# Note: `model`, `cv` and `grid` re-bind names used by earlier cells.
model = Ridge()
cv = RepeatedKFold(n_splits=6, n_repeats=4, random_state=243)
grid = {'alpha': np.arange(0, 1, 0.01)}

search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
search.fit(X_train, y_train)

y_pred_ri = search.predict(X_test)

In [None]:
# Test-set metrics for the tuned Ridge model.
r_sq = r2_score(y_test, y_pred_ri)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_ri = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_ri),"MSE=",mean_squared_error(y_test,y_pred_ri),"R2",r2_score(y_test, y_pred_ri)," Adj_R2",adj_r2_ri)
# The search maximises neg_mean_absolute_error; negate it to report a real MAE.
print('Best CV MAE: %.3f' % -search.best_score_)
print('Config: %s' % search.best_params_)

# Lasso Regression

In [None]:
# Baseline Lasso fit with alpha=0, i.e. with the L1 penalty switched off.
# NOTE(review): scikit-learn advises against alpha=0 for Lasso (it warns about
# convergence and recommends LinearRegression instead) — confirm this is intended.
lasso_model = Lasso(alpha=0)
# fit on the training split
lasso_model.fit(X_train,y_train)

y_pred = lasso_model.predict(X_test)

# Test-set error metrics for this baseline.
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Tune the Lasso penalty, ranked by (negated) MAE under 10-fold CV repeated 3 times.
model = Lasso()
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# define grid: 0.01 .. 0.99. alpha=0 is dropped because scikit-learn advises
# against an unpenalised Lasso (coordinate descent converges poorly there).
grid = dict()
grid['alpha'] = np.arange(0, 1, 0.01)[1:]
# define search
lasso = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# Fit on the training split only. Fitting on the full (X, y) let the test rows
# leak into training and made the test metrics below optimistic.
lasso.fit(X_train, y_train)

y_pred_ls = lasso.predict(X_test)

In [None]:
# Test-set metrics for the tuned Lasso model.
r_sq = r2_score(y_test, y_pred_ls)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_ls = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_ls),"MSE=",mean_squared_error(y_test,y_pred_ls),"R2",r2_score(y_test, y_pred_ls),"Adj R2",adj_r2_ls)
# The search maximises neg_mean_absolute_error; negate it to report a real MAE.
print('Best CV MAE: %.3f' % -lasso.best_score_)
print('Config: %s' % lasso.best_params_)

# KNN model

In [None]:
# Baseline 3-nearest-neighbour regressor on the raw (unscaled) features.
knn = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)
y_pred = knn.predict(X_test)

print("MAE= ", mean_absolute_error(y_test, y_pred), "MSE=", mean_squared_error(y_test, y_pred), "R2", r2_score(y_test, y_pred))

In [None]:
# Exhaustive KNN grid search, ranked by R2 under 5-fold CV repeated 3 times.
# NOTE(review): 'algorithm' selects the neighbour-search data structure; confirm
# it is worth a 4x larger grid.
cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=999)
params = [{
    'n_neighbors': list(range(2, 16)),
    'weights': ['uniform', 'distance'],
    'p': [1, 2, 5],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}]

best_knn = GridSearchCV(
    KNeighborsRegressor(),
    params,
    cv=cv,
    verbose=1,
    scoring='r2',
    return_train_score=True,
)
best_knn.fit(X_train, y_train)
y_pred_knn = best_knn.predict(X_test)

In [None]:
# Test-set metrics for the tuned KNN model.
r_sq = r2_score(y_test, y_pred_knn)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_knn = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_knn),"MSE=",mean_squared_error(y_test,y_pred_knn),"R2",r2_score(y_test, y_pred_knn),"adj R2",adj_r2_knn)
# The search was scored with 'r2', so best_score_ is a mean CV R2, not an MAE.
print('Best CV R2: %.3f' % best_knn.best_score_)
print('Config: %s' % best_knn.best_params_)

# XGBoost

In [None]:
# The notebook's top-level import binds xgboost to a different alias, so the
# `xgb` name used below is introduced by this import.
import xgboost as xgb

# Baseline gradient-boosted trees with default hyper-parameters and squared-error loss.
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

xgb_mae = mean_absolute_error(y_test, y_pred)
xgb_mse = mean_squared_error(y_test, y_pred)
xgb_r2 = r2_score(y_test, y_pred)
print("MAE= ", xgb_mae, "MSE=", xgb_mse, "R2", xgb_r2)

In [None]:
# Candidate values for the XGBoost randomized search.
hyperparameter_grid = dict(
    n_estimators=[100, 500, 900, 1100, 1500],
    max_depth=[2, 3, 5, 10, 15],
    learning_rate=[0.05, 0.1, 0.15, 0.20],
    min_child_weight=[1, 2, 3, 4],
)

# 10-fold CV repeated 3 times; 50 sampled candidates ranked by (negated) MAE.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

random_cv = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=hyperparameter_grid,
    n_iter=50,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=cv,
    verbose=5,
    random_state=42,
    return_train_score=True,
)
random_cv.fit(X_train, y_train)
y_pred_xgb = random_cv.predict(X_test)

In [None]:
# Test-set metrics for the tuned XGBoost model.
r_sq = r2_score(y_test, y_pred_xgb)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_xgb = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_xgb),"MSE=",mean_squared_error(y_test,y_pred_xgb),"R2",r2_score(y_test, y_pred_xgb),"Adj R2",adj_r2_xgb)
# The search maximises neg_mean_absolute_error; negate it to report a real MAE.
print('Best CV MAE: %.3f' % -random_cv.best_score_)
print('Config: %s' % random_cv.best_params_)

# Table of results for viscosity

In [None]:

# Summary table of test-set metrics for every tuned viscosity model.
# Specify the Column Names while initializing the Table
viscosity = PrettyTable(["Model", "MAE", "MSE", "R2","Adj R2"])

# (label, test-set predictions, adjusted R2) in display order.
# The Gaussian row label is corrected to "Gaussian Process Regressor".
model_results = [
    ("Linear Regression", y_pred_lr, adj_r2_lr),
    ("Gaussian Process Regressor", y_pred_gp, adj_r2_gp),
    ("Random Forest", y_pred_rf, adj_r2_rf),
    ("Support Vector Regressor", y_pred_svr, adj_r2_svr),
    ("Ridge Regression", y_pred_ri, adj_r2_ri),
    ("Lasso Regression", y_pred_ls, adj_r2_ls),
    ("K nearest Neighbours", y_pred_knn, adj_r2_knn),
    ("XG boost", y_pred_xgb, adj_r2_xgb),
]
for label, preds, adj_r2 in model_results:
    viscosity.add_row([label,
                       mean_absolute_error(y_test, preds),
                       mean_squared_error(y_test, preds),
                       r2_score(y_test, preds),
                       adj_r2])


In [None]:
print(viscosity)

# Density

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/density.csv")

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
len(df.columns)

In [None]:
# One-hot encode categorical columns (first level dropped to avoid collinearity).
# dtype=float keeps the dummies numeric: pandas >= 2.0 defaults to bool, which
# makes the mixed frame object-dtype when statsmodels converts it for OLS.
df = pd.get_dummies(df, dummy_na=False, drop_first=True, dtype=float)

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
df.describe(include="all")

In [None]:
# Set the figure size BEFORE drawing: rcParams only affect figures created
# afterwards, so setting it after the heatmap left this plot at the old size.
sns.set(rc={'figure.figsize':(10,10)})

# Pairwise correlation of all (now numeric) columns, annotated with the values.
corr=df.corr()
sns.heatmap(corr,annot=True)

In [None]:
X = df.drop(["mixture","mass_frac"],axis=1)
X

In [None]:
y = df["mixture"]
y

# OLS test model

In [None]:
# Add an intercept column and fit a statsmodels OLS on the full data set as a
# quick sanity check; the summary reports coefficients and fit statistics.
# NOTE: X is overwritten here, so the added constant column is also part of the
# train/test split and of every model fitted further down in this section.
X = sm.add_constant(X)
result = sm.OLS(y, X).fit()
print(result.summary())

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=9)
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

In [None]:
lr = LinearRegression()
lr.fit(X_train,y_train)

y_pred = lr.predict(X_test)
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Learning curve: R2 of the scaled linear model as the training set grows.
# X_train still carries the constant column added for the OLS fit, hence
# with_mean=False (keep that column intact) and fit_intercept=False.
pipeline = make_pipeline(StandardScaler(with_mean=False),LinearRegression(fit_intercept=False,n_jobs=1))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the 10 CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
# The default scorer of a regressor is R2, and the "test" scores come from the
# CV validation folds, so label the curves accordingly.
plt.plot(train_sizes, train_mean, color='red', marker='o', markersize=5, label='Training R2')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='red', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

In [None]:
# Only fit_intercept changes the fitted model. n_jobs merely controls
# parallelism, so searching over it multiplied the number of fits by six
# without ever changing a score.
parameters = {"fit_intercept": [True, False]}

# 10-fold CV repeated 3 times; the default scorer for a regressor is R2.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(estimator=lr, param_grid = parameters, cv = cv, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_lr = grid.predict(X_test)

In [None]:
# Test-set metrics for the tuned linear regression.
r_sq = r2_score(y_test, y_pred_lr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_lr = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_lr),"MSE=",mean_squared_error(y_test,y_pred_lr),"R2",r2_score(y_test, y_pred_lr),"  Adj R2 ",adj_r2_lr)
# best_score_ is the mean cross-validated R2 (default scorer), not an MAE.
print('Best CV R2: %.3f' % grid.best_score_)
print('Config: %s' % grid.best_params_)

# Gaussian Process Regression

In [None]:
kernel = gp.kernels.ConstantKernel(1.0, (1e-1, 1e3)) * gp.kernels.RBF(10.0, (1e-3, 1e3))
model = gp.GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100, alpha=0.1, normalize_y=True)
model.fit(X_train, y_train)

y_pred, std = model.predict(X_test, return_std=True)
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
param_grid = [{
    "alpha":  [1e-2, 1e-3],
    "kernel": [RBF(l) for l in np.logspace(-1, 1, 3)]
}, {
    "alpha":  [1e-2, 1e-3],
    "kernel": [DotProduct(sigma_0) for sigma_0 in np.logspace(-1, 1, 3)]
}]

# scores = ['explained_variance', 'r2']


# for score in scores:
# print("# Tuning hyper-parameters for %s" % score)
# print()
    
    # sc = StandardScaler().fit(X_train)
    # X_train_sc = sc.fit_transform(X_train)
    # X_test_sc = sc.fit_transform(X_test)

clf = GridSearchCV(estimator=gp.GaussianProcessRegressor(), param_grid=param_grid, cv=cv,scoring='r2')
clf.fit(X_train, y_train)
y_pred_gp = clf.predict(X_test)

In [None]:
# Test-set metrics for the tuned Gaussian process.
r_sq = r2_score(y_test, y_pred_gp)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_gp = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_gp),"MSE=",mean_squared_error(y_test,y_pred_gp),"R2",r2_score(y_test, y_pred_gp)," Adj R2 ",adj_r2_gp)
# The search was scored with 'r2', so best_score_ is a mean CV R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

In [None]:
# Learning curve: R2 of a scaled Gaussian process (fixed alpha, RBF kernel)
# as the training set grows.
pipeline = make_pipeline(StandardScaler(with_mean=False),gp.GaussianProcessRegressor(alpha=0.01,kernel=RBF(length_scale=1)))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the 10 CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
# The default scorer of a regressor is R2, and the "test" scores come from the
# CV validation folds, so label the curves accordingly.
plt.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training R2')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='blue', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

# Random forest

In [None]:
y_test = np.array(y_test)
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators = 100, random_state = 6666)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Search space for the random-forest RandomizedSearchCV in the next cell.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split.
# 1.0 (all features) is what 'auto' meant for regressors; the 'auto' string was
# deprecated in scikit-learn 1.1 and removed in 1.3, where it raises an error.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree (None = grow until leaves are pure)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
# Seed the forest as well as the search: RandomizedSearchCV's random_state only
# fixes which candidates are sampled, so an unseeded forest gave different
# scores (and possibly a different winner) on every run.
rf = RandomForestRegressor(random_state=6666)
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 15, cv = cv, verbose=2, random_state=42, n_jobs = -1)
rf_random.fit(X_train,y_train)
y_pred_rf = rf_random.predict(X_test)

In [None]:
# Test-set metrics for the tuned random forest.
r_sq = r2_score(y_test, y_pred_rf)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_rf = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_rf),"MSE=",mean_squared_error(y_test,y_pred_rf),"R2",r2_score(y_test, y_pred_rf)," Adj R2",adj_r2_rf)

# Support Vector Machines

In [None]:
sc = StandardScaler().fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

svr = SVR(kernel="rbf",C=500)
svr.fit(X_train_std,y_train)
y_test_pred = svr.predict(X_test_std)

print("MAE= ",mean_absolute_error(y_test, y_test_pred),"MSE=",mean_squared_error(y_test,y_test_pred),"R2",r2_score(y_test, y_test_pred))

In [None]:

# Grid-search the SVR on standardised features.
parameters = {'kernel': ('linear', 'rbf','poly'), 'C':[1.5, 10],'gamma': [1e-7, 1e-4],'epsilon':[0.1,0.2,0.5,0.3]}

clf = GridSearchCV(SVR(), parameters)

# Fit the scaler on the training rows only and reuse it for the test rows;
# refitting it on X_test would scale the two sets inconsistently.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

# Train and predict on the scaled matrices. Previously the unscaled X_train /
# X_test were passed here, so the scaled copies were never used.
clf.fit(X_train_sc,y_train)
y_pred_svr = clf.predict(X_test_sc)

In [None]:
# Test-set metrics for the tuned SVR.
r_sq = r2_score(y_test, y_pred_svr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_svr = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_svr),"MSE=",mean_squared_error(y_test,y_pred_svr),"R2",r2_score(y_test, y_pred_svr),"Adj_r2",adj_r2_svr)
# No scoring was passed to the search, so best_score_ is the default R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

# Ridge Regression

In [None]:
from sklearn.linear_model import RidgeCV,Ridge,Lasso
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV

# X_train_std = sc.fit_transform(X_train)
# X_test_std = sc.fit_transform(X_test)
r_alphas = np.logspace(0, 5, 100)


ridge_model = RidgeCV(alphas=r_alphas, scoring='r2')

ridge_model = ridge_model.fit(X_train, y_train)

y_pred = ridge_model.predict(X_test)

In [None]:

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
ridge_model = Ridge(alpha=1)

ridge_model.fit(X_train,y_train)

y_pred = ridge_model.predict(X_test)

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
model = Ridge()
# define model evaluation method
cv = RepeatedKFold(n_splits=6, n_repeats=4, random_state=243)
# define grid
grid = dict()

grid['alpha'] = np.arange(0, 1, 0.01)

search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

search.fit(X_train,y_train)

y_pred_ri = search.predict(X_test)

In [None]:
# Test-set metrics for the tuned Ridge model.
r_sq = r2_score(y_test, y_pred_ri)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_ri = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
# The adjusted R2 was printed without a label here; add one like the other models.
print("MAE= ",mean_absolute_error(y_test, y_pred_ri),"MSE=",mean_squared_error(y_test,y_pred_ri),"R2",r2_score(y_test, y_pred_ri),"Adj R2",adj_r2_ri)
# The search maximises neg_mean_absolute_error; negate it to report a real MAE.
print('Best CV MAE: %.3f' % -search.best_score_)
print('Config: %s' % search.best_params_)

# Lasso Regression

In [None]:
lasso_model = Lasso(alpha=0)
# define model evaluation method
lasso_model.fit(X_train,y_train)

y_pred = lasso_model.predict(X_test)

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Tune the Lasso penalty, ranked by (negated) MAE under 10-fold CV repeated 3 times.
model = Lasso()
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# define grid: 0.01 .. 0.99. alpha=0 is dropped because scikit-learn advises
# against an unpenalised Lasso (coordinate descent converges poorly there).
grid = dict()
grid['alpha'] = np.arange(0, 1, 0.01)[1:]
# define search
lasso = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# Fit on the training split only. Fitting on the full (X, y) let the test rows
# leak into training and made the test metrics below optimistic.
lasso.fit(X_train, y_train)

y_pred_ls = lasso.predict(X_test)

In [None]:
# Test-set metrics for the tuned Lasso model.
r_sq = r2_score(y_test, y_pred_ls)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_ls = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_ls),"MSE=",mean_squared_error(y_test,y_pred_ls),"R2",r2_score(y_test, y_pred_ls),"Adj R2",adj_r2_ls)
# The search maximises neg_mean_absolute_error; negate it to report a real MAE.
print('Best CV MAE: %.3f' % -lasso.best_score_)
print('Config: %s' % lasso.best_params_)

# KNN model

In [None]:
knn  = KNeighborsRegressor(n_neighbors=3)
knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
cv = RepeatedKFold(n_splits=5,n_repeats=3,random_state=999)
params = [{'n_neighbors': [2,3,4,5,6,7,8,9,10,11,12,13,14,15], 
           'weights': ['uniform','distance'],
           'p':[1,2,5],
           'algorithm':['auto','ball_tree','kd_tree','brute']}]

best_knn = GridSearchCV(estimator=KNeighborsRegressor(), 
                      param_grid=params, 
                      cv=cv,
                      verbose=1,  
                      scoring='r2', 
                      return_train_score=True)
best_knn.fit(X_train,y_train)
y_pred_knn = best_knn.predict(X_test)

In [None]:
# Test-set metrics for the tuned KNN model.
r_sq = r2_score(y_test, y_pred_knn)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_knn = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_knn),"MSE=",mean_squared_error(y_test,y_pred_knn),"R2",r2_score(y_test, y_pred_knn),"Adj R2",adj_r2_knn)
# The search was scored with 'r2', so best_score_ is a mean CV R2, not an MAE.
print('Best CV R2: %.3f' % best_knn.best_score_)
print('Config: %s' % best_knn.best_params_)

# XGBoost

In [None]:
import xgboost as xgb
xgb_model = xgb.XGBRegressor(objective ='reg:squarederror')
xgb_model.fit(X_train,y_train)
y_pred = xgb_model.predict(X_test)

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:

hyperparameter_grid = {
    'n_estimators': [100, 500, 900, 1100, 1500],
    'max_depth': [2, 3, 5, 10, 15],
    'learning_rate': [0.05, 0.1, 0.15, 0.20],
    'min_child_weight': [1, 2, 3, 4]
    }

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

random_cv = RandomizedSearchCV(estimator=xgb_model,
            param_distributions=hyperparameter_grid,
            cv=cv, n_iter=50,
            scoring = 'neg_mean_absolute_error',n_jobs = 4,
            verbose = 5, 
            return_train_score = True,
            random_state=42)
random_cv.fit(X_train,y_train)
y_pred_xgb = random_cv.predict(X_test)

In [None]:
# Test-set metrics for the tuned XGBoost model.
r_sq = r2_score(y_test, y_pred_xgb)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_xgb = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_xgb),"MSE=",mean_squared_error(y_test,y_pred_xgb),"R2",r2_score(y_test, y_pred_xgb)," Adj r2",adj_r2_xgb)
# The search maximises neg_mean_absolute_error; negate it to report a real MAE.
print('Best CV MAE: %.3f' % -random_cv.best_score_)
print('Config: %s' % random_cv.best_params_)

# Table of results for density

In [None]:

# Summary table of test-set metrics for every tuned density model.
# Specify the Column Names while initializing the Table
density = PrettyTable(["Model", "MAE", "MSE", "R2","Adj R2"])

# (label, test-set predictions, adjusted R2) in display order.
# The Gaussian row label is corrected to "Gaussian Process Regressor".
model_results = [
    ("Linear Regression", y_pred_lr, adj_r2_lr),
    ("Gaussian Process Regressor", y_pred_gp, adj_r2_gp),
    ("Random Forest", y_pred_rf, adj_r2_rf),
    ("Support Vector Regressor", y_pred_svr, adj_r2_svr),
    ("Ridge Regression", y_pred_ri, adj_r2_ri),
    ("Lasso Regression", y_pred_ls, adj_r2_ls),
    ("K nearest Neighbours", y_pred_knn, adj_r2_knn),
    ("XG boost", y_pred_xgb, adj_r2_xgb),
]
for label, preds, adj_r2 in model_results:
    density.add_row([label,
                     mean_absolute_error(y_test, preds),
                     mean_squared_error(y_test, preds),
                     r2_score(y_test, preds),
                     adj_r2])


In [None]:
print(density)

# Heat Capacity Cp

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/cp_mix_BN.csv")

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
len(df.columns)

In [None]:
# One-hot encode categorical columns (first level dropped to avoid collinearity).
# dtype=float keeps the dummies numeric: pandas >= 2.0 defaults to bool, which
# makes the mixed frame object-dtype when statsmodels converts it for OLS.
df = pd.get_dummies(df, dummy_na=False, drop_first=True, dtype=float)

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
df.describe(include="all")

In [None]:
# Set the figure size BEFORE drawing: rcParams only affect figures created
# afterwards, so setting it after the heatmap had no effect on this plot.
sns.set(rc={'figure.figsize':(10,10)})

# Pairwise correlation of all (now numeric) columns, annotated with the values.
corr=df.corr()
sns.heatmap(corr,annot=True)

In [None]:
X = df.drop(["cp_mix","massfrac"],axis=1)
X

In [None]:
y = df["cp_mix"]
y

# OLS model for testing

In [None]:
# Add an intercept column and fit a statsmodels OLS on the full data set as a
# quick sanity check; the summary reports coefficients and fit statistics.
# NOTE: X is overwritten here, so the added constant column is also part of the
# train/test split and of every model fitted further down in this section.
X = sm.add_constant(X)
result = sm.OLS(y, X).fit()
print(result.summary())

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=9)
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

In [None]:
lr = LinearRegression()
lr.fit(X_train,y_train)

y_pred = lr.predict(X_test)
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Learning curve: R2 of the scaled linear model as the training set grows.
# X_train still carries the constant column added for the OLS fit, hence
# with_mean=False (keep that column intact) and fit_intercept=False.
pipeline = make_pipeline(StandardScaler(with_mean=False),LinearRegression(fit_intercept=False,n_jobs=1))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the 10 CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
# The default scorer of a regressor is R2, and the "test" scores come from the
# CV validation folds, so label the curves accordingly.
plt.plot(train_sizes, train_mean, color='red', marker='o', markersize=5, label='Training R2')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='red', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

In [None]:
# Only fit_intercept changes the fitted model. n_jobs merely controls
# parallelism, so searching over it multiplied the number of fits by six
# without ever changing a score.
parameters = {"fit_intercept": [True, False]}

# 10-fold CV repeated 3 times; the default scorer for a regressor is R2.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(estimator=lr, param_grid = parameters, cv = cv, n_jobs=-1)
grid.fit(X_train, y_train)
y_pred_lr = grid.predict(X_test)

In [None]:
# Test-set metrics for the tuned linear regression.
r_sq = r2_score(y_test, y_pred_lr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_lr = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_lr),"MSE=",mean_squared_error(y_test,y_pred_lr),"R2",r2_score(y_test, y_pred_lr),"Adj R2",adj_r2_lr)
# best_score_ is the mean cross-validated R2 (default scorer), not an MAE.
print('Best CV R2: %.3f' % grid.best_score_)
print('Config: %s' % grid.best_params_)

# Gaussian Process Regression

In [None]:
kernel = gp.kernels.ConstantKernel(1.0, (1e-1, 1e3)) * gp.kernels.RBF(10.0, (1e-3, 1e3))
model = gp.GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100, alpha=0.1, normalize_y=True)
model.fit(X_train, y_train)

y_pred, std = model.predict(X_test, return_std=True)
print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
param_grid = [{
    "alpha":  [1e-2, 1e-3],
    "kernel": [RBF(l) for l in np.logspace(-1, 1, 3)]
}, {
    "alpha":  [1e-2, 1e-3],
    "kernel": [DotProduct(sigma_0) for sigma_0 in np.logspace(-1, 1, 3)]
}]

# scores = ['explained_variance', 'r2']


# for score in scores:
# print("# Tuning hyper-parameters for %s" % score)
# print()
    
    # sc = StandardScaler().fit(X_train)
    # X_train_sc = sc.fit_transform(X_train)
    # X_test_sc = sc.fit_transform(X_test)

clf = GridSearchCV(estimator=gp.GaussianProcessRegressor(), param_grid=param_grid, cv=cv,scoring='r2')
clf.fit(X_train, y_train)
y_pred_gp = clf.predict(X_test)

In [None]:
# Test-set metrics for the tuned Gaussian process.
r_sq = r2_score(y_test, y_pred_gp)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the number of test
# samples and p the number of predictors ('const' is the intercept, not a predictor).
n_obs = len(y_test)
n_pred = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))
adj_r2_gp = (1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_pred - 1)) if n_obs > n_pred + 1 else float("nan")
print("MAE= ",mean_absolute_error(y_test, y_pred_gp),"MSE=",mean_squared_error(y_test,y_pred_gp),"R2",r2_score(y_test, y_pred_gp),"  Adj R2 ",adj_r2_gp)
# The search was scored with 'r2', so best_score_ is a mean CV R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

In [None]:
# Learning curve: R2 of a scaled Gaussian process (fixed alpha, RBF kernel)
# as the training set grows.
pipeline = make_pipeline(StandardScaler(with_mean=False),gp.GaussianProcessRegressor(alpha=0.01,kernel=RBF(length_scale=1)))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the 10 CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
# The default scorer of a regressor is R2, and the "test" scores come from the
# CV validation folds, so label the curves accordingly.
plt.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training R2')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='blue', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

# Random forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Baseline random forest: 100 trees, fixed seed, evaluated on the hold-out split.
# y_test is converted to a plain array for the metric calls below.
y_test = np.array(y_test)

rf = RandomForestRegressor(n_estimators=100, random_state=6666)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Search space for the random-forest RandomizedSearchCV, evaluated with
# repeated 10-fold cross-validation (3 repeats).
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Number of trees in the forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split. 1.0 means "all features",
# which is what 'auto' meant for regressors before that alias was deprecated
# in scikit-learn 1.1 and removed in 1.3.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree (None = grow until leaves are pure)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
# Randomised hyper-parameter search for the random forest (15 candidates).
# The forest is seeded as well as the search: random_state on the search only
# fixes which candidates are sampled, not the trees that are grown.
rf = RandomForestRegressor(random_state=42)
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 15, cv = cv, verbose=2, random_state=42, n_jobs = -1)
rf_random.fit(X_train,y_train)
# predict() uses the best estimator refitted on the whole training split.
y_pred_rf = rf_random.predict(X_test)

In [None]:
# Hold-out metrics for the tuned random forest.
r_sq = r2_score(y_test, y_pred_rf)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_rf = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_rf),"MSE=",mean_squared_error(y_test,y_pred_rf),"R2",r_sq,"Adj R2",adj_r2_rf)
# No `scoring` was given to the search, so best_score_ is the estimator's default score (R2).
print("Best score",rf_random.best_score_)
print(" Best params ",rf_random.best_params_)

# Support Vector Machines

In [None]:
# Baseline RBF support-vector regressor on standardised features.
# The scaler learns its statistics from the training split only and is then
# reused unchanged for the test split.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

svr = SVR(kernel="rbf", C=500)
svr.fit(X_train_std, y_train)
y_test_pred = svr.predict(X_test_std)

print(
    "MAE= ", mean_absolute_error(y_test, y_test_pred),
    "MSE=", mean_squared_error(y_test, y_test_pred),
    "R2", r2_score(y_test, y_test_pred),
)

In [None]:

# Grid-search an SVR on standardised features.
parameters = {'kernel': ('linear', 'rbf','poly'), 'C':[1.5, 10],'gamma': [1e-7, 1e-4],'epsilon':[0.1,0.2,0.5,0.3]}

# Fit the scaler on the training split only and reuse it for the test split;
# refitting it on X_test would standardise the two splits with different
# statistics.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

# Train and predict on the scaled matrices (they were previously computed but
# never passed to the search, so the SVR ran on raw features).
clf = GridSearchCV(SVR(), parameters)
clf.fit(X_train_sc, y_train)
y_pred_svr = clf.predict(X_test_sc)

In [None]:
# Hold-out metrics for the tuned support-vector regressor.
r_sq = r2_score(y_test, y_pred_svr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_svr = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_svr),"MSE=",mean_squared_error(y_test,y_pred_svr),"R2",r_sq,"Adj R2",adj_r2_svr)
# The SVR search has no `scoring` argument, so best_score_ is the default R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

# Ridge Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import RidgeCV,Ridge,Lasso

# Candidate penalties: 100 values spaced logarithmically from 1 to 1e5.
r_alphas = np.logspace(0, 5, 100)

# RidgeCV chooses one of r_alphas using scoring='r2' on the training split.
ridge_model = RidgeCV(alphas=r_alphas, scoring='r2')
ridge_model = ridge_model.fit(X_train, y_train)

y_pred = ridge_model.predict(X_test)

In [None]:

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Ridge baseline with a fixed penalty (alpha=1), scored on the hold-out split.
ridge_model = Ridge(alpha=1)
ridge_model.fit(X_train, y_train)
y_pred = ridge_model.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Tune the ridge penalty by (negated) mean absolute error over repeated
# 6-fold cross-validation with 4 repeats.
model = Ridge()
cv = RepeatedKFold(n_splits=6, n_repeats=4, random_state=243)
grid = {'alpha': np.arange(0, 1, 0.01)}  # 0.00, 0.01, ..., 0.99

search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
search.fit(X_train, y_train)

# Predictions come from the best estimator refitted on the training split.
y_pred_ri = search.predict(X_test)

In [None]:
# Hold-out metrics for the tuned ridge regressor.
r_sq = r2_score(y_test, y_pred_ri)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_ri = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_ri),"MSE=",mean_squared_error(y_test,y_pred_ri),"R2",r_sq,"Adj R2",adj_r2_ri)
# best_score_ holds 'neg_mean_absolute_error'; flip the sign to report a real MAE.
print('Best CV MAE: %.3f' % (-search.best_score_))
print('Config: %s' % search.best_params_)

# Lasso Regression

In [None]:
# Lasso with the penalty switched off (alpha=0), scored on the hold-out split.
# NOTE(review): scikit-learn advises against alpha=0 with Lasso and suggests
# LinearRegression for the unpenalised case -- confirm this baseline is intended.
lasso_model = Lasso(alpha=0)
lasso_model.fit(X_train, y_train)
y_pred = lasso_model.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Tune the lasso penalty by (negated) mean absolute error over repeated
# 10-fold cross-validation with 3 repeats.
model = Lasso()
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# define grid
grid = dict()
grid['alpha'] = np.arange(0, 1, 0.01)
# define search
lasso = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# Fit on the training split only. Fitting on the full X, y let the model see
# the hold-out rows it is evaluated on below, which inflates the test metrics.
lasso.fit(X_train, y_train)

y_pred_ls = lasso.predict(X_test)

In [None]:
# Hold-out metrics for the tuned lasso regressor.
r_sq = r2_score(y_test, y_pred_ls)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_ls = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_ls),"MSE=",mean_squared_error(y_test,y_pred_ls),"R2",r_sq,"Adj R2",adj_r2_ls)
# best_score_ holds 'neg_mean_absolute_error'; flip the sign to report a real MAE.
print('Best CV MAE: %.3f' % (-lasso.best_score_))
print('Config: %s' % lasso.best_params_)

# KNN model

In [None]:
# k-nearest-neighbours baseline (k=3), scored on the hold-out split.
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Exhaustive KNN search scored by R2 over repeated 5-fold CV (3 repeats).
cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=999)

params = [{
    'n_neighbors': list(range(2, 16)),      # k = 2 .. 15
    'weights': ['uniform', 'distance'],
    'p': [1, 2, 5],                         # Minkowski exponent
}]

best_knn = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=params,
    cv=cv,
    verbose=1,
    scoring='r2',
    return_train_score=True,
)
best_knn.fit(X_train, y_train)
y_pred_knn = best_knn.predict(X_test)

In [None]:
# Hold-out metrics for the tuned KNN regressor.
r_sq = r2_score(y_test, y_pred_knn)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_knn = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_knn),"MSE=",mean_squared_error(y_test,y_pred_knn),"R2",r_sq,"Adj R2",adj_r2_knn)
# The search was scored with 'r2', so best_score_ is a cross-validated R2, not an MAE.
print('Best CV R2: %.3f' % best_knn.best_score_)
print('Config: %s' % best_knn.best_params_)

# XGBoost

In [None]:
import xgboost as xgb

# Gradient-boosted trees with default hyper-parameters and squared-error loss,
# scored on the hold-out split.
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:

# Randomised XGBoost search: 50 candidates drawn from the grid below, scored by
# (negated) mean absolute error over repeated 10-fold CV with 3 repeats.
hyperparameter_grid = {
    'n_estimators': [100, 500, 900, 1100, 1500],
    'max_depth': [2, 3, 5, 10, 15],
    'learning_rate': [0.05, 0.1, 0.15, 0.20],
    'min_child_weight': [1, 2, 3, 4],
}

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

random_cv = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=hyperparameter_grid,
    cv=cv,
    n_iter=50,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    verbose=5,
    return_train_score=True,
    random_state=42,
)
random_cv.fit(X_train, y_train)
y_pred_xgb = random_cv.predict(X_test)

In [None]:
# Hold-out metrics for the tuned XGBoost regressor.
r_sq = r2_score(y_test, y_pred_xgb)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_xgb = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_xgb),"MSE=",mean_squared_error(y_test,y_pred_xgb),"R2",r_sq,"Adj R2",adj_r2_xgb)
# best_score_ holds 'neg_mean_absolute_error'; flip the sign to report a real MAE.
print('Best CV MAE: %.3f' % (-random_cv.best_score_))
print('Config: %s' % random_cv.best_params_)

# Table of results for Heat Capacity

In [None]:

# Summary table: one row per tuned model with its hold-out metrics.
cp = PrettyTable(["Model", "MAE", "MSE", "R2","Adj R2"])

# (display name, hold-out predictions, adjusted R2 computed in the model's own cell)
model_results = [
    ("Linear Regression", y_pred_lr, adj_r2_lr),
    ("Gaussian Process Regressor", y_pred_gp, adj_r2_gp),
    ("Random Forest", y_pred_rf, adj_r2_rf),
    ("Support Vector Regressor", y_pred_svr, adj_r2_svr),
    ("Ridge Regression", y_pred_ri, adj_r2_ri),
    ("Lasso Regression", y_pred_ls, adj_r2_ls),
    ("K nearest Neighbours", y_pred_knn, adj_r2_knn),
    ("XG boost", y_pred_xgb, adj_r2_xgb),
]

for model_name, model_pred, model_adj_r2 in model_results:
    cp.add_row([
        model_name,
        mean_absolute_error(y_test, model_pred),
        mean_squared_error(y_test, model_pred),
        r2_score(y_test, model_pred),
        model_adj_r2,
    ])


In [None]:
print(cp)


# Conductivity

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/conductivity_prediction.csv")

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
len(df.columns)

In [None]:
# Multiply the base-fluid and mixture columns by 1000.
# NOTE(review): df is modified in place, so re-running this cell multiplies the
# columns again; reload the CSV first. The factor is presumably a unit
# conversion -- confirm against the data source.
df["base_fluid"]=1000*df["base_fluid"]
df["mixture"] = 1000*df["mixture"]

In [None]:
df.sample(5)

In [None]:
df = pd.get_dummies(df,dummy_na=False,drop_first=True)

In [None]:
df.sample(5)

In [None]:
df.shape

In [None]:
df.describe(include="all")

In [None]:
# Pairwise correlation of all columns, drawn as an annotated heatmap.
corr=df.corr()

# Set the figure size before drawing: rcParams only affect figures created
# afterwards, so setting it after sns.heatmap left this plot at the old size.
sns.set(rc={'figure.figsize':(10,10)})
sns.heatmap(corr,annot=True)

In [None]:
X = df.drop(["mixture","mass_frac"],axis=1)
X

In [None]:
y = df["mixture"]
y

# OLS Model for testing

In [None]:
# Fit an ordinary-least-squares model on the full dataset and print its summary.
# add_constant adds an intercept column to X; because X is reassigned here, that
# column is also part of the train/test split built from X in the next cell.
X = sm.add_constant(X)
result = sm.OLS(y, X).fit()
print(result.summary())

In [None]:
# Hold out 15% of the rows as the test split; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=9)
# Print the shapes of the four resulting pieces.
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

In [None]:
# Ordinary least-squares baseline, scored on the hold-out split.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Learning curve for a scaled linear-regression pipeline (10-fold CV on the
# training split). With no `scoring` argument, learning_curve uses the
# estimator's own score, which for a regressor is R2 -- not accuracy -- so the
# plot is labelled accordingly.
pipeline = make_pipeline(StandardScaler(with_mean=False),LinearRegression(fit_intercept=False,n_jobs=1))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
plt.plot(train_sizes, train_mean, color='red', marker='o', markersize=5, label='Training R2')
# Shaded band: +/- one standard deviation across folds.
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='red', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

In [None]:
# Grid search for linear regression over repeated 10-fold CV (3 repeats).
# Only fit_intercept is searched: n_jobs controls parallelism, not the fitted
# model, so sweeping it multiplied the number of fits by six without ever
# changing a score.
parameters = {"fit_intercept": [True, False]}

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(estimator=lr, param_grid = parameters, cv = cv, n_jobs=-1)
grid.fit(X_train, y_train)
# Predictions come from the best estimator refitted on the training split.
y_pred_lr = grid.predict(X_test)

In [None]:
# Hold-out metrics for the tuned linear regression.
r_sq = r2_score(y_test, y_pred_lr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_lr = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_lr),"MSE=",mean_squared_error(y_test,y_pred_lr),"R2",r_sq,"Adj R2",adj_r2_lr)
# The search has no `scoring` argument, so best_score_ is the default R2, not an MAE.
print('Best CV R2: %.3f' % grid.best_score_)
print('Config: %s' % grid.best_params_)

# Gaussian Process Regression

In [None]:
# Gaussian-process baseline: constant * RBF kernel with explicit bounds for the
# optimiser, 100 optimiser restarts, alpha=0.1 and a normalised target.
amplitude = gp.kernels.ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-1, 1e3))
smoothness = gp.kernels.RBF(length_scale=10.0, length_scale_bounds=(1e-3, 1e3))
kernel = amplitude * smoothness

model = gp.GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=100,
    alpha=0.1,
    normalize_y=True,
)
model.fit(X_train, y_train)

# return_std=True also yields the predictive standard deviation per test row.
y_pred, std = model.predict(X_test, return_std=True)
print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Gaussian-process grid search scored by R2, reusing the `cv` splitter defined
# in an earlier cell. Two kernel families are tried, each with three scale
# values (0.1, 1, 10) and two noise levels.
kernel_scales = np.logspace(-1, 1, 3)

param_grid = [
    {
        "alpha": [1e-2, 1e-3],
        "kernel": [RBF(l) for l in kernel_scales],
    },
    {
        "alpha": [1e-2, 1e-3],
        "kernel": [DotProduct(sigma_0) for sigma_0 in kernel_scales],
    },
]

clf = GridSearchCV(
    estimator=gp.GaussianProcessRegressor(),
    param_grid=param_grid,
    cv=cv,
    scoring='r2',
)
clf.fit(X_train, y_train)
y_pred_gp = clf.predict(X_test)

In [None]:
# Hold-out metrics for the tuned Gaussian-process regressor.
r_sq = r2_score(y_test, y_pred_gp)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_gp = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_gp),"MSE=",mean_squared_error(y_test,y_pred_gp),"R2",r_sq,"Adj R2",adj_r2_gp)
# The search was scored with 'r2', so best_score_ is a cross-validated R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

In [None]:
# Learning curve for a scaled Gaussian-process pipeline (10-fold CV on the
# training split). With no `scoring` argument, learning_curve uses the
# estimator's own score, which for a regressor is R2 -- not accuracy -- so the
# plot is labelled accordingly.
pipeline = make_pipeline(StandardScaler(with_mean=False),gp.GaussianProcessRegressor(alpha=0.01,kernel=RBF(length_scale=1)))

train_sizes, train_scores, test_scores = learning_curve(estimator=pipeline, X=X_train, y=y_train,
                                                        cv=10, train_sizes=np.linspace(0.1, 1.0, 10),
                                                        n_jobs=1)

# Mean and spread across the CV folds for every training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

plt.figure(figsize=(9,7))
plt.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training R2')
# Shaded band: +/- one standard deviation across folds.
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='blue', alpha=0.15)
plt.plot(train_sizes, test_mean, color='green', marker='+', markersize=5, linestyle='--', label='Cross-validation R2')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='green', alpha=0.15)

plt.title('Learning Curve')
plt.xlabel('Training Data Size')
plt.ylabel('R2 score')
plt.grid()
plt.legend(loc='lower right')
plt.show()

# Random forest

In [None]:
# Baseline random forest: 100 trees, fixed seed, evaluated on the hold-out split.
y_test = np.array(y_test)
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators = 100, random_state = 6666)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

r_sq = r2_score(y_test, y_pred_rf)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_rf = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
# Report the forest's own predictions; the old print used `y_pred`, which still
# held the output of an earlier model's cell.
print("MAE= ",mean_absolute_error(y_test, y_pred_rf),"MSE=",mean_squared_error(y_test,y_pred_rf),"R2",r_sq)

In [None]:
# Search space for the random-forest RandomizedSearchCV, evaluated with
# repeated 10-fold cross-validation (3 repeats).
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Number of trees in the forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split. 1.0 means "all features",
# which is what 'auto' meant for regressors before that alias was deprecated
# in scikit-learn 1.1 and removed in 1.3.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree (None = grow until leaves are pure)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
# Randomised hyper-parameter search for the random forest (15 candidates).
# The forest is seeded as well as the search: random_state on the search only
# fixes which candidates are sampled, not the trees that are grown.
rf = RandomForestRegressor(random_state=42)
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 15, cv = cv, verbose=5, random_state=42, n_jobs = -1)
rf_random.fit(X_train,y_train)
# predict() uses the best estimator refitted on the whole training split.
y_pred_rf = rf_random.predict(X_test)

In [None]:
# Hold-out metrics for the tuned random forest.
r_sq = r2_score(y_test, y_pred_rf)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_rf = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_rf),"MSE=",mean_squared_error(y_test,y_pred_rf),"R2",r_sq,"Adj R2",adj_r2_rf)
print("best params",rf_random.best_params_)

# Support Vector Machines

In [None]:
# Baseline RBF support-vector regressor on standardised features.
# The scaler is fitted on the training split only and reused for the test split.
sc = StandardScaler().fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

svr = SVR(kernel="rbf",C=1200,epsilon=0.1011)
svr.fit(X_train_std,y_train)
y_pred_svr = svr.predict(X_test_std)


r_sq = r2_score(y_test, y_pred_svr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_svr = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
# The metrics print was commented out because it referenced `y_test_pred`,
# which this cell never defines; it is restored here with y_pred_svr.
print("MAE= ",mean_absolute_error(y_test, y_pred_svr),"MSE=",mean_squared_error(y_test,y_pred_svr),"R2",r_sq)

In [None]:

# Grid-search an SVR on standardised features.
parameters = {'kernel': ('rbf','poly'), 'C':[500,200],'gamma': [1e-9, 1e-4],'epsilon':[0.1,0.2,0.5,0.3]}

# Fit the scaler on the training split only and reuse it for the test split;
# refitting it on X_test would standardise the two splits with different
# statistics.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

# Train and predict on the scaled matrices (they were previously computed but
# never passed to the search, so the SVR ran on raw features).
clf = GridSearchCV(SVR(), parameters)
clf.fit(X_train_sc, y_train)
y_pred_svr = clf.predict(X_test_sc)

In [None]:
# Hold-out metrics for the tuned support-vector regressor.
r_sq = r2_score(y_test, y_pred_svr)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_svr = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_svr),"MSE=",mean_squared_error(y_test,y_pred_svr),"R2",r_sq,"Adj R2",adj_r2_svr)
# The SVR search has no `scoring` argument, so best_score_ is the default R2, not an MAE.
print('Best CV R2: %.3f' % clf.best_score_)
print('Config: %s' % clf.best_params_)

# Ridge Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import RidgeCV,Ridge,Lasso

# Candidate penalties: 100 values spaced logarithmically from 1 to 1e5.
r_alphas = np.logspace(0, 5, 100)

# RidgeCV chooses one of r_alphas using scoring='r2' on the training split.
ridge_model = RidgeCV(alphas=r_alphas, scoring='r2')
ridge_model = ridge_model.fit(X_train, y_train)

y_pred = ridge_model.predict(X_test)

In [None]:

print("MAE= ",mean_absolute_error(y_test, y_pred),"MSE=",mean_squared_error(y_test,y_pred),"R2",r2_score(y_test, y_pred))

In [None]:
# Ridge baseline with a fixed penalty (alpha=1), scored on the hold-out split.
ridge_model = Ridge(alpha=1)
ridge_model.fit(X_train, y_train)
y_pred = ridge_model.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Tune the ridge penalty by (negated) mean absolute error over repeated
# 6-fold cross-validation with 4 repeats.
model = Ridge()
cv = RepeatedKFold(n_splits=6, n_repeats=4, random_state=243)
grid = {'alpha': np.arange(0, 1, 0.01)}  # 0.00, 0.01, ..., 0.99

search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
search.fit(X_train, y_train)

# Predictions come from the best estimator refitted on the training split.
y_pred_ri = search.predict(X_test)

In [None]:
# Hold-out metrics for the tuned ridge regressor.
r_sq = r2_score(y_test, y_pred_ri)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_ri = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_ri),"MSE=",mean_squared_error(y_test,y_pred_ri),"R2",r_sq,"Adj R2",adj_r2_ri)
# best_score_ holds 'neg_mean_absolute_error'; flip the sign to report a real MAE.
print('Best CV MAE: %.3f' % (-search.best_score_))
print('Config: %s' % search.best_params_)

# Lasso Regression

In [None]:
# Lasso with the penalty switched off (alpha=0), scored on the hold-out split.
# NOTE(review): scikit-learn advises against alpha=0 with Lasso and suggests
# LinearRegression for the unpenalised case -- confirm this baseline is intended.
lasso_model = Lasso(alpha=0)
lasso_model.fit(X_train, y_train)
y_pred = lasso_model.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:

# Tune the lasso penalty by (negated) mean absolute error over repeated
# 10-fold cross-validation with 3 repeats.
model = Lasso()
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# define grid
grid = dict()
grid['alpha'] = np.arange(0, 1, 0.01)
# define search
lasso = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# Fit on the training split only. Fitting on the full X, y let the model see
# the hold-out rows it is evaluated on below, which inflates the test metrics.
lasso.fit(X_train, y_train)

y_pred_ls = lasso.predict(X_test)

In [None]:
# Hold-out metrics for the tuned lasso regressor.
r_sq = r2_score(y_test, y_pred_ls)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_ls = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_ls),"MSE=",mean_squared_error(y_test,y_pred_ls),"R2",r_sq,"Adj R2",adj_r2_ls)
# best_score_ holds 'neg_mean_absolute_error'; flip the sign to report a real MAE.
print('Best CV MAE: %.3f' % (-lasso.best_score_))
print('Config: %s' % lasso.best_params_)

# KNN model

In [None]:
# k-nearest-neighbours baseline (k=3), scored on the hold-out split.
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:
# Exhaustive KNN search scored by R2 over repeated 5-fold CV (3 repeats).
cv = RepeatedKFold(n_splits=5,n_repeats=3,random_state=999)
# 'algorithm' is no longer searched: it only selects how neighbours are looked
# up (all options are exact), so sweeping it quadrupled the number of fits
# without changing the predictions being compared.
params = [{'n_neighbors': [2,3,4,5,6,7,8,9,10,11,12,13,14,15],
           'weights': ['uniform','distance'],
           'p':[1,2,5]}]

best_knn = GridSearchCV(estimator=KNeighborsRegressor(),
                      param_grid=params,
                      cv=cv,
                      verbose=1,
                      scoring='r2',
                      return_train_score=True)
best_knn.fit(X_train,y_train)
# Predictions come from the best estimator refitted on the training split.
y_pred_knn = best_knn.predict(X_test)

In [None]:
# Hold-out metrics for the tuned KNN regressor.
r_sq = r2_score(y_test, y_pred_knn)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_knn = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_knn),"MSE=",mean_squared_error(y_test,y_pred_knn),"R2",r_sq,"Adj R2",adj_r2_knn)
# The search was scored with 'r2', so best_score_ is a cross-validated R2, not an MAE.
print('Best CV R2: %.3f' % best_knn.best_score_)
print('Config: %s' % best_knn.best_params_)

# XGBoost

In [None]:
import xgboost as xgb

# Gradient-boosted trees with default hyper-parameters and squared-error loss,
# scored on the hold-out split.
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

print(
    "MAE= ", mean_absolute_error(y_test, y_pred),
    "MSE=", mean_squared_error(y_test, y_pred),
    "R2", r2_score(y_test, y_pred),
)

In [None]:

# Randomised XGBoost search: 50 candidates drawn from the grid below, scored by
# (negated) mean absolute error over repeated 10-fold CV with 3 repeats.
hyperparameter_grid = {
    'n_estimators': [100, 500, 900, 1100, 1500],
    'max_depth': [2, 3, 5, 10, 15],
    'learning_rate': [0.05, 0.1, 0.15, 0.20],
    'min_child_weight': [1, 2, 3, 4],
}

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

random_cv = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=hyperparameter_grid,
    cv=cv,
    n_iter=50,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    verbose=5,
    return_train_score=True,
    random_state=42,
)
random_cv.fit(X_train, y_train)
y_pred_xgb = random_cv.predict(X_test)

In [None]:
# Hold-out metrics for the tuned XGBoost regressor.
r_sq = r2_score(y_test, y_pred_xgb)
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), where n and p describe the
# hold-out set that r_sq was computed on. The previous version used n instead of
# n - 1 and took n and p from the full dataframe (target column included).
n_obs = len(y_test)
n_feat = X_test.shape[1] - int("const" in getattr(X_test, "columns", ()))  # an intercept column is not a predictor
adj_r2_xgb = 1 - (1 - r_sq) * (n_obs - 1) / (n_obs - n_feat - 1)
print("MAE= ",mean_absolute_error(y_test, y_pred_xgb),"MSE=",mean_squared_error(y_test,y_pred_xgb),"R2",r_sq,"Adj R2",adj_r2_xgb)
# best_score_ holds 'neg_mean_absolute_error'; flip the sign to report a real MAE.
print('Best CV MAE: %.3f' % (-random_cv.best_score_))
print('Config: %s' % random_cv.best_params_)

# Table of results for Conductivity

In [None]:

# Summary table: one row per tuned model with its hold-out metrics.
conductivity = PrettyTable(["Model", "MAE", "MSE", "R2","Adj R2"])

# (display name, hold-out predictions, adjusted R2 computed in the model's own cell)
model_results = [
    ("Linear Regression", y_pred_lr, adj_r2_lr),
    ("Gaussian Process Regressor", y_pred_gp, adj_r2_gp),
    ("Random Forest", y_pred_rf, adj_r2_rf),
    ("Support Vector Regressor", y_pred_svr, adj_r2_svr),
    ("Ridge Regression", y_pred_ri, adj_r2_ri),
    ("Lasso Regression", y_pred_ls, adj_r2_ls),
    ("K nearest Neighbours", y_pred_knn, adj_r2_knn),
    ("XG boost", y_pred_xgb, adj_r2_xgb),
]

for model_name, model_pred, model_adj_r2 in model_results:
    conductivity.add_row([
        model_name,
        mean_absolute_error(y_test, model_pred),
        mean_squared_error(y_test, model_pred),
        r2_score(y_test, model_pred),
        model_adj_r2,
    ])


In [None]:
print(conductivity)


In [None]:
%shell

In [None]:
pip install nbconvert

In [None]:
jupyter nbconvert --to html /content/drive/MyDrive/Colab Notebooks/MLmodel_nanofluid_properties.ipynb

In [None]:
! pwd

In [None]:
# Location of this notebook on Google Drive (kept as a comment: a bare path is
# not valid Python and made the cell fail with a SyntaxError):
# /content/drive/MyDrive/Colab Notebooks/MLmodel_nanofluid_properties.ipynb