In [116]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import LeaveOneOut
from pyswarm import pso
from sklearn.model_selection import train_test_split


In [123]:
df = pd.read_csv("thermal_conductivity.csv")

df.drop('SEBS', axis=1, inplace=True)
df

Unnamed: 0,PE,PS,thermal_conductivity
0,0.0,0.98,0.157
1,0.098,0.882,0.164
2,0.294,0.686,0.1954
3,0.784,0.196,0.307
4,0.0,0.952,0.151
5,0.0952,0.8568,0.161
6,0.2856,0.3336,0.184
7,0.7616,0.1904,0.276
8,0.0,0.909,0.161
9,0.0909,0.8181,0.1641


In [118]:
X = df.drop("thermal_conductivity", axis=1).values
y = df["thermal_conductivity"].values


In [119]:
scaler = StandardScaler()
X = scaler.fit_transform(X)


def svr_mse(params):
    svr = SVR(kernel='rbf', C=params[0], gamma=params[1], epsilon=params[2])
    loocv = LeaveOneOut()
    mse = 0
    for train_index, test_index in loocv.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        svr.fit(X_train, y_train)
        y_pred = svr.predict(X_test)
        mse += (y_pred - y_test)**2
    return mse.mean()


In [120]:
lb = [1e-3, 1e-3, 1e-3] 
ub = [1e3, 1e3, 1e3] 

def pso_optimizer():
    return pso(svr_mse, lb, ub, swarmsize=50, maxiter=1000)

#PSO optimization
best_params, best_mse = pso_optimizer()


Stopping search: maximum iterations reached --> 1000


In [121]:
svr = SVR(kernel='rbf', C=best_params[0], gamma=best_params[1], epsilon=best_params[2])
loocv = LeaveOneOut()
mse = 0
y_preds = []
x_tests=[]
y_tests=[]
for train_index, test_index in loocv.split(X):
    X_train, X_test = X[train_index, :], X[test_index, :]
    y_train, y_test = y[train_index], y[test_index]
    
    svr.fit(X_train, y_train)
    y_pred = svr.predict(X_test)
    y_pred_rounded = np.round(y_pred, 4) # round y_pred to 4 decimal places
    y_preds.append(y_pred_rounded)
    y_tests.append(y_test)
    x_tests.append(X_test[0]) 
    mse += (y_pred - y_test)**2
rmse = np.sqrt(mse.mean())
print("LOOCV RMSE: {:.3f}".format(rmse))

results_df = pd.DataFrame({'X_test': x_tests, 'y_test': y_tests, 'y_pred': y_preds})
results_df


LOOCV RMSE: 0.044


Unnamed: 0,X_test,y_test,y_pred
0,"[-0.9698910840028125, 1.210032485505948]",[0.157],[0.1536]
1,"[-0.6249470584819429, 0.8786556975233216]",[0.164],[0.1677]
2,"[0.06494099255979646, 0.21590212155806868]",[0.1954],[0.1936]
3,"[1.7896611201641452, -1.4409818183550642]",[0.307],[0.28]
4,"[-0.9698910840028125, 1.1153534032251975]",[0.151],[0.1572]
5,"[-0.6348026020682535, 0.7934445234706462]",[0.161],[0.1664]
6,"[0.03537436180086491, -0.9757017568610906]",[0.184],[0.1925]
7,"[1.7108167714736606, -1.4599176348112144]",[0.276],[0.2853]
8,"[-0.9698910840028125, 0.9699533840083311]",[0.161],[0.1524]
9,"[-0.6499379011472305, 0.6625845061754663]",[0.1641],[0.1643]


In [122]:
#EVALUATION

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true))*100
r2 = r2_score(y_tests, y_preds)
mape = mean_absolute_percentage_error(y_tests, y_preds)
print('MAPE:', mape)
print(f"R-squared: {r2:.2f}")

MAPE: 4.303871122578213
R-squared: 0.95


Collecting ternary
  Downloading ternary-0.1.tar.gz (1.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: ternary
  Building wheel for ternary (setup.py): started
  Building wheel for ternary (setup.py): finished with status 'done'
  Created wheel for ternary: filename=ternary-0.1-py3-none-any.whl size=1766 sha256=92d1f0226ac33b6616517e8eb45027fd3c63dae2160ba60592a141a7474040a5
  Stored in directory: c:\users\risha\appdata\local\pip\cache\wheels\ee\d0\c9\71e069a39c89c82bf711f1a01dc5eb92d5465419dbca38a577
Successfully built ternary
Installing collected packages: ternary
Successfully installed ternary-0.1
