# Package import

In [3]:
from math import sqrt

import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Data import

In [4]:
# Location of the DeepSP feature table for every assay, keyed by assay name.
feature_csv_by_assay = {
    "ACSINS": "../data/ACSINS/DeepSP_Feature.csv",
    "AS": "../data/AS/DeepSP_Feature.csv",
    "BVP": "../data/BVP/DeepSP_Feature.csv",
    "CIC": "../data/CIC/DeepSP_Feature.csv",
    "CSI": "../data/CSI/DeepSP_Feature.csv",
    "ELISA": "../data/ELISA/DeepSP_Feature.csv",
    "HIC": "../data/HIC/DeepSP_Feature.csv",
    "HEK": "../data/HEK/DeepSP_Feature.csv",
    "PSR": "../data/PSR/DeepSP_Feature.csv",
    "SGAC": "../data/SGAC/DeepSP_Feature.csv",
    "SMAC": "../data/SMAC/DeepSP_Feature.csv",
    "Tm": "../data/Tm/DeepSP_Feature.csv",
}

# One DataFrame per assay; later cells read both features and targets from these.
dataset_ACSINS = pd.read_csv(feature_csv_by_assay["ACSINS"])
dataset_AS = pd.read_csv(feature_csv_by_assay["AS"])
dataset_BVP = pd.read_csv(feature_csv_by_assay["BVP"])
dataset_CIC = pd.read_csv(feature_csv_by_assay["CIC"])
dataset_CSI = pd.read_csv(feature_csv_by_assay["CSI"])
dataset_ELISA = pd.read_csv(feature_csv_by_assay["ELISA"])
dataset_HIC = pd.read_csv(feature_csv_by_assay["HIC"])
dataset_HEK = pd.read_csv(feature_csv_by_assay["HEK"])
dataset_PSR = pd.read_csv(feature_csv_by_assay["PSR"])
dataset_SGAC = pd.read_csv(feature_csv_by_assay["SGAC"])
dataset_SMAC = pd.read_csv(feature_csv_by_assay["SMAC"])
dataset_Tm = pd.read_csv(feature_csv_by_assay["Tm"])

In [5]:
# Feature columns used as model inputs for each assay (three or four per assay).
selected_features = {
    "ACSINS": ["SAP_pos_CDRH1", "SAP_pos_CDRL3", "SCM_pos_CDRH1", "SCM_neg_CDR"],
    "AS": ["SAP_pos_CDRH2", "SCM_pos_CDRL2", "SCM_pos_CDRL3", "SCM_neg_CDRL3"],
    "BVP": ["SAP_pos_CDRH1", "SAP_pos_CDRH3", "SCM_pos_CDR", "SCM_neg_CDRH3"],
    "CIC": ["SAP_pos_CDRL2", "SAP_pos_CDRL3", "SAP_pos_Lv", "SCM_neg_CDR"],
    "CSI": ["SAP_pos_CDRL1", "SAP_pos_Lv", "SCM_pos_CDRH2", "SCM_neg_CDRL2"],
    "ELISA": ["SAP_pos_CDRH3", "SCM_pos_CDR", "SCM_neg_CDR"],
    "HIC": ["SAP_pos_CDRL3", "SAP_pos_CDR", "SAP_pos_Hv", "SCM_pos_CDRH3"],
    "HEK": ["SAP_pos_CDRH2", "SAP_pos_CDRL3", "SCM_pos_Lv", "SCM_neg_Lv"],
    "PSR": ["SAP_pos_Lv", "SCM_pos_CDRH2", "SCM_neg_CDRL2"],
    "SGAC": ["SAP_pos_CDRH1", "SAP_pos_CDRL3", "SCM_neg_CDRH2", "SCM_neg_Lv"],
    "SMAC": ["SAP_pos_CDR", "SAP_pos_Fv", "SCM_neg_CDRL2", "SCM_neg_Fv"],
    "Tm": ["SAP_pos_CDRH1", "SAP_pos_CDRH2", "SCM_pos_CDRH3"],
}

# Column-subset DataFrames, in the column order listed above.
X_train_ACSINS = dataset_ACSINS[selected_features["ACSINS"]]
X_train_AS = dataset_AS[selected_features["AS"]]
X_train_BVP = dataset_BVP[selected_features["BVP"]]
X_train_CIC = dataset_CIC[selected_features["CIC"]]
X_train_CSI = dataset_CSI[selected_features["CSI"]]
X_train_ELISA = dataset_ELISA[selected_features["ELISA"]]
X_train_HIC = dataset_HIC[selected_features["HIC"]]
X_train_HEK = dataset_HEK[selected_features["HEK"]]
X_train_PSR = dataset_PSR[selected_features["PSR"]]
X_train_SGAC = dataset_SGAC[selected_features["SGAC"]]
X_train_SMAC = dataset_SMAC[selected_features["SMAC"]]
X_train_Tm = dataset_Tm[selected_features["Tm"]]

# Data Preprocessing

In [6]:
# Standardize every assay's features with its OWN scaler and keep the fitted
# scalers. Re-fitting one shared scaler for each assay would retain only the
# statistics of the last assay, so the training-time scaling of the others
# could not be re-applied to new samples at prediction time.
scalers = {}


def _fit_scale(assay, features):
    """Fit a fresh StandardScaler on `features` and return the scaled array.

    The fitted scaler is stored in `scalers[assay]` once fitting succeeds.
    """
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)
    scalers[assay] = scaler
    return scaled


X_train_ACSINS = _fit_scale("ACSINS", X_train_ACSINS)
X_train_AS = _fit_scale("AS", X_train_AS)
X_train_BVP = _fit_scale("BVP", X_train_BVP)
X_train_CIC = _fit_scale("CIC", X_train_CIC)
X_train_CSI = _fit_scale("CSI", X_train_CSI)
X_train_ELISA = _fit_scale("ELISA", X_train_ELISA)
X_train_HIC = _fit_scale("HIC", X_train_HIC)
X_train_HEK = _fit_scale("HEK", X_train_HEK)
X_train_PSR = _fit_scale("PSR", X_train_PSR)
X_train_SGAC = _fit_scale("SGAC", X_train_SGAC)
X_train_SMAC = _fit_scale("SMAC", X_train_SMAC)
X_train_Tm = _fit_scale("Tm", X_train_Tm)

# Backward compatibility: `sc` remains bound to a scaler fitted on the Tm
# features, which is the state the shared scaler used to end up in.
sc = scalers["Tm"]

In [7]:
# Name of the target column in each assay's table. Most match the assay name;
# SGAC and SMAC use differently named columns.
target_column = {
    "ACSINS": "ACSINS",
    "AS": "AS",
    "BVP": "BVP",
    "CIC": "CIC",
    "CSI": "CSI",
    "ELISA": "ELISA",
    "HIC": "HIC",
    "HEK": "HEK",
    "PSR": "PSR",
    "SGAC": "SGACSTD",
    "SMAC": "SMAC_nor",
    "Tm": "Tm",
}

# Regression targets, one Series per assay.
y_train_ACSINS = dataset_ACSINS[target_column["ACSINS"]]
y_train_AS = dataset_AS[target_column["AS"]]
y_train_BVP = dataset_BVP[target_column["BVP"]]
y_train_CIC = dataset_CIC[target_column["CIC"]]
y_train_CSI = dataset_CSI[target_column["CSI"]]
y_train_ELISA = dataset_ELISA[target_column["ELISA"]]
y_train_HIC = dataset_HIC[target_column["HIC"]]
y_train_HEK = dataset_HEK[target_column["HEK"]]
y_train_PSR = dataset_PSR[target_column["PSR"]]
y_train_SGAC = dataset_SGAC[target_column["SGAC"]]
y_train_SMAC = dataset_SMAC[target_column["SMAC"]]
y_train_Tm = dataset_Tm[target_column["Tm"]]

# Model Training

### ACSINS SVR C=3.5 epsilon=0.1

In [8]:
# RBF-kernel support-vector regressor for the ACSINS assay.
# (SVR is imported in the notebook's top import cell.)
SVR_ACSINS = SVR(C=3.5, epsilon=0.1, kernel="rbf", gamma="scale")
SVR_ACSINS.fit(X_train_ACSINS, y_train_ACSINS)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_ACSINS = SVR_ACSINS.predict(X_train_ACSINS)
r2_ACSINS = r2_score(y_train_ACSINS, y_pred_ACSINS)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_ACSINS = sqrt(r2_ACSINS)
MSE_ACSINS = mean_squared_error(y_train_ACSINS, y_pred_ACSINS)
RMSE_ACSINS = sqrt(MSE_ACSINS)
# Mean absolute error (a different metric from the MSE above).
MAE_ACSINS = mean_absolute_error(y_train_ACSINS, y_pred_ACSINS)
print(r_ACSINS, MSE_ACSINS)

0.6809111969686431 0.6930908164435454


### AS LR

In [9]:
# Linear regression model for the AS assay.
lr_AS = LinearRegression()
lr_AS.fit(X_train_AS, y_train_AS)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_AS = lr_AS.predict(X_train_AS)
r2_AS = r2_score(y_train_AS, y_pred_AS)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_AS = sqrt(r2_AS)
MSE_AS = mean_squared_error(y_train_AS, y_pred_AS)
RMSE_AS = sqrt(MSE_AS)
# Mean absolute error (a different metric from the MSE above).
MAE_AS = mean_absolute_error(y_train_AS, y_pred_AS)
print(r_AS, MSE_AS)

0.27336639194637663 0.002742893761564698


### BVP KNN n=6

In [10]:
# k-nearest-neighbours regressor (k=6) for the BVP assay.
KNN_BVP = KNeighborsRegressor(n_neighbors=6)
KNN_BVP.fit(X_train_BVP, y_train_BVP)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_BVP = KNN_BVP.predict(X_train_BVP)
r2_BVP = r2_score(y_train_BVP, y_pred_BVP)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_BVP = sqrt(r2_BVP)
MSE_BVP = mean_squared_error(y_train_BVP, y_pred_BVP)
RMSE_BVP = sqrt(MSE_BVP)
# Mean absolute error (a different metric from the MSE above).
MAE_BVP = mean_absolute_error(y_train_BVP, y_pred_BVP)
print(r_BVP, MSE_BVP)

0.7387908015347076 8.425241559023506


### CIC KNN n=6

In [11]:
# k-nearest-neighbours regressor (k=6) for the CIC assay.
KNN_CIC = KNeighborsRegressor(n_neighbors=6)
KNN_CIC.fit(X_train_CIC, y_train_CIC)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_CIC = KNN_CIC.predict(X_train_CIC)
r2_CIC = r2_score(y_train_CIC, y_pred_CIC)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_CIC = sqrt(r2_CIC)
MSE_CIC = mean_squared_error(y_train_CIC, y_pred_CIC)
RMSE_CIC = sqrt(MSE_CIC)
# Mean absolute error (a different metric from the MSE above).
MAE_CIC = mean_absolute_error(y_train_CIC, y_pred_CIC)
print(r_CIC, MSE_CIC)

0.6943256767925905 0.5654896239220957


### CSI SVR C=1.0 epsilon=0.2

In [12]:
# RBF-kernel support-vector regressor for the CSI assay.
# (SVR is imported in the notebook's top import cell.)
SVR_CSI = SVR(C=1.0, epsilon=0.2, kernel="rbf", gamma="scale")
SVR_CSI.fit(X_train_CSI, y_train_CSI)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_CSI = SVR_CSI.predict(X_train_CSI)
r2_CSI = r2_score(y_train_CSI, y_pred_CSI)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_CSI = sqrt(r2_CSI)
MSE_CSI = mean_squared_error(y_train_CSI, y_pred_CSI)
RMSE_CSI = sqrt(MSE_CSI)
# Mean absolute error (a different metric from the MSE above).
MAE_CSI = mean_absolute_error(y_train_CSI, y_pred_CSI)
print(r_CSI, MSE_CSI)

0.5552733361636978 0.8817569007497744


### ELISA KNN n=3

In [13]:
# k-nearest-neighbours regressor (k=3) for the ELISA assay.
KNN_ELISA = KNeighborsRegressor(n_neighbors=3)
KNN_ELISA.fit(X_train_ELISA, y_train_ELISA)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_ELISA = KNN_ELISA.predict(X_train_ELISA)
r2_ELISA = r2_score(y_train_ELISA, y_pred_ELISA)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_ELISA = sqrt(r2_ELISA)
MSE_ELISA = mean_squared_error(y_train_ELISA, y_pred_ELISA)
RMSE_ELISA = sqrt(MSE_ELISA)
# Mean absolute error (a different metric from the MSE above).
MAE_ELISA = mean_absolute_error(y_train_ELISA, y_pred_ELISA)
print(r_ELISA, MSE_ELISA)

0.8473431985957309 1.9669604653838246


### HIC SVR C=2.0 epsilon=0.01

In [14]:
# RBF-kernel support-vector regressor for the HIC assay.
# (SVR is imported in the notebook's top import cell.)
SVR_HIC = SVR(C=2.0, epsilon=0.01, kernel="rbf", gamma="scale")
SVR_HIC.fit(X_train_HIC, y_train_HIC)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_HIC = SVR_HIC.predict(X_train_HIC)
r2_HIC = r2_score(y_train_HIC, y_pred_HIC)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_HIC = sqrt(r2_HIC)
MSE_HIC = mean_squared_error(y_train_HIC, y_pred_HIC)
RMSE_HIC = sqrt(MSE_HIC)
# Mean absolute error (a different metric from the MSE above).
MAE_HIC = mean_absolute_error(y_train_HIC, y_pred_HIC)
print(r_HIC, MSE_HIC)

0.7283280020808967 0.5608490329707032


### HEK KNN n=5

In [15]:
# k-nearest-neighbours regressor (k=5) for the HEK assay.
KNN_HEK = KNeighborsRegressor(n_neighbors=5)
KNN_HEK.fit(X_train_HEK, y_train_HEK)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_HEK = KNN_HEK.predict(X_train_HEK)
r2_HEK = r2_score(y_train_HEK, y_pred_HEK)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_HEK = sqrt(r2_HEK)
MSE_HEK = mean_squared_error(y_train_HEK, y_pred_HEK)
RMSE_HEK = sqrt(MSE_HEK)
# Mean absolute error (a different metric from the MSE above).
MAE_HEK = mean_absolute_error(y_train_HEK, y_pred_HEK)
print(r_HEK, MSE_HEK)

0.6524747770520979 2236.5250708465364


### PSR SVR C=0.4 epsilon=0.1

In [16]:
# RBF-kernel support-vector regressor for the PSR assay.
# (SVR is imported in the notebook's top import cell.)
SVR_PSR = SVR(C=0.4, epsilon=0.1, kernel="rbf", gamma="scale")
SVR_PSR.fit(X_train_PSR, y_train_PSR)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_PSR = SVR_PSR.predict(X_train_PSR)
r2_PSR = r2_score(y_train_PSR, y_pred_PSR)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_PSR = sqrt(r2_PSR)
MSE_PSR = mean_squared_error(y_train_PSR, y_pred_PSR)
RMSE_PSR = sqrt(MSE_PSR)
# Mean absolute error (a different metric from the MSE above).
MAE_PSR = mean_absolute_error(y_train_PSR, y_pred_PSR)
print(r_PSR, MSE_PSR)

0.694831459091086 0.02139229739114875


### SGAC SVR C=3.0 epsilon=0.3

In [17]:
# RBF-kernel support-vector regressor for the SGAC assay.
# (SVR is imported in the notebook's top import cell.)
SVR_SGAC = SVR(C=3.0, epsilon=0.3, kernel="rbf", gamma="scale")
SVR_SGAC.fit(X_train_SGAC, y_train_SGAC)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_SGAC = SVR_SGAC.predict(X_train_SGAC)
r2_SGAC = r2_score(y_train_SGAC, y_pred_SGAC)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_SGAC = sqrt(r2_SGAC)
MSE_SGAC = mean_squared_error(y_train_SGAC, y_pred_SGAC)
RMSE_SGAC = sqrt(MSE_SGAC)
# Mean absolute error (a different metric from the MSE above).
MAE_SGAC = mean_absolute_error(y_train_SGAC, y_pred_SGAC)
print(r_SGAC, MSE_SGAC)

0.6887094128163656 0.5218422691491639


### SMAC KNN n=6

In [18]:
# k-nearest-neighbours regressor (k=6) for the SMAC assay.
KNN_SMAC = KNeighborsRegressor(n_neighbors=6)
KNN_SMAC.fit(X_train_SMAC, y_train_SMAC)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_SMAC = KNN_SMAC.predict(X_train_SMAC)
r2_SMAC = r2_score(y_train_SMAC, y_pred_SMAC)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_SMAC = sqrt(r2_SMAC)
MSE_SMAC = mean_squared_error(y_train_SMAC, y_pred_SMAC)
RMSE_SMAC = sqrt(MSE_SMAC)
# Mean absolute error (a different metric from the MSE above).
MAE_SMAC = mean_absolute_error(y_train_SMAC, y_pred_SMAC)
print(r_SMAC, MSE_SMAC)

0.7354505583160753 0.6092917755781933


### Tm KNN n=6

In [19]:
# k-nearest-neighbours regressor (k=6) for the Tm assay.
KNN_Tm = KNeighborsRegressor(n_neighbors=6)
KNN_Tm.fit(X_train_Tm, y_train_Tm)

# In-sample evaluation: predictions are made on the rows used for fitting.
y_pred_Tm = KNN_Tm.predict(X_train_Tm)
r2_Tm = r2_score(y_train_Tm, y_pred_Tm)
# "r" is reported as sqrt(R^2); math.sqrt raises ValueError if R^2 is negative.
r_Tm = sqrt(r2_Tm)
MSE_Tm = mean_squared_error(y_train_Tm, y_pred_Tm)
RMSE_Tm = sqrt(MSE_Tm)
# Mean absolute error (a different metric from the MSE above).
MAE_Tm = mean_absolute_error(y_train_Tm, y_pred_Tm)
print(r_Tm, MSE_Tm)

0.625084304875398 20.779197080291972


# Model Export

In [None]:
# Fitted estimators paired with their destination files, in write order.
export_plan = [
    (SVR_ACSINS, "../models/Trained_model/ACSINS_SVR_model.joblib"),
    (lr_AS, "../models/Trained_model/AS_LR_model.joblib"),
    (KNN_BVP, "../models/Trained_model/BVP_KNN_model.joblib"),
    (KNN_CIC, "../models/Trained_model/CIC_KNN_model.joblib"),
    (SVR_CSI, "../models/Trained_model/CSI_SVR_model.joblib"),
    (KNN_ELISA, "../models/Trained_model/ELISA_KNN_model.joblib"),
    (KNN_HEK, "../models/Trained_model/HEK_KNN_model.joblib"),
    (SVR_HIC, "../models/Trained_model/HIC_SVR_model.joblib"),
    (SVR_PSR, "../models/Trained_model/PSR_SVR_model.joblib"),
    (SVR_SGAC, "../models/Trained_model/SGAC_SVR_model.joblib"),
    (KNN_SMAC, "../models/Trained_model/SMAC_KNN_model.joblib"),
    (KNN_Tm, "../models/Trained_model/Tm_KNN_model.joblib"),
]

# Serialize each estimator with joblib; the target directory must already exist.
written_paths = [joblib.dump(estimator, path) for estimator, path in export_plan]

# Show the return value of the final dump as the cell output, matching what a
# bare trailing joblib.dump(...) call displays.
written_paths[-1]

['Trained_model/Tm_KNN_model.joblib']