## Package import

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib

## Read the SAPSCM.csv file from DeepSP

In [2]:
# Load the DeepSP output table; later cells pick their feature columns from
# this frame by column label.
dataset_test = pd.read_csv(filepath_or_buffer='SAPSCM.csv')
# Bare last expression so the notebook renders the loaded frame.
dataset_test

Unnamed: 0,Name,SAP_pos_CDRH1,SAP_pos_CDRH2,SAP_pos_CDRH3,SAP_pos_CDRL1,SAP_pos_CDRL2,SAP_pos_CDRL3,SAP_pos_CDR,SAP_pos_Hv,SAP_pos_Lv,...,SCM_pos_CDRH1,SCM_pos_CDRH2,SCM_pos_CDRH3,SCM_pos_CDRL1,SCM_pos_CDRL2,SCM_pos_CDRL3,SCM_pos_CDR,SCM_pos_Hv,SCM_pos_Lv,SCM_pos_Fv
0,abituzumab,3.86,4.35,8.72,2.15,4.70,6.79,30.68,45.53,36.43,...,33.36,50.21,58.55,37.38,99.53,19.15,301.27,877.35,1176.29,2044.64
1,abrilumab,3.14,0.95,1.52,1.63,3.66,5.10,17.65,43.00,29.55,...,4.73,1.59,8.02,34.77,5.28,23.33,70.42,921.07,925.66,1834.76
2,adalimumab,2.13,2.52,14.45,1.90,3.59,3.17,27.50,58.42,30.52,...,3.18,19.58,29.51,116.77,41.76,55.55,263.84,907.11,1219.44,2109.09
3,alemtuzumab,2.10,3.39,4.52,1.68,3.61,3.74,20.84,51.51,32.00,...,31.36,117.38,112.29,59.08,40.17,161.99,528.97,1438.97,1261.75,2691.13
4,alirocumab,2.34,0.49,5.75,6.34,2.48,4.47,23.01,52.35,43.46,...,87.39,30.73,22.05,163.36,64.86,19.05,393.55,1270.47,966.63,2228.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,vedolizumab,2.90,0.38,11.47,6.61,5.52,1.48,28.12,51.99,53.38,...,35.55,-3.78,-0.06,125.56,43.83,73.31,267.23,969.92,1093.80,2053.88
133,veltuzumab,2.51,4.15,14.46,3.16,4.07,2.54,31.23,43.67,37.64,...,34.90,0.02,39.57,32.31,35.55,30.64,166.97,1078.54,1059.89,2115.08
134,visilizumab,6.16,3.60,15.23,2.19,3.86,2.65,34.40,57.11,32.81,...,100.69,106.18,75.53,-2.44,69.55,18.26,372.93,1269.88,1078.14,2312.87
135,zalutumumab,1.99,5.14,18.61,0.97,2.46,5.58,34.69,63.19,32.75,...,29.38,5.33,43.21,1.04,2.67,29.45,108.28,1160.79,941.98,2081.37


## Feature Selection and Transformation

In [3]:
# One feature table per predicted property. Each table is a column subset of
# dataset_test selected by label; a missing column raises KeyError.
feature_ACSINS = dataset_test.loc[:, ['SAP_pos_CDRH1', 'SAP_pos_CDRL3', 'SCM_pos_CDRH1', 'SCM_neg_CDR']]
feature_AS = dataset_test.loc[:, ['SAP_pos_CDRH2', 'SCM_pos_CDRL2', 'SCM_pos_CDRL3', 'SCM_neg_CDRL3']]
feature_BVP = dataset_test.loc[:, ['SAP_pos_CDRH1', 'SAP_pos_CDRH3', 'SCM_pos_CDR', 'SCM_neg_CDRH3']]
feature_CIC = dataset_test.loc[:, ['SAP_pos_CDRL2', 'SAP_pos_CDRL3', 'SAP_pos_Lv', 'SCM_neg_CDR']]
feature_CSI = dataset_test.loc[:, ['SAP_pos_CDRL1', 'SAP_pos_Lv', 'SCM_pos_CDRH2', 'SCM_neg_CDRL2']]
feature_ELISA = dataset_test.loc[:, ['SAP_pos_CDRH3', 'SCM_pos_CDR', 'SCM_neg_CDR']]
feature_HIC = dataset_test.loc[:, ['SAP_pos_CDRL3', 'SAP_pos_CDR', 'SAP_pos_Hv', 'SCM_pos_CDRH3']]
feature_HEK = dataset_test.loc[:, ['SAP_pos_CDRH2', 'SAP_pos_CDRL3', 'SCM_pos_Lv', 'SCM_neg_Lv']]
feature_PSR = dataset_test.loc[:, ['SAP_pos_Lv', 'SCM_pos_CDRH2', 'SCM_neg_CDRL2']]
feature_SGAC = dataset_test.loc[:, ['SAP_pos_CDRH1', 'SAP_pos_CDRL3', 'SCM_neg_CDRH2', 'SCM_neg_Lv']]
feature_SMAC = dataset_test.loc[:, ['SAP_pos_CDR', 'SAP_pos_Fv', 'SCM_neg_CDRL2', 'SCM_neg_Fv']]
feature_Tm = dataset_test.loc[:, ['SAP_pos_CDRH1', 'SAP_pos_CDRH2', 'SCM_pos_CDRH3']]


In [4]:
# A single scaler object is reused: every fit_transform call re-fits it on the
# array it receives, so each feature set is standardized on its own columns.
# After this cell `sc` holds the statistics of the last set (Tm).
# NOTE(review): the scaler is fitted on the data being predicted rather than
# loaded alongside the trained models -- confirm this matches the scaling that
# was used when the models were trained.
sc = StandardScaler()

X_ACSINS = sc.fit_transform(feature_ACSINS.to_numpy())
X_AS = sc.fit_transform(feature_AS.to_numpy())
X_BVP = sc.fit_transform(feature_BVP.to_numpy())
X_CIC = sc.fit_transform(feature_CIC.to_numpy())
X_CSI = sc.fit_transform(feature_CSI.to_numpy())
X_ELISA = sc.fit_transform(feature_ELISA.to_numpy())
X_HIC = sc.fit_transform(feature_HIC.to_numpy())
X_HEK = sc.fit_transform(feature_HEK.to_numpy())
X_PSR = sc.fit_transform(feature_PSR.to_numpy())
X_SGAC = sc.fit_transform(feature_SGAC.to_numpy())
X_SMAC = sc.fit_transform(feature_SMAC.to_numpy())
X_Tm = sc.fit_transform(feature_Tm.to_numpy())

## Predictive Model Import 

In [5]:
# Load the pre-trained estimators, one per predicted property, grouped by the
# estimator tag carried in each file name.
# NOTE(review): joblib.load unpickles the file, so only load model files from
# a trusted source.

# Files tagged SVR
ACSINS_SVR_model = joblib.load('Trained_model/ACSINS_SVR_model.joblib')
CSI_SVR_model = joblib.load('Trained_model/CSI_SVR_model.joblib')
HIC_SVR_model = joblib.load('Trained_model/HIC_SVR_model.joblib')
PSR_SVR_model = joblib.load('Trained_model/PSR_SVR_model.joblib')
SGAC_SVR_model = joblib.load('Trained_model/SGAC_SVR_model.joblib')

# Files tagged KNN
BVP_KNN_model = joblib.load('Trained_model/BVP_KNN_model.joblib')
CIC_KNN_model = joblib.load('Trained_model/CIC_KNN_model.joblib')
ELISA_KNN_model = joblib.load('Trained_model/ELISA_KNN_model.joblib')
HEK_KNN_model = joblib.load('Trained_model/HEK_KNN_model.joblib')
SMAC_KNN_model = joblib.load('Trained_model/SMAC_KNN_model.joblib')
Tm_KNN_model = joblib.load('Trained_model/Tm_KNN_model.joblib')

# File tagged LR
AS_LR_model = joblib.load('Trained_model/AS_LR_model.joblib')

## Biophysical Properties Prediction

In [6]:
# Predict every property for all rows with one batched call per model.
#
# Each X_* matrix holds one scaled feature row per row of dataset_test, in the
# same order, so element i of every prediction belongs to row i of the input
# table. Predicting in batch:
#   * no longer uses DataFrame index labels as array positions (the previous
#     row loop indexed X_* with the label from iterrows(), which is only
#     correct for a default 0..n-1 index);
#   * no longer rebinds the feature_* DataFrames to single rows, so the
#     scaling cell can be re-run after this one;
#   * replaces 12 * n single-row predict calls with 12 calls.
#
# np.ravel flattens each result to one value per row; 1-D arrays are accepted
# by np.column_stack in the same way as the previous lists of 1-element arrays.
ACSINS_transformed = np.ravel(ACSINS_SVR_model.predict(X_ACSINS))
AS = np.ravel(AS_LR_model.predict(X_AS))
BVP = np.ravel(BVP_KNN_model.predict(X_BVP))
CIC_transformed = np.ravel(CIC_KNN_model.predict(X_CIC))
CSI_transformed = np.ravel(CSI_SVR_model.predict(X_CSI))
ELISA = np.ravel(ELISA_KNN_model.predict(X_ELISA))
HIC = np.ravel(HIC_SVR_model.predict(X_HIC))
HEK = np.ravel(HEK_KNN_model.predict(X_HEK))
PSR = np.ravel(PSR_SVR_model.predict(X_PSR))
SGAC_transformed = np.ravel(SGAC_SVR_model.predict(X_SGAC))
SMAC_transformed = np.ravel(SMAC_KNN_model.predict(X_SMAC))
Tm = np.ravel(Tm_KNN_model.predict(X_Tm))



## Result Sheet Generation

In [7]:
# 'Name' column as a single-column 2-D array (one row per input row), ready to
# be stacked side by side with the prediction columns.
Name = dataset_test.loc[:, ['Name']].to_numpy()

In [8]:
# Header of the result sheet: 'Name' first, then one label per prediction
# column in the same order the columns are stacked below.
result_columns = [
    'Name', 'ACSINS_transformed', 'AS', 'BVP', 'CIC_transformed', 'CSI_transformed',
    'ELISA', 'HIC', 'HEK', 'PSR', 'SGAC_transformed', 'SMAC_transformed', 'Tm',
]

# Side-by-side table: names in column 0, predictions in the remaining columns.
data = np.column_stack((
    Name, ACSINS_transformed, AS, BVP, CIC_transformed, CSI_transformed,
    ELISA, HIC, HEK, PSR, SGAC_transformed, SMAC_transformed, Tm,
))

# Write through pandas so fields are quoted/escaped when necessary. np.savetxt
# with fmt='%s' emits raw text, so a name containing a comma or quote would
# shift the columns of that row. index=False keeps the file layout unchanged
# (no extra index column); na_rep='nan' keeps missing values spelled as before.
pd.DataFrame(data, columns=result_columns).to_csv(
    'Prediction_Result.csv', index=False, na_rep='nan'
)


In [9]:
# Read the saved sheet back from disk so the displayed table reflects exactly
# what was written to 'Prediction_Result.csv'.
Result = pd.read_csv(filepath_or_buffer='Prediction_Result.csv')
Result

Unnamed: 0,Name,ACSINS_transformed,AS,BVP,CIC_transformed,CSI_transformed,ELISA,HIC,HEK,PSR,SGAC_transformed,SMAC_transformed,Tm
0,abituzumab,0.129965,0.061849,2.241486,-0.150425,0.754456,1.246774,9.567307,110.547472,0.220896,0.066478,-0.609231,74.083333
1,abrilumab,-1.789201,0.031188,1.504088,-1.883633,-0.736264,1.098520,9.227625,133.475483,0.092503,0.498120,-0.504355,72.500000
2,adalimumab,-0.142208,0.050512,1.923480,0.332348,-0.007850,1.106229,9.436170,115.630949,0.078699,0.361446,-0.461127,72.333333
3,alemtuzumab,0.305071,0.073762,4.494636,-0.161054,0.165025,2.792832,9.446484,137.255561,0.144123,0.634713,-0.881810,68.916667
4,alirocumab,0.109300,0.047116,2.811663,-0.402932,-0.126657,1.150019,9.551408,120.915541,0.133017,0.352992,0.007217,75.416667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,vedolizumab,-0.441513,0.052470,3.009651,-0.094506,-0.289167,1.204125,10.784502,156.510514,0.053686,0.158359,0.891792,74.416667
133,veltuzumab,0.563540,0.045539,1.557630,0.356199,-0.515231,1.065002,11.135058,183.962341,0.096939,0.422563,0.036602,69.750000
134,visilizumab,0.949517,0.052161,3.735838,0.133477,0.541219,1.580209,9.826037,219.159756,0.318070,-0.630253,-0.285582,69.916667
135,zalutumumab,-1.664908,0.036776,3.967388,-2.213117,-0.732720,1.155225,9.749339,175.253958,0.082598,0.617887,-0.233052,72.083333
