In [3]:
import pandas as pd
import numpy as np

import time
from sklearn.metrics import r2_score
from scipy.optimize import curve_fit
from scipy.optimize import leastsq
import scipy.optimize as opt
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import gc

from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import MinMaxScaler
import os
from data_preprocessing import FilteringCurves, ShowResponseCurves
from fitting_curves import FittingColumn, ShowResponseCurvesWithFitting, compute_r2_score

from IPython.display import display
_FOLDER = "results/"
# _FOLDER = "/home/acq18mk/master/results/"

### Lasso regression

In [28]:
number_coefficients = 4
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_description.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_description.csv")

column_not_to_use = ["Unnamed: 0", "Unnamed: 0.1", "COSMIC_ID", "DRUG_ID", "Drug_Name", "Synonyms", "Target", "deriv_found", "PubChem_ID",
                     "elements", "inchi_key", "canonical_smiles", "inchi_string", 
                     "third_target", "first_target", "molecular_formula", "second_target", "Target_Pathway"]
param1 = ["param_" +str(i) for i in range(1, number_coefficients+1)]
param2 = ["param" +str(i) for i in range(1, number_coefficients+1)] 
norm_response  = ["norm_cells_"+str(i) for i in range(1, number_coefficients+1)]
con_columns  = ["fd_num_"+str(i) for i in range(1, number_coefficients+1)]

not_X_columns = param1 + param2 + norm_response + con_columns + column_not_to_use
X_columns = set(train.columns) - set(not_X_columns)

columns_to_use = ["DRUG_ID", "Drug_Name", "COSMIC_ID"] + ["pred_coef_"+str(i+1) for i in range(number_coefficients)]

potential_columns_for_normalisation = []
for col in train.columns:
    if (train[col].nunique()>2) & (train[col].dtype != "O"):
        potential_columns_for_normalisation.append(col)

columns_for_normalisation = list(set(potential_columns_for_normalisation) - set(norm_response) - set(param1) - set(param2) -set(['Unnamed: 0', 'DRUG_ID', 'COSMIC_ID',]))

In [7]:
#1. just drug profiles and cell lines
alpha_parameters_df1_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

# 2. drug profiles, cell lines and drug description
alpha_parameters_df2_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

# 3. drug profiles, cell lines and drug features
alpha_parameters_df3_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

# 4. drug profiles, cell lines and drug features with scaling
alpha_parameters_df4_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

In [8]:
df_test_lasso=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

y = train["param_1"]
y_test = test["param_1"]
model = Lasso(alpha=0.5)
model.fit(X,y)
features_1 = model.coef_

df_test_lasso["y_test_1"]= test["param_1"]
df_test_lasso["y_pred_1"] = model.predict(X_test)

y = train["param_2"]
y_test = test["param_2"]
model = Lasso(alpha=1)
model.fit(X,y)
features_2 = model.coef_

df_test_lasso["y_test_2"]= test["param_2"]
df_test_lasso["y_pred_2"] = model.predict(X_test)

y = train["param_3"]
y_test = test["param_3"]
model = Lasso(alpha=10)
model.fit(X,y)
features_3 = model.coef_

df_test_lasso["y_test_3"]= test["param_3"]
df_test_lasso["y_pred_3"] = model.predict(X_test)

y = train["param_4"]
y_test = test["param_4"]
model = Lasso(alpha=0.1)
model.fit(X,y)
features_4 = model.coef_

df_test_lasso["y_test_4"]= test["param_4"]
df_test_lasso["y_pred_4"] = model.predict(X_test)

display(df_test_lasso.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.5876545,0.994099,0.9959636,-19.105115,-18.99108,0.078222,0.080602
std,1.126649,1.111367e-16,1.168964,4.445468e-16,24.391398,3.556374e-15,0.175412,0.0
min,-6.129963,0.5876545,-10.811767,0.9959636,-206.186539,-18.99108,-1.524319,0.080602
25%,0.398606,0.5876545,1.01166,0.9959636,-20.091012,-18.99108,0.005772,0.080602
50%,0.496912,0.5876545,1.065012,0.9959636,-12.729402,-18.99108,0.046012,0.080602
75%,0.616302,0.5876545,1.167762,0.9959636,-9.194847,-18.99108,0.164274,0.080602
max,9.830121,0.5876545,8.02314,0.9959636,5.48676,-18.99108,0.562794,0.080602


In [10]:
#very small std for the predicted data!!!
#Looks like it predict some average values

In [11]:
sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0)

(0, 0, 0, 0)

In [12]:
# with Scaling
df_test_lasso = pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

# 4. drug profiles, cell lines and drug features with scaling
alpha_parameters_df4_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

#************************
y = train["param_1"]
y_test = test["param_1"]
model = Lasso(alpha = 0.5)
model.fit(X, y)
features_1 = model.coef_

df_test_lasso["y_test_1"]= test["param_1"]
df_test_lasso["y_pred_1"] = model.predict(X_test)

#************************
y = train["param_2"]
y_test = test["param_2"]
model = Lasso(alpha = 1)
model.fit(X, y)
features_2 = model.coef_

df_test_lasso["y_test_2"]= test["param_2"]
df_test_lasso["y_pred_2"] = model.predict(X_test)

#************************
y = train["param_3"]
y_test = test["param_3"]
model = Lasso(alpha = 10)
model.fit(X, y)
features_3 = model.coef_

df_test_lasso["y_test_3"]= test["param_3"]
df_test_lasso["y_pred_3"] = model.predict(X_test)

#************************
y = train["param_4"]
y_test = test["param_4"]
model = Lasso(alpha = 0.1)
model.fit(X, y)
features_4 = model.coef_

df_test_lasso["y_test_4"]= test["param_4"]
df_test_lasso["y_pred_4"] = model.predict(X_test)
#************************

display(df_test_lasso.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.5876545,0.994099,0.9959636,-19.105115,-18.99108,0.078222,0.080602
std,1.126649,1.111367e-16,1.168964,4.445468e-16,24.391398,3.556374e-15,0.175412,0.0
min,-6.129963,0.5876545,-10.811767,0.9959636,-206.186539,-18.99108,-1.524319,0.080602
25%,0.398606,0.5876545,1.01166,0.9959636,-20.091012,-18.99108,0.005772,0.080602
50%,0.496912,0.5876545,1.065012,0.9959636,-12.729402,-18.99108,0.046012,0.080602
75%,0.616302,0.5876545,1.167762,0.9959636,-9.194847,-18.99108,0.164274,0.080602
max,9.830121,0.5876545,8.02314,0.9959636,5.48676,-18.99108,0.562794,0.080602


In [13]:
sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0)

(0, 0, 0, 0)

### Ridge Regression

In [14]:
# 3. drug profiles, cell lines and drug features
alpha_parameters_df3_ridge = {1: 500.0, 2: 500.0, 3: 500.0, 4: 500.0}
solver_parameters_df3_ridge = {1: 'auto', 2: 'auto', 3: 'auto', 4: 'sparse_cg'}

In [15]:
# Without Scaling = mormalisation of some features
df_test_ridge = pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#*************************
y = train["param_1"]
y_test = test["param_1"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_1 = model.coef_

df_test_ridge["y_test_1"]= test["param_1"]
df_test_ridge["y_pred_1"] = model.predict(X_test)
#*************************

y = train["param_2"]
y_test = test["param_2"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_2 = model.coef_

df_test_ridge["y_test_2"]= test["param_2"]
df_test_ridge["y_pred_2"] = model.predict(X_test)
#*************************

y = train["param_3"]
y_test = test["param_3"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_3 = model.coef_

df_test_ridge["y_test_3"]= test["param_3"]
df_test_ridge["y_pred_3"] = model.predict(X_test)

#*************************
y = train["param_4"]
y_test = test["param_4"]
model = Ridge(alpha=500, solver="sparse_cg")
model.fit(X,y)
features_4 = model.coef_

df_test_ridge["y_test_4"]= test["param_4"]
df_test_ridge["y_pred_4"] = model.predict(X_test)
#*************************

display(df_test_ridge.describe())
print("Features!=0:", sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0))

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.596915,0.994099,0.985435,-19.105115,-18.892281,0.078222,0.08077
std,1.126649,0.107318,1.168964,0.168243,24.391398,2.320701,0.175412,0.037125
min,-6.129963,0.254529,-10.811767,-0.45978,-206.186539,-29.410697,-1.524319,0.009561
25%,0.398606,0.526867,1.01166,0.888915,-20.091012,-20.254508,0.005772,0.052391
50%,0.496912,0.585559,1.065012,0.985232,-12.729402,-18.853736,0.046012,0.068744
75%,0.616302,0.64632,1.167762,1.085712,-9.194847,-17.472405,0.164274,0.105012
max,9.830121,1.122744,8.02314,1.510871,5.48676,-10.384942,0.562794,0.200791


Features!=0: 1072 1072 1072 1072


In [16]:
# 4. drug profiles, cell lines and drug features with scaling
alpha_parameters_df4_ridge = {1: 500.0, 2: 500.0, 3: 500.0, 4: 500.0}
solver_parameters_df4_ridge = {1: 'auto', 2: 'auto', 3: 'auto', 4: 'sparse_cg'}

In [17]:
# with Scaling

df_test_ridge = pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

#*************************
y = train["param_1"]
y_test = test["param_1"]

model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_1 = model.coef_

df_test_ridge["y_test_1"]= test["param_1"]
df_test_ridge["y_pred_1"] = model.predict(X_test)

y = train["param_2"]
y_test = test["param_2"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_2 = model.coef_

df_test_ridge["y_test_2"]= test["param_2"]
df_test_ridge["y_pred_2"] = model.predict(X_test)

y = train["param_3"]
y_test = test["param_3"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_3 = model.coef_

df_test_ridge["y_test_3"]= test["param_3"]
df_test_ridge["y_pred_3"] = model.predict(X_test)

y = train["param_4"]
y_test = test["param_4"]
model = Ridge(alpha=500, solver="sparse_cg")
model.fit(X,y)
features_4 = model.coef_

df_test_ridge["y_test_4"]= test["param_4"]
df_test_ridge["y_pred_4"] = model.predict(X_test)

display(df_test_ridge.describe())

print("Features!=0:", sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0))

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.596915,0.994099,0.985435,-19.105115,-18.892281,0.078222,0.08077
std,1.126649,0.107318,1.168964,0.168243,24.391398,2.320701,0.175412,0.037125
min,-6.129963,0.254529,-10.811767,-0.45978,-206.186539,-29.410697,-1.524319,0.009561
25%,0.398606,0.526867,1.01166,0.888915,-20.091012,-20.254508,0.005772,0.052391
50%,0.496912,0.585559,1.065012,0.985232,-12.729402,-18.853736,0.046012,0.068744
75%,0.616302,0.64632,1.167762,1.085712,-9.194847,-17.472405,0.164274,0.105012
max,9.830121,1.122744,8.02314,1.510871,5.48676,-10.384942,0.562794,0.200791


Features!=0: 1072 1072 1072 1072


### Kernel Ridge

In [18]:
pd.read_csv(_FOLDER+"kernel_ridge_model_parameters_drug_cells_PubChem.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,polynomial,"{'alpha': 7.0, 'gamma': 1e-05, 'degree': 1.0}"
1,coef_2,sigmoid,"[{'alpha': 1.0, 'gamma': 0.01, 'coef0': -0.1}]"
2,coef_3,linear,[{'alpha': 500.0}]
3,coef_4,sigmoid,"[{'alpha': 1.0, 'gamma': 1e-05, 'coef0': -0.1}]"


In [19]:
# 2
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_description.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_description.csv")

df_test=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = KernelRidge(kernel='polynomial', alpha=7,gamma=0.00001, degree=1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, coef0=-0.1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = KernelRidge(kernel='linear', alpha=500)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = KernelRidge(kernel='sigmoid', alpha=1, gamma=0.00001, coef0=-0.1)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.585418,0.994099,0.187087,-19.105115,-18.024962,0.078222,0.081042
std,1.126649,0.000216,1.168964,0.346562,24.391398,2.591505,0.175412,0.000332
min,-6.129963,0.584875,-10.811767,-1.156413,-206.186539,-30.782135,-1.524319,0.080152
25%,0.398606,0.585278,1.01166,-0.033116,-20.091012,-19.642632,0.005772,0.08085
50%,0.496912,0.585416,1.065012,0.071257,-12.729402,-17.796042,0.046012,0.080997
75%,0.616302,0.585555,1.167762,0.303842,-9.194847,-16.309679,0.164274,0.081182
max,9.830121,0.58622,8.02314,1.623439,5.48676,-11.783826,0.562794,0.0823


In [20]:
pd.read_csv(_FOLDER+"kernel_ridge_model_parameters_drug_cells_PubChem_Scaling.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,polynomial,"{'alpha': 5.0, 'gamma': 1e-05, 'degree': 1.0}"
1,coef_2,polynomial,"[{'alpha': 1.0, 'gamma': 1e-05, 'degree': 1.0}]"
2,coef_3,polynomial,"[{'alpha': 1.0, 'gamma': 1e-05, 'degree': 1.0}]"
3,coef_4,sigmoid,"[{'alpha': 0.5, 'gamma': 0.01, 'coef0': 5.0}]"


In [21]:
# 3
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = KernelRidge(kernel='polynomial', alpha=5,gamma=0.00001, degree=1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, degree=1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, degree=1)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = KernelRidge(kernel='sigmoid', alpha=0.5, gamma=0.001, coef0=0.5)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.586067,0.994099,0.979826,-19.105115,-18.899795,0.078222,0.080608
std,1.126649,0.000301,1.168964,0.34795,24.391398,4.954979,0.175412,0.0187
min,-6.129963,0.585303,-10.811767,-2.482187,-206.186539,-44.744746,-1.524319,0.029605
25%,0.398606,0.585875,1.01166,0.820788,-20.091012,-21.608582,0.005772,0.06779
50%,0.496912,0.586067,1.065012,0.99128,-12.729402,-18.52558,0.046012,0.076757
75%,0.616302,0.58626,1.167762,1.185009,-9.194847,-15.747627,0.164274,0.092016
max,9.830121,0.587169,8.02314,2.147545,5.48676,-2.183333,0.562794,0.147431


In [22]:
pd.read_csv(_FOLDER+"kernel_ridge_model_parameters_drug_cells_PubChem.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,polynomial,"{'alpha': 7.0, 'gamma': 1e-05, 'degree': 1.0}"
1,coef_2,sigmoid,"[{'alpha': 1.0, 'gamma': 0.01, 'coef0': -0.1}]"
2,coef_3,linear,[{'alpha': 500.0}]
3,coef_4,sigmoid,"[{'alpha': 1.0, 'gamma': 1e-05, 'coef0': -0.1}]"


In [23]:
# 4 with scaling
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = KernelRidge(kernel='polynomial', alpha=7,gamma=0.00001, degree=1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, coef0=-0.1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = KernelRidge(kernel='linear', alpha=500)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = KernelRidge(kernel='sigmoid', alpha=1, gamma=0.00001, coef0=5)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.585417,0.994099,0.200812,-19.105115,-18.01283,0.078222,0.08055809
std,1.126649,0.000216,1.168964,0.345552,24.391398,2.606602,0.175412,7.916587e-08
min,-6.129963,0.584882,-10.811767,-1.155857,-206.186539,-30.51708,-1.524319,0.08055791
25%,0.398606,0.585275,1.01166,-0.02207,-20.091012,-19.674392,0.005772,0.08055804
50%,0.496912,0.585418,1.065012,0.08362,-12.729402,-17.75617,0.046012,0.08055808
75%,0.616302,0.585548,1.167762,0.320273,-9.194847,-16.296291,0.164274,0.08055814
max,9.830121,0.586229,8.02314,1.576219,5.48676,-11.234407,0.562794,0.08055839


### Support Vector Regression 

In [24]:
pd.set_option('display.max_colwidth', -1)
pd.read_csv("results/svr_model_parameters_drug_cells_PubChem.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,sigmoid,"{'C': 7.0, 'coef0': 10.0, 'epsilon': 1.0}"
1,coef_2,sigmoid,"[{'C': 0.1, 'coef0': 10.0, 'epsilon': 1.0}]"
2,coef_3,poly,"[{'C': 1.0, 'degree': 5.0, 'coef0': 10.0, 'epsilon': 5.0}]"
3,coef_4,rbf,"[{'C': 7.0, 'coef0': -0.1, 'epsilon': 0.1}]"


In [25]:
# 3
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = SVR(kernel='sigmoid', C=7, epsilon=1, coef0=10)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = SVR(kernel='sigmoid', C=1, epsilon=1, coef0=10)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = SVR(kernel='poly', C=1, degree = 5, epsilon=5, coef0=10)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = SVR(kernel='rbf', C=7, epsilon=0.1, coef0=-0.1)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,1.21947,0.994099,0.6671013,-19.105115,-15.919965,0.078222,0.090111
std,1.126649,1.00888e-09,1.168964,1.107167e-10,24.391398,10.039805,0.175412,0.07335
min,-6.129963,1.21947,-10.811767,0.6671013,-206.186539,-60.167674,-1.524319,-0.087967
25%,0.398606,1.21947,1.01166,0.6671013,-20.091012,-19.618047,0.005772,0.042146
50%,0.496912,1.21947,1.065012,0.6671013,-12.729402,-15.131967,0.046012,0.076514
75%,0.616302,1.21947,1.167762,0.6671013,-9.194847,-11.230593,0.164274,0.129775
max,9.830121,1.21947,8.02314,0.6671013,5.48676,44.248423,0.562794,0.295982


In [26]:
pd.read_csv("results/svr_model_parameters_drug_cells_PubChem_Scaling.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,rbf,"{'C': 5.0, 'coef0': -0.1, 'epsilon': 0.1}"
1,coef_2,rbf,"[{'C': 7.0, 'coef0': -0.1, 'epsilon': 0.1}]"
2,coef_3,linear,"[{'epsilon': 5.0, 'C': 0.5}]"
3,coef_4,sigmoid,"[{'C': 7.0, 'coef0': 5.0, 'epsilon': 0.1}]"


In [27]:
# 4
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = SVR(kernel='rbf', C=5, epsilon=0.1, coef0=-0.1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = SVR(kernel='rbf', C=7, epsilon=0.1, coef0=-0.1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = SVR(kernel='linear', C=0.5, epsilon=5)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = SVR(kernel='sigmoid', C=7, epsilon=0.1, coef0=5)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.481794,0.994099,1.101743,-19.105115,-15.027245,0.078222,0.099045
std,1.126649,0.101671,1.168964,0.15189,24.391398,3.293872,0.175412,0.000236
min,-6.129963,0.293662,-10.811767,0.848141,-206.186539,-26.326158,-1.524319,0.098383
25%,0.398606,0.398649,1.01166,0.98258,-20.091012,-17.032642,0.005772,0.098881
50%,0.496912,0.462911,1.065012,1.053867,-12.729402,-15.015914,0.046012,0.099025
75%,0.616302,0.556165,1.167762,1.184652,-9.194847,-12.845768,0.164274,0.099181
max,9.830121,0.815779,8.02314,1.525165,5.48676,-5.463925,0.562794,0.099931
