In [6]:
import pandas as pd
import numpy as np

import time
from sklearn.metrics import r2_score
from scipy.optimize import curve_fit
from scipy.optimize import leastsq
import scipy.optimize as opt
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import gc

from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import MinMaxScaler
import os
from data_preprocessing import FilteringCurves, ShowResponseCurves
from fitting_curves import FittingColumn, ShowResponseCurvesWithFitting, compute_r2_score

from IPython.display import display
#_FOLDER = "results/"
_FOLDER = "/home/acq18mk/master/results/"

### Testing function

In [7]:
os.listdir(_FOLDER)

['filtered_drug_profiles_123.csv',
 'filtered_drug_profiles_12.csv',
 'filtered_drug_profiles_13.csv',
 'filtered_drug_profiles_23.csv',
 '.ipynb_checkpoints',
 'kernel_learning_1_2.csv',
 'merged_drug_profiles_sigmoid4_123.csv',
 'drug_features_pubchem_id.csv',
 'drug_features_with_pubchem_properties.csv',
 'merged_fitted_sigmoid4_123_with_drugs_description.csv',
 'merged_fitted_sigmoid4_123_with_drugs_properties.csv',
 'merged_fitted_sigmoid4_123_with_drugs_description_split_target.csv',
 'merged_fitted_sigmoid4_123_with_drugs_properties_split_target.csv',
 'kernel_learning_1.csv',
 'kernel_learning_2.csv',
 'kernel_learning_3.csv',
 'test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv',
 'test02_merged_fitted_sigmoid4_123_with_drugs_description.csv',
 'train08_merged_fitted_sigmoid4_123_with_drugs_description.csv',
 'train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv',
 'kernel_learning_2_2.csv',
 'kernel_learning_3_2.csv',
 'kernel_learning_4.csv',
 'Lasso_1.csv',

### train08_merged_fitted_sigmoid4_123_with_drugs_description.csv

### Lasso regression

In [28]:
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_description.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_description.csv")

column_not_to_use = ["Unnamed: 0", "Unnamed: 0.1", "COSMIC_ID", "DRUG_ID", "Drug_Name", "Synonyms", "Target", "deriv_found", "PubChem_ID",
                     "elements", "inchi_key", "canonical_smiles", "inchi_string", "third_target", "first_target", "molecular_formula", "second_target", "Target_Pathway"]
param1 = ["param_" +str(i) for i in range(10)]
param2 = ["param" +str(i) for i in range(10)] 
norm_response  = ["norm_cells_"+str(i) for i in range(10)]
con_columns  = ["fd_num_"+str(i) for i in range(10)]

not_X_columns = param1 + param2 + norm_response + con_columns + column_not_to_use
X_columns = set(train.columns) - set(not_X_columns)

columns_to_use = ["DRUG_ID", "Drug_Name", "COSMIC_ID"] + ["pred_coef_"+str(i+1) for i in range(number_coefficients)]

potential_columns_for_normalisation = []
for col in train.columns:
    if (train[col].nunique()>2) & (train[col].dtype != "O"):
        potential_columns_for_normalisation.append(col)

columns_for_normalisation = list(set(potential_columns_for_normalisation) - set(norm_response) - set(param1) - set(param2) -set(['Unnamed: 0', 'DRUG_ID', 'COSMIC_ID',]))

In [9]:
#1. just drug profiles and cell lines
alpha_parameters_df1_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

# 2. drug profiles, cell lines and drug description
alpha_parameters_df2_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

# 3. drug profiles, cell lines and drug features
alpha_parameters_df3_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

# 4. drug profiles, cell lines and drug features with scaling
alpha_parameters_df4_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

In [15]:
df_test_lasso=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

y = train["param_1"]
y_test = test["param_1"]
model = Lasso(alpha=0.5)
model.fit(X,y)
features_1 = model.coef_


df_test_lasso["y_test_1"]= test["param_1"]
df_test_lasso["y_pred_1"] = model.predict(X_test)

y = train["param_2"]
y_test = test["param_2"]
model = Lasso(alpha=1)
model.fit(X,y)
features_2 = model.coef_

df_test_lasso["y_test_2"]= test["param_2"]
df_test_lasso["y_pred_2"] = model.predict(X_test)

y = train["param_3"]
y_test = test["param_3"]
model = Lasso(alpha=10)
model.fit(X,y)
features_3 = model.coef_

df_test_lasso["y_test_3"]= test["param_3"]
df_test_lasso["y_pred_3"] = model.predict(X_test)

y = train["param_4"]
y_test = test["param_4"]
model = Lasso(alpha=0.1)
model.fit(X,y)
features_4 = model.coef_

df_test_lasso["y_test_4"]= test["param_4"]
df_test_lasso["y_pred_4"] = model.predict(X_test)

display(df_test_lasso.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.5876545,0.994099,0.9959636,-19.105115,-18.99108,0.078222,0.080602
std,1.126649,1.111367e-16,1.168964,4.445468e-16,24.391398,3.556374e-15,0.175412,0.0
min,-6.129963,0.5876545,-10.811767,0.9959636,-206.186539,-18.99108,-1.524319,0.080602
25%,0.398606,0.5876545,1.01166,0.9959636,-20.091012,-18.99108,0.005772,0.080602
50%,0.496912,0.5876545,1.065012,0.9959636,-12.729402,-18.99108,0.046012,0.080602
75%,0.616302,0.5876545,1.167762,0.9959636,-9.194847,-18.99108,0.164274,0.080602
max,9.830121,0.5876545,8.02314,0.9959636,5.48676,-18.99108,0.562794,0.080602


In [None]:
very small std for the predicted data!!!
Looks like it predict some average values

In [13]:
sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0)

(0, 0, 0, 0)

In [17]:
# with Scaling
df_test_lasso = pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

# 4. drug profiles, cell lines and drug features with scaling
alpha_parameters_df4_lasso = {1: 0.5, 2: 1.0, 3: 10.0, 4: 0.1}

#************************
y = train["param_1"]
y_test = test["param_1"]
model = Lasso(alpha = 0.5)
model.fit(X, y)
features_1 = model.coef_

df_test_lasso["y_test_1"]= test["param_1"]
df_test_lasso["y_pred_1"] = model.predict(X_test)

#************************
y = train["param_2"]
y_test = test["param_2"]
model = Lasso(alpha = 1)
model.fit(X, y)
features_2 = model.coef_

df_test_lasso["y_test_2"]= test["param_2"]
df_test_lasso["y_pred_2"] = model.predict(X_test)

#************************
y = train["param_3"]
y_test = test["param_3"]
model = Lasso(alpha = 10)
model.fit(X, y)
features_3 = model.coef_

df_test_lasso["y_test_3"]= test["param_3"]
df_test_lasso["y_pred_3"] = model.predict(X_test)

#************************
y = train["param_4"]
y_test = test["param_4"]
model = Lasso(alpha = 0.1)
model.fit(X, y)
features_4 = model.coef_

df_test_lasso["y_test_4"]= test["param_4"]
df_test_lasso["y_pred_4"] = model.predict(X_test)
#************************

display(df_test_lasso.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.5876545,0.994099,0.9959636,-19.105115,-18.99108,0.078222,0.080602
std,1.126649,1.111367e-16,1.168964,4.445468e-16,24.391398,3.556374e-15,0.175412,0.0
min,-6.129963,0.5876545,-10.811767,0.9959636,-206.186539,-18.99108,-1.524319,0.080602
25%,0.398606,0.5876545,1.01166,0.9959636,-20.091012,-18.99108,0.005772,0.080602
50%,0.496912,0.5876545,1.065012,0.9959636,-12.729402,-18.99108,0.046012,0.080602
75%,0.616302,0.5876545,1.167762,0.9959636,-9.194847,-18.99108,0.164274,0.080602
max,9.830121,0.5876545,8.02314,0.9959636,5.48676,-18.99108,0.562794,0.080602


In [18]:
sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0)

(0, 0, 0, 0)

### Ridge Regression

In [24]:
# 3. drug profiles, cell lines and drug features
alpha_parameters_df3_ridge = {1: 500.0, 2: 500.0, 3: 500.0, 4: 500.0}
solver_parameters_df3_ridge = {1: 'auto', 2: 'auto', 3: 'auto', 4: 'sparse_cg'}

In [25]:
# Without Scaling = mormalisation of some features
df_test_ridge = pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#*************************
y = train["param_1"]
y_test = test["param_1"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_1 = model.coef_

df_test_ridge["y_test_1"]= test["param_1"]
df_test_ridge["y_pred_1"] = model.predict(X_test)
#*************************

y = train["param_2"]
y_test = test["param_2"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_2 = model.coef_

df_test_ridge["y_test_2"]= test["param_2"]
df_test_ridge["y_pred_2"] = model.predict(X_test)
#*************************

y = train["param_3"]
y_test = test["param_3"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_3 = model.coef_

df_test_ridge["y_test_3"]= test["param_3"]
df_test_ridge["y_pred_3"] = model.predict(X_test)

#*************************
y = train["param_4"]
y_test = test["param_4"]
model = Ridge(alpha=500, solver="sparse_cg")
model.fit(X,y)
features_4 = model.coef_

df_test_ridge["y_test_4"]= test["param_4"]
df_test_ridge["y_pred_4"] = model.predict(X_test)
#*************************

display(df_test_ridge.describe())
print("Features!=0:", sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0))

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.596647,0.994099,0.986638,-19.105115,-18.890427,0.078222,0.080849
std,1.126649,0.105852,1.168964,0.141039,24.391398,2.178736,0.175412,0.017917
min,-6.129963,0.30574,-10.811767,-0.335419,-206.186539,-28.770794,-1.524319,0.030266
25%,0.398606,0.532856,1.01166,0.92944,-20.091012,-20.069508,0.005772,0.06899
50%,0.496912,0.585341,1.065012,0.99299,-12.729402,-18.813967,0.046012,0.077528
75%,0.616302,0.645697,1.167762,1.072019,-9.194847,-17.510525,0.164274,0.088882
max,9.830121,1.08789,8.02314,1.333499,5.48676,-10.507887,0.562794,0.152559


Features!=0: 1063 1063 1063 1063


In [26]:
# 4. drug profiles, cell lines and drug features with scaling
alpha_parameters_df4_ridge = {1: 500.0, 2: 500.0, 3: 500.0, 4: 500.0}
solver_parameters_df4_ridge = {1: 'auto', 2: 'auto', 3: 'auto', 4: 'sparse_cg'}

In [27]:
# with Scaling

df_test_ridge = pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

#*************************
y = train["param_1"]
y_test = test["param_1"]

model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_1 = model.coef_

df_test_ridge["y_test_1"]= test["param_1"]
df_test_ridge["y_pred_1"] = model.predict(X_test)

y = train["param_2"]
y_test = test["param_2"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_2 = model.coef_

df_test_ridge["y_test_2"]= test["param_2"]
df_test_ridge["y_pred_2"] = model.predict(X_test)

y = train["param_3"]
y_test = test["param_3"]
model = Ridge(alpha=500, solver="auto")
model.fit(X,y)
features_3 = model.coef_

df_test_ridge["y_test_3"]= test["param_3"]
df_test_ridge["y_pred_3"] = model.predict(X_test)

y = train["param_4"]
y_test = test["param_4"]
model = Ridge(alpha=500, solver="sparse_cg")
model.fit(X,y)
features_4 = model.coef_

df_test_ridge["y_test_4"]= test["param_4"]
df_test_ridge["y_pred_4"] = model.predict(X_test)

display(df_test_ridge.describe())

print("Features!=0:", sum(features_1!=0), sum(features_2!=0), sum(features_3!=0), sum(features_4!=0))

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.596647,0.994099,0.986638,-19.105115,-18.890427,0.078222,0.080849
std,1.126649,0.105852,1.168964,0.141039,24.391398,2.178736,0.175412,0.017917
min,-6.129963,0.30574,-10.811767,-0.335419,-206.186539,-28.770794,-1.524319,0.030266
25%,0.398606,0.532856,1.01166,0.92944,-20.091012,-20.069508,0.005772,0.06899
50%,0.496912,0.585341,1.065012,0.99299,-12.729402,-18.813967,0.046012,0.077528
75%,0.616302,0.645697,1.167762,1.072019,-9.194847,-17.510525,0.164274,0.088882
max,9.830121,1.08789,8.02314,1.333499,5.48676,-10.507887,0.562794,0.152559


Features!=0: 1063 1063 1063 1063


### Kernel Ridge

In [34]:
pd.read_csv(_FOLDER+"kernel_ridge_model_parameters_drug_cells_PubChem.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,polynomial,"{'alpha': 7.0, 'gamma': 1e-05, 'degree': 1.0}"
1,coef_2,sigmoid,"[{'alpha': 1.0, 'gamma': 0.01, 'coef0': -0.1}]"
2,coef_3,linear,[{'alpha': 500.0}]
3,coef_4,sigmoid,"[{'alpha': 1.0, 'gamma': 1e-05, 'coef0': -0.1}]"


In [None]:
# 2
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_description.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_description.csv")

df_test=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = KernelRidge(kernel='polynomial', alpha=7,gamma=0.00001, degree=1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, coef0=-0.1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = KernelRidge(kernel='linear', alpha=500)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = KernelRidge(kernel='sigmoid', alpha=1, gamma=0.00001, coef0=-0.1)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

In [42]:
pd.read_csv(_FOLDER+"kernel_ridge_model_parameters_drug_cells_PubChem_Scaling.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,polynomial,"{'alpha': 5.0, 'gamma': 1e-05, 'degree': 1.0}"
1,coef_2,polynomial,"[{'alpha': 1.0, 'gamma': 1e-05, 'degree': 1.0}]"
2,coef_3,polynomial,"[{'alpha': 1.0, 'gamma': 1e-05, 'degree': 1.0}]"
3,coef_4,sigmoid,"[{'alpha': 0.5, 'gamma': 0.01, 'coef0': 5.0}]"


In [37]:
# 3
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = KernelRidge(kernel='polynomial', alpha=5,gamma=0.00001, degree=1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, degree=1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, degree=1)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = KernelRidge(kernel='sigmoid', alpha=0.5, gamma=0.001, coef0=0.5)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.585417,0.994099,-1.090286,-19.105115,-15.87842,0.078222,0.081043
std,1.126649,0.000216,1.168964,0.897882,24.391398,5.178585,0.175412,0.000316
min,-6.129963,0.584878,-10.811767,-3.315599,-206.186539,-37.026389,-1.524319,0.080191
25%,0.398606,0.585287,1.01166,-1.6078,-20.091012,-19.436331,0.005772,0.080848
50%,0.496912,0.585418,1.065012,-1.159089,-12.729402,-16.307873,0.046012,0.081
75%,0.616302,0.58555,1.167762,-0.681236,-9.194847,-12.464341,0.164274,0.08116
max,9.830121,0.58621,8.02314,2.554691,5.48676,-1.906954,0.562794,0.082292


In [41]:
pd.read_csv(_FOLDER+"kernel_ridge_model_parameters_drug_cells_PubChem.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,polynomial,"{'alpha': 7.0, 'gamma': 1e-05, 'degree': 1.0}"
1,coef_2,sigmoid,"[{'alpha': 1.0, 'gamma': 0.01, 'coef0': -0.1}]"
2,coef_3,linear,[{'alpha': 500.0}]
3,coef_4,sigmoid,"[{'alpha': 1.0, 'gamma': 1e-05, 'coef0': -0.1}]"


In [40]:
# 4 with scaling
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = KernelRidge(kernel='polynomial', alpha=7,gamma=0.00001, degree=1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = KernelRidge(kernel='polynomial', alpha=1, gamma=0.01, coef0=-0.1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = KernelRidge(kernel='linear', alpha=500)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = KernelRidge(kernel='sigmoid', alpha=1, gamma=0.00001, coef0=5)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.585417,0.994099,-1.090286,-19.105115,-15.87842,0.078222,0.08055809
std,1.126649,0.000216,1.168964,0.897882,24.391398,5.178585,0.175412,5.870082e-08
min,-6.129963,0.584878,-10.811767,-3.315599,-206.186539,-37.026389,-1.524319,0.08055794
25%,0.398606,0.585287,1.01166,-1.6078,-20.091012,-19.436331,0.005772,0.08055806
50%,0.496912,0.585418,1.065012,-1.159089,-12.729402,-16.307873,0.046012,0.08055808
75%,0.616302,0.58555,1.167762,-0.681236,-9.194847,-12.464341,0.164274,0.08055811
max,9.830121,0.58621,8.02314,2.554691,5.48676,-1.906954,0.562794,0.08055833


### Support Vector Regression 

In [47]:
pd.set_option('display.max_colwidth', -1)
pd.read_csv("results/svr_model_parameters_drug_cells_PubChem.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,sigmoid,"{'C': 7.0, 'coef0': 10.0, 'epsilon': 1.0}"
1,coef_2,sigmoid,"[{'C': 0.1, 'coef0': 10.0, 'epsilon': 1.0}]"
2,coef_3,poly,"[{'C': 1.0, 'degree': 5.0, 'coef0': 10.0, 'epsilon': 5.0}]"
3,coef_4,rbf,"[{'C': 7.0, 'coef0': -0.1, 'epsilon': 0.1}]"


In [48]:
# 3
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = SVR(kernel='sigmoid', C=7, epsilon=1, coef0=10)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = SVR(kernel='sigmoid', C=1, epsilon=1, coef0=10)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = SVR(kernel='poly', C=1, degree = 5, epsilon=5, coef0=10)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = SVR(kernel='rbf', C=7, epsilon=0.1, coef0=-0.1)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,1.21947,0.994099,0.6671013,-19.105115,-15.882386,0.078222,0.091364
std,1.126649,1.859673e-08,1.168964,1.694241e-09,24.391398,12.524214,0.175412,0.071956
min,-6.129963,1.21947,-10.811767,0.6671013,-206.186539,-174.817472,-1.524319,-0.50371
25%,0.398606,1.21947,1.01166,0.6671013,-20.091012,-19.040776,0.005772,0.068763
50%,0.496912,1.21947,1.065012,0.6671013,-12.729402,-14.51367,0.046012,0.096484
75%,0.616302,1.21947,1.167762,0.6671013,-9.194847,-11.570322,0.164274,0.12263
max,9.830121,1.21947,8.02314,0.6671013,5.48676,21.153813,0.562794,0.268822


In [49]:
pd.read_csv("results/svr_model_parameters_drug_cells_PubChem_Scaling.csv")

Unnamed: 0.1,Unnamed: 0,kernel,kernel_parameters
0,coef_1,rbf,"{'C': 5.0, 'coef0': -0.1, 'epsilon': 0.1}"
1,coef_2,rbf,"[{'C': 7.0, 'coef0': -0.1, 'epsilon': 0.1}]"
2,coef_3,linear,"[{'epsilon': 5.0, 'C': 0.5}]"
3,coef_4,sigmoid,"[{'C': 7.0, 'coef0': 5.0, 'epsilon': 0.1}]"


In [50]:
# 4
train = pd.read_csv(_FOLDER+"train08_merged_fitted_sigmoid4_123_with_drugs_properties.csv")
test = pd.read_csv(_FOLDER+"test02_merged_fitted_sigmoid4_123_with_drugs_properties.csv")

df_test=pd.DataFrame(index=test.index)

scaler = MinMaxScaler()
scaler.fit(train[columns_for_normalisation])
train[columns_for_normalisation] = scaler.transform(train[columns_for_normalisation])
test[columns_for_normalisation] = scaler.transform(test[columns_for_normalisation])

X = train[X_columns]
X_test = test[X_columns]

#***********************************
y = train["param_1"]
y_test = test["param_1"]
model = SVR(kernel='rbf', C=5, epsilon=0.1, coef0=-0.1)
model.fit(X,y)

df_test["y_test_1"]= test["param_1"]
df_test["y_pred_1"] = model.predict(X_test)

#***********************************
y = train["param_2"]
y_test = test["param_2"]
model = SVR(kernel='rbf', C=7, epsilon=0.1, coef0=-0.1)
model.fit(X,y)

df_test["y_test_2"]= test["param_2"]
df_test["y_pred_2"] = model.predict(X_test)

#***********************************
y = train["param_3"]
y_test = test["param_3"]
model = SVR(kernel='linear', C=0.5, epsilon=5)
model.fit(X,y)

df_test["y_test_3"]= test["param_3"]
df_test["y_pred_3"] = model.predict(X_test)

#***********************************
y = train["param_4"]
y_test = test["param_4"]
model = SVR(kernel='sigmoid', C=7, epsilon=0.1, coef0=5)
model.fit(X,y)

df_test["y_test_4"]= test["param_4"]
df_test["y_pred_4"] = model.predict(X_test)

display(df_test.describe())

Unnamed: 0,y_test_1,y_pred_1,y_test_2,y_pred_2,y_test_3,y_pred_3,y_test_4,y_pred_4
count,486.0,486.0,486.0,486.0,486.0,486.0,486.0,486.0
mean,0.623946,0.504195,0.994099,1.094562,-19.105115,-14.826592,0.078222,0.099225
std,1.126649,0.089196,1.168964,0.123808,24.391398,3.049297,0.175412,0.002225
min,-6.129963,0.263542,-10.811767,-0.101503,-206.186539,-25.317269,-1.524319,0.09398
25%,0.398606,0.452374,1.01166,1.040046,-20.091012,-16.848045,0.005772,0.097712
50%,0.496912,0.495919,1.065012,1.095619,-12.729402,-14.559673,0.046012,0.099005
75%,0.616302,0.550009,1.167762,1.140846,-9.194847,-12.689404,0.164274,0.100445
max,9.830121,1.31976,8.02314,1.567688,5.48676,-6.521922,0.562794,0.107296
