### All drugs training

Take the best hyperparameters found for each model, train the models with these
hyperparameters on the training set, and evaluate them on the test set.

In [1]:
import pandas as pd
import numpy as np
import time

from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import Lasso, Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
import gc

from sklearn.preprocessing import MinMaxScaler
import os

np.random.seed(123)

# Input locations, relative to the notebook's working directory.
_FOLDER = "results/"
_FOLDER_2 = "GridSearch_results/"


def _read_lines(file_name):
    """Return the lines of `_FOLDER + file_name` with the trailing newline stripped."""
    with open(_FOLDER + file_name, 'r') as handle:
        return [row.rstrip('\n') for row in handle]


# Drug IDs to keep, stored one integer per line.
drug_ids_50 = [np.int32(entry) for entry in _read_lines("drug_ids_50.txt")]

# Column-name lists for each feature group, stored one name per line.
X_cancer_cell_lines = _read_lines("X_features_cancer_cell_lines.txt")
X_PubChem_properties = _read_lines("X_PubChem_properties.txt")
X_targets = _read_lines("X_features_Targets.txt")
X_target_pathway = _read_lines("X_features_Target_Pathway.txt")

all_columns = (
    X_cancer_cell_lines
    + X_PubChem_properties
    + X_targets
    + X_target_pathway
    + ["MAX_CONC"]
)

# Both CSVs carry two leftover index columns that are dropped on load.
_leftover_index_columns = ["Unnamed: 0", "Unnamed: 0.1"]
train_df = pd.read_csv(
    _FOLDER + "train08_merged_fitted_sigmoid4_123_with_drugs_properties_min10.csv"
).drop(columns=_leftover_index_columns)
test_df = pd.read_csv(
    _FOLDER + "test02_merged_fitted_sigmoid4_123_with_drugs_properties_min10.csv"
).drop(columns=_leftover_index_columns)

# Restrict both splits to the selected drugs; DRUG_ID becomes the index.
train_df_50 = train_df.set_index("DRUG_ID").loc[drug_ids_50, :].copy()
test_df_50 = test_df.set_index("DRUG_ID").loc[drug_ids_50, :].copy()

# Feature sets: every dataset after the first adds MAX_CONC plus drug descriptors
# in front of the cell-line features.
X_feat_dict = {
    "Dataset 1": X_cancer_cell_lines,
    "Dataset 2": ["MAX_CONC"] + X_targets + X_target_pathway + X_cancer_cell_lines,
    "Dataset 3": ["MAX_CONC"] + X_PubChem_properties + X_cancer_cell_lines,
    "Dataset 4": ["MAX_CONC"] + X_PubChem_properties + X_targets + X_target_pathway + X_cancer_cell_lines,
}
datasets = list(X_feat_dict)

# This notebook works with the largest feature set only.
data_set = "Dataset 4"
results = pd.DataFrame({"COSMIC_ID": test_df_50["COSMIC_ID"]})

train_drug = train_df_50.copy()
test_drug = test_df_50.copy()

# Scale features to [0, 1] using ranges learned from the training split only,
# then apply the same scaling to the test split.
X_columns = X_feat_dict[data_set]
scaler = MinMaxScaler()
Xtrain_drug = scaler.fit_transform(train_drug[X_columns])
Xtest_drug = scaler.transform(test_drug[X_columns])

In [2]:
# Row labels of the two result tables, one per candidate model.
names = [
    'lasso',
    'ridge',
    'Linear_KR',
    'Sigmoid_KR',
    'RBF_KR',
    'Polynomial_KR',
    'Linear_SVR',
    'Sigmoid_SVR',
    'RBF_SVR',
    "Polynomial_SVR",
]

# One column per coefficient: coef_1 ... coef_4.
coef_columns = ["coef_" + str(k) for k in range(1, 5)]

# df_results receives formatted "mean/std" strings, df_results2 the raw mean errors.
# Both start out empty (all NaN) and are filled in by the training cells.
df_results = pd.DataFrame(index=names, columns=coef_columns)
df_results2 = pd.DataFrame(index=names, columns=coef_columns)

In [3]:
# Start a fresh frame that shares the test set's index and holds only COSMIC_ID.
results = pd.DataFrame({"COSMIC_ID": test_df_50["COSMIC_ID"]})

In [4]:
# Hyperparameter tables, one per coefficient, laid out as {model: {parameter: value}}.
# All four tables list the same models and parameters in the same order.

best_parameters_1 = {
    "Lasso": {"alpha": 0.1},
    "Ridge": {"alpha": 1000, "solver": "lsqr"},
    "Linear_KR": {"alpha": 500},
    "Sigmoid_KR": {"alpha": 100, "gamma": 1e-05, "coef0": 0.5},
    "RBF_KR": {"alpha": 500, "gamma": 1e-05, "coef0": 0.01},
    "Polynomial_KR": {"alpha": 10, "gamma": 1e-05, "coef0": 0.5, "degree": 5},
    "Linear_SVR": {"C": 0.01, "epsilon": 0.1},
    "RBF_SVR": {"C": 0.5, "epsilon": 0.01, "coef0": 0.01},
}

best_parameters_2 = {
    "Lasso": {"alpha": 0.1},
    "Ridge": {"alpha": 1000, "solver": "lsqr"},
    "Linear_KR": {"alpha": 500},
    "Sigmoid_KR": {"alpha": 0.1, "gamma": 1e-05, "coef0": 0.5},
    # NOTE(review): the coefficient-2 training cell fits RBF_KR with alpha=0.1,
    # not 500 -- confirm which value is intended.
    "RBF_KR": {"alpha": 500, "gamma": 1e-05, "coef0": 0.01},
    "Polynomial_KR": {"alpha": 5, "gamma": 0.0001, "coef0": 1, "degree": 5},
    # NOTE(review): the coefficient-2 training cell fits Linear_SVR with
    # C=0.01, epsilon=0.001 -- confirm which values are intended.
    "Linear_SVR": {"C": 0.02, "epsilon": 0.2},
    "RBF_SVR": {"C": 0.1, "epsilon": 0.01, "coef0": 0.01},
}

best_parameters_3 = {
    "Lasso": {"alpha": 0.5},
    "Ridge": {"alpha": 1000, "solver": "lsqr"},
    "Linear_KR": {"alpha": 500},
    "Sigmoid_KR": {"alpha": 100, "gamma": 0.01, "coef0": 0.5},
    # NOTE(review): the coefficient-3 training cell fits RBF_KR with gamma=0.01,
    # not 1 -- confirm which value is intended.
    "RBF_KR": {"alpha": 5, "gamma": 1, "coef0": 0.01},
    "Polynomial_KR": {"alpha": 100, "gamma": 0.01, "coef0": 0.5, "degree": 2},
    "Linear_SVR": {"C": 0.1, "epsilon": 1},
    # NOTE(review): the coefficient-3 training cell does not train an RBF_SVR model,
    # so these values are reported but never used.
    "RBF_SVR": {"C": 5, "epsilon": 0, "coef0": 0},
}

best_parameters_4 = {
    "Lasso": {"alpha": 0.01},
    "Ridge": {"alpha": 1000, "solver": "lsqr"},
    "Linear_KR": {"alpha": 500},
    "Sigmoid_KR": {"alpha": 1, "gamma": 0.01, "coef0": 1},
    "RBF_KR": {"alpha": 0.1, "gamma": 0.0001, "coef0": 0.01},
    "Polynomial_KR": {"alpha": 10, "gamma": 0.01, "coef0": 1, "degree": 4},
    "Linear_SVR": {"C": 0.01, "epsilon": 0.01},
    # NOTE(review): the coefficient-4 training cell fits RBF_SVR with epsilon=0.001,
    # not 0.01 -- confirm which value is intended.
    "RBF_SVR": {"C": 0.1, "epsilon": 0.01, "coef0": 0.01},
}

In [5]:
# Display the coefficient-1 hyperparameter table for a visual check.
best_parameters_1

{'Lasso': {'alpha': 0.1},
 'Ridge': {'alpha': 1000, 'solver': 'lsqr'},
 'Linear_KR': {'alpha': 500},
 'Sigmoid_KR': {'alpha': 100, 'gamma': 1e-05, 'coef0': 0.5},
 'RBF_KR': {'alpha': 500, 'gamma': 1e-05, 'coef0': 0.01},
 'Polynomial_KR': {'alpha': 10, 'gamma': 1e-05, 'coef0': 0.5, 'degree': 5},
 'Linear_SVR': {'C': 0.01, 'epsilon': 0.1},
 'RBF_SVR': {'C': 0.5, 'epsilon': 0.01, 'coef0': 0.01}}

In [6]:
# Flatten the nested {model: {parameter: value}} tables into parallel lists.
# best_parameters_1 defines which (model, parameter) pairs exist and their order;
# the other three tables are looked up with the same keys.
_param_keys = [
    (model_key, param_key)
    for model_key, model_params in best_parameters_1.items()
    for param_key in model_params
]

# Row labels of the form "<model>_<parameter>".
parameters_name = [model_key + "_" + param_key for model_key, param_key in _param_keys]

parameters_values_1 = [best_parameters_1[m][p] for m, p in _param_keys]
parameters_values_2 = [best_parameters_2[m][p] for m, p in _param_keys]
parameters_values_3 = [best_parameters_3[m][p] for m, p in _param_keys]
parameters_values_4 = [best_parameters_4[m][p] for m, p in _param_keys]

In [7]:
# Summary table: one row per "<model>_<parameter>" entry and one value column
# per best_parameters_N table.
df_report = pd.DataFrame({
    "model": parameters_name,
    "values_coef1": parameters_values_1,
    "values_coef2": parameters_values_2,
    "values_coef3": parameters_values_3,
    "values_coef4": parameters_values_4,
})
df_report

Unnamed: 0,model,values_coef1,values_coef2,values_coef3,values_coef4
0,Lasso_alpha,0.1,0.1,0.5,0.01
1,Ridge_alpha,1000,1000,1000,1000
2,Ridge_solver,lsqr,lsqr,lsqr,lsqr
3,Linear_KR_alpha,500,500,500,500
4,Sigmoid_KR_alpha,100,0.1,100,1
5,Sigmoid_KR_gamma,1e-05,1e-05,0.01,0.01
6,Sigmoid_KR_coef0,0.5,0.5,0.5,1
7,RBF_KR_alpha,500,500,5,0.1
8,RBF_KR_gamma,1e-05,1e-05,1,0.0001
9,RBF_KR_coef0,0.01,0.01,0.01,0.01


### Coefficient 1

In [8]:
# Coefficient 1: fit each model on the scaled training features and score it on
# the test set by absolute error.
i = 1
y = test_df_50["param_1"]
y_train_drug = train_drug["param_1"].values
y_test_drug = test_drug["param_1"].values

# (results row, estimator class, constructor arguments), fitted in this order.
# coef0 is passed to the RBF models as well; scikit-learn documents it as only
# used by the polynomial and sigmoid kernels.
coef_1_models = [
    ("lasso", Lasso, dict(alpha=0.1)),
    ("ridge", Ridge, dict(alpha=1000, solver="lsqr")),
    ("Linear_KR", KernelRidge, dict(kernel="linear", alpha=500)),
    ("Sigmoid_KR", KernelRidge, dict(kernel="sigmoid", alpha=100, gamma=1e-05, coef0=0.5)),
    ("RBF_KR", KernelRidge, dict(kernel="rbf", alpha=500, gamma=1e-05, coef0=0.01)),
    ("Polynomial_KR", KernelRidge,
     dict(kernel="polynomial", alpha=10, gamma=1e-05, coef0=0.5, degree=5)),
    ("Linear_SVR", SVR, dict(kernel="linear", epsilon=0.1, C=0.01)),
    ("RBF_SVR", SVR, dict(kernel="rbf", epsilon=0.01, C=0.5, coef0=0.01)),
]

column = "coef_" + str(i)
for row_label, estimator_cls, hyperparams in coef_1_models:
    # Build the estimator inside the loop so the previously fitted model can be freed.
    model = estimator_cls(**hyperparams)
    model.fit(Xtrain_drug, y_train_drug)
    y_pred = model.predict(Xtest_drug)
    abs_errors = abs(y - y_pred)

    # df_results stores "mean/std" rounded to 3 decimals; df_results2 the raw mean.
    df_results.loc[row_label, column] = "/".join(
        str(round(stat, 3)) for stat in (abs_errors.mean(), abs_errors.std())
    )
    df_results2.loc[row_label, column] = abs_errors.mean()

In [9]:
# Coefficient 2: fit each model on the scaled training features and score it on
# the test set by absolute error.
i = 2
y = test_df_50["param_2"]
y_train_drug = train_drug["param_2"].values
y_test_drug = test_drug["param_2"].values

# (results row, estimator class, constructor arguments), fitted in this order.
# coef0 is passed to the RBF models as well; scikit-learn documents it as only
# used by the polynomial and sigmoid kernels.
coef_2_models = [
    ("lasso", Lasso, dict(alpha=0.1)),
    ("ridge", Ridge, dict(alpha=1000, solver="lsqr")),
    ("Linear_KR", KernelRidge, dict(kernel="linear", alpha=500)),
    ("Sigmoid_KR", KernelRidge, dict(kernel="sigmoid", alpha=0.1, gamma=1e-05, coef0=0.5)),
    # NOTE(review): best_parameters_2 lists RBF_KR alpha=500, but alpha=0.1 is what
    # is trained here -- confirm which value is intended.
    ("RBF_KR", KernelRidge, dict(kernel="rbf", alpha=0.1, gamma=1e-05, coef0=0.01)),
    ("Polynomial_KR", KernelRidge,
     dict(kernel="polynomial", alpha=5, gamma=0.0001, coef0=1, degree=5)),
    # NOTE(review): best_parameters_2 lists Linear_SVR C=0.02, epsilon=0.2, but
    # C=0.01, epsilon=0.001 is what is trained here -- confirm which is intended.
    ("Linear_SVR", SVR, dict(kernel="linear", epsilon=0.001, C=0.01)),
    ("RBF_SVR", SVR, dict(kernel="rbf", epsilon=0.01, C=0.1, coef0=0.01)),
]

column = "coef_" + str(i)
for row_label, estimator_cls, hyperparams in coef_2_models:
    # Build the estimator inside the loop so the previously fitted model can be freed.
    model = estimator_cls(**hyperparams)
    model.fit(Xtrain_drug, y_train_drug)
    y_pred = model.predict(Xtest_drug)
    abs_errors = abs(y - y_pred)

    # df_results stores "mean/std" rounded to 3 decimals; df_results2 the raw mean.
    df_results.loc[row_label, column] = "/".join(
        str(round(stat, 3)) for stat in (abs_errors.mean(), abs_errors.std())
    )
    df_results2.loc[row_label, column] = abs_errors.mean()

In [10]:
# Coefficient 3: fit each model on the scaled training features and score it on
# the test set by absolute error.
i = 3
y = test_df_50["param_3"]
y_train_drug = train_drug["param_3"].values
y_test_drug = test_drug["param_3"].values

# (results row, estimator class, constructor arguments, decimals in the report
# string), fitted in this order. Linear_SVR is reported with 2 decimals, every
# other model with 1.
# NOTE(review): no RBF_SVR model is trained for this coefficient although
# best_parameters_3 lists parameters for it, so its results row stays empty.
coef_3_models = [
    ("lasso", Lasso, dict(alpha=0.5), 1),
    ("ridge", Ridge, dict(alpha=1000, solver="lsqr"), 1),
    ("Linear_KR", KernelRidge, dict(kernel="linear", alpha=500), 1),
    ("Sigmoid_KR", KernelRidge, dict(kernel="sigmoid", alpha=100, gamma=0.01, coef0=0.5), 1),
    # NOTE(review): best_parameters_3 lists RBF_KR gamma=1, but gamma=0.01 is what
    # is trained here -- confirm which value is intended.
    # coef0 is passed as well; scikit-learn documents it as only used by the
    # polynomial and sigmoid kernels.
    ("RBF_KR", KernelRidge, dict(kernel="rbf", alpha=5, gamma=0.01, coef0=0.01), 1),
    ("Polynomial_KR", KernelRidge,
     dict(kernel="polynomial", alpha=100, gamma=0.01, coef0=0.5, degree=2), 1),
    ("Linear_SVR", SVR, dict(kernel="linear", epsilon=1, C=0.1), 2),
]

column = "coef_" + str(i)
for row_label, estimator_cls, hyperparams, decimals in coef_3_models:
    # Build the estimator inside the loop so the previously fitted model can be freed.
    model = estimator_cls(**hyperparams)
    model.fit(Xtrain_drug, y_train_drug)
    y_pred = model.predict(Xtest_drug)
    abs_errors = abs(y - y_pred)

    # df_results stores a rounded "mean/std" string; df_results2 the raw mean.
    df_results.loc[row_label, column] = "/".join(
        str(round(stat, decimals)) for stat in (abs_errors.mean(), abs_errors.std())
    )
    df_results2.loc[row_label, column] = abs_errors.mean()

In [11]:
# Coefficient 4: fit each model on the scaled training features and score it on
# the test set by absolute error.
i = 4
y = test_df_50["param_4"]
y_train_drug = train_drug["param_4"].values
y_test_drug = test_drug["param_4"].values

# (results row, estimator class, constructor arguments), fitted in this order.
# coef0 is passed to the RBF models as well; scikit-learn documents it as only
# used by the polynomial and sigmoid kernels.
coef_4_models = [
    ("lasso", Lasso, dict(alpha=0.01)),
    ("ridge", Ridge, dict(alpha=1000, solver="lsqr")),
    ("Linear_KR", KernelRidge, dict(kernel="linear", alpha=500)),
    ("Sigmoid_KR", KernelRidge, dict(kernel="sigmoid", alpha=1, gamma=0.01, coef0=1)),
    ("RBF_KR", KernelRidge, dict(kernel="rbf", alpha=0.1, gamma=0.0001, coef0=0.01)),
    ("Polynomial_KR", KernelRidge,
     dict(kernel="polynomial", alpha=10, gamma=0.01, coef0=1, degree=4)),
    ("Linear_SVR", SVR, dict(kernel="linear", epsilon=0.01, C=0.01)),
    # NOTE(review): best_parameters_4 lists RBF_SVR epsilon=0.01, but epsilon=0.001
    # is what is trained here -- confirm which value is intended.
    ("RBF_SVR", SVR, dict(kernel="rbf", epsilon=0.001, C=0.1, coef0=0.01)),
]

column = "coef_" + str(i)
for row_label, estimator_cls, hyperparams in coef_4_models:
    # Build the estimator inside the loop so the previously fitted model can be freed.
    model = estimator_cls(**hyperparams)
    model.fit(Xtrain_drug, y_train_drug)
    y_pred = model.predict(Xtest_drug)
    abs_errors = abs(y - y_pred)

    # df_results stores "mean/std" rounded to 3 decimals; df_results2 the raw mean.
    df_results.loc[row_label, column] = "/".join(
        str(round(stat, 3)) for stat in (abs_errors.mean(), abs_errors.std())
    )
    df_results2.loc[row_label, column] = abs_errors.mean()

In [12]:
# Show the raw mean absolute errors: one row per model, one column per coefficient.
# Rows/cells for models that were not trained remain empty.
df_results2

Unnamed: 0,coef_1,coef_2,coef_3,coef_4
lasso,0.308382,0.33092,9.72121,0.0824355
ridge,0.324285,0.343284,10.1645,0.0790446
Linear_KR,0.319662,0.470577,10.2292,0.0747704
Sigmoid_KR,0.23285,0.331667,9.67711,0.0756531
RBF_KR,0.240466,0.332915,9.83687,0.0760131
Polynomial_KR,0.227837,0.336869,9.76593,0.0781655
Linear_SVR,0.213999,0.245959,8.95249,0.0821791
Sigmoid_SVR,,,,
RBF_SVR,0.207926,0.235823,,0.0722271
Polynomial_SVR,,,,


In [13]:
# For each coefficient, collect the model(s) with the smallest mean absolute error.
# Ties are all kept; models without a recorded error are never selected.
best_models = {}
for i in range(1, 5):
    column = "coef_" + str(i)
    lowest_error = df_results2[column].min()
    winners = list(df_results2[df_results2[column] == lowest_error].index)
    best_models[column] = winners
    print(column + ":", winners)

coef_1: ['RBF_SVR']
coef_2: ['RBF_SVR']
coef_3: ['Linear_SVR']
coef_4: ['RBF_SVR']


In [14]:
# Show the formatted "mean/std" absolute-error strings: one row per model,
# one column per coefficient.
df_results

Unnamed: 0,coef_1,coef_2,coef_3,coef_4
lasso,0.308/1.193,0.331/1.564,9.7/21.2,0.082/0.088
ridge,0.324/1.166,0.343/1.53,10.2/21.0,0.079/0.087
Linear_KR,0.32/1.158,0.471/1.51,10.2/22.2,0.075/0.089
Sigmoid_KR,0.233/1.211,0.332/1.56,9.7/22.0,0.076/0.087
RBF_KR,0.24/1.222,0.333/1.554,9.8/21.1,0.076/0.087
Polynomial_KR,0.228/1.216,0.337/1.558,9.8/22.5,0.078/0.087
Linear_SVR,0.214/1.226,0.246/1.583,8.95/22.09,0.082/0.088
Sigmoid_SVR,,,,
RBF_SVR,0.208/1.23,0.236/1.585,,0.072/0.099
Polynomial_SVR,,,,


In [15]:
# Percentage difference of 0.236 relative to 0.231.
# NOTE(review): 0.236 matches the rounded RBF_SVR coef_2 error in the table above;
# the 0.231 reference value is not defined in this notebook -- document its source.
(0.236-0.231)*100/0.231

2.1645021645021543

In [16]:
# Percentage difference of 8.96 relative to 8.53.
# NOTE(review): presumably the Linear_SVR coef_3 error, which the table above
# reports as 8.95 rather than 8.96; the 8.53 reference value is not defined in
# this notebook -- confirm both figures.
(8.96-8.53)*100/8.53

5.041031652989467

In [17]:
# 0.003 expressed as a percentage of 0.069.
# NOTE(review): presumably the coef_4 gap (the table above reports 0.072 for
# RBF_SVR, and 0.072 - 0.069 = 0.003); the 0.069 reference value is not defined
# in this notebook -- confirm.
0.003*100/0.069

4.3478260869565215