# LOPO model SVM results processing

Here I put together the validation and testing results for the LOPO SVM model.
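The linear model has no hyperparameters, so there is no random search for it; the random search results collected further down come from the SVM hyperparameter search.
I save each output as an xz-compressed CSV file, which is more universal than the joblib dumps.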

## Validation and test set results

In [8]:
import joblib
import pandas as pd

# Source dump (joblib, xz-compressed) and the CSV file it is exported to.
infile = "../../testing_results/lopo_models/linear/validation_and_test_20210530-172934.joblib.xz"
outfile = "../../testing_results/lopo_models/linear/test_results.csv.xz"

# NOTE: joblib.load unpickles the file, so only run this on dumps you trust.
result_dump = joblib.load(infile)

# The "data" entry of the dump is written out without its row index.
predictions = result_dump["data"]
predictions.to_csv(outfile, index=False, compression="xz")

# Read the export back so the cell displays exactly what ended up on disk.
pd.read_csv(outfile)

Unnamed: 0,dms_id,y_pred,y_true,kind,aa1,aa2,position
0,kka2_1:2,-0.123406,0.303030,test,E,N,3
1,kka2_1:2,-0.122308,0.303030,test,E,I,3
2,kka2_1:2,-0.044941,0.197944,test,E,C,3
3,kka2_1:2,-0.174213,0.286599,test,E,R,3
4,kka2_1:2,-0.188992,0.260156,test,E,D,3
...,...,...,...,...,...,...,...
21508,beta-lactamase,-0.458969,-0.748528,valid,K,N,71
21509,beta-lactamase,-0.481968,-0.783916,valid,K,E,71
21510,beta-lactamase,-0.547830,-0.783916,valid,K,I,71
21511,beta-lactamase,-0.620541,-0.783916,valid,K,P,71


In [40]:
import os

import joblib
import pandas as pd

# Gather every random-search dump in `infile_dir` into one table that pairs the
# per-dataset "spearman" scores with the hyperparameters of each parameter set,
# then export that table as an xz-compressed CSV.
infile_dir = "../../hyperparameter_search/lopo_models/svm/"
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the exported table reproducible across runs and machines.
infiles = sorted(
    os.path.join(infile_dir, f)
    for f in os.listdir(infile_dir)
    if f.endswith(".joblib.xz")
)
outfile = "../../hyperparameter_search/lopo_models/svm/random_search_results.csv.xz"

# Collect one frame per dump and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each call.
per_file_results = []
for infile in infiles:
    # NOTE: joblib.load unpickles the file, so only load dumps you trust.
    result_dump = joblib.load(infile)
    curr_results = result_dump["data"]["spearman"]
    # The second element of every "params" entry is used as the hyperparameter
    # mapping (presumably a dict of scalar values -- confirm against the dump writer).
    params_random_search = [el[1] for el in result_dump["params"]]

    hyperparam_rows = []
    for name in curr_results.index:
        # Row labels are split on "_" and the second piece (e.g. the 20 in
        # "param_20") is used as the position in params_random_search.
        param_index = int(name.split("_")[1])
        try:
            curr_params = pd.DataFrame(params_random_search[param_index], index=[name])
        except Exception as err:
            # Keep the AssertionError type, but carry the diagnostic details and
            # the original cause instead of printing and raising a bare error.
            raise AssertionError(
                f"could not build hyperparameter row: infile={infile}, "
                f"n_params={len(params_random_search)}, name={name}, "
                f"param_index={param_index}"
            ) from err
        hyperparam_rows.append(curr_params)

    # pd.concat rejects an empty list, so fall back to an empty frame.
    hyperparam_df = pd.concat(hyperparam_rows) if hyperparam_rows else pd.DataFrame()
    curr_results = curr_results.join(hyperparam_df)
    per_file_results.append(curr_results)

result_df = pd.concat(per_file_results) if per_file_results else pd.DataFrame()

# NOTE: index=False means the "param_<i>" row labels are not written to the CSV;
# the in-memory result_df still carries them.
result_df.to_csv(outfile, compression="xz", index=False)
pd.read_csv(outfile)

Unnamed: 0,gb1,PSD95pdz3,kka2_1:2,Pab1,Ubiquitin,E1_Ubiquitin,beta-lactamase,hsp90,WW_domain,average,sem,kernel,C,epsilon,gamma,degree
0,0.467973,0.457888,0.512417,0.688121,0.196993,0.237669,0.653689,0.432126,0.607634,0.472724,0.050782,linear,0.0001,0.1000,,
1,0.390443,0.459950,0.507944,0.682401,0.237797,0.262597,0.681984,0.421490,0.598675,0.471476,0.048734,linear,0.0001,1.0000,,
2,0.476554,0.457314,0.507679,0.683518,0.192566,0.235851,0.641805,0.433738,0.605117,0.470460,0.050343,linear,0.0001,0.0100,,
3,0.472870,0.457626,0.507622,0.683302,0.194433,0.234672,0.642574,0.433518,0.603724,0.470038,0.050264,linear,0.0001,0.0010,,
4,0.472455,0.457283,0.506992,0.683035,0.189672,0.233810,0.640978,0.433534,0.604149,0.469101,0.050535,linear,0.0001,0.0001,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,0.378952,0.357974,0.469882,0.622613,0.292646,0.283119,0.603762,0.339361,0.552926,0.433471,0.039488,rbf,0.0100,0.1000,1.000000e-09,
939,0.378894,0.357971,0.469937,0.622638,0.292571,0.283101,0.603805,0.339361,0.552949,0.433470,0.039497,rbf,0.1000,0.1000,1.000000e-08,
940,0.378916,0.357970,0.469937,0.622638,0.292689,0.282972,0.603784,0.339361,0.552949,0.433469,0.039497,rbf,0.0100,0.1000,1.000000e-08,
941,0.378890,0.357970,0.469938,0.622638,0.292689,0.282972,0.603784,0.339361,0.552949,0.433466,0.039497,rbf,0.0001,0.1000,1.000000e-08,


In [32]:
# Best-scoring RBF-kernel parameter set(s): restrict to kernel == "rbf", then
# keep every row tied for the highest "average" within that subset.
# Series.max() skips NaN, whereas the builtin max() compares element by element
# and can return NaN or a wrong value depending on where a NaN sits.
# An empty subset now gives an empty frame instead of a ValueError.
rbf_results = result_df[result_df.kernel == "rbf"]
rbf_results[rbf_results.average == rbf_results.average.max()]

Unnamed: 0,gb1,PSD95pdz3,kka2_1:2,Pab1,Ubiquitin,E1_Ubiquitin,beta-lactamase,hsp90,WW_domain,average,sem,kernel,C,epsilon,gamma,degree
param_194,0.418563,0.440823,0.53766,0.680517,0.256691,0.270435,0.677011,0.409207,0.594485,0.476155,0.047073,rbf,100.0,1.0,1e-07,


In [33]:
# Best-scoring parameter set(s) over all kernels: every row tied for the
# highest "average". Series.max() skips NaN, whereas the builtin max() is
# order-dependent when a NaN is present and raises on an empty frame.
result_df[result_df.average == result_df.average.max()]

Unnamed: 0,gb1,PSD95pdz3,kka2_1:2,Pab1,Ubiquitin,E1_Ubiquitin,beta-lactamase,hsp90,WW_domain,average,sem,kernel,C,epsilon,gamma,degree
param_20,0.486064,0.439297,0.531666,0.673122,0.249591,0.27217,0.690556,0.406938,0.592559,0.48244,0.047167,linear,1e-05,0.01,,


In [34]:
# Best-scoring polynomial-kernel parameter set(s): restrict to kernel == "poly",
# then keep every row tied for the highest "average" within that subset.
# Series.max() skips NaN, whereas the builtin max() compares element by element
# and can return NaN or a wrong value depending on where a NaN sits.
# An empty subset now gives an empty frame instead of a ValueError.
poly_results = result_df[result_df.kernel == "poly"]
poly_results[poly_results.average == poly_results.average.max()]

Unnamed: 0,gb1,PSD95pdz3,kka2_1:2,Pab1,Ubiquitin,E1_Ubiquitin,beta-lactamase,hsp90,WW_domain,average,sem,kernel,C,epsilon,gamma,degree
param_80,0.560973,0.412925,0.523324,0.686511,0.232571,0.232575,0.656938,0.391007,0.531043,0.469763,0.049319,poly,0.01,1.0,,3.0
