In [1]:
import pandas as pd 
import numpy as np
import pickle
import matplotlib.pyplot as plt
import joblib

from sklearn import svm
from sklearn.svm import NuSVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

import warnings
# NOTE(review): this silences *every* warning for the whole notebook,
# including pandas FutureWarning/DeprecationWarning and scikit-learn's
# DataConversionWarning. Consider narrowing it to specific categories so
# upcoming API removals stay visible.
warnings.filterwarnings('ignore')

In [2]:
# Load the pickled train / test frames.
# Context managers close the file handles deterministically instead of
# leaving them open until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('./pickle/train.pkl', 'rb') as train_file:
    train = pickle.load(train_file)
with open('./pickle/test.pkl', 'rb') as test_file:
    test = pickle.load(test_file)

In [3]:
# Hyperparameter grid explored by GridSearchCV for the RBF-kernel NuSVR.
# Only C and gamma are searched; NuSVR exposes `nu` rather than `epsilon`,
# so no epsilon values are included.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=['auto', 'scale'],
)

In [4]:
def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file afterwards."""
    # NOTE: pickle.load can execute arbitrary code; only load files you created.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Descriptor subsets used as column selectors on train/test further down.
# One file per subset; the individual bd_N names are kept alongside bd_list.
bd_5 = _load_pickle('./pickle/trial_10/bestDesc_5_2050.pkl')
bd_10 = _load_pickle('./pickle/trial_10/bestDesc_10_2050.pkl')
bd_15 = _load_pickle('./pickle/trial_10/bestDesc_15_2050.pkl')
bd_20 = _load_pickle('./pickle/trial_10/bestDesc_20_2050.pkl')
bd_25 = _load_pickle('./pickle/trial_10/bestDesc_25_2050.pkl')
bd_list = [bd_5, bd_10, bd_15, bd_20, bd_25]

In [5]:
# Grid-search an RBF-kernel NuSVR for every descriptor subset in `bd_list`
# and record the best C / gamma, the train and test R^2, and the predictions.
result_columns = ['desc', 'C', 'Gamma', 'r2_train', 'r2_test', 'train_pred', 'test_pred']

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and re-copies the frame on
# every call.
records = []

# The target is the last column of each frame. It is taken as a 1-D Series
# (not a one-column frame) so the estimator receives y in the shape it
# expects; it does not depend on the descriptor subset, so it is read once.
y_train = train.iloc[:, -1]
y_test = test.iloc[:, -1]

for descriptors in tqdm(bd_list):
    # Prepare data x: keep only the columns of this descriptor subset
    x_train = train.loc[:, descriptors]
    x_test = test.loc[:, descriptors]

    # Feature scaling with MinMaxScaler(), fitted on the training data only
    # NOTE(review): the scaler is fitted before cross-validation, so each CV
    # fold sees min/max computed on the full training set.
    scaler = MinMaxScaler()
    scale_x_train = scaler.fit_transform(x_train)
    scale_x_test = scaler.transform(x_test)

    # Search best params for NuSVR()
    gsc = GridSearchCV(
        estimator=NuSVR(kernel='rbf'),
        param_grid=param_grid,
        cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
    gsc.fit(scale_x_train, y_train)
    param_c = gsc.best_params_['C']
    param_gamma = gsc.best_params_['gamma']

    # GridSearchCV (refit=True by default) has already refitted the best
    # configuration on the whole training set, so reuse that model instead of
    # fitting an identical one a second time.
    model = gsc.best_estimator_

    # Calculate predictions
    y_train_pred = model.predict(scale_x_train)
    y_test_pred = model.predict(scale_x_test)

    # Calculate r2 score
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)

    records.append({
        'desc': len(descriptors),
        'C': param_c,
        'Gamma': param_gamma,
        'r2_train': r2_train,
        'r2_test': r2_test,
        'train_pred': y_train_pred,
        'test_pred': y_test_pred,
    })

# Passing `columns` fixes the column order and keeps the expected (empty)
# schema even when bd_list has no entries.
r2_nusvr_rbf = pd.DataFrame(records, columns=result_columns)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:05<00:00,  1.14s/it]


In [8]:
# Display the results table: one row per descriptor subset with the selected
# C / gamma, the train and test R^2, and the stored prediction arrays.
r2_nusvr_rbf

Unnamed: 0,desc,C,Gamma,r2_train,r2_test,train_pred,test_pred
0,5,100,auto,0.686089,0.513148,"[6.057672922414157, 7.193325454024653, 6.91061...","[6.174985060707081, 6.264111501253489, 6.38244..."
1,10,100,auto,0.721759,0.460309,"[6.15789415654515, 7.105224874871015, 6.988333...","[6.347023917343267, 6.35493472171554, 6.556473..."
2,15,100,auto,0.741526,0.427337,"[6.165984520151636, 7.13378999710599, 6.961357...","[6.386112059755175, 6.384804310962079, 6.62246..."
3,20,10,scale,0.826793,0.485638,"[5.930508117687008, 7.244141756633685, 7.42237...","[6.2455896155550565, 6.3237322927481525, 6.235..."
4,25,10,scale,0.910351,0.548674,"[5.8910459667510136, 7.118888009315695, 7.5834...","[6.195649902125597, 6.127135745245323, 6.38847..."


In [9]:
# Persist the results table for later analysis. The file is written by
# joblib, so read it back with joblib.load.
results_path = './pickle/trial_10/r2_nusvr_rbf.pkl'
joblib.dump(r2_nusvr_rbf, results_path)

['./pickle/trial_10/r2_nusvr_rbf.pkl']