In [5]:
import pandas as pd 
import numpy as np
import pickle
import matplotlib.pyplot as plt
import joblib

from sklearn import svm
from sklearn.svm import NuSVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

import warnings
# Silence every warning category for the rest of the session. Note that this
# also hides deprecation and data-conversion warnings emitted by the
# libraries used in the cells below.
warnings.filterwarnings('ignore')

In [6]:
# Load the pre-built train and test sets. Each file is opened in a context
# manager so its handle is closed as soon as the object has been read.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('./pickle/train.pkl', 'rb') as fh:
    train = pickle.load(fh)
with open('./pickle/test.pkl', 'rb') as fh:
    test = pickle.load(fh)

In [7]:
# Hyper-parameter grid for the grid search: candidate values for C and a
# single fixed value for degree.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    degree=[1],
)

In [8]:
# Load the five saved "best descriptor" selections (files bestDesc_5 ...
# bestDesc_25) into bd_list, in that order. Each file is opened in a context
# manager so its handle is closed right after it has been read.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
bd_list = []
for n_desc in (5, 10, 15, 20, 25):
    with open(f'./pickle/trial_10/bestDesc_{n_desc}_2050.pkl', 'rb') as fh:
        bd_list.append(pickle.load(fh))

# Keep the individual names bound for any cell that refers to them directly.
bd_5, bd_10, bd_15, bd_20, bd_25 = bd_list

In [9]:
# For every descriptor set in bd_list: scale the selected columns, tune C for a
# linear-kernel NuSVR with 5-fold grid search, and record the chosen
# parameters, the train/test R^2 scores and the predictions (one row per set).
result_columns = ['Desc', 'C', 'Degree', 'r2_train', 'r2_test', 'train_pred', 'test_pred']

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# growing a frame row by row copies it on every iteration.
records = []

# The target is the last column of each set. It is selected as a 1-D Series
# (iloc[:, -1] rather than iloc[:, [-1]]) because the regressor expects a 1-D
# y; a single-column frame only worked through an implicit conversion whose
# warning was being suppressed. The target does not depend on the descriptor
# set, so it is selected once outside the loop.
y_train = train.iloc[:, -1]
y_test = test.iloc[:, -1]

for descriptors in tqdm(bd_list):
    # Feature matrices restricted to the current descriptor set
    x_train = train.loc[:, descriptors]
    x_test = test.loc[:, descriptors]

    # Feature scaling with MinMaxScaler(): fitted on the training data only,
    # then applied unchanged to the test data
    scaler = MinMaxScaler()
    scale_x_train = scaler.fit_transform(x_train)
    scale_x_test = scaler.transform(x_test)

    # Search the best params for NuSVR()
    gsc = GridSearchCV(
        estimator=NuSVR(kernel='linear'),
        param_grid=param_grid,
        cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
    gsc.fit(scale_x_train, y_train)

    param_c = gsc.best_params_['C']
    # NOTE: per the scikit-learn docs, 'degree' only affects the 'poly' kernel;
    # it is kept so the 'Degree' column of the results stays available.
    param_degree = gsc.best_params_['degree']

    # GridSearchCV (refit=True by default) has already refit a NuSVR with the
    # best params on the full training set, so reuse it instead of fitting an
    # identical model a second time.
    model = gsc.best_estimator_

    # Calculate predictions
    y_train_pred = model.predict(scale_x_train)
    y_test_pred = model.predict(scale_x_test)

    # Calculate r2 scores
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)

    records.append({
        'Desc': len(descriptors),
        'C': param_c,
        'Degree': param_degree,
        'r2_train': r2_train,
        'r2_test': r2_test,
        'train_pred': y_train_pred,
        'test_pred': y_test_pred,
    })

# Passing columns= fixes the column order and still yields an empty frame with
# the right header when bd_list is empty.
r2_nusvr_linear = pd.DataFrame(records, columns=result_columns)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:06<00:00,  1.30s/it]


In [10]:
# Display the collected results table (one row per descriptor set).
r2_nusvr_linear

Unnamed: 0,Desc,C,Degree,r2_train,r2_test,train_pred,test_pred
0,5,100,1,0.614624,0.493891,"[6.136125372347067, 7.0883882312852045, 6.8835...","[6.209386186305817, 6.280366689202605, 6.33714..."
1,10,1000,1,0.711376,0.387296,"[6.159291250793302, 7.1296059114199295, 7.0140...","[6.501257754357738, 6.432182143344306, 6.74860..."
2,15,10,1,0.688329,0.435485,"[6.312138657580119, 7.076309851473935, 6.90122...","[6.473996558315379, 6.355059159469404, 6.61694..."
3,20,1000,1,0.769825,0.433446,"[6.138532040616401, 7.046656691999278, 7.55471...","[6.359011504843483, 6.506978193074946, 6.16301..."
4,25,1,1,0.663041,0.408284,"[6.244133956090293, 7.099227410919104, 7.10188...","[6.46240406569448, 6.487341769089113, 6.667762..."


In [11]:
# Persist the results table to disk with joblib.
# NOTE(review): this file is written with joblib, while the other files under
# ./pickle are read with pickle.load in this notebook; confirm that whatever
# reads this file uses joblib.load.
results_path = './pickle/trial_10/r2_nusvr_linear.pkl'
joblib.dump(r2_nusvr_linear, results_path)

['./pickle/trial_10/r2_nusvr_linear.pkl']