In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from pathlib import Path

from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Load the four processed channel tables from one configurable directory.
# Change PROCESS_DIR to point the notebook at another copy of the data.
PROCESS_DIR = Path("C:/Users/Victor/Desktop/PIR/process")


def load_channel(name):
    """Read ``PROCESS_DIR/<name>.pkl`` and return its content as a DataFrame.

    Parameters
    ----------
    name : str
        File stem of the pickled channel, e.g. ``"Ch001"``.

    Returns
    -------
    pandas.DataFrame
        The unpickled object wrapped in ``pd.DataFrame``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    return pd.DataFrame(pd.read_pickle(PROCESS_DIR / (name + ".pkl")))


df_Ch001 = load_channel("Ch001")
df_Ch009 = load_channel("Ch009")
df_Ch020 = load_channel("Ch020")
df_Ch022 = load_channel("Ch022")

In [3]:
from model_util import data_scale
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Scale the three input channels and the output channel with data_scale.
LOG_SCALE = False
scaler = MinMaxScaler()

# NOTE(review): the same MinMaxScaler instance is handed to every data_scale
# call; whether data_scale refits it in place or works on a copy is not
# visible from this notebook -- confirm in model_util.
df_input = [df_Ch001, df_Ch009, df_Ch020]
df_input_scaled = [data_scale(channel, scaler, LOG_SCALE) for channel in df_input]

# With output=True the helper returns a pair: the scaled output table and the
# scaler object later used to map predictions back to original units.
df_output_scaled, output_scaler = data_scale(df_Ch022, scaler, LOG_SCALE, output=True)

In [4]:
# Accumulators for the error metrics; the training loop appends one value
# per output column to each list.
MSE, MEA, R2 = [], [], []

# Zero-filled frame with the same index and columns as df_Ch022; the training
# loop overwrites it column by column with the model predictions.
zero_data = np.zeros(df_Ch022.shape)
df_pred = pd.DataFrame(zero_data, index=df_Ch022.index, columns=df_Ch022.columns)

In [5]:
# Fit one RBF-kernel SVR per column of df_Ch022: tune C/gamma by grid search,
# optionally re-split according to the learning curve, record test-set error
# metrics in original units, and store full-length predictions in df_pred.
TEST_FRACTION = 0.15       # share of leading rows held out as the initial test set
MIN_TRAIN_FRACTION = 0.5   # re-split only if the best train size exceeds this share of rows

size = len(df_Ch022.index)

# Start from empty metric lists so that re-running this cell does not append
# a second set of rows to the lists created in the initialisation cell.
MSE.clear()
MEA.clear()
R2.clear()

# The stacked inputs do not depend on the column index, so build them once.
stacked_inputs = np.dstack(df_input_scaled)

for p in range(len(df_Ch022.columns)):

    # format, split data: features are column p of every scaled input table,
    # the target is column p of the scaled output table
    data = stacked_inputs[:, p, :]
    target = df_output_scaled.iloc[:, p].copy().values

    # The leading rows form the test set, the remaining rows the training set.
    n_test = int(size * TEST_FRACTION)
    X_test, y_test = data[:n_test], target[:n_test]
    X_train, y_train = data[n_test:], target[n_test:]

    # gridsearch (every candidate sets gamma, so no default gamma is needed
    # on the estimator itself)
    svr_grid = GridSearchCV(SVR(kernel='rbf'),
                            param_grid={"C": [1e0, 1e1, 1e2, 1e3],
                                        "gamma": np.logspace(-2, 2, 5)})

    svr_grid.fit(X_train, y_train)
    C = svr_grid.best_params_['C']
    gamma = svr_grid.best_params_['gamma']

    # learning curve
    svr_lc = SVR(kernel='rbf', C=C, gamma=gamma)
    train_sizes, train_scores_svr, test_scores_svr = learning_curve(
        svr_lc, X_train, y_train, train_sizes=np.linspace(0.1, 1, 10),
        scoring="neg_mean_squared_error", cv=10)

    # Scores are negated MSE, so the highest mean validation score marks the
    # training-set size with the lowest cross-validated error.
    best_train_size = train_sizes[np.argmax(test_scores_svr.mean(axis=1))]
    if best_train_size > int(size * MIN_TRAIN_FRACTION):
        # Keep only the trailing best_train_size rows for training and
        # evaluate on everything before them.
        # NOTE(review): the enlarged test set then contains rows that were
        # part of the data used for the grid search above -- confirm that
        # this is acceptable for the reported metrics.
        n_test = size - best_train_size
        X_test, y_test = data[:n_test], target[:n_test]
        X_train, y_train = data[n_test:], target[n_test:]

    # compute test predictions and errors
    svr = SVR(kernel='rbf', C=C, gamma=gamma)
    svr.fit(X_train, y_train)
    y_pred_test = svr.predict(X_test)

    # Map scaled values back with the fitted min/max of column p. Dedicated
    # names are used so the builtins min()/max() are not shadowed.
    # NOTE(review): this undoes a 0-1 min-max scaling only; if data_scale
    # applies a further transform when LOG_SCALE is True, that transform is
    # not inverted here -- confirm against model_util.
    col_min = output_scaler.data_min_[p]
    col_max = output_scaler.data_max_[p]
    col_range = col_max - col_min
    y_test = col_range * y_test + col_min
    y_pred_test = col_range * y_pred_test + col_min

    MSE.append(mean_squared_error(y_test, y_pred_test))
    MEA.append(mean_absolute_error(y_test, y_pred_test))
    R2.append(r2_score(y_test, y_pred_test))

    # using model: predict every row (training and test) in original units
    y_pred = col_range * svr.predict(data) + col_min

    df_pred[df_pred.columns[p]] = y_pred

In [13]:
# Persist the predictions and the per-column metrics as pickle files.
predictions_path = Path("C:/Users/Victor/Desktop/PIR/model/saved_models/SVM_predictions.pkl")
metrics_path = Path("C:/Users/Victor/Desktop/PIR/model/saved_models/SVM_metrics.pkl")

df_pred.to_pickle(predictions_path)

# One row per output column; the "MEA" column holds the values collected
# from mean_absolute_error (key spelling kept as is).
metric_columns = {"MSE": MSE, "MEA": MEA, "R2": R2}
df_metrics = pd.DataFrame(metric_columns)
df_metrics.to_pickle(metrics_path)

In [12]:
# Display the metrics table (columns MSE, MEA, R2; one row per output column).
df_metrics

Unnamed: 0,MSE,MEA,R2
0,125.018,10.952686,-155.1081
1,408.7399,20.183472,-396.3106
2,4000360000.0,59015.891803,-1519043000.0
3,625.4434,24.87756,-99.22316
4,35.33907,4.768083,-0.8314923
5,15.59611,2.648192,0.001044149
6,18.99737,3.523843,-0.0885402
7,29.04815,4.656917,-0.4316106
8,50216.93,222.424257,-3144.73
9,82.54278,6.305944,-9.259101
