### AI-11 Quiz 1  

#### Import libraries  

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.pipeline import Pipeline
from joblib import dump

#### Parameters  

In [3]:
# Input CSV holding the quiz data set; the path is resolved relative to the
# notebook's working directory.
csv_in = "ai-11-quiz.csv"

#### Read CSV file  

In [4]:
# Load the whole CSV into a DataFrame; the first row of the file supplies the
# column names (header=0) and no leading rows are skipped.
df_all = pd.read_csv(csv_in, delimiter=',', skiprows=0, header=0)
print(df_all.shape)
# DataFrame.info() writes its summary to stdout on its own and returns None,
# so it is called directly: wrapping it in print() emitted a stray "None" line.
df_all.info()
display(df_all.head())

(200, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   f1      200 non-null    float64
 1   f2      200 non-null    float64
 2   f3      200 non-null    float64
 3   b       200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
None


Unnamed: 0,f1,f2,f3,b
0,6.11,9.35,6.03,48.3
1,9.59,7.44,8.06,46.0
2,8.83,7.59,6.48,46.02
3,5.94,5.4,8.69,42.45
4,7.21,5.79,9.4,43.03


In [6]:
# Explanatory variables: every column from 'f1' through 'f3' (a label slice
# includes both end points).
X = df_all.loc[:, 'f1':'f3']
print('X:', X.shape)
display(X.head())

# Objective variable: the single column 'b'.
y = df_all['b']
print('y:', y.shape)
print(y.head())

X: (200, 3)


Unnamed: 0,f1,f2,f3
0,6.11,9.35,6.03
1,9.59,7.44,8.06
2,8.83,7.59,6.48
3,5.94,5.4,8.69
4,7.21,5.79,9.4


y: (200,)
0    48.30
1    46.00
2    46.02
3    42.45
4    43.03
Name: b, dtype: float64


In [7]:
# Two-step model: min-max scaling of the features followed by an RBF-kernel
# support vector regressor.
pipe = Pipeline(
    steps=[
        ('scaler', MinMaxScaler(feature_range=(0, 1), copy=True)),
        ('svr', SVR(kernel='rbf')),
    ]
)

# Candidate values for each SVR hyperparameter, one decade apart.
C_grid = [1e3, 1e4, 1e5]
g_grid = [1e-3, 1e-2, 1e-1]
e_grid = [1e-5, 1e-4, 1e-3]

# Hyperparameter settings for the grid search. The 'svr__' prefix addresses
# the parameter of the pipeline step named 'svr'.
param_grid = {
    'svr__C': C_grid,
    'svr__gamma': g_grid,
    'svr__epsilon': e_grid,
}

In [8]:
# Two separately seeded 4-fold shuffled splitters so the two uses below do not
# share the same partition of the rows.
# grid_cv: folds used inside the grid search.
grid_cv = KFold(n_splits=4, random_state=19, shuffle=True)
# gen_cv: folds used to estimate generalization performance.
gen_cv = KFold(n_splits=4, random_state=23, shuffle=True)

In [9]:
# Exhaustive search over param_grid, scored by negated mean squared error on
# the grid_cv folds.
gs = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=grid_cv,
    scoring='neg_mean_squared_error',
)

In [10]:
%%time
# Nested cross-validation: the grid search object itself is the estimator, so
# it is re-run on the training part of every gen_cv fold.
nested_score = cross_val_score(
    estimator=gs,
    X=X,
    y=y,
    cv=gen_cv,
    scoring='neg_mean_squared_error',
)
print(nested_score)
# The scores are negated MSE values: flip the sign of their mean, then take
# the square root to report an RMSE.
print(np.sqrt(-np.mean(nested_score)))

[-2.58092523 -2.60025316 -2.51811577 -2.71032449]
1.613197031246102
Wall time: 28.6 s


In [11]:
%%time
# Run the grid search on all rows and keep the winning pipeline; fit() returns
# the fitted search object, so the attribute can be read off the same call.
gs_best = gs.fit(X, y).best_estimator_

Wall time: 10.1 s


In [12]:
# Show the selected pipeline as plain text, including the chosen SVR settings.
print(gs_best)

Pipeline(steps=[('scaler', MinMaxScaler()),
                ('svr', SVR(C=10000.0, epsilon=0.0001, gamma=0.01))])
