In [16]:
# データ加工・処理・分析ライブラリ
import numpy as np
import numpy.random as random
import scipy as sp
from pandas import Series, DataFrame
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# 可視化ライブラリ
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns; sns.set()
%matplotlib inline

# 機械学習ライブラリ
import sklearn
from sklearn.model_selection import train_test_split

# 小数第3位まで表示
%precision 3

'%.3f'

In [17]:
# pandas display limits: show up to 500 rows and 50 columns before truncating.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 50)

# matplotlib defaults applied to every figure drawn after this cell.
plt.rcParams.update({
    "figure.figsize": [18, 10],  # figure size as [width, height]
    'font.size': 16,             # base font size (original note says the default is 12 -- verify)
    'xtick.labelsize': 14,       # x-axis tick label size
    'ytick.labelsize': 14,       # y-axis tick label size
})

In [18]:
# Load the iris dataset through seaborn; later cells read its
# sepal_length, sepal_width, petal_width and species columns.
iris = sns.load_dataset("iris")

In [19]:
# Encode the species name as an integer class label:
# 'setosa' -> 0, 'versicolor' -> 1, and every other value -> 2.
iris['fin_flg'] = iris['species'].map(
    lambda species: 0 if species == 'setosa' else (1 if species == 'versicolor' else 2)
)

In [20]:
# Linear support vector machine trained on standardized features.
X = iris[['sepal_length','sepal_width','petal_width']]
y = iris['fin_flg']
# Stratified 50/50 split with a fixed seed so the split itself is reproducible.
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0,test_size=0.5,stratify=y)

# Fit the scaler on the training split only, then apply the same
# transformation to the test split so no test statistics leak into training.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# LinearSVC's solver can use a pseudo-random number generator (dual
# coordinate descent); pin random_state so re-running the notebook
# yields the same fitted model and scores.
model = LinearSVC(random_state=0)
model.fit(X_train_std,y_train)

# Report accuracy on the training and test splits.
print('正解率(train):{:.3f}'.format(model.score(X_train_std,y_train)))
print('正解率(test):{:.3f}'.format(model.score(X_test_std,y_test)))

正解率(train):0.947
正解率(test):0.933


In [24]:
# Grid search over C and gamma for the SVC class (raw, unscaled features).
X = iris[['sepal_length', 'sepal_width', 'petal_width']]
y = iris['fin_flg']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=0
)

# Six log-spaced candidates (1e-3 ... 1e2) for each hyperparameter,
# i.e. 36 combinations evaluated with 5-fold cross-validation.
param_grid = dict(
    C=np.logspace(-3, 2, num=6),
    gamma=np.logspace(-3, 2, num=6),
)

gs = GridSearchCV(SVC(), param_grid, cv=5)
gs.fit(X_train, y_train)

# Best mean CV score, the parameters that achieved it, and the
# accuracy of the best estimator on the held-out test split.
print('best cross validation score:{:.3f}'.format(gs.best_score_))
print('best parameters:{}'.format(gs.best_params_))
print('test score:{:.3f}'.format(gs.score(X_test, y_test)))

best cross validation score:0.973
best parameters:{'C': 10.0, 'gamma': 0.1}
test score:0.947


In [26]:
# SVC with fixed hyperparameters (C=10.0, gamma=0.1 -- the values the
# grid search cell above reported as best).
X = iris[['sepal_length', 'sepal_width', 'petal_width']]
y = iris['fin_flg']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=0
)

# NOTE: the features are used without standardization in this cell.
model = SVC(C=10.0, gamma=0.1)
model.fit(X_train, y_train)

# Report accuracy on the training and test splits.
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print('正解率(train):{:.3f}'.format(train_accuracy))
print('正解率(test):{:.3f}'.format(test_accuracy))

正解率(train):0.987
正解率(test):0.947
