# Hyperparameter Tuning

In [10]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
# Load the bundled iris dataset and expose its feature matrix and labels.
iris = load_iris()
X, Y = iris.data, iris.target

In [17]:
# Tabular view of the dataset: one column per feature plus the class label.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [20]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing. random_state is fixed so the split,
# and every cross-validation score computed from X_train in later cells, is
# reproducible after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
# 5-fold cross-validation scores of a linear-kernel SVM on the training split.
cross_val_score(svm.SVC(kernel='linear', C=10, gamma='auto'), X_train, y_train, cv=5)

array([0.95833333, 0.91666667, 1.        , 0.91666667, 1.        ])

In [21]:
# 5-fold cross-validation scores of an RBF-kernel SVM (C=10) on the training split.
rbf_svc = svm.SVC(kernel='rbf', C=10, gamma='auto')
cross_val_score(rbf_svc, X_train, y_train, cv=5)

array([1.        , 0.91666667, 1.        , 0.91666667, 1.        ])

In [24]:
from sklearn.model_selection import GridSearchCV
# Hand-rolled grid search: evaluate every (kernel, C) pair with 5-fold
# cross-validation and record the mean score under the key "<kernel>_<C>".
kernels = ['rbf', 'linear']
C = [1, 10, 20]
avg_score = {}
for kval in kernels:
    for cval in C:
        candidate = svm.SVC(kernel=kval, C=cval, gamma='auto')
        cv_scores = cross_val_score(candidate, X_train, y_train, cv=5)
        avg_score[f'{kval}_{cval}'] = np.average(cv_scores)
avg_score

{'rbf_1': 0.975,
 'rbf_10': 0.9666666666666666,
 'rbf_20': 0.95,
 'linear_1': 0.975,
 'linear_10': 0.9583333333333333,
 'linear_20': 0.9416666666666667}

In [27]:
from sklearn.model_selection import GridSearchCV
# Exhaustive 5-fold grid search over every (C, kernel) pair for an SVC,
# fitted on the complete iris dataset.
param_grid = {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}
clf = GridSearchCV(svm.SVC(gamma='auto'), param_grid, cv=5, return_train_score=False)
clf.fit(iris.data, iris.target)
clf.cv_results_

{'mean_fit_time': array([0.0011158 , 0.00042157, 0.00340943, 0.00119905, 0.00159998,
        0.00119929]),
 'std_fit_time': array([0.00067016, 0.00084314, 0.00536899, 0.00040028, 0.00048936,
        0.00040028]),
 'mean_score_time': array([0.00032992, 0.00240645, 0.00059881, 0.0010006 , 0.00099993,
        0.0008007 ]),
 'std_score_time': array([4.32650159e-04, 4.56346325e-03, 4.88929872e-04, 6.28991411e-07,
        5.84003864e-07, 4.00353018e-04]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20

In [30]:
# Turn the raw cv_results_ mapping into a table: one row per parameter combination.
df_1 = pd.DataFrame(data=clf.cv_results_)
df_1

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001116,0.00067,0.00033,0.0004326502,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.000422,0.000843,0.002406,0.004563463,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.003409,0.005369,0.000599,0.0004889299,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.001199,0.0004,0.001001,6.289914e-07,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.0016,0.000489,0.001,5.840039e-07,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.001199,0.0004,0.000801,0.000400353,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6


In [33]:
# Keep only the columns needed to compare parameter combinations.
summary_columns = ['param_C', 'param_kernel', 'mean_test_score']
df_1.loc[:, summary_columns]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [35]:
# Best mean cross-validation score found by the grid search.
clf.best_score_

0.9800000000000001

In [36]:
# Parameter combination that achieved the best mean cross-validation score.
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

In [37]:
# Estimator selected by the grid search (the SVC built with the best parameters).
clf.best_estimator_

In [46]:
from sklearn.model_selection import RandomizedSearchCV
# Randomized search evaluates only n_iter=2 of the 6 (C, kernel) combinations.
# Which two are drawn is random, so random_state is fixed to make the
# resulting table reproducible across kernel restarts.
rs = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,
    return_train_score=False,
    n_iter=2,
    random_state=42,
)

rs.fit(iris.data, iris.target)
pd.DataFrame(rs.cv_results_)[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.98
1,20,rbf,0.966667


In [52]:
# Search space for comparing model families: each entry maps a display name to
# the estimator to tune ('model') and the grid of values to try ('params').
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # A random forest is stochastic; seed it so the reported best score and
        # best n_estimators do not change from one run to the next.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10],
        },
    },
    'logistic_regression': {
        # NOTE(review): multi_class appears to be deprecated in recent
        # scikit-learn releases - confirm against the installed version.
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'C': [1, 5, 10],
        },
    },
}

In [55]:
# Run a 5-fold grid search for every model family in model_params and collect
# the best score / parameters of each into a summary table.
# Note: this rebinds `clf` (last search fitted) and `df` (now the summary).
scores = []
for model_name, mp in model_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=5,
        return_train_score=False,
    ).fit(iris.data, iris.target)
    scores.append(dict(
        model=model_name,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    ))
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])

In [56]:
# Show the per-model summary (best score and best parameters for each model).
df

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.96,{'n_estimators': 1}
2,logistic_regression,0.966667,{'C': 5}
