In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score

In [2]:
from sklearn.datasets import load_iris

In [3]:
# Load the bundled iris dataset object.
iris = load_iris()

In [4]:
# List the attribute names available on the loaded dataset object.
dir(iris)

['DESCR',
 'data',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [6]:
# Feature matrix and class labels taken from the loaded dataset object.
inputs, target = iris.data, iris.target

In [8]:
from sklearn.svm import SVC

In [12]:
# Mean 5-fold cross-validation score for a linear-kernel SVM with C=10.
cross_val_score(
    SVC(kernel='linear', C=10, gamma='auto'),
    X=inputs,
    y=target,
    cv=5,
).mean()

0.9733333333333334

In [13]:
# Mean 5-fold cross-validation score for an RBF-kernel SVM with C=1.
cross_val_score(
    SVC(kernel='rbf', C=1, gamma='auto'),
    X=inputs,
    y=target,
    cv=5,
).mean()

0.9800000000000001

In [17]:
# Mean 5-fold cross-validation score for a linear-kernel SVM with C=20.
cross_val_score(
    SVC(kernel='linear', C=20, gamma='auto'),
    X=inputs,
    y=target,
    cv=5,
).mean()

0.9666666666666666

In [18]:
# Manual grid search: store the mean 5-fold CV score of every
# kernel / C combination under a "<kernel>_<C>" key.
score = {}
C = [1, 10, 20]
kernel = ['rbf', 'linear']
for k in kernel:
    for c in C:
        fold_scores = cross_val_score(
            SVC(kernel=k, C=c, gamma='auto'), inputs, target, cv=5
        )
        avg = fold_scores.mean()
        score[f'{k}_{c}'] = avg
score

{'rbf_1': 0.9800000000000001,
 'rbf_10': 0.9800000000000001,
 'rbf_20': 0.9666666666666668,
 'linear_1': 0.9800000000000001,
 'linear_10': 0.9733333333333334,
 'linear_20': 0.9666666666666666}

The process above involves a lot of manual work; to reduce it, sklearn provides the GridSearchCV API.

In [19]:
# import API GridSearchCV
from sklearn.model_selection import GridSearchCV

In [22]:
# Exhaustive search over every C / kernel combination using 5-fold CV.
clf = GridSearchCV(
    estimator=SVC(gamma='auto'),
    param_grid={
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,
    return_train_score=False,
)
clf.fit(inputs, target)

# Tabulate the per-candidate cross-validation results as a DataFrame.
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001599,0.003198,0.0,0.0,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.00161,0.003221,0.0,0.0,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.001596,0.003192,0.0,0.0,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.0,0.0,0.001599,0.003198,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.0,0.0,0.0,0.0,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.0016,0.0032,0.0,0.0,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6


In [23]:
# Keep only the searched parameters and the resulting mean CV score.
df = df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [24]:
# Display the results table reduced to the parameter and mean-score columns.
df

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


 The limitation of GridSearchCV is that its cost grows exponentially as the number of parameters increases, because it evaluates every combination of the parameter values.
 
 So in practice RandomizedSearchCV is often preferred: it accepts an n_iter parameter and evaluates only that many randomly sampled parameter combinations.

In [30]:
# import RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Randomized search: evaluate only n_iter=3 sampled C / kernel combinations
# instead of the full grid, each scored with 5-fold CV.
clf = RandomizedSearchCV(
    SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,
    return_train_score=False,
    n_iter=3,
    # Fix the seed so the same combinations are sampled on every run;
    # without it the table below changes each time the cell is executed.
    random_state=0,
)
clf.fit(inputs, target)

# Keep only the sampled parameters and their mean CV score.
df = pd.DataFrame(clf.cv_results_)[['param_C', 'param_kernel', 'mean_test_score']]
df

Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,rbf,0.98
1,20,rbf,0.966667
2,1,rbf,0.98


# Now apply various algorithms and choose the best of them

In [31]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


In [34]:
# build a dictionary that stores the candidate models and their respective parameter grids

In [43]:
# Candidate models for the comparison. Each entry maps a display name to:
#   'model'  - the estimator instance to tune
#   'params' - the hyperparameter grid searched for that estimator
model_params = {
    'SVM': {
        'model': SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear']
        }
    },
    'RandomForest': {
        # Seed the forest so its scores and best parameters are reproducible;
        # an unseeded forest gives different results on every run.
        'model': RandomForestClassifier(random_state=0),
        'params': {
            'n_estimators': [20, 5, 10]
        }
    },
    'LogisticRegression': {
        'model': LogisticRegression(max_iter=1000),
        'params': {
            'C': [1, 5, 10]
        }
    }
}

In [44]:
# Run a 5-fold grid search for each candidate model and record the best
# score and best parameters found for it.
score = []
for model_name, mp in model_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(inputs, target)
    score.append(dict(
        model=model_name,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    ))

In [45]:
# Show the collected per-model records (model, best_score, best_params) as a table.
pd.DataFrame(score)

Unnamed: 0,model,best_score,best_params
0,SVM,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,RandomForest,0.96,{'n_estimators': 10}
2,LogisticRegression,0.973333,{'C': 1}


Based on the results above, we will use SVM with parameters C=1, kernel='rbf'.