In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split,StratifiedKFold,cross_val_score,GridSearchCV,RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
iris = load_iris()
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [4]:
pd.set_option('display.max_rows', None)
df = pd.DataFrame(iris.data,columns=iris.feature_names)
df['target'] = iris.target
df['target_names'] = df.target.apply(lambda x : iris.target_names[x])
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,target_names
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa
5,5.4,3.9,1.7,0.4,0,setosa
6,4.6,3.4,1.4,0.3,0,setosa
7,5.0,3.4,1.5,0.2,0,setosa
8,4.4,2.9,1.4,0.2,0,setosa
9,4.9,3.1,1.5,0.1,0,setosa


In [5]:
X = iris.data
Y = iris.target

In [6]:
kernels = ['rbf','linear']
Cs = [1,10,20]
gammas = ['auto','scale']
avg_scores = {}

for k in kernels:
    for g in gammas:
        for c in Cs:
            cv_scores = cross_val_score(SVC(kernel=k, C=c, gamma=g), X, Y, cv=5)
            avg_scores[k + ',' + g + ',' + str(c)] = np.average(cv_scores)
avg_scores

{'rbf,auto,1': 0.9800000000000001,
 'rbf,auto,10': 0.9800000000000001,
 'rbf,auto,20': 0.9666666666666668,
 'rbf,scale,1': 0.9666666666666666,
 'rbf,scale,10': 0.9800000000000001,
 'rbf,scale,20': 0.9800000000000001,
 'linear,auto,1': 0.9800000000000001,
 'linear,auto,10': 0.9733333333333334,
 'linear,auto,20': 0.9666666666666666,
 'linear,scale,1': 0.9800000000000001,
 'linear,scale,10': 0.9733333333333334,
 'linear,scale,20': 0.9666666666666666}

### GridSearchCV

In [24]:
gs = GridSearchCV(SVC(), {
    'kernel':['rbf','linear'],
    'C':[1,10,20],
    'gamma':['auto','scale']
}, cv=5, return_train_score=False)
gs

gs.fit(X,Y)

In [8]:
df = pd.DataFrame(gs.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001993,0.0007164363,0.001584,0.0005330803,1,auto,rbf,"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.001728,0.0003885356,0.001222,0.0003896503,1,auto,linear,"{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.001991,7.092863e-06,0.001596,0.0004876637,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.966667,0.966667,0.966667,0.933333,1.0,0.966667,0.021082,10
3,0.001596,0.0004885972,0.001006,1.688273e-05,1,scale,linear,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
4,0.001595,0.0004885195,0.001197,0.0003985171,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
5,0.001396,0.0004888113,0.000599,0.0004887334,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,7
6,0.001389,0.0004943636,0.000798,0.0003989697,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
7,0.001197,0.0003987551,0.000798,0.0003989697,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,7
8,0.000997,2.780415e-07,0.000997,3.371748e-07,20,auto,rbf,"{'C': 20, 'gamma': 'auto', 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,9
9,0.000997,1.907349e-07,0.000798,0.0003990412,20,auto,linear,"{'C': 20, 'gamma': 'auto', 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,10


In [9]:
df = df[['param_C','param_gamma','param_kernel','mean_test_score','rank_test_score']]
df = df.sort_values('rank_test_score',ascending=True)
df

Unnamed: 0,param_C,param_gamma,param_kernel,mean_test_score,rank_test_score
0,1,auto,rbf,0.98,1
1,1,auto,linear,0.98,1
3,1,scale,linear,0.98,1
4,10,auto,rbf,0.98,1
6,10,scale,rbf,0.98,1
10,20,scale,rbf,0.98,1
5,10,auto,linear,0.973333,7
7,10,scale,linear,0.973333,7
8,20,auto,rbf,0.966667,9
2,1,scale,rbf,0.966667,10


In [10]:
gs.best_score_

0.9800000000000001

In [11]:
gs.best_params_

{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}

In [12]:
gs.best_index_

0

In [13]:
gs.best_estimator_

### RandomizedSearchCV

In [14]:
rs = RandomizedSearchCV(SVC(), {
    'kernel':['rbf','linear'],
    'C':[1,10,20],
    'gamma':['auto','scale']
}, cv=5, return_train_score=False, n_iter=3) # iterations = 3

rs.fit(X,Y)

In [15]:
df = pd.DataFrame(rs.cv_results_)
df = df[['param_C','param_gamma','param_kernel','mean_test_score','rank_test_score']]
df

Unnamed: 0,param_C,param_gamma,param_kernel,mean_test_score,rank_test_score
0,20,auto,rbf,0.966667,3
1,10,scale,rbf,0.98,1
2,1,auto,rbf,0.98,1


In [16]:
rs.best_score_

0.9800000000000001

In [17]:
rs.best_params_

{'kernel': 'rbf', 'gamma': 'scale', 'C': 10}

# FOR MULTIPLE MODELS

In [18]:
models_params = {
    'svm':{
        'model':SVC(),
        'param':{'kernel':['rbf','linear'], 'C':[1,10,20], 'gamma':['auto','scale']}
    },
    'ensemble':{
        'model':RandomForestClassifier(),
        'param':{'n_estimators':[10,20,30]}
    },
    'linear_model':{
        'model':LogisticRegression(),
        'param':{'C':[1,10,20]}
    }
}

In [19]:
for module_name, module_info in models_params.items():
    print('-'*100)
    print("Module:", module_name)
    print("Model:", module_info['model'])
    
    for param_name, param_values in module_info['param'].items():
        print("Parameter:", param_name, param_values)

----------------------------------------------------------------------------------------------------
Module: svm
Model: SVC()
Parameter: kernel ['rbf', 'linear']
Parameter: C [1, 10, 20]
Parameter: gamma ['auto', 'scale']
----------------------------------------------------------------------------------------------------
Module: ensemble
Model: RandomForestClassifier()
Parameter: n_estimators [10, 20, 30]
----------------------------------------------------------------------------------------------------
Module: linear_model
Model: LogisticRegression()
Parameter: C [1, 10, 20]


In [28]:
best_scores = []

for module_name, module_info in models_params.items():
    gs = GridSearchCV(module_info['model'],module_info['param'],cv=5,return_train_score=False)
    gs.fit(X,Y)
    best_scores.append({'model':module_info['model'], 'best_param':gs.best_params_, 'best_score': gs.best_score_})

best_scores

[{'model': SVC(),
  'best_param': {'C': 1, 'gamma': 'auto', 'kernel': 'rbf'},
  'best_score': 0.9800000000000001},
 {'model': RandomForestClassifier(),
  'best_param': {'n_estimators': 20},
  'best_score': 0.96},
 {'model': LogisticRegression(),
  'best_param': {'C': 10},
  'best_score': 0.9800000000000001}]

In [21]:
df = pd.DataFrame(best_scores)
df

Unnamed: 0,model,best_param,best_score
0,SVC(),"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.98
1,RandomForestClassifier(),{'n_estimators': 30},0.96
2,LogisticRegression(),{'C': 10},0.98
