In [2]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.datasets import load_iris


In [3]:
iris  = load_iris()

In [4]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [6]:
df = pd.DataFrame(iris.data, columns=iris.feature_names)

In [7]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [8]:
df['flower'] = iris.target

In [9]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [10]:
df['flower'] = df['flower'].apply(lambda x: iris.target_names[x])

In [11]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [30]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size= 0.2)

In [31]:
model = SVC(kernel='rbf', C= 30, gamma='auto')

In [32]:
model.fit(x_train, y_train)

In [33]:
model.score(x_test, y_test)

0.9666666666666667

In [34]:
from sklearn.model_selection import cross_val_score

In [35]:
cross_val_score(SVC(kernel='linear', C= 10, gamma='auto', ), iris.data, iris.target, cv=5)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [36]:
cross_val_score(SVC(kernel='rbf', C= 10, gamma='auto', ), iris.data, iris.target, cv=5)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [37]:
cross_val_score(SVC(kernel='linear', C= 20, gamma='auto', ), iris.data, iris.target, cv=5)

array([1.        , 1.        , 0.9       , 0.93333333, 1.        ])

In [42]:
kernals = ['rbf', 'linear']
C = [1,10, 20]
avg_scores = {}

for kval in kernals:
    for cval in C:
        cv_scores = cross_val_score(SVC(kernel= kval, C = cval, gamma = 'auto'), iris.data, iris.target, cv =5)
        avg_scores[kval +'_' + str(cval)] = np.average(cv_scores)
avg_scores

{'rbf_1': 0.9800000000000001,
 'rbf_10': 0.9800000000000001,
 'rbf_20': 0.9666666666666668,
 'linear_1': 0.9800000000000001,
 'linear_10': 0.9733333333333334,
 'linear_20': 0.9666666666666666}

In [43]:
from sklearn.model_selection import GridSearchCV

In [71]:
c_values = list(range(1, 50))
gamma_values = [0.001, 0.01, 0.1, 1, 10]

clf = GridSearchCV(SVC(),{
    'C' :c_values,
    'kernel':['rbf', 'linear','poly', 'sigmoid'],
    'gamma' : gamma_values
}, cv = 5, return_train_score=False)

clf.fit(iris.data, iris.target)
clf.cv_results_

{'mean_fit_time': array([3.08623314e-03, 1.40991211e-03, 2.18915939e-03, 3.79085541e-03,
        4.21829224e-03, 9.67979431e-04, 3.58629227e-04, 3.25031281e-03,
        3.35640907e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        6.24432564e-03, 0.00000000e+00, 1.64123535e-02, 0.00000000e+00,
        3.00431252e-03, 3.27587128e-04, 9.42999840e-02, 1.93414688e-03,
        2.73032188e-03, 1.22027397e-03, 2.80289650e-03, 2.41470337e-03,
        1.66587830e-03, 1.76959038e-03, 2.19993591e-03, 1.79738998e-03,
        1.57518387e-03, 0.00000000e+00, 6.00242615e-04, 5.42454720e-03,
        4.26673889e-04, 2.65574455e-03, 3.38657379e-02, 1.12161636e-03,
        5.74893951e-03, 2.87652016e-03, 9.65204239e-02, 1.99694633e-03,
        4.45194244e-03, 2.38289833e-03, 5.87472916e-03, 3.27949524e-03,
        5.14779091e-03, 8.00800323e-04, 2.72197723e-03, 3.13057899e-03,
        0.00000000e+00, 6.25696182e-03, 0.00000000e+00, 3.11803818e-03,
        3.43651772e-03, 0.00000000e+00, 2.30979

In [72]:
df2 = pd.DataFrame(clf.cv_results_)
df2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003086,0.000801,0.003550,0.001502,1,0.001,rbf,"{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}",0.866667,0.966667,0.833333,0.966667,0.933333,0.913333,0.054160,721
1,0.001410,0.001362,0.000819,0.000770,1,0.001,linear,"{'C': 1, 'gamma': 0.001, 'kernel': 'linear'}",0.966667,1.000000,0.966667,0.966667,1.000000,0.980000,0.016330,52
2,0.002189,0.000404,0.000800,0.000400,1,0.001,poly,"{'C': 1, 'gamma': 0.001, 'kernel': 'poly'}",0.800000,0.666667,0.800000,0.733333,0.733333,0.746667,0.049889,785
3,0.003791,0.000976,0.001240,0.000392,1,0.001,sigmoid,"{'C': 1, 'gamma': 0.001, 'kernel': 'sigmoid'}",0.866667,0.966667,0.833333,0.966667,0.933333,0.913333,0.054160,721
4,0.004218,0.002395,0.004200,0.003988,1,0.01,rbf,"{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}",0.900000,0.966667,0.866667,0.933333,1.000000,0.933333,0.047140,652
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
975,0.004132,0.002179,0.001513,0.001499,49,1,sigmoid,"{'C': 49, 'gamma': 1, 'kernel': 'sigmoid'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.000000,834
976,0.005619,0.003119,0.003694,0.004737,49,10,rbf,"{'C': 49, 'gamma': 10, 'kernel': 'rbf'}",1.000000,0.900000,0.866667,0.933333,0.966667,0.933333,0.047140,652
977,0.002908,0.003080,0.001863,0.002659,49,10,linear,"{'C': 49, 'gamma': 10, 'kernel': 'linear'}",1.000000,1.000000,0.900000,0.933333,1.000000,0.966667,0.042164,203
978,0.192836,0.163995,0.001506,0.000775,49,10,poly,"{'C': 49, 'gamma': 10, 'kernel': 'poly'}",0.966667,0.933333,0.866667,0.933333,1.000000,0.940000,0.044222,547


In [73]:
df2[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.913333
1,1,linear,0.980000
2,1,poly,0.746667
3,1,sigmoid,0.913333
4,1,rbf,0.933333
...,...,...,...
975,49,sigmoid,0.333333
976,49,rbf,0.933333
977,49,linear,0.966667
978,49,poly,0.940000


In [74]:
dir(clf)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_build_request_for_signature',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_doc_link_module',
 '_doc_link_template',
 '_doc_link_url_param_generator',
 '_estimator_type',
 '_format_results',
 '_get_default_requests',
 '_get_doc_link',
 '_get_metadata_request',
 '_get_param_names',
 '_get_routed_params_for_fit',
 '_get_scorers',
 '_get_tags',
 '_more_tags',
 '_parameter_constraints',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run

In [75]:
clf.best_score_

0.9866666666666667

In [76]:
clf.best_params_

{'C': 3, 'gamma': 0.1, 'kernel': 'rbf'}

In [94]:
from sklearn.model_selection import RandomizedSearchCV
c_values = list(range(1, 50))
gamma_values = [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

rs = RandomizedSearchCV(SVC(),{
    'C' :c_values,
    'kernel':['rbf', 'linear','poly', 'sigmoid'],
    'gamma' : gamma_values
}, cv = 5, return_train_score=False, n_iter= 99999)

rs.fit(iris.data, iris.target)
clf.cv_results_



{'mean_fit_time': array([3.08623314e-03, 1.40991211e-03, 2.18915939e-03, 3.79085541e-03,
        4.21829224e-03, 9.67979431e-04, 3.58629227e-04, 3.25031281e-03,
        3.35640907e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        6.24432564e-03, 0.00000000e+00, 1.64123535e-02, 0.00000000e+00,
        3.00431252e-03, 3.27587128e-04, 9.42999840e-02, 1.93414688e-03,
        2.73032188e-03, 1.22027397e-03, 2.80289650e-03, 2.41470337e-03,
        1.66587830e-03, 1.76959038e-03, 2.19993591e-03, 1.79738998e-03,
        1.57518387e-03, 0.00000000e+00, 6.00242615e-04, 5.42454720e-03,
        4.26673889e-04, 2.65574455e-03, 3.38657379e-02, 1.12161636e-03,
        5.74893951e-03, 2.87652016e-03, 9.65204239e-02, 1.99694633e-03,
        4.45194244e-03, 2.38289833e-03, 5.87472916e-03, 3.27949524e-03,
        5.14779091e-03, 8.00800323e-04, 2.72197723e-03, 3.13057899e-03,
        0.00000000e+00, 6.25696182e-03, 0.00000000e+00, 3.11803818e-03,
        3.43651772e-03, 0.00000000e+00, 2.30979

In [95]:
df2 = pd.DataFrame(rs.cv_results_)
df2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001816,0.000738,0.000710,0.000397,rbf,0.001,1,"{'kernel': 'rbf', 'gamma': 0.001, 'C': 1}",0.866667,0.966667,0.833333,0.966667,0.933333,0.913333,0.054160,3073
1,0.000599,0.000489,0.000806,0.000404,linear,0.001,1,"{'kernel': 'linear', 'gamma': 0.001, 'C': 1}",0.966667,1.000000,0.966667,0.966667,1.000000,0.980000,0.016330,56
2,0.001637,0.000377,0.000789,0.000730,poly,0.001,1,"{'kernel': 'poly', 'gamma': 0.001, 'C': 1}",0.800000,0.666667,0.800000,0.733333,0.733333,0.746667,0.049889,3137
3,0.001952,0.000921,0.000965,0.000627,sigmoid,0.001,1,"{'kernel': 'sigmoid', 'gamma': 0.001, 'C': 1}",0.866667,0.966667,0.833333,0.966667,0.933333,0.913333,0.054160,3073
4,0.001297,0.000402,0.001009,0.000015,rbf,0.01,1,"{'kernel': 'rbf', 'gamma': 0.01, 'C': 1}",0.900000,0.966667,0.866667,0.933333,1.000000,0.933333,0.047140,2999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4111,0.001001,0.000014,0.000402,0.000586,sigmoid,9,49,"{'kernel': 'sigmoid', 'gamma': 9, 'C': 49}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.000000,3186
4112,0.002011,0.000630,0.001398,0.000487,rbf,10,49,"{'kernel': 'rbf', 'gamma': 10, 'C': 49}",1.000000,0.900000,0.866667,0.933333,0.966667,0.933333,0.047140,2999
4113,0.000996,0.000630,0.000725,0.000631,linear,10,49,"{'kernel': 'linear', 'gamma': 10, 'C': 49}",1.000000,1.000000,0.900000,0.933333,1.000000,0.966667,0.042164,529
4114,0.084693,0.069455,0.000603,0.000492,poly,10,49,"{'kernel': 'poly', 'gamma': 10, 'C': 49}",0.966667,0.933333,0.866667,0.933333,1.000000,0.940000,0.044222,2309


In [96]:
df2[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.913333
1,1,linear,0.980000
2,1,poly,0.746667
3,1,sigmoid,0.913333
4,1,rbf,0.933333
...,...,...,...
4111,49,sigmoid,0.333333
4112,49,rbf,0.933333
4113,49,linear,0.966667
4114,49,poly,0.940000


In [97]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression

In [98]:
model_params = {
    'svm':{
        'model' : SVC(gamma='auto'),
        'params':{
            'C': list(range(1, 50)),
            'kernel':['rbf', 'linear','poly', 'sigmoid']
        }
    },
    'random_forest':{
        'model' : RandomForestClassifier(),
        'params':{
            'n_estimators' : [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
        }
    },
    'logistic_regression':{
        'model' : LogisticRegression(solver = 'liblinear', multi_class='auto'),
        'params':{
            'C': list(range(1, 50))
        }
    },
    'linear_regression':{
        'model' : LinearRegression(),
        'params':{
            
        }
    }
    
}

In [100]:
scores = []

for model_name , mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv = 5, return_train_score=False)
    clf.fit(iris.data, iris.target)

    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

In [101]:
df3 = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df3

Unnamed: 0,model,best_score,best_params
0,svm,0.986667,"{'C': 4, 'kernel': 'rbf'}"
1,random_forest,0.973333,{'n_estimators': 6}
2,logistic_regression,0.973333,{'C': 28}
3,linear_regression,0.322561,{}
