## Use iris flower dataset from sklearn library and use cross_val_score against following
models to measure the performance of each. In the end figure out the model with best performance,
# 1. Logistic Regression
# 2. SVM
# 3. Decision Tree
# 4. Random Forest


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn import datasets
iris = datasets.load_iris()

In [3]:
df = pd.DataFrame(iris.data , columns=iris.feature_names)
df['Flower'] = iris.target
df['Flower']=df['Flower'].apply(lambda x : iris.target_names[x])

In [4]:
x = df.iloc[:,0:4]
y = df.iloc[:,-1]

In [5]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [6]:
from sklearn.model_selection import train_test_split
x_train ,  x_test ,y_train ,y_test = train_test_split(x,y,test_size=0.3)

In [7]:
from sklearn import svm

In [8]:
model = svm.SVC(kernel='rbf',C=30,gamma='auto')
model.fit(x_train,y_train)
model.score(x_test,y_test)

0.9777777777777777

In [9]:
from sklearn.model_selection import cross_val_score

In [10]:
cvs = cross_val_score
cvs

<function sklearn.model_selection._validation.cross_val_score(estimator, X, y=None, *, groups=None, scoring=None, cv=None, n_jobs=None, verbose=0, fit_params=None, params=None, pre_dispatch='2*n_jobs', error_score=nan)>

In [11]:
cvs(svm.SVC(kernel='linear' , C=10, gamma='auto'),x_train, y_train,cv=5)

array([0.95238095, 0.95238095, 1.        , 0.85714286, 1.        ])

In [12]:
cvs(svm.SVC(kernel='rbf' , C=10, gamma='auto'),x_train, y_train,cv=5)

array([0.95238095, 0.95238095, 1.        , 0.85714286, 1.        ])

In [13]:
cvs(svm.SVC(kernel='rbf' , C=20, gamma='auto'),x_train, y_train,cv=5)

array([0.95238095, 0.95238095, 1.        , 0.85714286, 1.        ])

In [14]:
kernels = ['rbf' , 'linear']
C = [1,10,20]
avg_scores = {}
for kval in kernels:
    for cval in C:
        cv_scores = cvs(svm.SVC(kernel=kval, C=cval , gamma='auto'),x_train , y_train,cv=5)
        avg_scores[kval + '_' + str(cval)] = np.average(cv_scores)
        
         
avg_scores      

{'rbf_1': np.float64(0.9714285714285715),
 'rbf_10': np.float64(0.9523809523809523),
 'rbf_20': np.float64(0.9523809523809523),
 'linear_1': np.float64(0.961904761904762),
 'linear_10': np.float64(0.9523809523809523),
 'linear_20': np.float64(0.9428571428571428)}

In [15]:
from sklearn.model_selection import GridSearchCV

In [16]:
clf = GridSearchCV(svm.SVC(gamma='auto'),{
    'C':[1,10,20],
    'kernel':['rbf','linear']
},cv=5,return_train_score=False)

clf.fit(x_train , y_train)
clf.cv_results_

{'mean_fit_time': array([0.00419321, 0.00243802, 0.00330729, 0.00373712, 0.00359802,
        0.00313859]),
 'std_fit_time': array([0.00221391, 0.00234699, 0.00306291, 0.00297137, 0.00336097,
        0.00266548]),
 'mean_score_time': array([0.00313549, 0.0034575 , 0.00135741, 0.00341702, 0.0021183 ,
        0.00171771]),
 'std_score_time': array([0.00288091, 0.0035381 , 0.001884  , 0.00280198, 0.00295164,
        0.00181286]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value=np.str_('?'),
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],
 'split0_test_s

In [17]:
df = pd.DataFrame(clf.cv_results_)

In [18]:
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004193,0.002214,0.003135,0.002881,1,rbf,"{'C': 1, 'kernel': 'rbf'}",1.0,0.952381,1.0,0.904762,1.0,0.971429,0.038095,1
1,0.002438,0.002347,0.003457,0.003538,1,linear,"{'C': 1, 'kernel': 'linear'}",1.0,0.952381,1.0,0.857143,1.0,0.961905,0.055533,2
2,0.003307,0.003063,0.001357,0.001884,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.952381,0.952381,1.0,0.857143,1.0,0.952381,0.052164,3
3,0.003737,0.002971,0.003417,0.002802,10,linear,"{'C': 10, 'kernel': 'linear'}",0.952381,0.952381,1.0,0.857143,1.0,0.952381,0.052164,3
4,0.003598,0.003361,0.002118,0.002952,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.952381,0.952381,1.0,0.857143,1.0,0.952381,0.052164,3
5,0.003139,0.002665,0.001718,0.001813,20,linear,"{'C': 20, 'kernel': 'linear'}",0.952381,0.952381,1.0,0.857143,0.952381,0.942857,0.046657,6


In [22]:
df[['param_C','param_kernel','mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.971429
1,1,linear,0.961905
2,10,rbf,0.952381
3,10,linear,0.952381
4,20,rbf,0.952381
5,20,linear,0.942857


In [23]:
dir(clf)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_build_request_for_signature',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_doc_link_module',
 '_doc_link_template',
 '_doc_link_url_param_generator',
 '_estimator_type',
 '_format_results',
 '_get_default_requests',
 '_get_doc_link',
 '_get_metadata_request',
 '_get_param_names',
 '_get_routed_params_for_fit',
 '_get_scorers',
 '_get_tags',
 '_more_tags',
 '_parameter_constraints',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run

In [24]:
clf.best_score_

np.float64(0.9714285714285715)

In [25]:
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

## RandomizedSearchCV

In [29]:
from sklearn.model_selection import RandomizedSearchCV

In [35]:
rs = RandomizedSearchCV(svm.SVC(gamma='auto'),{
    'C':[1,10,20],
    'kernel':['rbf','linear']
},
    cv=5,
    return_train_score=False,
    n_iter=2
                      )

In [36]:
rs.fit(iris.data,iris.target)

In [37]:
pd.DataFrame(rs.cv_results_)[['param_C','param_kernel','mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,linear,0.973333
1,1,linear,0.98
