In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np

In [2]:
iris = load_iris()

In [3]:
file = pd.DataFrame(iris.data, columns=iris.feature_names)
file['target'] = iris.target
file['flower_name'] = file['target'].apply(lambda x : iris.target_names[x])

In [4]:
file

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,flower_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,virginica
146,6.3,2.5,5.0,1.9,2,virginica
147,6.5,3.0,5.2,2.0,2,virginica
148,6.2,3.4,5.4,2.3,2,virginica


In [5]:
X = file.drop(['target', 'flower_name'], axis=1)
y = file.flower_name

In [6]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
y.head()

0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
Name: flower_name, dtype: object

In [8]:
from sklearn.model_selection import cross_val_score

In [9]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [10]:
cross_val_score(SVC(C=1, kernel='rbf'), X, y, cv=5)

array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ])

In [11]:
cross_val_score(SVC(C=10, kernel='poly'), X, y, cv=5)

array([1.        , 1.        , 0.9       , 0.93333333, 1.        ])

In [12]:
cross_val_score(SVC(C=20, kernel='linear'), X, y, cv=5)

array([1.        , 1.        , 0.9       , 0.93333333, 1.        ])

Now see there is single model and when we want to get the best score of the model there we are repeating the procedure of doing model cross validation and seeing result
There is another method via which we can do this same this is called as Hyperparameter testing and model can be tuned well

In [13]:
from sklearn.model_selection import GridSearchCV

In [14]:
grid_model = GridSearchCV(
    SVC(), 
    {
        'C' : [1,10,20], 
        'kernel' : ['rbf', 'poly', 'linear']
    },
    cv=5, 
    return_train_score=False
)

grid_model.fit(X, y)

In [15]:
SVM_scores = pd.DataFrame(grid_model.cv_results_)

In [16]:
SVM_scores[['params', 'mean_test_score', 'rank_test_score']]

Unnamed: 0,params,mean_test_score,rank_test_score
0,"{'C': 1, 'kernel': 'rbf'}",0.966667,6
1,"{'C': 1, 'kernel': 'poly'}",0.98,1
2,"{'C': 1, 'kernel': 'linear'}",0.98,1
3,"{'C': 10, 'kernel': 'rbf'}",0.98,1
4,"{'C': 10, 'kernel': 'poly'}",0.966667,6
5,"{'C': 10, 'kernel': 'linear'}",0.973333,5
6,"{'C': 20, 'kernel': 'rbf'}",0.98,1
7,"{'C': 20, 'kernel': 'poly'}",0.966667,6
8,"{'C': 20, 'kernel': 'linear'}",0.966667,6


Now lest choose the same from number of models

In [19]:
model_selection = {
    'svm': {
        'model' : SVC(gamma='scale'), 
        'params' : {
            'C' : [1,10,20], 
            'kernel' : ['rbf', 'poly', 'linear']
        }
    }, 
    'random_forest':{
        'model' : RandomForestClassifier(), 
        'params' : {
            'n_estimators' : [10, 100, 1000], 
            'criterion' : ["gini", "entropy", "log_loss"]
        }
    }, 
    'logistic_regression' : {
        'model' : LogisticRegression(), 
        'params' : {
            'C' : [1,10,20], 
            'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
        }
    }
}

In [20]:
performance = []

for model_name, model_param in model_selection.items():
    score = GridSearchCV(model_param['model'], model_param['params'], cv=5, return_train_score=False)
    score.fit(X, y)
    performance.append(
        {
            'Model' : model_name,
            'Best Params' : score.best_params_ , 
            'Best Score' : score.best_score_
        }
    )

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt



In [21]:
performance

[{'Model': 'svm',
  'Best Params': {'C': 1, 'kernel': 'poly'},
  'Best Score': 0.9800000000000001},
 {'Model': 'random_forest',
  'Best Params': {'criterion': 'log_loss', 'n_estimators': 1000},
  'Best Score': 0.9666666666666668},
 {'Model': 'logistic_regression',
  'Best Params': {'C': 1, 'solver': 'sag'},
  'Best Score': 0.9800000000000001}]