In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Importing The Dataset
df=pd.read_csv('./dataset/iris.csv')

In [3]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [4]:
#Checking Null Values
df.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [5]:
#Checking Duplicate rows
df.duplicated().sum()

1

In [6]:
#Dropping Duplicates
df.drop_duplicates(inplace=True)

In [7]:
#Reseting Index After Droping The Duplicate Rows
df.reset_index(inplace=True)

In [8]:
df

Unnamed: 0,index,sepal_length,sepal_width,petal_length,petal_width,species
0,0,5.1,3.5,1.4,0.2,setosa
1,1,4.9,3.0,1.4,0.2,setosa
2,2,4.7,3.2,1.3,0.2,setosa
3,3,4.6,3.1,1.5,0.2,setosa
4,4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...,...
144,145,6.7,3.0,5.2,2.3,virginica
145,146,6.3,2.5,5.0,1.9,virginica
146,147,6.5,3.0,5.2,2.0,virginica
147,148,6.2,3.4,5.4,2.3,virginica


In [9]:
#Droping the Index Column Auto Created Due To Reset Function
df.drop('index',axis=1,inplace=True)

In [10]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,virginica
145,6.3,2.5,5.0,1.9,virginica
146,6.5,3.0,5.2,2.0,virginica
147,6.2,3.4,5.4,2.3,virginica


In [11]:
#Seperate Depedant and Indepedant Feature
X=df.drop('species',axis=1)
y=df['species']

In [12]:
#Scaling Indepedant Feature
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_sc=sc.fit_transform(X)

In [13]:
#Importing The Models For Checking
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [14]:
#Createing The List Of Models
models=[LogisticRegression(),DecisionTreeClassifier(),KNeighborsClassifier(),RandomForestClassifier()]

## Cross_Val_Score

In [15]:
#Importing The Cross_Val_Score
from sklearn.model_selection import cross_val_score

In [16]:
score=[]
for model in models:
    cv_score=cross_val_score(model,X_sc,y,cv=5)
    cv_score=np.round(cv_score,3)
    score.append({'model':model,'scores':cv_score,'mean':np.mean(cv_score)})
score_df=pd.DataFrame(score)
score_df

Unnamed: 0,model,scores,mean
0,LogisticRegression(),"[0.967, 0.967, 0.933, 0.9, 1.0]",0.9534
1,DecisionTreeClassifier(),"[0.967, 0.967, 0.9, 0.967, 1.0]",0.9602
2,KNeighborsClassifier(),"[0.967, 0.967, 0.933, 0.933, 1.0]",0.96
3,RandomForestClassifier(),"[0.967, 0.967, 0.933, 0.967, 1.0]",0.9668


**Among The Above Models WE Can Choose DecisionTreeClassifier For Model Training With Default Hyperparameter For Best Accuracy**

In [26]:
#Creating The Dictionary of all ModelWise Feature for HyperParaMeter Tuning
params={'log':{'penalty':['l1','l2','elasticnet',None],'C':[1,2,3,4,5,6,7]},
        'dtc':{'criterion':["gini","entropy","log_loss"],'splitter':["best","random"]},
        'knc':{'weights':['uniform','distance'],'algorithm':['auto','ball_tree','kd_tree','brute']},
        'rfc':{'criterion':["gini","entropy","log_loss"],'n_estimators':[10, 20, 50, 100]}}

In [18]:
param_list=list(params.keys())

In [19]:
params[param_list[1]]

{'criterion': ['gini', 'entropy', 'log_loss'], 'splitter': ['best', 'random']}

# GridSearchCv

In [20]:
#Importing the GridSearchCv
from sklearn.model_selection import GridSearchCV

In [21]:
#Defining Function for Model Selection
def model_select_G(models,params):
    result=[]
    i=0
    for model in models:
        param=params[param_list[i]]
        i+=1
        clf=GridSearchCV(model,param,cv=5)
        clf.fit(X_sc,y)
        result.append({'model':model,'parameter':clf.best_params_,'score':clf.best_score_})
    result_df=pd.DataFrame(result)
    return result_df
        

In [22]:
#Passing the List_of_Models and Dictionary_of_parameters 
model_select_G(models,params)

Unnamed: 0,model,parameter,score
0,LogisticRegression(),"{'C': 1, 'penalty': None}",0.973333
1,DecisionTreeClassifier(),"{'criterion': 'gini', 'splitter': 'best'}",0.96
2,KNeighborsClassifier(),"{'algorithm': 'auto', 'weights': 'uniform'}",0.96
3,RandomForestClassifier(),"{'criterion': 'entropy', 'n_estimators': 10}",0.966667


**From Above Models We Can Choose Logistic Regression With Above Parameters To Train The Model To Get Highest Accuracy**

# RandomizedSearchCV

In [23]:
#Importing the RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [24]:
#Defining Function for Model Selection
def model_select_R(models,params):
    result1=[]
    i=0
    for model in models:
        param=params[param_list[i]]
        i+=1
        clf1=RandomizedSearchCV(model,param,cv=5)
        clf1.fit(X_sc,y)
        result1.append({'model':model,'parameter':clf1.best_params_,'score':clf1.best_score_})
    result_df1=pd.DataFrame(result1)
    return result_df1

In [25]:
model_select_R(models,params)

Unnamed: 0,model,parameter,score
0,LogisticRegression(),"{'penalty': None, 'C': 4}",0.973333
1,DecisionTreeClassifier(),"{'splitter': 'best', 'criterion': 'gini'}",0.966667
2,KNeighborsClassifier(),"{'weights': 'uniform', 'algorithm': 'auto'}",0.96
3,RandomForestClassifier(),"{'n_estimators': 100, 'criterion': 'gini'}",0.966667


**From Above Models We Can Choose Logistic Regression With Given Parameters To Train The Model To Get Highest Accuracy**