In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

I will compare several classification models on the iris dataset. In this notebook, I will first fit each model with its default parameters, then use GridSearchCV to find the best hyperparameters. Finally, I will compute the accuracy score of each tuned model and compare all of them.

## Load Dataset

In [None]:
# Read the iris CSV into a DataFrame and preview its first rows.
IRIS_CSV = "/kaggle/input/iris-flower-dataset/IRIS.csv"
df = pd.read_csv(IRIS_CSV)
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the species column as integer class codes. The fitted encoder `le`
# keeps the code -> label mapping in `le.classes_`.
le = LabelEncoder()
df["species"] = le.fit_transform(df["species"])
df.head()

In [None]:
# Number of rows per encoded species class.
df["species"].value_counts()

In [None]:
# Count missing values in each column (isna is the same check as isnull).
df.isna().sum()

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Summary statistics, transposed so that each column of df becomes one row.
df.describe().transpose()

In [None]:
# Pairwise Pearson correlation between all columns, including the
# integer-encoded species column.
df.corr(method="pearson")

## Train Test Split

In [None]:
# Separate the target vector (species) from the feature matrix (every other column).
y = df["species"]
X = df.drop(columns=["species"])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature column to the [0, 1] range; X becomes a NumPy array.
# NOTE(review): the scaler is fitted on all rows, including the ones that are
# later held out as the test split.
sc = MinMaxScaler()
sc.fit(X)
X = sc.transform(X)
X[:10]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Split the *unscaled* features first and fit the scaler on the training rows
# only. Fitting MinMaxScaler on the full dataset before splitting lets the test
# rows influence the min/max used for scaling (data leakage), which can make
# the test score optimistic. The fixed random_state keeps the split
# reproducible and assigns the same rows to train/test as before.
features = df.drop(columns=["species"])
X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.25, random_state=23
)

sc = MinMaxScaler().fit(X_train)  # scaling statistics come from training rows only
X_train = sc.transform(X_train)   # NumPy array scaled to [0, 1]
X_test = sc.transform(X_test)     # may fall slightly outside [0, 1]; that is expected

In [None]:
# Dimensions of the training feature matrix.
X_train.shape

In [None]:
# Dimensions of the held-out test feature matrix (test_size = 0.25 of the rows).
X_test.shape

# KNN Classification

## Default Model

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Baseline: k-nearest neighbours with default hyperparameters, trained on the
# training split and scored on the held-out test split.
knn = KNeighborsClassifier().fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("KNN Default score is", accuracy)


In [None]:
# Confusion matrix for the current predictions (default KNN):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

## Tune Model with GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# 10-fold cross-validated search over the number of neighbours (1..19),
# using only the training split.
knn = KNeighborsClassifier()
knn_params = {"n_neighbors": np.arange(1, 20)}
gridcv_model = GridSearchCV(estimator=knn, param_grid=knn_params, cv=10)
gridcv_model.fit(X_train, y_train)
gridcv_model.best_params_

In [None]:
# Create tuned model from the hyperparameters selected by the KNN grid search
# in the previous cell (gridcv_model), rather than a value copied by hand from
# its output, so the "tuned" model always matches the search that was run.
knn_tuned = KNeighborsClassifier(**gridcv_model.best_params_)
knn_tuned.fit(X_train, y_train)
y_pred = knn_tuned.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred)
print("KNN tuned score is", accuracy_tuned)

In [None]:
# Confusion matrix for the current predictions (tuned KNN):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Logistic Regression Classification

## Default Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression with default hyperparameters, trained on the
# training split and scored on the held-out test split.
log_reg = LogisticRegression().fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Logistic Regression Default score is", accuracy)

In [None]:
# Confusion matrix for the current predictions (default logistic regression):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

## Tune Model with GridSearchCV

In [None]:
# 10-fold cross-validated search over the inverse regularisation strength C,
# using only the training split.
log_reg = LogisticRegression()
log_reg_params = {"C": [0.001, 0.01, 0.1, 1, 5, 10, 100, 1000]}
gridcv_model = GridSearchCV(estimator=log_reg, param_grid=log_reg_params, cv=10)
gridcv_model.fit(X_train, y_train)
gridcv_model.best_params_

In [None]:
# Build the tuned model from the hyperparameters selected by the logistic
# regression grid search in the previous cell (gridcv_model), rather than a
# value copied by hand from its output, so the two can never drift apart.
log_reg_tuned = LogisticRegression(**gridcv_model.best_params_)
log_reg_tuned.fit(X_train, y_train)
y_pred = log_reg_tuned.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred)
print("Logistic Regression Tuned Score :", accuracy_tuned)

In [None]:
# Confusion matrix for the current predictions (tuned logistic regression):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# SVM Classification

## Default Model

In [None]:
from sklearn.svm import SVC

# Baseline: support vector classifier with default hyperparameters, trained on
# the training split and scored on the held-out test split.
svc = SVC().fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("SVM Default Score is", accuracy)

In [None]:
# Confusion matrix for the current predictions (default SVC):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

## Tune Model with GridSearchCV

In [None]:
# 10-fold cross-validated search over kernel type and penalty C,
# using only the training split.
svc = SVC()
svc_params = {
    "kernel": ["rbf", "linear"],
    "C": [0.001, 0.01, 0.1, 1, 10, 50, 100],
}
gridcv_model = GridSearchCV(estimator=svc, param_grid=svc_params, cv=10)
gridcv_model.fit(X_train, y_train)
gridcv_model.best_params_

In [None]:
# Build the tuned model from the hyperparameters selected by the SVC grid
# search in the previous cell (gridcv_model), rather than values copied by
# hand from its output, so the "tuned" model always matches the search.
svc_tuned = SVC(**gridcv_model.best_params_).fit(X_train, y_train)
y_pred = svc_tuned.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred)
print("SVC Tuned Score is", accuracy_tuned)


In [None]:
# Confusion matrix for the current predictions (tuned SVC):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Naive Bayes Classification

## Default Model

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, trained on the training split
# and scored on the held-out test split.
nb = GaussianNB().fit(X_train, y_train)
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Naive Bayes Default Model Score is : ", accuracy)

In [None]:
# Confusion matrix for the current predictions (Gaussian naive Bayes):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Random Forest Classification

## Default Model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline: random forest with default hyperparameters and a fixed seed so the
# result is reproducible; scored on the held-out test split.
rf = RandomForestClassifier(random_state=23).fit(X_train, y_train)
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Random Forest Default Model Score is : ", accuracy)

In [None]:
# Confusion matrix for the current predictions (default random forest):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

## Tune Model with GridSearchCV

In [None]:
# 10-fold cross-validated search over forest size, split criterion and the
# minimum split/leaf sizes, using only the training split.
rf = RandomForestClassifier(random_state = 23)
rf_params = {"n_estimators":[10,50,100,500,1000,2000],
            "criterion": ["gini", "entropy"],
            "min_samples_split": [2,5,10],
            "min_samples_leaf": [1,2,5,10,20,50]}
# The grid has 6 * 2 * 3 * 6 = 216 candidates, i.e. 2160 forest fits with
# cv = 10 (some with 2000 trees). n_jobs = -1 spreads the fits over all CPU
# cores; the selected parameters are unchanged because random_state is fixed.
gridcv_model = GridSearchCV(rf, rf_params, cv = 10, n_jobs = -1).fit(X_train, y_train)
gridcv_model.best_params_

In [None]:
# Build the tuned forest from the hyperparameters selected by the random
# forest grid search in the previous cell (gridcv_model), rather than values
# copied by hand from its output. random_state is passed explicitly because it
# is not part of the searched grid and keeps the fit reproducible.
rf_tuned = RandomForestClassifier(random_state = 23,
                                  **gridcv_model.best_params_).fit(X_train, y_train)
y_pred = rf_tuned.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred)
print("Random Forest Tuned Model Score is : ", accuracy_tuned)

In [None]:
# Confusion matrix for the current predictions (tuned random forest):
# rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Compare All Classification Models

## Model              Default Score                  Tuned Score 

**KNN** ( Default Model Score : **0.97**   - Tuned Model Score : **0.97**    )

**Logistic Regression** ( Default Model Score : **0.95**   - Tuned Model Score : **0.97**    )

**SVM** ( Default Model Score : **0.97**   - Tuned Model Score :  **0.97**   )

**Naive Bayes** ( Default Model Score : **0.94**   - Tuned Model Score : **n/a** (no hyperparameter tuning was performed for this model)    )

**Random Forest** ( Default Model Score : **0.97**   - Tuned Model Score :  **0.97**   )


