In [None]:
# Notebook-wide setup: plotting libraries, data libraries and display defaults.
# Seaborn theme: "darkgrid" style with short color codes enabled.
import seaborn as sns; sns.set(style= "darkgrid", color_codes = True)
# Second sns.set call passes a figure size through seaborn's rc dictionary.
sns.set(rc={'figure.figsize':(11.7,8.27)})
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Writes the same 'figure.figsize' key as the rc dict above; this assignment runs last.
plt.rcParams['figure.figsize'] = (12.0, 9.0)
# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation and convergence warnings from the libraries used below.
import warnings; warnings.filterwarnings('ignore')

In [None]:
# Load the iris CSV from the relative ../input directory into a DataFrame.
dataset = pd.read_csv("../input/iris.csv")

In [None]:
# Preview the first rows to check how the file was parsed.
dataset.head()

In [None]:
# Column names, dtypes and non-null counts of the raw frame.
dataset.info()

In [None]:
# Summary statistics of the raw frame.
dataset.describe()

In [None]:
# Number of missing values in each column.
dataset.isnull().sum()

In [None]:
# Pairwise scatter plots of every feature pair, colored by the species column.
sns.pairplot(
    data=dataset,
    hue='species',
    height=5,
    palette="Set2",
)
plt.show()

In [None]:
# Correlation matrix of the numeric feature columns only.
# The frame also holds the string-valued 'species' column; recent pandas
# versions raise instead of silently skipping non-numeric columns in corr(),
# so the numeric columns are selected explicitly before correlating.
numeric_features = dataset.select_dtypes(include='number')
sns.heatmap(numeric_features.corr(), annot= True, fmt= '.2g', cmap= 'Set2')

In [None]:
# (rows, columns) of the raw frame.
dataset.shape

In [None]:
# Shuffle the row order reproducibly.
# The permutation length is taken from the frame itself instead of a
# hard-coded 150, so the cell keeps working (and keeps every row) if the
# CSV has a different number of rows.
ind = np.arange(len(dataset))
np.random.seed(1)  # fixed seed so the permutation is the same on every run
np.random.shuffle(ind)  # in-place shuffle of the positional indices
iris_data = dataset.iloc[ind]  # reorder rows by position; original index labels are kept

In [None]:
# Shape of the shuffled frame.
iris_data.shape

In [None]:
# First five rows of the shuffled frame.
iris_data.head(5)

In [None]:
from sklearn.preprocessing import StandardScaler 

In [None]:
# Scaler instance that is fitted and applied to the feature columns in the following cells.
scaler = StandardScaler()

In [None]:
# Fit the scaler on every column except the 'species' label.
# NOTE(review): the fit uses all rows of iris_data, and the train/test split
# happens only afterwards, so held-out rows contribute to the fitted scaling
# statistics. Consider fitting on the training rows only.
scaler.fit(iris_data.drop('species',axis = 1)) 

In [None]:
# Apply the fitted scaler to the same feature columns ('species' dropped again).
scale = scaler.transform(iris_data.drop('species',axis = 1)) 

In [None]:
# Wrap the scaled array back into a DataFrame.
# Column names come from the same drop('species') used to build the scaler
# input, so they stay correct even if 'species' is not the last column.
# The shuffled index of iris_data is carried over so the scaled features and
# iris_data['species'] stay aligned by label as well as by position.
feature_columns = iris_data.drop('species', axis=1).columns
iris_scaled = pd.DataFrame(scale, columns=feature_columns, index=iris_data.index)

In [None]:
# Preview the scaled feature frame.
iris_scaled.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features are the scaled columns; the target is the species label.
X, Y = iris_scaled, iris_data['species']

# Hold out 30% of the rows for testing; fixed random_state for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Print the shapes in the order: train features, train labels, test features, test labels.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Baseline model: nearest-neighbour classifier with a single neighbour,
# trained on the training split and evaluated on the held-out rows.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X=X_train, y=y_train)
knn_pred = knn.predict(X=X_test)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
# Confusion matrix of the K=1 predictions against the test labels, wrapped in a DataFrame.
knn_cm = pd.DataFrame(confusion_matrix(y_test, knn_pred))

In [None]:
# Text report for the K=1 model: confusion matrix first, then a blank line,
# then the classification report for the same predictions.
print(knn_cm, "\n")
print(classification_report(y_test, knn_pred))

In [None]:
# Test-set accuracy of the K=1 model; the bare name on the last line displays it.
knn_acc = accuracy_score(y_test, knn_pred)
knn_acc

In [None]:
# Holds the test-set misclassification rate for each candidate K.
error_rate = []

In [None]:
# Sweep K = 1..10 and record the test-set misclassification rate for each K.
# The list is rebuilt inside this cell so that re-running the cell does not
# append a second set of entries to an already filled list.
# NOTE(review): K is tuned on the same test split that is later used to
# report accuracy; a validation split or cross-validation would avoid that.
error_rate = []
for i in range(1, 11):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred = knn.predict(X_test)
    # Fraction of test rows whose predicted label differs from the true label.
    error_rate.append(np.mean(pred != y_test))

In [None]:
# Plot error rate against K.
# error_rate[0] belongs to K=1, so the x values are given explicitly;
# plotting the list alone would put the points at 0-based positions and
# every K read off the chart would be one too small.
k_values = list(range(1, len(error_rate) + 1))
plt.plot(k_values, error_rate, 'b--', marker = 'o', markerfacecolor = "red")
plt.xticks(k_values)
plt.xlabel('K (number of neighbors)')
plt.ylabel('Error rate')
plt.show()

In [None]:
# Refit the classifier with six neighbours and predict the held-out rows.
# NOTE(review): confirm K=6 matches the minimum of error_rate
# (error_rate[i] corresponds to K = i + 1).
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X=X_train, y=y_train)
knn_pred2 = knn.predict(X=X_test)

In [None]:
# Confusion matrix for the K=6 predictions.
# The matrix is computed once and reused for the plot. Row/column labels come
# from the fitted classifier's classes_ and are passed to confusion_matrix via
# labels=, so the tick labels always match the matrix order instead of relying
# on a hard-coded list of species names.
class_labels = knn.classes_
knn_cm2 = pd.DataFrame(
    confusion_matrix(y_test, knn_pred2, labels=class_labels),
    index=class_labels,
    columns=class_labels,
)

ax = sns.heatmap(knn_cm2, annot= True, fmt= 'd', cmap= "Set2")
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted', ylabel='Actual');

In [None]:
# Header line followed by the classification report for the K=6 predictions.
print('with K = 6 \n')
print(classification_report(y_test, knn_pred2))

In [None]:
# Test-set accuracy of the K=6 model; the bare name on the last line displays it.
knn_acc2 = accuracy_score(y_test, knn_pred2)
knn_acc2

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# First three rows of the shuffled, unscaled frame that the next cells reuse.
iris_data.head(3)

In [None]:
# Shape of the shuffled frame.
iris_data.shape

In [None]:
# Rebuild the feature/target pair from the unscaled frame.
X = iris_data.drop(columns='species')
Y = iris_data.loc[:, 'species']

# Same test fraction and random_state as the earlier split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=42,
)

In [None]:
# Logistic regression on the unscaled features.
# max_iter is raised above the library default because the solver can stop
# before converging on unscaled inputs, and the notebook's global warnings
# filter would hide the resulting convergence warning.
# NOTE(review): KNN above was trained on standardized features while this
# model uses the raw ones; keep that in mind when comparing accuracies.
glm = LogisticRegression(max_iter=1000)

glm.fit(X_train, y_train)

glm_pred = glm.predict(X_test)

In [None]:
# Confusion matrix heatmap for the logistic regression predictions.
# Tick labels come from the fitted model's classes_ and the same order is
# passed to confusion_matrix via labels=, so the axes cannot drift from the
# matrix rows/columns the way a hard-coded list of species names could.
glm_labels = glm.classes_
ax = sns.heatmap(confusion_matrix(y_test, glm_pred, labels=glm_labels),
                 annot= True, fmt= 'd',
                 xticklabels= glm_labels,
                 yticklabels= glm_labels,
                 cmap= "Set2")
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set(xlabel='Predicted', ylabel='Actual');

In [None]:
# Classification report for the logistic regression predictions on the test split.
print(classification_report(y_test, glm_pred))

In [None]:
# Test-set accuracy of the logistic regression model; the bare name displays it.
glm_acc = accuracy_score(y_test, glm_pred)
glm_acc

In [None]:
# Accuracies to compare: logistic regression first, then the K=6 KNN model.
scores = [glm_acc, knn_acc2]

In [None]:
# Display names for the compared models, logistic regression first and KNN second.
algorithms = ["Logistic Regression","K Nearest Neighbor"]

In [None]:
# Bar chart comparing the test accuracy of the two models.
# x and y are passed by keyword: newer seaborn releases no longer accept
# them as positional arguments, so the positional form raises there.
ax = sns.barplot(x=algorithms, y=scores, palette= 'Set2')
ax.set(xlabel='Algorithm', ylabel='Test accuracy');