In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of each file found
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Iris CSV from the Kaggle input directory and preview its first five rows
df = pd.read_csv(filepath_or_buffer='/kaggle/input/iris-flower-dataset/IRIS.csv')
df.head(n=5)

In [None]:
# Call the method: a bare `df.info` only displays the bound-method repr,
# not the column / dtype / non-null summary this cell is meant to show.
df.info()

In [None]:
# Display pandas' default descriptive statistics for the loaded DataFrame
df.describe()

The describe function helps us know the basic statistics of the dataset, including the five-number summary (minimum, quartiles, and maximum).

In [None]:
# Display the (rows, columns) dimensions of the loaded DataFrame
df.shape

In [None]:
# checking for null values: number of missing entries per column
# (all zeros means there is nothing to impute or drop).
# Uses isna().sum() instead of the deprecated top-level pd.value_counts helper.
df.isna().sum()

Cleaning the data is one of the tasks on which a data scientist spends the most time. Thanks to the Almighty Mahadev, there is no missing data.

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
# distplot is deprecated in seaborn; histplot with a KDE overlay on a density
# scale draws the same histogram + density-curve view of sepal_length.
sns.histplot(df['sepal_length'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# distplot is deprecated in seaborn; histplot with a KDE overlay on a density
# scale draws the same histogram + density-curve view of sepal_width.
sns.histplot(df['sepal_width'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# distplot is deprecated in seaborn; histplot with a KDE overlay on a density
# scale draws the same histogram + density-curve view of petal_length.
sns.histplot(df['petal_length'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# distplot is deprecated in seaborn; histplot with a KDE overlay on a density
# scale draws the same histogram + density-curve view of petal_width.
sns.histplot(df['petal_width'], kde=True, stat='density', kde_kws=dict(cut=3))

Looking at the distribution of each variable, we can expect that the variables contain outliers, and that some variables lean towards the higher lengths and widths of sepals & petals. We will draw boxplots to check for outliers.

In [None]:
# Skewness of each numeric feature. numeric_only=True excludes the string
# 'species' column, which otherwise raises a TypeError on pandas >= 2.0.
df.skew(numeric_only=True)

sepal_length and sepal_width are positively skewed, as we can also see in the distribution plots above, while petal_length & petal_width are negatively skewed. A distribution with a fatter tail on the left side is negatively skewed, while a fatter tail on the right side represents positive skewness.

In [None]:
# Pairwise scatter plots of every feature pair, coloured by species
sns.pairplot(data=df, hue='species')

Boxplots to check outliers

In [None]:
# Pass the column as x= explicitly: newer seaborn releases treat the first
# positional argument as `data`, which changes how the box is drawn.
sns.boxplot(x=df['sepal_length'])

In [None]:
# Pass the column as x= explicitly: newer seaborn releases treat the first
# positional argument as `data`, which changes how the box is drawn.
sns.boxplot(x=df['sepal_width'])

In sepal_width, the outliers are clearly visible.

In [None]:
# Pass the column as x= explicitly: newer seaborn releases treat the first
# positional argument as `data`, which changes how the box is drawn.
sns.boxplot(x=df['petal_length'])

In [None]:
# Pass the column as x= explicitly: newer seaborn releases treat the first
# positional argument as `data`, which changes how the box is drawn.
sns.boxplot(x=df['petal_width'])

In [None]:
# Pairwise correlation between the numeric features only. The string 'species'
# column must be excluded: pandas >= 2.0 no longer drops non-numeric columns
# silently and raises instead. select_dtypes works on every pandas version.
corr = df.select_dtypes(include='number').corr()
corr

Using heatmap for better presentation of correlation of the data

In [None]:
# Wide canvas for the correlation heatmap; colour scale pinned to the full [-1, 1] range
plt.figure(figsize=(20, 10))
sns.heatmap(data=corr, vmin=-1.0, vmax=1.0, cmap='RdYlGn')

All the variables seem to be highly correlated with each other. To learn the basic hacks of data visualization using seaborn, visit this link: https://datamahadev.com/13-ultimate-seaborn-tricks-using-python/

Applying different classification models

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score

In [None]:
# Features are the first four columns; the target is the species label
X = df.iloc[:, :4]
y = df['species']

# Hold out 30% of the rows for testing, with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(X_train.shape, X_test.shape, sep='\n')

Logistic Regression

In [None]:
# Build a logistic regression classifier and fit it on the training split;
# fit() returns the estimator itself, so it is kept as `model` and displayed.
model = LogisticRegression(random_state=1).fit(X_train, y_train)
model

In [None]:
# Predict a label for every row of the held-out test features with the fitted model
y_pred = model.predict(X_test)
y_pred  # display the predicted labels

In [None]:
# Fraction of test samples whose predicted label matches the true label
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Per-class precision, recall and F1 for the logistic regression predictions
logreg_report = classification_report(y_true=y_test, y_pred=y_pred, digits=2)
print(logreg_report)

K Nearest Neighbor

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Running the KNN model for k = 3, 5 and 9 in one loop instead of repeating the
# same fit / predict / score code once per value of k.
# NOTE: the loop deliberately ends with k=9 so that `knn` and `knn_pred` hold
# the same values afterwards as before (later cells read `knn_pred`).
for k in (3, 5, 9):
    knn = KNeighborsClassifier(n_neighbors=k)

    # fitting the model on the training data
    knn.fit(X_train, y_train)

    # predicting the test labels with the current k
    knn_pred = knn.predict(X_test)

    # printing the accuracy, labelled with k so the three results can be told apart
    print(f"k={k}: {accuracy_score(y_test, knn_pred)}")

In [None]:
# Confusion matrix of the most recent KNN predictions, drawn as an annotated heatmap
conf_mat1 = confusion_matrix(y_true=y_test, y_pred=knn_pred)
plt.figure(figsize=(9, 7))
sns.heatmap(data=conf_mat1, cmap='Blues', annot=True, fmt='g')

Naive Bayes

In [None]:
#importing naive bayes
from sklearn.naive_bayes import GaussianNB
# Fit a Gaussian Naive Bayes classifier on the training split (fit() returns the estimator)
clf = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out test features and display them
nb_predict = clf.predict(X_test)
nb_predict

In [None]:
# Accuracy of the Naive Bayes predictions, as a fraction of correct test samples
accuracy_score(y_true=y_test, y_pred=nb_predict, normalize=True)

In [None]:
# Confusion matrix of the Naive Bayes predictions
conf_mat2 = confusion_matrix(y_test, nb_predict)

# confusion matrix with heatmap -- plot conf_mat2 (Naive Bayes), not conf_mat1,
# which holds the KNN result from the earlier cell
plt.figure(figsize = (9,7))
sns.heatmap(conf_mat2, annot=True,cmap='Blues', fmt='g')

Support Vector Machine

In [None]:
from sklearn.svm import SVC
# Linear-kernel support vector classifier with regularisation strength C=0.1.
# NOTE(review): scikit-learn documents gamma as a coefficient for the 'rbf',
# 'poly' and 'sigmoid' kernels, so it presumably has no effect here -- confirm.
svc_model = SVC(kernel='linear', C=.1, gamma=1).fit(X_train, y_train)

# Predict labels for the held-out test features and display them
prediction = svc_model.predict(X_test)
prediction

In [None]:
# Print the SVC's score on the training split first, then on the held-out test split
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(svc_model.score(features, labels))

In [None]:
# Confusion matrix of the SVC predictions, drawn as an annotated heatmap
conf_mat3 = confusion_matrix(y_true=y_test, y_pred=prediction)
sns.heatmap(data=conf_mat3, cmap='Blues', annot=True, fmt='g')

If you have read this far, thanks a lot! Please do upvote, and if you want to follow AI & Data Sciences religiously, visit https://datamahadev.com and subscribe to my blog & publishing website, as the Kaggle community's support is much needed.