# IRIS Flower Dataset Analysis

![iris](https://images.unsplash.com/photo-1611427396096-660d74e1c318?ixid=MXwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHw%3D&ixlib=rb-1.2.1&auto=format&fit=crop&w=967&q=80)

## Here's a picture of the three different Iris types:

In [None]:
# The Iris Setosa
from IPython.display import Image

# Fetch the picture over HTTPS rather than plain HTTP.
# The URL is passed positionally, exactly as before.
url = 'https://upload.wikimedia.org/wikipedia/commons/5/56/Kosaciec_szczecinkowaty_Iris_setosa.jpg'
Image(url, width=300, height=300)

In [None]:
# The Iris Versicolor
from IPython.display import Image

# Fetch the picture over HTTPS rather than plain HTTP.
# The URL is passed positionally, exactly as before.
url = 'https://upload.wikimedia.org/wikipedia/commons/4/41/Iris_versicolor_3.jpg'
Image(url, width=300, height=300)

In [None]:
# The Iris Virginica
from IPython.display import Image

# Fetch the picture over HTTPS rather than plain HTTP.
# The URL is passed positionally, exactly as before.
url = 'https://upload.wikimedia.org/wikipedia/commons/9/9f/Iris_virginica.jpg'
Image(url, width=300, height=300)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load.

import os

import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from sklearn.datasets import load_iris

# Global plot styling for every figure in the notebook.
sns.set_theme(style="whitegrid")
sns.set_context('poster')

# scikit-learn's bundled Iris data; later cells read iris.data, iris.target
# and iris.feature_names from this object.
iris = load_iris()

# Input data files are available in the read-only "../input/" directory.
# Running this cell (by clicking run or pressing Shift+Enter) lists every file
# under the input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the Kaggle-hosted Iris CSV into `df`, then display the frame as the cell output.
df = pd.read_csv(filepath_or_buffer="/kaggle/input/iris-flower-dataset/IRIS.csv")
df

In [None]:
# Display the (rows, columns) tuple of the loaded frame.
df.shape

# Univariate Analysis

In [None]:
# One sub-frame per value of the `species` column.
df_setosa = df[df['species'] == 'Iris-setosa']
df_virginica = df[df['species'] == 'Iris-virginica']
df_versicolor = df[df['species'] == 'Iris-versicolor']

# Draw each group's sepal lengths as dots on a single horizontal line (y = 0)
# so the overlap between the groups along that one feature is visible.
for species_df in (df_setosa, df_virginica, df_versicolor):
    sepal_lengths = species_df['sepal_length']
    plt.plot(sepal_lengths, np.zeros_like(sepal_lengths), 'o')
plt.xlabel('Sepal length')
plt.show()

In [None]:
# Histogram of sepal length for the setosa sub-frame.
sns.histplot(df_setosa, x='sepal_length')

In [None]:
# Histogram of sepal length for the virginica sub-frame.
sns.histplot(df_virginica, x='sepal_length')

In [None]:
# Histogram of sepal length for the versicolor sub-frame.
sns.histplot(df_versicolor, x='sepal_length')

# Bivariate Analysis

In [None]:
# Scatter petal length against sepal width, coloured by species, with a legend.
grid = sns.FacetGrid(df, hue='species', height=5)
grid.map(plt.scatter, 'petal_length', 'sepal_width')
grid.add_legend()
plt.show()

# Multivariate Analysis

In [None]:
# Pairwise plots of the columns in `df`, coloured by species.
sns.pairplot(data=df, hue='species', height=3)

In [None]:
import pandas as pd

# Rebuild `df` from the scikit-learn dataset object; this replaces the frame
# that was loaded from the CSV earlier in the notebook.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df.head()

In [None]:
# Attach the class labels from the dataset object as a `target` column.
df["target"] = iris.target
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the scores below are reproducible between runs;
# stratify keeps the class proportions the same in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['target']),
    iris.target,
    test_size=0.2,
    random_state=42,
    stratify=iris.target,
)

# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Default-sized forest; the seed makes the fitted trees (and therefore the
# score reported below) repeatable from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [None]:
# Accuracy of the fitted model on the held-out split.
model.score(X_test, y_test)

In [None]:
# Same classifier with 40 trees; seeded so this score can be compared fairly
# with other runs instead of moving with the random tree construction.
model = RandomForestClassifier(n_estimators=40, random_state=42)
model.fit(X_train, y_train)
model.score(X_test, y_test)

# K-Means Clustering

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Start the clustering section from a fresh feature-only frame (no target column).
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df.head()

In [None]:
# Keep only the two petal measurements by dropping the sepal columns in place.
df.drop(columns=['sepal length (cm)', 'sepal width (cm)'], inplace=True)
df.head()

In [None]:
# Rescale both petal columns in a single pass. MinMaxScaler works per feature,
# so each column gets the same values as when it is scaled on its own, and the
# fitted `scaler` now remembers the ranges of both columns rather than only
# the last one it was fitted on.
petal_cols = ['petal length (cm)', 'petal width (cm)']
scaler = MinMaxScaler()
df[petal_cols] = scaler.fit_transform(df[petal_cols])
df.head()

In [None]:

# Scatter of the two petal columns before any clustering is applied.
plt.scatter(df['petal length (cm)'], df['petal width (cm)'])

In [None]:
# Three clusters on the two petal columns.
# random_state fixes the centroid initialisation so the cluster ids (and the
# colours in the plot further down) are the same on every run; n_init is spelled
# out so the number of restarts does not depend on the installed sklearn version.
km = KMeans(n_clusters=3, n_init=10, random_state=42)
y_predicted = km.fit_predict(df[['petal length (cm)', 'petal width (cm)']])
y_predicted

In [None]:
# Store each row's cluster id next to its petal measurements.
df["cluster"] = y_predicted
df.head()

In [None]:
# Elbow method: fit KMeans for k = 1..9 and record the inertia (sum of squared
# distances to the nearest centroid) for each k.
# Seeding every fit keeps the curve identical between runs; n_init is explicit
# so the number of restarts does not depend on the installed sklearn version.
sse = []
k_rng = range(1, 10)
for k in k_rng:
    km_hypertuning = KMeans(n_clusters=k, n_init=10, random_state=42)
    km_hypertuning.fit(df[['petal length (cm)', 'petal width (cm)']])
    sse.append(km_hypertuning.inertia_)

In [None]:
# Plot the recorded error against the number of clusters.
plt.xlabel("K")
plt.ylabel("Sum of squared error")
plt.plot(k_rng, sse)

In [None]:
# One sub-frame per cluster id (0, 1, 2), each drawn in its own colour.
df1, df2, df3 = (df[df.cluster == label] for label in range(3))
for members, colour in zip((df1, df2, df3), ('green', 'red', 'black')):
    plt.scatter(members['petal length (cm)'], members['petal width (cm)'], color=colour)

# Overlay the fitted centroids; this is the only series that carries a legend label.
centroids = km.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], color='purple', marker='*', label='centroid')
plt.legend()

In [None]:
# Display the centroid coordinates of the fitted 3-cluster model.
km.cluster_centers_

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Raise the iteration cap: on unscaled features the solver can hit the default
# limit before converging and emit a ConvergenceWarning.
model = LogisticRegression(max_iter=1000)

In [None]:
from sklearn.model_selection import train_test_split

# Fresh 80/20 split on the raw feature matrix.
# random_state makes the split (and every metric computed from it below)
# reproducible; stratify keeps the class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
    stratify=iris.target,
)
model.fit(X_train, y_train)

In [None]:
# Accuracy of the fitted model on the held-out split.
model.score(X_test, y_test)

In [None]:
# Predicted labels for the first five samples of the dataset.
model.predict(iris.data[:5])

In [None]:
# True labels for the same first five samples, for comparison.
iris.target[:5]

In [None]:
# Display the whole dataset object returned by load_iris().
iris

In [None]:
# Predictions on the held-out split; note this rebinds `y_predicted`, which
# previously held the KMeans cluster ids.
y_predicted = model.predict(X_test)

# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

# True labels are passed first and predictions second, then the matrix is displayed.
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cm

In [None]:
# Use the same `sns` alias as the rest of the notebook instead of a second one.
import seaborn as sns

# Heatmap of the confusion matrix. The cells are integer counts, so they are
# annotated with an integer format rather than the default float format.
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')

# Support Vector Machine Classifier
**Call the SVC() model from sklearn and fit the model to the training data**


In [None]:
from sklearn.svm import SVC

In [None]:
# Support vector classifier with default settings, fitted on the current training split.
svc_model = SVC()
svc_model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out split with the fitted SVC.
predictions = svc_model.predict(X_test)


In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Confusion matrix of the SVC predictions against the true test labels.
print(confusion_matrix(y_test, predictions))

In [None]:
# Text report of the SVC's classification metrics on the test split.
print(classification_report(y_test, predictions))