# Importing Libraries

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Silence every warning for cleaner cell output. Note that this also hides
# deprecation notices raised by pandas / seaborn / scikit-learn.
warnings.filterwarnings('ignore')
# Apply seaborn's 'darkgrid' style to the figures drawn below.
sns.set_style('darkgrid')

# Loading Data

In [None]:
# Read the Iris CSV into a DataFrame and preview its first rows.
iris_csv_path = '../input/iris-flower-dataset/IRIS.csv'
data = pd.read_csv(iris_csv_path)
data.head()

In [None]:
# Replace the snake_case measurement headers with human-readable titles.
# The two lists are paired position by position to form the rename mapping.
old_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
new_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
data = data.rename(columns=dict(zip(old_names, new_names)))
data

# Data Exploration

In [None]:
# Structure of data: (number of rows, number of columns)
data.shape

In [None]:
# Detailed view of data: summary statistics (count, mean, std, min,
# quartiles, max) for the numeric columns
data.describe()

In [None]:
# Count missing entries per column and show them as a one-column table
data.isnull().sum().rename('Null Values').to_frame()

In [None]:
# Distinct labels found in the 'species' column, as a plain Python list
data['species'].unique().tolist()

# Visualizing Data

### Plotting Data Distribution

In [None]:
# Joint plot of the whole frame with enlarged (s=100), semi-transparent markers.
# NOTE(review): no x/y variables are given, so seaborn decides what to draw from
# the wide-form frame — confirm this is the intended view, or pass x= and y= explicitly.
sns.jointplot(data=data, s=100, alpha=0.7, height=6)
plt.show()

In [None]:
# Box plot of the frame's columns on a single 10x6 inch figure
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data, ax=ax)
plt.show()

In [None]:
# Violin plots split into one panel (column of the grid) per species
species_violins = sns.catplot(kind='violin', col='species', data=data)
plt.show()

### Correlation of Data 

In [None]:
# Pairwise correlation between the numeric measurement columns.
# The string-valued 'species' column is excluded explicitly: pandas >= 2.0 no
# longer drops non-numeric columns silently in DataFrame.corr() and raises
# instead. select_dtypes works the same on older and newer pandas versions.
numeric_corr = data.select_dtypes(include='number').corr()

plt.figure(figsize=(5,5))
# annot=True writes each coefficient into its cell; the colour bar is hidden
# because the annotations already carry the values.
sns.heatmap(numeric_corr, cmap='YlGnBu', annot=True,
            cbar=False, annot_kws={'size': 14})
plt.show()

In [None]:
# Scatter-matrix of all column pairs, coloured by species; each facet is 3 inches tall
species_pairs = sns.pairplot(hue='species', height=3, data=data)
plt.show()

# K-Means Clustering

In [None]:
# Importing Libraries
from sklearn.cluster import KMeans

### Preparing Data

In [None]:
# Features: the columns at positions 2 and 3 (Petal Length and Petal Width)
petal_features = data.iloc[:, 2:4]
X = petal_features.values

# Integer code for each species label; labels missing from the mapping become NaN
species_codes = {'Iris-setosa': 0, 'Iris-virginica':1, 'Iris-versicolor':2}
y = data['species'].map(species_codes).values

In [None]:
# Report the dimensions of the feature matrix and the label vector
for array_name, array in (('X', X), ('y', y)):
    print(f'Shape of {array_name}: {array.shape}')

### Finding the Number of Clusters

In [None]:
# Fit KMeans for k = 1..10 and record the within-cluster sum of squares
# (the fitted model's inertia_) for each k.
wcss = []
for n_clusters in range(1, 11):
    model = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )
    inertia = model.fit(X).inertia_
    wcss.append(inertia)
    print(f'wcss {n_clusters}: {inertia}')

In [None]:
# Elbow curve: WCSS against the number of clusters tried (1..10)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(range(1, 11), wcss, color='green')
ax.set_title('The Elbow Curve', fontsize=14)
ax.set_xlabel('Number of Clusters', fontsize=15)
ax.set_ylabel('WCSS')
plt.show()

> * Number of Clusters (chosen from the elbow curve above): 3

### Making Predictions

In [None]:
# Final model with three clusters; the seed is fixed so reruns give the same result
kmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
)
kmeans.fit(X)
# Cluster index assigned to each row of X
y_means = kmeans.labels_
y_means

### Plotting Clusters 

In [None]:
# Cluster Center values: one row per cluster, columns in the same order as X
kmeans.cluster_centers_

In [None]:
# Name each cluster after the species that dominates it. KMeans numbers its
# clusters arbitrarily, so a fixed species list indexed by cluster id can put
# the wrong name on a cluster.
species = [data.loc[y_means == i, 'species'].mode().iat[0]
           for i in range(kmeans.n_clusters)]

fig, ax = plt.subplots(1,2, figsize=(15,5))

# Left panel: the raw petal measurements, before any grouping.
ax[0].set_title('Unclustered data', fontsize=16)
ax[0].set_xlabel('Petal Length', fontsize=12)
ax[0].set_ylabel('Petal Width', fontsize=12)
# x / y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.scatterplot(data=data, x='Petal Length', y='Petal Width',
                s=75, color='red', alpha=0.8, ax=ax[0])

# Right panel: the same points, one scatter call per KMeans cluster.
ax[1].set_title('Clustered data', fontsize=16)
ax[1].set_xlabel('Petal Length', fontsize=12)
ax[1].set_ylabel('Petal Width', fontsize=12)
for i in range(kmeans.n_clusters):
    in_cluster = y_means == i
    sns.scatterplot(x=X[in_cluster, 0], y=X[in_cluster, 1],
                    s=75, label=species[i], ax=ax[1])

# Overlay the fitted cluster centers as large stars.
sns.scatterplot(x=kmeans.cluster_centers_[:,0], y=kmeans.cluster_centers_[:,1],
                s=700, color='purple', label='Centroids', marker='*', ax=ax[1])
plt.show()

## Thank You!