# Task: Prediction using Unsupervised ML

## Objective: To predict the optimum number of clusters and represent it visually

## Presented by: Simran Jain

### IMPORT LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cluster import KMeans 

### READ THE DATA

In [None]:
# Path to the Iris CSV, relative to the notebook's working directory.
iris_csv_path = '../input/iris-dataset/Iris.csv'
df = pd.read_csv(iris_csv_path)

In [None]:
# Independent snapshot of the data as loaded; later cells drop columns from
# `df` but still need the full table (e.g. the 'Species' column).
df_copy = df.copy(deep=True)

In [None]:
# Preview the first few rows to sanity-check the load.
df.head()

### SHAPE OF THE DATA

In [None]:
# df.shape is (rows, columns): rows are observations, columns are variables.
n_observations, n_variables = df.shape
print('Number of Observations in Data:', n_observations)
print('Number of variables in Data:', n_variables)

### DESCRIBING DATA

In [None]:
# Print a concise summary of the frame: column names, non-null counts, dtypes.
df.info()

In [None]:
# Summary statistics for every column; include='all' also covers
# non-numeric columns, not just the numeric ones.
df.describe(include='all')

In [None]:
# 'Id' and 'Species' are not used as clustering inputs, so remove them from
# the working frame (df_copy still holds the full table).
# Rebinding `df` with errors='ignore' instead of dropping in place keeps this
# cell idempotent: re-running it no longer raises KeyError because the columns
# are already gone.
df = df.drop(columns=['Id', 'Species'], errors='ignore')
df.head()

### DISTRIBUTION OF DATA

In [None]:
# One histogram + KDE panel per remaining column of `df`.
# - sns.distplot is deprecated; histplot(kde=True, stat='density') is its
#   documented replacement and draws the same density histogram with a KDE
#   curve on top.
# - The number of grid rows is derived from the column count (two panels per
#   row) so the cell does not fail if `df` has more than four columns; with
#   four columns this is the same 2x2 layout as before.
n_plot_cols = 2
n_plot_rows = max(1, (len(df.columns) + n_plot_cols - 1) // n_plot_cols)

fig = plt.figure(figsize=(20, 20))
for position, column in enumerate(df.columns, start=1):
    ax = fig.add_subplot(n_plot_rows, n_plot_cols, position)
    sns.histplot(df[column], kde=True, stat='density', ax=ax)
    ax.set_title(column, color='red', fontsize=18, fontweight='bold')
plt.tight_layout()

In [None]:
# Skewness of each remaining column, to complement the distribution plots.
df.skew()

### PAIR PLOT

In [None]:
# Pairwise scatter plots coloured by species. This uses df_copy because the
# working frame `df` no longer carries the 'Species' column; 'Id' is left out
# of the plot.
pairplot_data = df_copy.drop(columns='Id')
sns.pairplot(pairplot_data, hue='Species')

## BUILDING CLUSTERS

### Calculating the within-cluster sum of squares (WSS) for different values of K - Elbow Method

In [None]:
# Holds one within-cluster sum of squares (WSS) value per candidate cluster count.
wss =[] 

In [None]:
# Fit K-Means for k = 1..10 and record each model's inertia (WSS).
#
# The list is reset here, in the same cell as the loop, so re-running the cell
# cannot append a second batch of ten values (which would make the elbow plot
# fail, since it plots `wss` against range(1, 11)).
#
# random_state makes the curve reproducible between runs; n_init is passed
# explicitly because its default differs between scikit-learn releases.
wss = []
for i in range(1, 11):
    KM = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    KM.fit(df)
    wss.append(KM.inertia_)

In [None]:
# Display the collected WSS values.
wss

In [None]:
# Elbow plot: WSS on the y-axis against the number of clusters tried (1..10).
cluster_counts = range(1, 11)
axis_label_style = dict(color='black', fontsize=16, fontweight='bold')

fig = plt.figure(figsize=(10, 8))
plt.plot(cluster_counts, wss)
plt.title('The Elbow Method', color='red', fontsize=22, fontweight='bold')
plt.xlabel('Number of clusters', **axis_label_style)
plt.ylabel('WSS', **axis_label_style)
plt.show()

In [None]:
# Final model with three clusters (presumably the elbow read off the plot
# above). fit_predict returns one cluster id per row of `df`.
#
# random_state pins the initialisation so the cluster ids and assignments are
# the same on every run; n_init is passed explicitly because its default
# differs between scikit-learn releases.
k_means = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=42)
label = k_means.fit_predict(df)
label

In [None]:
fig = plt.figure(figsize=(10, 10))

# K-Means numbers its clusters arbitrarily, so cluster 0/1/2 cannot be assumed
# to be a particular species. Each legend entry is therefore derived from the
# data: the most frequent 'Species' value (taken from df_copy, which has the
# same rows as df) among the points assigned to that cluster.
features = df.values
species = df_copy['Species']
cluster_colors = ['dodgerblue', 'red', 'yellow']

# Iterate over the ids actually present in `label` so an id with no members
# is never looked up.
for cluster_id, color in zip(np.unique(label), cluster_colors):
    members = label == cluster_id
    dominant_species = species[members].mode().iloc[0]
    # Plot the first two feature columns of the rows in this cluster.
    plt.scatter(features[members, 0], features[members, 1], s=100, color=color,
                label=f'Cluster {cluster_id} (mostly {dominant_species})')

# Plotting the centroids of the clusters (same two feature dimensions).
plt.scatter(k_means.cluster_centers_[:, 0], k_means.cluster_centers_[:, 1],
            s=150, color='black', label='Centroids')

# Name the axes after the two columns being plotted.
plt.xlabel(df.columns[0])
plt.ylabel(df.columns[1])
plt.legend()
plt.show()