# Customer Segmentation Using KMeans (Clustering) - Unsupervised

### Steps to solve the problem :
### 1. Importing the required Libraries
### 2. Input the required data
### 3. Check for Null values 
### 4. Pick the fields you want to analyse (I have repeated this in three ways)
### 5. Selection of Clusters using Elbow Method
### 6. Plotting the Clusters

In [None]:
import numpy as np   #Linear algera Library
import pandas as pd
import matplotlib.pyplot as plt  #to plot graphs
import seaborn as sns  #to plot graphs
sns.set()  #setting seaborn as default 

import warnings
warnings.filterwarnings('ignore')

from sklearn.cluster import KMeans

## Method 1: Age and Spending Score (1-100)

In [None]:
# Read Mall_Customers.csv into a DataFrame and preview its first five rows.
data = pd.read_csv(
    filepath_or_buffer='../input/customer-segmentation-tutorial-in-python/Mall_Customers.csv',
)
data.head(n=5)

In [None]:
# Per-column count of missing entries (original author's note: no missing values).
data.isna().sum()

In [None]:
# Unclustered view: Age on the x-axis, Spending Score on the y-axis.
plt.scatter(
    x=data['Age'],
    y=data['Spending Score (1-100)'],
)
plt.ylabel('Spending Score (1-100)')
plt.xlabel('Age')
plt.show()

In [None]:
# Feature matrix for this method: the Age and Spending Score columns only.
x = data.loc[:, ['Age', 'Spending Score (1-100)']]
x

In [None]:
# Elbow method: fit KMeans for k = 1..9 and collect each model's inertia
# (within-cluster sum of squares) to decide how many clusters to pick.
wcss = []
for i in range(1, 10):
    kmeans = KMeans(
        n_clusters=i,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    ).fit(x)
    wcss.append(kmeans.inertia_)
    



In [None]:
# Elbow curve: inertia (WCSS) against the number of clusters tried (1..9).
# Axes are labelled and the figure is shown explicitly, like the other plots.
no_of_clusters = range(1, 10)
plt.plot(no_of_clusters, wcss, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.title('Elbow Method')
plt.show()

In [None]:
# Final model with 4 clusters. The hyper-parameters (including random_state)
# mirror the ones used in the elbow loop so that the cluster assignment is
# reproducible from run to run.
kmeans = KMeans(n_clusters=4, init='k-means++', max_iter=300, n_init=10, random_state=0)
kmeans.fit(x)

In [None]:
# The model is already fitted on x, so read its training labels directly
# instead of refitting with fit_predict (a refit could produce a different
# labelling than the fitted model).
identified_clusters = kmeans.labels_
identified_clusters  # cluster ids are 0, 1, 2, 3

In [None]:
# Copy of the data with the cluster id of each row in a new 'Clusters' column.
table_with_clusters = data.assign(Clusters=identified_clusters)
table_with_clusters

In [None]:
# Age vs Spending Score, with each point coloured by its cluster id.
plt.scatter(
    x=table_with_clusters['Age'],
    y=table_with_clusters['Spending Score (1-100)'],
    c=table_with_clusters['Clusters'],
    cmap='rainbow',
)
plt.title('Plot with clusters')
plt.xlabel('Age')
plt.ylabel('Spending Score (1-100)')
plt.show()

## Method 2: Gender versus Spending Score (1-100)

### This is done only to understand how categorical data is handled

In [None]:
# Reload the original CSV so this method starts from unmodified data,
# then preview the first five rows.
data = pd.read_csv(
    filepath_or_buffer='../input/customer-segmentation-tutorial-in-python/Mall_Customers.csv',
)
data.head(n=5)

In [None]:
# Bar chart of the number of rows per Gender value.
plt.figure(1, figsize=(10, 5))
ax = sns.countplot(data=data, x='Gender')
for bar in ax.patches:
    bar_height = bar.get_height()
    # Write each bar's height slightly above the bar so the count is readable.
    ax.annotate('{:}'.format(bar_height), (bar.get_x() + 0.1, bar_height + 1))

plt.show()

In [None]:
# One-hot encode Gender; drop_first=True drops the first category level so
# only the remaining indicator column(s) are kept (later cells select 'Male').
# dtype=int keeps the indicator as 0/1 integers: recent pandas versions
# default to bool, while the column is used here as a numeric feature.
Gender = pd.get_dummies(data['Gender'], drop_first=True, dtype=int)
Gender.head()


In [None]:
# Append the dummy-encoded gender column(s) to the right of the data.
data = pd.concat(objs=[data, Gender], axis='columns')
data.head(n=5)

In [None]:
# The text 'Gender' column is no longer needed once its dummy column exists.
data.drop(columns=['Gender'], inplace=True)
data.head(n=5)

In [None]:
# Feature matrix for this method: the 'Male' indicator and the Spending Score.
x = data.loc[:, ['Male', 'Spending Score (1-100)']]
x

In [None]:
# Elbow method: record the inertia (within-cluster sum of squares) of a
# KMeans model for every cluster count from 1 to 9.
wcss = []
for i in range(1, 10):
    kmeans = KMeans(
        n_clusters=i,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    ).fit(x)
    wcss.append(kmeans.inertia_)

In [None]:
# Elbow curve: inertia (WCSS) against the number of clusters tried (1..9).
# Axes are labelled and the figure is shown explicitly, like the other plots.
no_of_clusters = range(1, 10)
plt.plot(no_of_clusters, wcss, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.title('Elbow Method')
plt.show()

In [None]:
# Final model with 4 clusters. The hyper-parameters (including random_state)
# mirror the ones used in the elbow loop so that the cluster assignment is
# reproducible from run to run.
kmeans = KMeans(n_clusters=4, init='k-means++', max_iter=300, n_init=10, random_state=0)
kmeans.fit(x)

In [None]:
# The model is already fitted on x, so read its training labels directly
# instead of refitting with fit_predict (a refit could produce a different
# labelling than the fitted model).
identified_clusters = kmeans.labels_
identified_clusters  # cluster ids are 0, 1, 2, 3

In [None]:
# Copy of the data with the cluster id of each row in a new 'Clusters' column.
table_with_clusters = data.assign(Clusters=identified_clusters)
table_with_clusters

In [None]:
# Spending Score (x-axis) against the 'Male' indicator (y-axis), coloured by
# cluster id. The axis labels now match the data actually plotted on each
# axis; previously they were swapped.
plt.scatter(
    table_with_clusters['Spending Score (1-100)'],
    table_with_clusters['Male'],
    c=table_with_clusters['Clusters'],
    cmap='rainbow',
)
plt.xlabel('Spending Score (1-100)')
plt.ylabel('Male')
plt.title('Plot with clusters')
plt.show()

## Method 3: Using annual income and spending score

In [None]:
# Reload the original CSV so this method starts from unmodified data,
# then preview the first five rows.
data = pd.read_csv(
    filepath_or_buffer='../input/customer-segmentation-tutorial-in-python/Mall_Customers.csv',
)
data.head(n=5)

In [None]:
# Unclustered view: Annual Income on the x-axis, Spending Score on the y-axis.
plt.scatter(
    x=data['Annual Income (k$)'],
    y=data['Spending Score (1-100)'],
)
plt.ylabel('Spending Score (1-100)')
plt.xlabel('Annual Income (k$)')
plt.show()

In [None]:
# Clustering based on annual income and spending score
# (the previous comment was copied from Method 1 and wrongly mentioned age).
x = data[['Annual Income (k$)', 'Spending Score (1-100)']]
x

In [None]:
# Elbow method: record the inertia (within-cluster sum of squares) of a
# KMeans model for every cluster count from 1 to 9.
wcss = []
for i in range(1, 10):
    kmeans = KMeans(
        n_clusters=i,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    ).fit(x)
    wcss.append(kmeans.inertia_)

In [None]:
# Elbow curve: inertia (WCSS) against the number of clusters tried (1..9).
# Axes are labelled and the figure is shown explicitly, like the other plots.
no_of_clusters = range(1, 10)
plt.plot(no_of_clusters, wcss, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.title('Elbow Method')
plt.show()

In [None]:
# Final model with 5 clusters. The hyper-parameters (including random_state)
# mirror the ones used in the elbow loop so that the cluster assignment is
# reproducible from run to run.
kmeans = KMeans(n_clusters=5, init='k-means++', max_iter=300, n_init=10, random_state=0)
kmeans.fit(x)

In [None]:
# The model is already fitted on x, so read its training labels directly
# instead of refitting with fit_predict (a refit could produce a different
# labelling than the fitted model).
identified_clusters = kmeans.labels_
identified_clusters  # cluster ids are 0, 1, 2, 3, 4 (the model uses 5 clusters)

In [None]:
# Copy of the data with the cluster id of each row in a new 'Clusters' column.
table_with_clusters = data.assign(Clusters=identified_clusters)
table_with_clusters

In [None]:
# Annual Income vs Spending Score, with each point coloured by its cluster id.
plt.scatter(
    x=table_with_clusters['Annual Income (k$)'],
    y=table_with_clusters['Spending Score (1-100)'],
    c=table_with_clusters['Clusters'],
    cmap='rainbow',
)
plt.title('Plot with clusters')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()