## Importing libraries and the dataset.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings(action="ignore")
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*" and
# later releases removed the old names, so use whichever spelling this install
# provides instead of hard-coding 'seaborn-bright' (OSError when it is absent).
_bright_styles = [name for name in ('seaborn-v0_8-bright', 'seaborn-bright')
                  if name in plt.style.available]
plt.style.use(_bright_styles[:1] + ['dark_background'])

In [None]:
# Read the Mall_Customers CSV into a DataFrame and preview its first rows.
data = pd.read_csv(
    filepath_or_buffer='/kaggle/input/customer-segmentation-tutorial-in-python/Mall_Customers.csv',
)
data.head()

#### Renaming the columns of the dataframe.

In [None]:
# Replace the original headers with the snake_case names used by the rest of the notebook.
data = data.rename(
    columns={
        'CustomerID': 'customer_id',
        'Gender': 'gender',
        'Age': 'age',
        'Annual Income (k$)': 'annual_income_in_$',
        'Spending Score (1-100)': 'spending_score',
    }
)

In [None]:
# Preview the first rows to confirm the renamed column headers.
data.head()

#### Checking for missing values.

In [None]:
# Print each column's dtype and non-null count; a non-null count below the
# number of rows would indicate missing values.
data.info()

#### Checking for outliers.

In [None]:
# Summary statistics of the numeric columns; comparing min/max with the
# quartiles gives a quick first check for extreme values.
data.describe()

#### Dropping the unwanted columns.

In [None]:
# Remove the identifier column; it is not used by the plots or the clustering below.
data = data.drop(columns=['customer_id'])

#### Plotting histograms of each numeric feature, split by gender.

In [None]:
# Age distribution split by gender, with a KDE curve drawn for each group.
fig, ax = plt.subplots(figsize=(15, 8))
sns.histplot(data=data, x='age', hue='gender', palette='hsv', kde=True, ax=ax)
ax.set_xlabel("Age", fontsize=20)
ax.set_ylabel("Count", fontsize=15)
plt.show()

In [None]:
# Annual income distribution split by gender, with a KDE curve drawn for each group.
fig, ax = plt.subplots(figsize=(15, 8))
sns.histplot(data=data, x='annual_income_in_$', hue='gender', palette='PuOr', kde=True, ax=ax)
ax.set_xlabel("Annual Income", fontsize=20)
ax.set_ylabel("Count", fontsize=15)
plt.show()

In [None]:
# Spending score distribution split by gender, with a KDE curve drawn for each group.
fig, ax = plt.subplots(figsize=(15, 8))
sns.histplot(data=data, x='spending_score', hue='gender', palette='ocean', kde=True, ax=ax)
ax.set_xlabel("Spending Score", fontsize=20)
ax.set_ylabel("Count", fontsize=15)
plt.show()

#### Visualizing count plots to check how many records share each value of age, annual income and spending score.

In [None]:
# Number of customers at each distinct age.
plt.figure(figsize=(20, 7))
# Pass the column as keyword `x`: since seaborn 0.12 the first positional
# parameter of countplot is `data`, so a positional Series is no longer read as x.
sns.countplot(x=data['age'])
plt.xlabel("Age", fontsize=20)
plt.ylabel("Count", fontsize=15)
plt.show()

In [None]:
# Number of customers at each distinct annual income value.
plt.figure(figsize=(24, 7))
# Pass the column as keyword `x`: since seaborn 0.12 the first positional
# parameter of countplot is `data`, so a positional Series is no longer read as x.
sns.countplot(x=data['annual_income_in_$'])
plt.xlabel("Annual Income", fontsize=20)
plt.ylabel("Count", fontsize=15)
plt.show()

In [None]:
# Number of customers at each distinct spending score.
plt.figure(figsize=(23, 7))
# Pass the column as keyword `x`: since seaborn 0.12 the first positional
# parameter of countplot is `data`, so a positional Series is no longer read as x.
sns.countplot(x=data['spending_score'])
plt.xlabel("Spending Score", fontsize=20)
plt.ylabel("Count", fontsize=15)
plt.show()

#### Comparing the feature gender.

In [None]:
# Number of customers per gender category.
plt.figure(figsize=(8, 4))
# Pass the column as keyword `x`: since seaborn 0.12 the first positional
# parameter of countplot is `data`, so a positional Series is no longer read as x.
sns.countplot(x=data['gender'], palette=['red', 'cyan'])
plt.show()

#### Getting dummies of categorical features.

In [None]:
# One-hot encode the gender column; drop_first omits the first category's
# indicator so the encoding carries no redundant column.
data = pd.get_dummies(data, columns=['gender'], drop_first=True)

In [None]:
# Preview the first rows to inspect the encoded gender column.
data.head()

#### Plotting the correlation heatmap.

In [None]:
# Heatmap of the pairwise correlations between all columns, annotated with the coefficients.
correlations = data.corr()
plt.figure(figsize=(15, 8))
sns.heatmap(correlations, annot=True)
plt.show()

In [None]:
# Use a copy as the feature matrix: X later gains a 'cluster' column, and with a
# plain alias that column would also appear in `data` and be fed back into the
# models as a feature whenever the clustering cells are re-run.
X = data.copy()

In [None]:
from sklearn.cluster import KMeans

# Baseline two-cluster model. random_state fixes the centroid initialisation so
# the labels are the same on every run; n_init is set explicitly because its
# default differs between scikit-learn releases.
model = KMeans(n_clusters=2, n_init=10, random_state=42)
model.fit(X)
pred = model.predict(X)

In [None]:
# Inertia of the fitted two-cluster model (the quantity collected as SSE in the elbow loop below).
model.inertia_

In [None]:
# Score of the two-cluster model on the data it was fitted on.
# NOTE(review): scikit-learn documents KMeans.score as the opposite of the k-means
# objective, so this should be roughly -model.inertia_ — confirm.
model.score(X)

In [None]:
# Fit one model per candidate cluster count (1 to 9) and record its inertia.
# A fixed random_state and an explicit n_init make the resulting elbow curve
# reproducible across runs and scikit-learn releases.
sse = []
for cluster in range(1, 10):
    km = KMeans(n_clusters=cluster, n_init=10, random_state=42)
    km.fit(X)
    sse.append(km.inertia_)

In [None]:
# Tabulate inertia per cluster count. The counts are derived from len(sse) rather
# than a second hard-coded range, so the table stays aligned with the list
# (which starts at one cluster) if the number of candidates is changed.
frame = pd.DataFrame({'Cluster': range(1, len(sse) + 1), 'SSE': sse})

#### Plotting inertia against the number of clusters to choose a suitable number of clusters using the elbow method.

In [None]:
# Elbow plot: inertia against the number of clusters.
fig, ax = plt.subplots(figsize=(13, 6))
ax.plot(frame['Cluster'], frame['SSE'], color='lime', linestyle='--', marker='o')
ax.set_title("Elbow Method", fontsize=20, color='r')
ax.set_xlabel('No Of Clusters', fontsize=20, color='lime')
ax.set_ylabel('Inertia', fontsize=15, color='lime')
plt.show()

In [None]:
# Final model with five clusters (presumably the value read off the elbow plot).
# random_state keeps the cluster labels, and therefore the colours in the scatter
# plots below, identical between runs; n_init is set explicitly because its
# default differs between scikit-learn releases.
km = KMeans(n_clusters=5, n_init=10, random_state=42)
km.fit(X)
pred = km.predict(X)

In [None]:
# Show the cluster label predicted for each row of X.
print(pred)

In [None]:
# Store the predicted labels as a 'cluster' column; seg() filters rows on this column.
X['cluster'] = pred

In [None]:
def seg(str_x,str_y,clusters):
    """Split two columns of the notebook-level frame ``X`` by cluster label.

    Parameters
    ----------
    str_x, str_y
        Names of the columns of ``X`` to split.
    clusters
        Number of labels to collect; labels ``0`` to ``clusters - 1`` are used.

    Returns
    -------
    tuple
        ``(x, y)``: two lists of length ``clusters`` whose i-th entries hold the
        ``str_x`` / ``str_y`` values of the rows where ``X['cluster'] == i``.
    """
    # One boolean row mask per cluster label, shared by both columns.
    masks = [X['cluster'] == label for label in range(clusters)]
    x = [X[str_x][mask] for mask in masks]
    y = [X[str_y][mask] for mask in masks]
    return x, y

In [None]:
def plot(str_x,str_y,clusters):
    """Draw a scatter plot of two columns with one colour series per cluster.

    Parameters
    ----------
    str_x, str_y
        Column names plotted on the x and y axes; also used as axis labels
        and in the title.
    clusters
        Number of clusters to draw; passed through to ``seg``.
    """
    plt.figure(figsize=(7, 5), dpi=120)

    xs, ys = seg(str_x, str_y, clusters)
    # Legend entries are numbered from 1 although the stored labels start at 0.
    for number, (x_vals, y_vals) in enumerate(zip(xs, ys), start=1):
        plt.scatter(x_vals, y_vals, label='cluster {}'.format(number))

    plt.xlabel(str_x)
    plt.ylabel(str_y)
    plt.title(str_x + " VS " + str_y)
    plt.legend()
    plt.show()

### Plotting clusters for annual income and age.

In [None]:
# Scatter annual income (x) against age (y), one series per cluster, for the five clusters.
plot('annual_income_in_$','age',5)

### Plotting clusters for spending score and age. 

In [None]:
# Scatter spending score (x) against age (y), one series per cluster, for the five clusters.
plot('spending_score','age',5)

### Plotting clusters for annual income and spending score.

In [None]:
# Scatter annual income (x) against spending score (y), one series per cluster, for the five clusters.
plot('annual_income_in_$','spending_score',5)