In [None]:

import numpy as nmp

import pandas as pd

import matplotlib.pyplot as pplt


from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize

from sklearn.cluster       import DBSCAN
from sklearn.decomposition import PCA 

In [None]:

# Load the dataset; the path is relative to the notebook's working directory.
M = pd.read_csv('Dataset_9.csv')

# Remove the CUST_ID column so it does not take part in the scaling and
# clustering steps that follow.
M = M.drop(columns='CUST_ID')

# Fill missing values by carrying the previous row's value forward.
# `fillna(method='ffill')` is deprecated in recent pandas releases; `.ffill()`
# is the supported spelling and returns a new frame instead of mutating in place.
# Forward fill has nothing to copy into gaps in the very first row(s), so a
# back-fill pass follows to make sure no NaN is left for the later steps.
M = M.ffill().bfill()


In [None]:
# Show the column labels of the cleaned frame (for a DataFrame, keys() is its columns).
M.columns

In [None]:
# Preview the first five rows of the cleaned frame.
M.head(n=5)

In [None]:

# Step 1: standardise every column with a StandardScaler fitted on M itself.
scaler = StandardScaler()
scaler.fit(M)
M_scaled = scaler.transform(M)

# Step 2: rescale each row with sklearn's `normalize` (default settings) and
# wrap the resulting numpy array in a pandas DataFrame for the later cells.
M_normalized = pd.DataFrame(normalize(M_scaled))

In [None]:
# Project the normalized data onto its first two principal components so the
# clusters can be drawn on a 2-D scatter plot.

# `random_state` is fixed so the projection is identical on every run: when
# scikit-learn picks a randomized SVD solver the components otherwise vary
# slightly between runs, and the DBSCAN step below uses a very small `eps`.
pcaFD = PCA(n_components = 2, random_state = 42)

# Fit the projection and store the result as a DataFrame with one named column
# per component: 'C1' and 'C2'.
M_principal = pd.DataFrame(pcaFD.fit_transform(M_normalized), columns = ['C1', 'C2'])



In [None]:
# Preview the first five rows of the two-component projection.
M_principal.head(n=5)

In [None]:
# Baseline DBSCAN model on the 2-D projection: neighbourhood radius 0.0375 and
# at least 3 samples for a point to count as a core point.
db_default = DBSCAN(eps = 0.0375, min_samples = 3)
db_default.fit(M_principal)

# Cluster label assigned to each row of M_principal, in row order.
labeling = db_default.labels_

In [None]:
# Visualise the clustering: one colour per cluster label, with a matching legend.

# Preferred colours for the labels this model is expected to produce.
# DBSCAN reports noise points with the label -1.
colours = {0: 'g', 1: 'k', 2: 'r', -1: 'b'}

# Labels that actually occur in the fitted model, as plain Python ints.
label_array = nmp.asarray(labeling)
found_labels = [int(value) for value in nmp.unique(label_array)]

# DBSCAN decides the number of clusters itself, so any label not listed above
# gets a spare colour instead of triggering a KeyError when colours are looked up.
spare_colours = ['c', 'm', 'y', 'tab:orange', 'tab:purple',
                 'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive']
unlisted = [label for label in found_labels if label not in colours]
for position, label in enumerate(unlisted):
    colours[label] = spare_colours[position % len(spare_colours)]

# Colour of every individual data point, in row order (kept available for reuse).
cvec = [colours[int(label)] for label in label_array]

# Create the figure first so the points and the legend share the same axes;
# plotting before the figure exists would leave a stray extra figure behind.
fig, ax = pplt.subplots(figsize = (9, 9))

# Draw one scatter per label so each legend entry is tied to the points it
# describes and only labels that exist are listed. Clusters come first, noise last.
for label in sorted(found_labels, key = lambda value: (value == -1, value)):
    members = label_array == label
    ax.scatter(M_principal.loc[members, 'C1'],
               M_principal.loc[members, 'C2'],
               color = colours[label],
               label = f'Label M.{label}')

# C1 on the X-axis, C2 on the Y-axis
ax.set_xlabel('C1')
ax.set_ylabel('C2')
ax.set_title('DBSCAN clusters on the first two principal components')

# Legend entries are collected automatically from the labelled scatters above
ax.legend()

# Showing the visualization in the output
pplt.show()

In [None]:
# Second DBSCAN model: same radius as before, but a point now needs at least
# 50 samples in its neighbourhood to count as a core point.
dts = DBSCAN(eps = 0.0375, min_samples = 50)
dts.fit(M_principal)

# Replace the earlier labels with the ones from the re-tuned model.
labeling = dts.labels_

# Visualise the re-tuned clustering: one colour per cluster label, with a legend.

# All colours used by default: six cluster colours followed by black for noise.
colors = ['r', 'g', 'b', 'c', 'y', 'm', 'k']

# Preferred colour per label. DBSCAN reports noise points with the label -1.
colours1 = {0: 'r', 1: 'g', 2: 'b', 3: 'c', 4: 'y', 5: 'm', -1: 'k'}

# Labels that actually occur in the fitted model, as plain Python ints.
label_array = nmp.asarray(labeling)
found_labels = [int(value) for value in nmp.unique(label_array)]

# DBSCAN decides the number of clusters itself, so any label not listed above
# gets a spare colour instead of triggering a KeyError when colours are looked up.
spare_colours = ['tab:orange', 'tab:purple', 'tab:brown', 'tab:pink',
                 'tab:gray', 'tab:olive', 'tab:cyan']
unlisted = [label for label in found_labels if label not in colours1]
for position, label in enumerate(unlisted):
    colours1[label] = spare_colours[position % len(spare_colours)]

# Colour of every individual data point, in row order (kept available for reuse).
cvec = [colours1[int(label)] for label in label_array]

# Create the figure first so the points and the legend share the same axes;
# plotting before the figure exists would leave a stray extra figure behind.
fig, ax = pplt.subplots(figsize = (9, 9))

# Draw one scatter per label so each legend entry is tied to the points it
# describes and only labels that exist are listed. Clusters come first, noise last.
for label in sorted(found_labels, key = lambda value: (value == -1, value)):
    members = label_array == label
    ax.scatter(M_principal.loc[members, 'C1'],
               M_principal.loc[members, 'C2'],
               marker = 'o',
               color = colours1[label],
               label = f'Label M.{label}')

# Column C1 on the X-axis and column C2 on the Y-axis
ax.set_xlabel('C1')
ax.set_ylabel('C2')
ax.set_title('DBSCAN clusters after tuning min_samples')

# Legend entries are collected automatically from the labelled scatters above
ax.legend(scatterpoints = 1,   # one marker per legend entry
          loc = 'upper left',  # position of the legend box
          ncol = 3,            # number of legend columns
          fontsize = 10)       # legend font size

# Displaying the visualisation of the re-tuned clustering
pplt.show()