In [None]:

# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn 
from sklearn.cluster import KMeans 
from kmodes.kmodes import KModes
from sklearn.preprocessing import StandardScaler, normalize 
from sklearn.metrics import silhouette_score 
from sklearn.metrics import silhouette_samples
from sklearn.decomposition import PCA
import collections
import plotly.graph_objects as go
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the three questionnaire datasets (depression, anxiety, stress) from CSV.
# The paths are relative to the notebook's working directory.
df_dep, df_anx, df_str = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Datasets/Depression.csv",
        "../Datasets/Anxiety.csv",
        "../Datasets/Stress.csv",
    )
)

In [None]:
# sampling dataset
# Draw a fixed-size random subset of every dataset. The explicit seed makes the
# draw -- and therefore every clustering result derived from it -- repeatable
# after a kernel restart; without it each run clusters different rows.
SAMPLE_SIZE = 2500
SAMPLE_SEED = 1
df_dep_samp=df_dep.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)
df_anx_samp=df_anx.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)
df_str_samp=df_str.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)
# Keep only the first seven columns as the clustering features.
# NOTE(review): presumably these are the questionnaire item columns -- confirm against the CSV schema.
X_dep=df_dep_samp.iloc[:,0:7]
X_anx=df_anx_samp.iloc[:,0:7]
X_str=df_str_samp.iloc[:,0:7]

In [None]:
# Project each feature matrix onto its own first three principal components.
# The single estimator is refitted for every dataset in turn, so after this
# cell `pca` holds the fit for the stress data only.
pca = PCA(n_components=3, random_state=1)
X_pca_dep, X_pca_anx, X_pca_str = (
    pca.fit_transform(features) for features in (X_dep, X_anx, X_str)
)

In [None]:
# Kmeans

In [None]:
### Elbow Method
# For k = 1..clusters, fit K-Means on each dataset and record the within-cluster
# sum of squares (the estimator's inertia_). The "elbow" where the curve flattens
# suggests a reasonable number of clusters.
clusters = 7  # largest k tried; the silhouette analysis reuses this value
k_values = list(range(1, clusters + 1))
wcss_dep, wcss_anx, wcss_str = [], [], []
for k in k_values:
    kmeans = KMeans(n_clusters=k, init='k-means++', max_iter=300, n_init=10, random_state=66)
    # fit() returns the estimator itself, so inertia_ can be read straight off the refit.
    for features, wcss in ((X_dep, wcss_dep), (X_anx, wcss_anx), (X_str, wcss_str)):
        wcss.append(kmeans.fit(features).inertia_)
fig = go.Figure()
for wcss, trace_name in (
    (wcss_dep, 'Depression Dataset'),
    (wcss_anx, 'Anxiety Dataset'),
    (wcss_str, 'Stress Dataset'),
):
    fig.add_trace(go.Scatter(x=k_values, y=wcss, mode='lines+markers', name=trace_name))
# Label the figure so it can be read on its own when the notebook is skimmed.
fig.update_layout(
    title='Elbow method: K-Means inertia per dataset',
    xaxis_title='Number of clusters (k)',
    yaxis_title='Within-cluster sum of squares (WCSS)',
)
fig.show()

In [None]:
### Silhouette Method
# For every candidate cluster count from 2 up to `clusters`, fit K-Means on the
# depression features and keep the predicted cluster label of each sample in `y`.
# The range starts at 2 because a silhouette score needs at least two clusters.
for cluster in range(2, clusters+1):
    km = KMeans(n_clusters = cluster, random_state = 1)
    y = km.fit_predict(X_dep)

In [None]:
    # One wide (18 x 7 inch) figure per candidate cluster count.
    fig, ax1 = plt.subplots(1, 1)
    fig.set_size_inches(18, 7)
    
    # The x-axis carries silhouette coefficients; the view is limited to [-0.1, 1].
    ax1.set_xlim([-0.1, 1])
    # The y-axis has one row per sample plus (cluster + 1) * 10 extra rows of padding.
    ax1.set_ylim([0, len(X_dep) + (cluster + 1) * 10])
    
    # Overall silhouette score of this clustering, printed for each cluster count.
    score = silhouette_score(X_dep, y)
    print('score :',score)

In [None]:
    # Per-sample silhouette coefficients, drawn below as one band per cluster.
    sil_sample = silhouette_samples(X_dep, y)
    y_lower = 10  # the first band starts 10 rows above the bottom of the axis
    for i in range(cluster):
        # Coefficients of the samples assigned to cluster i. Boolean-mask indexing
        # yields a copy, so the in-place sort leaves sil_sample itself untouched.
        i_sil_sample = sil_sample[y==i]
        i_sil_sample.sort()
        y_upper = y_lower + i_sil_sample.shape[0]
        # Fill horizontally from 0 to each sorted coefficient, one row per sample.
        ax1.fill_betweenx(np.arange(y_lower,y_upper),0,i_sil_sample)
        y_lower = y_upper + 10  # 10-row gap before the next cluster's band

In [None]:
score : 0.39232409588781264
score : 0.27720286655299414
score : 0.22110603157855677

In [None]:
## Kmeans visualization
# Cluster the depression features into 3 groups, report how many samples land
# in each cluster, and plot the clusters in the 3-component PCA projection.
kmeans = KMeans(n_clusters = 3, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 1)
ymeans = kmeans.fit_predict(X_dep)
print("Depression",collections.Counter(ymeans))
# Each row of the transposed projection is one principal component.
x, y, z = X_pca_dep.T
# Cluster ids are categories, not magnitudes. Passing them as strings makes
# Plotly Express use discrete colours and a legend instead of a continuous
# colour bar that implies an ordering between clusters.
fig = px.scatter_3d(
    x = x, y = y, z = z,
    color = ymeans.astype(str),
    opacity= 0.70,
    template='ggplot2',
    labels={'x': 'PC1', 'y': 'PC2', 'z': 'PC3', 'color': 'Cluster'},
    title='Depression dataset: K-Means clusters in PCA space',
)
fig.update_traces(marker=dict(size=4), selector=dict(mode='markers'))
fig.show()
