## This notebook assumes that all the steps to preprocess the images and generate the embeddings have already been executed. It is used for visualization only.

In [None]:
baseDir = "."
imgDir = baseDir + "/imgs"
faceDir = baseDir + "/faces"
!mkdir $faceDir $imgDir
!pip install -r "requirements.txt"

In [None]:
import pickle
import pandas as pd
import numpy as np
import plotly.express as px


# Load the pre-computed embeddings (presumably a UMAP projection, judging by
# the file name) from disk. The context manager closes the file handle even
# if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open pickle files that were produced by a trusted pipeline.
with open(f"{baseDir}/embeddings-umap.pkl", "rb") as embeddingsFile:
    embeddings = pickle.load(embeddingsFile)
embeddings = np.array(embeddings)
print(embeddings.shape)

# Two column names are supplied, so the array must have exactly two columns;
# pandas raises otherwise.
df = pd.DataFrame(embeddings, columns=['x', 'y'])

print(df.head())

In [None]:
# Plain scatter of the x/y columns held in df, before any cluster colouring.
fig = px.scatter(data_frame=df, x="x", y="y")
fig.show()

First, cluster the embeddings using HDBSCAN.

In [None]:
import hdbscan
import plotly.express as px
import pandas as pd
import numpy as np

# Fit HDBSCAN on the embeddings and collect the label assigned to each row.
clusterer = hdbscan.HDBSCAN(
    min_cluster_size=20,
    min_samples=15,
)
labels = clusterer.fit_predict(embeddings)

# Columns 0 and 1 of the embeddings become the plot coordinates; the labels
# are stored alongside them so the scatter can be coloured by cluster.
df = pd.DataFrame(
    dict(x=embeddings[:, 0], y=embeddings[:, 1], labels=labels)
)

fig = px.scatter(
    df,
    x="x",
    y="y",
    color="labels",
    title="HDBSCAN Clustering",
    labels={"labels": "Cluster"},
)
fig.show()

Second, cluster the same embeddings with DBSCAN for comparison.

In [None]:
import numpy as np
from sklearn.cluster import DBSCAN
import plotly.express as px
import pandas as pd

# Fit DBSCAN on the embeddings and collect the label assigned to each row.
clusterer = DBSCAN(
    eps=1.3,
    min_samples=10,
)
labels = clusterer.fit_predict(embeddings)

# Columns 0 and 1 of the embeddings become the plot coordinates; the labels
# are stored alongside them so the scatter can be coloured by cluster.
df = pd.DataFrame(
    dict(x=embeddings[:, 0], y=embeddings[:, 1], labels=labels)
)

fig = px.scatter(
    df,
    x="x",
    y="y",
    color="labels",
    title="DBSCAN Clustering",
    labels={"labels": "Cluster"},
)
fig.show()