# Import Modules

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tqdm import tqdm

#plotly imports
import plotly as py
import plotly.express as px
import plotly.graph_objs as go

# Load Encoded Dataset

### Encoded dataset shape: (number of samples, 200)

In [2]:
# Names stored alongside the encoded vectors; used later as the 'title' column.
name_array = np.load("./total_data/encoded_vectors_name.npy")

# Encoded vectors, reshaped into a 2-D matrix with 200 columns per row.
dataset_array = np.reshape(
    np.load("./total_data/encoded_vectors.npy"),
    (-1, 200),
)

# Dimensionality Reduction via t-SNE

### Make DataFrame for Visualization (Plotly)

In [None]:
# Single-column frame of names; the column is labelled 'title' so it can be
# looked up by that key when the plot is built.
df_name = pd.DataFrame(name_array)
df_name.columns = ['title']

# Encoded vectors as a frame (integer column labels 0..199 after the reshape).
df_dataset = pd.DataFrame(dataset_array)

# Project the encoded vectors down to three dimensions with t-SNE.
# t-SNE is stochastic: a fixed random_state makes the embedding reproducible
# under Restart Kernel -> Run All.
tsne_3d = TSNE(n_components=3, init='pca', learning_rate='auto', random_state=42)
TSNEs_3d = pd.DataFrame(
    tsne_3d.fit_transform(dataset_array),
    columns=["TSNE1_3d", "TSNE2_3d", "TSNE3_3d"],
)

# Column-wise concat on the shared row index: title, the three t-SNE
# coordinates, then the encoded features.
df_plot = pd.concat([df_name, TSNEs_3d, df_dataset], axis=1, join='inner')

## Clustering by AgglomerativeClustering

![image](https://t1.daumcdn.net/cfile/tistory/994B43425AB34B7430)

In [None]:
# Number of clusters requested from the agglomerative model.
n_clusters_ = 18

# Clustering runs on the 3-D t-SNE coordinates; fit_predict fits the model
# and returns its labels_ array (one cluster id per row).
agg = AgglomerativeClustering(n_clusters=n_clusters_)
labels = agg.fit_predict(TSNEs_3d.to_numpy())

# Attach the cluster id to the plotting frame.
df_plot["Cluster"] = labels

## Build the Plotly Figure and Save It as HTML

In [None]:
# One explicit colour per cluster.
# NOTE(review): plotly documents marker.color as either a specific colour or an
# array of numbers mapped to a colorscale; a lone integer is neither, so each
# trace takes an entry from a qualitative palette (24 colours, cycled if
# n_clusters_ ever exceeds that).
palette = px.colors.qualitative.Dark24

# Build one Scatter3d trace per cluster so each cluster gets its own legend
# entry and can be toggled independently.
trace = []
for i in range(n_clusters_):
    # Filter once per cluster instead of once per trace attribute.
    cluster_points = df_plot[df_plot["Cluster"] == i]
    trace.append(
        go.Scatter3d(
            x=cluster_points["TSNE1_3d"],
            y=cluster_points["TSNE2_3d"],
            z=cluster_points["TSNE3_3d"],
            mode="markers",
            name=f"Cluster {i}",
            marker={"size": 1, "color": palette[i % len(palette)]},
            text=cluster_points["title"],  # hover text
        )
    )

# layout
title = "Visualizing Clusters Using TSNE"

# 3-D traces read their axes from layout.scene; top-level xaxis/yaxis only
# affect 2-D cartesian subplots and would be ignored here.
layout = dict(
    title=title,
    scene=dict(
        xaxis=dict(title='TSNE1', ticklen=5, zeroline=False),
        yaxis=dict(title='TSNE2', ticklen=5, zeroline=False),
        zaxis=dict(title='TSNE3', ticklen=5, zeroline=False),
    ),
)

# save
fig = go.Figure(data=trace, layout=layout)

# Make sure the output directory exists so write_html does not fail on a
# fresh checkout.
Path("./results").mkdir(parents=True, exist_ok=True)
fig.write_html(f"./results/Clustering_TSNE_3D_Agglomerative_{n_clusters_}.html")