In [1]:
import pickle
import numpy as np
from sklearn.manifold import TSNE
import plotly.express as px
import pandas as pd

In [None]:
# Load the original (pre-trained) embeddings from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at a file that comes from a trusted source.
with open("data/embeddings.pkl", "rb") as f:
    embeddings_dict = pickle.load(f)

# Keys and values are read from the same mapping, so (presumably a plain dict)
# the ids and the embedding rows share one iteration order.
job_ids = [*embeddings_dict.keys()]
pretrained_embeddings = np.array([*embeddings_dict.values()])
embedding_size = pretrained_embeddings.shape[1]

In [3]:
# Load the reduced ("refined") embeddings saved as a NumPy array.
# NOTE(review): later cells pair these rows with job_ids taken from the pickle,
# so this presumably stores rows in the same order — TODO confirm.
refined_embeddings = np.load("data/reduced_embeddings.npy")

In [4]:
# t-SNE on pre-trained embeddings: project them to 2 components for plotting.
print("Performing t-SNE on pre-trained embeddings...")
# random_state is fixed so repeated runs use the same seed.
tsne_pretrained = TSNE(n_components=2, random_state=42)
pretrained_tsne_results = tsne_pretrained.fit_transform(pretrained_embeddings)

Performing t-SNE on pre-trained embeddings...


In [5]:
# t-SNE on refined embeddings: project them to 2 components for plotting.
print("Performing t-SNE on refined embeddings...")
# Same settings and seed as the pre-trained run.
tsne_refined = TSNE(n_components=2, random_state=42)
refined_tsne_results = tsne_refined.fit_transform(refined_embeddings)

Performing t-SNE on refined embeddings...


In [6]:
# Prepare data for plotting
def prepare_plot_data(tsne_results, job_ids_list):
    """Build the plotting table for a two-column t-SNE projection.

    Args:
        tsne_results: 2-D array supporting ``[:, i]`` indexing; columns 0 and 1
            are used as the two plot coordinates.
        job_ids_list: One identifier per row of ``tsne_results``.

    Returns:
        pandas.DataFrame with columns ``TSNE-1``, ``TSNE-2`` and ``job_id``.
    """
    first_axis = tsne_results[:, 0]
    second_axis = tsne_results[:, 1]
    columns = {
        'TSNE-1': first_axis,
        'TSNE-2': second_axis,
        'job_id': job_ids_list,
    }
    return pd.DataFrame(columns)

# Build one plotting table per embedding set.
df_pretrained = prepare_plot_data(pretrained_tsne_results, job_ids)
# NOTE(review): job_ids comes from the pickle of pre-trained embeddings and is
# reused here — assumes the refined rows are in the same order; TODO confirm.
df_refined = prepare_plot_data(refined_tsne_results, job_ids)

In [7]:
# Plotting using Plotly
def plot_tsne(df, title):
    """Display a Plotly scatter plot of a t-SNE projection.

    Args:
        df: Table with 'TSNE-1', 'TSNE-2' and 'job_id' columns.
        title: Text used as the figure title.

    Returns:
        None. The figure is displayed via its ``show()`` method.
    """
    scatter_options = {
        'x': 'TSNE-1',
        'y': 'TSNE-2',
        'hover_data': ['job_id'],  # job_id appears in the hover tooltip
        'title': title,
    }
    px.scatter(df, **scatter_options).show()

In [8]:
# Plot t-SNE results for pre-trained embeddings (job_id is included in each point's hover data)
plot_tsne(df_pretrained, 't-SNE Visualization of Pre-trained Embeddings')

In [9]:
# Plot t-SNE results for refined embeddings (job_id is included in each point's hover data)
plot_tsne(df_refined, 't-SNE Visualization of Refined Embeddings')
