In [29]:
import plotly.express as px
from sklearn.manifold import TSNE

import pickle
import torch
import pandas as pd

In [20]:
# Short codes of the per-sample label columns, in the order they are indexed.
# Presumably the Big Five personality traits - TODO confirm against the dataset.
person_labels = [
    'EXT',
    'NEU',
    'AGR',
    'CON',
    'OPN',
]

In [None]:
# Load the extracted data
def load_data(file_name):
    """Load pickled embeddings and their labels from ``{file_name}.pkl``.

    The pickle must hold an iterable of 3-item records
    ``(embedding, input_sample, label)`` in which every embedding is a torch
    tensor of the same shape. The input samples are read but not returned.

    Args:
        file_name: Path of the pickle file *without* the ``.pkl`` suffix.

    Returns:
        tuple: ``(processed_data, labels)`` where ``processed_data`` is a
        NumPy array of the stacked embeddings (one row per record) and
        ``labels`` is a tuple holding the label entry of each record.

    Raises:
        ValueError: If the file contains no records, or a record does not
            have exactly three fields.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # files that were produced by a trusted extraction run.
    with open(f'{file_name}.pkl', 'rb') as file:
        records = list(pickle.load(file))

    if not records:
        # zip(*[]) yields nothing, which would otherwise surface as an
        # opaque "not enough values to unpack" error.
        raise ValueError(f"'{file_name}.pkl' contains no records")

    processed_data, _input_samples, labels = zip(*records)

    # Convert to numpy. detach()/cpu() make the conversion work for tensors
    # that still track gradients or were saved from an accelerator device.
    processed_data = torch.stack(processed_data).detach().cpu().numpy()

    return processed_data, labels


In [None]:
# Baseline embeddings and their labels. 'roberta-normal' is the same file the
# comparison cell further down loads via f'{llm}-normal' with llm = 'roberta'.
normal_embeddings, labels = load_data('roberta-normal')

In [None]:
def visualize_data(file_name):
    """Plot a 2-D t-SNE projection of the embeddings stored in a pickle file.

    The embeddings and labels are read with ``load_data(file_name)``,
    projected to two dimensions with t-SNE (same settings as the comparison
    cell of this notebook: ``random_state=42``, ``perplexity=5``) and shown
    as a plotly scatter plot coloured by the label value.

    Args:
        file_name: Path of the pickle file without the ``.pkl`` suffix,
            forwarded unchanged to ``load_data``.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Everything is derived from ``file_name`` so the function no longer
    # depends on notebook globals (``df``, ``labels``) left by other cells.
    embeddings, labels = load_data(file_name)

    projection = TSNE(n_components=2, random_state=42, perplexity=5).fit_transform(embeddings)

    # Convert for plotly handling
    df = pd.DataFrame(projection, columns=["x_axis", "y_axis"])

    # Keys are the stringified label values used for the colour column.
    color_map = {'0': "black", '1': "red"}

    for i, name in enumerate(person_labels):
        # i-th entry of every sample's label vector.
        df['label'] = [label[i] for label in labels]

        fig = px.scatter(
            df, x="x_axis", y="y_axis", color=df["label"].astype(str),
            color_discrete_map=color_map,
            title=f"t-SNE Multi-Label Visualization {name}")

        fig.show()
        # The loop stops after the first entry of person_labels, so only that
        # label is plotted. NOTE(review): drop the break to plot all of them.
        break

    return None

In [49]:
llm = 'roberta'

# One shared t-SNE configuration. The integer random_state re-seeds every
# fit_transform call, so the projections do not depend on call order.
tsne = TSNE(n_components=2, random_state=42, perplexity=5)

# --- Baseline model: the labels are taken from this file only ---
normal_embeddings, labels = load_data(f'{llm}-normal')
normal_embeddings = tsne.fit_transform(normal_embeddings)
normal_df = pd.DataFrame(normal_embeddings, columns=["x_axis", "y_axis"])

# --- Fine-tuned model ---
finetuned_embeddings, _ = load_data(f'{llm}-finetuned-extracted')
finetuned_embeddings = tsne.fit_transform(finetuned_embeddings)
finetuned_df = pd.DataFrame(finetuned_embeddings, columns=["x_axis", "y_axis"])

# --- Fine-tuned model, segmented input ---
finetuned_segmented_embeddings, _ = load_data(f'{llm}-finetuned-segmented-extracted')
finetuned_segmented_embeddings = tsne.fit_transform(finetuned_segmented_embeddings)
finetuned_sentence_df = pd.DataFrame(
    finetuned_segmented_embeddings, columns=["x_axis", "y_axis"]
)


In [50]:
# Colour assigned to each stringified label value.
color_map = {'0': "black", '1': "red"}

for i, name in enumerate(person_labels):
    # i-th entry of every sample's label vector, shared by all three frames.
    specific_labels = [sample_labels[i] for sample_labels in labels]

    for frame in (normal_df, finetuned_df, finetuned_sentence_df):
        frame['label'] = specific_labels

    # One scatter plot per model variant, in the same order as before.
    for frame, variant in (
        (normal_df, "Normal"),
        (finetuned_df, "Finetuning"),
        (finetuned_sentence_df, "Finetuning with Segmentation"),
    ):
        fig = px.scatter(
            frame,
            x="x_axis",
            y="y_axis",
            color=frame["label"].astype(str),
            color_discrete_map=color_map,
            title=f"t-SNE Multi-Label Visualization for {variant} - {name}",
        )
        fig.show()

    # The loop ends after the first entry of person_labels, so only that
    # label is plotted.
    break