In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP
from sklearn.preprocessing import StandardScaler
import os
from matplotlib import animation

In [7]:
def plot(X_2d, X_3d, labels, out_path):
    """Save a 2D scatter plot (PNG) and a rotating 3D scatter plot (GIF).

    Rows whose label equals 0 are drawn as "Literal" and rows whose label
    equals 1 as "Figurative"; rows with any other label value are not drawn.

    Parameters
    ----------
    X_2d : array indexed as ``X_2d[mask, col]``
        2D projection; columns 0 and 1 are plotted.
    X_3d : array indexed as ``X_3d[mask, col]``
        3D projection; columns 0, 1 and 2 are plotted.
    labels : array-like
        One label per row of ``X_2d`` / ``X_3d``. Coerced with ``np.asarray``
        so that a plain list also produces an element-wise boolean mask.
    out_path : str
        Output path without extension; ``.png`` and ``.gif`` are appended.
    """
    labels = np.asarray(labels)
    # Drawing order is kept (Literal first) so the default colour cycle
    # assigns the same colours to each class in every figure.
    classes = ((0, "Literal"), (1, "Figurative"))

    # 2D
    fig2d, ax2d = plt.subplots(figsize=(6, 6))
    try:
        for value, name in classes:
            mask = labels == value
            ax2d.scatter(X_2d[mask, 0], X_2d[mask, 1], alpha=0.3, s=10, label=name)
        ax2d.legend()
        ax2d.grid(True)
        fig2d.tight_layout()
        png_path = f"{out_path}.png"
        fig2d.savefig(png_path)
    finally:
        # Close even if saving fails: this function is called in a loop and
        # unclosed figures accumulate in pyplot's figure registry.
        plt.close(fig2d)

    # GIF
    fig3d = plt.figure(figsize=(6, 6))
    try:
        # Attach the axes to fig3d explicitly instead of relying on pyplot's
        # implicit "current figure".
        ax3d = fig3d.add_subplot(projection='3d')
        for value, name in classes:
            mask = labels == value
            ax3d.scatter(X_3d[mask, 0], X_3d[mask, 1], X_3d[mask, 2], label=name, alpha=0.3, s=10)
        ax3d.legend()

        def rotate(angle):
            # Fixed elevation of 30; only the azimuth changes per frame.
            ax3d.view_init(30, angle)

        gif_path = f"{out_path}.gif"
        # One frame per 4 degrees over a full turn (0, 4, ..., 356).
        rot_animation = animation.FuncAnimation(fig3d, rotate, frames=np.arange(0, 360, 4), interval=60)
        rot_animation.save(gif_path, dpi=80, writer='pillow')
    finally:
        plt.close(fig3d)

In [8]:
def main(emb_dir, models, emb_types, reducers):
    """Reduce stored embeddings to 2D/3D and write one PNG + GIF per reducer.

    For every (model, embedding type) pair this loads
    ``<emb_dir>/<model>/<emb_type>.npz``, reads its ``"embeddings"`` and
    ``"labels"`` arrays, standardizes the embeddings with ``StandardScaler``
    and, for each reducer, calls ``plot`` with the 2D and 3D projections.
    Output files go to ``<model>/<emb_type>/<reducer_name>.{png,gif}``,
    relative to the current working directory; directories are created as
    needed.

    Parameters
    ----------
    emb_dir : str
        Directory containing one sub-directory per model.
    models : iterable of str
        Model names (sub-directory names under ``emb_dir``).
    emb_types : iterable of str
        Embedding type names (``.npz`` file stems).
    reducers : dict
        Maps a reducer name to a pair ``(reducer_2d, reducer_3d)``; each
        element must provide ``fit_transform``.
    """
    for model_name in models:
        print(f"{model_name}...")
        for emb_type in emb_types:
            emb_path = os.path.join(emb_dir, model_name, f"{emb_type}.npz")
            # np.load on an .npz returns an NpzFile that keeps the archive
            # open; read the arrays inside a context manager so the file
            # handle is released on every iteration.
            with np.load(emb_path) as data:
                embeddings = data["embeddings"]
                labels = data["labels"]

            X_scaled = StandardScaler().fit_transform(embeddings)

            out_dir = os.path.join(model_name, emb_type)
            os.makedirs(out_dir, exist_ok=True)

            for reducer_name, reducer in reducers.items():
                # reducer[0] produces the 2D projection, reducer[1] the 3D one.
                reduced_2d = reducer[0].fit_transform(X_scaled)
                reduced_3d = reducer[1].fit_transform(X_scaled)
                out_path = os.path.join(out_dir, reducer_name)
                plot(reduced_2d, reduced_3d, labels, out_path)

In [9]:
# Location of the saved embeddings and the embedding variants to visualise.
emb_dir = "../2-embeddings"
emb_types = ["cls", "average", "layerwise"]

# Each entry is a [2D reducer, 3D reducer] pair; the two instances differ
# only in n_components and share the same fixed random_state.
reducers = {
    "pca": [PCA(n_components=dim, random_state=42) for dim in (2, 3)],
    "tsne": [TSNE(n_components=dim, random_state=42) for dim in (2, 3)],
    "umap": [
        UMAP(n_components=dim, random_state=42, n_jobs=1, init='random')
        for dim in (2, 3)
    ],
}

In [10]:
# Visualise the "bert" and "roberta" embeddings.
models = ["bert", "roberta"]
main(emb_dir=emb_dir, models=models, emb_types=emb_types, reducers=reducers)

bert...
roberta...


In [11]:
# Visualise the "bert_ft" and "roberta_ft" embeddings.
models = ["bert_ft", "roberta_ft"]
main(emb_dir=emb_dir, models=models, emb_types=emb_types, reducers=reducers)

bert_ft...
roberta_ft...
