In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import umap.umap_ as umap
from scipy.stats import gaussian_kde

In [4]:
import pickle

def _load_vectors(path):
    """Unpickle the object stored at ``path`` and return it wrapped in ``np.array``."""
    # NOTE(review): pickle.load can execute arbitrary code while deserialising;
    # only point this at files produced by a trusted pipeline.
    with open(path, "rb") as f:
        return np.array(pickle.load(f))


# Encoder vectors for the train split ("Cross" and "Attn" model variants).
cross_vec_lyric = _load_vectors("./data/Cross_vec_index.pkl")
cross_vec_diary = _load_vectors("./data/Cross_vec_index_diary.pkl")
attn_vec_lyric = _load_vectors("./data/Attn_vec_index.pkl")
attn_vec_diary = _load_vectors("./data/Attn_vec_index_diary.pkl")

# Encoder vectors for the test split.
cross_test_vec_lyric = _load_vectors("./data/Cross_test_vec_index.pkl")
cross_test_vec_diary = _load_vectors("./data/Cross_test_vec_index_diary.pkl")
attn_test_vec_lyric = _load_vectors("./data/Attn_test_vec_index.pkl")
attn_test_vec_diary = _load_vectors("./data/Attn_test_vec_index_diary.pkl")

# Stack lyric rows on top of diary rows; these stacked arrays are what the
# UMAP reducers are fit on further down.
# np.concatenate is used instead of np.concat because the latter only exists
# in NumPy >= 2.0.
cross_data = np.concatenate([cross_vec_lyric, cross_vec_diary], axis=0)
attn_data = np.concatenate([attn_vec_lyric, attn_vec_diary], axis=0)
cross_test_data = np.concatenate([cross_test_vec_lyric, cross_test_vec_diary], axis=0)
attn_test_data = np.concatenate([attn_test_vec_lyric, attn_test_vec_diary], axis=0)

In [5]:
import plotly.express as px

# Metadata frames for the train and test splits; the plotting helpers filter
# them on their "emotion" column.
data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/train.csv", "./data/test.csv")
)

# Shared 3-component UMAP reducer. draw_3D reads it as a global, and the
# driver cell refits it before each group of plots.
umap_model = umap.UMAP(random_state=42, n_components=3)

def draw_3D(data, vec_lyric, vec_diary, emotion : str, save_name : str):
    """
    Save an interactive 3-D UMAP scatter of lyric vs. diary vectors for one emotion.

    The rows of ``data`` whose "emotion" column equals ``emotion`` are selected,
    the matching lyric and diary vectors are projected with the module-level
    ``umap_model`` (which must already be fit), and the figure is written to
    ``./umaps/{save_name}.html``.

    Parameters
    ----------
    data : DataFrame with an "emotion" column.
    vec_lyric, vec_diary : arrays indexed with the selected rows' index labels.
        NOTE(review): this assumes ``data`` carries a default 0..n-1 index that
        lines up with the vector rows -- confirm against how the pickles were built.
    emotion : one of 행복 (happiness), 사랑 (love), 슬픔 (sadness), 분노 (anger), 희망 (hope).
    save_name : output file name without extension.
    """

    emotion_index = data[data["emotion"] == emotion].index
    lyric = vec_lyric[emotion_index]
    diary = vec_diary[emotion_index]

    # Lyric rows come first, diary rows second; the labels below must follow
    # this exact order.
    lyric_diary = np.concatenate([lyric, diary], axis=0)

    X_umap3 = umap_model.transform(lyric_diary)

    # Fix: the labels used to be ["Diary"] * n + ["Lyric"] * n, which is the
    # reverse of the stacking order above and mislabelled every point.
    labels = ["Lyric"] * len(lyric) + ["Diary"] * len(diary)

    fig = px.scatter_3d(
        x=X_umap3[:, 0],
        y=X_umap3[:, 1],
        z=X_umap3[:, 2],
        color=labels
    )

    fig.update_traces(marker=dict(size=3))  # marker size
    fig.update_layout(
        plot_bgcolor="gray",    # plot background
        paper_bgcolor="black",  # overall page background
        font=dict(color="white"),
        title=f"Diary&Lyric Encoder vec 3D map with emotion {emotion}"
    )

    fig.write_html(f"./umaps/{save_name}.html")

In [6]:
# 3-D visualisation driver.
# Each entry is (vectors the shared reducer is refit on, metadata frame,
# lyric vectors, diary vectors, output-file prefix).
# Fix: the two test entries previously passed the TRAIN vectors
# (cross_vec_* / attn_vec_*) alongside test_data, so the "test" plots showed
# train embeddings pushed through a reducer fit on test data.
runs_3d = [
    (cross_data, data, cross_vec_lyric, cross_vec_diary, "cross"),
    (attn_data, data, attn_vec_lyric, attn_vec_diary, "attn"),
    (cross_test_data, test_data, cross_test_vec_lyric, cross_test_vec_diary, "cross_test"),
    (attn_test_data, test_data, attn_test_vec_lyric, attn_test_vec_diary, "attn_test"),
]

for fit_vectors, frame, lyric_vecs, diary_vecs, prefix in runs_3d:
    # draw_3D reads the module-level umap_model, so refit it before each group.
    umap_model.fit(fit_vectors)
    for emotion in ["행복", "사랑", "슬픔", "분노", "희망"]:
        draw_3D(frame, lyric_vecs, diary_vecs, emotion, f"{prefix}_{emotion}_3D")

  warn(


In [7]:
# 2-component UMAP reducer read as a global by draw_scatter and draw_KDE.
# It is first fit on the stacked train "Cross" vectors here; the driver cells
# refit it before each group of plots.
umap_model_2d = umap.UMAP(random_state=42, n_components=2)
umap_model_2d.fit(cross_data)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



0,1,2
,n_neighbors,15
,n_components,2
,metric,'euclidean'
,metric_kwds,
,output_metric,'euclidean'
,output_metric_kwds,
,n_epochs,
,learning_rate,1.0
,init,'spectral'
,min_dist,0.1


In [8]:
def draw_scatter(data, vec_lyric, vec_diary, emotion : str, title : str, 
                 size=20, alpha=0.6, sample_num : int = 100):
    """
    Save a 2-D UMAP scatter of diary vs. lyric vectors for one emotion.

    The rows of ``data`` whose "emotion" column equals ``emotion`` are selected,
    the matching vectors are projected with the module-level ``umap_model_2d``
    (which must already be fit), and the plot is saved to
    ``./scatters/{title}.png``.

    Parameters
    ----------
    data : DataFrame with an "emotion" column.
    vec_lyric, vec_diary : arrays indexed with the selected rows' index labels.
        NOTE(review): this assumes ``data`` carries a default 0..n-1 index that
        lines up with the vector rows -- confirm.
    emotion : one of 행복 (happiness), 사랑 (love), 슬픔 (sadness), 분노 (anger), 희망 (hope).
    title : plot title, also used as the output file name.
    size, alpha : marker size and opacity passed to ``scatter``.
    sample_num : only the FIRST ``sample_num`` projected points of each group
        are drawn (a head slice, not a random sample).
    """

    emotion_index = data[data["emotion"] == emotion].index
    lyric = vec_lyric[emotion_index]
    diary = vec_diary[emotion_index]

    # np.concatenate instead of np.concat: the latter only exists in NumPy >= 2.0
    # and draw_KDE already uses np.concatenate.
    lyric_diary = np.concatenate([lyric, diary], axis=0)

    # Rows were stacked lyric-first, so the first len(lyric) projections are lyric.
    X_umap2 = umap_model_2d.transform(lyric_diary)
    umap_lyric = X_umap2[:len(lyric)]
    umap_diary = X_umap2[len(lyric):]

    # Use an explicit figure so the plot neither depends on nor clobbers
    # whatever pyplot's "current figure" happens to be.
    fig, ax = plt.subplots()
    # Labels are attached to the artists themselves so the legend cannot drift
    # out of sync with the plotting order.
    ax.scatter(umap_diary[:sample_num, 0], umap_diary[:sample_num, 1],
               s=size, alpha=alpha, label="Diary")
    ax.scatter(umap_lyric[:sample_num, 0], umap_lyric[:sample_num, 1],
               s=size, alpha=alpha, label="Lyric")
    ax.legend()
    ax.set_title(title)
    fig.savefig(f"./scatters/{title}.png")
    plt.close(fig)


def _kde_peak(points, grid_size=100):
    """Return (x, y) of the highest Gaussian-KDE density on a regular grid.

    ``points`` is an (n, 2) array. The grid has ``grid_size`` steps per axis and
    spans the bounding box of ``points``.
    """
    kde = gaussian_kde(points.T)
    x_lo, y_lo = points.min(axis=0)
    x_hi, y_hi = points.max(axis=0)

    xx, yy = np.meshgrid(
        np.linspace(x_lo, x_hi, grid_size),
        np.linspace(y_lo, y_hi, grid_size),
    )
    density = kde(np.vstack([xx.ravel(), yy.ravel()])).reshape(xx.shape)

    peak_idx = np.unravel_index(np.argmax(density), density.shape)
    return xx[peak_idx], yy[peak_idx]


def draw_KDE(data, vec_lyric, vec_diary, emotion: str, title: str, sample_num: int = 100):
    """
    Save 2-D KDE contours of diary vs. lyric vectors for one emotion.

    The rows of ``data`` whose "emotion" column equals ``emotion`` are selected
    and the matching vectors are projected with the module-level
    ``umap_model_2d`` (which must already be fit). Diary contours are drawn in
    red and lyric contours in blue. The density peak of each group is marked,
    and the Euclidean distance between the two peaks is printed in the
    bottom-right corner. The figure is saved to ``./KDE/{title}.png``.

    Parameters
    ----------
    data : DataFrame with an "emotion" column.
    vec_lyric, vec_diary : arrays indexed with the selected rows' index labels.
        NOTE(review): this assumes ``data`` carries a default 0..n-1 index that
        lines up with the vector rows -- confirm.
    emotion : one of 행복 (happiness), 사랑 (love), 슬픔 (sadness), 분노 (anger), 희망 (hope).
    title : plot title, also used as the output file name.
    sample_num : only the FIRST ``sample_num`` projected points of each group
        are used (a head slice, not a random sample).
    """

    emotion_index = data[data["emotion"] == emotion].index
    lyric = vec_lyric[emotion_index]
    diary = vec_diary[emotion_index]
    lyric_diary = np.concatenate([lyric, diary], axis=0)

    # Rows were stacked lyric-first, so the first len(lyric) projections are lyric.
    X_umap2 = umap_model_2d.transform(lyric_diary)
    diary_pts = X_umap2[len(lyric):][:sample_num]
    lyric_pts = X_umap2[:len(lyric)][:sample_num]

    fig, ax = plt.subplots()

    # Density contours: diary in red, lyric in blue.
    sns.kdeplot(x=diary_pts[:, 0], y=diary_pts[:, 1], cmap='Reds', fill=False, ax=ax)
    sns.kdeplot(x=lyric_pts[:, 0], y=lyric_pts[:, 1], cmap='Blues', fill=False, ax=ax)

    # Mark the density peak of each group.
    peak_x_diary, peak_y_diary = _kde_peak(diary_pts)
    peak_x_lyric, peak_y_lyric = _kde_peak(lyric_pts)
    ax.scatter(peak_x_diary, peak_y_diary, color='red', s=60, zorder=3)
    ax.scatter(peak_x_lyric, peak_y_lyric, color='blue', s=60, zorder=3)

    # Euclidean distance between the two peaks (previously computed twice).
    dist = np.sqrt((peak_x_diary - peak_x_lyric)**2 + (peak_y_diary - peak_y_lyric)**2)
    dist_text = f"Distance = {dist:.3f}"

    # Show the distance at the bottom-right corner.
    ax.text(
        0.98, 0.02, dist_text,
        transform=ax.transAxes,   # axes coordinates (0 to 1)
        ha='right', va='bottom',
        fontsize=12,
        color='white',
        bbox=dict(facecolor='black', alpha=0.6, boxstyle="round,pad=0.3")
    )

    ax.grid(True, linestyle='--', alpha=0.4)
    ax.set_title(title)
    fig.savefig(f"./KDE/{title}.png")
    plt.close(fig)

In [9]:
# Korean emotion label (as it appears in the "emotion" column) -> English tag
# used in plot titles and file names.
emotion_mapper = {
    "행복": "Happy",
    "사랑": "Love",
    "슬픔": "Sad",
    "분노": "Rage",
    "희망": "Hope",
}

# 2-D scatter driver: train runs first, then test runs.
# Each entry is (vectors the 2-D reducer is refit on, metadata frame,
# lyric vectors, diary vectors, title prefix).
scatter_runs = (
    (cross_data, data, cross_vec_lyric, cross_vec_diary, "cross"),
    (attn_data, data, attn_vec_lyric, attn_vec_diary, "attn"),
    (cross_test_data, test_data, cross_test_vec_lyric, cross_test_vec_diary, "cross_test"),
    (attn_test_data, test_data, attn_test_vec_lyric, attn_test_vec_diary, "attn_test"),
)

for fit_vectors, frame, lyric_vecs, diary_vecs, prefix in scatter_runs:
    # draw_scatter reads the module-level umap_model_2d, so refit it per group.
    umap_model_2d.fit(fit_vectors)
    for emotion, title in emotion_mapper.items():
        draw_scatter(frame, lyric_vecs, diary_vecs, emotion, f"{prefix}_2D_{title}")

In [10]:
# KDE contour driver: train runs first, then test runs.
# Each entry is (vectors the 2-D reducer is refit on, metadata frame,
# lyric vectors, diary vectors, title prefix).
kde_runs = (
    (cross_data, data, cross_vec_lyric, cross_vec_diary, "cross"),
    (attn_data, data, attn_vec_lyric, attn_vec_diary, "attn"),
    (cross_test_data, test_data, cross_test_vec_lyric, cross_test_vec_diary, "cross_test"),
    (attn_test_data, test_data, attn_test_vec_lyric, attn_test_vec_diary, "attn_test"),
)

for fit_vectors, frame, lyric_vecs, diary_vecs, prefix in kde_runs:
    # draw_KDE reads the module-level umap_model_2d, so refit it per group.
    umap_model_2d.fit(fit_vectors)
    for emotion in emotion_mapper:
        title = emotion_mapper[emotion]
        draw_KDE(frame, lyric_vecs, diary_vecs, emotion, f"{prefix}_KDE_{title}")

