In [22]:
import pickle
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde
import plotly.graph_objects as go

from umap.umap_ import UMAP

In [23]:
# Load the pre-computed base vectors for lyrics and diaries.
# NOTE(review): pickle.load executes arbitrary code embedded in the file —
# only load pickles that were produced by this project.
with open("./data/Base_vec_index.pkl", "rb") as f:
    base_vec_lyric = pickle.load(f)

with open("./data/Base_vec_index_diary.pkl", "rb") as f:
    base_vec_diary = pickle.load(f)

base_vec_lyric = np.array(base_vec_lyric)
base_vec_diary = np.array(base_vec_diary)

# Stack diary rows first, then lyric rows. np.concatenate is used instead of
# np.concat because the latter alias only exists in NumPy >= 2.0.
base_data = np.concatenate([base_vec_diary, base_vec_lyric], axis=0)

In [24]:
# Build the shared 3-component UMAP reducer with a fixed seed and fit it on
# the combined diary + lyric vectors; draw_3D_KDE reads this global later.
umap_3D = UMAP(
    random_state=42,
    n_components=3,
)
umap_3D.fit(base_data)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



0,1,2
,n_neighbors,15
,n_components,3
,metric,'euclidean'
,metric_kwds,
,output_metric,'euclidean'
,output_metric_kwds,
,n_epochs,
,learning_rate,1.0
,init,'spectral'
,min_dist,0.1


In [25]:
def draw_3D_KDE(data, emotion1 : str, emotion2 : str,
                vec_diary : np.ndarray, vec_lyric : np.ndarray,
                threshold=95, sample_num : int = 100,
                output_path : str = "./KDE/Base_KDE_group_between_Happy_Sad.html"):
    """Plot 3D KDE isosurfaces of two emotion groups in the UMAP space.

    The diary vectors are projected with the module-level ``umap_3D`` reducer
    (which must already be fitted), a Gaussian KDE is fitted per emotion, both
    densities are evaluated on a shared regular grid, and the result is drawn
    as two Plotly isosurface traces. The figure is written to ``output_path``
    as HTML and then shown.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``"emotion"`` column. Row ``i`` of ``data`` is assumed
        to describe row ``i`` of ``vec_diary`` (positional alignment) —
        TODO confirm against the code that builds the diary vectors.
    emotion1, emotion2 : str
        Values of ``data["emotion"]`` selecting the two groups to compare.
    vec_diary : np.ndarray
        Diary vectors passed to ``umap_3D.transform``.
    vec_lyric : np.ndarray
        Currently unused; kept so existing callers keep working.
    threshold : float, default 95
        Percentile of each density grid used as the lowest isosurface level
        (``isomin``). The highest level is fixed at the 99.5th percentile, so
        values at or above 99.5 leave no range to draw.
    sample_num : int, default 100
        Currently unused; kept so existing callers keep working.
    output_path : str
        Destination of the exported HTML file. Missing parent directories are
        created. The default preserves the previously hard-coded file name.

    Raises
    ------
    ValueError
        If an emotion group has too few rows to fit a KDE.
    """
    from pathlib import Path

    diary = umap_3D.transform(vec_diary)
    n_dims = diary.shape[1]

    # Select rows by position. Using the DataFrame's index labels here would
    # pick the wrong embedding rows whenever `data` does not carry a default
    # 0..n-1 index (e.g. after filtering or shuffling).
    groups = []
    for emotion in (emotion1, emotion2):
        rows = np.flatnonzero((data["emotion"] == emotion).to_numpy())
        # A KDE covariance estimated from <= n_dims points is singular.
        if rows.size <= n_dims:
            raise ValueError(
                f"emotion {emotion!r} has {rows.size} rows; "
                f"need more than {n_dims} to fit a {n_dims}-D KDE"
            )
        # gaussian_kde expects shape (n_dims, n_points).
        groups.append(diary[rows].T)

    points1, points2 = groups

    # train KDE Model
    kde1 = gaussian_kde(points1)
    kde2 = gaussian_kde(points2)

    # Generate 3D GRID spanning the bounding box of both groups together.
    combined = np.hstack([points1, points2])
    lower = combined.min(axis=1)
    upper = combined.max(axis=1)

    grid_n = 35  # gets slow if this is too large (grid_n ** 3 evaluation points)
    X, Y, Z = np.mgrid[
        lower[0]:upper[0]:grid_n*1j,
        lower[1]:upper[1]:grid_n*1j,
        lower[2]:upper[2]:grid_n*1j
    ]

    grid_x, grid_y, grid_z = X.ravel(), Y.ravel(), Z.ravel()
    coords = np.vstack([grid_x, grid_y, grid_z])

    # Calculate KDE
    density1 = kde1(coords).reshape(X.shape)
    density2 = kde2(coords).reshape(X.shape)

    # Visualize Plotly
    fig = go.Figure()

    # Set Isosurface
    low_color_percent = threshold
    surface_count = 4

    # One isosurface trace per emotion; only the density and colours differ.
    for label, density, colorscale in (
        (emotion1, density1, "Plasma"),
        (emotion2, density2, "Viridis"),
    ):
        fig.add_trace(go.Isosurface(
            x=grid_x,
            y=grid_y,
            z=grid_z,
            value=density.ravel(),
            isomin=np.percentile(density, low_color_percent),
            isomax=np.percentile(density, 99.5),
            surface_count=surface_count,
            opacity=0.6,
            colorscale=colorscale,
            name=f"{label}",
            showscale=False
        ))

    fig.update_layout(
        title=f"Base KDE Group Between {emotion1} & {emotion2}",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        paper_bgcolor="black",
    )

    # write_html does not create directories, so make sure the target exists.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(output_path)
    fig.show()

        

In [26]:
# Load the labelled diary rows and draw the KDE comparison for the
# "행복" and "슬픔" emotion groups.
# umap_3D is already fitted on base_data in the earlier cell; fitting it again
# here would only repeat the same expensive computation with the same seed.
data = pd.read_csv("./data/train.csv")
draw_3D_KDE(data, "행복", "슬픔", base_vec_diary, base_vec_lyric)