In [None]:
import torch
from torch import nn, optim
from jcopdl.callback import Callback, set_config

# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Bare expression so the notebook displays which device was selected.
device

# Dataset & Dataloader

In [None]:
from src.dataset import VCTKSpeakerDataset, VCTKSpeakerDataloader

In [None]:
# Load the saved run configuration; later cells read `sr`, `min_dur` and `ndim` from it.
# map_location="cpu" mirrors how the weights file is loaded in this notebook, so any
# tensors stored inside the file still deserialize on a machine without CUDA.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load files you trust.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True, which may
# reject this pickled config object; if the load fails there, pass weights_only=False
# for this trusted file.
config = torch.load("model/configs.pth", map_location="cpu")

In [None]:
# Batch size handed to the dataloader.
bs = 64

# Build the dataset from the VCTK audio and transcript folders. Sample rate and
# minimum duration come from the saved config.
# NOTE(review): presumably 25 speakers with 100 utterances each -- confirm in src.dataset.
dataset = VCTKSpeakerDataset(
    "vctk_dataset/wav48/",
    "vctk_dataset/txt/",
    n_speaker=25,
    n_each_speaker=100,
    sr=config.sr,
    min_dur=config.min_dur,
)
dataloader = VCTKSpeakerDataloader(
    dataset,
    batch_size=bs,
    num_workers=2,
)

# Architecture & Weights

In [None]:
from src.model import Encoder

In [None]:
# Read the best checkpoint onto the CPU first; the model cell copies the values
# into a model that already lives on `device`.
weight = torch.load(
    "model/weights_best.pth",
    map_location="cpu",
)

In [None]:
# Instantiate the encoder with the dimensionality stored in the config,
# move it to the selected device, then restore the trained parameters.
model = Encoder(config.ndim)
model = model.to(device)
# Left as the last expression so the notebook shows the result of the load.
model.load_state_dict(weight)

# Compute Embeddings

In [None]:
from tqdm.auto import tqdm

In [None]:
# Run every batch through the encoder and collect the embeddings with their labels.
# Results: `embed` and `label` are NumPy arrays aligned along their first axis.
model.eval()  # put the module in evaluation mode before inference
embed_batches = []
label_items = []
with torch.no_grad():  # inference only, so no autograd graph is needed
    for features, labels in tqdm(dataloader):
        output = model(features.to(device))
        # Move each batch to the CPU as soon as it is produced so the accelerator
        # does not have to hold every embedding until the loop finishes.
        embed_batches.append(output.cpu())
        # Labels are gathered item by item, exactly as before, because the
        # container type the dataloader yields is not visible from this notebook.
        label_items.extend(labels)
# Concatenating the per-batch outputs along dim 0 yields the same array as
# stacking every individual row.
embed = torch.cat(embed_batches).numpy()
label = torch.stack(label_items).cpu().numpy()

# Visualize Embedding

In [None]:
from umap import UMAP
import numpy as np
import pandas as pd
import plotly.express as px

In [None]:
# Project the embeddings to 2-D with UMAP and plot them coloured by label.
# random_state pins UMAP's stochastic optimisation so Restart & Run All
# reproduces the same figure.
# NOTE(review): with `a` and `b` given explicitly, UMAP presumably does not derive
# the curve from `min_dist`, so that argument may have no effect -- confirm.
umap = UMAP(n_neighbors=3, min_dist=0.01, a=1, b=1, random_state=42)
X = umap.fit_transform(embed)

# Labels are cast to str before being used as the colour column.
# NOTE(review): presumably so plotly draws one discrete colour per label
# instead of a continuous colour scale -- confirm.
df = pd.DataFrame(X, columns=["umap1", "umap2"])
df["text"] = label.astype(str)

fig = px.scatter(df, x="umap1", y="umap2", color="text")
fig.update_traces(textposition='top center')
fig.update_layout(
    height=800,
    title_text='Reduced Vec Visualization'
)
fig.show()