In [None]:
import os
import torch
import pandas as pd
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

In [None]:
# GPU
# Restrict CUDA to a single device, enumerated in PCI bus order. The environment
# variables are written BEFORE the first torch.cuda query so that the device
# count below (and the branch taken on it) reflects the masked device list
# rather than every GPU physically present.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

n_gpu = torch.cuda.device_count()
print("n_gpu: ", n_gpu)

if n_gpu == 0:
    # No CUDA device visible: fall back to the CPU and leave torch defaults alone.
    print("Warning: There's no GPU available on this machine")
    device = 'cpu'
else:
    print("Visible devices: ", os.environ["CUDA_VISIBLE_DEVICES"])
    device = 'cuda:0'
    # Make newly created tensors default to float32 on the GPU.
    # NOTE(review): recent PyTorch releases deprecate set_default_tensor_type in
    # favour of torch.set_default_device / torch.set_default_dtype -- confirm
    # against the installed version before switching.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print("Default tensor type set to torch.cuda.FloatTensor")

print("Device: ", device)

In [None]:
# CSV tables of embeddings; going by the file names these hold OpenL3 embeddings
# for the real recordings and for the synthesised audio respectively.
filepath_real = 'embeddings/real/openl3_embeddings_zapsplat_pack_footsteps_high_heels_1s_aligned.csv'
filepath_synth = 'embeddings/2021-09-20_13h23m-hifi/120k_large_openl3_embeddings_synth.csv'
# Alternative synthetic run (swap with the line above to analyse it instead):
# filepath_synth = 'embeddings/2021-09-20_19h46m-wave/120k_large_openl3_embeddings_synth.csv'

df_real = pd.read_csv(filepath_real)
df_synth = pd.read_csv(filepath_synth)

# Column names of the 512 embedding dimensions: openl3_0 ... openl3_511.
feat_cols = ['openl3_' + str(dim) for dim in range(512)]

# Preview the first two rows of the real table.
df_real.head(2)

In [None]:
# PCA
# Project the embedding columns of each table onto three principal components
# and store the coordinates on the same frame as 'pca-1', 'pca-2', 'pca-3'.
# NOTE(review): a separate PCA is fitted per table, so the real and synthetic
# coordinates are expressed in different bases -- confirm this is intended
# before comparing the two sets of plots axis-for-axis.
for frame in (df_real, df_synth):
    pca = PCA(n_components=3)
    pca_result = pca.fit_transform(frame[feat_cols].values)
    for comp_idx, comp_name in enumerate(('pca-1', 'pca-2', 'pca-3')):
        frame[comp_name] = pca_result[:, comp_idx]
    print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))

In [None]:
%matplotlib inline
cmap = ListedColormap(sns.color_palette("tab10", 7).as_hex())
fig, ax = plt.subplots(2, 3, figsize=(24,6))

sns.scatterplot(
    ax=ax[0][0],
    x="pca-1", y="pca-2",
    hue="class",
    palette=cmap.colors,
    data=df_real,
    legend=False,
    alpha=0.9
)
sns.scatterplot(
    ax=ax[0][1],
    x="pca-1", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_real,
    legend=False,
    alpha=0.9
)
sns.scatterplot(
    ax=ax[0][2],
    x="pca-2", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_real,
    legend=True,
    alpha=0.9
)
ax[0][2].legend(bbox_to_anchor=(1.3, 1), loc="upper right")

sns.scatterplot(
    ax=ax[1][0],
    x="pca-1", y="pca-2",
    hue="class",
    palette=cmap.colors,
    data=df_synth,
    legend=False,
    alpha=0.9
)
sns.scatterplot(
    ax=ax[1][1],
    x="pca-1", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_synth,
    legend=False,
    alpha=0.9
)

sns.scatterplot(
    ax=ax[1][2],
    x="pca-2", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_synth,
    legend=True,
    alpha=0.9
)
ax[1][2].legend(bbox_to_anchor=(1.3, 1), loc="upper right")

ax[0][0].set_xlim([-6,6]), ax[0][0].set_ylim([-4,4])
ax[0][1].set_xlim([-6,6]), ax[0][0].set_ylim([-4,4])
ax[0][2].set_xlim([-6,6]), ax[0][0].set_ylim([-4,4])
ax[1][0].set_xlim([-6,6]), ax[0][0].set_ylim([-4,4])
ax[1][1].set_xlim([-6,6]), ax[0][0].set_ylim([-4,4])
ax[1][2].set_xlim([-6,6]), ax[0][0].set_ylim([-4,4])

In [None]:
# uncomment this to rotate 3d plots (works only on browser, not on vscode)
# %matplotlib notebook
%matplotlib inline
fig = plt.figure(figsize=(10,10))
plt.rcParams.update({'font.size': 20})
ax = fig.add_subplot(projection='3d')
ax.set_xlabel("PCA-1", labelpad=30)
ax.set_ylabel("PCA-2", labelpad=30)
ax.set_zlabel("PCA-3", labelpad=20)

classes = sorted(set(df_real['class']))
for i, c in enumerate(classes):
    ax.scatter(
        xs=df_real[df_real['class'] == c]['pca-1'], 
        ys=df_real[df_real['class'] == c]['pca-2'], 
        zs=df_real[df_real['class'] == c]['pca-3'],
        c=cmap.colors[i], 
        label=c, 
        alpha=0.8,
        s=40)
# ax.legend(loc=2, prop={'size': 14})
ax.set_xlim([-4,6])
ax.set_ylim([-4,6])
ax.set_zlim([-4,6])
plt.tight_layout()
# plt.savefig('%s/%s' % ('pca_plots', 'real_3d_openl3_embs.jpg'), format='jpg', dpi=500, bbox_inches='tight', pad_inches=-0.0)

In [None]:
# 3D scatter of the three PCA coordinates of df_synth, one colour per class,
# with a legend whose markers are enlarged for readability.
fig = plt.figure(figsize=(10,10))
plt.rcParams.update({'font.size': 20})
ax = fig.add_subplot(projection='3d')
ax.set_xlabel("PCA-1", labelpad=30)
ax.set_ylabel("PCA-2", labelpad=30)
ax.set_zlabel("PCA-3", labelpad=20)

# The class list is read from df_real, not df_synth.
# NOTE(review): presumably so a class keeps the same colour index as in the
# real-data figure -- confirm.
classes = sorted(set(df_real['class']))
for color_idx, class_name in enumerate(classes):
    # Select the rows of this class once instead of re-filtering per axis.
    members = df_synth[df_synth['class'] == class_name]
    ax.scatter(
        xs=members['pca-1'],
        ys=members['pca-2'],
        zs=members['pca-3'],
        c=cmap.colors[color_idx],
        label=class_name,
        alpha=0.7,
        s=20)
lgnd = ax.legend(loc="upper right", bbox_to_anchor=(1,0.94), prop={'size': 24})

# Legend markers inherit the small scatter size (s=20); enlarge them.
# Matplotlib 3.7 renamed Legend.legendHandles to Legend.legend_handles and the
# old name was later removed, so look up the new name first and fall back to
# the old one on older releases.
legend_markers = getattr(lgnd, 'legend_handles', None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for marker in legend_markers:
    # Public setter instead of writing the private `_sizes` attribute.
    marker.set_sizes([150])

ax.set_xlim([-4,6])
ax.set_ylim([-4,6])
ax.set_zlim([-4,6])
plt.tight_layout()
# plt.savefig('%s/%s' % ('pca_plots', 'hifi_3d_openl3_embs.jpg'), format='jpg', dpi=500, bbox_inches='tight', pad_inches=-0.0)

In [None]:
%matplotlib inline
cmap = ListedColormap(sns.color_palette("tab10", 7).as_hex())
fig, ax = plt.subplots(1, 3, figsize=(24,3))

sns.scatterplot(
    ax=ax[0],
    x="pca-1", y="pca-2",
    hue="class",
    palette=cmap.colors,
    data=df_real,
    legend=False,
    alpha=0.9
)
sns.scatterplot(
    ax=ax[1],
    x="pca-1", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_real,
    legend=False,
    alpha=0.9
)
sns.scatterplot(
    ax=ax[2],
    x="pca-2", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_real,
    legend=True,
    alpha=0.9
)
ax[2].legend(bbox_to_anchor=(1.3, 1), loc="upper right")
ax[0].set_xlim([-6,6]), ax[0].set_ylim([-4,4])
ax[1].set_xlim([-6,6]), ax[1].set_ylim([-4,4])
ax[2].set_xlim([-6,6]), ax[2].set_ylim([-4,4])

# plt.savefig('%s/%s' % ('pca_plots', 'real_openl3_embs.jpg'), dpi=500, format='jpg')

In [None]:
%matplotlib inline
cmap = ListedColormap(sns.color_palette("tab10", 7).as_hex())
fig, ax = plt.subplots(1, 3, figsize=(24,3))

sns.scatterplot(
    ax=ax[0],
    x="pca-1", y="pca-2",
    hue="class",
    palette=cmap.colors,
    data=df_synth,
    legend=False,
    alpha=0.9
)
sns.scatterplot(
    ax=ax[1],
    x="pca-1", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_synth,
    legend=False,
    alpha=0.9
)

sns.scatterplot(
    ax=ax[2],
    x="pca-2", y="pca-3",
    hue="class",
    palette=cmap.colors,
    data=df_synth,
    legend=True,
    alpha=0.9
)
ax[2].legend(bbox_to_anchor=(1.3, 1), loc="upper right")
ax[0].set_xlim([-6,6]), ax[0].set_ylim([-4,4])
ax[1].set_xlim([-6,6]), ax[1].set_ylim([-4,4])
ax[2].set_xlim([-6,6]), ax[2].set_ylim([-4,4])

# plt.savefig('%s/%s' % ('pca_plots', 'hifi_openl3_embs.jpg'), dpi=500, format='jpg')