# dimension reduction analysis
## steps:
1. Load a bignet model. The model can be trained in "trained.ipynb" or downloaded [here](https://drive.google.com/file/d/1pIr0VcdTEiTuR4CmwhmQ4YfnaHrbyEm4/view?usp=sharing)
2. Load the dataset created by "create dataset.ipynb", or download it [here](https://drive.google.com/file/d/1FgSsBIPzOgKaXGAYXavOU56oW8111gu4/view?usp=sharing)
3. Get the latent vector (taken before the last fc layer) of every measured sample. This step produces the train/test latent vectors, which can also be downloaded [here](https://drive.google.com/file/d/1DxIC6I5gm-LKRwwm-kX6moLPln-6xS_z/view?usp=sharing)
4. Perform 2D/3D tSNE/PCA. The results are saved in the "dimension reduction" directory and should match [these plots](https://drive.google.com/file/d/1F9-CT3p5EVLK222yWxqkO-5m9g1ryd4W/view?usp=share_link)

# load model and dataset

In [20]:
import util as u

# Input/output locations used by every later cell.
model_path = "trained_bignet1_naug_norm.pt"   # passed to u.get_latent
pkl_folder = "pkl"                            # pickled datasets and cached vectors
dim_red_folder = "dimension reduction"        # root folder for saved plots

dim_reds = ["tsne", "pca"]
datasets = ["train", "test", "reference"]

# Register the output tree top-down: root, then <root>/<method>, then
# <root>/<method>/<dataset>.
# NOTE(review): u.initialize presumably creates the listed directories — confirm in util.
u.initialize([dim_red_folder])
for dim_red in dim_reds:
    method_dir = "%s/%s" % (dim_red_folder, dim_red)
    u.initialize([method_dir])
    for dataset in datasets:
        u.initialize(["%s/%s" % (method_dir, dataset)])

In [4]:
# Pickle paths for the non-augmented training split, in the argument order
# expected by u.load_set: data, curve labels, labels, distance matrix.
(train_set_name,
 train_curve_label_name,
 train_label_name,
 train_dist_mat_name) = (
    "%s/train_%s_no_aug.pkl" % (pkl_folder, part)
    for part in ("data", "curve_label", "label", "dist_mat")
)

train_set, train_curve_label, train_label, train_dist_mat = u.load_set(
    train_set_name,
    train_curve_label_name,
    train_label_name,
    train_dist_mat_name,
)

In [3]:
# Pickle paths for the non-augmented test split, in the argument order
# expected by u.load_set: data, curve labels, labels, distance matrix.
(test_set_name,
 test_curve_label_name,
 test_label_name,
 test_dist_mat_name) = (
    "%s/test_%s_no_aug.pkl" % (pkl_folder, part)
    for part in ("data", "curve_label", "label", "dist_mat")
)

test_set, test_curve_label, test_label, test_dist_mat = u.load_set(
    test_set_name,
    test_curve_label_name,
    test_label_name,
    test_dist_mat_name,
)

In [2]:
# ref_set_name = "%s/ref_data.pkl"%pkl_folder
# ref_curve_label_name="%s/ref_curve_label.pkl"%pkl_folder
# ref_label_name= "%s/ref_label.pkl"%pkl_folder
# ref_dist_mat_name="%s/ref_dist_mat.pkl"%pkl_folder
# ref_set, ref_curve_label, ref_label, ref_dist_mat=\
# u.load_set(ref_set_name, ref_curve_label_name,\
#            ref_label_name, ref_dist_mat_name)

In [5]:
# Extract latent vectors for both splits with the trained model.
# Each u.get_latent call is slow; the two calls are kept as separate statements
# so the train result is still bound if the test call fails.
mode = "norm"

train_latent_vec = u.get_latent(
    train_set, train_curve_label, train_dist_mat, model_path, mode
)
test_latent_vec = u.get_latent(
    test_set, test_curve_label, test_dist_mat, model_path, mode
)
# Reference split (disabled because the reference data is not loaded above):
# ref_latent_vec=u.get_latent(ref_set, ref_curve_label,\
#                            ref_dist_mat, model_path, mode)

# generate 3D tSNE latent vectors

In [6]:
# 3D t-SNE embedding of the latent vectors.
# This step is very slow: run it once, on its own, and reuse the results
# (a later cell pickles them) rather than recomputing the embedding per plot.
# NOTE(review): the original note refers to "letent2plot" (presumably a util
# helper named latent2plot) that would redo the embedding on every call — confirm.

# `latent_vec` is not used in this cell; the binding is kept only in case a
# later cell reads it. The original first assigned test_latent_vec and
# immediately overwrote it, so that dead store has been removed.
latent_vec = train_latent_vec

train_tsne3vector = u.vec2plot(train_latent_vec, mode="tsne", dimension=3)
test_tsne3vector = u.vec2plot(test_latent_vec, mode="tsne", dimension=3)
# ref_tsne3vector=u.vec2plot(ref_latent_vec, mode="tsne", dimension=3)

In [7]:
# Cache the slow-to-compute results as "<pkl_folder>/<stem>_norm2.pkl".
# Order matches the original: latent vectors first, then the 3D t-SNE vectors.
for _item, _stem in (
    (train_latent_vec, "train_latent"),
    (test_latent_vec, "test_latent"),
    # (ref_latent_vec, "ref_latent"),
    (train_tsne3vector, "train_tsne3"),
    (test_tsne3vector, "test_tsne3"),
    # (ref_tsne3vector, "ref_tsne3"),
):
    u.dump_item(_item, "%s/%s_norm2.pkl" % (pkl_folder, _stem))

# load 3D tSNE latent vectors

In [None]:
# Restore the cached vectors written by the dump cell, so the slow latent
# extraction and t-SNE cells can be skipped.

# Latent vectors.
train_latent_vec = u.load_item(
    "%s/train_latent_norm2.pkl" % pkl_folder
)
test_latent_vec = u.load_item(
    "%s/test_latent_norm2.pkl" % pkl_folder
)
# ref_latent_vec = u.load_item("%s/ref_latent_norm2.pkl"%pkl_folder)

# 3D t-SNE vectors.
train_tsne3vector = u.load_item(
    "%s/train_tsne3_norm2.pkl" % pkl_folder
)
test_tsne3vector = u.load_item(
    "%s/test_tsne3_norm2.pkl" % pkl_folder
)
# ref_tsne3vector = u.load_item("%s/ref_tsne3_norm2.pkl"%pkl_folder)

# plot 3D tSNE latent vectors

In [21]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

# Save 3D t-SNE plots of the train and test splits, one image per
# (i, j) pair, to "<dim_red_folder>/tsne/<dataset>/<i> <j>".
# The two splits previously used copy-pasted loops; they now share one loop.

# Legend proxies: plt.legend only reads these handles, so a single pair is
# built once and reused for every figure.
red_patch = mpatches.Patch(color=[1, 0, 0], label='iphone')
blue_patch = mpatches.Patch(color=[0, 0, 1], label='samsung')

for dataset, vector in (("train", train_tsne3vector),
                        ("test", test_tsne3vector)):
    # Half the number of rows; constant per split, so computed once.
    # NOTE(review): presumably the row index separating the two legend
    # classes inside u.plot_vec — confirm in util.
    threshold = vector.shape[0] // 2
    for i in range(18):
        for j in range(18):
            # First and third entries of `angles` step in 10-unit increments.
            u.plot_vec(vector=vector, threshold=threshold,
                       angles=(i * 10, 9, j * 10))
            plt.legend(handles=[red_patch, blue_patch], fontsize=15)
            plt.savefig("%s/tsne/%s/%s %s" % (dim_red_folder, dataset, i, j))
            # Close the current figure so figures do not accumulate.
            plt.close()

# dataset="reference"
# vector=ref_tsne3vector
# for i in range(18):
#     for j in range(18):
#         threshold=int(vector.shape[0]/2)
#         u.plot_vec(vector=vector, threshold=threshold, angles=(i*10,9,j*10))
#         red_patch = mpatches.Patch(color=[1,0,0], label='iphone')
#         blue_patch = mpatches.Patch(color=[0,0,1], label='samsung')
#         plt.legend(handles=[red_patch, blue_patch], fontsize=15)
#         plt.savefig("%s/tsne/%s/%s %s"%(dim_red_folder,dataset,i,j))
#         plt.close()

# PCA 3D latent plot

In [23]:
# 3D PCA projection of the latent vectors, cached next to the other pickles.
train_pca3vector = u.vec2plot(train_latent_vec, mode="pca", dimension=3)
test_pca3vector = u.vec2plot(test_latent_vec, mode="pca", dimension=3)
# The reference split is disabled everywhere else in this notebook, so
# ref_latent_vec is never defined and the original active line raised
# NameError on a fresh run. Re-enable together with the reference loading cells.
# ref_pca3vector=u.vec2plot(ref_latent_vec, mode="pca", dimension=3)

u.dump_item(train_pca3vector, "%s/train_pca3_norm2.pkl" % pkl_folder)
u.dump_item(test_pca3vector, "%s/test_pca3_norm2.pkl" % pkl_folder)
# u.dump_item(ref_pca3vector,"%s/ref_pca3_norm2.pkl"%pkl_folder)

In [25]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

# Save 3D PCA plots of the train and test splits, one image per
# (i, j) pair, to "<dim_red_folder>/pca/<dataset>/<i> <j>".
# The two splits previously used copy-pasted loops; they now share one loop.

space = 20              # step applied to the first and third `angles` entries
count = 180 // space    # number of steps taken along each of those entries

# Legend proxies: plt.legend only reads these handles, so a single pair is
# built once and reused for every figure.
red_patch = mpatches.Patch(color=[1, 0, 0], label='iphone')
blue_patch = mpatches.Patch(color=[0, 0, 1], label='samsung')

for dataset, vector in (("train", train_pca3vector),
                        ("test", test_pca3vector)):
    # Half the number of rows; constant per split, so computed once.
    # NOTE(review): presumably the row index separating the two legend
    # classes inside u.plot_vec — confirm in util.
    threshold = vector.shape[0] // 2
    for i in range(count):
        for j in range(count):
            u.plot_vec(vector=vector, threshold=threshold,
                       angles=(i * space, 9, j * space))
            plt.legend(handles=[red_patch, blue_patch], fontsize=15)
            plt.savefig("%s/pca/%s/%s %s" % (dim_red_folder, dataset, i, j))
            # Close the current figure so figures do not accumulate.
            plt.close()

# dataset="reference"
# vector=ref_pca3vector
# for i in range(count):
#     for j in range(count):
#         threshold=int(vector.shape[0]/2)
#         u.plot_vec(vector=vector, threshold=threshold, angles=(i*space,9,j*space))
#         red_patch = mpatches.Patch(color=[1,0,0], label='iphone')
#         blue_patch = mpatches.Patch(color=[0,0,1], label='samsung')
#         plt.legend(handles=[red_patch, blue_patch], fontsize=15)
#         plt.savefig("%s/pca/%s/%s %s"%(dim_red_folder,dataset,i,j))
#         plt.close()

# PCA 2D latent plot

In [30]:
# Project the latent vectors of each split to two dimensions with PCA.
train_pca2vector = u.vec2plot(
    train_latent_vec, mode="pca", dimension=2
)
test_pca2vector = u.vec2plot(
    test_latent_vec, mode="pca", dimension=2
)
# ref_pca2vector=u.vec2plot(ref_latent_vec, mode="pca", dimension=2)

In [31]:
# Cache the 2D PCA vectors as "<pkl_folder>/<stem>_norm2.pkl".
for _item, _stem in (
    (train_pca2vector, "train_pca2"),
    (test_pca2vector, "test_pca2"),
    # (ref_pca2vector, "ref_pca2"),
):
    u.dump_item(_item, "%s/%s_norm2.pkl" % (pkl_folder, _stem))

In [32]:
# Save one 2D PCA scatter plot per split to "<dim_red_folder>/pca/<dataset>".
# The train and test sections were copy-pasted; they now share one loop.
# `plt` and `mpatches` come from the imports in the earlier plotting cells.

# Legend proxies: plt.legend only reads these handles, so a single pair is
# built once and reused for both figures.
red_patch = mpatches.Patch(color=[1, 0, 0], label='iphone')
blue_patch = mpatches.Patch(color=[0, 0, 1], label='samsung')

for dataset, vector in (("train", train_pca2vector),
                        ("test", test_pca2vector)):
    plt.figure(figsize=(10, 10))
    # Half the number of rows.
    # NOTE(review): presumably the row index separating the two legend
    # classes inside u.plot_vec — confirm in util.
    threshold = vector.shape[0] // 2
    u.plot_vec(vector=vector, threshold=threshold)
    plt.legend(handles=[red_patch, blue_patch], fontsize=15)
    plt.savefig("%s/pca/%s" % (dim_red_folder, dataset))
    plt.close()

# dataset="reference"
# vector=ref_pca2vector

# plt.figure(figsize=(10,10))
# threshold=int(vector.shape[0]/2)
# u.plot_vec(vector=vector, threshold=threshold)
# red_patch = mpatches.Patch(color=[1,0,0], label='iphone')
# blue_patch = mpatches.Patch(color=[0,0,1], label='samsung')
# plt.legend(handles=[red_patch, blue_patch], fontsize=15)
# plt.savefig("%s/pca/%s"%(dim_red_folder,dataset))
# plt.close()