# t-SNE visualization

This notebook uses t-SNE to visualize the EEG feature embeddings computed by trained models.

-----

## Load Packages

In [1]:
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# Step up one directory first; presumably this is the project root that makes the
# `datasets`, `models` and `train` packages importable -- TODO confirm.
# NOTE(review): `%cd ..` is not idempotent -- re-running this cell moves up one more level.
%cd ..
# With mode 2, autoreload re-imports every loaded module before each cell executes.
%load_ext autoreload
%autoreload 2

C:\Users\Minjae\Desktop\EEG_Project


In [2]:
# Load some packages
import os
from copy import deepcopy
import hydra
from omegaconf import OmegaConf
from collections import OrderedDict

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE

import pprint
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import offsetbox

# custom package
from datasets.caueeg_script import build_dataset_for_train
import models
from train.evaluate import check_accuracy
from train.evaluate import check_accuracy_extended
from train.evaluate import check_accuracy_extended_debug
from train.evaluate import check_accuracy_multicrop
from train.evaluate import check_accuracy_multicrop_extended
from train.visualize import draw_roc_curve
from train.visualize import draw_confusion, draw_confusion2
from train.visualize import draw_class_wise_metrics
from train.visualize import draw_error_table
from train.visualize import annotate_heatmap

In [3]:
# Report the installed PyTorch build, then choose the compute device for the whole notebook.
print('PyTorch version:', torch.__version__)

cuda_ready = torch.cuda.is_available()
device = torch.device('cuda' if cuda_ready else 'cpu')

print('cuda is available.' if cuda_ready else 'cuda is unavailable.')

PyTorch version: 2.0.0+cu117
cuda is available.


In [4]:
# Other settings
# Render figures inside the notebook output, using the high-resolution 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # cleaner text

# Start from matplotlib's default style sheet.
plt.style.use('default') 
# Other style names that can be passed to plt.style.use (presumably a dump of plt.style.available):
# ['Solarize_Light2', '_classic_test_patch', 'bmh', 'classic', 'dark_background', 'fast', 
#  'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 
#  'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 
#  'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 
#  'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']

# Global rcParams: interpolation used when drawing images and the default font family.
plt.rcParams['image.interpolation'] = 'bicubic'
plt.rcParams["font.family"] = 'Helvetica' # 'NanumGothic' # for Hangul in Windows

-----

## Load the configurations used during the training phase

In [5]:
# Checkpoint identifiers of the two models to compare. Each name is a sub-directory of the
# checkpoint folder; the trailing comments record the augmentation settings of each run.
# Uncomment exactly one model_name_1 / model_name_2 pair.

# VGG
# model_name_1 = 'lo88puq7'  # mixup o, awgn o
# model_name_2 = '1nu3jagp'  # mixup x, awgn x
# model_name_2 = '1mwdhqbz' # mixup x, awgn x, dropout x
# model_name_2 = '2r88ber7' # mixup o, awgn x

# ResNet
# model_name_1 = 'l8524nml'  # mixup o, awgn o
# model_name_2 = 'ph0mix3b'  # mixup x, awgn o, dropout x

# ResNet 
model_name_1 = '2apj72km'  # mixup x, awgn o
model_name_2 = '2k8xomy6'  # mixup x, awgn x


# Number of additional passes made over every batch with the noisy preprocessing pipeline.
repeat = 3
# When True the figures are written to disk instead of being shown inline.
save_fig = True
# Root folder for saved figures; one sub-folder per model name and file extension is created below it.
output_folder = './local/output/awgn_tsne'

In [6]:
# Load the checkpoint of the first model; `map_location` remaps the stored tensors onto `device`.
# NOTE(review): torch.load unpickles the file -- only open checkpoints from a trusted source.
ckpt = torch.load(os.path.join(r'E:\CAUEEG\checkpoint', model_name_1, 'checkpoint.pt'), map_location=device)
# print(ckpt.keys())

# The stored training configuration is also the hydra spec (`_target_` entry) used to rebuild the network.
config_1 = ckpt['config']
model_1 = hydra.utils.instantiate(config_1).to(device)

model_state_1 = ckpt['model_state']
if config_1.get('ddp', False):
    # Weights saved through DataParallel/DistributedDataParallel carry a 'module.' prefix.
    # Strip it only where it is really present, so that an unprefixed key is never truncated.
    ddp_prefix = 'module.'
    model_state_1 = OrderedDict(
        (key[len(ddp_prefix):] if key.startswith(ddp_prefix) else key, value)
        for key, value in model_state_1.items()
    )

model_1.load_state_dict(model_state_1)
pprint.pprint(config_1, width=250)

{'EKG': 'O',
 '_target_': 'models.resnet_1d.ResNet1D',
 'activation': 'gelu',
 'age_mean': tensor([71.1417], device='cuda:0'),
 'age_std': tensor([9.7840], device='cuda:0'),
 'awgn': 0.004872735559634612,
 'awgn_age': 0.03583361229344302,
 'base_channels': 64,
 'base_lr': 0.00033918432381593736,
 'block': 'basic',
 'class_label_to_name': ['Normal', 'MCI', 'Dementia'],
 'class_name_to_label': {'Dementia': 2, 'MCI': 1, 'Normal': 0},
 'conv_layers': [2, 2, 2, 2],
 'criterion': 'multi-bce',
 'crop_multiple': 4,
 'crop_timing_analysis': False,
 'cwd': 'C:\\Users\\Minjae\\Desktop\\EEG_Project',
 'dataset_name': 'CAUEEG dataset',
 'dataset_path': 'local/dataset/02_Curated_Data_220419/',
 'ddp': False,
 'device': device(type='cuda'),
 'draw_result': True,
 'dropout': 0.3,
 'fc_stages': 3,
 'file_format': 'memmap',
 'in_channels': 21,
 'input_norm': 'dataset',
 'iterations': 195312,
 'latency': 2000,
 'load_event': False,
 'lr_scheduler_type': 'cosine_decay_with_warmup_half',
 'mgn': 0.09575622

In [7]:
# Load the checkpoint of the second model; `map_location` remaps the stored tensors onto `device`.
# NOTE(review): torch.load unpickles the file -- only open checkpoints from a trusted source.
ckpt = torch.load(os.path.join(r'E:\CAUEEG\checkpoint', model_name_2, 'checkpoint.pt'), map_location=device)
# print(ckpt.keys())

# The stored training configuration is also the hydra spec (`_target_` entry) used to rebuild the network.
config_2 = ckpt['config']
model_2 = hydra.utils.instantiate(config_2).to(device)

model_state_2 = ckpt['model_state']
if config_2.get('ddp', False):
    # Weights saved through DataParallel/DistributedDataParallel carry a 'module.' prefix.
    # Strip it only where it is really present, so that an unprefixed key is never truncated.
    ddp_prefix = 'module.'
    model_state_2 = OrderedDict(
        (key[len(ddp_prefix):] if key.startswith(ddp_prefix) else key, value)
        for key, value in model_state_2.items()
    )

model_2.load_state_dict(model_state_2)
pprint.pprint(config_2, width=250)

{'EKG': 'O',
 '_target_': 'models.resnet_1d.ResNet1D',
 'activation': 'gelu',
 'age_mean': tensor([71.1417], device='cuda:0'),
 'age_std': tensor([9.7840], device='cuda:0'),
 'awgn': 0,
 'awgn_age': 0,
 'base_channels': 64,
 'base_lr': 0.00033918432381593736,
 'block': 'basic',
 'class_label_to_name': ['Normal', 'MCI', 'Dementia'],
 'class_name_to_label': {'Dementia': 2, 'MCI': 1, 'Normal': 0},
 'conv_layers': [2, 2, 2, 2],
 'criterion': 'multi-bce',
 'crop_multiple': 4,
 'crop_timing_analysis': False,
 'cwd': '/home/imkbsz/workspace/eeg_analysis',
 'dataset_name': 'CAUEEG dataset',
 'dataset_path': 'local/dataset/02_Curated_Data_220419/',
 'ddp': False,
 'device': device(type='cuda'),
 'draw_result': True,
 'dropout': 0.3,
 'fc_stages': 3,
 'file_format': 'memmap',
 'in_channels': 21,
 'input_norm': 'dataset',
 'iterations': 195312,
 'latency': 2000,
 'load_event': False,
 'lr_scheduler_type': 'cosine_decay_with_warmup_half',
 'mgn': 0,
 'minibatch': 512,
 'mixup': 0,
 'model': '1D-Re

### Configurations

In [8]:
# Apply the same evaluation-time overrides to both loaded configurations.
for _cfg in (config_1, config_2):
    _cfg.pop('cwd', 0)  # discard the working directory recorded at training time
    _cfg['ddp'] = False
    _cfg['crop_timing_analysis'] = True
    _cfg['eval'] = True
    _cfg['crop_multiple'] = 32
    _cfg['device'] = device

# Passed as `target_from_last` to model.compute_feature_embedding when embeddings are extracted.
target_from_last = 2

### Build Dataset

In [9]:
# Redirect configurations that still reference the '220419' dataset folder to the local copy.
for _cfg in (config_1, config_2):
    if '220419' in _cfg['dataset_path']:
        _cfg['dataset_path'] = './local/dataset/caueeg-dataset/'

# Build the loaders from a copy of the first configuration so config_1 itself is not handed over.
(train_loader,
 val_loader,
 test_loader,
 multicrop_test_loader) = build_dataset_for_train(deepcopy(config_1), verbose=True)

transform: Compose(
    EegRandomCrop(crop_length=2000, length_limit=10000000, multiple=32, latency=2000, segment_simulation=False, return_timing=True, reject_events=False)
    EegDropChannels(drop_index=[])
    EegToTensor()
)

----------------------------------------------------------------------------------------------------

transform_multicrop: Compose(
    EegRandomCrop(crop_length=2000, length_limit=10000000, multiple=8, latency=2000, segment_simulation=False, return_timing=True, reject_events=False)
    EegDropChannels(drop_index=[])
    EegToTensor()
)

----------------------------------------------------------------------------------------------------


task config:
{'class_label_to_name': ['Normal', 'MCI', 'Dementia'],
 'class_name_to_label': {'Dementia': 2, 'MCI': 1, 'Normal': 0},
 'task_description': 'Classification of [Normal], [MCI], and [Dementia] '
                     'symptoms.',
 'task_name': 'CAUEEG-Dementia benchmark'}

 -------------------------------------------

In [10]:
# Print the module types of each model's training-time preprocessing pipeline.
for module in config_1['preprocess_train']:
    print(module.__class__)

print('---')

for module in config_2['preprocess_train']:
    print(module.__class__)

print('---')

# Assemble a "noisy" pipeline for model 2: its own modules interleaved with modules borrowed
# from model 1's training pipeline. The indices are positions in the two listings printed above
# and are specific to each checkpoint pair.
pipeline_1 = config_1['preprocess_train']
pipeline_2 = config_2['preprocess_train']

if model_name_1 == 'lo88puq7':
    noisy_modules = [pipeline_2[0], pipeline_2[1],
                     pipeline_1[2],
                     pipeline_2[2],
                     pipeline_1[4], pipeline_1[5],
                     pipeline_2[3], pipeline_2[4]]
elif model_name_1 in ('l8524nml', '2apj72km'):
    noisy_modules = [pipeline_2[0], pipeline_2[1],
                     pipeline_1[2],
                     pipeline_2[2],
                     pipeline_1[4], pipeline_1[5]]
else:
    # Fail here with a clear message instead of a KeyError on 'preprocess_train_noisy' further down.
    raise ValueError(f"No noisy preprocessing recipe is defined for model_name_1={model_name_1!r}.")

config_2['preprocess_train_noisy'] = torch.nn.Sequential(*noisy_modules)

for module in config_2['preprocess_train_noisy']:
    print(module.__class__)

<class 'datasets.pipeline.EegToDevice'>
<class 'datasets.pipeline.EegNormalizeAge'>
<class 'datasets.pipeline.EegAddGaussianNoiseAge'>
<class 'datasets.pipeline.EegNormalizeMeanStd'>
<class 'datasets.pipeline.EegMultiplicativeGaussianNoise'>
<class 'datasets.pipeline.EegAdditiveGaussianNoise'>
---
<class 'datasets.pipeline.EegToDevice'>
<class 'datasets.pipeline.EegNormalizeAge'>
<class 'datasets.pipeline.EegNormalizeMeanStd'>
---
<class 'datasets.pipeline.EegToDevice'>
<class 'datasets.pipeline.EegNormalizeAge'>
<class 'datasets.pipeline.EegAddGaussianNoiseAge'>
<class 'datasets.pipeline.EegNormalizeMeanStd'>
<class 'datasets.pipeline.EegMultiplicativeGaussianNoise'>
<class 'datasets.pipeline.EegAdditiveGaussianNoise'>


## Test accuracy

In [11]:
# Evaluate model 1 under three preprocessing pipelines (its test pipeline, its training
# pipeline, and the mixed noisy pipeline), on the train loader and then the test loader.
# Each tuple is (loader, preprocessing pipeline, repeat); the order is the execution order.
# The result is bound to a real name rather than `_`, which IPython reserves for the last output.
eval_runs = [
    (train_loader, config_1['preprocess_test'], 1),
    (test_loader, config_1['preprocess_test'], 3),
    (train_loader, config_1['preprocess_train'], 1),
    (test_loader, config_1['preprocess_train'], 3),
    (train_loader, config_2['preprocess_train_noisy'], 1),
    (test_loader, config_2['preprocess_train_noisy'], 3),
]

for eval_loader, eval_preprocess, eval_repeat in eval_runs:
    accuracy = check_accuracy(model_1, eval_loader,
                              eval_preprocess, config_1, repeat=eval_repeat)
    print(accuracy)

100.0
63.42690677966102
100.0
63.32097457627118
100.0
63.029661016949156


In [12]:
# Evaluate model 2 under three preprocessing pipelines (its test pipeline, its training
# pipeline, and the mixed noisy pipeline), on the train loader and then the test loader.
# Each tuple is (loader, preprocessing pipeline, repeat); the order is the execution order.
# The result is bound to a real name rather than `_`, which IPython reserves for the last output.
eval_runs = [
    (train_loader, config_2['preprocess_test'], 1),
    (test_loader, config_2['preprocess_test'], 3),
    (train_loader, config_2['preprocess_train'], 1),
    (test_loader, config_2['preprocess_train'], 3),
    (train_loader, config_2['preprocess_train_noisy'], 1),
    (test_loader, config_2['preprocess_train_noisy'], 3),
]

for eval_loader, eval_preprocess, eval_repeat in eval_runs:
    accuracy = check_accuracy(model_2, eval_loader,
                              eval_preprocess, config_2, repeat=eval_repeat)
    print(accuracy)

100.0
62.82662429378531
100.0
63.24152542372882
100.0
62.976694915254235


In [13]:
# Cross-check: evaluate model 2 with model 1's preprocessing pipelines (test and training).
# Each tuple is (loader, preprocessing pipeline, repeat); the order is the execution order.
# The result is bound to a real name rather than `_`, which IPython reserves for the last output.
eval_runs = [
    (train_loader, config_1['preprocess_test'], 1),
    (test_loader, config_1['preprocess_test'], 3),
    (train_loader, config_1['preprocess_train'], 1),
    (test_loader, config_1['preprocess_train'], 3),
]

for eval_loader, eval_preprocess, eval_repeat in eval_runs:
    accuracy = check_accuracy(model_2, eval_loader,
                              eval_preprocess, config_2, repeat=eval_repeat)
    print(accuracy)

100.0
63.356285310734464
100.0
63.04731638418079


## t-SNE embedding

In [14]:
@torch.no_grad()
def compute_embedding(model, sample_batched, preprocess, crop_multiple, target_from_last):
    """Compute feature embeddings for one batch, optionally averaged over groups of crops.

    Args:
        model: Object exposing ``eval()`` and
            ``compute_feature_embedding(x, age, target_from_last=...)``.
        sample_batched: Batch dictionary. After preprocessing it must provide the keys
            ``'signal'``, ``'age'`` and ``'class_label'``. The return value of ``preprocess``
            is ignored, so ``preprocess`` is expected to update this dictionary in place;
            pass a copy if the caller needs the batch unchanged.
        preprocess: Callable applied to ``sample_batched`` (this includes the to-device step).
        crop_multiple: Number of consecutive batch rows that are averaged into one embedding
            (multi-crop averaging). Values of 1 or less disable the averaging.
        target_from_last: Forwarded unchanged to ``model.compute_feature_embedding``.

    Returns:
        Tuple ``(e, y)``. When ``crop_multiple > 1``, ``e`` is a CPU tensor of the default
        float dtype with ``batch // crop_multiple`` rows, and ``y`` is a CPU ``int32`` tensor
        holding the label of the first row of every group. Otherwise the raw embedding and
        the raw labels are returned as produced.

    Raises:
        ValueError: If the batch size is not a multiple of ``crop_multiple``.
    """
    # evaluation mode
    model.eval()

    # preprocessing (this includes to-device operation)
    preprocess(sample_batched)

    # apply model on whole batch directly on device
    x = sample_batched['signal']
    age = sample_batched['age']
    e = model.compute_feature_embedding(x, age, target_from_last=target_from_last)
    y = sample_batched['class_label']

    if crop_multiple > 1:
        # multi-crop averaging
        if e.size(0) % crop_multiple != 0:
            raise ValueError(f"compute_embedding(): Real minibatch size={e.size(0)} is not multiple of "
                             f"crop_multiple={crop_multiple}.")

        real_minibatch = e.size(0) // crop_multiple

        # Fold the batch axis into (group, crop) and average over the crop axis in one
        # operation on the device, then move the result to the host a single time instead
        # of copying row by row inside a Python loop.
        e = (e.reshape(real_minibatch, crop_multiple, *e.shape[1:])
              .mean(dim=1)
              .to(device='cpu', dtype=torch.get_default_dtype()))

        # The label of a group is the label of its first crop.
        y = (torch.as_tensor(y)[0:real_minibatch * crop_multiple:crop_multiple]
                  .to(device='cpu', dtype=torch.int32))

    return e, y

In [15]:
# Collect model 1's embeddings for every dataset listed in result_1: one clean pass with the
# test-time preprocessing and `repeat` passes with the training-time preprocessing, whose
# results are stored under the '*_noisy' keys.
result_1 = [{'name': 'Train Dataset', 'loader': train_loader}]

for r in range(len(result_1)):
    name = result_1[r]['name']
    loader = result_1[r]['loader']
    crop_multiple = config_1['crop_multiple']
    minibatch_size = loader.batch_size

    # Per-batch arrays are gathered in lists and concatenated once at the end; calling
    # np.concatenate inside the loop would re-copy everything accumulated so far on each batch.
    clean_embeddings, clean_targets = [], []
    noisy_embeddings, noisy_targets = [], []

    for sample_batched in loader:
        # compute_embedding preprocesses the batch dictionary in place, so every pass gets its own copy.
        e, y = compute_embedding(model_1, deepcopy(sample_batched), config_1['preprocess_test'],
                                 crop_multiple, target_from_last=target_from_last)
        clean_embeddings.append(e.detach().cpu().numpy())
        clean_targets.append(y.detach().cpu().numpy())

        for m in range(repeat):
            e_noisy, y_noisy = compute_embedding(model_1, deepcopy(sample_batched), config_1['preprocess_train'],
                                                 crop_multiple, target_from_last=target_from_last)
            noisy_embeddings.append(e_noisy.detach().cpu().numpy())
            noisy_targets.append(y_noisy.detach().cpu().numpy())

    # Fail loudly rather than silently reusing arrays left over from an earlier run.
    if not clean_embeddings or not noisy_embeddings:
        raise ValueError(f"No embeddings were computed for '{name}': the loader is empty or repeat={repeat} is not positive.")

    embedding = np.concatenate(clean_embeddings, axis=0)
    target = np.concatenate(clean_targets, axis=0)
    embedding_noisy = np.concatenate(noisy_embeddings, axis=0)
    target_noisy = np.concatenate(noisy_targets, axis=0)

    result_1[r]['embedding'] = embedding
    result_1[r]['target'] = target
    result_1[r]['embedding_noisy'] = embedding_noisy
    result_1[r]['target_noisy'] = target_noisy

In [16]:
# Collect model 2's embeddings for every dataset listed in result_2: one clean pass with the
# test-time preprocessing and `repeat` passes with the mixed noisy pipeline
# ('preprocess_train_noisy'), whose results are stored under the '*_noisy' keys.
result_2 = [{'name': 'Train Dataset', 'loader': train_loader}]

for r in range(len(result_2)):
    name = result_2[r]['name']
    loader = result_2[r]['loader']
    crop_multiple = config_2['crop_multiple']
    minibatch_size = loader.batch_size

    # Per-batch arrays are gathered in lists and concatenated once at the end; calling
    # np.concatenate inside the loop would re-copy everything accumulated so far on each batch.
    clean_embeddings, clean_targets = [], []
    noisy_embeddings, noisy_targets = [], []

    for sample_batched in loader:
        # compute_embedding preprocesses the batch dictionary in place, so every pass gets its own copy.
        e, y = compute_embedding(model_2, deepcopy(sample_batched), config_2['preprocess_test'],
                                 crop_multiple, target_from_last=target_from_last)
        clean_embeddings.append(e.detach().cpu().numpy())
        clean_targets.append(y.detach().cpu().numpy())

        for m in range(repeat):
            e_noisy, y_noisy = compute_embedding(model_2, deepcopy(sample_batched), config_2['preprocess_train_noisy'],
                                                 crop_multiple, target_from_last=target_from_last)
            noisy_embeddings.append(e_noisy.detach().cpu().numpy())
            noisy_targets.append(y_noisy.detach().cpu().numpy())

    # Fail loudly rather than silently reusing arrays left over from an earlier run.
    if not clean_embeddings or not noisy_embeddings:
        raise ValueError(f"No embeddings were computed for '{name}': the loader is empty or repeat={repeat} is not positive.")

    embedding = np.concatenate(clean_embeddings, axis=0)
    target = np.concatenate(clean_targets, axis=0)
    embedding_noisy = np.concatenate(noisy_embeddings, axis=0)
    target_noisy = np.concatenate(noisy_targets, axis=0)

    result_2[r]['embedding'] = embedding
    result_2[r]['target'] = target
    result_2[r]['embedding_noisy'] = embedding_noisy
    result_2[r]['target_noisy'] = target_noisy

## Draw 2D

In [17]:
# plt.style.use('default') 
# plt.style.use('fivethirtyeight') # default, ggplot, fivethirtyeight, bmh, dark_background, classic
# plt.rcParams.update({'font.size': 16})
# plt.rcParams.update({'font.family': 'Roboto Slab'})
# plt.rcParams["savefig.dpi"] = 1200
# color_map = ['tab:green', 'tab:orange', 'tab:red']
# color_map2 = [['tab:green', 'tab:brown', 'tab:blue'], 
#               ['gray', 'tab:orange', 'tab:pink'], 
#               ['gray', 'gray', 'tab:red']]


# for n_iter in [7000]:
#     for perplexity in [25, 50, 100, 200, 300]:
#         tsne_transform = TSNE(n_components=2, init="pca", learning_rate="auto", perplexity=perplexity,
#                               n_iter=n_iter, n_iter_without_progress=1000, n_jobs=2, random_state=0,)
        
#         for r in range(len(result_1)):
#             output = tsne_transform.fit_transform(np.concatenate([result_1[r]['embedding'], result_1[r]['embedding_noisy']]))
#             result_1[r]['tsne_embedding'] = output[:result_1[r]['embedding'].shape[0]]
#             result_1[r]['tsne_embedding_noisy'] = output[result_1[r]['embedding'].shape[0]:]
            
#         for r in range(len(result_1)):
#             fig, ax = plt.subplots()
#             for class_name, class_label in config_1['class_name_to_label'].items():
#                 ax.scatter(
#                     result_1[r]['tsne_embedding'][result_1[r]['target'] == class_label][:, 0],
#                     result_1[r]['tsne_embedding'][result_1[r]['target'] == class_label][:, 1],
#                     label=class_name,
#                     color=color_map[class_label],
#                     alpha=0.8,
#                     edgecolors='k',                    
#                     zorder=2)
#             ax.set_xticklabels([])
#             ax.set_yticklabels([])
#             ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)
#             if save_fig:
#                 for ext in ['pdf', 'jpg', 'svg']:
#                     os.makedirs(os.path.join(output_folder, model_name_1, ext), exist_ok=True)
#                     fig.savefig(os.path.join(output_folder, model_name_1, ext, f"dim2_per{perplexity:03}_iter{n_iter:05}_ori.{ext}"), 
#                                 transparent=True, bbox_inches='tight')
#             else:
#                 plt.show()
#             fig.clear()
#             plt.close(fig)
            
#         for r in range(len(result_1)):
#             fig, ax = plt.subplots()
#             for class_name, class_label in config_1['class_name_to_label'].items():
#                 ax.scatter(
#                     result_1[r]['tsne_embedding'][result_1[r]['target'] == class_label][:, 0],
#                     result_1[r]['tsne_embedding'][result_1[r]['target'] == class_label][:, 1],
#                     label=class_name,
#                     color=color_map[class_label],
#                     alpha=0.2,
#                     zorder=2)        
#                 ax.scatter(
#                     result_1[r]['tsne_embedding_noisy'][result_1[r]['target_noisy'] == class_label][:, 0],
#                     result_1[r]['tsne_embedding_noisy'][result_1[r]['target_noisy'] == class_label][:, 1],
#                     label=class_name + ' (noisy)',
#                     color=color_map[class_label],
#                     alpha=0.8,
#                     edgecolors='k',
#                     zorder=2)               
#             ax.set_xticklabels([])
#             ax.set_yticklabels([])
#             ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)
#             if save_fig:
#                 for ext in ['pdf', 'jpg', 'svg']:
#                     os.makedirs(os.path.join(output_folder, model_name_1, ext), exist_ok=True)
#                     fig.savefig(os.path.join(output_folder, model_name_1, ext, f"dim2_per{perplexity:03}_iter{n_iter:05}.{ext}"), 
#                                 transparent=True, bbox_inches='tight')
#             else:
#                 plt.show()
#             fig.clear()
#             plt.close(fig)

In [18]:
# plt.style.use('default') 
# plt.style.use('fivethirtyeight') # default, ggplot, fivethirtyeight, bmh, dark_background, classic
# plt.rcParams.update({'font.size': 16})
# plt.rcParams.update({'font.family': 'Roboto Slab'})
# plt.rcParams["savefig.dpi"] = 1200
# color_map = ['tab:green', 'tab:orange', 'tab:red']
# color_map2 = [['tab:green', 'tab:brown', 'tab:blue'], 
#               ['gray', 'tab:orange', 'tab:pink'], 
#               ['gray', 'gray', 'tab:red']]


# for n_iter in [7000]:
#     for perplexity in [25, 50, 100, 200, 300]:
#         tsne_transform = TSNE(n_components=2, init="pca", learning_rate="auto", perplexity=perplexity,
#                               n_iter=n_iter, n_iter_without_progress=1000, n_jobs=2, random_state=0,)
        
#         for r in range(len(result_2)):
#             output = tsne_transform.fit_transform(np.concatenate([result_2[r]['embedding'], result_2[r]['embedding_noisy']]))
#             result_2[r]['tsne_embedding'] = output[:result_2[r]['embedding'].shape[0]]
#             result_2[r]['tsne_embedding_noisy'] = output[result_2[r]['embedding'].shape[0]:]
            
#         for r in range(len(result_2)):
#             fig, ax = plt.subplots()
#             for class_name, class_label in config_2['class_name_to_label'].items():
#                 ax.scatter(
#                     result_2[r]['tsne_embedding'][result_2[r]['target'] == class_label][:, 0],
#                     result_2[r]['tsne_embedding'][result_2[r]['target'] == class_label][:, 1],
#                     label=class_name,
#                     color=color_map[class_label],
#                     alpha=0.8,
#                     edgecolors='k',                    
#                     zorder=2)
#             ax.set_xticklabels([])
#             ax.set_yticklabels([])
#             ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)
#             if save_fig:
#                 for ext in ['pdf', 'jpg', 'svg']:
#                     os.makedirs(os.path.join(output_folder, model_name_2, ext), exist_ok=True)
#                     fig.savefig(os.path.join(output_folder, model_name_2, ext, f"dim2_per{perplexity:03}_iter{n_iter:05}_ori.{ext}"), 
#                                 transparent=True, bbox_inches='tight')
#             else:
#                 plt.show()
#             fig.clear()
#             plt.close(fig)
            
#         for r in range(len(result_2)):
#             fig, ax = plt.subplots()
#             for class_name, class_label in config_2['class_name_to_label'].items():
#                 ax.scatter(
#                     result_2[r]['tsne_embedding'][result_2[r]['target'] == class_label][:, 0],
#                     result_2[r]['tsne_embedding'][result_2[r]['target'] == class_label][:, 1],
#                     label=class_name,
#                     color=color_map[class_label],
#                     alpha=0.2,
#                     zorder=2)        
#                 ax.scatter(
#                     result_2[r]['tsne_embedding_noisy'][result_2[r]['target_noisy'] == class_label][:, 0],
#                     result_2[r]['tsne_embedding_noisy'][result_2[r]['target_noisy'] == class_label][:, 1],
#                     label=class_name + ' (noisy)',
#                     color=color_map[class_label],
#                     alpha=0.8,
#                     edgecolors='k',
#                     zorder=2)               
#             ax.set_xticklabels([])
#             ax.set_yticklabels([])
#             ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)
#             if save_fig:
#                 for ext in ['pdf', 'jpg', 'svg']:
#                     os.makedirs(os.path.join(output_folder, model_name_2, ext), exist_ok=True)
#                     fig.savefig(os.path.join(output_folder, model_name_2, ext, f"dim2_per{perplexity:03}_iter{n_iter:05}.{ext}"), 
#                                 transparent=True, bbox_inches='tight')
#             else:
#                 plt.show()
#             fig.clear()
#             plt.close(fig)

## Draw 3D

In [19]:
# Figure styling for the exported plots.
plt.style.use('default')
plt.style.use('fivethirtyeight')  # default, ggplot, fivethirtyeight, bmh, dark_background, classic
plt.rcParams.update({'font.size': 16, 'font.family': 'Roboto Slab'})
plt.rcParams["savefig.dpi"] = 1200
color_map = ['tab:green', 'tab:orange', 'tab:red']  # indexed by class label
color_map2 = [['tab:green', 'tab:brown', 'tab:blue'],
              ['gray', 'tab:orange', 'tab:pink'],
              ['gray', 'gray', 'tab:red']]


# 3D t-SNE of model 1's clean embeddings, one figure per (n_iter, perplexity) setting.
for n_iter in [10000]:
    for perplexity in [200]:  # [50, 70, 100, 150, 200]:
        tsne_transform = TSNE(n_components=3, init="pca", learning_rate="auto", perplexity=perplexity,
                              n_iter=n_iter, n_iter_without_progress=1000, n_jobs=2, random_state=0,)

        # Project the embeddings of every result entry.
        for entry in result_1:
            output = tsne_transform.fit_transform(entry['embedding'])
            n_clean = entry['embedding'].shape[0]
            entry['tsne_embedding'] = output[:n_clean]

        # Draw one scatter cloud per class and save (or show) the figure.
        for entry in result_1:
            fig = plt.figure(num=1, clear=True, figsize=(12.0, 12.0))
            ax = fig.add_subplot(1, 1, 1, projection='3d')

            for class_name, class_label in config_1['class_name_to_label'].items():
                points = entry['tsne_embedding'][entry['target'] == class_label]
                ax.scatter(
                    xs=points[:, 0],
                    ys=points[:, 1],
                    zs=points[:, 2],
                    label=class_name,
                    color=color_map[class_label],
                    alpha=0.8,
                    s=40,
                )

            # The t-SNE axes carry no meaningful units, so tick labels are hidden.
            for hide_tick_labels in (ax.set_xticklabels, ax.set_yticklabels, ax.set_zticklabels):
                hide_tick_labels([])
            ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)

            if not save_fig:
                plt.show()
            else:
                for ext in ['pdf', 'jpg', 'svg']:
                    ext_folder = os.path.join(output_folder, model_name_1, ext)
                    os.makedirs(ext_folder, exist_ok=True)
                    fig.savefig(os.path.join(ext_folder, f"dim3_per{perplexity:03}_iter{n_iter:05}_ori0.{ext}"),
                                transparent=True, bbox_inches='tight')

            # Release the figure so repeated iterations do not accumulate memory.
            fig.clear()
            plt.close(fig)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [20]:
# Figure styling for the exported plots.
plt.style.use('default')
plt.style.use('fivethirtyeight')  # default, ggplot, fivethirtyeight, bmh, dark_background, classic
plt.rcParams.update({'font.size': 16, 'font.family': 'Roboto Slab'})
plt.rcParams["savefig.dpi"] = 1200
color_map = ['tab:green', 'tab:orange', 'tab:red']  # indexed by class label
color_map2 = [['tab:green', 'tab:brown', 'tab:blue'],
              ['gray', 'tab:orange', 'tab:pink'],
              ['gray', 'gray', 'tab:red']]


# 3D t-SNE of model 1's clean and noisy embeddings, fitted jointly so both share one space.
for n_iter in [10000]:
    for perplexity in [200]:  # [50, 70, 100, 150, 200]:
        tsne_transform = TSNE(n_components=3, init="pca", learning_rate="auto", perplexity=perplexity,
                              n_iter=n_iter, n_iter_without_progress=1000, n_jobs=2, random_state=0,)

        # Fit on the stacked [clean; noisy] matrix and split the projection back afterwards.
        for entry in result_1:
            stacked = np.concatenate([entry['embedding'], entry['embedding_noisy']])
            output = tsne_transform.fit_transform(stacked)
            n_clean = entry['embedding'].shape[0]
            entry['tsne_embedding'] = output[:n_clean]
            entry['tsne_embedding_noisy'] = output[n_clean:]

        # Figure 1: clean embeddings only.
        for entry in result_1:
            fig = plt.figure(num=1, clear=True, figsize=(12.0, 12.0))
            ax = fig.add_subplot(1, 1, 1, projection='3d')

            for class_name, class_label in config_1['class_name_to_label'].items():
                points = entry['tsne_embedding'][entry['target'] == class_label]
                ax.scatter(
                    xs=points[:, 0],
                    ys=points[:, 1],
                    zs=points[:, 2],
                    label=class_name,
                    color=color_map[class_label],
                    alpha=0.8,
                    s=40,
                )

            for hide_tick_labels in (ax.set_xticklabels, ax.set_yticklabels, ax.set_zticklabels):
                hide_tick_labels([])
            ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)

            if not save_fig:
                plt.show()
            else:
                for ext in ['pdf', 'jpg', 'svg']:
                    ext_folder = os.path.join(output_folder, model_name_1, ext)
                    os.makedirs(ext_folder, exist_ok=True)
                    fig.savefig(os.path.join(ext_folder, f"dim3_per{perplexity:03}_iter{n_iter:05}_ori.{ext}"),
                                transparent=True, bbox_inches='tight')

            fig.clear()
            plt.close(fig)

        # Figure 2: faint clean points with the noisy points drawn on top, class by class.
        # Each layer is (embedding key, target key, legend suffix, alpha).
        layers = (('tsne_embedding', 'target', '', 0.2),
                  ('tsne_embedding_noisy', 'target_noisy', ' (noisy)', 0.8))

        for entry in result_1:
            fig = plt.figure(num=1, clear=True, figsize=(12.0, 12.0))
            ax = fig.add_subplot(1, 1, 1, projection='3d')

            for class_name, class_label in config_1['class_name_to_label'].items():
                for embedding_key, target_key, label_suffix, layer_alpha in layers:
                    points = entry[embedding_key][entry[target_key] == class_label]
                    ax.scatter(
                        xs=points[:, 0],
                        ys=points[:, 1],
                        zs=points[:, 2],
                        label=class_name + label_suffix,
                        color=color_map[class_label],
                        alpha=layer_alpha,
                        s=40,
                    )

            for hide_tick_labels in (ax.set_xticklabels, ax.set_yticklabels, ax.set_zticklabels):
                hide_tick_labels([])
            ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)

            if not save_fig:
                plt.show()
            else:
                for ext in ['pdf', 'jpg', 'svg']:
                    ext_folder = os.path.join(output_folder, model_name_1, ext)
                    os.makedirs(ext_folder, exist_ok=True)
                    fig.savefig(os.path.join(ext_folder, f"dim3_per{perplexity:03}_iter{n_iter:05}.{ext}"),
                                transparent=True, bbox_inches='tight')

            # Release the figure so repeated iterations do not accumulate memory.
            fig.clear()
            plt.close(fig)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [21]:
# 3-D t-SNE of the clean embeddings stored in result_2, one figure per entry.
# Relies on names defined in earlier cells: result_2, config_2, save_fig,
# output_folder and model_name_2.
plt.style.use('default')
plt.style.use('fivethirtyeight') # default, ggplot, fivethirtyeight, bmh, dark_background, classic
plt.rcParams.update({'font.size': 16})
plt.rcParams.update({'font.family': 'Roboto Slab'})
plt.rcParams["savefig.dpi"] = 1200
# color_map is indexed by class label when drawing the scatter series below.
color_map = ['tab:green', 'tab:orange', 'tab:red']
# color_map2 is not referenced in this cell; it is kept because it is a notebook-level name.
color_map2 = [['tab:green', 'tab:brown', 'tab:blue'],
              ['gray', 'tab:orange', 'tab:pink'],
              ['gray', 'gray', 'tab:red']]


for n_iter in [10000]:
    for perplexity in [200]:  # [50, 70, 100, 150, 200]:
        tsne_transform = TSNE(n_components=3, init="pca", learning_rate="auto", perplexity=perplexity,
                              n_iter=n_iter, n_iter_without_progress=1000, n_jobs=2, random_state=0,)

        # Project every entry first; the 3-D coordinates are cached on the entry itself.
        for r in range(len(result_2)):
            result_2[r]['tsne_embedding'] = tsne_transform.fit_transform(result_2[r]['embedding'])

        for r in range(len(result_2)):
            entry = result_2[r]
            # All entries used to be written to the same file name, so only the last
            # figure survived. The tag is added only when there are several entries,
            # which keeps the single-entry file names exactly as before.
            run_tag = f"_r{r}" if len(result_2) > 1 else ""

            fig = plt.figure(num=1, clear=True, figsize=(12.0, 12.0))
            ax = fig.add_subplot(1, 1, 1, projection='3d')
            for class_name, class_label in config_2['class_name_to_label'].items():
                # Select the rows of this class once instead of once per axis.
                class_points = entry['tsne_embedding'][entry['target'] == class_label]
                ax.scatter(
                    xs=class_points[:, 0],
                    ys=class_points[:, 1],
                    zs=class_points[:, 2],
                    label=class_name,
                    color=color_map[class_label],
                    alpha=0.8,
                    s=40,
                )
            # Tick labels are blanked on all three axes.
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)
            if save_fig:
                file_stem = f"dim3_per{perplexity:03}_iter{n_iter:05}_ori0{run_tag}"
                for ext in ['pdf', 'jpg', 'svg']:
                    ext_dir = os.path.join(output_folder, model_name_2, ext)
                    os.makedirs(ext_dir, exist_ok=True)
                    fig.savefig(os.path.join(ext_dir, f"{file_stem}.{ext}"),
                                transparent=True, bbox_inches='tight')
            else:
                plt.show()
            # Figure number 1 is reused for every plot, so release it explicitly.
            fig.clear()
            plt.close(fig)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [22]:
# Joint 3-D t-SNE of the clean and noisy embeddings stored in result_2.
# For every entry two figures are produced: the clean points alone ("_ori")
# and the clean points faded behind the noisy points (no suffix).
# Relies on names defined in earlier cells: result_2, config_2, save_fig,
# output_folder and model_name_2.
plt.style.use('default')
plt.style.use('fivethirtyeight') # default, ggplot, fivethirtyeight, bmh, dark_background, classic
plt.rcParams.update({'font.size': 16})
plt.rcParams.update({'font.family': 'Roboto Slab'})
plt.rcParams["savefig.dpi"] = 1200
# color_map is indexed by class label when drawing the scatter series below.
color_map = ['tab:green', 'tab:orange', 'tab:red']
# color_map2 is not referenced in this cell; it is kept because it is a notebook-level name.
color_map2 = [['tab:green', 'tab:brown', 'tab:blue'],
              ['gray', 'tab:orange', 'tab:pink'],
              ['gray', 'gray', 'tab:red']]


for n_iter in [10000]:
    for perplexity in [200]:  # [50, 70, 100, 150, 200]:
        tsne_transform = TSNE(n_components=3, init="pca", learning_rate="auto", perplexity=perplexity,
                              n_iter=n_iter, n_iter_without_progress=1000, n_jobs=2, random_state=0,)

        # Clean and noisy embeddings are stacked and projected in a single fit, then
        # split back at the clean row count so both halves share one t-SNE space.
        for r in range(len(result_2)):
            n_clean = result_2[r]['embedding'].shape[0]
            output = tsne_transform.fit_transform(
                np.concatenate([result_2[r]['embedding'], result_2[r]['embedding_noisy']]))
            result_2[r]['tsne_embedding'] = output[:n_clean]
            result_2[r]['tsne_embedding_noisy'] = output[n_clean:]

        # (file suffix, overlay noisy points?) -- all "_ori" figures are drawn first,
        # then all overlay figures, matching the original order of the two loops.
        for file_suffix, overlay_noisy in [('_ori', False), ('', True)]:
            for r in range(len(result_2)):
                entry = result_2[r]
                # All entries used to be written to the same file name, so only the last
                # figure survived. The tag is added only when there are several entries,
                # which keeps the single-entry file names exactly as before.
                run_tag = f"_r{r}" if len(result_2) > 1 else ""

                fig = plt.figure(num=1, clear=True, figsize=(12.0, 12.0))
                ax = fig.add_subplot(1, 1, 1, projection='3d')
                for class_name, class_label in config_2['class_name_to_label'].items():
                    # Select the rows of this class once instead of once per axis.
                    clean_points = entry['tsne_embedding'][entry['target'] == class_label]
                    ax.scatter(
                        xs=clean_points[:, 0],
                        ys=clean_points[:, 1],
                        zs=clean_points[:, 2],
                        label=class_name,
                        color=color_map[class_label],
                        # Clean points are faded when the noisy points are drawn on top.
                        alpha=0.2 if overlay_noisy else 0.8,
                        s=40,
                    )
                    if overlay_noisy:
                        noisy_points = entry['tsne_embedding_noisy'][entry['target_noisy'] == class_label]
                        ax.scatter(
                            xs=noisy_points[:, 0],
                            ys=noisy_points[:, 1],
                            zs=noisy_points[:, 2],
                            label=class_name + ' (noisy)',
                            color=color_map[class_label],
                            alpha=0.8,
                            s=40,
                        )
                # Tick labels are blanked on all three axes.
                ax.set_xticklabels([])
                ax.set_yticklabels([])
                ax.set_zticklabels([])
                ax.legend(bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0.)
                if save_fig:
                    file_stem = f"dim3_per{perplexity:03}_iter{n_iter:05}{file_suffix}{run_tag}"
                    for ext in ['pdf', 'jpg', 'svg']:
                        ext_dir = os.path.join(output_folder, model_name_2, ext)
                        os.makedirs(ext_dir, exist_ok=True)
                        fig.savefig(os.path.join(ext_dir, f"{file_stem}.{ext}"),
                                    transparent=True, bbox_inches='tight')
                else:
                    plt.show()
                # Figure number 1 is reused for every plot, so release it explicitly.
                fig.clear()
                plt.close(fig)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.

