In [1]:
import sys
sys.path.append('..')
sys.path.append('../models')

import glob
import cv2
import numpy as np
import math
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import pandas as pd
from sklearn.utils import shuffle
import umap
from sklearn.decomposition import PCA

import plotly.express as px
import plotly.offline as pyo
pyo.init_notebook_mode()

from UserParams import UserParams

# Notebook-wide parameter object, created in 'predict' mode.
constants = UserParams('predict')
# Seed handed to umap.UMAP in the single-graph cell so the embedding is repeatable.
random_state = 42


Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html

IPython.utils.traitlets has moved to a top-level traitlets package.



UserParams Status:  predict 1 FNA_VGG19_classifier_regressor_input256


## UMAP and its helper functions

In [3]:
def draw_custom_umap(embedding, group_id_list, feature_id_list, hex_colors,
                     n_neighbors=None, min_dist=None):
    """Display an interactive Plotly scatter plot of a 2-D embedding.

    Parameters
    ----------
    embedding : array supporting ``[:, k]`` indexing
        Column 0 is plotted on the x axis and column 1 on the y axis.
    group_id_list : sequence
        One value per point; drives the marker colour and is labelled
        "Dataset Type" in the legend.
    feature_id_list : sequence
        One value per point; shown in the hover tooltip.
    hex_colors : sequence of str
        Forwarded to Plotly Express as ``color_discrete_sequence``.
    n_neighbors, min_dist : optional
        Values reported in the figure title. When left as ``None`` the
        notebook-level globals ``n`` and ``d`` are read instead, which is
        what this function did before the parameters were added.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Prefer explicit arguments; fall back to the notebook globals so that
    # existing four-argument calls keep producing the same title.
    if n_neighbors is None:
        n_neighbors = n
    if min_dist is None:
        min_dist = d

    feature_df = pd.DataFrame({
        'x': embedding[:, 0],
        'y': embedding[:, 1],
        'group_id': group_id_list,
        'feature_id': feature_id_list,
    })

    fig = px.scatter(feature_df, x="x", y="y", color="group_id", hover_data=['feature_id'],
                     width=700, height=610,
                     color_discrete_sequence=hex_colors,
                     title=f"UMAP of the style vectors at neighbors={n_neighbors} mindist={min_dist}",
                     labels={'group_id': 'Dataset Type'}, template='simple_white')

    # Keep both axes visible but hide tick marks and tick labels.
    hidden_ticks = {'visible': True, 'showticklabels': False, 'ticks': ''}
    fig.update_layout(yaxis=dict(hidden_ticks), xaxis=dict(hidden_ticks))

    # Thin grey outline around every marker.
    fig.update_traces(marker=dict(line=dict(width=1, color='Grey')),
                      selector=dict(mode='markers'))

    # Black axis lines mirrored on the opposite side, framing the plot.
    fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
    fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)

    fig.show()

In [4]:
def adjust_lightness(color, amount=1):
    """Scale the HLS lightness of a colour and return it as an RGB tuple.

    Parameters
    ----------
    color : str or RGB sequence
        A colour name found in ``matplotlib.colors.cnames`` or anything
        else accepted by ``matplotlib.colors.to_rgb``.
    amount : float, default 1
        Multiplier applied to the lightness channel; the product is
        clamped to the range [0, 1].

    Returns
    -------
    tuple
        The ``(r, g, b)`` values produced by ``colorsys.hls_to_rgb``.
    """
    import colorsys
    import matplotlib.colors as mc

    try:
        c = mc.cnames[color]
    except (KeyError, TypeError):
        # KeyError: not a known colour name.
        # TypeError: unhashable input such as a list or NumPy array.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        c = color

    hue, lightness, saturation = colorsys.rgb_to_hls(*mc.to_rgb(c))
    return colorsys.hls_to_rgb(hue, max(0, min(1, amount * lightness)), saturation)

In [16]:
def rgb_to_hex_converter(rgb_colors):
    """Convert colours with channel values in [0, 1] to ``#rrggbb`` strings.

    Parameters
    ----------
    rgb_colors : iterable of iterables of float
        Each inner iterable holds the channel values of one colour.

    Returns
    -------
    list of str
        One ``'#'``-prefixed string per input colour, with two lowercase
        hex digits per channel.

    Notes
    -----
    Each channel is scaled by 255 and truncated with ``int()``. The result
    is then clamped to 0-255 so that out-of-range inputs (negative values
    or values above 1) can no longer yield malformed strings.
    """
    hex_colors = []
    for rgb in rgb_colors:
        channels = (min(255, max(0, int(channel * 255))) for channel in rgb)
        hex_colors.append('#' + ''.join(f'{value:02x}' for value in channels))
    return hex_colors

def get_hex_colormap(group_id_list):
    """Build one hex colour per distinct group id from the 'jet' colormap.

    Parameters
    ----------
    group_id_list : iterable of hashable
        Group ids; only the number of distinct values is used.

    Returns
    -------
    list of str
        Hex colour strings, one per distinct group id, as produced by
        ``rgb_to_hex_converter``.
    """
    class_num = len(set(group_id_list))

    # NOTE(review): cm.get_cmap was deprecated in Matplotlib 3.7 and removed
    # in 3.9; use matplotlib.colormaps['jet'].resampled(class_num) when the
    # environment is upgraded.
    custom_cm = cm.get_cmap('jet', class_num)

    # Sample the colormap evenly and keep only the first three (R, G, B) columns.
    rgb_colors = custom_cm(np.linspace(0, 1, class_num))[:, :3]

    # Called with the default amount of 1; kept as the hook for tuning lightness.
    adjusted_colors = [adjust_lightness(a_color) for a_color in rgb_colors]

    # The original computed this list but never returned it, so callers got None.
    return rgb_to_hex_converter(adjusted_colors)

### Single Graph case

In [23]:
def preprocess_datasets(model_type):
    """Load the saved prediction artefacts for one model type.

    The first model name, dataset name and frame from a fresh
    ``UserParams('predict')`` (repeat index 0) select the result folder
    under ``../models/results/predict_wholeframe_round1_<model_type>/``.

    Parameters
    ----------
    model_type : str
        Inserted into the result folder name.

    Returns
    -------
    tuple
        ``(feature_vectors, group_ids, feature_ids, mask_areas)`` where
        ``group_ids`` is the prediction results as a list, ``feature_ids``
        is the row indices as strings, and ``mask_areas`` is column 0 of
        the stored mask-area array.
    """
    params = UserParams('predict')
    model = params.model_names[0]
    dataset = params.dataset_names[0]
    frame_id = params.frame_list[0]
    repeat = 0
    _hyper_params = params.get_args()  # hyper parameters; fetched but not used below

    run_dir = f"frame{frame_id}_{model}_repeat{repeat}"
    result_dir = f"../models/results/predict_wholeframe_round1_{model_type}/{dataset}/{run_dir}/"

    def load_array(file_name):
        # NOTE(review): allow_pickle=True can run code embedded in the file;
        # only point this at result files you trust.
        return np.load(result_dir + file_name, allow_pickle=True, encoding="bytes")

    feature_vectors = load_array('feature_vector.npy')
    predictions = load_array('prediction_result_list.npy')
    mask_areas = load_array('mask_area_list.npy')[:, 0]
    print(feature_vectors.shape, predictions.shape, mask_areas.shape)

    sample_count = feature_vectors.shape[0]
    assert sample_count == len(predictions)

    # One group id per row (the prediction result) and the row index as a string id.
    group_ids = list(predictions)
    feature_ids = [str(index) for index in range(sample_count)]

    return feature_vectors, group_ids, feature_ids, mask_areas

In [26]:
# UMAP hyper-parameters; draw_custom_umap reads these globals for its title.
n, d = 18, 0.8
feature_id_list = []

(combined_feature_vectors,
 group_id_list,
 feature_id_list,
 mask_area_list) = preprocess_datasets(constants.strategy_type)

# Group (and therefore colour) the points by the values in mask_area_list
# instead of the prediction results returned above.
group_id_list = mask_area_list
hex_colors = get_hex_colormap(group_id_list)

reducer = umap.UMAP(n_neighbors=n, min_dist=d, n_components=2, random_state=random_state)
embedding = reducer.fit_transform(combined_feature_vectors)
print('embedding', embedding.shape)
draw_custom_umap(embedding, group_id_list, feature_id_list, hex_colors)

UserParams Status:  predict 1 FNA_VGG19_classifier_regressor_input256
(18576, 512) (18576,) (18576,)
embedding (18576, 2)


### Multi-modal case

In [None]:
# preprocess_datasets takes only the model type and returns four values
# (feature vectors, group ids, feature ids, mask areas). The previous calls
# passed three arguments and unpacked three values, which raises TypeError.
combined_feature_vectors_one, group_ids_one, feature_ids_one, mask_areas_one = preprocess_datasets('one_generalist_unet_feature_extractor_big')
combined_feature_vectors_two, group_ids_two, feature_ids_two, mask_areas_two = preprocess_datasets('one_generalist_VGG19_dropout_feature_extractor_big')

# Labels follow the same order as the stacked rows below: first model, then second.
group_id_list = group_ids_one + group_ids_two
feature_id_list = feature_ids_one + feature_ids_two

print(combined_feature_vectors_one.shape)
print(combined_feature_vectors_two.shape)
combined_feature_vectors = np.concatenate((combined_feature_vectors_one, combined_feature_vectors_two), axis=0)
assert combined_feature_vectors.shape[0] == len(group_id_list) == len(feature_id_list)

# Optional shuffle (disabled):
# combined_feature_vectors, group_id_list, feature_id_list = shuffle(combined_feature_vectors, group_id_list, feature_id_list, random_state=0)

In [None]:
# UMAP hyper-parameters for the multi-modal embedding; draw_custom_umap reads
# these globals for its title. min_dist was previously written as a
# percentage (100) and divided by 100 for a since-disabled parameter sweep;
# it is now stated directly.
n = 100
d = 1.0

# Use the notebook-wide seed (same value as the literal 42 used here before)
# so both embeddings are seeded from one place.
embedding = umap.UMAP(n_neighbors=n, min_dist=d, n_components=2, random_state=random_state).fit_transform(combined_feature_vectors)

In [None]:
half_index = embedding.shape[0] // 2  # frames*16

# Plot the first half of the rows, then the second half, each in its own figure.
for part in (slice(None, half_index), slice(half_index, None)):
    draw_custom_umap(embedding[part, :], group_id_list[part], feature_id_list[part], hex_colors)