In [14]:
import pandas as pd
import numpy as np
import time

from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler, RobustScaler

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh import palettes
from bokeh.models import Legend

# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [28]:
def load_latent(name="", results_dir="../results/AE",
                split_dir="../data/train_val_test_split/seed0", fold=0):
    """Load saved latent coordinates and attach them to the train/val tables.

    For each of the "train" and "val" splits this reads
    ``<results_dir>/<split>_latent_<name>.npy`` and
    ``<split_dir>/<split>_fold<fold>.csv`` (no header row, first column used
    as the index), then stores the first two latent columns in new
    ``latent0`` / ``latent1`` columns of the table.

    Parameters
    ----------
    name : str
        Suffix identifying which saved latent arrays to load.
    results_dir : str
        Directory containing the ``*_latent_<name>.npy`` files.
    split_dir : str
        Directory containing the ``*_fold<fold>.csv`` split files.
    fold : int
        Fold number used in the split file names.

    Returns
    -------
    tuple
        ``(train_df, val_df, both_df)`` where ``both_df`` is the row-wise
        concatenation of the train and validation frames.

    Raises
    ------
    ValueError
        If a latent array is not 2-D with at least two columns, or if its
        number of rows differs from the number of rows in the split file.
    """
    def _with_latent(split):
        # Read one split and add its two latent coordinates as columns.
        latent_path = "{0}/{1}_latent_{2}.npy".format(results_dir, split, name)
        table_path = "{0}/{1}_fold{2}.csv".format(split_dir, split, fold)
        latent = np.load(latent_path)
        if latent.ndim != 2 or latent.shape[1] < 2:
            raise ValueError(
                "{0}: expected a 2-D array with at least 2 columns, got shape {1}"
                .format(latent_path, latent.shape))
        table = pd.read_csv(table_path, header=None, index_col=0)
        # Rows are matched purely by position, so the counts must agree.
        if len(table) != latent.shape[0]:
            raise ValueError(
                "{0} has {1} rows but {2} has {3} rows"
                .format(latent_path, latent.shape[0], table_path, len(table)))
        table["latent0"] = latent[:, 0]
        table["latent1"] = latent[:, 1]
        return table

    train_df = _with_latent("train")
    val_df = _with_latent("val")
    both_df = pd.concat([train_df, val_df])
    return train_df, val_df, both_df

In [29]:
# Load the "simple2" latents and overlay the train and validation points.
train_df, val_df, both_df = load_latent(name="simple2")
p = figure(plot_width=800, plot_height=700)
# One scatter layer per split, drawn in the order train, then val.
for split_df, split_color, split_name in ((train_df, "green", "train"),
                                          (val_df, "red", "val")):
    p.circle(split_df["latent0"], split_df["latent1"],
             size=3, color=split_color, alpha=0.6, legend=split_name)
# Clicking a legend entry hides the corresponding layer.
p.legend.click_policy = "hide"
show(p)

In [30]:
# Read the per-sample labels, indexed by the "sample_id" column.
label_df = pd.read_csv(filepath_or_buffer="../data/labels.csv", index_col="sample_id")
# Index-on-index left join: every labelled sample is kept, and the columns of
# both_df (including latent0 / latent1) are NaN where a sample is not in both_df.
joined_df = label_df.join(other=both_df, how="left")

In [31]:
# Scatter plot of the latent space with one colour per tissue label.
this_df = joined_df[["label_tissue", "latent0", "latent1"]].dropna()
# value_counts() sorts by frequency, so tissues are drawn (and assigned
# colours) from the most to the least frequent.
colored_tissues = this_df["label_tissue"].value_counts().index

p = figure(plot_width=1000, plot_height=800)
with open("../colors.txt", "r") as f:
    # The file is read as one comma-separated list. Strip surrounding
    # whitespace/newlines from every entry and drop empty entries so that a
    # trailing newline or trailing comma cannot end up inside a colour value.
    colors = [c.strip() for c in f.read().split(",") if c.strip()]
if not colors:
    raise ValueError("../colors.txt does not contain any colours")
for i, datatype in enumerate(colored_tissues):
    subset_df = this_df[this_df["label_tissue"] == datatype]
    # Cycle through the colour list instead of raising IndexError when there
    # are more tissues than colours in the file.
    p.circle(subset_df["latent0"], subset_df["latent1"], size=3,
             color=colors[i % len(colors)], legend=datatype)

# Clicking a legend entry hides the corresponding tissue.
p.legend.click_policy="hide"
p.legend.location = "bottom_left"
show(p)

In [76]:
# Scatter plot of the latent space with one colour per gender label.
this_df = joined_df.loc[:, ["label_gender", "latent0", "latent1"]].dropna()
# Categories ordered from most to least frequent.
colored_genders = this_df["label_gender"].value_counts().index

p = figure(plot_width=800, plot_height=800)
colors = palettes.Category10[10]
marker_style = dict(size=3, alpha=0.7)
for rank in range(len(colored_genders)):
    gender = colored_genders[rank]
    is_gender = this_df["label_gender"] == gender
    points = this_df[is_gender]
    p.circle(points["latent0"], points["latent1"],
             color=colors[rank], legend=gender, **marker_style)

# Clicking a legend entry hides the corresponding category.
p.legend.click_policy = "hide"
p.legend.location = "bottom_left"
show(p)

In [78]:
# Scatter plot of the latent space with one colour per tumor label.
this_df = joined_df.loc[:, ["label_tumor", "latent0", "latent1"]].dropna()
# Categories ordered from most to least frequent.
colored_tumors = this_df["label_tumor"].value_counts().index

p = figure(plot_width=800, plot_height=800)
colors = palettes.Category10[10]
marker_style = dict(size=3, alpha=0.7)
for rank in range(len(colored_tumors)):
    tumor = colored_tumors[rank]
    is_tumor = this_df["label_tumor"] == tumor
    points = this_df[is_tumor]
    p.circle(points["latent0"], points["latent1"],
             color=colors[rank], legend=tumor, **marker_style)

# Clicking a legend entry hides the corresponding category.
p.legend.click_policy = "hide"
p.legend.location = "top_right"
show(p)

In [106]:
# Scatter plot of the latent space coloured by age decade.
# .copy() makes this_df an independent frame, so adding the "age_bin" column
# below does not write into a slice of joined_df (which can trigger pandas'
# SettingWithCopyWarning).
this_df = joined_df[["label_age", "latent0", "latent1"]].dropna().copy()
# Decade number minus one: ages 10-19 -> 0, 20-29 -> 1, ...; ages below 10 -> -1.
this_df["age_bin"] = this_df["label_age"].apply(lambda x:int(x/10)-1)

p = figure(plot_width=800, plot_height=800)
# Nine colours: the first four and the last five entries of RdBu11.
rainbow = palettes.RdBu11[:4] + palettes.RdBu11[-5:]
for i in sorted(list(set(this_df["age_bin"]))):
    subset_df = this_df[this_df["age_bin"] == i]
    # Clamp the palette index. Without this, bin -1 (ages below 10) silently
    # wraps around to the last colour and bins >= len(rainbow) (ages of 100
    # and above) raise IndexError. The legend text still shows the real decade.
    color_idx = min(max(i, 0), len(rainbow) - 1)
    p.circle(subset_df["latent0"], subset_df["latent1"], size=3,
             alpha=0.7, color=rainbow[color_idx], legend="{0}0s".format(i+1))

# Clicking a legend entry hides the corresponding decade.
p.legend.click_policy="hide"
p.legend.location = "top_right"
show(p)

In [32]:
# Load the "complex2" latents and overlay the train and validation points.
train_df, val_df, both_df = load_latent(name="complex2")
p = figure(plot_width=800, plot_height=700)
# One scatter layer per split, drawn in the order train, then val.
for split_df, split_color, split_name in ((train_df, "green", "train"),
                                          (val_df, "red", "val")):
    p.circle(split_df["latent0"], split_df["latent1"],
             size=3, color=split_color, alpha=0.6, legend=split_name)
# Clicking a legend entry hides the corresponding layer.
p.legend.click_policy = "hide"
show(p)

In [33]:
# Read the per-sample labels, indexed by the "sample_id" column.
label_df = pd.read_csv(filepath_or_buffer="../data/labels.csv", index_col="sample_id")
# Index-on-index left join: every labelled sample is kept, and the columns of
# both_df (including latent0 / latent1) are NaN where a sample is not in both_df.
joined_df = label_df.join(other=both_df, how="left")

In [34]:
# Scatter plot of the latent space with one colour per tissue label.
this_df = joined_df[["label_tissue", "latent0", "latent1"]].dropna()
# value_counts() sorts by frequency, so tissues are drawn (and assigned
# colours) from the most to the least frequent.
colored_tissues = this_df["label_tissue"].value_counts().index

p = figure(plot_width=1000, plot_height=800)
with open("../colors.txt", "r") as f:
    # The file is read as one comma-separated list. Strip surrounding
    # whitespace/newlines from every entry and drop empty entries so that a
    # trailing newline or trailing comma cannot end up inside a colour value.
    colors = [c.strip() for c in f.read().split(",") if c.strip()]
if not colors:
    raise ValueError("../colors.txt does not contain any colours")
for i, datatype in enumerate(colored_tissues):
    subset_df = this_df[this_df["label_tissue"] == datatype]
    # Cycle through the colour list instead of raising IndexError when there
    # are more tissues than colours in the file.
    p.circle(subset_df["latent0"], subset_df["latent1"], size=3,
             color=colors[i % len(colors)], legend=datatype)

# Clicking a legend entry hides the corresponding tissue.
p.legend.click_policy="hide"
p.legend.location = "bottom_left"
show(p)

In [11]:
# Scatter plot of the latent space with one colour per gender label.
this_df = joined_df.loc[:, ["label_gender", "latent0", "latent1"]].dropna()
# Categories ordered from most to least frequent.
colored_genders = this_df["label_gender"].value_counts().index

p = figure(plot_width=800, plot_height=800)
colors = palettes.Category10[10]
marker_style = dict(size=3, alpha=0.7)
for rank in range(len(colored_genders)):
    gender = colored_genders[rank]
    is_gender = this_df["label_gender"] == gender
    points = this_df[is_gender]
    p.circle(points["latent0"], points["latent1"],
             color=colors[rank], legend=gender, **marker_style)

# Clicking a legend entry hides the corresponding category.
p.legend.click_policy = "hide"
p.legend.location = "bottom_left"
show(p)

In [12]:
# Scatter plot of the latent space with one colour per tumor label.
this_df = joined_df.loc[:, ["label_tumor", "latent0", "latent1"]].dropna()
# Categories ordered from most to least frequent.
colored_tumors = this_df["label_tumor"].value_counts().index

p = figure(plot_width=800, plot_height=800)
colors = palettes.Category10[10]
marker_style = dict(size=3, alpha=0.7)
for rank in range(len(colored_tumors)):
    tumor = colored_tumors[rank]
    is_tumor = this_df["label_tumor"] == tumor
    points = this_df[is_tumor]
    p.circle(points["latent0"], points["latent1"],
             color=colors[rank], legend=tumor, **marker_style)

# Clicking a legend entry hides the corresponding category.
p.legend.click_policy = "hide"
p.legend.location = "top_right"
show(p)

In [13]:
# Scatter plot of the latent space coloured by age decade.
# .copy() makes this_df an independent frame, so adding the "age_bin" column
# below does not write into a slice of joined_df (which can trigger pandas'
# SettingWithCopyWarning).
this_df = joined_df[["label_age", "latent0", "latent1"]].dropna().copy()
# Decade number minus one: ages 10-19 -> 0, 20-29 -> 1, ...; ages below 10 -> -1.
this_df["age_bin"] = this_df["label_age"].apply(lambda x:int(x/10)-1)

p = figure(plot_width=800, plot_height=800)
# Nine colours: the first four and the last five entries of RdBu11.
rainbow = palettes.RdBu11[:4] + palettes.RdBu11[-5:]
for i in sorted(list(set(this_df["age_bin"]))):
    subset_df = this_df[this_df["age_bin"] == i]
    # Clamp the palette index. Without this, bin -1 (ages below 10) silently
    # wraps around to the last colour and bins >= len(rainbow) (ages of 100
    # and above) raise IndexError. The legend text still shows the real decade.
    color_idx = min(max(i, 0), len(rainbow) - 1)
    p.circle(subset_df["latent0"], subset_df["latent1"], size=3,
             alpha=0.7, color=rainbow[color_idx], legend="{0}0s".format(i+1))

# Clicking a legend entry hides the corresponding decade.
p.legend.click_policy="hide"
p.legend.location = "top_right"
show(p)