# Dimensionality Reduction of TCGA and GTEx Tissues Via TriMap

In [38]:
import os

import holoviews as hv
import numpy as np
import pandas as pd
import rnaseq_lib as r
# Load the HoloViews 'bokeh' plotting extension, with the logo display turned off
hv.extension('bokeh', logo=False)

In [31]:
# Synapse ID: syn12009613
# Location of the TCGA/GTEx HDF5 file. Defaults to the original path, but can be
# overridden by setting the TCGA_GTEX_DATA environment variable so the notebook
# also runs on machines that keep the data somewhere else.
data_path = os.environ.get('TCGA_GTEX_DATA', '/mnt/data/Objects/tcga_gtex_data.hd5')

# Two tables are read from the same file by key: 'exp' and 'met'
# (presumably expression values and sample metadata -- confirm against the Synapse entry).
exp = pd.read_hdf(data_path, key='exp')
met = pd.read_hdf(data_path, key='met')

In [32]:
# Cast expression data as float64 which is required for TriMap.
# `exp` is rebound to the converted frame; the stored file is not modified.
exp = exp.astype(np.float64)

In [33]:
# Construct combined dataframe and Holoview wrapper.
# `df` is built from the expression table and the metadata table by the
# rnaseq_lib helper (presumably one row per sample with metadata columns
# attached -- TODO confirm); `h` is the plotting wrapper used for TriMap below.
df = r.data.add_metadata_to_exp(exp, met)
h = r.plot.Holoview(df)

# Color scheme for sample counts
# NOTE(review): `colors` is not referenced by any cell visible here -- confirm it is still needed.
colors = ['green', 'blue', 'yellow', 'red']

In [34]:
# Shared options applied to every Scatter element built below:
#   'plot'  -- color points by the 'type' column, fixed width, logo-disabling hook
#   'style' -- small, mostly transparent markers that become more opaque on hover
opts = {
    'Scatter': {
        'plot': {
            'color_index': 'type',
            'width': 750,
            'finalize_hooks': [r.plot.disable_logo],
        },
        'style': {
            'size': 5,
            'alpha': 0.25,
            'hover_alpha': 0.75,
            'cmap': 'Set1',
        },
    },
}

In [35]:
# Tissue subsets to embed together, one subset per row. Names on a row are
# whitespace-separated and split into a list, so `tissues` is a list of lists of
# tissue-name tokens (underscores keep a multi-word name as a single token).
tissues = list(map(str.split, [
    'Adrenal',
    'Bile Liver',
    'Bladder Kidney',
    'Bone_marrow Whole Cells',
    'Brain',
    'Breast',
    'Cervix Ovary Uterus Vagina',
    'Colon Small_intestine Stomach Esophagus',
    'Eye Brain',
    'Head Skin Minor',
    'Kidney',
    'Liver',
    'Lung Pleura',
    'Lymph Cells Whole',
    'Pancreas',
    'Prostate',
    'Skin',
    'Soft_tissue Muscle Adipose',
    'Stomach',
    'Testis',
    'Thyroid',
    'Uterus',
]))

In [None]:
# Build one TriMap scatter per tissue subset, keyed by the subset's names joined
# with '_' (e.g. ['Bile', 'Liver'] -> 'Bile_Liver'), with the shared Scatter
# options applied to each plot.
#
# Currently (4/13/18) a bug with Layout, so using HMAP. The Layout form was:
#hlayout = [h.trimap(h.genes, tissue_subset=x).opts(opts).relabel('_'.join(x)) for x in tissues]
hmap = {
    '_'.join(subset): h.trimap(h.genes, tissue_subset=subset).opts(opts)
    for subset in tissues
}

In [47]:
# Wrap the per-subset plots in a HoloMap with a single key dimension, 'Tissue(s)',
# whose values are the keys of `hmap`
hmap_plot = hv.HoloMap(hmap, kdims='Tissue(s)')

In [54]:
%%opts Scatter [width=650 height=500 legend_position='bottom_left'] {+axiswise +framewise}
# Display the HoloMap with the Scatter options given in the cell magic above.
# NOTE(review): width=650 here differs from width=750 in the global Scatter options -- confirm which is intended.
hmap_plot

#### Notes
- **Cholangiocarcinoma** samples look like a subset of **Liver** samples, but there are not many of them.
- **Bladder** comparison: **Kidney**
- **Leukemia** comparison: GTEx **Whole Blood** and the available **Cell Lines**
- **Eye** comparison: **Brain**
- **Head** comparison: **Skin** and **Minor_Salivary_Gland**
- **Lung**: Add in **Mesothelioma** samples which originate from the **Pleura**.
- **Lymph**: **Whole Blood** and **Cell Lines**
- **Sarcomas**: **Muscle** and **Adipose**