# Tumor Biomarkers: CSPA Gene Subset
Genes are ranked by log2 fold change (`l2fc_gtex`) across all tissues, highest first.

In [1]:
# Dev convenience: load the autoreload extension so edits to imported modules
# (rnaseq_lib in particular) are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
%aimport rnaseq_lib

In [2]:
from collections import defaultdict
import pandas as pd
import rnaseq_lib as r

import holoviews as hv
# Use bokeh as the HoloViews plotting backend; logo=False suppresses the logo output
hv.extension('bokeh', logo=False)

In [3]:
# TCGA and GTEx expression and metadata dataframe (tab-separated, first column is the index).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
df_path = '/mnt/rnaseq-cancer/Objects/tcga-gtex-metadata-expression.tsv'
# r.tissues.dtype is presumably a column -> dtype mapping provided by rnaseq_lib - confirm
df = pd.read_csv(df_path, sep='\t', index_col=0, dtype=r.tissues.dtype)

In [4]:
# Holoviews plot object from rnaseq_lib built on the expression/metadata dataframe;
# later cells read `h.tissues` and call `h.sample_counts()` / `h.gene_distribution()`
h = r.plot.Holoview(df)

In [5]:
# Read in the differential-expression (de) table: the first two rows form a two-level
# column header (indexed later as de[tissue, feature]) and the first column becomes
# the row index, which extract_feature treats as gene names
de = pd.read_csv('de-table.tsv', sep='\t', header=[0, 1], index_col=0)

In [8]:
def extract_feature(feature, de_table=None, tissues=None):
    """
    Reshape one per-tissue feature of the differential-expression table into long form.

    The table must have two-level columns keyed as (tissue, feature), so that
    ``de_table[tissue, feature]`` is a Series indexed by gene.

    :param str feature: Second-level column name to extract (e.g. 'l2fc_gtex')
    :param pd.DataFrame de_table: Table to read from; defaults to the notebook-level ``de``
    :param list tissues: First-level column names to include; defaults to ``h.tissues``
    :return: One row per (tissue, gene), emitted tissue by tissue, with columns
             'value', 'gene' and 'tissue'
    :rtype: pd.DataFrame
    """
    table = de if de_table is None else de_table
    tissue_names = list(h.tissues if tissues is None else tissues)

    # One gene-indexed Series per tissue, kept in tissue order
    per_tissue = [table[tissue, feature] for tissue in tissue_names]

    long_df = pd.DataFrame()
    long_df['value'] = [value for column in per_tissue for value in column.tolist()]
    long_df['gene'] = [gene for column in per_tissue for gene in column.index.tolist()]
    # Rows are emitted tissue by tissue, so each tissue label is repeated once per
    # gene. Cycling through the tissues for every gene instead would pair values
    # with the wrong tissue.
    long_df['tissue'] = [tissue
                         for tissue, column in zip(tissue_names, per_tissue)
                         for _ in column.index]
    return long_df

In [9]:
# Gene names ordered by descending 'l2fc_gtex' value, pooled over all tissues.
# NOTE(review): extract_feature yields one row per (tissue, gene), so the same gene
# can appear more than once here and in any slice taken from it - confirm whether
# duplicates should be dropped.
max_genes = extract_feature('l2fc_gtex').sort_values('value', ascending=False).gene

## Sample Counts

In [10]:
%%opts Bars [xrotation=80]
# Sample-count plot from the Holoview helper; the Bars option above rotates the
# x-axis labels by 80 degrees. The cell magic must remain the first line of the cell.
h.sample_counts()

## Top 10

In [11]:
%%opts Overlay [tabs=True]
# One gene_distribution plot per entry in the first 10 positions of max_genes, each
# relabelled with its gene name; the Overlay option above shows them as tabs.
dists = [h.gene_distribution(g).relabel(g) for g in max_genes[:10]]
hv.Overlay(dists)

## Ranks 11 - 20

In [12]:
%%opts Overlay [tabs=True]
# One gene_distribution plot per entry in positions 11-20 of max_genes (slice [10:20]),
# each relabelled with its gene name; the Overlay option above shows them as tabs.
dists = [h.gene_distribution(g).relabel(g) for g in max_genes[10:20]]
hv.Overlay(dists)

## Ranks 21 - 30

In [16]:
%%opts Overlay [tabs=True]
# One gene_distribution plot per entry in positions 21-30 of max_genes (slice [20:30]),
# each relabelled with its gene name; the Overlay option above shows them as tabs.
dists = [h.gene_distribution(g).relabel(g) for g in max_genes[20:30]]
hv.Overlay(dists)

## Ranks 31 - 40

In [14]:
%%opts Overlay [tabs=True]
# One gene_distribution plot per entry in positions 31-40 of max_genes (slice [30:40]),
# each relabelled with its gene name; the Overlay option above shows them as tabs.
dists = [h.gene_distribution(g).relabel(g) for g in max_genes[30:40]]
hv.Overlay(dists)

## Ranks 41 - 50

In [15]:
%%opts Overlay [tabs=True]
# One gene_distribution plot per entry in positions 41-50 of max_genes (slice [40:50]),
# each relabelled with its gene name; the Overlay option above shows them as tabs.
dists = [h.gene_distribution(g).relabel(g) for g in max_genes[40:50]]
hv.Overlay(dists)