# Antineoplastic Sankey Diagrams
<center>
Sankey diagrams relating the classes and subgroups of antineoplastic (anti-cancer) agents to the tissues they target.
</center>

In [7]:
# Development aid: load IPython's autoreload extension so that edits to
# rnaseq_lib are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
%aimport rnaseq_lib

In [8]:
import pandas as pd
import rnaseq_lib as r
from rnaseq_lib.plot.sankey import Sankey
import holoviews as hv
# Load the HoloViews bokeh plotting backend (logo display disabled)
hv.extension('bokeh', logo=False)

In [9]:
# Convenience function for adding links
def make_links(df, groups):
    """Build the link table connecting consecutive grouping columns.

    For every adjacent pair ``(groups[i], groups[i + 1])`` the rows of ``df``
    are grouped by the first column and the values of the second column are
    counted; each (source, target, count) combination becomes one link.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing every column named in ``groups``.
    groups : sequence of str
        Ordered column names; each column is linked to the one that follows it.

    Returns
    -------
    pandas.DataFrame
        One row per link, built from the records returned by ``add_links``.
        Empty when ``groups`` holds fewer than two names.
    """
    links = []
    # Pair every column with its successor; fewer than two names yields no pairs.
    for source_col, target_col in zip(groups[:-1], groups[1:]):
        counts = df.groupby(source_col)[target_col].value_counts()
        # Series.items() works on Python 2 and 3 and on old and new pandas,
        # whereas xrange / Series.iteritems() are unavailable on current stacks.
        links.extend(add_links(counts.items()))
    return pd.DataFrame(links)

def add_links(iteritems):
    """Convert ``((source, target), count)`` pairs into link records.

    Parameters
    ----------
    iteritems : iterable
        Yields ``((source, target), count)`` tuples, e.g. the items of a
        Series indexed by (source, target) pairs.

    Returns
    -------
    list of dict
        One ``{'source': ..., 'target': ..., 'value': ...}`` dict per input
        pair, in input order.
    """
    # No per-source bookkeeping is kept: only source, target and value are
    # part of the emitted records.
    return [
        {'source': source, 'target': target, 'value': count}
        for (source, target), count in iteritems
    ]

def sankey_tissue(df, tissues, groups):
    """Sankey diagram built from the rows whose ``Tissue`` is in ``tissues``.

    ``groups`` is the ordered list of columns passed on to ``make_links``.
    The link dimensions are renamed to Source / Target / Count.
    """
    tissue_mask = df.Tissue.isin(tissues)
    link_table = pd.DataFrame(make_links(df[tissue_mask], groups))
    diagram = r.plot.sankey.Sankey(link_table)
    return diagram.redim(source='Source', target='Target', value='Count')

def sankey_drugs(df, drugs, groups):
    """Sankey diagram built from the rows whose ``Name`` is in ``drugs``.

    ``groups`` is the ordered list of columns passed on to ``make_links``.
    The link dimensions are renamed to Source / Target / Count.
    """
    drug_mask = df.Name.isin(drugs)
    link_table = pd.DataFrame(make_links(df[drug_mask], groups))
    diagram = r.plot.sankey.Sankey(link_table)
    return diagram.redim(source='Source', target='Target', value='Count')

In [10]:
# Read in link data: a tab-separated drug classification / tissue table,
# with the first column of the file used as the DataFrame index
biomarker_path = '../1-Data-Collection-and-Processing/KEGG/tables/drug-classification-tissue.tsv'
df = pd.read_csv(biomarker_path, sep='\t', index_col=0)

In [11]:
%%opts Sankey [color_index='index' label_index='index' xaxis=None yaxis=None inspection_policy='edges']
%%opts Sankey [selection_policy='nodes' width=1000 height=600 show_frame=False]
%%opts Sankey (cmap='Category20' label_text_font_size='11pt' edge_nonselection_alpha=0.0)
%%opts Sankey (edge_hover_alpha=0.8 edge_line_alpha=0 edge_fill_alpha=0.25)
%%opts Sankey (node_line_alpha=0 node_size=10)

# Whole-table diagram: link each column to the next one in the list
links = make_links(df, ['Class', 'Subgroup', 'Specification', 'Tissue'])
# Rename the source/target/value dimensions to Source/Target/Count
r.plot.sankey.Sankey((links)).redim(source='Source', target='Target', value='Count')

# Drugs for Individual Tissues

In [15]:
%%opts Sankey [color_index='index' label_index='index' xaxis=None yaxis=None inspection_policy='edges']
%%opts Sankey [selection_policy='nodes' width=800 height=300 show_frame=False show_legend=False]
%%opts Sankey (cmap='Category20' label_text_font_size='11pt' edge_nonselection_alpha=0.0)
%%opts Sankey (edge_hover_alpha=0.8 edge_line_alpha=0 edge_fill_alpha=0.25)
%%opts Sankey (node_line_alpha=0 node_size=10)
%%opts Overlay [tabs=True]
# Column order for the per-tissue diagrams: tissue first, drug name last
groups = ['Tissue', 'Class', 'Specification', 'Name']
# Disabled alternative: a HoloMap keyed by tissue instead of a tabbed Overlay
#hv.HoloMap({x: sankey_tissue(df, tissues=[x], groups=groups) for x in df.Tissue.unique().tolist()}, kdims=['Tissue'])
# One Sankey per unique tissue (in sorted order), each relabelled with its tissue and combined in an Overlay
hv.Overlay([sankey_tissue(df, tissues=[x], groups=groups).relabel(x) for x in sorted(df.Tissue.unique().tolist())])

# Tissues for Individual Drugs

In [18]:
%%opts Sankey [color_index='index' label_index='index' xaxis=None yaxis=None inspection_policy='edges']
%%opts Sankey [selection_policy='nodes' width=800 height=300 show_frame=False show_legend=False]
%%opts Sankey (cmap='Category20' label_text_font_size='11pt' edge_nonselection_alpha=0.0)
%%opts Sankey (edge_hover_alpha=0.8 edge_line_alpha=0 edge_fill_alpha=0.25)
%%opts Sankey (node_line_alpha=0 node_size=10)
%%opts Overlay [tabs=True]
# Column order for the per-drug diagrams: drug name first, tissue last
groups = ['Name', 'Class', 'Specification', 'Tissue']
# Disabled alternative: a HoloMap keyed by drug instead of a tabbed Overlay
#hv.HoloMap({x: sankey_drugs(df, drugs=[x], groups=groups) for x in sorted(df.Name.unique().tolist())}, kdims=['Drug'])
# One Sankey per unique drug name (in sorted order), each relabelled with its drug and combined in an Overlay
hv.Overlay([sankey_drugs(df, drugs=[x], groups=groups).relabel(x) for x in sorted(df.Name.unique().tolist())])