In [2]:
from anndata import read_h5ad
import pandas as pd
import numpy as np

import altair as alt
from altair_saver import save as alt_save

alt.data_transformers.disable_max_rows();

import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Load the tab-separated interaction table from the path given as the
# 'interactions' input, then preview its first rows.
# NOTE(review): `snakemake` is not defined in this notebook; presumably it is
# injected by Snakemake when the notebook is run as part of a rule — confirm.
interactions_df = pd.read_csv(snakemake.input['interactions'], sep='\t')
interactions_df.head()

In [4]:
# Interaction "cases" (which dataset expresses which partner):
# a_and_b: partner A is expressed in the human cell line, partner B is expressed in Tabula Muris
# b_and_a: partner A is expressed in Tabula Muris, partner B is expressed in the human cell line

In [5]:
def make_interaction_name(a_genes, b_genes):
    """Build a display name for an interaction from its two gene lists.

    Missing entries (anything for which ``pd.notna`` is false, e.g. NaN or
    None) are dropped from each list. The remaining genes of each side are
    joined with ";" and the two sides are joined with "__".

    Parameters
    ----------
    a_genes : iterable of str
        Gene names for the first side of the name.
    b_genes : iterable of str
        Gene names for the second side of the name.

    Returns
    -------
    str
        ``"<a genes joined by ;>__<b genes joined by ;>"``.
    """
    a_part = ";".join(filter(pd.notna, a_genes))
    b_part = ";".join(filter(pd.notna, b_genes))
    return f"{a_part}__{b_part}"

In [6]:
# Build the display name of every interaction for both cases.
#   interaction_name_df : one row per (interaction_id, case) with its name
#   interaction_name_map: (case, interaction_id) -> interaction name
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and, where it still exists,
# copies the whole accumulated frame on every call.
interaction_name_records = []
interaction_name_map = {}

# Group by the column label itself rather than a one-element list, so each
# group key is the scalar interaction id. With a length-1 list, pandas >= 2.0
# yields 1-tuples, which would not match the scalar ids used for map lookups.
for interaction_id, interaction_df in interactions_df.groupby('interaction_id'):
    a_interaction_df = interaction_df.loc[interaction_df['a_or_b'] == 'a']
    b_interaction_df = interaction_df.loc[interaction_df['a_or_b'] == 'b']

    a_human_genes = a_interaction_df['human_gene'].tolist()
    a_mouse_genes = a_interaction_df['mouse_gene'].tolist()
    b_human_genes = b_interaction_df['human_gene'].tolist()
    b_mouse_genes = b_interaction_df['mouse_gene'].tolist()

    # a_and_b pairs the human genes of partner A with the mouse genes of
    # partner B; b_and_a pairs the human genes of B with the mouse genes of A.
    a_and_b_name = make_interaction_name(a_human_genes, b_mouse_genes)
    b_and_a_name = make_interaction_name(b_human_genes, a_mouse_genes)

    for case, interaction_name in (("a_and_b", a_and_b_name), ("b_and_a", b_and_a_name)):
        interaction_name_records.append({
            'interaction_id': interaction_id,
            'interaction_name': interaction_name,
            'case': case
        })
        interaction_name_map[(case, interaction_id)] = interaction_name

# Passing the columns explicitly keeps the expected columns even when there
# are no interactions at all.
interaction_name_df = pd.DataFrame(
    interaction_name_records,
    columns=['interaction_id', 'interaction_name', 'case']
)

In [7]:
# Read the AnnData object from the path given as the 'coexpression' input.
tissue_adata = read_h5ad(snakemake.input['coexpression'])

In [8]:
# Keep only the observations (rows) whose 'met_penetrance' is non-zero and
# copy the result so it is an independent object.
# Note: this rebinds tissue_adata, so the unfiltered data is no longer
# reachable under this name after the cell runs.
tissue_adata = tissue_adata[tissue_adata.obs['met_penetrance'] != 0, :].copy()

In [9]:
# Inspect the per-row metadata (one entry per observation).
tissue_adata.obs

In [10]:
# Attach a readable name to every interaction (one per row of .var) by looking
# up its (case, interaction_id) pair in interaction_name_map. A pair that is
# missing from the map raises KeyError.
tissue_adata.var['interaction_name'] = tissue_adata.var.apply(lambda row: interaction_name_map[(row['case'], row['interaction_id'])], axis='columns')

In [11]:
# Inspect the per-interaction metadata, which now includes 'interaction_name'.
tissue_adata.var

In [12]:
# Columns are interactions, rows are (cancer cell line, human cell type) pairs
# NOTE(review): the other comments in this notebook describe the cell-type side
# as Tabula Muris, so "human cell type" may be a slip — confirm.
tissue_adata.X.shape

In [13]:
# For every row (cell line, cell type), get the column indices for the top 5 interactions.
# np.argsort sorts ascending, so the last 5 indices of each row point at the 5
# largest values, ordered from smallest to largest.
# NOTE(review): np.argsort places NaN last, so NaN entries would be selected as
# "top" values — confirm X contains no NaN.
top_indices = np.argsort(tissue_adata.X, axis=1)[:,-5:]
top_indices.shape

In [14]:
# Sanity check: how many top indices were selected for the first row.
top_indices[0,:].shape

In [15]:
# Interaction metadata for the top interactions of the first row.
tissue_adata.var.iloc[top_indices[0,:]]

In [18]:
# Start from an empty frame; the following cell populates all_top_interaction_df.
all_top_interaction_df = pd.DataFrame()

In [19]:
# For every row of X, take the metadata of its top interactions, add the
# interaction values and the metadata of that row, and combine everything into
# one long table (all_top_interaction_df).
# The per-row frames are gathered in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and, where it still exists,
# re-copies the accumulated frame on every iteration.
PAIR_COLUMNS = [
    'cell_line',
    'cell_ontology_id',
    'cell_ontology_name',
    'met_potential_mean',
    'met_penetrance',
    'metmap_tissue',
]

top_interaction_dfs = []
for pair_i in range(tissue_adata.X.shape[0]):
    top_values = tissue_adata.X[pair_i,top_indices[pair_i,:]]
    pair_row = tissue_adata.obs.iloc[pair_i]
    # Copy so that adding columns does not touch tissue_adata.var itself.
    top_interaction_df = tissue_adata.var.iloc[top_indices[pair_i,:]].copy()
    top_interaction_df['value'] = top_values
    # Broadcast the row-level metadata onto each of its top interactions.
    for pair_column in PAIR_COLUMNS:
        top_interaction_df[pair_column] = pair_row[pair_column]

    top_interaction_dfs.append(top_interaction_df)

# pd.concat raises on an empty list, so fall back to an empty frame when X has no rows.
all_top_interaction_df = (
    pd.concat(top_interaction_dfs, ignore_index=True)
    if top_interaction_dfs
    else pd.DataFrame()
)

In [20]:
# Preview the combined table of top interactions.
all_top_interaction_df.head()

In [21]:
# Number of distinct 'cell_line' values in the combined table.
len(all_top_interaction_df['cell_line'].unique().tolist())

In [22]:
# Interactive bar charts of the top interactions: one facet per cell line,
# filtered to the cell type chosen in a dropdown.
ALL_CELLTYPES = all_top_interaction_df['cell_ontology_name'].unique().tolist()

# Dropdown-bound single selection; it starts on the first cell type in the table.
celltype_dropdown = alt.binding_select(options=ALL_CELLTYPES)
celltype_selection = alt.selection_single(
    name="Cell Type",
    fields=['cell_ontology_name'],
    bind=celltype_dropdown,
    init={'cell_ontology_name': ALL_CELLTYPES[0]},
)

# A single small bar chart: interaction value per interaction name, with the
# bar size mapped to the metastasis potential.
interaction_bars = (
    alt.Chart(all_top_interaction_df)
    .transform_filter(celltype_selection)
    .mark_bar()
    .encode(
        x=alt.X("value:Q"),
        y=alt.Y("interaction_name:N", axis=alt.Axis(title="Interaction")),
        color=alt.Color("interaction_name:N", legend=alt.Legend(title="Interaction", symbolLimit=100)),
        size=alt.Size("met_potential_mean:Q", legend=alt.Legend(title="Metastasis Potential")),
    )
    .properties(width=100, height=100)
)

# Repeat the chart per cell line (5 per row) with independent x/y scales, and
# register the dropdown selection on the final chart.
plot = (
    interaction_bars
    .facet(
        facet=alt.Facet('cell_line:N', header=alt.Header(title="Cell Line")),
        columns=5,
    )
    .resolve_scale(y='independent', x='independent')
    .add_selection(celltype_selection)
)

In [23]:
# Write the chart to the first declared output path, asking for the SVG
# renderer through embed_options.
# NOTE(review): embed_options presumably only applies to HTML output — confirm
# the output path has an .html extension.
plot.save(snakemake.output[0], embed_options={'renderer':'svg'})