# Load Modules

In [12]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from plotly.subplots import make_subplots
from multiprocessing import Pool
from tqdm import tqdm
import glob
import os

pio.templates.default = 'plotly_white'
pd.options.mode.chained_assignment = None

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load Input Data

## Define Paths

In [13]:
# Input directory with enrichments. The loader functions below glob this
# location for "*_peptides.txt" and "*c<N>_bool.csv" files.
input_dir = "../data/enriched/TororoKanunguRound2/enrichments/"

# Output Figure Directory: the plotting cells write their HTML figures here.
# NOTE(review): presumably this directory must already exist — confirm.
fig_dir = "../figures/HPEC/"

## Calculate Peptide Set Size

In [3]:
def get_num_peptides(peptide_list_fn):
    """Return the number of lines in the peptide list file.

    Parameters
    ----------
    peptide_list_fn : str
        Path to a text file; every line (including blank ones) is counted.

    Returns
    -------
    int
        Line count of the file, 0 for an empty file.
    """
    # The context manager guarantees the handle is closed even if reading
    # fails part-way through.
    with open(peptide_list_fn) as f:
        return sum(1 for _ in f)

def get_param_sizes(directory):
    """Tabulate the peptide-set size for every "*_peptides.txt" file.

    The first two underscore-separated tokens of each file name are parsed
    as the z and c thresholds after stripping the letters "z" and "c"
    (e.g. a name starting with "z5_c8_" yields z_min=5, c_min=8).

    Parameters
    ----------
    directory : str
        Directory to search; a trailing path separator is optional.

    Returns
    -------
    pandas.DataFrame
        One row per matching file with columns "z_min", "c_min" and "size".
        The columns are present even when no file matches.

    Raises
    ------
    ValueError
        If a file name's leading tokens cannot be parsed as integers.
    """
    columns = ["z_min", "c_min", "size"]
    rows = []
    # os.path.join/basename keep this working with or without a trailing
    # separator on `directory`, and on platforms that do not use "/".
    for fn in glob.glob(os.path.join(directory, "*_peptides.txt")):
        z_token, c_token = os.path.basename(fn).split("_")[:2]
        rows.append({
            "z_min": int(z_token.replace("z", "")),
            "c_min": int(c_token.replace("c", "")),
            "size": get_num_peptides(fn),
        })

    return pd.DataFrame(rows, columns=columns)
    


# One row per (z_min, c_min) peptide list found in input_dir, with its size.
peptide_set_size = get_param_sizes(input_dir)

## Calculate Healthy/Patient Peptide Sizes

In [4]:
def process_frame(fn):
    """Summarise one CSV into per-column totals labelled by sample class.

    Every column of the CSV is summed (axis 0). The result has one row per
    column with:

    * "sample_name"   - the column label,
    * "enriched_size" - the column sum,
    * "sample_class"  - "Healthy" when the label contains "HC", otherwise
      "Patient".

    NOTE(review): presumably each column is one sample holding boolean
    enrichment flags, so the sum is a count — confirm against the files.
    """
    totals = (
        pd.read_csv(fn)
        .sum(axis=0)
        .rename("enriched_size")
        .rename_axis("sample_name")
        .reset_index()
    )
    # Classify on the sample label: "HC" anywhere in the name marks a
    # healthy control.
    totals["sample_class"] = [
        "Healthy" if "HC" in label else "Patient"
        for label in totals["sample_name"]
    ]
    return totals
    
def load_sizes(directory, c = 8):
    """Load per-sample enriched sizes for every z threshold at a fixed c.

    Each file matching "*c{c}_bool.csv" in `directory` is summarised with
    `process_frame`; the z threshold is parsed from the file name's first
    underscore-separated token (with the letter "z" stripped).

    Parameters
    ----------
    directory : str
        Directory to search; a trailing path separator is optional.
    c : int, default 8
        Value substituted into the file-name pattern and stored as "c_min".

    Returns
    -------
    pandas.DataFrame
        The concatenated `process_frame` results with added "z_min" and
        "c_min" columns. The index is not reset, so row labels repeat
        across files.

    Raises
    ------
    ValueError
        If no file matches the pattern, or a z token is not an integer.
    """
    pattern = "*c{}_bool.csv".format(c)
    per_file = []
    # os.path.join/basename keep this working with or without a trailing
    # separator on `directory`, and on platforms that do not use "/".
    for fn in glob.glob(os.path.join(directory, pattern)):
        z = int(os.path.basename(fn).split("_")[0].replace("z", ""))
        sample_counts = process_frame(fn)
        sample_counts["z_min"] = z
        sample_counts["c_min"] = int(c)

        per_file.append(sample_counts)

    if not per_file:
        # pd.concat([]) would raise a ValueError anyway; say which files
        # were expected so the failure is actionable.
        raise ValueError(
            "No files matching {!r} found in {!r}".format(pattern, directory)
        )

    return pd.concat(per_file)
        
# Per-sample enriched sizes across z thresholds at the default c (8).
sample_peptide_sizes = load_sizes(input_dir)

## Calculate Patient Peptide Sizes with varying c_min

In [18]:
# Stack the per-sample sizes for every enrichment-rate threshold
# c_min = 1..14 into a single frame.
varying_cmin = pd.concat(
    [load_sizes(input_dir, c_min) for c_min in range(1, 15)]
)

# Plots

## Plot Peptide Set Size as a function of Z-Score and Minimum Hit Rate

In [14]:
def plot_heatmap(frame):
    """Draw a heatmap of log10 peptide-set size over (z_min, c_min).

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with columns "z_min", "c_min" and "size".

    Returns
    -------
    plotly Figure
        Image plot with z_min on the rows and c_min on the columns; the
        x axis is drawn on top.
    """
    # Pivot the frame that was passed in rather than a notebook global, so
    # the function plots whatever table the caller supplies.
    mat = pd.pivot_table(
        frame,
        index = "z_min",
        columns = "c_min",
        values = "size"
    )

    # NOTE: a size of 0 becomes -inf under log10.
    fig = px.imshow(
        np.log10(mat),
        labels = dict(
            x = "Minimum Hit Rate Among Patients",
            y = "Minimum Z-Score Threshold Among Replicates",
            color = "Enriched Peptide Set Size (log10)"
        )
    )

    fig.update_xaxes(side = 'top')

    return fig

# Build the heatmap, save it as standalone HTML under fig_dir, then leave the
# figure as the cell's last expression so the notebook displays it.
fig_heatmap = plot_heatmap(peptide_set_size)
fig_heatmap.write_html(os.path.join(fig_dir, "zc_heatmap.html"))
fig_heatmap

## Plot Healthy vs Patient Enrichment Sizes

In [15]:
def plot_box(frame):
    """Box plot of enriched size per z threshold, split by sample class.

    `frame` needs the columns "z_min" (x axis), "enriched_size" (y axis)
    and "sample_class" (box colour). Returns the titled plotly figure.
    """
    box_fig = px.box(
        frame,
        x="z_min",
        y="enriched_size",
        color="sample_class",
    )

    # Titles are independent of one another; set the figure title first,
    # then the axis titles.
    box_fig.update_layout(
        title_text="Number of Enriched Peptides (Enrichment Rate Threshold = 8)"
    )
    box_fig.update_xaxes(title_text="Z-Score Threshold")
    box_fig.update_yaxes(title_text="Number of Enriched Peptides")
    return box_fig
    
    

# Build the box plot, save it as standalone HTML under fig_dir, then leave
# the figure as the cell's last expression so the notebook displays it.
fig_box = plot_box(sample_peptide_sizes)
fig_box.write_html(os.path.join(fig_dir, "varying_zscore_box.html"))
fig_box

## Plot Patient Enrichment with Varying Enrichment Rate Threshold

In [20]:
def plot_line_cmin(frame):
    """Line plot of the mean patient enriched size versus z threshold.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with columns "sample_class", "z_min", "c_min" and
        "enriched_size".

    Returns
    -------
    plotly Figure
        One line per "c_min" value; x is "z_min" and y is the mean
        "enriched_size" over rows whose sample_class is "Patient".
    """
    patients = frame[frame.sample_class == "Patient"]

    # A direct grouped mean gives the same (z_min, c_min, enriched_size)
    # table as groupby.apply with a Series-building lambda, without calling
    # Python code once per group.
    means = patients.groupby(
        ["z_min", "c_min"], as_index=False
    )["enriched_size"].mean()

    fig = px.line(
        means, x = "z_min", y = "enriched_size",
        color = "c_min"
    )

    fig.update_yaxes(title_text = "Number of Enriched Peptides")
    fig.update_xaxes(title_text = "Z-Score Threshold")
    return fig
    
# Build the line plot with plot_line_cmin (the function defined in this
# cell), save it as standalone HTML under fig_dir, then leave the figure as
# the cell's last expression so the notebook displays it.
fig_cmin_line = plot_line_cmin(varying_cmin)
fig_cmin_line.write_html(os.path.join(fig_dir, "varying_cmin_line.html"))
fig_cmin_line