Calculates Enrichments for a given input read count dataset

# Load Modules

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from plotly.subplots import make_subplots
from multiprocessing import Pool
from tqdm import tqdm
import subprocess
import os
import datetime
import shutil

pio.templates.default = 'plotly_white'
pd.options.mode.chained_assignment = None

# Define Paths

In [2]:
# Path to the raw count input matrix (CSV); passed to preprocess.sh and
# recorded in the log written at the end of the notebook.
fn_raw = "../data/raw_counts/TororoKanunguRound2.csv"

# Output directory for this dataset: passed to preprocess.sh and used as the
# input directory of every enrichment run below.
fn_outdir = "../data/enriched/TororoKanunguRound2/"

## Create Enrichment Directory

In [3]:
# Make sure the parent directory for all enrichment outputs exists.
# exist_ok=True keeps the cell safe to re-run and removes the race between
# checking for the directory and creating it.
os.makedirs("../data/enriched", exist_ok=True)

# Run Pipeline

## Preprocess

In [4]:
# Launch the preprocessing script with the raw-count file and the output
# directory as its two positional arguments. The command is given as an
# argument list (no shell), so paths containing spaces or shell
# metacharacters reach the script unchanged.
# NOTE(review): running without a shell requires preprocess.sh to be
# executable and to start with a shebang line — confirm.
pipeline_p = subprocess.Popen(
    args=[
        "../src/PhageLoader/pipeline/preprocess.sh",
        fn_raw,
        fn_outdir,
    ],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
stdout, stderr = pipeline_p.communicate()

# Echo everything the script wrote to stderr. Undecodable bytes are replaced
# rather than raising UnicodeDecodeError, so the diagnostics are never lost.
for line in stderr.decode("utf-8", errors="replace").split("\n"):
    print(line)

# The enrichment runs below read their inputs from fn_outdir, so stop here
# instead of continuing with missing or partial files.
if pipeline_p.returncode != 0:
    raise RuntimeError(
        "preprocess.sh exited with status {}".format(pipeline_p.returncode)
    )


real	0m0.001s
user	0m0.002s
sys	0m0.000s

real	0m0.970s
user	0m0.892s
sys	0m0.183s

real	0m2.878s
user	0m2.557s
sys	0m0.665s

real	0m10.490s
user	0m9.268s
sys	0m1.212s



## Enrichment

### Create Enrichment Directory

In [5]:
# Create the enrichments/ subdirectory inside the configured output directory.
# The path is derived from fn_outdir rather than repeated as a literal, so
# changing the dataset in the paths cell cannot leave this cell creating a
# directory for a different dataset. exist_ok=True makes the cell re-runnable.
os.makedirs(os.path.join(fn_outdir, "enrichments"), exist_ok=True)

In [6]:
def run_enrichment(input_dir, z=10, c=8):
    """Run the PhageLoader ``enrichment`` binary once for a (z, c) pair.

    The binary is invoked with ``<input_dir>/raw_counts.arma`` (``-i``),
    ``<input_dir>/sample_names.txt`` (``-n``) and
    ``<input_dir>/peptide_names.txt`` (``-p``); its output name (``-o``) is
    ``<input_dir>/enrichments/z<z>_c<c>``.

    Parameters
    ----------
    input_dir : str
        Directory containing the three input files listed above.
    z : int, optional
        Value forwarded to the binary as ``-z`` (default 10).
    c : int, optional
        Value forwarded to the binary as ``-c`` (default 8).

    Raises
    ------
    OSError
        If the enrichment binary cannot be started.
    subprocess.CalledProcessError
        If the binary exits with a non-zero status.
    """
    out_name = "{}/enrichments/z{}_c{}".format(input_dir, z, c)

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; every element must already be a string.
    command = [
        "../src/PhageLoader/bin/enrichment",
        "-i", "{}/raw_counts.arma".format(input_dir),
        "-n", "{}/sample_names.txt".format(input_dir),
        "-p", "{}/peptide_names.txt".format(input_dir),
        "-o", out_name,
        "-z", str(z),
        "-c", str(c),
    ]
    enrichment_p = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout, stderr = enrichment_p.communicate()

    # Only report success when the binary actually succeeded; otherwise show
    # its stderr and raise so a failed run is not mistaken for written output.
    if enrichment_p.returncode != 0:
        print(stderr.decode("utf-8", errors="replace"))
        raise subprocess.CalledProcessError(
            enrichment_p.returncode, command, output=stdout, stderr=stderr
        )

    print("wrote to : {}".format(out_name))
    
def write_log(fn_outdir, fn_raw):
    """Write ``<fn_outdir>/log.txt`` describing this enrichment run.

    The log holds a heading, the current timestamp and the path of the raw
    count file. An existing log file is overwritten.

    Parameters
    ----------
    fn_outdir : str
        Directory in which ``log.txt`` is created.
    fn_raw : str
        Path of the raw count input, recorded verbatim in the log.
    """
    # The context manager guarantees the handle is flushed and closed even if
    # the write raises; plain "w" suffices because the file is never read back.
    with open("{}/log.txt".format(fn_outdir), "w") as log:
        log.write(
            "# Enrichment Generation\n\nGenerated : {}\nInput Counts : {}\n".format(
                datetime.datetime.now(),
                fn_raw
            )
        )

## Run On Default Z-Threshold and C-min

In [7]:
# Run once with run_enrichment's default parameters (z=10, c=8)
run_enrichment(fn_outdir)

wrote to : ../data/enriched/TororoKanunguRound2//enrichments/z10_c8


## Run over parameter matrix

In [None]:
# Sweep the full parameter grid: every (z, c) pair with both values in 1..14,
# iterating c fastest for each z.
for z, c in (
    (z_val, c_val)
    for z_val in np.arange(1, 15)
    for c_val in np.arange(1, 15)
):
    run_enrichment(fn_outdir, z, c)

# After the sweep, record the generation time and the raw-count source.
write_log(fn_outdir, fn_raw)