In [None]:
# Input tables, given as relative paths. Both are read later with pandas as
# tab-separated files.
metadata_path = "../../config/metadata.tsv"  # sample metadata; supplies the sampleID column
bed_targets_path = "../../resources/AgamDao_info.tsv"  # amplicon panel target table

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def natural_sort( l ):
    """Sort an iterable of strings in the order humans expect.

    Runs of ASCII digits are compared by numeric value instead of character
    by character, so ``'a2'`` sorts before ``'a10'``.

    Parameters
    ----------
    l : iterable of str
        The strings to sort.

    Returns
    -------
    list of str
        A new list holding the items of ``l`` in natural order.
    """
    import re

    def alphanum_key(key):
        # The capturing group makes re.split keep the digit runs, so the
        # result always alternates text, digits, text, ... starting with
        # (possibly empty) text. Only the odd positions are converted to int.
        # Unlike a str.isdigit() check this never feeds non-ASCII digit
        # characters (e.g. '²') to int(), and every key keeps the same
        # str/int layout so keys are always comparable.
        chunks = re.split('([0-9]+)', key)
        return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]

    return sorted(l, key=alphanum_key)

### Coverage

In this notebook, we calculate coverage across the whole genome and specifically at the SNP targets of the amplicon panel.

In [None]:
# Sample metadata table (its sampleID column is used when loading coverage files).
metadata = pd.read_csv(metadata_path, sep="\t")

# Panel target table. The column names parsed from the file's first row are
# replaced with the fixed names below.
panel_columns = ['chrom', 'start', 'end', 'snp_target', 'type']
panel_metadata = pd.read_csv(bed_targets_path, sep="\t")
panel_metadata.columns = panel_columns

# Distinct contigs that appear in the panel table.
contigs = panel_metadata['chrom'].unique()

#### Whole-genome

Let's plot coverage across the whole genome to see if we have off-target effects!

In [None]:
# Read every sample's per-base depth table, tag it with the sample ID and keep
# only rows on contigs that appear in the panel table.
depth_columns = ["chrom", "start", "end", "depth"]
cov_list = []
for sampleID in metadata['sampleID']:
    sample_cov = pd.read_csv(
        f"results/coverage/{sampleID}.per-base.bed.gz",
        sep="\t",
        header=None,
    )
    sample_cov.columns = depth_columns
    sample_cov = sample_cov.assign(sampleID=sampleID)
    cov_list.append(sample_cov.query("chrom in @contigs"))

# All samples stacked into one long table.
cov_df = pd.concat(cov_list, axis=0)

# Sum depth over rows that share exactly the same (chrom, start, end), then
# add the integer midpoint of each interval for use as the plot x coordinate.
total_cov_df = (
    cov_df
    .groupby(['chrom', 'start', 'end'])['depth']
    .sum()
    .reset_index()
)
total_cov_df = total_cov_df.assign(
    midpoint=lambda df: ((df['start'] + df['end']) / 2).astype(int)
)

In [None]:
# One scatter plot per contig: summed depth against interval midpoint, with a
# faint dashed vertical line at the start coordinate of each panel target.
for contig in contigs:
    contig_cov = total_cov_df.query("chrom == @contig")
    fig = px.scatter(
        contig_cov,
        x='midpoint',
        y="depth",
        color_discrete_sequence=['lightsalmon'],
        title=contig,
        template='simple_white',
        height=300,
    )

    # Unique (start, snp_target) pairs on this contig; one line is drawn per pair.
    amp_lines = panel_metadata.query("chrom == @contig")[['start', 'snp_target']].drop_duplicates()
    for target_start in amp_lines['start']:
        fig.add_vline(x=target_start, line_width=1, line_dash="dash", opacity=0.2)

    fig.show()

### Coverage at each target SNP

In [None]:
# For every row of the panel table, collect the coverage rows on the same
# contig whose interval lies entirely inside that target's [start, end] window,
# and label them with the target name. The sample ID column is dropped.
per_target_frames = []
for _, target in panel_metadata.iterrows():
    inside_target = cov_df.query(
        "chrom == @target.chrom and start >= @target.start and end <= @target.end"
    )
    per_target_frames.append(
        inside_target.assign(snptarget=target.snp_target).drop(columns='sampleID')
    )

targets_cov = pd.concat(per_target_frames)

# Target names as strings, in natural order, for ordering the plot axis.
snp_targets_sorted = natural_sort(targets_cov['snptarget'].unique().astype(str))

In [None]:
# Box plot of depth per SNP target, with the x axis categories laid out in
# the natural-sort order computed above. update_xaxes returns the figure, so
# the call can be chained.
fig = (
    px.box(targets_cov, x='snptarget', y='depth', width=1000)
    .update_xaxes(categoryorder='array', categoryarray=snp_targets_sorted)
)
fig.show()