In [None]:
import pandas as pd 
import plotly.express as px
import numpy as np
import pysam

def count_mapped_reads(bam_file):
    """Count the alignment records in a BAM file that are not flagged unmapped.

    Parameters
    ----------
    bam_file : path handed to ``pysam.AlignmentFile`` and opened in "rb" mode.

    Returns
    -------
    int
        Number of records for which ``is_unmapped`` is false.

    NOTE(review): no filtering on secondary/supplementary flags is applied
    here, so such records would be included in the count if present.
    """
    # The context manager closes the file once the tally is complete
    with pysam.AlignmentFile(bam_file, "rb") as handle:
        return sum(1 for record in handle if not record.is_unmapped)

In [None]:
# Location of the tab-separated sample metadata table (loaded later with pd.read_csv).
# NOTE(review): this path is relative to ../../ whereas the alignment paths used
# further down are relative to the working directory -- confirm the intended
# working directory for this notebook.
metadata_path = "../../config/metadata.tsv"

# Plate statistics

In this notebook, we explore how sample-level statistics look when we map out the samples by their position on a plate.
First, let's explore a histogram of the overall mapped reads per sample.

In [None]:
def extract_percentage(string):
    """Return the first percentage found in ``string`` as a float.

    A percentage is a run of digits, optionally followed by a decimal part,
    immediately followed by ``%`` (e.g. ``"98.50%"`` -> ``98.5``).

    Parameters
    ----------
    string : text to search. Non-string values (e.g. ``None`` or a NaN coming
        from an empty pandas cell) are treated as containing no percentage.

    Returns
    -------
    float or None
        The numeric value before the first ``%`` sign, or ``None`` when the
        input is not a string or holds no percentage.
    """
    import re

    # Guard: re.search would raise TypeError on non-text input; report
    # "no percentage" instead, consistent with the no-match case below.
    if not isinstance(string, str):
        return None

    # Pattern to match a percentage value
    pattern = r'(\d+(?:\.\d+)?)%'
    match = re.search(pattern, string)
    if match is None:
        return None
    return float(match.group(1))

# Sample sheet: one row per sample, tab-separated
df_samples = pd.read_csv(metadata_path, sep="\t")

# Per-sample mapping statistics, gathered in metadata row order
mapped_reads = []
freq_mapped = []
for sampleID in df_samples['sampleID']:
    bam_path = f"results/alignments/{sampleID}.bam"
    flagstat_path = f"results/alignments/bamStats/{sampleID}.flagstat"

    # Mapped alignment records counted directly from the BAM
    mapped_reads.append(count_mapped_reads(bam_path))

    # The flagstat text is parsed as a one-column table; with read_csv's default
    # header handling the first line becomes the header, so row 6 is a later line
    # of the report -- presumably the one carrying the mapped percentage.
    # TODO confirm against the samtools version that produced these files.
    flagstat = pd.read_csv(flagstat_path)
    freq_mapped.append(extract_percentage(flagstat.iloc[6, 0]))

In [None]:
# Attach the per-sample statistics collected above as new columns
df_samples = df_samples.assign(
    mapped_reads=mapped_reads,
    freq_mapped=freq_mapped,
)

# Distribution of mapped-read counts across all samples
fig = px.histogram(
    df_samples,
    x='mapped_reads',
    nbins=60,
    width=600,
    height=400,
)
fig

### Mapped reads per well

The plot below displays the samples in their 96-well plate format, showing the number of mapped reads assigned to each sample. Extra data on each sample is available by hovering over the wells.

In [None]:
def plot_96well_plate(df_samples, color_var='mapped_reads', title='Plate A - Number of mapped reads'):
    """Draw samples as a scatter grid laid out by their well coordinates.

    Parameters
    ----------
    df_samples : DataFrame with 'well_letter' and 'well_number' columns plus
        the column named by ``color_var``; all columns are shown on hover.
    color_var : name of the column used to colour the markers.
    title : figure title.

    Returns
    -------
    The plotly figure (not shown; the caller decides when to display it).
    """
    # Rows are supplied in reverse order -- presumably so the first well letter
    # ends up at the top of the y axis; TODO confirm.
    reversed_rows = df_samples[::-1]

    # x axis drawn along the top edge with a tick every 1 unit, starting from 0
    x_axis = dict(side='top', tickmode='linear', tick0=0, dtick=1)

    fig = px.scatter(
        reversed_rows,
        x='well_number',
        y='well_letter',
        color=color_var,
        hover_data=df_samples.columns,
        template='plotly_white',
    )
    fig.update_traces(marker_size=40)
    fig.update_layout(xaxis=x_axis, title=title)
    return fig

# One plate map per plate, coloured by the number of mapped reads
for plate in df_samples['plate'].unique():
    df = df_samples[df_samples['plate'] == plate]
    fig = plot_96well_plate(
        df,
        color_var='mapped_reads',
        title=f'Plate {plate} - Number of mapped reads',
    )
    fig.show()

In [None]:
# One plate map per plate, coloured by the percentage of reads aligned
for plate in df_samples['plate'].unique():
    df = df_samples[df_samples['plate'] == plate]
    fig = plot_96well_plate(
        df,
        color_var='freq_mapped',
        title=f'Plate {plate} - % of reads that align to reference',
    )
    fig.show()