In [10]:
#This is a notebook for our figure
import math
import pysam
import numpy as np
import statistics
from itertools import groupby
from scipy.interpolate import make_interp_spline
import operator

import bokeh.io
import bokeh.plotting
import bokeh.models



#### We start by loading in the .bam file and parsing through all properly paired reads:

In [7]:
# Collect, for read 1 of every properly paired read pair, the absolute insert
# size and the mean of the read's per-base quality scores.

# create holder datastructures
mean_base_qualities = []
insert_sizes = []
zipped = []  # (insert size, mean base quality) tuples, one per kept read

# number of reads kept so far
i = 0

# load in the .bam file representing alignment; the `with` block guarantees the
# file handle is closed even if parsing fails part-way through
with pysam.AlignmentFile("../data/alignments/SRR13397491_sorted.bam", "rb") as bam_file:
    # loop over every read in the .bam file
    for read in bam_file:
        # Only processing read 1 of properly paired reads, so that each pair
        # contributes a single data point
        if not (read.is_paired and read.is_proper_pair and read.is_read1):
            continue
        # a template length of 0 carries no size information
        if read.template_length == 0:
            continue
        # reads stored without base qualities cannot be averaged
        qualities = read.query_qualities
        if qualities is None or len(qualities) == 0:
            continue

        i += 1
        mean_bq = np.mean(qualities)
        # template_length is signed; only its magnitude is of interest here
        i_size = abs(read.template_length)
        zipped.append((i_size, mean_bq))
        mean_base_qualities.append(mean_bq)
        insert_sizes.append(i_size)

#### Now we group the reads by insert size (in ascending order) and average the mean base quality within each group, for easier interpretability:

In [12]:
# Key functions for the (insert size, mean quality) tuples held in `zipped`.
size_of = operator.itemgetter(0)
quality_of = operator.itemgetter(1)

# groupby only merges adjacent items, so the pairs are sorted by insert size first.
pairs_by_size = sorted(zipped, key=size_of)

# One (insert size, average of the per-read mean qualities) entry per distinct size.
res = [
    (size, statistics.mean(quality_of(pair) for pair in members))
    for size, members in groupby(pairs_by_size, key=size_of)
]

# Transpose into two parallel lists: list1 = insert sizes, list2 = averaged qualities.
tuples = zip(*res)
list1, list2 = (list(column) for column in tuples)

# Evenly spaced grid spanning the observed insert sizes, with one point per
# distinct size (list1 is ascending, so its ends are the minimum and maximum).
xnew = np.linspace(list1[0], list1[-1], len(list1))


In [16]:
# Shell escape that prints the current working directory; the relative "../data"
# and "../figures" paths used in this notebook are resolved against it.
!pwd

/Users/anastasiyagrebin/Desktop/OneDrive_California_Institute_of_Technology/grad_classes/20.440/zomBEES/code


In [17]:
# Line plot of the averaged base quality for each observed insert size,
# exported as a PNG.
figure = bokeh.plotting.figure(
    title="Read quality as a function of size",
    width=600,
    height=400,
    x_axis_label="Read sizes",
    y_axis_label="Quality scores",
)

# Each entry of list2 is the average for the insert size at the same index of
# list1, so list1 supplies the x-coordinates. An evenly spaced grid would
# misplace points wherever the observed sizes have gaps.
figure.line(
    list1,
    list2,
    color="red"
)

# export_png returns the path of the written file, which the cell then displays
bokeh.io.export_png(figure, filename="../figures/fig1.png")

'/Users/anastasiyagrebin/Desktop/OneDrive_California_Institute_of_Technology/grad_classes/20.440/zomBEES/figures/fig1.png'