In [None]:
import copy
import glob
import gzip
import os
import shutil

import numpy as np
from Bio import SeqIO, motifs
from Bio.Seq import Seq
from matplotlib import pyplot as plt

In [None]:
def gunzip_all(headpath):
    """Decompress every ``.gz`` file found anywhere below ``headpath``.

    Each archive is expanded next to itself, into a file with the same name
    minus its final extension. The compressed original is left in place and
    an already existing output file is overwritten.

    Parameters
    ----------
    headpath : str
        Root directory that is searched recursively for ``*.gz`` files.
    """
    pattern = headpath + "/**/*.gz"
    for gz_path in glob.iglob(pattern, recursive=True):
        target_path, _ = os.path.splitext(gz_path)
        # Stream the payload so a large archive is never held fully in memory.
        with gzip.open(gz_path, "rb") as packed, open(target_path, "wb") as unpacked:
            shutil.copyfileobj(packed, unpacked)

In [None]:
headpath = "/n/scratch3/users/d/de64/2020-08-22_lDE11_run"

In [None]:
"/n/scratch3/users/d/de64/2020-08-22_lDE11_run/BC2_L001-ds.c37aa7639b2b4ee188346cb6e15bac31/"

### Unzip All

In [None]:
gunzip_all(headpath)

In [None]:
!head '/n/scratch3/users/d/de64/2020-08-22_lDE11_run/BC1_L001-ds.b282eb9272a34059a378b4aeae518283/BC1_S2_L001_R1_001.fastq'

### Binning by Nmer

In [None]:
datapath = "/n/scratch3/users/d/de64/2020-08-22_lDE11_run/BC1_L001-ds.b282eb9272a34059a378b4aeae518283/BC1_S2_L001_R1_001.fastq"

In [None]:
def rev_comp(instr):
    """Return the reverse complement of a nucleotide string.

    The input is upper-cased before it is wrapped in a ``Seq``, so the result
    is upper case regardless of the case of ``instr``.

    Parameters
    ----------
    instr : str
        Nucleotide sequence.

    Returns
    -------
    str
        Reverse complement of ``instr`` as a plain string.
    """
    return str(Seq(instr.upper()).reverse_complement())


def get_fastq_paths(headpath):
    """Index the FASTQ files below ``headpath`` by dataset name and read.

    File names are split on ``_``: the first token is used as the dataset
    name and the second-to-last token identifies the read, as in
    ``BC1_S2_L001_R1_001.fastq`` (dataset ``BC1``, read ``R1``).

    Parameters
    ----------
    headpath : str
        Root directory that is searched recursively for ``*.fastq`` files.

    Returns
    -------
    tuple of (dict, dict)
        ``(fwdread_paths, revread_paths)``, mapping dataset name to the path
        of its ``R1`` and ``R2`` file respectively. When several files share
        a dataset name and read, the last one yielded by ``glob`` wins.
    """
    fwdread_paths = {}
    revread_paths = {}
    for path in glob.iglob(headpath + "/**/*.fastq", recursive=True):
        name_parts = os.path.basename(path).split("_")
        if len(name_parts) < 2:
            # No read token to inspect, so this file cannot be classified.
            continue
        dset_name = name_parts[0]
        read_ID = name_parts[-2]
        if read_ID == "R1":
            fwdread_paths[dset_name] = path
        elif read_ID == "R2":
            revread_paths[dset_name] = path
        # Any other token (for example an I1/I2 index read) is neither the
        # forward nor the reverse read and must not overwrite the R2 entry.
    return fwdread_paths, revread_paths


def get_Nmer_list(
    fastqpath,
    handle_seq="ACGAACGTTAGCAGCACTAT",
    reverse_complement=False,
    Nmer_len=15,
):
    """Extract the N-mer next to a fixed handle sequence from every read.

    Parameters
    ----------
    fastqpath : str
        Path of the FASTQ file to scan.
    handle_seq : str
        Handle sequence that anchors the N-mer.
    reverse_complement : bool
        When False, the handle is searched as given and the N-mer is the
        ``Nmer_len`` bases immediately after it. When True, the reverse
        complement of the handle is searched, the N-mer is the ``Nmer_len``
        bases immediately before it, and the N-mer is reverse-complemented
        before being stored.
    Nmer_len : int
        Length of the N-mer.

    Returns
    -------
    dict
        Maps the 0-based position of each record in the file to its N-mer
        string, or to None when the handle is absent or the read does not
        contain a full-length N-mer next to it.
    """
    if reverse_complement:
        handle_seq = rev_comp(handle_seq)

    handle_len = len(handle_seq)
    Nmer_dict = {}

    for idx, record in enumerate(SeqIO.parse(fastqpath, "fastq")):
        read = str(record.seq)
        # find() returns -1 when the handle is missing; 0 is a genuine hit.
        handle_start = read.find(handle_seq)
        Nmer = None
        if handle_start >= 0:
            if reverse_complement:
                Nmer_start = handle_start - Nmer_len
                # A negative start would wrap around to the end of the read
                # and silently produce an empty or wrong N-mer.
                if Nmer_start >= 0:
                    Nmer = rev_comp(read[Nmer_start:handle_start])
            else:
                Nmer_start = handle_start + handle_len
                candidate = read[Nmer_start : Nmer_start + Nmer_len]
                # Reads that end inside the N-mer would otherwise be counted
                # as distinct, shorter barcodes.
                if len(candidate) == Nmer_len:
                    Nmer = candidate
        Nmer_dict[idx] = Nmer
    return Nmer_dict


def get_Nmer_codebook(Nmer_list, final_Nmer_idx):
    """Translate each read's N-mer into its barcode index.

    Parameters
    ----------
    Nmer_list : dict
        Maps a read index to its N-mer (or None).
    final_Nmer_idx : dict
        Maps each accepted N-mer to an integer index.

    Returns
    -------
    dict
        Maps every read index in ``Nmer_list`` to the index of its N-mer, or
        to None when the N-mer is not a key of ``final_Nmer_idx``.
    """
    return {
        read_idx: final_Nmer_idx.get(Nmer) for read_idx, Nmer in Nmer_list.items()
    }


def get_perc_mapped(Nmer_codebook):
    """Return the fraction of reads that were assigned a barcode index.

    Despite the name, the result is a fraction in [0, 1], not a percentage.

    Parameters
    ----------
    Nmer_codebook : dict
        Maps a read index to a barcode index, or to None when unmapped.

    Returns
    -------
    numpy.float64
        Number of non-None values divided by the number of entries; NaN for
        an empty codebook.
    """
    # Identity test: an index of 0 is a valid mapping and must count as mapped.
    mapped_arr = np.array(
        [val is not None for val in Nmer_codebook.values()], dtype=bool
    )
    if mapped_arr.shape[0] == 0:
        # Avoid the 0/0 RuntimeWarning; the fraction is undefined here.
        return np.float64("nan")
    perc_mapped = np.sum(mapped_arr) / mapped_arr.shape[0]
    return perc_mapped

In [None]:
fwdread_paths, revread_paths = get_fastq_paths(headpath)

In [None]:
Nmer_list_dict = {}

In [None]:
# Extract the N-mer of every read, one read file per dataset: GFP is read from
# its reverse file with the handle reverse-complemented, BC1 and BC2 from
# their forward files with the handle as given.
for _dset, _read_paths, _use_revcomp in (
    ("GFP", revread_paths, True),
    ("BC1", fwdread_paths, False),
    ("BC2", fwdread_paths, False),
):
    Nmer_list_dict[_dset] = get_Nmer_list(
        _read_paths[_dset],
        handle_seq="ACGAACGTTAGCAGCACTAT",
        reverse_complement=_use_revcomp,
    )

In [None]:
# Pool the N-mers of all datasets, dropping reads without one.
all_Nmers = [
    Nmer
    for Nmer_list in Nmer_list_dict.values()
    for Nmer in Nmer_list.values()
    if Nmer is not None
]
all_Nmer_arr = np.array(all_Nmers)

# Keep only N-mers seen more than `min_Nmer_count` times across all datasets;
# each survivor gets a consecutive integer index in sorted N-mer order.
min_Nmer_count = 30
unique, counts = np.unique(all_Nmer_arr, return_counts=True)
is_abundant = counts > min_Nmer_count
final_Nmer_arr = unique[is_abundant]
final_Nmer_arr_counts = counts[is_abundant]
final_Nmer_idx = dict(zip(final_Nmer_arr, range(len(final_Nmer_arr))))

In [None]:
plt.hist(counts, bins=50, range=(0, 100))
plt.show()

In [None]:
# One read-index -> barcode-index codebook per dataset.
Nmer_codebooks = {
    dset: get_Nmer_codebook(Nmer_list_dict[dset], final_Nmer_idx)
    for dset in ("GFP", "BC1", "BC2")
}

In [None]:
# Report, per dataset, the fraction of reads whose N-mer is an accepted barcode.
print("Percent of reads mapped to UMI: ")
for dset_label in ("GFP", "BC1", "BC2"):
    print(dset_label + ": " + str(get_perc_mapped(Nmer_codebooks[dset_label])))

### Make consensuses

In [None]:
len(final_Nmer_idx)

In [None]:
def group_reads(
    final_Nmer_idx, reads_path, Nmer_codebook, handle_seq, min_read_len=142
):
    """Collect handle-anchored reads from one FASTQ file, grouped by barcode.

    Parameters
    ----------
    final_Nmer_idx : dict
        Accepted barcodes; only its length is used, to create one (initially
        empty) group per barcode index.
    reads_path : str
        Path of the FASTQ file to read.
    Nmer_codebook : dict
        Maps the 0-based position of each record to its barcode index, or to
        None for reads without an accepted barcode.
    handle_seq : str
        Sequence marking where the kept part of each read starts.
    min_read_len : int
        Exact number of bases kept from the start of the handle; reads with
        fewer bases available are dropped.

    Returns
    -------
    dict
        Maps each barcode index to a list of sequence slices of length
        ``min_read_len``, each starting at the handle.
    """
    grouped_reads = {i: [] for i in range(len(final_Nmer_idx))}

    for idx, record in enumerate(SeqIO.parse(reads_path, "fastq")):
        barcode_idx = Nmer_codebook[idx]
        if barcode_idx is None:
            continue
        handle_start = record.seq.find(handle_seq)
        if handle_start < 0:
            # Handle absent: there is no anchor, so the read cannot be aligned
            # with the others in its group.
            continue
        anchored = record.seq[handle_start : handle_start + min_read_len]
        if len(anchored) == min_read_len:
            grouped_reads[barcode_idx].append(anchored)
    return grouped_reads


def get_all_grouped_reads(
    key_list, handle_dict, fwdread_paths, revread_paths, Nmer_codebooks, final_Nmer_idx
):
    """Group the forward and reverse reads of several datasets by barcode.

    Parameters
    ----------
    key_list : iterable
        Dataset names to process.
    handle_dict : dict
        Maps a dataset name to its ``(forward handle, reverse handle)`` pair.
    fwdread_paths, revread_paths : dict
        Map a dataset name to its forward / reverse FASTQ path.
    Nmer_codebooks : dict
        Maps a dataset name to its read-index -> barcode-index codebook; the
        same codebook is used for both read files of a dataset.
    final_Nmer_idx : dict
        Accepted barcodes, passed through to ``group_reads``.

    Returns
    -------
    dict
        Maps each dataset name to ``[forward groups, reverse groups]``, each
        being the result of ``group_reads``.
    """
    grouped_reads_dict = {}
    for dset in key_list:
        read_paths = (fwdread_paths[dset], revread_paths[dset])
        fwd_handle, rev_handle = handle_dict[dset]
        codebook = Nmer_codebooks[dset]
        grouped_reads_dict[dset] = [
            group_reads(final_Nmer_idx, read_path, codebook, handle)
            for read_path, handle in zip(read_paths, (fwd_handle, rev_handle))
        ]
    return grouped_reads_dict

In [None]:
# Datasets to group, in processing order.
key_list = ["GFP", "BC1", "BC2"]
# Per dataset: [forward-read handle, reverse-read handle]. get_all_grouped_reads
# unpacks each entry in exactly this order.
handle_dict = {
    "GFP": ["AAGTAGTGACAAGTGTTGGC", "AGGCTAGCTAACGTTACTGT"],
    "BC1": ["ACGAACGTTAGCAGCACTAT", "GTATCTGTTATGTAATTGCTAG"],
    "BC2": ["ACGAACGTTAGCAGCACTAT", "ATTACTGATGGCAATGTGAT"],
}

# Result layout: grouped_reads_dict[dataset] == [forward groups, reverse groups],
# where each "groups" dict maps a barcode index to its list of reads.
grouped_reads_dict = get_all_grouped_reads(
    key_list, handle_dict, fwdread_paths, revread_paths, Nmer_codebooks, final_Nmer_idx
)

In [None]:
grouped_reads_dict["BC1"][0]

#### Filter out barcodes with low representation in at least one group

In [None]:
def get_underrep_barcodes(grouped_reads_dict, min_count=5):
    """List the barcodes that are short of reads in at least one read group.

    Parameters
    ----------
    grouped_reads_dict : dict
        Maps a dataset name to ``[forward groups, reverse groups]``, where
        each "groups" dict maps a barcode index to its list of reads.
    min_count : int
        A barcode is flagged when any of its groups holds fewer reads than
        this.

    Returns
    -------
    list
        Sorted barcode indices, without duplicates, that have fewer than
        ``min_count`` reads in the forward or reverse group of any dataset.
    """
    flagged = {
        barcode_idx
        for read_groups in grouped_reads_dict.values()
        for direction_groups in (read_groups[0], read_groups[1])
        for barcode_idx, reads in direction_groups.items()
        if len(reads) < min_count
    }
    return sorted(flagged)

In [None]:
underrep_barcodes = get_underrep_barcodes(grouped_reads_dict)

In [None]:
len(underrep_barcodes)

In [None]:
def remove_underrep(grouped_reads, underrep_barcodes):
    """Drop the flagged barcodes from one group dict and renumber the rest.

    Parameters
    ----------
    grouped_reads : dict
        Maps a barcode index to its list of reads.
    underrep_barcodes : iterable
        Barcode indices to remove.

    Returns
    -------
    dict
        New dict holding only the surviving groups, in their original order,
        re-keyed ``0 .. n_kept - 1``. ``grouped_reads`` is not modified.
    """
    excluded = set(underrep_barcodes)
    # Start from an empty dict: seeding the output with a copy of the input
    # would leave the old entries at keys >= n_kept in place, so nothing
    # would actually be removed.
    kept_groups = (
        reads for barcode_idx, reads in grouped_reads.items()
        if barcode_idx not in excluded
    )
    return dict(enumerate(kept_groups))


def remove_all_underrep(grouped_reads_dict, underrep_barcodes):
    """Apply ``remove_underrep`` to the forward and reverse groups of every dataset.

    Parameters
    ----------
    grouped_reads_dict : dict
        Maps a dataset name to ``[forward groups, reverse groups]``.
    underrep_barcodes : iterable
        Barcode indices to remove from every group dict.

    Returns
    -------
    dict
        New dict with the same keys and layout, containing the filtered and
        renumbered groups. Neither ``grouped_reads_dict`` nor its inner lists
        are modified.
    """
    represented_reads_dict = {}
    for key, read_groups in grouped_reads_dict.items():
        # Copy the inner list: assigning into the caller's list would replace
        # the unfiltered groups in grouped_reads_dict as a side effect.
        filtered_groups = list(read_groups)
        filtered_groups[0] = remove_underrep(read_groups[0], underrep_barcodes)
        filtered_groups[1] = remove_underrep(read_groups[1], underrep_barcodes)
        represented_reads_dict[key] = filtered_groups

    return represented_reads_dict

In [None]:
represented_reads_dict = remove_all_underrep(grouped_reads_dict, underrep_barcodes)

In [None]:
represented_reads_dict["BC1"][1]

In [None]:
def get_group_consensus(grouped_reads):
    """Build one consensus sequence per read group.

    Parameters
    ----------
    grouped_reads : dict
        Maps a barcode index to its list of reads; only the values are used,
        in dict order.

    Returns
    -------
    numpy.ndarray
        Array of consensus strings, one per group, in the iteration order of
        ``grouped_reads``.
    """
    return np.array(
        [str(motifs.create(reads).consensus) for reads in grouped_reads.values()]
    )

In [None]:
consensus_seqs = get_group_consensus(represented_reads_dict["BC1"][1])

In [None]:
consensus_seqs[0]

#### Bit Extraction

In [None]:
def get_bitmap(start_bit, start_bit_idx, rev_read, bit_len=20, read_length=145):
    """Map bit numbers to the slice of the read that holds each bit.

    Candidate windows start at ``start_bit_idx`` and repeat every
    ``bit_len + 1`` positions while the start is below ``read_length``; the
    last candidate is always discarded.

    Parameters
    ----------
    start_bit : int
        Bit number assigned to the first window.
    start_bit_idx : int
        Position of the first window in the read.
    rev_read : bool
        When true, bit numbers count down from ``start_bit``; otherwise they
        count up.
    bit_len : int
        Width of each window.
    read_length : int
        Exclusive upper bound for window start positions.

    Returns
    -------
    dict
        Maps each bit number to ``slice(start, start + bit_len)``.
    """
    direction = -1 if rev_read else 1
    # A stride of bit_len + 1 leaves one position between consecutive windows.
    bit_starts = range(start_bit_idx, read_length, bit_len + 1)[:-1]
    return {
        start_bit + direction * offset: slice(bit_start, bit_start + bit_len)
        for offset, bit_start in enumerate(bit_starts)
    }

In [None]:
# Arguments are (start_bit, start_bit_idx, rev_read).
# Forward maps number their bits upward from 0, starting at read position 56;
# reverse maps number their bits downward from 9 (BC1) or 29 (BC2), starting
# at read position 21.
bc1_f_map = get_bitmap(0, 56, False)
bc1_r_map = get_bitmap(9, 21, True)
bc2_f_map = get_bitmap(0, 56, False)
bc2_r_map = get_bitmap(29, 21, True)

In [None]:
bc1_r_map

#### Define Bit Sequence Reference

In [None]:
pos_seqs = [
    "ACACTACCACCATTTCCTAT",
    "AAACACACACTAAACCACCC",
    "ATCCTCCTTCAATACATCCC",
    "TATCTCATCAATCCCACACT",
    "ACTCCACTACTACTCACTCT",
    "AACTCATCTCAATCCTCCCA",
    "ACCACAACCCATTCCTTTCA",
    "TCTATCATCTCCAAACCACA",
    "ACCCTCTAACTTCCATCACA",
    "AATACTCTCCCACCTCAACT",
    "TTTCTACCACTAATCAACCC",
    "TCCAACTCATCTCTAATCTC",
    "TCCTATTCTCAACCTAACCT",
    "ATAAATCATTCCCACTACCC",
    "ACCCTTTACAAACACACCCT",
    "TTCCTAACAAATCACATCCC",
    "TATCCTTCAATCCCTCCACA",
    "ACCCAACACTCATAACATCC",
    "TTTACTCCCTACACCTCCAA",
    "ACTTTCCACATACTATCCCA",
    "ACATTACACCTCATTCTCCC",
    "TACTACAAACCCATAATCCC",
    "TTCTCCCTCTATCAACTCTA",
    "TTCTTCCCTCAATCTTCATC",
    "TCCTAACAACCAACTACTCC",
    "ACCTTTCTCCATACCCAACT",
    "ACCCTTACTACTACATCATC",
    "AATCTCACCTTCCACTTCAC",
    "TCTATCATTACCCTCCTCCT",
    "TCCTCATCTTACTCCCTCTA",
]

neg_seqs = [
    "TCACCTTTCTCCTTTCCTCT",
    "CCCTCTACTCTCCATCTTAT",
    "AACCTCCTCTCTCCATCATA",
    "TCACCATAATTCCTCCTCCT",
    "ACCAACTTCCACACATCACT",
    "CCCTCTTACTTATCTACCCA",
    "ACATCTTCTCTCCAACCTTC",
    "TATCATCCTCCTTCTCTCAC",
    "CTTCTTCTCTTACACCCTCT",
    "TCCCACCTTCACTTCACTAT",
    "CACCCTAACATACAACTCTC",
    "AAACTTCATCACTCTCCTCC",
    "TCAATCCACCATTCCTCAAC",
    "TAAAACCCATCCCACATCCT",
    "TTAAACAACCCATCCCACCA",
    "CATAACCCTACACACAACAC",
    "CTCTCTACACCCACCAATAA",
    "ATTCCATACCCACTCTCTTC",
    "CCCTTACCAACAACAATCCT",
    "TCAACTCATTACCCACAACC",
    "CATATCCAACCACAACCTCA",
    "CAACCACACTCAACTACCAT",
    "ACCTTCTACTCCCAACATTC",
    "CCTCTTCATCCTCTTTCAAC",
    "AACTCACAAACACCTCACCT",
    "CCCAAAACCACACACCAATT",
    "ATCCATATCCTTCTCACCCT",
    "CTCTTAACTACCCTCATTCC",
    "TTTCCTTCTTCCCACCAACT",
    "CAACCACCAACTTCAATCTC",
]

# One row per bit, two columns: row i is (neg_seqs[i], pos_seqs[i]). The
# decoding helpers take the argmin over a row, so an assignment of 0 means the
# negative sequence was closest and 1 means the positive sequence was closest.
bc_ref_map = np.array(list(zip(neg_seqs, pos_seqs)))

In [None]:
bc_ref_map.shape

#### Decode Consensus Bits

In [None]:
def str_to_int(string):
    """Encode a nucleotide string as an integer array.

    ``A``, ``C``, ``G``, ``T`` map to 0-3 and the no-call symbol ``N`` maps
    to 4. Any other character (including lower case) raises ``KeyError``.

    Parameters
    ----------
    string : str
        Upper-case nucleotide sequence.

    Returns
    -------
    numpy.ndarray
        One integer code per character of ``string``.
    """
    # "N" gets its own code so that raw reads containing no-calls can be
    # compared instead of aborting the whole analysis with a KeyError.
    code = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
    conv_str = np.array([code[base] for base in string])
    return conv_str


def compare_seqs(target_arr, reference_arr):
    """Compute the Hamming distance between every target and every reference.

    All sequences must have the same length. An ``N`` differs from every
    ``A``/``C``/``G``/``T`` and only equals another ``N``.

    Parameters
    ----------
    target_arr : iterable of str
        Query sequences.
    reference_arr : iterable of str
        Reference sequences.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(target_arr), len(reference_arr))``;
        entry ``[i, j]`` is the number of positions at which target ``i`` and
        reference ``j`` differ.
    """
    target_int_arr = np.array([str_to_int(seq) for seq in target_arr], dtype="uint8")
    reference_int_arr = np.array(
        [str_to_int(seq) for seq in reference_arr], dtype="uint8"
    )

    # Broadcast to (targets, references, positions) and count the mismatches.
    mismatch_arr = (
        target_int_arr[:, np.newaxis, :] != reference_int_arr[np.newaxis, :, :]
    )
    hamming_arr = np.sum(mismatch_arr, axis=2, dtype=int)

    return hamming_arr


def get_bit_assignment(seq_arr, bc_ref, single_bit_map, rev_read):
    """Assign one bit of every sequence to its closest reference.

    Parameters
    ----------
    seq_arr : iterable of str
        Sequences to decode.
    bc_ref : sequence of str
        Candidate reference sequences for this bit.
    single_bit_map : slice
        Window of each sequence that holds the bit.
    rev_read : bool
        When true, the window is compared as it is; otherwise it is
        reverse-complemented first.

    Returns
    -------
    numpy.ndarray
        For each sequence, the index into ``bc_ref`` of the reference with the
        smallest Hamming distance (the first one on ties).
    """
    if rev_read:
        bit_windows = [seq[single_bit_map] for seq in seq_arr]
    else:
        bit_windows = [rev_comp(seq[single_bit_map]) for seq in seq_arr]

    distances = compare_seqs(np.array(bit_windows), bc_ref)
    return np.argmin(distances, axis=1)


def get_read_bit_assignment(seq_arr, bit_map, bc_ref_map, rev_read):
    """Decode every bit listed in ``bit_map`` for every sequence.

    Parameters
    ----------
    seq_arr : iterable of str
        Sequences to decode.
    bit_map : dict
        Maps a bit number to the slice of the sequence that holds it.
    bc_ref_map : indexable
        ``bc_ref_map[bit]`` gives the candidate reference sequences of a bit.
    rev_read : bool
        Passed through to ``get_bit_assignment``.

    Returns
    -------
    dict
        Maps each bit number to the array returned by ``get_bit_assignment``
        for that bit.
    """
    return {
        bit: get_bit_assignment(seq_arr, bc_ref_map[bit], bit_slice, rev_read)
        for bit, bit_slice in bit_map.items()
    }


def get_perc_matched(grouped_reads, bit_assignment, bit_map, bc_ref_map, rev_read):
    """Measure how often single reads agree with their group's bit call.

    Parameters
    ----------
    grouped_reads : dict
        Maps a group index to its list of reads.
    bit_assignment : dict
        Maps a bit number to an array indexable by group index, holding the
        reference call for that group (for example the consensus call).
    bit_map : dict
        Maps a bit number to the slice of the read that holds it.
    bc_ref_map : indexable
        ``bc_ref_map[bit]`` gives the candidate reference sequences of a bit.
    rev_read : bool
        Passed through to ``get_read_bit_assignment``.

    Returns
    -------
    dict
        Maps each bit number to a list with one entry per group, in the
        iteration order of ``grouped_reads``: the fraction of the group's
        reads whose own call equals ``bit_assignment[bit][group index]``.
    """
    perc_match_dict = {bit: [] for bit in bit_map.keys()}

    for group_idx, reads in grouped_reads.items():
        read_strs = np.array([str(read) for read in reads])
        read_calls = get_read_bit_assignment(read_strs, bit_map, bc_ref_map, rev_read)

        for bit, fractions in perc_match_dict.items():
            agrees = read_calls[bit] == bit_assignment[bit][group_idx]
            fractions.append(np.sum(agrees) / agrees.shape[0])

    return perc_match_dict

In [None]:
# BC1 forward reads: call each bit on the per-barcode consensus, then measure,
# per barcode and bit, the fraction of individual reads that agree with it.
# NOTE: `consensus_seqs` and `bit_assignment` are reassigned by every cell of
# this kind, so their content depends on which of these cells ran last.
consensus_seqs = get_group_consensus(represented_reads_dict["BC1"][0])
bit_assignment = get_read_bit_assignment(consensus_seqs, bc1_f_map, bc_ref_map, False)
bc1_f_perc_match_dict = get_perc_matched(
    represented_reads_dict["BC1"][0], bit_assignment, bc1_f_map, bc_ref_map, False
)

In [None]:
# Overlay the read/consensus agreement of BC1 bit 0 (forward) and bit 9
# (reverse) and save the figure as BC1.png.
# NOTE(review): `bc1_r_perc_match_dict` is only created by a cell further down
# the notebook, so this cell raises NameError on a fresh Restart & Run All.
# Run (or move) the BC1 reverse-read cell before this one.
plt.hist(
    bc1_f_perc_match_dict[0],
    range=(0.5, 1.0),
    bins=20,
    label="Bit 0",
    color="grey",
    alpha=0.7,
)
plt.hist(
    bc1_r_perc_match_dict[9],
    range=(0.5, 1.0),
    bins=20,
    label="Bit 9",
    color="salmon",
    alpha=0.7,
)
plt.ylim(0, 4000)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig("BC1.png", dpi=200)

In [None]:
# Overlay the read/consensus agreement of BC2 bit 0 (forward) and bit 29
# (reverse) and save the figure as BC2.png.
# NOTE(review): `bc2_f_perc_match_dict` and `bc2_r_perc_match_dict` are only
# created by cells further down the notebook, so this cell raises NameError on
# a fresh Restart & Run All. Run (or move) the BC2 cells before this one.
plt.hist(
    bc2_f_perc_match_dict[0],
    range=(0.5, 1.0),
    bins=20,
    label="Bit 0",
    color="grey",
    alpha=0.7,
)
plt.hist(
    bc2_r_perc_match_dict[29],
    range=(0.5, 1.0),
    bins=20,
    label="Bit 29",
    color="salmon",
    alpha=0.7,
)
plt.ylim(0, 4000)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig("BC2.png", dpi=200)

In [None]:
plt.hist(bc1_f_perc_match_dict[0], range=(0.5, 1.0), bins=20)
plt.ylim(0, 4000)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 0 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
plt.hist(bc1_f_perc_match_dict[3], range=(0.5, 1.0), bins=20)
plt.ylim(0, 4000)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 3 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
consensus_seqs = get_group_consensus(represented_reads_dict["BC1"][1])
bit_assignment = get_read_bit_assignment(consensus_seqs, bc1_r_map, bc_ref_map, True)
bc1_r_perc_match_dict = get_perc_matched(
    represented_reads_dict["BC1"][1], bit_assignment, bc1_r_map, bc_ref_map, True
)

In [None]:
plt.hist(bc1_r_perc_match_dict[5], range=(0.5, 1.0), bins=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 5 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
plt.hist(bc1_r_perc_match_dict[9], range=(0.5, 1.0), bins=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 9 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
consensus_seqs = get_group_consensus(represented_reads_dict["BC2"][0])
bit_assignment = get_read_bit_assignment(consensus_seqs, bc2_f_map, bc_ref_map, False)
bc2_f_perc_match_dict = get_perc_matched(
    represented_reads_dict["BC2"][0], bit_assignment, bc2_f_map, bc_ref_map, False
)

In [None]:
plt.hist(bc2_f_perc_match_dict[0], range=(0.5, 1.0), bins=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 0 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
plt.hist(bc2_f_perc_match_dict[3], range=(0.5, 1.0), bins=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 3 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
consensus_seqs = get_group_consensus(represented_reads_dict["BC2"][1])
bit_assignment = get_read_bit_assignment(consensus_seqs, bc2_r_map, bc_ref_map, True)
bc2_r_perc_match_dict = get_perc_matched(
    represented_reads_dict["BC2"][1], bit_assignment, bc2_r_map, bc_ref_map, True
)

In [None]:
plt.hist(bc2_r_perc_match_dict[25], range=(0.5, 1.0), bins=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 25 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
plt.hist(bc2_r_perc_match_dict[29], range=(0.5, 1.0), bins=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("% Bit 29 Agreement", fontsize=20)
plt.ylabel("# of Barcodes", fontsize=20)
plt.show()

In [None]:
# NOTE(review): `test_assign` is not assigned in any visible cell; this looks
# like a leftover from an interactive session and raises NameError on a fresh
# kernel. TODO: define it or delete this cell.
test_assign[1] == bit_assignment[1][0]

In [None]:
# Scratch check: how confidently is bit 0 of the forward bit map assigned?
# (A stray, body-less `def get_bit_assignment(...)` line used to open this
# cell; it was a SyntaxError and would have shadowed the real function.)
# NOTE(review): `consensus_seqs` holds whatever the most recently executed
# consensus cell produced - confirm it is the BC1 forward consensus before
# trusting these numbers.
bit_arr = np.array(list(map(lambda x: rev_comp(x[bc1_f_map[0]]), consensus_seqs)))

hamming_arr = compare_seqs(bit_arr, bc_ref_map[0])
assigned_bit = np.argmin(hamming_arr, axis=1)
dist_from_assigned = np.min(hamming_arr, axis=1)
# An assignment counts as confident when the best reference is fewer than
# three mismatches away.
good_assign = dist_from_assigned < 3
perc_confident_assignment = np.sum(good_assign) / good_assign.shape[0]

In [None]:
print(perc_confident_assignment)

In [None]:
plt.hist(dist_from_assigned, range=(1, 15))
plt.show()

In [None]:
hamming_arr.shape

In [None]:
# NOTE(review): `bit_0_arr` is not assigned in any visible cell; this cell
# raises NameError on a fresh kernel. TODO: define it or delete this cell.
bit_0_arr

In [None]:
grouped_reads_dict["BC1"][1][100]

In [None]:
# NOTE(review): `bit_map` only appears as a function parameter in the visible
# cells, never as a notebook-level variable, so this cell raises NameError on
# a fresh kernel. TODO: display one of the bc*_map variables instead, or
# delete this cell.
bit_map

In [None]:
def plot_second_max(grouped_reads):
    """Plot, per position, the frequency of the runner-up base of one read group.

    The reads are turned into a normalized count matrix; at every position the
    entries equal to the row maximum are removed and the largest remaining
    frequency is plotted as a faint grey line on the current axes.

    This is best-effort: a group that cannot be processed is skipped without
    any output. The reshape to three columns assumes a four-letter alphabet
    with a unique maximum at every position, so a group with a tie for the
    most frequent base at any position is among those skipped.

    NOTE: this function is defined again in a later cell of the notebook; the
    later definition wins once both cells have run.

    Parameters
    ----------
    grouped_reads : list
        Reads of a single group (equal-length sequences).
    """
    try:
        m = motifs.create(grouped_reads).counts.normalize(pseudocounts=0.001)
        # Rows become positions, columns become letters.
        m = np.array([list(m[key]) for key in m.keys()]).T
        where_max = np.equal(m, np.max(m, axis=1)[:, np.newaxis])
        second_max = np.max(m[~where_max].reshape(m.shape[0], 3), axis=1)
        plt.plot(second_max, c="grey", alpha=0.3)
    except Exception:
        # Deliberately skip unusable groups, but let KeyboardInterrupt and
        # SystemExit through so a long plotting loop can still be stopped.
        pass

In [None]:
grouped_reads_dict["BC1"][0][100]

In [None]:
def plot_second_max(grouped_reads):
    """Plot, per position, the frequency of the runner-up base of one read group.

    The reads are turned into a normalized count matrix; at every position the
    entries equal to the row maximum are removed and the largest remaining
    frequency is plotted as a faint grey line on the current axes.

    This is best-effort: a group that cannot be processed is skipped without
    any output. The reshape to three columns assumes a four-letter alphabet
    with a unique maximum at every position, so a group with a tie for the
    most frequent base at any position is among those skipped.

    Parameters
    ----------
    grouped_reads : list
        Reads of a single group (equal-length sequences).
    """
    try:
        m = motifs.create(grouped_reads).counts.normalize(pseudocounts=0.001)
        # Rows become positions, columns become letters.
        m = np.array([list(m[key]) for key in m.keys()]).T
        where_max = np.equal(m, np.max(m, axis=1)[:, np.newaxis])
        second_max = np.max(m[~where_max].reshape(m.shape[0], 3), axis=1)
        plt.plot(second_max, c="grey", alpha=0.3)
    except Exception:
        # Deliberately skip unusable groups, but let KeyboardInterrupt and
        # SystemExit through so a long plotting loop can still be stopped.
        pass


def get_over_thr_arr(grouped_reads):
    """Flag, per group and position, a runner-up base frequency above 0.1.

    For every group the reads are turned into a normalized count matrix; at
    each position the entries equal to the row maximum are removed and the
    largest remaining frequency is compared with 0.1.

    Groups that cannot be processed are skipped, so the rows of the result do
    NOT necessarily line up with the keys of ``grouped_reads``. The reshape to
    three columns assumes a four-letter alphabet with a unique maximum at
    every position, so a group with a tie for the most frequent base at any
    position is among those skipped.

    Parameters
    ----------
    grouped_reads : dict
        Maps a barcode index to its list of reads; only the values are used.

    Returns
    -------
    numpy.ndarray
        Boolean array with one row per successfully processed group and one
        column per position.
    """
    over_thr_arr = []
    for _, val in grouped_reads.items():
        try:
            m = motifs.create(val).counts.normalize(pseudocounts=0.001)
            # Rows become positions, columns become letters.
            m = np.array([list(m[key]) for key in m.keys()]).T
            where_max = np.equal(m, np.max(m, axis=1)[:, np.newaxis])
            second_max = np.max(m[~where_max].reshape(m.shape[0], 3), axis=1)
        except Exception:
            # Deliberately skip unusable groups, but let KeyboardInterrupt and
            # SystemExit through so a long run can still be stopped.
            continue
        over_thr_arr.append(second_max > 0.1)
    over_thr_arr = np.array(over_thr_arr)
    return over_thr_arr

In [None]:
# Overlay the runner-up base frequency profile of groups 0..999.
# NOTE(review): `grouped_reads` is first assigned by a cell further down the
# notebook, so this cell raises NameError on a fresh Restart & Run All.
# It also assumes at least 1000 groups exist (keys 0..999) - TODO confirm.
for i in range(1000):
    plot_second_max(grouped_reads[i])
plt.show()

In [None]:
over_thr_arr = get_over_thr_arr(grouped_reads)

In [None]:
over_thr_arr.shape

In [None]:
plt.hist(np.sum(over_thr_arr, axis=1) / over_thr_arr.shape[1], range=(0, 0.1), bins=30)
plt.show()

In [None]:
# Group the BC2 reverse reads by barcode, anchored at the reverse handle.
# The reverse complement of this literal is "ATTACTGATGGCAATGTGAT", i.e. the
# BC2 reverse handle listed in `handle_dict`.
handle_seq = rev_comp("ATCACATTGCCATCAGTAAT")
key = "BC2"

fwd_path = fwdread_paths[key]
rev_path = revread_paths[key]
Nmer_codebook = Nmer_codebooks[key]

# Reuse group_reads instead of a hand-copied version of its loop; 142 is the
# number of bases kept from the start of the handle.
grouped_reads = group_reads(
    final_Nmer_idx, rev_path, Nmer_codebook, handle_seq, min_read_len=142
)

In [None]:
str(grouped_reads[0][0])

In [None]:
for i in range(1000):
    plot_second_max(grouped_reads[i])
plt.show()

In [None]:
over_thr_arr = get_over_thr_arr(grouped_reads)

In [None]:
plt.hist(np.sum(over_thr_arr, axis=1) / over_thr_arr.shape[1], range=(0, 0.1), bins=30)
plt.show()

In [None]:
plt.hist(np.sum(over_thr_arr, axis=1) / over_thr_arr.shape[1], range=(0, 0.1), bins=30)
plt.show()

In [None]:
fwdread_paths["BC1"]

In [None]:
plt.hist(counts, range=(5, 100))

In [None]:
plt.hist(final_Nmer_arr_counts, range=(0, 500))

In [None]:
# NOTE(review): `filtered_unique` is not assigned in any visible cell (it is
# also displayed by the following cell); both raise NameError on a fresh
# kernel. Possibly an earlier name of `final_Nmer_arr` - TODO confirm, then
# rename or delete.
len(filtered_unique)

In [None]:
filtered_unique