In [1]:
import os
import pandas as pd
from CRISPResso2 import CRISPRessoShared, CRISPRessoPlot, CRISPResso2Align
from collections import defaultdict
import matplotlib.pyplot as plt
import ast
import numpy as np

In [2]:
%matplotlib inline

In [3]:
# ! unzip ../CRISPResso2_tests/cli_integration_tests/CRISPResso_on_params/Alleles_frequency_table.zip

## Load info dict

In [9]:
root = '../CRISPResso2_tests/cli_integration_tests/CRISPResso_on_params/'
crispresso2_info = CRISPRessoShared.load_crispresso_info(root)
refs = crispresso2_info['results']['refs']
reference_seq = refs['FANC']['sequence']
exon_positions = refs['FANC']['exon_positions']
coding_seq = ''.join([refs['FANC']['sequence'][i] for i in refs['FANC']['exon_positions']])

Exception: Cannot open CRISPResso info file at ../CRISPResso2_tests/cli_integration_tests/CRISPResso_on_params/CRISPResso2_info.json

## Get DF to Plot

* Aligned Sequence is the aligned read from the fastq. '-' is a deletion.
* Reference Sequence is the aligned reference sequence. '-' is an insertion.

Our goal is to grab the coding sequences out of the aligned sequences. We'll use `CRISPRessoShared.get_dataframe_around_cut`

In [5]:
df_alleles = pd.read_csv('df_alleles.txt', sep='\t', index_col=0)
df_alleles['ref_positions'] = df_alleles['ref_positions'].apply(ast.literal_eval)

In [6]:
df_alleles

Unnamed: 0,#Reads,Aligned_Sequence,Reference_Sequence,n_inserted,n_deleted,n_mutated,Reference_Name,Read_Status,Aligned_Reference_Names,Aligned_Reference_Scores,ref_positions,%Reads,contains dsODN fw,contains dsODN rv,contains dsODN,contains dsODN fragment fw,contains dsODN fragment rv,contains dsODN fragment
6,3,ACATCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.4,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
12,3,AGAGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.8,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
56,3,AGGGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&85.2,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
29,3,GCAGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.861,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
110,3,GGAGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.8,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,1,TTGTCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,87.302&85.141,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",0.480769,False,False,False,False,False,False
49,1,TTGTCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.8,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",0.480769,False,False,False,False,False,False
155,1,TTTCCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.8,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",0.480769,False,False,False,False,False,False
153,1,TTTGCGGATGTTCCAATCAGTACGCAGAGCGTCGCCGTCTCCAAGG...,CGGCCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,2,HDR,MODIFIED,HDR,82.239&83.6,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",0.480769,False,False,False,False,False,False


In [8]:
cut_point = refs['FANC']['sgRNA_cut_points'][0]

In [9]:
print(f'coding seq ragng: ({exon_positions[0]}:{exon_positions[-1]})')
print(f'coding seq: {coding_seq}')
print(f'sgRNA cut points: {refs["FANC"]["sgRNA_cut_points"]}')

coding seq ragng: (56:106)
coding seq: GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT
sgRNA cut points: [91, 188, 172]


In [8]:
plot_left = cut_point - exon_positions[0] + 1
plot_right = exon_positions[-1] - cut_point
print(plot_left, plot_right)


NameError: name 'cut_point' is not defined

In [15]:
#[0,1,2,3,4,5,6] 3 is cut_point, then plot_left = 3 = cut_point - exon_pos[0], plot_right = exon_pos[-1] - cut_point 

In [16]:
cut_point

91

In [17]:
coding_seq

'GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT'

In [7]:
df_test = df_alleles_around_cut=CRISPRessoShared.get_amino_acid_dataframe_around_cut_assymetrical(df_alleles.loc[df_alleles['Reference_Name'] == 'FANC'], cut_point, plot_left, plot_right, './CRISPResso2/BLOSUM62')

NameError: name 'cut_point' is not defined

In [19]:
df_test

Unnamed: 0_level_0,Reference_Sequence,Unedited,n_deleted,n_inserted,n_mutated,#Reads,%Reads
Aligned_Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,162,77.884615
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCA---CCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,8,3.846154
GGGCCTTCGCGCACCTCATGGAATCCCTTCTG----ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGC--CTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCCACCTGGATCGCTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGC-ACCTGGATC...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATC---TGTGGATAACC-GTATTACCGCC,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTC------ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTCT-----ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTCT----CACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGC-----CTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769


Note: it's important that we get the start of the coding sequence. Otherwise the sequence will be shifted and the codons will be incorrect.

In [21]:
df_test.iloc[0]

Reference_Sequence    GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...
Unedited                                                           True
n_deleted                                                             0
n_inserted                                                            0
n_mutated                                                             0
#Reads                                                              162
%Reads                                                        77.884615
Name: GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT, dtype: object

In [22]:
coding_seq

'GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT'

## Plotting

In [23]:
df_to_plot = df_test

In [24]:
df_to_plot.head()

Unnamed: 0_level_0,Reference_Sequence,Unedited,n_deleted,n_inserted,n_mutated,#Reads,%Reads
Aligned_Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,162,77.884615
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCA---CCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,8,3.846154
GGGCCTTCGCGCACCTCATGGAATCCCTTCTG----ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGC--CTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCCACCTGGATCGCTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGC-ACCTGGATC...,True,0,0,0,2,0.961538


In [25]:
plot_9_inputs = {
    'reference_seq': coding_seq, 
    'df_alleles': df_to_plot, 
    'fig_filename_root': (fig_filename_root:='./scripts/figures/9'), 
    'custom_colors': (custom_colors:={
        'Substitution': '#0000FF', 
        'Insertion': '#008000', 
        'Deletion': '#FF0000', 
        'A': '#7FC97F', 
        'T': '#BEAED4', 
        'C': '#FDC086', 
        'G': '#FFFF99', 
        'N': '#C8C8C8', 
        '-': '#1E1E1E'
    }), 
    'MIN_FREQUENCY': (MIN_FREQUENCY:=0.2), 
    'MAX_N_ROWS': (MAX_N_ROWS:=50), 
    'SAVE_ALSO_PNG': (SAVE_ALSO_PNG:=True), 
    'plot_cut_point': (plot_cut_point:=True), 
    'sgRNA_intervals': (sgRNA_intervals:=[(3, 22), (99, 119), (98, 112)]), 
    'sgRNA_names': (sgRNA_names:=['hi', 'dear', '']), 
    'sgRNA_mismatches': (sgRNA_mismatches:=[[], [0], [7]]), 
    'annotate_wildtype_allele': (annotate_wildtype_allele:='')
}

In [26]:
plot_9a_inputs = {
    'reference_seq': coding_seq, 
    'df_alleles': df_to_plot, 
    'fig_filename_root': (fig_filename_root:='./scripts/figures/9a'), 
    'custom_colors': (custom_colors:={
        'Substitution': '#0000FF', 
        'Insertion': '#008000', 
        'Deletion': '#FF0000', 
        'A': '#7FC97F', 
        'T': '#BEAED4', 
        'C': '#FDC086', 
        'G': '#FFFF99', 
        'N': '#C8C8C8', 
        '-': '#1E1E1E'
    }), 
    'MIN_FREQUENCY': (MIN_FREQUENCY:=0.2), 
    'MAX_N_ROWS': (MAX_N_ROWS:=50), 
    'SAVE_ALSO_PNG': (SAVE_ALSO_PNG:=True), 
    'plot_cut_point': (plot_cut_point:=True), 
    'sgRNA_intervals': (sgRNA_intervals:=[(3, 22), (99, 119), (98, 112)]), 
    'sgRNA_names': (sgRNA_names:=['hi', 'dear', '']), 
    'sgRNA_mismatches': (sgRNA_mismatches:=[[], [0], [7]]), 
    'annotate_wildtype_allele': (annotate_wildtype_allele:='')
}

In [27]:
(
    X, # 2d array: This is the sequence converted to ints I THINK for the cmap 
    annot, # This is the sequence of bp's 
    y_labels, # this is the percentage and tallies displayed to the right ``
    insertion_dict, # I think this is key: which aligned_seq has insertion, value: where the insertion is
    per_element_annot_kws, # dict of dicts: this is for bolding the substitutions
    is_reference,
    ref_sequence_amino_acids) = CRISPRessoPlot.prep_amino_acid_table(
        df_to_plot, 
        plot_9a_inputs['reference_seq'], 
        plot_9a_inputs['MAX_N_ROWS'], 
        plot_9a_inputs['MIN_FREQUENCY']
        )


In [29]:
for x in X:
    print(len(x))

17
17
17
17
17
17
17
17
17
17
17
17
17
17
17
17
17
17
17
17
17


In [31]:
CRISPRessoPlot.plot_amino_acid_table(**plot_9a_inputs)

In [30]:
CRISPRessoPlot.plot_alleles_table(**plot_9_inputs)

In [23]:
def pad_amino_acids(amino_acids, amino_acid_seq_length):
        return amino_acids + [''] * (amino_acid_seq_length - len(amino_acids))

In [46]:
df_alleles.iloc[0]['Aligned_Sequence'][exon_positions[0] + 4:exon_positions[-1] + 4 + 1]

'GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT'

In [42]:
coding_seq

'GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT'

In [44]:
df_alleles.iloc[0]['Aligned_Sequence'][:10]
df_alleles.iloc[0]['Reference_Sequence'][:10]

'----CGGATG'

In [47]:
df_alleles.iloc[0]['Reference_Sequence']

'----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGGTGAAAGCGGAAGTAGGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTTCCGAGCTTCTGGCGGTCTCAAGCACTACCTACGTCAGCACCTGGGACCCCGCCACCGTGCGCCGGGCCTTGCAGTGGGCGCGCTACCTGCGCCACATCCATCGGCGCTTTGGTCGG-----------------------'

in the above example, there's a 4 bp insertion at the start of the aligned sequence (which can be seen in the aligned reference sequence). This means `exon_positions` is off by 4 bps.

In [48]:
df_alleles['Reference_Sequence'].head()

6      ----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...
12     ----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...
56     ----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...
29     ----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...
110    ----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...
Name: Reference_Sequence, dtype: object

we may be able to use `df_alleles['ref_positions']`

In [51]:
cut_point = 91
df_alleles.iloc[0]['ref_positions'][cut_point]

87

## Reference to Read Coord Conversion

In [42]:
aligned_seq = df_alleles.iloc[0]['Aligned_Sequence']
reference_seq = df_alleles.iloc[0]['Reference_Sequence']

In [58]:
print(reference_seq)
print(aligned_seq)
len(aligned_seq) == len(reference_seq)

----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGGTGAAAGCGGAAGTAGGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTTCCGAGCTTCTGGCGGTCTCAAGCACTACCTACGTCAGCACCTGGGACCCCGCCACCGTGCGCCGGGCCTTGCAGTGGGCGCGCTACCTGCGCCACATCCATCGGCGCTTTGGTCGG-----------------------
ACATCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGGTGAAAGCGGAAGTAGGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTTCCGAGCTTCTGGCGGTCTCAAGCACTACCTACGTCAGCACCTGGGACCCCGCCACCGTGCGCCGGGCCTTGCAGTGGGCGCGCTACCTGCGCCACATCCATCGGCGCTTTGGTCGGCATGGCCCCATTCGCACGGCTCT


True

In [13]:
ref =     '--ACT--CTCGTC'
aligned = 'GGACTGGCT--TC'

# ref_to_aligned_coords = [-1, -1, 0, 1, 2, 2, 2, 3, 4, 7, 8 ]

In [14]:
CRISPRessoShared.get_relative_coordinates(ref, aligned)

([-1, -1, 0, 1, 2, 2, 2, 3, 4, 7, 8], [0, 0, 0, 1, 2, 3, 3, 3, 4, 7, 8])

In [15]:
df_alleles.head()

Unnamed: 0,#Reads,Aligned_Sequence,Reference_Sequence,n_inserted,n_deleted,n_mutated,Reference_Name,Read_Status,Aligned_Reference_Names,Aligned_Reference_Scores,ref_positions,%Reads,contains dsODN fw,contains dsODN rv,contains dsODN,contains dsODN fragment fw,contains dsODN fragment rv,contains dsODN fragment
6,3,ACATCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.4,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
12,3,AGAGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.8,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
56,3,AGGGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&85.2,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
29,3,GCAGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.861,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False
110,3,GGAGCGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,----CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGG...,0,0,0,FANC,UNMODIFIED,FANC,89.2&84.8,"[-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...",1.442308,False,False,False,False,False,False


In [16]:
seq = df_alleles.iloc[0]['Aligned_Sequence']
ref = df_alleles.iloc[0]['Reference_Sequence']

## AA Alignment

In [78]:
AA_MATRIX = CRISPResso2Align.read_matrix('./CRISPResso2/BLOSUM62')

In [95]:
seq1, seq2, score = CRISPResso2Align.global_align(seqj:='GPSRTSWNPFCSTWIAF', seqi:='GPSRTSWNPFCTWIAF', matrix=AA_MATRIX, gap_incentive=np.zeros(len(seqi) + 1, dtype=int))

In [96]:
print(seq1, seq2, score)

GPSRTSWNPFCSTWIAF GPSRTSWNPFC-TWIAF 94.118


In [86]:
seq2

'ATTA'

In [87]:
score

100.0

In [97]:
CRISPRessoShared.get_amino_acid_dataframe_around_cut_assymetrical()

Unnamed: 0_level_0,Reference_Sequence,Unedited,n_deleted,n_inserted,n_mutated,#Reads,%Reads
Aligned_Sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,162,77.884615
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCA---CCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,8,3.846154
GGGCCTTCGCGCACCTCATGGAATCCCTTCTG----ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGC--CTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCCACCTGGATCGCTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGC-ACCTGGATC...,True,0,0,0,2,0.961538
GGGCCTTCGCGCACCTCATGGAATC---TGTGGATAACC-GTATTACCGCC,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTC------ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTCT-----ACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTCT----CACCTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
GGGCCTTCGCGCACCTCATGGAATCCCTTCTGC-----CTGGATCGCTTTT,GGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCG...,True,0,0,0,1,0.480769
