# Analysis of specific junction

As an example, we consider junction CIRMBUYJFK_f__CWCCKOQCWZ_r.

In [None]:
# Make the project folder importable: append the parent of the current working
# directory to sys.path (the working directory itself is not changed), so that
# the local `junction_analysis` package imported below can be found.
import sys, os, re
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go

import mplcursors

import altair as alt
from itertools import combinations
import numpy as np
import pypangraph as pp
from Bio import Phylo, SeqIO, AlignIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

import plotly.io as pio
pio.renderers.default = "browser" 


from pathlib import Path
import subprocess

from Bio.Phylo.TreeConstruction import DistanceCalculator
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from scipy.spatial.distance import squareform

from junction_analysis.helpers import get_tree_order, convert_gbk_fasta
import junction_analysis.pangraph_utils as pu
from junction_analysis.plotting import plot_junction_pangraph_combined, plot_junction_pangraph_interactive, plot_dendrogram
from junction_analysis.consensus import find_consensus_paths, make_deduplicated_paths
from junction_analysis.block_alignment import create_block_msas, summarize_block_msas, analyze_alignment, cluster_alignment, retrieve_cluster_assignments

from junction_analysis.annotate_insertions import write_insertions_fasta, get_insertions_deletions_from_consensus, retrieve_SAMids_txt, combine_NCBI_atb_results, find_insertion_hits_own_genome

In [2]:
# Junction analysed in this notebook and its pangraph, loaded from the JSON file
# named after the junction.
example_junction = "CIRMBUYJFK_f__CWCCKOQCWZ_r"
example_pangraph = pp.Pangraph.from_json(f"../results/junction_pangraphs/{example_junction}.json")

# Cluster the sequences of one block alignment of this junction
# (block 11400043001338627984) and request two clusters; the resulting
# assignment is passed to the plot below as `cluster_map`.
# NOTE(review): Z is presumably a linkage matrix and `names` the sequence labels
# of the alignment -- confirm against cluster_alignment.
aln_path = f"../results/block_alignments/{example_junction}/block_11400043001338627984_aln.fa"
distance_matrix, Z, names = cluster_alignment(aln_path)
clusters = retrieve_cluster_assignments(Z, names, n_clusters=2)

## Analyze deviations (insertions / deletions) from consensus path
Consider the two consensus paths separately from each other.

In [3]:
# Higher thresholds would merge the two consensus paths into one. That may not
# be possible for every junction while still keeping all desired consensus paths.
consensus_paths, path_dict, similarity_matrix, assignment_df = find_consensus_paths(
    example_pangraph,
    rare_block_threshold=10,
    rare_edge_threshold=10,
    min_n_isolates_per_consensus=5,
)

# Interactive view of the junction: consensus paths highlighted, isolates in
# tree order, annotated with the two alignment-based clusters.
plot_junction_pangraph_interactive(
    example_pangraph,
    show_consensus=True,
    consensus_paths=consensus_paths,
    assignments=assignment_df,
    order="tree",
    cluster_map=clusters,
    title="Junction Block Structure with 2 Clusters",
)

Found 2 unique paths.


### Consensus 2

In [None]:
# Determine which blocks deviate from consensus path 2 (insertions and deletions)
# and write the insertions out for the database search.
# NOTE(review): the output folder is presumably
# results/atb_lookup/<junction>/consensus2, i.e. the folder uploaded below -- confirm.
insertions_2, deletions_2 = get_insertions_deletions_from_consensus(example_pangraph, assignment_df, consensus_paths, consensus = 2)
write_insertions_fasta(example_junction, insertions_2, consensus = 2)

# Manual steps performed outside the notebook. The local paths and the user name
# are machine-specific and must be adapted.
# 1) upload fastas: scp -r /Users/mariebecker/Documents/Uni/ETH/Masterarbeit/marie-junctions/results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus2 becker0010@transfer12.scicore.unibas.ch:/scicore/home/neher/GROUP/data/2025_all_the_bacteria/lexicmap_index/queries/CIRMBUYJFK_f__CWCCKOQCWZ_r/
# 2) run on the cluster: bash run_all_lexicmap.sh ./queries/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus2
# 3) download results: scp -r becker0010@transfer12.scicore.unibas.ch:'/scicore/home/neher/GROUP/data/2025_all_the_bacteria/lexicmap_index/queries/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus2/*.lexicmap.tsv' /Users/mariebecker/Documents/Uni/ETH/Masterarbeit/marie-junctions/results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus2/

Insertions:
NZ_AP022171.1 INSERTED: [18228278273347143766|+|]
NZ_CP098219.1 INSERTED: [2278011600046483881|+|]

Deletions:


In [None]:
# Retrieve the SAM... ids for the NCBI lookup (consensus 2).
# The folder is built from `example_junction` (defined near the top of the
# notebook) instead of repeating the junction id, so this cell follows the
# selected junction.
retrieve_SAMids_txt(parent_dir=f"../results/atb_lookup/{example_junction}/consensus2")

# To look the ids up in NCBI, run from results/atb_lookup:
#   bash fetch_biosamples_consensus.sh CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus1
# NOTE(review): the argument above says consensus1 although this cell handles
# consensus2 -- possibly a copy-paste leftover; confirm before running.

In [22]:
# Combine the NCBI lookup results with the ATB search results for consensus 2.
# The folder is derived from `example_junction` so it cannot drift from the
# junction selected near the top of the notebook.
combine_NCBI_atb_results(parent_dir=f"../results/atb_lookup/{example_junction}/consensus2")

In [None]:
# This insertion is not annotated as an insertion sequence; the annotation lists the following gene content instead:
#NZ_AP022171.1	GenBank	gene	10173	10694	.	+	.	ID=H7R27_RS10275;Name=ugpC;gene=ugpC;is_partial=False;locus_tag=H7R27_RS10275;pseudo=
#NZ_AP022171.1	GenBank	CDS	10173	10694	.	+	.	ID=H7R27_RS10275;Name=ugpC;codon_start=1;gene=ugpC;inference=COORDINATES: similar to AA sequence:RefSeq:NP_312323.2;is_partial=False;locus_tag=H7R27_RS10275;note=incomplete%3B partial in the middle of a contig%3B missing N-terminus%3B Derived by automated computational analysis using gene prediction method: Protein Homology.;product=sn-glycerol 3-phosphate ABC transporter ATP binding protein UgpC;pseudo=;transl_table=11
#NZ_AP022171.1	GenBank	gene	10691	11008	.	+	.	ID=H7R27_RS10280;Name=H7R27_RS10280;is_partial=False;locus_tag=H7R27_RS10280;old_locus_tag=WP5S18E09_20050;pseudo=
#NZ_AP022171.1	GenBank	CDS	10691	11008	.	+	.	ID=H7R27_RS10280;Name=H7R27_RS10280;go_function=GO:0008081 - phosphoric diester hydrolase activity [Evidence IEA];go_process=GO:0006629 - lipid metabolic process [Evidence IEA];codon_start=1;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709219.1;is_partial=False;locus_tag=H7R27_RS10280;note=incomplete%3B partial in the middle of a contig%3B missing C-terminus%3B Derived by automated computational analysis using gene prediction method: Protein Homology.;old_locus_tag=WP5S18E09_20050;product=glycerophosphodiester phosphodiesterase family protein;pseudo=;transl_table=11
# Load the hit table of insertion NZ_AP022171.1 (segment 0, consensus 2) and
# count the hits per organism.
atb_hits_info_df = pd.read_csv("../results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus2/NZ_AP022171.1_segment_0.hits_info.tsv", sep = "\t")
atb_hits_info_df.value_counts("organism")

organism
Escherichia coli                  187
Escherichia coli O25b:H4-ST131      4
Acinetobacter baumannii             1
Campylobacter jejuni                1
Escherichia coli KTE216             1
Escherichia coli O13/O135:H4        1
Shigella sonnei                     1
Name: count, dtype: int64

In [None]:
# long insertion: prophage
# Load the hit table of insertion NZ_CP098219.1 (segment 0, consensus 2) and
# count the hits per organism.
# Note: this rebinds `atb_hits_info_df`; any table previously stored under that
# name is no longer accessible afterwards.
atb_hits_info_df = pd.read_csv("../results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus2/NZ_CP098219.1_segment_0.hits_info.tsv", sep = "\t")
atb_hits_info_df.value_counts("organism")

organism
Escherichia coli                       83
Escherichia coli O145:H28               3
Salmonella enterica                     2
Salmonella enterica subsp. enterica     1
Name: count, dtype: int64

### Consensus 1

In [None]:
# Get all blocks that deviate from consensus path 1 -- both insertions and
# deletions are considered -- and write the insertions out for the database search.

insertions, deletions = get_insertions_deletions_from_consensus(example_pangraph, assignment_df, consensus_paths, consensus = 1)
write_insertions_fasta(example_junction, insertions, consensus = 1)

Insertions:
NZ_OX030701.1 INSERTED: [6932368721519041699|-|1314867519434933921]
NZ_CP021935.1 INSERTED: [517157241956952768|+|]
NZ_CP076693.1 INSERTED: [964531385716605116|+|]_[16982887268023976258|+|]
NZ_CP128947.1 INSERTED: [964531385716605116|+|]_[16982887268023976258|+|]
NZ_CP107114.1 INSERTED: [517157241956952768|+|]
NZ_CP107172.1 INSERTED: [517157241956952768|+|]
NZ_CP030337.1 INSERTED: [6932368721519041699|+|16056383497489254679]
NZ_CP124429.1 INSERTED: [517157241956952768|-|]
NZ_CP018970.1 INSERTED: [6932368721519041699|-|17115477248436968411]
NZ_CP019015.1 INSERTED: [16982887268023976258|-|]_[964531385716605116|-|]
NZ_CP124460.1 INSERTED: [964531385716605116|+|]_[16982887268023976258|+|]
NZ_AP022044.1 INSERTED: [6932368721519041699|+|12883103544713419939]
NZ_AP022044.1 INSERTED: [6932368721519041699|-|14506298656834147691]
NZ_CP103562.1 INSERTED: [7844090243072536214|+|]
NZ_CP051661.1 INSERTED: [6932368721519041699|+|12883103544713419939]
NZ_CP051661.1 INSERTED: [6932368721519

Run the lexicmap search on the cluster for all sequences: `bash run_all_lexicmap.sh ./queries/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus1`

In [None]:
# Retrieve the SAM... ids to look up in NCBI (consensus 1).
# The folder is built from `example_junction` (defined near the top of the
# notebook) instead of repeating the junction id.
retrieve_SAMids_txt(parent_dir=f"../results/atb_lookup/{example_junction}/consensus1")

Retrieve the info from NCBI: run `bash fetch_biosamples_consensus1.sh` from the folder `results/atb_lookup`.

In [None]:
# Merge the NCBI lookup results with the ATB search results for consensus 1.
# The folder is derived from `example_junction` so it cannot drift from the
# junction selected near the top of the notebook.
combine_NCBI_atb_results(parent_dir=f"../results/atb_lookup/{example_junction}/consensus1")

In [141]:
# Organism counts in the NCBI results of insertion NZ_CP030337.1
# (segment 0, consensus 1).
(
    pd.read_csv(
        "../results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus1/NZ_CP030337.1_segment_0.ncbi_results.tsv",
        sep="\t",
    )
    .value_counts("organism")
)

organism
Campylobacter jejuni         9
Escherichia coli             2
Helicobacter winghamensis    1
Klebsiella pneumoniae        1
Name: count, dtype: int64

In [None]:
# TODO: should I run the blocks separately or the insertions as a whole?
# TODO: rerun lexicmap ... now with correctly inverted sequences

Information about different insertion sequences:

- 5 different versions of this block, if a block is twice in one sequence it is always the same version twice, versions can be inserted or not inserted, found between 1 and 9 times in other genomes
    - NZ_CP107151.1, blocks [6932368721519041699|-|]: majority of hits are Mycoplasma mycoides or E. coli and some others, IS1 based on annotation, same insertion in NZ_CP107162.1 and NZ_CP107122.1 blocks [6932368721519041699|-|], same hits, NZ_CP107122.1 even has same insertion a second time[6932368721519041699|+|], NZ_CP128907.1|segment_0 path[6932368721519041699|+|1708810210093552769] length760, NZ_AP022326.1|segment_0 path[6932368721519041699|+|5100696243500885373] length777, NZ_AP022326.1|segment_1 path[6932368721519041699|+|6097212296783908714] length777, NZ_CP124410.1: path[6932368721519041699|+|] IS1, Mycoplasma mycoides and E.coli and Campylobacter jejuni and some more
    - NZ_CP021454.1|segment_0 path[6932368721519041699|-|913783977968963568] length777 and NZ_CP019012.1|segment_0 path[6932368721519041699|-|10145330714100251892] length768 (only found once in genome), inverted, IS1, only 4 hits and different ones --> difference within block!
    - NZ_OX030701.1|segment_0 path[6932368721519041699|-|] length760, NZ_CP018970.1|segment_0 path[6932368721519041699|-|17115477248436968411] length767, NZ_CP023826.1|segment_0 path[6932368721519041699|-|17115477248436968411] length767, NZ_LR890693.1|segment_0 path[6932368721519041699|-|11588582069103229002] length760, NZ_CP051661.1|segment_1 path[6932368721519041699|+|10145330714100251892] length776 and segment_0 path[6932368721519041699|+|12883103544713419939] length776, NZ_CP051663.1|segment_0 path[6932368721519041699|+|7987278316461198430] length764, NZ_CP051659.1|segment_0 path[6932368721519041699|+|10145330714100251892] length760, NZ_CP018970.1|segment_0 path[6932368721519041699|-|17115477248436968411] length767, NZ_CP023826.1|segment_0 path[6932368721519041699|-|17115477248436968411] length767, NZ_CP124455.1|segment_0 path[6932368721519041699|-|15558722379768673797] length768, same hits, less hits, IS1
    - NZ_CP030337.1|segment_0 path[6932368721519041699|+|16056383497489254679] length760, Campylobacter jejuni version (?)
    - NZ_AP022044.1|segment_0 path[6932368721519041699|+|12883103544713419939] length767 and NZ_AP022044.1|segment_1 path[6932368721519041699|-|14506298656834147691] length767, new version, mostly E.coli, 25 hits

- NZ_CP103562.1: path[7844090243072536214|+|] E. coli and some single different ones, IS3, found twice in genome

- double block: all found twice (once elsewhere in genome), all at same position, just some inverted
    - NZ_CP124374.1 and NZ_CP019015.1: [16982887268023976258|-|]_[964531385716605116|-|] ISL3 (both blocks together): no hits (TODO: change parameters and rerun or run blocks separately)
    - NZ_CP059130.1 (segment 1), NZ_AP026788.1 and NZ_AP026794.1, [964531385716605116|+|]_[16982887268023976258|+|], NZ_CP124460.1|segment_0 path[964531385716605116|+|]_[16982887268023976258|+|] length1863, NZ_CP027534.1|segment_0 path[964531385716605116|+|]_[16982887268023976258|+|] length1863, NZ_CP128947.1|segment_0 path[964531385716605116|+|]_[16982887268023976258|+|] length1863: ISL3, mostly E.coli, same hits
    - NZ_LR890693.1|segment_1 path[964531385716605116|-|]_[16982887268023976258|-|], NZ_CP010876.1|segment_0 path[964531385716605116|-|]_[16982887268023976258|-|] length1893, (order was inverted here) length1893, this one got hits now, all E.coli (why hits even though sequences don't fit)
- NZ_CP124424.1|segment_0 path[964531385716605116|+|] length1324, exist as partly insertion (only first block), less hits, E.coli and Lelliottia amnigena, ISL3, found twice


- NZ_CP021935.1, NZ_CP059130.1 (segment 0), NZ_CP107114.1 and NZ_CP107172.1 (identical), NZ_CP107114.1, NZ_CP107172, NZ_CP021179.1, NZ_CP018979.1 and NZ_CP124429.1 (inverted): 517157241956952768 IS3, 100% hits in E.coli and in other bacteria, mostly E. coli, all get the same hits, they might differ in a few bases or some bases at ends, inserted at different positions in genome, one has 3 base overhang, apart from that they are completely the same, different amount of hits in genome (between 2 and 7)

- NZ_AP026788.1 and NZ_AP026794.1: path[16556873922195442803|+|] IS1380, E. coli, Klebsiella pneumoniae, Pseudomonas aeruginosa,Salmonella enterica all frequent, many more, twice in genome and both have same hits

- NZ_CP019008.1|segment_0 path[185719230501067033|+|] length4204, NZ_CP133923.1|segment_0 path[185719230501067033|+|] length4204, NZ_CP124467.1|segment_0 path[185719230501067033|+|] length4204, NZ_CP134384.1|segment_0 path[185719230501067033|+|] length4203 (one nucleotide shorter): 200 hits that are all E. coli. all sequences identical, only once in genome


According to the annotations, the difference between consensus 1 and consensus 2 lies in one big prophage.

Be careful to not confuse context for next block!

In [87]:
# Show the selected columns of the hit table of insertion NZ_AP022044.1
# (segment 0, consensus 1).
pd.read_csv(
    "../results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus1/NZ_AP022044.1_segment_0.hits_info.tsv",
    sep="\t",
)[
    [
        "query", "qlen", "hits", "sgenome", "qcovGnm", "cls", "hsp",
        "qcovHSP", "alenHSP", "pident", "gaps", "evalue", "bitscore",
        "organism", "strain", "serovar", "sequence_type",
    ]
]

Unnamed: 0,query,qlen,hits,sgenome,qcovGnm,cls,hsp,qcovHSP,alenHSP,pident,gaps,evalue,bitscore,organism,strain,serovar,sequence_type
0,NZ_AP022044.1|segment_0,767,25,SAMEA4062129,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,,,
1,NZ_AP022044.1|segment_0,767,25,SAMN14049574,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,E. coli QEC12-15,,
2,NZ_AP022044.1|segment_0,767,25,SAMN07508953,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,29-KPC,,
3,NZ_AP022044.1|segment_0,767,25,SAMEA104040230,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,,,
4,NZ_AP022044.1|segment_0,767,25,SAMEA111271919,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,,,
5,NZ_AP022044.1|segment_0,767,25,SAMN08382692,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,GASRECE110,,
6,NZ_AP022044.1|segment_0,767,25,SAMEA111273518,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,,,
7,NZ_AP022044.1|segment_0,767,25,SAMN18977267,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,missing,missing,
8,NZ_AP022044.1|segment_0,767,25,SAMEA111273646,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,,,
9,NZ_AP022044.1|segment_0,767,25,SAMEA111273548,100.0,1,1,100.0,768,99.87,1,0.0,1389,Escherichia coli,,,


In [None]:
# for deletions, retrieve block consensus sequence to search in atb database
# TODO: maybe find a better way that takes all isolates belonging to the consensus and containing the block, and recalculates the consensus sequence (talk to Marco)
# TODO: do I also search deletions in the database?

'GGTAATGACTCCAACTTACTGATAGTGTTTTATGTTCAGATAATGCCCGATGACTTTGTCATGCAGCTCCACCGATTTTGAGAACGACAGTGACTTCCGTCCCAGCCTTGCCAGATGTTGTCTCAGATTCAGGTTATGTCGCTCAATGCGCTGAGTGTAACGCTTGCTGATTACGTGCAGCTTTCCCTTCAGGCGGGATTCATACAGCGGCCAGCCATCCGTCATCCATACCACGACCTCAAAGGCCGACAGCAGGCTCAGAAGACGCTCCAGTGTGGCCAGAGTGCGTTCACCGAAGACGTGCGCCACAACCGTCCTCCGTATCCTGTCATACGCGTAAAACAGCCAGCGCTGACGTGATTTAGCACCGACGTAGCCCCACTGTTCGTCCATTTCAGCGCAGACAATCACATCACTGCCCGGCTGTATGCGCGAGGTTACCGACTGCGGCCTGAGTTTTTTAAGTGACGTAAAATCGTGTTGAGGCCAACGCCCATAATGCGGGCGGTTGCCCGGCATCCAACACCATTCATGGCCATATCAATGATTTTCTGGTGTGTACCGGGTTGAGAAGCGGTGTAAGTGAACTGCAGTTGCCATGTTTTACGGCAGTGAGAGCAGAGATAGCGCTGATGTCCGGCAGTACTTTTACCGTTACGCACCACGCCTTCAGTAGCTGAGCAGGAGGGACAACTGATGGAGATGGAAGCCACGGGAGCACCTCAAAAACACCATCATACACTAAATCAGTAAGTTGGCA'

## Search insertion sequences in their own genome

In [4]:
# Make sure that all genome FASTA files exist: the GenBank files in ../data/gbk
# are converted to FASTA files in ../results/genome_fastas, which the
# own-genome search below takes as its genome_root.
convert_gbk_fasta(gbk_folder = "../data/gbk", fasta_folder = "../results/genome_fastas")

Converting: ../data/gbk/NZ_CP128947.1.gbk → ../results/genome_fastas/NZ_CP128947.1.fasta
Converting: ../data/gbk/NZ_CP107182.1.gbk → ../results/genome_fastas/NZ_CP107182.1.fasta
Converting: ../data/gbk/NZ_CP124367.1.gbk → ../results/genome_fastas/NZ_CP124367.1.fasta
Converting: ../data/gbk/NZ_CP124322.1.gbk → ../results/genome_fastas/NZ_CP124322.1.fasta
Converting: ../data/gbk/NZ_CP027534.1.gbk → ../results/genome_fastas/NZ_CP027534.1.fasta
Converting: ../data/gbk/NZ_CP107142.1.gbk → ../results/genome_fastas/NZ_CP107142.1.fasta
Converting: ../data/gbk/NZ_CP010876.1.gbk → ../results/genome_fastas/NZ_CP010876.1.fasta
Converting: ../data/gbk/NZ_CP116085.1.gbk → ../results/genome_fastas/NZ_CP116085.1.fasta
Converting: ../data/gbk/NZ_CP133927.1.gbk → ../results/genome_fastas/NZ_CP133927.1.fasta
Converting: ../data/gbk/NZ_AP022044.1.gbk → ../results/genome_fastas/NZ_AP022044.1.fasta
Converting: ../data/gbk/NZ_CP095137.1.gbk → ../results/genome_fastas/NZ_CP095137.1.fasta
Converting: ../data/g

In [None]:
# Search every insertion sequence in the genome it came from. According to the
# log, minimap2 is run once per insertion
# (minimap2 -x asm5 -N 50 -p 0.9 -k 19 --eqx <genome fasta> <insertion fasta>).
# The resulting table has one row per insertion with a `hits_in_genome` column.
insertions_df = find_insertion_hits_own_genome(genome_root = "../results/genome_fastas", insertions_seq_dir="../results/atb_lookup")

[M::mm_idx_gen::0.077*1.09] collected minimizers
[M::mm_idx_gen::0.085*1.28] sorted minimizers
[M::main::0.085*1.28] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.091*1.26] mid_occ = 50
[M::mm_idx_stat] kmer size: 19; skip: 19; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.095*1.25] distinct minimizers: 504461 (98.70% are singletons); average occurrences: 1.024; average spacing: 9.992; total length: 5163545
[M::worker_pipeline::0.096*1.25] mapped 1 sequences
[M::main] Version: 2.30-r1287
[M::main] CMD: minimap2 -x asm5 -N 50 -p 0.9 -k 19 --eqx ../results/genome_fastas/NZ_CP124455.1.fasta ../results/atb_lookup/CIRMBUYJFK_f__CWCCKOQCWZ_r/consensus1/NZ_CP124455.1_segment_0.fasta
[M::main] Real time: 0.102 sec; CPU: 0.125 sec; Peak RSS: 0.045 GB
[M::mm_idx_gen::0.064*1.04] collected minimizers
[M::mm_idx_gen::0.071*1.24] sorted minimizers
[M::main::0.071*1.24] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.076*1.23] mid_occ = 50
[M::mm_idx_stat] k

In [15]:
# Display the insertions grouped by consensus and insertion path. The output
# order shows that `insertion_path` is sorted as text (lexicographically), not
# numerically; the sorted copy is only displayed, `insertions_df` is not modified.
insertions_df.sort_values(['consensus','insertion_path'])

Unnamed: 0,junction_name,consensus,genome_name,insertion_path,insertion_length,segment,hits_in_genome
27,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_AP026794.1,16556873922195442803|+|,1661,segment_1,2
28,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_AP026788.1,16556873922195442803|+|,1661,segment_1,2
9,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP124467.1,185719230501067033|+|,4204,segment_0,1
29,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP134384.1,185719230501067033|+|,4203,segment_0,1
36,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP133923.1,185719230501067033|+|,4204,segment_0,1
41,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP019008.1,185719230501067033|+|,4204,segment_0,1
12,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP021935.1,517157241956952768|+|,1248,segment_0,3
13,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP107114.1,517157241956952768|+|,1248,segment_0,5
14,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP107172.1,517157241956952768|+|,1248,segment_0,2
18,CIRMBUYJFK_f__CWCCKOQCWZ_r,consensus1,NZ_CP059130.1,517157241956952768|+|,1253,segment_0,4
