Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decrease Docker image size and fix PE naming and parameter behavior #404

Merged
merged 2 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 7 additions & 7 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def process_fastq(fastq_filename, variantCache, ref_names, refs, args):
aln_matrix = CRISPResso2Align.read_matrix(aln_matrix_loc)

pe_scaffold_dna_info = (0, None) #scaffold start loc, scaffold seq to search
if args.prime_editing_pegRNA_scaffold_seq != "":
if args.prime_editing_pegRNA_scaffold_seq != "" and args.prime_editing_pegRNA_extension_seq != "":
pe_scaffold_dna_info = get_pe_scaffold_search(refs['Prime-edited']['sequence'], args.prime_editing_pegRNA_extension_seq, args.prime_editing_pegRNA_scaffold_seq, args.prime_editing_pegRNA_scaffold_min_match_length)

not_aln = {} #cache for reads that don't align
Expand Down Expand Up @@ -555,7 +555,7 @@ def process_bam(bam_filename, bam_chr_loc, output_bam, variantCache, ref_names,
aln_matrix = CRISPResso2Align.read_matrix(aln_matrix_loc)

pe_scaffold_dna_info = (0, None) #scaffold start loc, scaffold sequence
if args.prime_editing_pegRNA_scaffold_seq != "":
if args.prime_editing_pegRNA_scaffold_seq != "" and args.prime_editing_pegRNA_extension_seq != "":
pe_scaffold_dna_info = get_pe_scaffold_search(refs['Prime-edited']['sequence'], args.prime_editing_pegRNA_extension_seq, args.prime_editing_pegRNA_scaffold_seq, args.prime_editing_pegRNA_scaffold_min_match_length)

not_aln = {} #cache for reads that don't align
Expand Down Expand Up @@ -694,7 +694,7 @@ def process_fastq_write_out(fastq_input, fastq_output, variantCache, ref_names,
aln_matrix = CRISPResso2Align.read_matrix(aln_matrix_loc)

pe_scaffold_dna_info = (0, None) #scaffold start loc, scaffold sequence
if args.prime_editing_pegRNA_scaffold_seq != "":
if args.prime_editing_pegRNA_scaffold_seq != "" and args.prime_editing_pegRNA_extension_seq != "":
pe_scaffold_dna_info = get_pe_scaffold_search(refs['Prime-edited']['sequence'], args.prime_editing_pegRNA_extension_seq, args.prime_editing_pegRNA_scaffold_seq, args.prime_editing_pegRNA_scaffold_min_match_length)
not_aln = {} #cache for reads that don't align
not_aln[''] = "" #add empty sequence to the not_aln in case the fastq has an extra newline at the end
Expand Down Expand Up @@ -823,7 +823,7 @@ def process_single_fastq_write_bam_out(fastq_input, bam_output, bam_header, vari
aln_matrix = CRISPResso2Align.read_matrix(aln_matrix_loc)

pe_scaffold_dna_info = (0, None) # scaffold start loc, scaffold sequence
if args.prime_editing_pegRNA_scaffold_seq != "":
if args.prime_editing_pegRNA_scaffold_seq != "" and args.prime_editing_pegRNA_extension_seq != "":
pe_scaffold_dna_info = get_pe_scaffold_search(refs['Prime-edited']['sequence'], args.prime_editing_pegRNA_extension_seq, args.prime_editing_pegRNA_scaffold_seq, args.prime_editing_pegRNA_scaffold_min_match_length)
not_aln = {} # cache for reads that don't align
not_aln[''] = "" # add empty sequence to the not_aln in case the fastq has an extra newline at the end
Expand Down Expand Up @@ -1428,6 +1428,8 @@ def rreplace(s, old, new):


#Prime editing
if 'Prime-edited' in amplicon_name_arr:
raise CRISPRessoShared.BadParameterException("An amplicon named 'Prime-edited' must not be provided.")
prime_editing_extension_seq_dna = "" #global var for the editing extension sequence for the scaffold quantification below
prime_editing_edited_amp_seq = ""
if args.prime_editing_pegRNA_extension_seq != "":
Expand Down Expand Up @@ -1489,8 +1491,6 @@ def rreplace(s, old, new):
if new_ref in amplicon_seq_arr:
raise CRISPRessoShared.BadParameterException('The calculated prime-edited amplicon is the same as the reference sequence.')
amplicon_seq_arr.append(new_ref)
if 'Prime-edited' in amplicon_name_arr:
raise CRISPRessoShared.BadParameterException("An amplicon named 'Prime-edited' must not be provided.")
amplicon_name_arr.append('Prime-edited')
amplicon_quant_window_coordinates_arr.append('')
prime_editing_edited_amp_seq = new_ref
Expand Down Expand Up @@ -2380,7 +2380,7 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited

info('Done!', {'percent_complete': 20})

if args.prime_editing_pegRNA_scaffold_seq != "":
if args.prime_editing_pegRNA_scaffold_seq != "" and args.prime_editing_pegRNA_extension_seq != "":
#introduce a new ref (that we didn't align to) called 'Scaffold-incorporated' -- copy it from the ref called 'Prime-edited'
new_ref = deepcopy(refs['Prime-edited'])
new_ref['name'] = "Scaffold-incorporated"
Expand Down
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ MAINTAINER Kendell Clement
RUN apt-get update && apt-get install gcc g++ bowtie2 samtools libsys-hostname-long-perl \
-y --no-install-recommends \
&& apt-get clean \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /usr/share/man/* \
&& rm -rf /usr/share/doc/* \
&& conda install -c defaults -c conda-forge -c bioconda -y -n base --debug -c bioconda trimmomatic flash numpy cython jinja2 tbb=2020.2 pyparsing=2.3.1 scipy matplotlib pandas plotly\
&& conda install -c defaults -c conda-forge -c bioconda -y -n base --debug trimmomatic flash numpy cython jinja2 tbb=2020.2 pyparsing=2.3.1 scipy matplotlib-base pandas plotly\
&& conda clean --all --yes

#install ms fonts
Expand All @@ -40,4 +41,4 @@ RUN python setup.py install \
&& CRISPRessoCompare -h


ENTRYPOINT ["python","/CRISPResso2/CRISPResso2_router.py"]
ENTRYPOINT ["python","/CRISPResso2/CRISPResso2_router.py"]