Skip to content

Commit

Permalink
Merge pull request #77 from koszullab/3.2.0
Browse files Browse the repository at this point in the history
3.2.0 release
  • Loading branch information
js2264 committed Aug 31, 2023
2 parents cb8d3bd + 2d32875 commit b6b5d25
Show file tree
Hide file tree
Showing 12 changed files with 342 additions and 76 deletions.
32 changes: 20 additions & 12 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,31 @@ jobs:
max-parallel: 5

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2

- name: Checkout repo
uses: actions/checkout@v2

- name: Create micromamba env. for package
uses: mamba-org/setup-micromamba@v1
with:
python-version: 3.9
- name: Add conda to system path
generate-run-shell: true
environment-file: environment.yml

- name: Install package
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
pip install .
shell: micromamba-shell {0}

- name: Check installed package
run: |
conda config --add channels bioconda
conda install -c conda-forge python=3.9 minimap2 bowtie2=2.4.5 bwa samtools htslib pysam pytest cooler pytest-cov pylint codecov mappy
pip install -Ur requirements.txt
pip install pytest-pylint
hicstuff --version
shell: micromamba-shell {0}

- name: Lint and test
run: |
pip install pytest-pylint pytest pytest-cov pylint codecov mappy
pytest --pylint --pylint-error-types=EF --pylint-rcfile=.pylintrc --doctest-modules --doctest-modules hicstuff
pytest --cov=hicstuff
codecov
shell: micromamba-shell {0}

2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM continuumio/miniconda3:4.9.2

LABEL Name=hicstuff Version=3.1.7
LABEL Name=hicstuff Version=3.2.0

COPY * ./ /app/
WORKDIR /app
Expand Down
4 changes: 2 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
author = "Cyril Matthey-Doret"

# The short X.Y version
version = "3.1"
version = "3.2"
# The full version, including alpha/beta/rc tags
release = "3.1.7"
release = "3.2.0"


# -- General configuration ---------------------------------------------------
Expand Down
24 changes: 24 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Conda environment specification for the package.
# The CI workflow hands this file to mamba-org/setup-micromamba through its
# `environment-file` input, then installs the package itself with `pip install .`.
# NOTE(review): `env-name` looks like a placeholder environment name - confirm.
name: env-name
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- python >= 3.7
- pip
- bowtie2
- bwa
- minimap2
- samtools
- numpy
- scipy
- pandas >= 1.5.0
- matplotlib >= 3.4.0
- docopt
- biopython
- requests
- scikit-learn
- pysam
- htslib
- pyfastx
- cooler
33 changes: 23 additions & 10 deletions hicstuff/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,10 +741,11 @@ class Pipeline(AbstractCommand):
usage:
pipeline [--aligner=bowtie2] [--centromeres=FILE] [--circular] [--distance-law]
[--duplicates] [--enzyme=5000] [--filter] [--force] [--mapping=normal]
[--matfmt=graal] [--no-cleanup] [--outdir=DIR] [--plot] [--prefix=PREFIX]
[--binning=INT] [--zoomify] [--balancing_args=STR] [--quality-min=30]
[--read-len=INT] [--remove-centromeres=0] [--size=0] [--start-stage=fastq]
[--duplicates] [--enzyme=5000] [--exclude=STR] [--filter] [--force]
[--mapping=normal] [--matfmt=graal] [--no-cleanup] [--outdir=DIR]
[--plot] [--prefix=PREFIX] [--binning=INT] [--zoomify=BOOL]
[--balancing_args=STR] [--quality-min=30] [--read-len=INT]
[--remove-centromeres=0] [--size=0] [--start-stage=fastq]
[--threads=1] [--tmpdir=DIR] --genome=FILE <input1> [<input2>]
arguments:
Expand All @@ -767,6 +768,10 @@ class Pipeline(AbstractCommand):
option.
-C, --circular Enable if the genome is circular.
Discordant with the centromeres option.
-E, --exclude=STR Exclude specific chromosomes from the
generated matrix. Multiple chromosomes
can be listed separated by commas (e.g.
`--exclude "chrM,2u"`) [default: None].
-d, --distance-law If enabled, generates a distance law file
with the values of the probabilities to
have a contact between two distances for
Expand Down Expand Up @@ -802,7 +807,7 @@ class Pipeline(AbstractCommand):
Can be "bg2" for 2D Bedgraph format,
"cool" for Mirnylab's cooler software, or
"graal" for graal-compatible plain text
COO format. [default: graal]
COO format. [default: cool]
-n, --no-cleanup If enabled, intermediary BED files will
be kept after generating the contact map.
Disabled by default.
Expand All @@ -812,9 +817,10 @@ class Pipeline(AbstractCommand):
at different steps of the pipeline.
-P, --prefix=STR Overrides default filenames and prefixes all
output files with a custom name.
-b,--binning=INT Bin the resulting matrix to a given resolution
-b,--binning=INT Bin the contact matrix to a given resolution.
By default, the contact matrix is not binned.
(only used if `--matfmt cool`)
-z, --zoomify Zoomify binned cool matrix
-z, --zoomify=BOOL Zoomify binned cool matrix [default: True]
(only used if mat_fmt == "cool" and binning is set)
-B, --balancing_args=STR Arguments to pass to `cooler balance`
(default: "") (only used if zoomify == True)
Expand Down Expand Up @@ -868,8 +874,14 @@ def execute(self):
if not self.args["--binning"]:
self.args["--binning"] = "0"

if not self.args["--zoomify"]:
self.args["--zoomify"] = "True"

if not self.args["--balancing_args"]:
self.args["--balancing_args"] = ""
self.args["--balancing_args"] = None

if not self.args["--exclude"]:
self.args["--exclude"] = None

if self.args["--matfmt"] not in ("graal", "bg2", "cool"):
logger.error("matfmt must be either bg2, cool or graal.")
Expand All @@ -878,22 +890,23 @@ def execute(self):
read_len = self.args["--read-len"]
if read_len is not None:
read_len = int(read_len)

hpi.full_pipeline(
genome=self.args["--genome"],
input1=self.args["<input1>"],
input2=self.args["<input2>"],
aligner=self.args["--aligner"],
centromeres=self.args["--centromeres"],
circular=self.args["--circular"],
exclude=self.args["--exclude"],
distance_law=self.args["--distance-law"],
enzyme=self.args["--enzyme"],
filter_events=self.args["--filter"],
force=self.args["--force"],
mapping=self.args["--mapping"],
mat_fmt=self.args["--matfmt"],
binning=int(self.args["--binning"]),
zoomify=self.args["--zoomify"],
zoomify=eval(self.args["--zoomify"]),
balancing_args=self.args["--balancing_args"],
min_qual=int(self.args["--quality-min"]),
min_size=int(self.args["--size"]),
Expand Down
6 changes: 3 additions & 3 deletions hicstuff/cutsite.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,15 @@ def cutsite_read(ligation_sites, seq, qual, seed_size=0):
Returns:
--------
list of str
List of string of the sequences. The split is made at the start of the
ligation sites.
List of cut sequences. The split is made 4 bases after the start of
the ligation site.
list of str
List of string of the qualities.
Examples:
---------
>>> cutsite_read(re.compile(r'GA.TA.TC'), "AAGAGTATTC", "FFF--FAFAF")
(['AA', 'GAGTATTC'], ['FF', 'F--FAFAF'])
(['AAGAGT', 'ATTC'], ['FFF--F', 'AFAF'])
"""

# Find the ligation sites.
Expand Down
2 changes: 1 addition & 1 deletion hicstuff/distance_law.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def normalize_distance_law(xs, ps, inf=3000, sup=None):
List of ps each normalized separately.
"""
# Sanity check: xs and ps have the same dimension
if np.shape(xs) != np.shape(ps):
if np.shape(np.asarray(xs, dtype="object")) != np.shape(np.asarray(ps, dtype="object")):
logger.error("xs and ps should have the same dimension.")
sys.exit(1)
# Define the length of shortest chromosomes as a lower bound for the sup boundary
Expand Down
63 changes: 63 additions & 0 deletions hicstuff/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import bz2
import io
import os
import pysam
import functools
import sys
import numpy as np
Expand Down Expand Up @@ -1431,3 +1432,65 @@ def check_is_fasta(in_file):
fasta = False

return fasta

def check_fastq_entries(in_file):
    """
    Count how many reads are in the input fastq.

    The file is read in-process (no external ``zcat`` / ``wc`` needed).
    Gzip-compressed input is detected from its two-byte magic number and
    decompressed on the fly.

    Parameters
    ----------
    in_file : str
        Path to the input fastq file, plain or gzip-compressed.

    Returns
    -------
    int :
        How many reads are listed in the input fastq, i.e. the number of
        lines divided by four (rounded down).
    """
    # Function-scope import so the block does not depend on the module header.
    import gzip

    # Gzip streams always start with the magic bytes 0x1f 0x8b.
    with open(in_file, "rb") as handle:
        is_gzip = handle.read(2) == b"\x1f\x8b"

    opener = gzip.open if is_gzip else open

    n_lines = 0
    last_byte = b"\n"
    with opener(in_file, "rb") as handle:
        # Read fixed-size binary chunks: constant memory, no text decoding.
        for chunk in iter(lambda: handle.read(1 << 20), b""):
            n_lines += chunk.count(b"\n")
            last_byte = chunk[-1:]

    # A final line without a trailing newline is still a line.
    if last_byte != b"\n":
        n_lines += 1

    # One fastq record spans four lines.
    return n_lines // 4

def check_bam_entries(in_file):
    """
    Count how many reads are in the input bam, using ``samtools view -c``.

    Parameters
    ----------
    in_file : str
        Path to the input bam file.

    Returns
    -------
    int :
        How many reads are listed in the input bam, as reported by samtools.

    Raises
    ------
    ValueError
        If samtools exits with a non-zero status (its stderr is included in
        the message) or prints something that is not an integer.
    """
    result = sp.run(
        ["samtools", "view", "-c", in_file],
        stdout=sp.PIPE,
        stderr=sp.PIPE,
        encoding="utf-8",
    )

    # Surface samtools' own diagnostic instead of failing later on int('').
    if result.returncode != 0:
        raise ValueError(
            "samtools view -c failed on {f}: {err}".format(
                f=in_file, err=result.stderr.strip()
            )
        )

    # str.strip() instead of str.removesuffix(): the latter needs Python 3.9.
    return int(result.stdout.strip())

0 comments on commit b6b5d25

Please sign in to comment.