Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:maxplanck-ie/snakepipes into dev_wd
Browse files Browse the repository at this point in the history
Conflicts:
	.github/workflows/linux.yml
	.github/workflows/osx.yml
	docs/content/News.rst
	snakePipes/shared/rules/envs/shared.yaml
  • Loading branch information
WardDeb committed May 31, 2023
2 parents 9fc8e2d + b38533d commit 2c6d2b4
Show file tree
Hide file tree
Showing 23 changed files with 1,182 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .ci_stuff/organism.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
genome_size: 2652783500
genome_fasta: "/tmp/genome.fa"
genome_index: "/tmp/genome.fa.fai"
genome_2bit: ".ci_stuff/genome_fasta/genome.2bit"
genome_2bit: "/tmp/genome.2bit"
bowtie2_index: ".ci_stuff/BowtieIndex/genome"
hisat2_index: ".ci_stuff/HISAT2Index/genome"
bwa_index: ".ci_stuff/BWAindex/genome.fa"
Expand Down
7 changes: 6 additions & 1 deletion .ci_stuff/test_dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ touch allelic_BAM_input/allelic_bams/sample1.genome1.sorted.bam \
allelic_BAM_input/bamCoverage/allele_specific/sample5.genome1.seq_depth_norm.bw \
allelic_BAM_input/bamCoverage/allele_specific/sample6.genome1.seq_depth_norm.bw
mkdir -p output
touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf
touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf /tmp/genome.2bit
mkdir -p allelic_input
mkdir -p allelic_input/Ngenome
touch allelic_input/file.vcf.gz allelic_input/snpfile.txt
Expand Down Expand Up @@ -214,6 +214,11 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 657 ]; then exit 1 ; fi
#multiple comparison groups
WC=`mRNA-seq --mode alignment,alignment-free -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 869 ]; then exit 1 ; fi
# three prime sequencing
WC=`mRNA-seq -i PE_input -o output --mode three-prime-seq --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 618 ]; then exit 1 ; fi
WC=`mRNA-seq -i PE_input -o output --mode three-prime-seq,deepTools_qc --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1199 ]; then exit 1 ; fi
#allelic
WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1357 ]; then exit 1 ; fi
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: flake
run: |
micromamba activate snakePipes_CI
flake8 --ignore=E501,E722,E402 --exclude docs/conf.py .
flake8 --ignore=E501,E722,E402 --exclude docs/conf.py,build/lib/snakePipes/shared/tools/three_prime_seq,snakePipes/shared/tools/three_prime_seq .
CI:
runs-on: ubuntu-latest
steps:
Expand Down Expand Up @@ -95,6 +95,6 @@ jobs:
- name: createEnvs
run: |
micromamba activate snakePipes_CI
conda config --set channel_priority strict
snakePipes createEnvs --force --only ${{matrix.envs}}
conda config --set channel_priority flexible
snakePipes createEnvs --autodetectCondaEnvDir --force --only ${{matrix.envs}}
4 changes: 2 additions & 2 deletions .github/workflows/osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ jobs:
- name: createEnvsOSX
run: |
micromamba activate snakePipes_CI
conda config --set channel_priority strict
snakePipes createEnvs --force --only ${{matrix.envs}}
conda config --set channel_priority flexible
snakePipes createEnvs --force --autodetectCondaEnvDir --only ${{matrix.envs}}
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@ output

# pip stuff
snakePipes.egg-info

# misc
.vscode/
83 changes: 32 additions & 51 deletions bin/snakePipes
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,16 @@ def parse_arguments():
help="Prints the location in which each conda environment is actually stored.",
)

mex = createEnvsParser.add_mutually_exclusive_group()
mex.add_argument(
"--keepCondaDir",

createEnvsParser.add_argument(
"--autodetectCondaEnvDir",
action="store_true",
help="If specified, the `snakemakeOptions:` setting in "
"the workflow `defaults.yaml` files will NOT be overwritten. "
"This is typically unwise and only expert users should specify this.",
)
mex.add_argument(
"--condaDir",
help="If specified, use this as the base directory for the "
"created conda environments. This will ignore what is already "
"in the workflow-specific yaml files and where conda is installed.",
help="If specified, this will set condaEnvDir to system conda prefix,"
"and will overwrite the condaEnvDir entry in defaults.yaml ."
"Use with caution."
)


createEnvsParser.add_argument(
"--only",
nargs="+",
Expand Down Expand Up @@ -117,6 +112,13 @@ def parse_arguments():
default=defaults["snakemakeOptions"],
)

configParser.add_argument(
"--condaEnvDir",
help="If specified, use this as the base directory for the "
"created conda environments. This will ignore what is already "
"in the workflow-specific yaml files and where conda is installed.",
)

configParser.add_argument(
"--organismsDir",
help="The directory where global organism YAML files are to be stored. Both "
Expand Down Expand Up @@ -254,19 +256,13 @@ def envInfo():
For each environment yaml file print where its conda env is actually located
"""
baseDir = os.path.dirname(snakePipes.__file__)
condaDir = os.environ.get("CONDA_PREFIX")
if "envs" in condaDir:
condaDir = os.path.dirname(condaDir)
else:
condaDir = os.path.join(condaDir, "envs")

f = open(os.path.join(baseDir, "shared/defaults.yaml"))
cf = yaml.load(f, Loader=yaml.FullLoader)
f.close()

_ = cf["snakemakeOptions"].split(" ")
idx = _.index("--conda-prefix")
condaEnvDir = _[idx + 1]
condaEnvDir=cf["condaEnvDir"]


for env in cof.set_env_yamls().values():
# Hash the file ala snakemake
Expand Down Expand Up @@ -307,36 +303,16 @@ def createCondaEnvs(args):
"""
baseDir = os.path.dirname(snakePipes.__file__)

condaDir = os.environ.get("CONDA_PREFIX")
rootDir = condaDir
if "envs" in condaDir:
condaDir = os.path.dirname(condaDir)
else:
condaDir = os.path.join(condaDir, "envs")

f = open(os.path.join(baseDir, "shared/defaults.yaml"))
cf = yaml.load(f, Loader=yaml.FullLoader)
f.close()
_ = cf["snakemakeOptions"].split(" ")
try:
idx = _.index("--conda-prefix")
except:
idx = len(_)
_.extend(["--conda-prefix", condaDir])

condaEnvDir = _[idx + 1]
if args.condaDir:
condaDirUse = args.condaDir
_[idx + 1] = condaDirUse
elif args.keepCondaDir:
condaDirUse = _[idx + 1]
else:
condaDirUse = condaDir
_[idx + 1] = condaDirUse
cf["snakemakeOptions"] = " ".join(_)
condaEnvDir=cf["condaEnvDir"]
condaDirUse=condaEnvDir

if args.autodetectCondaEnvDir:
condaDirUse=detectCondaDir()
# rewrite defaults.yaml
cof.write_configfile(os.path.join(baseDir, "shared/defaults.yaml"), cf)
cof.write_configfile(os.path.join(baseDir, "shared/defaults.yaml"), cf)

for envName, env in cof.set_env_yamls().items():
if args.only is not None and envName not in args.only:
Expand All @@ -362,12 +338,7 @@ def createCondaEnvs(args):
"--file",
os.path.join(baseDir, "shared/rules", env),
]
if "--conda-prefix" in cf["snakemakeOptions"] and (
args.condaDir or args.keepCondaDir
):
cmd += ["--prefix", os.path.join(condaDirUse, h)]
else:
cmd += ["--name", h]
cmd += ["--prefix", os.path.join(condaDirUse, h)]

# Don't actually create the env if either --info is set or it already exists and --force is NOT set
if not args.info:
Expand All @@ -388,6 +359,15 @@ def createCondaEnvs(args):
if args.noSitePackages and not args.info:
fixSitePy(rootDir)

def detectCondaDir():
    """Detect the default conda environments folder.

    The location is derived from the ``CONDA_PREFIX`` environment variable:

    * if the active prefix sits directly inside a directory named ``envs``
      (i.e. a named environment is active), that ``envs`` directory is
      returned;
    * otherwise the prefix is taken to be the base installation and
      ``<prefix>/envs`` is returned.

    Returns:
        The path of the directory in which conda environments are stored.

    Raises:
        RuntimeError: if ``CONDA_PREFIX`` is unset or empty, i.e. no conda
            environment is active and nothing can be detected.
    """
    prefix = os.environ.get("CONDA_PREFIX")
    if not prefix:
        # Fail with an actionable message instead of the opaque
        # "argument of type 'NoneType' is not iterable" TypeError.
        raise RuntimeError(
            "CONDA_PREFIX is not set; activate a conda environment before "
            "trying to autodetect the conda environment directory."
        )

    # Drop trailing separators so dirname()/basename() act on the last
    # real path component.
    prefix = os.path.normpath(prefix)
    parent = os.path.dirname(prefix)

    # Compare the parent directory's name rather than searching the whole
    # path for the substring "envs", which would misfire on unrelated path
    # components such as "/home/envs_user/miniconda3".
    if os.path.basename(parent) == "envs":
        return parent
    return os.path.join(prefix, "envs")


def updateConfig(args):
"""Update the global defaults"""
Expand All @@ -400,6 +380,7 @@ def updateConfig(args):
if args.configMode == "manual":
d = {
"snakemakeOptions": args.snakemakeOptions,
"condaEnvDir": args.condaEnvDir,
"organismsDir": args.organismsDir,
"clusterConfig": args.clusterConfig,
"tempDir": args.tempDir,
Expand Down
13 changes: 8 additions & 5 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
snakePipes News
===============

snakePipes 2.7.3
----------------

* Fixes #884 by creating an additional conda env for DSS. Build tests are now run with strict channel priority.

snakePipes x.x.x
----------------
* Changed the behaviour of snakePipes createEnvs - it is no longer possible to set condaEnvDir with this function. Instead, it must be set beforehand with snakePipes config. To ignore what's in the defaults.yaml and overwrite the condaEnvDir value with the default system conda prefix, use '--autodetectCondaEnvDir'.
* Snakemake options in the defaults.yaml are now an empty string. The required arguments '--use-conda --conda-prefix' have been directly added to the command string. condaEnvDir is parsed from defaults.yaml, requiring running snakePipes config first.
* Fixes #819
* fastq files are checked for validity
* an 'on success' file is touched in the output directory when a workflow is finished successfully
* fuzzywuzzy deprecated in favor of thefuzz

snakePipes 2.7.2
----------------
Expand Down
22 changes: 22 additions & 0 deletions docs/content/workflows/mRNA-seq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,28 @@ using the **deepTools_qc** mode. It's a very useful add-on with any of the other

.. note:: Since most deeptools functions require an aligned (BAM) file, the deepTools_qc mode will additionally perform the alignment of the fastq files. However this would not interfere with operations of the other modes.

"threePrimeSeq"
~~~~~~~~~~~~~~~

**threePrimeSeq** uses a pipeline developed by the Hilgers lab to annotate and
count clusters of reads mapping to three prime ends of genes using
poly(T)VN-primed 3' sequencing kits such as Lexogen's 3' mRNA-seq kit.
In this mode, **fastp** is used to pretrim with preset parameters, followed by
**STAR** mapping.

First, a blacklist of possible internal priming sites is generated for the
given organism. Next, the mapped regions are filtered according to this
blacklist and associated with the nearest gene within a certain window.
For all samples within the run, a database of PAS sites is generated
and read counts aggregated for each particular site. These are then
summarized on a metagene level and output to a counts.tsv file for
further downstream analysis.

The output for this mode will be stored in the ``three_prime_seq/`` subfolder.

.. note:: The ``--three-prime-seq`` option must be invoked (which will also set mode to threePrimeSeq) as this will set **fastp** and **STAR** with the appropriate parameters.


Understanding the outputs
---------------------------

Expand Down
10 changes: 4 additions & 6 deletions snakePipes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,6 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript):
and create the DAG
"""
workflowName = os.path.basename(callingScript)
snakemake_path = os.path.dirname(os.path.abspath(callingScript))

os.makedirs(args.outdir, exist_ok=True)

if isinstance(args.snakemakeOptions, list):
Expand Down Expand Up @@ -634,15 +632,15 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript):
args.snakemakeOptions += " --notemp"

snakemake_cmd = """
TMPDIR={tempDir} PYTHONNOUSERSITE=True {snakemake} {snakemakeOptions} --latency-wait {latency_wait} --snakefile {snakefile} --jobs {maxJobs} --directory {workingdir} --configfile {configFile} --keep-going
""".format(snakemake=os.path.join(snakemake_path, "snakemake"),
latency_wait=cluster_config["snakemake_latency_wait"],
TMPDIR={tempDir} PYTHONNOUSERSITE=True snakemake {snakemakeOptions} --latency-wait {latency_wait} --snakefile {snakefile} --jobs {maxJobs} --directory {workingdir} --configfile {configFile} --keep-going --use-conda --conda-prefix {condaEnvDir}
""".format(latency_wait=cluster_config["snakemake_latency_wait"],
snakefile=os.path.join(workflowDir, "Snakefile"),
maxJobs=args.maxJobs,
workingdir=args.workingdir,
snakemakeOptions=str(args.snakemakeOptions or ''),
tempDir=cfg["tempDir"],
configFile=os.path.join(args.outdir, '{}.config.yaml'.format(workflowName))).split()
configFile=os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)),
condaEnvDir=cfg["condaEnvDir"]).split()

if args.verbose:
snakemake_cmd.append("--printshellcmds")
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
# permitted here.
################################################################################
#
snakemakeOptions: ' --use-conda --conda-prefix /package/mamba/envs/ '
condaEnvDir: '/package/mamba/envs/'
snakemakeOptions: ''
organismsDir: 'shared/organisms'
clusterConfig: 'shared/cluster.yaml'
tempDir: /data/extended/
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rules/envs/shared.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ dependencies:
- fastp = 0.23.2
- umi_tools = 1.1.2
- fq = 0.10.0
- pybigwig = 0.3.18

Loading

0 comments on commit 2c6d2b4

Please sign in to comment.