Merge pull request #135 from epigen/0.6-rc2

0.6 RC2
pepkit · Jun 30, 2017 · d6e4602 · d6e4602
2 parents 5e8a546 + 966286b
commit d6e4602
Show file tree

Hide file tree

Showing 42 changed files with 3,587 additions and 3,093 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -13,5 +13,6 @@ install:
 script: pytest
 branches:
   only:
+    - 0.6-rc2
     - dev
     - master
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
@@ -27,6 +27,7 @@ Changelog
 
     - Various small bug fixes and dev improvements.
 
+    - Require `setuptools` for installation, and `pandas 0.20.2`. If `numexpr` is installed, version `2.6.2` is required.
 
 - **v0.5** (*2017-03-01*):
 

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -140,6 +140,7 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
+html_static_path = []  # it's empty; suppress warning
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied

diff --git a/doc/source/config-files.rst b/doc/source/config-files.rst
@@ -19,7 +19,7 @@ If you are planning to submit jobs to a cluster, then you need to know about a s
 That should be all you need to worry about as a pipeline user. If you need to adjust compute resources or want to develop a pipeline or have more advanced project-level control over pipelines, then you'll need to know about a few others:
 
 Pipeline developers
-*****************
+**********************
 
 If you want to add a new pipeline to looper, tweak the way looper interacts with a pipeline for a given project, or change the default cluster resources requested by a pipeline, then you need to know about a configuration file that coordinates linking your pipeline in to your looper project.
 

diff --git a/doc/source/define-your-project.rst b/doc/source/define-your-project.rst
@@ -40,8 +40,8 @@ For example, by default, your jobs will run serially on your local computer, whe
 
 Let's go through the more advanced details of both annotation sheets and project config files:
 
-.. include:: sample-annotation-sheet.rst
+.. include:: sample-annotation-sheet.rst.inc
 
-.. include:: project-config.rst
+.. include:: project-config.rst.inc
 
 
diff --git a/doc/source/inputs.rst b/doc/source/inputs.rst
diff --git a/doc/source/pipeline-interface-mapping.rst → ...source/pipeline-interface-mapping.rst.inc b/doc/source/pipeline-interface-mapping.rst → ...source/pipeline-interface-mapping.rst.inc
@@ -1,4 +1,6 @@
-.. _pipeline-interface-mapping:
+:orphan:
+
+.. _pi_mapping:
 
 Pipeline interface section: protocol_mapping 
 ********************************************

diff --git a/doc/source/pipeline-interface-pipelines.rst → ...urce/pipeline-interface-pipelines.rst.inc b/doc/source/pipeline-interface-pipelines.rst → ...urce/pipeline-interface-pipelines.rst.inc
@@ -1,3 +1,5 @@
+:orphan:
+
 .. _pipeline-interface-pipelines:
 
 Pipeline interface section: pipelines 

diff --git a/doc/source/pipeline-interface.rst b/doc/source/pipeline-interface.rst
@@ -31,7 +31,7 @@ Let's start with a very simple example. A basic ``pipeline_interface.yaml`` file
 
 The first section specifies that samples of protocol ``RRBS`` will be mapped to the pipeline specified by key ``rrbs_pipeline``. The second section describes where the pipeline named ``rrbs_pipeline`` is located and what command-line arguments it requires. Pretty simple. Let's go through each of these sections in more detail:
 
-.. include:: pipeline-interface-mapping.rst
+.. include:: pipeline-interface-mapping.rst.inc
 
-.. include:: pipeline-interface-pipelines.rst
+.. include:: pipeline-interface-pipelines.rst.inc
 
diff --git a/doc/source/project-config.rst → doc/source/project-config.rst.inc b/doc/source/project-config.rst → doc/source/project-config.rst.inc
@@ -1,3 +1,5 @@
+:orphan:
+
 Project config file
 ***************************************************
 

diff --git a/doc/source/sample-annotation-sheet.rst → doc/source/sample-annotation-sheet.rst.inc b/doc/source/sample-annotation-sheet.rst → doc/source/sample-annotation-sheet.rst.inc
@@ -1,3 +1,4 @@
+:orphan:
 
 Sample annotation sheet
 **************************************************

diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst
@@ -41,7 +41,7 @@ Inside there will be two directories:
 -  ``submissions`` [2]_ - which holds yaml representations of the samples and log files of the submitted jobs.
 
 
-The sample-specific output of each pipeline type varies and is described in :doc:`pipelines`.
+The sample-specific output of each pipeline type varies.
 
 To use pre-made pipelines with your project, all you have to do is :doc:`define your project <define-your-project>` using looper's standard format. To link your own, custom built pipelines, you can :doc:`connect your pipeline to looper with a pipeline interface <pipeline-interface>`.
 

diff --git a/examples/microtest_project_config.yaml b/examples/microtest_project_config.yaml
@@ -1,94 +1,35 @@
-# This project config file describes all *project-specific variables*
-# Its primary purpose is as input to Looper, which will submit jobs as appropriate
-# for each sample in the project.
-# But it is also read by other tools, including:
-# - project sample loop (primary purpose)
-# - make_trackhubs scripts to produce web accessible results
-# - stats summary scripts
-# - analysis scripts requiring pointers to metadata, results, and other options.
-
 metadata:
-  # output_dir: ABSOLUTE PATH to the parent, shared space where project results go
   output_dir: /scratch/lab_bock/shared/projects/microtest
-  # results and submission subdirs are subdirectories under parent output_dir
-  # results: where output sample folders will go
-  # submission: where cluster submit scripts and log files will go
   results_subdir: results_pipeline
   submission_subdir: submission
-  # pipelines_dir: ABSOLUTE PATH the directory where the Looper will find pipeline
-  # scripts (and accompanying pipeline config files) for submission.
   pipelines_dir: $CODEBASE/pipelines
-  # Elements in this section can be absolute or relative.
-  # Typically, this project config file is stored with the project metadata, so
-  # relative paths are considered relative to this project config file.
-  # sample_annotation: one-row-per-sample metadata
   sample_annotation: microtest_sample_annotation.csv
-  # merge_table: input for samples with more than one input file
   merge_table: microtest_merge_table.csv
-  # compare_table: comparison pairs or groups, like normalization samples
-  compare_table: null.csv
-
 
-# a list of annotation sheet columns that are "derived"
-# the values in these are constructed using a regex-like expression
-# of variables (defined in the next section).
 derived_columns: [data_source]
 
-
 data_sources:
-  # specify the ABSOLUTE PATH of input files using variable path expressions
-  # entries correspond to values in the data_source column in sample_annotation table
-  # {variable} can be used to replace environment variables or other sample_annotation columns
-  # If you use {variable} codes, you should quote the field so python can parse it.
   bsf_samples: "{RAWDATA}{flowcell}/{flowcell}_{lane}_samples/{flowcell}_{lane}#{BSF_name}.bam"
   microtest: "/data/groups/lab_bock/shared/resources/microtest/{sample_name}.bam"
   microtest_merge: "/data/groups/lab_bock/shared/resources/microtest/{sample_name}{file_number}.bam"
 
-
 subprojects:
   config_test:
     pipeline_config:
       wgbs.py: wgbs_ds.yaml
 
-
-genomes:
-  human: hg19
-  mouse: mm10
-
-transcriptomes:
-  human: hg19_cdna
-  mouse: mm10_cdna
-
+implied_columns:
+  organism:
+    human:
+      genome: hg19
+      transcriptome: hg19_cdna
+    mouse:
+      genome: mm10
+      transcriptome: mm10_cdna
 
 pipeline_config:
-  # pipeline configuration files used in project.
-  # Key string must match the _name of the pipeline script_ (including extension)
-  # Relative paths are relative to this project config file.
-  # Default (null) means use the generic config for the pipeline.
-  # wgbs.py: null
-  # Or you can point to a specific config to be used in this project:
-  # rrbs.py: rrbs_config.yaml
-  # wgbs.py: wgbs_config.yaml
-  # cgps: cpgs_config.yaml
-
+  rrbs.py: rrbs_config.yaml
 
 pipeline_args:
   rnaBitSeq.py:
-    "-w": 50
-
-
-trackhubs:
-  trackhub_dir: /data/groups/lab_bock/public_html/arendeiro/microtest/
-  # url: if you include this, the make_trackhubs will produce a link to your track hub in the project folder.
-  url: http://www.whatever.com/
-  matrix_x: cell_type
-  matrix_y: cell_count
-  sort_order: cell_type=+
-  parent_track_name: ews_rrbs
-  visibility: dense
-  hub_name: ews_hub
-  short_label_column: sample_name
-  email: arendeiro@cemm.oeaw.ac.at
-
-username: user
-email: user@email.com
+    "-w": 50
diff --git a/looper/__init__.py b/looper/__init__.py
@@ -19,14 +19,18 @@
 # Default user logging format is simple
 DEFAULT_LOGGING_FMT = "%(message)s"
 # Developer logger format is more information-rich
-DEV_LOGGING_FMT = "%(module)s:%(lineno)d [%(levelname)s] > %(message)s "
+DEV_LOGGING_FMT = "%(module)s:%(lineno)d (%(funcName)s) [%(levelname)s] > %(message)s "
 
 
 
 def setup_looper_logger(level, additional_locations=None, devmode=False):
     """
-    Called by test configuration via `pytest`'s `conftest`.
-    All arguments are optional and have suitable defaults.
+    Establish a logger for a looper CLI program.
+
+    This configures a logger to provide information about a looper program's
+    execution. Verbosity, destination(s) for messages, and message text
+    format are controlled by the arguments' values. This is also used by
+    looper's test suite.
 
     :param int | str level: logging level
     :param tuple(str | FileIO[str]) additional_locations: supplementary

diff --git a/looper/_version.py b/looper/_version.py
@@ -1 +1 @@
-__version__ = "0.6.0-rc1"
+__version__ = "0.6.0-rc2"