diff --git a/README.md b/README.md index ada86b41..ef6d502a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # peppy python package -[![Documentation Status](http://readthedocs.org/projects/pep/badge/?version=latest)](http://peppy.readthedocs.io/en/latest/?badge=latest) [![Build Status](https://travis-ci.org/pepkit/peppy.svg?branch=master)](https://travis-ci.org/pepkit/peppy) +[![Documentation Status](http://readthedocs.org/projects/pep/badge/?version=latest)](http://peppy.readthedocs.io/en/latest/?badge=latest) [![Build Status](https://travis-ci.org/pepkit/peppy.svg?branch=master)](https://travis-ci.org/pepkit/peppy) [![PEP compatible](http://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) `peppy` is the official python package for reading **Portable Encapsulated Projects** or **PEP**s in `python`. diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 208576ec..57342964 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -1,6 +1,21 @@ Changelog ****************************** +- **v0.19** (*2019-01-16*): + + - Changed + + - ``Project`` construction no longer requires a sample annotations sheet. + + - Specification of assemblies in the project config outside of ``implied_attributes`` is deprecated. + + - ``implied_columns`` and ``derived_columns`` are deprecated in favor of ``implied_attributes`` and ``derived_attributes``. + + - New + + - Added ``activate_subproject`` method to ``Project``. + + - **v0.18.2** (*2018-07-23*): - Fixed @@ -22,7 +37,7 @@ Changelog - Add ``get_sample`` and ``get_samples`` functions to ``Project`` objects. - - Add ``get_subsamples``and ``get_subsample`` functions to both ``Project`` and ``Sample`` objects. + - Add ``get_subsamples`` and ``get_subsample`` functions to both ``Project`` and ``Sample`` objects. - Subsamples are now objects that can be retrieved individually by name, with the ``subsample_name`` as the index column header. diff --git a/doc/source/jupyter/subannotation.ipynb b/doc/source/jupyter/subannotation.ipynb index 1ff384e4..681addde 100644 --- a/doc/source/jupyter/subannotation.ipynb +++ b/doc/source/jupyter/subannotation.ipynb @@ -6,8 +6,28 @@ "source": [ "# Sample subannotation\n", "\n", + "The PEPs that these examples are based on are available in the [example_peps repository](https://github.com/pepkit/example_peps).\n", + "\n", "This vignette will show you how sample subannotations work in a series of examples.\n", "\n", + "Import libraries and set the working directory:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import peppy\n", "os.chdir(\"/Users/mstolarczyk/Uczelnia/UVA/\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 1: basic sample subannotation table\n", "\n", "Example 1 demonstrates how a `sample_subannotation` is used. In this example, 2 samples have multiple input files that need merging (`frog_1` and `frog_2`), while 1 sample (`frog_3`) does not. Therefore, `frog_3` specifies its file in the `sample_annotation` table, while the others leave that field blank and instead specify several files in the `sample_subannotation`."
@@ -15,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -24,20 +44,19 @@ "'data/frog1a_data.txt data/frog1b_data.txt data/frog1c_data.txt'" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import peppy\n", - "p1 = peppy.Project(\"example_subannotation1/project_config.yaml\")\n", + "p1 = peppy.Project(\"example_peps/example_subannotation1/project_config.yaml\")\n", "p1.samples[0].file" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -66,38 +85,38 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog1a_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog1b_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog1c_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog1a_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog1b_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog1c_data.txt'" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import peppy\n", - "p2 = peppy.Project(\"example_subannotation2/project_config.yaml\")\n", + "p2 = peppy.Project(\"example_peps/example_subannotation2/project_config.yaml\")\n", "p2.samples[0].file" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog2a_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog2b_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog2a_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog2b_data.txt'" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -108,16 +127,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog3_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog3_data.txt'" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -128,16 +147,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation2/../data/frog4_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation2/../data/frog4_data.txt'" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -157,37 +176,37 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog1a_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog1b_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog1c_data.txt'" + 
"'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog1a_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog1b_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog1c_data.txt'" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "p3 = peppy.Project(\"example_subannotation3/project_config.yaml\")\n", + "p3 = peppy.Project(\"example_peps/example_subannotation3/project_config.yaml\")\n", "p3.samples[0].file" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog2_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog2a_data.txt /home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog2b_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog2_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog2a_data.txt /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog2b_data.txt'" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -198,16 +217,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog3_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog3_data.txt'" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -218,16 +237,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation3/../data/frog4_data.txt'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation3/../data/frog4_data.txt'" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -247,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -256,19 +275,19 @@ "'frog1a_data.txt frog1b_data.txt frog1c_data.txt'" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "p4 = peppy.Project(\"example_subannotation4/project_config.yaml\")\n", + "p4 = peppy.Project(\"example_peps/example_subannotation4/project_config.yaml\")\n", "p4.samples[0].read1" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -277,7 +296,7 @@ "'frog1a_data2.txt frog1b_data2.txt frog1b_data2.txt'" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -297,37 +316,37 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog1a_R1.fq.gz /home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog1b_R1.fq.gz /home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog1c_R1.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog1a_R1.fq.gz 
/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog1b_R1.fq.gz /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog1c_R1.fq.gz'" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "p5 = peppy.Project(\"example_subannotation5/project_config.yaml\")\n", + "p5 = peppy.Project(\"example_peps/example_subannotation5/project_config.yaml\")\n", "p5.samples[0].read1" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog1a_R2.fq.gz /home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog1b_R2.fq.gz /home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog1c_R2.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog1a_R2.fq.gz /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog1b_R2.fq.gz /Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog1c_R2.fq.gz'" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -338,16 +357,16 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog2_R1.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog2_R1.fq.gz'" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -358,16 +377,16 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog2_R2.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog2_R2.fq.gz'" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -378,16 +397,16 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog3_R1.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog3_R1.fq.gz'" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -398,16 +417,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog3_R2.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog3_R2.fq.gz'" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -418,16 +437,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog4_R1.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog4_R1.fq.gz'" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -438,16 +457,16 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": 
{}, "outputs": [ { "data": { "text/plain": [ - "'/home/mjs5kd/UVA/example_peps/example_subannotation5/../data/frog4_R2.fq.gz'" + "'/Users/mstolarczyk/Uczelnia/UVA/example_peps/example_subannotation5/../data/frog4_R2.fq.gz'" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -455,13 +474,6 @@ "source": [ "p5.samples[3].read2" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/doc/source/jupyter/subprojects.ipynb b/doc/source/jupyter/subprojects.ipynb new file mode 100644 index 00000000..f1c2303b --- /dev/null +++ b/doc/source/jupyter/subprojects.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Subprojects\n", + "\n", + "The PEP that this example is based on is available in the [example_peps repository](https://github.com/pepkit/example_peps) in the [example_subprojects1 folder](https://github.com/pepkit/example_peps/tree/master/example_subprojects1).\n", + "\n", + "The example below demonstrates how and why to use the subprojects functionality to **define numerous similar projects in a single project config file**. This functionality is extremely convenient when one has to define projects with minor differences in settings, such as different attributes in the annotation sheet: for example, libraries `ABCD` and `EFGH` instead of the original `RRBS`.\n", + "\n", + "Import libraries and set the working directory:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import peppy\n", "os.chdir(\"/Users/mstolarczyk/Uczelnia/UVA/\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Code\n", "\n", "Read in the project metadata by specifying the path to the `project_config.yaml`:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "p_subproj = peppy.Project(\"example_peps/example_subprojects1/project_config.yaml\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's inspect the sample annotation sheet." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": {
+ "text/plain": [ + " sample_name library organism time file_path\n", + "0 pig_0h RRBS pig 0 source1\n", + "1 pig_1h RRBS pig 1 source1\n", + "2 frog_0h RRBS frog 0 source1\n", + "3 frog_1h RRBS frog 1 source1" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p_subproj.sheet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To see whether there are any subprojects available within the `project_config.yaml` file, run the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'newLib': {'metadata': {'sample_annotation': 'sample_annotation_newLib.csv'}}, 'newLib2': {'metadata': {'sample_annotation': 'sample_annotation_newLib2.csv'}}}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p_subproj.subprojects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are two subprojects available: `newLib` and `newLib2`. Nonetheless, only the main one is \"active\".\n", + "\n", + "Each of the subprojects can be activated with the following command:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "p_subproj.activate_subproject(\"newLib\")\n", "p_subproj.activate_subproject(\"newLib2\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's inspect the sample annotation sheet when the `newLib2` subproject is active." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": {
+ "text/plain": [ + " sample_name library organism time file_path\n", + "0 pig_0h EFGH pig 0 source1\n", + "1 pig_1h EFGH pig 1 source1\n", + "2 frog_0h EFGH frog 0 source1\n", + "3 frog_1h EFGH frog 1 source1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p_subproj.sheet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The PEP\n", + "\n", + "The `library` attribute in each sample has changed from `RRBS` to `EFGH`. This behavior was specified in the `project_config.yaml`, which points to a different annotation sheet, `sample_annotation_newLib2.csv`, with a changed `library` attribute." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "metadata:\n", + " sample_annotation: sample_annotation.csv\n", + " output_dir: $HOME/hello_looper_results\n", + "\n", + "derived_attributes: [file_path]\n", + "data_sources:\n", + " source1: /data/lab/project/{organism}_{time}h.fastq\n", + " source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq\n", + "\n", + "subprojects:\n", + " newLib:\n", + " metadata:\n", + " sample_annotation: sample_annotation_newLib.csv\n", + " newLib2:\n", + " metadata:\n", + " sample_annotation: sample_annotation_newLib2.csv\n", + "\n", + "\n" + ] + } + ], + "source": [ + "with open(\"example_peps/example_subprojects1/project_config.yaml\") as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sample_name,library,organism,time,file_path\n", + "pig_0h,EFGH,pig,0,source1\n", + "pig_1h,EFGH,pig,1,source1\n", + "frog_0h,EFGH,frog,0,source1\n", + "frog_1h,EFGH,frog,1,source1\n", + "\n" + ] + } + ], + "source": [ + "with open(\"example_peps/example_subprojects1/sample_annotation_newLib2.csv\") as f:\n", + " print(f.read())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/jupyter/tutorial.ipynb b/doc/source/jupyter/tutorial.ipynb index 1a06dc2d..1dbc59d3 100644 --- a/doc/source/jupyter/tutorial.ipynb +++ b/doc/source/jupyter/tutorial.ipynb @@ -6,6 +6,8 @@ "source": [ "# Basic PEP example\n", "\n", + "The PEP that this example is based on is available in the [example_peps repository](https://github.com/pepkit/example_peps) in the [example_basic](https://github.com/pepkit/example_peps/tree/master/example_basic) folder.\n", + "\n", "This vignette will show you a simple example PEP-formatted project, and how to read it into python using the `peppy` package.\n", "\n", "\n", diff --git a/peppy/__init__.py b/peppy/__init__.py index 284cdac6..6a63fa00 100644 --- a/peppy/__init__.py +++ b/peppy/__init__.py @@ -14,12 +14,13 @@ from ._version import __version__ from .attribute_dict import AttributeDict from .const import * +from .exceptions import PeppyError from .project import Project, ProjectContext from .sample import Sample, Subsample __classes__ = ["AttributeDict", "Project", "Sample"] -__all__ = __classes__ +__all__ = __classes__ +
["PeppyError"] LOGGING_LEVEL = "INFO" @@ -41,7 +42,7 @@ def setup_peppy_logger(level, additional_locations=None, devmode=False): """ - Establish a logger for a pe. + Establish a project logger. This configures a logger to provide information about pep models. Verbosity, destination(s) for messages, and message text format are diff --git a/peppy/_version.py b/peppy/_version.py index b0d7306a..5fb6b765 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.18.2" +__version__ = "0.19" diff --git a/peppy/attribute_dict.py b/peppy/attribute_dict.py index cb549f75..3061ff73 100644 --- a/peppy/attribute_dict.py +++ b/peppy/attribute_dict.py @@ -9,7 +9,9 @@ from pandas import Series -from .utils import copy +from .const import DERIVATIONS_DECLARATION, IMPLICATIONS_DECLARATION +from .utils import \ + copy, has_null_value, non_null_value, warn_derived_cols, warn_implied_cols ATTRDICT_METADATA = {"_force_nulls": False, "_attribute_identity": False} @@ -67,6 +69,7 @@ def add_entries(self, entries): :param Iterable[(object, object)] | Mapping | pandas.Series entries: collection of pairs of keys and values + :return AttributeDict: the updated instance """ if entries is None: return @@ -82,6 +85,27 @@ def add_entries(self, entries): # Assume we now have pairs; allow corner cases to fail hard here. for key, value in entries_iter: self.__setitem__(key, value) + return self + + + def is_null(self, item): + """ + Conjunction of presence in underlying mapping and value being None + + :param object item: Key to check for presence and null value + :return bool: True iff the item is present and has null value + """ + return has_null_value(item, self) + + + def non_null(self, item): + """ + Conjunction of presence in underlying mapping and value not being None + + :param object item: Key to check for presence and non-null value + :return bool: True iff the item is present and has non-null value + """ + return non_null_value(item, self) def __setattr__(self, key, value): @@ -141,6 +165,12 @@ def __setitem__(self, key, value): :raises _MetadataOperationException: if attempt is made to set value for privileged metadata key """ + if key == "derived_columns": + warn_derived_cols() + key = DERIVATIONS_DECLARATION + elif key == "implied_columns": + warn_implied_cols() + key = IMPLICATIONS_DECLARATION if isinstance(value, Mapping): try: # Combine AttributeDict instances. diff --git a/peppy/const.py b/peppy/const.py index 2d810014..62b091ed 100644 --- a/peppy/const.py +++ b/peppy/const.py @@ -14,9 +14,10 @@ # Project-related DATA_SOURCES_SECTION = "data_sources" -IMPLICATIONS_DECLARATION = "implied_columns" +DERIVATIONS_DECLARATION = "derived_attributes" +IMPLICATIONS_DECLARATION = "implied_attributes" SAMPLE_INDEPENDENT_PROJECT_SECTIONS = \ - ["metadata", "derived_columns", IMPLICATIONS_DECLARATION, "trackhubs"] + ["metadata", DERIVATIONS_DECLARATION, IMPLICATIONS_DECLARATION, "trackhubs"] PROJECT_CONSTANTS = ["DATA_SOURCES_SECTION", "IMPLICATIONS_DECLARATION", "SAMPLE_INDEPENDENT_PROJECT_SECTIONS"] diff --git a/peppy/exceptions.py b/peppy/exceptions.py new file mode 100644 index 00000000..ad9fcc1c --- /dev/null +++ b/peppy/exceptions.py @@ -0,0 +1,12 @@ +""" Custom error types """ + +from abc import ABCMeta + + +class PeppyError(Exception): + """ Base error type for peppy custom errors. 
""" + + __metaclass__ = ABCMeta + + def __init__(self, msg): + super(PeppyError, self).__init__(msg) diff --git a/peppy/project.py b/peppy/project.py index 52c2dc7d..6dec8024 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -55,6 +55,7 @@ from collections import Iterable, Mapping else: from collections.abc import Iterable, Mapping +import warnings import pandas as pd import yaml @@ -64,12 +65,17 @@ COMPUTE_SETTINGS_VARNAME, DATA_SOURCE_COLNAME, \ DEFAULT_COMPUTE_RESOURCES_NAME, IMPLICATIONS_DECLARATION, \ SAMPLE_ANNOTATIONS_KEY, SAMPLE_NAME_COLNAME +from .exceptions import PeppyError from .sample import merge_sample, Sample from .utils import \ - add_project_sample_constants, alpha_cased, copy, fetch_samples, is_url + add_project_sample_constants, alpha_cased, copy, fetch_samples, is_url, \ + non_null_value, warn_derived_cols, warn_implied_cols MAX_PROJECT_SAMPLES_REPR = 12 +GENOMES_KEY = "genomes" +TRANSCRIPTOMES_KEY = "transcriptomes" +IDEALLY_IMPLIED = [GENOMES_KEY, TRANSCRIPTOMES_KEY] _LOGGER = logging.getLogger(__name__) @@ -120,7 +126,7 @@ def __exit__(self, *args): @copy class Project(AttributeDict): """ - A class to model a Project. + A class to model a Project (collection of samples and metadata). :param config_file: Project config file (YAML). :type config_file: str @@ -165,7 +171,7 @@ class Project(AttributeDict): """ - DERIVED_COLUMNS_DEFAULT = [DATA_SOURCE_COLNAME] + DERIVED_ATTRIBUTES_DEFAULT = [DATA_SOURCE_COLNAME] def __init__(self, config_file, subproject=None, @@ -195,15 +201,16 @@ def __init__(self, config_file, subproject=None, default_compute, when_missing=no_environment_exception) # Load settings from environment yaml for local compute infrastructure. + compute_env_file = compute_env_file or os.getenv(self.compute_env_var) if compute_env_file: - _LOGGER.debug("Updating environment settings based on file '%s'", - compute_env_file) - self.update_environment(compute_env_file) - + if os.path.isfile(compute_env_file): + self.update_environment(compute_env_file) + else: + _LOGGER.warning("Compute env path isn't a file: {}". + format(compute_env_file)) else: - _LOGGER.info("Using default {envvar}. You may set environment " - "variable {envvar} to configure environment " - "settings.".format(envvar=self.compute_env_var)) + _LOGGER.info("No compute env file was provided and {} is unset; " + "using default".format(self.compute_env_var)) # Initialize default compute settings. _LOGGER.debug("Establishing project compute settings") @@ -216,7 +223,7 @@ def __init__(self, config_file, subproject=None, if no_compute_exception: no_compute_exception(message) else: - _LOGGER.warn(message) + _LOGGER.warning(message) else: _LOGGER.debug("Compute: %s", str(self.compute)) @@ -233,7 +240,7 @@ def __init__(self, config_file, subproject=None, _LOGGER.info("Using subproject: '{}'".format(subproject)) self.parse_config_file(subproject) - if "data_sources" in self: + if self.non_null("data_sources"): # Expand paths now, so that it's not done for every sample. for src_key, src_val in self.data_sources.items(): src_val = os.path.expandvars(src_val) @@ -254,11 +261,11 @@ def __init__(self, config_file, subproject=None, # Establish derived columns. try: # Do not duplicate derived column names. 
- self.derived_columns.extend( - [colname for colname in self.DERIVED_COLUMNS_DEFAULT - if colname not in self.derived_columns]) + self.derived_attributes.extend( + [colname for colname in self.DERIVED_ATTRIBUTES_DEFAULT + if colname not in self.derived_attributes]) except AttributeError: - self.derived_columns = self.DERIVED_COLUMNS_DEFAULT + self.derived_attributes = self.DERIVED_ATTRIBUTES_DEFAULT self.finalize_pipelines_directory() @@ -269,29 +276,18 @@ def __init__(self, config_file, subproject=None, self.metadata.pipelines_dir)) path_anns_file = self.metadata.sample_annotation - _LOGGER.debug("Reading sample annotations sheet: '%s'", path_anns_file) - try: + if path_anns_file: + _LOGGER.debug("Reading sample annotations sheet: '%s'", path_anns_file) _LOGGER.info("Setting sample sheet from file '%s'", path_anns_file) - self.sheet = check_sample_sheet(path_anns_file) - except IOError: - _LOGGER.error("Alleged annotations file doesn't exist: '%s'", - path_anns_file) - anns_folder_path = os.path.dirname(path_anns_file) - try: - annotations_file_folder_contents = \ - os.listdir(anns_folder_path) - except OSError: - _LOGGER.error("Annotations file folder doesn't exist either: " - "'%s'", anns_folder_path) - else: - _LOGGER.error("Annotations file folder's contents: {}". - format(annotations_file_folder_contents)) - raise + self._sheet = self.parse_sample_sheet(path_anns_file) + else: + _LOGGER.warning("No sample annotations sheet in config") + self._sheet = None self.sample_subannotation = None # Basic sample maker will handle name uniqueness check. - if defer_sample_construction: + if defer_sample_construction or self._sheet is None: self._samples = None else: self._set_basic_samples() @@ -299,11 +295,13 @@ def __init__(self, config_file, subproject=None, def __repr__(self): """ Representation in interpreter. """ + if len(self) == 0: + return "{}" samples_message = "{} (from '{}')". \ format(self.__class__.__name__, self.config_file) try: num_samples = len(self._samples) - except AttributeError: + except (AttributeError, TypeError): pass else: samples_message += " with {} sample(s)".format(num_samples) @@ -337,14 +335,50 @@ def constants(self): @property def default_compute_envfile(self): - """ Path to default compute environment settings file. """ + """ + Path to default compute environment settings file. + + :return str: Path to this project's default compute env config file. + """ return os.path.join( self.templates_folder, "default_compute_settings.yaml") + @property + def derived_columns(self): + """ + Collection of sample attributes for which value of each is derived from elsewhere + + :return list[str]: sample attribute names for which value is derived + """ + warn_derived_cols() + try: + return self.derived_attributes + except AttributeError: + return [] + + + @property + def implied_columns(self): + """ + Collection of sample attributes for which value of each is implied by other(s) + + :return list[str]: sample attribute names for which value is implied by other(s) + """ + warn_implied_cols() + try: + return self.implied_attributes + except AttributeError: + return AttributeDict() + + @property def num_samples(self): - """ Number of samples available in this Project. """ + """ + Count the number of samples available in this Project. + + :return int: number of samples available in this Project. 
+ """ return sum(1 for _ in self.sample_names) @@ -424,13 +458,22 @@ def samples(self): :return Iterable[Sample]: Sample instance for each of this Project's samples """ - if self._samples is None: - _LOGGER.debug("Building basic sample object(s) for %s", - self.__class__.__name__) - self._set_basic_samples() return self._samples + @property + def sheet(self): + """ + Annotations/metadata sheet describing this Project's samples. + + :return pandas.core.frame.DataFrame: table of samples in this Project + """ + from copy import copy as cp + if self._sheet is None: + self._sheet = self.parse_sample_sheet(self.metadata.sample_annotation) + return cp(self._sheet) + + @property def templates_folder(self): """ @@ -482,9 +525,9 @@ def get_sample(self, sample_name): :return Sample: The requested Sample object """ - samples = self.get_samples(sample_name) + samples = self.get_samples([sample_name]) if len(samples) > 1: - _LOGGER.warn("More than one sample was detected; returning the first") + _LOGGER.warning("More than one sample was detected; returning the first") if len(samples) == 0: raise ValueError("Project has no sample named {name}.".format(name=sample_name)) @@ -492,6 +535,23 @@ def get_sample(self, sample_name): return samples[0] + def activate_subproject(self, subproject): + """ + Activate a subproject. + + This method will update Project attributes, adding new values + associated with the subproject indicated, and in case of collision with + an existing key/attribute the subproject's value will be favored. + + :param str subproject: A string with a subproject name to be activated + :return Project: A Project with the selected subproject activated + """ + conf_file = self.config_file + self.clear() + self.__init__(conf_file, subproject) + return self + + def get_samples(self, sample_names): """ Returns a list of sample objects given a list of sample names @@ -536,9 +596,9 @@ def _check_unique_samples(self): repeats = {name: n for name, n in Counter( s.name for s in self._samples).items() if n > 1} if repeats: - histogram_text = "\n".join( + hist_text = "\n".join( "{}: {}".format(name, n) for name, n in repeats.items()) - _LOGGER.warn("Non-unique sample names:\n{}".format(histogram_text)) + _LOGGER.warning("Non-unique sample names:\n{}".format(hist_text)) def finalize_pipelines_directory(self, pipe_path=""): @@ -648,7 +708,7 @@ def make_project_dirs(self): try: os.makedirs(folder_path) except OSError as e: - _LOGGER.warn("Could not create project folder: '%s'", + _LOGGER.warning("Could not create project folder: '%s'", str(e)) @@ -667,13 +727,13 @@ def _set_basic_samples(self): except KeyError: _LOGGER.debug("No sample subannotations") else: - _LOGGER.warn("Switch to 'sample_subannotation' in lieu of " - "'merge_table.'") + _LOGGER.warning("'merge_table' attribute is deprecated. Please use " + "'sample_subannotation' instead.") if self.sample_subannotation is None: if sub_ann and os.path.isfile(sub_ann): _LOGGER.info("Reading subannotations: %s", sub_ann) - self.sample_subannotation = pd.read_table( + self.sample_subannotation = pd.read_csv( sub_ann, sep=None, engine="python") _LOGGER.debug("Subannotations shape: {}". format(self.sample_subannotation.shape)) @@ -703,14 +763,13 @@ def _prep_samples(self): # Add values that are constant across this Project's samples. sample = add_project_sample_constants(sample, self) - # TODO: use implied_columns in 0.8. 
sample.set_genome(self.get("genomes")) sample.set_transcriptome(self.get("transcriptomes")) _LOGGER.debug("Merging sample '%s'", sample.name) - sample.infer_columns(self.get(IMPLICATIONS_DECLARATION)) + sample.infer_attributes(self.get(IMPLICATIONS_DECLARATION)) merge_sample(sample, self.sample_subannotation, - self.data_sources, self.derived_columns) + self.data_sources, self.derived_attributes) _LOGGER.debug("Setting sample file paths") sample.set_file_paths(self) # Hack for backwards-compatibility @@ -732,6 +791,7 @@ def parse_config_file(self, subproject=None): """ Parse provided yaml config file and check required fields exist. + :param str subproject: Name of subproject to activate, optional :raises KeyError: if config file lacks required section(s) """ @@ -740,6 +800,9 @@ def parse_config_file(self, subproject=None): with open(self.config_file, 'r') as conf_file: config = yaml.safe_load(conf_file) + for msg in suggest_implied_attributes(config): + warnings.warn(msg, DeprecationWarning) + _LOGGER.debug("{} config data: {}".format( self.__class__.__name__, config)) @@ -752,19 +815,27 @@ def parse_config_file(self, subproject=None): self.__class__.__name__, len(self.keys()), self.keys())) # Overwrite any config entries with entries in the subproject. - if "subprojects" in config and subproject: + if non_null_value("subprojects", config) and subproject: _LOGGER.debug("Adding entries for subproject '{}'". format(subproject)) - subproj_updates = config['subprojects'][subproject] + try: + subproj_updates = config['subprojects'][subproject] + except KeyError: + raise Exception( + "Unknown subproject ({}); defined subprojects: {}".format( + subproject, ", ".join([sp for sp in config["subprojects"]]))) _LOGGER.debug("Updating with: {}".format(subproj_updates)) self.add_entries(subproj_updates) + elif subproject: + _LOGGER.warning("Subproject {} requested but no subprojects " + "are defined".format(subproject)) else: - _LOGGER.debug("No subproject") + _LOGGER.debug("No subproject requested") # In looper 0.4, for simplicity the paths section was eliminated. # For backwards compatibility, mirror the paths section into metadata. if "paths" in config: - _LOGGER.warn( + _LOGGER.warning( "Paths section in project config is deprecated. " "Please move all paths attributes to metadata section. " "This option will be removed in future versions.") @@ -868,9 +939,7 @@ def parse_config_file(self, subproject=None): # Required variables check if not hasattr(self.metadata, SAMPLE_ANNOTATIONS_KEY): - raise _MissingMetadataException( - missing_section=SAMPLE_ANNOTATIONS_KEY, - path_config_file=self.config_file) + self.metadata.sample_annotation = None def set_compute(self, setting): @@ -914,9 +983,7 @@ def set_compute(self, setting): def set_project_permissions(self): - """ - Make the project's public_html folder executable. - """ + """ Make the project's public_html folder executable. """ try: os.chmod(self.trackhubs.trackhub_dir, 0o0755) except OSError: @@ -993,47 +1060,75 @@ def _handle_missing_env_attrs(self, env_settings_file, when_missing): message = "'{}' lacks environment attributes: {}". \ format(env_settings_file, missing_env_attrs) if when_missing is None: - _LOGGER.warn(message) + _LOGGER.warning(message) else: when_missing(message) + @staticmethod + def parse_sample_sheet(sample_file, dtype=str): + """ + Check if csv file exists and has all required columns. + + :param str sample_file: path to sample annotations file. + :param type dtype: data type for CSV read. 
+ :return pandas.core.frame.DataFrame: parsed sample annotations table. + :raises Project.MissingSampleSheetError: if the given annotations file can't be read. + """ + # Although no null value replacements or supplements are being passed, + # toggling the keep_default_na value to False solved an issue with 'nan' + # and/or 'None' as an argument for an option in the pipeline command + # that's generated from a Sample's attributes. + # + # See https://github.com/pepkit/peppy/issues/159 for the original issue + # and https://github.com/pepkit/peppy/pull/160 for the pull request + # that resolved it. + try: + df = pd.read_csv(sample_file, sep=None, dtype=dtype, index_col=False, + engine="python", keep_default_na=False) + except IOError: + raise Project.MissingSampleSheetError(sample_file) + else: + _LOGGER.info("Setting sample sheet from file '%s'", sample_file) + missing = {SAMPLE_NAME_COLNAME} - set(df.columns) + if len(missing) != 0: + _LOGGER.warning( + "Annotation sheet ('{}') is missing column(s):\n{}\n" + "It has: {}".format(sample_file, "\n".join(missing), + ", ".join(list(df.columns)))) + return df + + + class MissingMetadataException(PeppyError): + """ Project needs certain metadata. """ + def __init__(self, missing_section, path_config_file=None): + reason = "Project configuration lacks required metadata section {}".\ + format(missing_section) + if path_config_file: + reason += "; used config file '{}'".format(path_config_file) + super(Project.MissingMetadataException, self).__init__(reason) + + + class MissingSampleSheetError(PeppyError): + """ Represent case in which sample sheet is specified but nonexistent. """ + def __init__(self, sheetfile): + super(Project.MissingSampleSheetError, self).__init__( + "Missing sample annotation sheet ({}); a project need not use " + "a sample sheet, but if it does the file must exist." + .format(sheetfile)) + + -def check_sample_sheet(sample_file, dtype=str): +def suggest_implied_attributes(prj): """ - Check if csv file exists and has all required columns. + If given project contains what could be implied attributes, suggest that. - :param str sample_file: path to sample annotations file. - :param type dtype: data type for CSV read. - :raises IOError: if given annotations file can't be read. - :raises ValueError: if required column(s) is/are missing. + :param Iterable prj: Intent is a Project, but this could be any iterable + of strings to check for suitability of declaration as implied attr + :return list[str]: (likely empty) list of warning messages about project + config keys that could be implied attributes """ - # Although no null value replacements or supplements are being passed, - # toggling the keep_default_na value to False solved an issue with 'nan' - # and/or 'None' as an argument for an option in the pipeline command - # that's generated from a Sample's attributes. - # - # See https://github.com/pepkit/peppy/issues/159 for the original issue - # and https://github.com/pepkit/peppy/pull/160 for the pull request - # that resolved it. - df = pd.read_table(sample_file, sep=None, dtype=dtype, - index_col=False, engine="python", keep_default_na=False) - req = [SAMPLE_NAME_COLNAME] - missing = set(req) - set(df.columns) - if len(missing) != 0: - raise ValueError( - "Annotation sheet ('{}') is missing column(s):\n{}\nIt has: {}". - format(sample_file, "\n".join(missing), - ", ".join(list(df.columns)))) - return df - - - -class _MissingMetadataException(Exception): - """ Project needs certain metadata. 
""" - def __init__(self, missing_section, path_config_file=None): - reason = "Project configuration lacks required metadata section {}".\ - format(missing_section) - if path_config_file: - reason += "; used config file '{}'".format(path_config_file) - super(_MissingMetadataException, self).__init__(reason) + def suggest(key): + return "To declare {}, consider using {}".format( + key, IMPLICATIONS_DECLARATION) + return [suggest(k) for k in prj if k in IDEALLY_IMPLIED] diff --git a/peppy/sample.py b/peppy/sample.py index 5c5020b0..3fc18a65 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -21,7 +21,7 @@ ALL_INPUTS_ATTR_NAME, DATA_SOURCE_COLNAME, DATA_SOURCES_SECTION, \ REQUIRED_INPUTS_ATTR_NAME, SAMPLE_EXECUTION_TOGGLE, VALID_READ_TYPES from .utils import check_bam, check_fastq, copy, get_file_size, \ - grab_project_data, is_url,parse_ftype, sample_folder + grab_project_data, parse_ftype, sample_folder COL_KEY_SUFFIX = "_key" @@ -205,7 +205,7 @@ def determine_missing_requirements(self): # set_pipeline_attributes must be run first. if not hasattr(self, "required_inputs"): - _LOGGER.warn("You must run set_pipeline_attributes " + _LOGGER.warning("You must run set_pipeline_attributes " "before determine_missing_requirements") return null_return @@ -316,11 +316,11 @@ def get_sheet_dict(self): originally provided via the sample sheet (i.e., the a map-like representation of the instance, excluding derived items) """ - return OrderedDict([[k, getattr(self, k)] - for k in self.sheet_attributes]) + return OrderedDict( + [[k, getattr(self, k)] for k in self.sheet_attributes]) - def infer_columns(self, implications): + def infer_attributes(self, implications): """ Infer value for additional field(s) from other field(s). @@ -480,7 +480,7 @@ def locate_data_source(self, data_sources, column_name=DATA_SOURCE_COLNAME, try: # Grab a temporary dictionary of sample attributes and update these # with any provided extra variables to use in the replacement. - # This is necessary for derived_columns in the merge table. + # This is necessary for derived_attributes in the merge table. # Here the copy() prevents the actual sample from being # updated by update(). temp_dict = self.__dict__.copy() @@ -490,7 +490,7 @@ def locate_data_source(self, data_sources, column_name=DATA_SOURCE_COLNAME, _LOGGER.debug("Pre-glob: %s", val) val_globbed = sorted(glob.glob(val)) if not val_globbed: - _LOGGER.warn("Unmatched regex-like: '%s'", val) + _LOGGER.warning("Unmatched regex-like: '%s'", val) else: val = " ".join(val_globbed) _LOGGER.debug("Post-glob: %s", val) @@ -524,7 +524,7 @@ def set_file_paths(self, project=None): project = project or self.prj - for col in project.get("derived_columns", []): + for col in project.get("derived_attributes", []): # Only proceed if the specified column exists # and was not already merged or derived. if not hasattr(self, col): @@ -659,16 +659,14 @@ def set_pipeline_attributes( # read_type, read_length, paired. self.ngs_inputs = self.get_attr_values("ngs_inputs_attr") - set_rtype = False + set_rtype_reason = "" if not hasattr(self, "read_type"): set_rtype_reason = "read_type not yet set" - set_rtype = True elif not self.read_type or self.read_type.lower() \ not in VALID_READ_TYPES: set_rtype_reason = "current read_type is invalid: '{}'". 
\ format(self.read_type) - set_rtype = True - if set_rtype: + if set_rtype_reason: _LOGGER.debug( "Setting read_type for %s '%s': %s", self.__class__.__name__, self.name, set_rtype_reason) @@ -747,7 +745,7 @@ def set_read_type(self, rlen_sample_size=10, permissive=True): except NotImplementedError as e: if not permissive: raise - _LOGGER.warn(e.message) + _LOGGER.warning(e.message) return except IOError: if not permissive: @@ -798,7 +796,7 @@ def set_read_type(self, rlen_sample_size=10, permissive=True): setattr(self, feature, feat_val) if getattr(self, feature) is None and len(existing_files) > 0: - _LOGGER.warn("Not all input files agree on '%s': '%s'", + _LOGGER.warning("Not all input files agree on '%s': '%s'", feature, self.name) @@ -879,7 +877,7 @@ def obj2dict(obj, name=None, for k, v in obj.__dict__.items() if k not in to_skip} elif isinstance(obj, Series): - _LOGGER.warn("Serializing series as mapping, not array-like") + _LOGGER.warning("Serializing series as mapping, not array-like") return obj.to_dict() elif hasattr(obj, 'dtype'): # numpy data types # TODO: this fails with ValueError for multi-element array. @@ -929,16 +927,24 @@ def obj2dict(obj, name=None, outfile.write(yaml_data) - def update(self, newdata): + def update(self, newdata, **kwargs): """ Update Sample object with attributes from a dict. """ - for key, value in newdata.items(): - setattr(self, key, value) + duplicates = [k for k in set(newdata.keys()) & set(kwargs.keys()) + if newdata[k] != kwargs[k]] + if len(duplicates) != 0: + raise ValueError("{} duplicate keys with different values: {}". + format(len(duplicates), ", ".join(duplicates))) + for k, v in newdata.items(): + setattr(self, k, v) + for k, v in kwargs.items(): + setattr(self, k, v) -def merge_sample(sample, sample_subann, data_sources=None, derived_columns=None): +def merge_sample(sample, sample_subann, + data_sources=None, derived_attributes=None): """ Use merge table (subannotation) data to augment/modify Sample. @@ -946,9 +952,9 @@ def merge_sample(sample, sample_subann, data_sources=None, derived_columns=None) :param sample_subann: data with which to alter Sample :param Mapping data_sources: collection of named paths to data locations, optional - :param Iterable[str] derived_columns: names of columns for which + :param Iterable[str] derived_attributes: names of attributes for which corresponding Sample attribute's value is data-derived, optional - :return Set[str]: names of columns that were merged + :return Set[str]: names of columns/attributes that were merged """ merged_attrs = {} @@ -966,9 +972,9 @@ def merge_sample(sample, sample_subann, data_sources=None, derived_columns=None) format(data_sources)) # Hash derived columns for faster lookup in case of many samples/columns. - derived_columns = set(derived_columns or []) - _LOGGER.debug("Merging Sample with derived columns: {}". - format(derived_columns)) + derived_attributes = set(derived_attributes or []) + _LOGGER.debug("Merging Sample with derived attributes: {}". 
+ format(derived_attributes)) sample_name = getattr(sample, SAMPLE_NAME_COLNAME) sample_indexer = sample_subann[SAMPLE_NAME_COLNAME] == sample_name @@ -988,7 +994,6 @@ def merge_sample(sample, sample_subann, data_sources=None, derived_columns=None) merged_attrs = {key: "" for key in this_sample_rows.columns} subsamples = [] _LOGGER.debug(this_sample_rows) - subsample_count = 0 for subsample_row_id, row in this_sample_rows.iterrows(): try: row['subsample_name'] @@ -1004,7 +1009,7 @@ def merge_sample(sample, sample_subann, data_sources=None, derived_columns=None) # during-iteration change of dictionary size. for attr_name in this_sample_rows.columns: if attr_name == SAMPLE_NAME_COLNAME or \ - attr_name not in derived_columns: + attr_name not in derived_attributes: _LOGGER.log(5, "Skipping merger of attribute '%s'", attr_name) continue @@ -1019,9 +1024,9 @@ def merge_sample(sample, sample_subann, data_sources=None, derived_columns=None) extra_vars=rowdata) # 1) rowdata[attr_name] = data_src_path - _LOGGER.log(5, "Adding derived columns") + _LOGGER.log(5, "Adding derived attributes") - for attr in derived_columns: + for attr in derived_attributes: # Skip over any attributes that the sample lacks or that are # covered by the data from the current (row's) data. diff --git a/peppy/utils.py b/peppy/utils.py index 7b088c1a..4da2be0f 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -12,10 +12,12 @@ from urlparse import urlparse else: from urllib.parse import urlparse - - +if sys.version_info < (3, 3): + from collections import Sized +else: + from collections.abc import Sized +import warnings import yaml - from .const import GENERIC_PROTOCOL_KEY, SAMPLE_INDEPENDENT_PROJECT_SECTIONS @@ -112,6 +114,17 @@ def check_sample_sheet_row_count(sheet, filepath): +def coll_like(c): + """ + Determine whether an object is collection-like. + + :param object c: Object to test as collection + :return bool: Whether the argument is a (non-string) collection + """ + return isinstance(c, Iterable) and not isinstance(c, str) + + + def copy(obj): def copy(self): """ @@ -245,6 +258,18 @@ def grab_project_data(prj): +def has_null_value(k, m): + """ + Determine whether a mapping has a null value for a given key. + + :param Hashable k: Key to test for null value + :param Mapping m: Mapping to test for null value for given key + :return bool: Whether given mapping contains given key with null value + """ + return k in m and _is_null(m[k]) + + + def import_from_source(module_filepath): """ Import a module from a particular filesystem location. @@ -295,6 +320,18 @@ def is_url(maybe_url): +def non_null_value(k, m): + """ + Determine whether a mapping has a non-null value for a given key. + + :param Hashable k: Key to test for non-null value + :param Mapping m: Mapping to test for non-null value for given key + :return bool: Whether given mapping contains given key with non-null value + """ + return k in m and not _is_null(m[k]) + + + def parse_ftype(input_file): """ Checks determine filetype from extension. @@ -380,6 +417,28 @@ def standard_stream_redirector(stream): +def warn_derived_cols(): + """ Produce deprecation warning about derived columns. """ + _warn_cols_to_attrs("derived") + + +def warn_implied_cols(): + """ Produce deprecation warning about implied columns. 
""" + _warn_cols_to_attrs("implied") + + +def _is_null(x): + """ Whether an object is effectively null """ + return x in [None, ""] or (coll_like(x) and isinstance(x, Sized) and 0 == len(x)) + + +def _warn_cols_to_attrs(prefix): + """ Produce deprecation warning about 'columns' rather than 'attributes' """ + warnings.warn("{pfx}_columns should be encoded and referenced " + "as {pfx}_attributes".format(pfx=prefix), DeprecationWarning) + + + class CommandChecker(object): """ Validate PATH availability of executables referenced by a config file. diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 9fc487d2..8e0796aa 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,2 +1,2 @@ mock>=2.0.0 -pytest>=3.0.7 +pytest==3.10.1 diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 0dbaf66b..9f0c760d 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,2 +1,2 @@ coveralls>=1.1 -pytest-cov>=2.4.0 +pytest-cov==2.6.1 diff --git a/setup.py b/setup.py index 8d99b9d3..b0ebc769 100644 --- a/setup.py +++ b/setup.py @@ -5,17 +5,25 @@ import sys +REQDIR = "requirements" + + +def read_reqs(reqs_name): + deps = [] + with open(os.path.join(REQDIR, "requirements-{}.txt".format(reqs_name)), 'r') as f: + for l in f: + if not l.strip(): + continue + #deps.append(l.split("=")[0].rstrip("<>")) + deps.append(l) + return deps + + # Additional keyword arguments for setup(). extra = {} # Ordinary dependencies -DEPENDENCIES = [] -with open("requirements/requirements-all.txt", "r") as reqs_file: - for line in reqs_file: - if not line.strip(): - continue - #DEPENDENCIES.append(line.split("=")[0].rstrip("<>")) - DEPENDENCIES.append(line) +DEPENDENCIES = read_reqs("all") # numexpr for pandas try: @@ -54,9 +62,10 @@ def get_static(name, condition=None): try: import pypandoc long_description = pypandoc.convert_file('README.md', 'rst') -except(IOError, ImportError): +except(IOError, ImportError, OSError): long_description = open('README.md').read() + setup( name="peppy", packages=["peppy"], @@ -77,7 +86,7 @@ def get_static(name, condition=None): scripts=scripts, include_package_data=True, test_suite="tests", - tests_require=(["mock", "pytest"]), + tests_require=read_reqs("dev"), setup_requires=(["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else []), **extra ) diff --git a/tests/conftest.py b/tests/conftest.py index 4f4084c7..db5c81a0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,14 +35,14 @@ pipeline_interfaces: pipelines sample_subannotation: merge.csv -derived_columns: [{derived_column_names}] +derived_attributes: [{derived_attribute_names}] data_sources: src1: "{basedir}/data/{sample_name}{col_modifier}.txt" src3: "{basedir}/data/{sample_name}.txt" src2: "{basedir}/data/{sample_name}-bamfile.bam" -implied_columns: +implied_attributes: sample_name: a: genome: hg38 @@ -127,7 +127,7 @@ "testngs.sh": FILE_BY_SAMPLE } -SAMPLE_ANNOTATION_LINES = """sample_name,library,file,file2,organism,nonmerged_col,data_source,dcol2 +SAMPLE_ANNOTATION_LINES = """sample_name,protocol,file,file2,organism,nonmerged_col,data_source,dcol2 a,testlib,src3,src3,,src3,src3, b,testlib,,,,src3,src3,src1 c,testlib,src3,src3,,src3,src3, @@ -170,7 +170,7 @@ } COMPARISON_FUNCTIONS = ["__eq__", "__ne__", "__len__", "keys", "values", "items"] -COLUMNS = [SAMPLE_NAME_COLNAME, "val1", "val2", "library"] +COLUMNS = [SAMPLE_NAME_COLNAME, "val1", "val2", "protocol"] PROJECT_CONFIG_DATA 
= {"metadata": {"sample_annotation": "annotations.csv"}} @@ -312,8 +312,8 @@ class _DataSourceFormatMapping(dict): mechanism that pep uses to derive columns, but it's also the core string formatting mechanism. """ - def __missing__(self, derived_column): - return "{" + derived_column + "}" + def __missing__(self, derived_attribute): + return "{" + derived_attribute + "}" @@ -333,8 +333,8 @@ def _write_temp(lines, dirpath, fname): :return str: full path to written file """ basedir_replacement = _DataSourceFormatMapping(basedir=dirpath) - derived_columns_replacement = _DataSourceFormatMapping( - **{"derived_column_names": ", ".join(DERIVED_COLNAMES)} + derived_attributes_replacement = _DataSourceFormatMapping( + **{"derived_attribute_names": ", ".join(DERIVED_COLNAMES)} ) filepath = os.path.join(dirpath, fname) data_source_formatter = string.Formatter() @@ -342,12 +342,14 @@ with open(filepath, 'w') as tmpf: for l in lines: if "{basedir}" in l: - l = data_source_formatter.vformat( + out = data_source_formatter.vformat( l, (), basedir_replacement) - elif "{derived_column_names}" in l: - l = data_source_formatter.vformat( - l, (), derived_columns_replacement) - tmpf.write(l) + elif "{derived_attribute_names}" in l: + out = data_source_formatter.vformat( + l, (), derived_attributes_replacement) + else: + out = l + tmpf.write(out) num_lines += 1 _LOGGER.debug("Wrote %d line(s) to disk: '%s'", num_lines, filepath) return filepath @@ -442,6 +444,14 @@ def write_project_files(request): +@pytest.fixture(scope="function") +def subannotation_filepath(tmpdir): + """ Write sample subannotations (temp) file and return path to it. """ + return _write_temp(SAMPLE_SUBANNOTATION_LINES, + dirpath=tmpdir.strpath, fname=MERGE_TABLE_FILENAME) + + + # Placed here (rather than near top of file) for data/use locality. _TEST_DATA_FOLDER = "data" _BAMFILE_PATH = os.path.join(os.path.dirname(__file__), @@ -505,8 +515,8 @@ def proj(request): Create project instance using data from file pointed to by request class. To use this fixture, the test case must reside within a class that - defines a "project_config_file" attribute. This is best done by marking - the class with "@pytest.mark.usefixtures("write_project_files")" + defines a "project_config_file" attribute. This is most easily done by + marking the class with "@pytest.mark.usefixtures('write_project_files')" :param pytest._pytest.fixtures.SubRequest request: test case requesting a project instance diff --git a/tests/helpers.py b/tests/helpers.py index 6a2d2687..edffcf1b 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -4,6 +4,7 @@ import itertools import numpy as np import pytest +import peppy __author__ = "Vince Reuter" @@ -61,3 +62,45 @@ def powerset(items, min_items=0, include_full_pop=True): nonempty_powerset = partial(powerset, min_items=1) + + + +class TempLogFileHandler(object): + """ Context manager for temporary file handler logging attachment """ + + def __init__(self, filepath, level, mode='w'): + """ + Create the temporary file handler by providing path and level + + :param str filepath: Path to file to use for logging handler. + :param str | int level: Minimal severity level for file handler. + :param str mode: Mode in which to create the file handler. + """ + self.logfile = filepath + self._level = level + self._mode = mode + self._used = False + + def __enter__(self): + """ Add the handler to project module's logger, and update state. 
""" + import logging + if self._used: + raise Exception("Cannot reuse a {}".format(self.__class__.__name__)) + handler = logging.FileHandler(self.logfile, mode=self._mode) + handler.setLevel(self._level) + peppy.project._LOGGER.handlers.append(handler) + self._used = True + + def __exit__(self, exc_type, exc_val, exc_tb): + """ Remove the added file handler from the logger. """ + del peppy.project._LOGGER.handlers[-1] + + @property + def messages(self): + """ Open the handler's underlying file and read the messages. """ + if not self._used: + raise Exception( + "Attempted to read messages from unused logfile: " + "{}".format(self.logfile)) + with open(self.logfile, 'r') as f: + return f.readlines() diff --git a/tests/models/independent/test_AttributeDict.py b/tests/models/independent/test_AttributeDict.py index d62ecbba..85bb0881 100644 --- a/tests/models/independent/test_AttributeDict.py +++ b/tests/models/independent/test_AttributeDict.py @@ -546,12 +546,48 @@ def test_attribute_access( assert expected == observed +class NullityTests: + """ Tests of null/non-null values """ + + _KEYNAMES = ["sample_name", "protocol", "arbitrary_attribute"] + + @pytest.mark.parametrize( + argnames="item", argvalues=ATTRDICT_METADATA.keys()) + def test_metadata_are_non_null(self, item): + """ Test the special/reserved AD keys """ + assert AttributeDict().non_null(item) + assert not AttributeDict().is_null(item) + + @pytest.mark.parametrize(argnames="item", argvalues=_KEYNAMES) + def test_missing_is_neither_null_nor_non_null(self, item): + """ Value of absent key is neither null nor non-null """ + ad = AttributeDict() + assert not ad.is_null(item) and not ad.non_null(item) + + @pytest.mark.parametrize(argnames="item", argvalues=_KEYNAMES) + def test_is_null(self, item): + """ Null-valued key/item evaluates as such. """ + ad = AttributeDict() + ad[item] = None + assert ad.is_null(item) and not ad.non_null(item) + + @pytest.mark.parametrize( + argnames=["k", "v"], + argvalues=list(zip(_KEYNAMES, ["sampleA", "WGBS", "random"]))) + def test_non_null(self, k, v): + """ AD is sensitive to value updates """ + ad = AttributeDict() + assert not ad.is_null(k) and not ad.non_null(k) + ad[k] = None + assert ad.is_null(k) and not ad.non_null(k) + ad[k] = v + assert not ad.is_null(k) and ad.non_null(k) + @pytest.mark.usefixtures("write_project_files") class SampleYamlTests: """ AttributeDict metadata only appear in YAML if non-default. """ - @pytest.mark.parametrize( argnames="metadata_attribute", argvalues=ATTRDICT_METADATA.keys(), ids=lambda attr_name: " metadata item = {} ".format(attr_name)) @@ -561,7 +597,6 @@ def test_all_defaults_no_metadata(self, tmpdir, proj, metadata_attribute): filepath = os.path.join(tmpdir.strpath, "sample{}.yaml".format(i)) lines, _ = self._yaml_data(sample, filepath) assert all([metadata_attribute not in line for line in lines]) - @staticmethod def _yaml_data(sample, filepath, section_to_change=None, @@ -585,3 +620,11 @@ def _yaml_data(sample, filepath, section_to_change=None, with open(filepath, 'r') as f: lines = f.readlines() return lines, data + + +@pytest.mark.parametrize( + ["func", "exp"], + [(repr, "{}"), (str, AttributeDict().__class__.__name__ + ": {}")]) +def test_text_repr_empty(func, exp): + """ Empty AttributeDict is correctly represented as text. 
""" + assert exp == func(AttributeDict()) diff --git a/tests/models/independent/test_Project.py b/tests/models/independent/test_Project.py index b6038b8e..3f59aab1 100644 --- a/tests/models/independent/test_Project.py +++ b/tests/models/independent/test_Project.py @@ -3,21 +3,21 @@ import copy import logging import os +import warnings import mock from numpy import random as nprand import pytest import yaml -import peppy from peppy import AttributeDict, Project, Sample -from peppy.const import SAMPLE_ANNOTATIONS_KEY, SAMPLE_NAME_COLNAME -from peppy.project import _MissingMetadataException +from peppy.const import IMPLICATIONS_DECLARATION, SAMPLE_ANNOTATIONS_KEY +from peppy.project import GENOMES_KEY, TRANSCRIPTOMES_KEY from peppy.sample import COL_KEY_SUFFIX from tests.conftest import \ DERIVED_COLNAMES, EXPECTED_MERGED_SAMPLE_FILES, \ MERGED_SAMPLE_INDICES, NUM_SAMPLES -from tests.helpers import named_param +from tests.helpers import named_param, TempLogFileHandler __author__ = "Vince Reuter" @@ -25,28 +25,32 @@ +_GENOMES = {"human": "hg19", "mouse": "mm10"} +_TRASCRIPTOMES = {"human": "hg19_cdna", "mouse": "mm10_cdna"} + + + @pytest.fixture(scope="function") def project_config_data(): """ Provide some basic data for a Project configuration. """ return { "metadata": { - SAMPLE_ANNOTATIONS_KEY: "sample-anns-filler.csv", + SAMPLE_ANNOTATIONS_KEY: "samples.csv", "output_dir": "$HOME/sequencing/output", "pipeline_interfaces": "${CODE}/pipelines"}, "data_sources": {"arbitrary": "placeholder/data/{filename}"}, - "genomes": {"human": "hg19", "mouse": "mm10"}, - "transcriptomes": {"human": "hg19_cdna", "mouse": "mm10_cdna"}} + } def pytest_generate_tests(metafunc): """ Dynamic parameterization/customization for tests in this module. """ - if metafunc.cls == DerivedColumnsTests: - # Parameterize derived columns tests over whether the specification - # is explicit (vs. implied), and which default column to validate. + if metafunc.cls == DerivedAttributesTests: + # Parameterize derived attribute tests over whether the specification + # is explicit (vs. implied), and which default attribute to validate. metafunc.parametrize( argnames="case_type", - argvalues=DerivedColumnsTests.DERIVED_COLUMNS_CASE_TYPES, + argvalues=DerivedAttributesTests.DERIVED_ATTRIBUTES_CASE_TYPES, ids=lambda case_type: "case_type={}".format(case_type)) @@ -71,7 +75,7 @@ def test_no_samples(self, path_empty_project): ids=lambda lazy: "lazy={}".format(lazy)) def test_no_sample_subannotation_in_config( self, tmpdir, spec_type, lazy, proj_conf_data, path_sample_anns): - """ Merge table attribute remains null if config lacks subannotation. """ + """ Subannotation attribute remains null if config lacks subannotation. """ metadata = proj_conf_data["metadata"] try: assert "sample_subannotation" in metadata @@ -94,14 +98,6 @@ def test_no_sample_subannotation_in_config( assert p.sample_subannotation is None - @pytest.mark.skip("Not implemented") - def test_sample_subannotation_construction( - self, tmpdir, project_config_data): - """ Merge table is constructed iff samples are constructed. """ - # TODO: implement - pass - - def test_counting_samples_doesnt_create_samples( self, sample_annotation_lines, path_project_conf, path_sample_anns): @@ -154,18 +150,16 @@ class ProjectRequirementsTests: """ Tests for a Project's set of requirements. """ - def test_lacks_sample_annotations( + def test_lacks_sample_annotation( self, project_config_data, env_config_filepath, tmpdir): - """ Lack of sample annotations precludes Project construction. 
""" - + """ Project can be built without sample annotations. """ # Remove sample annotations KV pair from config data for this test. del project_config_data["metadata"][SAMPLE_ANNOTATIONS_KEY] - - # Write the config and assert the expected exception for Project ctor. + # Write the (sans-annotations) config and assert Project is created. conf_path = _write_project_config( project_config_data, dirpath=tmpdir.strpath) - with pytest.raises(_MissingMetadataException): - Project(conf_path, default_compute=env_config_filepath) + prj = Project(conf_path, default_compute=env_config_filepath) + assert isinstance(prj, Project) def test_minimal_configuration_doesnt_fail( @@ -257,29 +251,22 @@ def test_nonexistent_env_settings_file( misnamed_envconf = os.path.join(envconf_dirpath, envconf_filename) # Create and add log message handler for expected errors. - logfile = tmpdir.join("project-error-messages.log").strpath - expected_error_message_handler = logging.FileHandler(logfile, mode='w') - expected_error_message_handler.setLevel(logging.ERROR) - peppy.project._LOGGER.handlers.append(expected_error_message_handler) - - # Create Project, expecting to generate error messages. - project = Project(minimal_project_conf_path, - default_compute=misnamed_envconf) + log = tmpdir.join("project-error-messages.log").strpath + logview = TempLogFileHandler(log, level=logging.ERROR) - # Remove the temporary message handler. - del peppy.project._LOGGER.handlers[-1] + with logview: + # Create Project, expecting to generate error messages. + project = Project( + minimal_project_conf_path, default_compute=misnamed_envconf) # Ensure nulls for all relevant Project attributes. self._assert_null_compute_environment(project) + # We should have two error messages, describing the exception caught # during default environment parsing and that it couldn't be set. - with open(logfile, 'r') as messages: - exception_messages = messages.readlines() - try: - assert 2 == len(exception_messages) - except AssertionError: - print("Exception messages: {}".format(exception_messages)) - raise + exception_messages = logview.messages + assert 2 == len(exception_messages), \ + "Exception messages: {}".format(exception_messages) def test_project_environment_uses_default_environment_settings( @@ -337,11 +324,11 @@ def default_compute_settings(project): -class DerivedColumnsTests: - """ Tests for the behavior of Project's derived_columns attribute. """ +class DerivedAttributesTests: + """ Tests for the behavior of Project's derived_attributes attribute. """ - ADDITIONAL_DERIVED_COLUMNS = ["arbitrary1", "filler2", "placeholder3"] - DERIVED_COLUMNS_CASE_TYPES = ["implicit", "disjoint", "intersection"] + ADDITIONAL_DERIVED_ATTRIBUTES = ["arbitrary1", "filler2", "placeholder3"] + DERIVED_ATTRIBUTES_CASE_TYPES = ["implicit", "disjoint", "intersection"] def create_project( @@ -354,68 +341,68 @@ def create_project( :param str default_env_path: path to the default environment config file to pass to Project constructor :param str case_type: type of test case to execute; this determines - how to specify the derived columns in the config file + how to specify the derived attribute in the config file :param str dirpath: path in which to write config file :return (Iterable[str], Project): collection of names of derived - columns to expect, along with Project instance with which to test + attribute to expect, along with Project instance with which to test """ # Ensure valid parameterization. 
- if case_type not in self.DERIVED_COLUMNS_CASE_TYPES: + if case_type not in self.DERIVED_ATTRIBUTES_CASE_TYPES: raise ValueError( - "Unexpected derived_columns case type: '{}' (known={})". - format(case_type, self.DERIVED_COLUMNS_CASE_TYPES)) + "Unexpected derived_attributes case type: '{}' (known={})". + format(case_type, self.DERIVED_ATTRIBUTES_CASE_TYPES)) # Parameterization specifies expectation and explicit specification. - expected_derived_columns = copy.copy(Project.DERIVED_COLUMNS_DEFAULT) + expected_derived_attributes = copy.copy(Project.DERIVED_ATTRIBUTES_DEFAULT) if case_type == "implicit": - # Negative control; ensure config data lacks derived columns. - assert "derived_columns" not in project_config_data + # Negative control; ensure config data lacks derived attributes. + assert "derived_attributes" not in project_config_data else: - explicit_derived_columns = \ - copy.copy(self.ADDITIONAL_DERIVED_COLUMNS) - expected_derived_columns.extend(self.ADDITIONAL_DERIVED_COLUMNS) - # Determine explicit inclusion of default derived columns. + explicit_derived_attributes = \ + copy.copy(self.ADDITIONAL_DERIVED_ATTRIBUTES) + expected_derived_attributes.extend(self.ADDITIONAL_DERIVED_ATTRIBUTES) + # Determine explicit inclusion of default derived attributes. if case_type == "intersection": - explicit_derived_columns.extend( - Project.DERIVED_COLUMNS_DEFAULT) - project_config_data["derived_columns"] = explicit_derived_columns + explicit_derived_attributes.extend( + Project.DERIVED_ATTRIBUTES_DEFAULT) + project_config_data["derived_attributes"] = explicit_derived_attributes # Write the config and build the Project. conf_file_path = _write_project_config( project_config_data, dirpath=dirpath) - with mock.patch("peppy.project.check_sample_sheet"): + with mock.patch("peppy.project.Project.parse_sample_sheet"): project = Project(conf_file_path, default_compute=default_env_path) - return expected_derived_columns, project + return expected_derived_attributes, project - def test_default_derived_columns_always_present(self, + def test_default_derived_attributes_always_present(self, env_config_filepath, project_config_data, case_type, tmpdir): - """ Explicit or implicit, default derived columns are always there. """ + """ Explicit or implicit, default derived attributes are always there. """ - expected_derived_columns, project = self.create_project( + expected_derived_attributes, project = self.create_project( project_config_data=project_config_data, default_env_path=env_config_filepath, case_type=case_type, dirpath=tmpdir.strpath) # Rough approximation of order-agnostic validation of # presence and number agreement for all elements. - assert len(expected_derived_columns) == len(project.derived_columns) - assert set(expected_derived_columns) == set(project.derived_columns) + assert len(expected_derived_attributes) == len(project.derived_attributes) + assert set(expected_derived_attributes) == set(project.derived_attributes) - def test_default_derived_columns_not_duplicated(self, + def test_default_derived_attributes_not_duplicated(self, env_config_filepath, project_config_data, case_type, tmpdir): - """ Default derived columns are not added if already present. """ + """ Default derived attributes are not added if already present. 
""" from collections import Counter _, project = self.create_project( project_config_data=project_config_data, default_env_path=env_config_filepath, case_type=case_type, dirpath=tmpdir.strpath) - num_occ_by_derived_column = Counter(project.derived_columns) - for default_derived_colname in Project.DERIVED_COLUMNS_DEFAULT: - assert 1 == num_occ_by_derived_column[default_derived_colname] + num_occ_by_derived_attribute = Counter(project.derived_attributes) + for default_derived_colname in Project.DERIVED_ATTRIBUTES_DEFAULT: + assert 1 == num_occ_by_derived_attribute[default_derived_colname] @@ -597,7 +584,7 @@ def observed_argstring_elements( conf_file_path = _write_project_config(confdata, dirpath=confpath) # Subvert requirement for sample annotations file. - with mock.patch("peppy.project.check_sample_sheet"): + with mock.patch("peppy.project.Project.parse_sample_sheet"): project = Project(conf_file_path, default_compute=envpath) argstring = project.get_arg_string(pipeline) @@ -683,25 +670,20 @@ def test_merge_samples_negative(self, proj, sample_index): @pytest.mark.parametrize(argnames="sample_index", argvalues=MERGED_SAMPLE_INDICES) def test_data_sources_derivation(self, proj, sample_index): - """ Samples in merge file, check data_sources --> derived_columns. """ - # Make sure these columns were merged: - merged_columns = filter( - lambda col_key: (col_key != "col_modifier") and - not col_key.endswith(COL_KEY_SUFFIX), - proj.samples[sample_index].merged_cols.keys()) + """ Samples in merge file, check data_sources --> derived_attributes. """ # Order may be lost due to mapping. # We don't care about that here, or about duplicates. - expected = set(DERIVED_COLNAMES) - observed = set(merged_columns) + required = set(DERIVED_COLNAMES) + observed = {k for k in proj.samples[sample_index].merged_cols.keys() + if k != "col_modifier" and not k.endswith(COL_KEY_SUFFIX)} # Observed may include additional things (like auto-added subsample_name) - for val in expected: - assert val in observed + assert required == (required & observed) @named_param(argnames="sample_index", argvalues=MERGED_SAMPLE_INDICES) - def test_derived_columns_sample_subannotation_sample( + def test_derived_attributes_sample_subannotation_sample( self, proj, sample_index): - """ Make sure derived columns works on merged table. """ + """ Make sure derived attributes works on merged table. """ observed_merged_sample_filepaths = \ [os.path.basename(f) for f in proj.samples[sample_index].file2.split(" ")] @@ -717,8 +699,8 @@ def test_unmerged_samples_lack_merged_cols(self, proj, sample_index): assert not proj.samples[sample_index].merged_cols - def test_duplicate_derived_columns_still_derived(self, proj): - """ Duplicated derived columns can still be derived. """ + def test_duplicate_derived_attributes_still_derived(self, proj): + """ Duplicated derived attributes can still be derived. """ sample_index = 2 observed_nonmerged_col_basename = \ os.path.basename(proj.samples[sample_index].nonmerged_col) @@ -728,6 +710,180 @@ def test_duplicate_derived_columns_still_derived(self, proj): +class SubprojectActivationTest: + """ Test cases for the effect of activating a subproject. 
""" + + MARK_NAME = "marker" + SUBPROJ_SECTION = { + "neurons": {MARK_NAME: "NeuN"}, "astrocytes": {MARK_NAME: "GFAP"}, + "oligodendrocytes": {MARK_NAME: "NG2"}, "microglia": {MARK_NAME: "Iba1"} + } + + + @pytest.mark.parametrize("sub", SUBPROJ_SECTION.keys()) + def test_subproj_activation_returns_project(self, tmpdir, sub): + """ Subproject activation returns the project instance. """ + prj = self.make_proj(tmpdir.strpath, incl_subs=True) + updated_prj = prj.activate_subproject(sub) + assert updated_prj is prj + + + @pytest.mark.parametrize( + argnames="attr", argvalues=["permissive", "file_checks"]) + @pytest.mark.parametrize("sub", SUBPROJ_SECTION.keys()) + def test_sp_act_resets_all_attributes(self, tmpdir, attr, sub): + """ Subproject activation doesn't affect non-config attributes. """ + prj = self.make_proj(tmpdir.strpath, incl_subs=True) + original = prj[attr] + prj[attr] = not original + assert prj[attr] is not original + prj.activate_subproject(sub) + assert prj[attr] is original + + + @pytest.mark.parametrize("sub", SUBPROJ_SECTION.keys()) + def test_subproj_activation_adds_new_config_entries(self, tmpdir, sub): + """ Previously nonexistent entries are added by subproject. """ + prj = self.make_proj(tmpdir.strpath, incl_subs=True) + assert self.MARK_NAME not in prj + prj.activate_subproject(sub) + assert self.MARK_NAME in prj + assert self.SUBPROJ_SECTION[sub][self.MARK_NAME] == prj[self.MARK_NAME] + + + @pytest.mark.parametrize("sub", SUBPROJ_SECTION.keys()) + def test_sp_act_overwrites_existing_config_entries(self, tmpdir, sub): + """ An activated subproject's values are favored over preexisting. """ + prj = self.make_proj(tmpdir.strpath, incl_subs=True) + prj[self.MARK_NAME] = "temp-mark" + assert "temp-mark" == prj[self.MARK_NAME] + prj.activate_subproject(sub) + expected = self.SUBPROJ_SECTION[sub][self.MARK_NAME] + assert expected == prj[self.MARK_NAME] + + + def test_activate_unknown_subproj(self, tmpdir): + """ With subprojects, attempt to activate undefined one is an error. """ + prj = self.make_proj(tmpdir.strpath, incl_subs=True) + with pytest.raises(Exception): + prj.activate_subproject("DNE-subproject") + + + @pytest.mark.parametrize("sub", SUBPROJ_SECTION.keys()) + def test_subproj_activation_when_none_exist(self, tmpdir, sub): + """ Without subprojects, activation attempt produces warning. """ + prj = self.make_proj(tmpdir.strpath, incl_subs=False) + logfile = tmpdir.join("project-error-messages.log").strpath + logview = TempLogFileHandler(logfile, level=logging.WARN) + with logview: + # Call that should produce a warning message + prj.activate_subproject(sub) + # Check for warning message. + exception_messages = logview.messages + for msg in exception_messages: + if "no subprojects are defined" in msg: + break + else: + raise AssertionError("Did not find expected message among lines: " + "{}".format(exception_messages)) + + + @classmethod + def make_proj(cls, folder, incl_subs): + """ Write temp config and create Project with subproject option. 
""" + conf_file_path = os.path.join(folder, "conf.yaml") + conf_data = {"metadata": {}} + if incl_subs: + conf_data.update(**{"subprojects": cls.SUBPROJ_SECTION}) + with open(conf_file_path, 'w') as f: + yaml.safe_dump(conf_data, f) + return Project(conf_file_path) + + + +@pytest.mark.usefixtures("write_project_files") +class ProjectWarningTests: + """ Tests for warning messages related to projects """ + + @pytest.mark.parametrize( + "ideally_implied_mappings", + [{}, {GENOMES_KEY: _GENOMES}, {TRANSCRIPTOMES_KEY: _TRASCRIPTOMES}, + {GENOMES_KEY: _GENOMES, TRANSCRIPTOMES_KEY: _TRASCRIPTOMES}]) + def test_suggests_implied_attributes( + self, recwarn, tmpdir, path_sample_anns, + project_config_data, ideally_implied_mappings): + """ Assemblies directly in proj conf (not implied) is deprecated. """ + + # Add the mappings parameterization to the config data. + conf_data = copy.deepcopy(project_config_data) + conf_data.update(ideally_implied_mappings) + + # Write the config file. + conf_file = tmpdir.join("proj_conf.yaml").strpath + assert not os.path.isfile(conf_file), \ + "Test project temp config file already exists: {}".format(conf_file) + with open(conf_file, 'w') as cf: + yaml.safe_dump(conf_data, cf) + + # (Hopefully) generate the warnings. + assert 0 == len(recwarn) # Ensure a fresh start. + warnings.simplefilter('always') # Allow DeprecationWarning capture. + Project(conf_file) # Generate the warning(s). + msgs = [str(w.message) for w in recwarn # Grab deprecation messages. + if isinstance(w.message, DeprecationWarning)] + assert len(ideally_implied_mappings) == len(msgs) # 1:1 warnings + for k in ideally_implied_mappings: + # Each section that should be implied should generate exactly 1 + # warning; check message for content then remove it from the pool. + matched = [m for m in msgs if k in m and + IMPLICATIONS_DECLARATION in m] + assert 1 == len(matched) + msgs.remove(matched[0]) + + @pytest.mark.parametrize("assembly_implications", + [{"genome": {"organism": _GENOMES}}, + {"transcriptome": {"organism": _TRASCRIPTOMES}}, + {"genome": {"organism": _GENOMES}, + "transcriptome": {"organism": _TRASCRIPTOMES}}]) + def test_no_warning_if_assemblies_are_implied( + self, recwarn, tmpdir, path_sample_anns, + project_config_data, assembly_implications): + """ Assemblies declaration within implied columns is not deprecated. """ + + # Add the mappings parameterization to the config data. + conf_data = copy.deepcopy(project_config_data) + conf_data[IMPLICATIONS_DECLARATION] = assembly_implications + + # Write the config file. + conf_file = tmpdir.join("proj_conf.yaml").strpath + assert not os.path.isfile(conf_file), \ + "Test project temp config file already exists: {}".format(conf_file) + with open(conf_file, 'w') as cf: + yaml.safe_dump(conf_data, cf) + + # Check that there are no warnings before or after test. + assert 0 == len(recwarn) + warnings.simplefilter('always') + Project(conf_file) + assert 0 == len(recwarn) + + + +@pytest.mark.usefixtures("write_project_files") +class SampleSubannotationTests: + + @pytest.mark.parametrize("defer", [False, True]) + def test_sample_subannotation_construction(self, defer, + subannotation_filepath, path_project_conf, path_sample_anns): + """ Merge table is constructed iff samples are constructed. 
""" + p = Project(path_project_conf, defer_sample_construction=defer) + if defer: + assert p.sample_subannotation is None + else: + assert p.sample_subannotation is not None + + + def _write_project_config(config_data, dirpath, filename="proj-conf.yaml"): """ Write the configuration file for a Project. diff --git a/tests/models/independent/test_ProjectContext.py b/tests/models/independent/test_ProjectContext.py index 6e4c3f9a..d57f5b9c 100644 --- a/tests/models/independent/test_ProjectContext.py +++ b/tests/models/independent/test_ProjectContext.py @@ -14,13 +14,12 @@ RNA_NAME = "rna_PE" WGBS_NAME = "wgbs-hs" RRBS_NAME = "rrbs_mm" +RRBS_NAME = "rrbs_mm" ADD_PROJECT_DATA = { - "genome": {"organism": { - "mouse": "mm10", "human": "hg38", "rat": "rn6"}}, "data_sources": {"src": "{sample}-{flowcell}.bam"}, - "derived_columns": ["data_source"], + "derived_attributes": ["data_source"], "pipeline_args": {"--epilog": None}, - "implied_columns": {"organism": "assembly"}, + "implied_attributes": {"organism": "assembly"}, "user": "test-user", "email": "tester@domain.org", } @@ -112,8 +111,7 @@ def test_no_filtration(self, samples, project): argnames=["inclusion", "expected_names"], argvalues=[("ATAC", {"atac-PE"}), (("WGBS", "RRBS"), {WGBS_NAME, RRBS_NAME}), - ({"RNA", "CHIP"}, {RNA_NAME, CHIP_NAME})], - ids=lambda incl_exp_pair: "{}-{}".format(*incl_exp_pair)) + ({"RNA", "CHIP"}, {RNA_NAME, CHIP_NAME})]) def test_inclusion(self, samples, project, inclusion, expected_names): """ Sample objects can be selected for by protocol. """ _assert_samples(samples, project.samples) diff --git a/tests/models/independent/test_Sample.py b/tests/models/independent/test_Sample.py index 0921dd1c..db0ed957 100644 --- a/tests/models/independent/test_Sample.py +++ b/tests/models/independent/test_Sample.py @@ -36,7 +36,7 @@ class ParseSampleImplicationsTests: def test_project_no_implications(self, sample, implications): """ With no implications mapping, sample is unmodified. """ before_inference = sample.__dict__ - sample.infer_columns(implications) + sample.infer_attributes(implications) after_inference = sample.__dict__ assert before_inference == after_inference @@ -44,16 +44,13 @@ def test_project_no_implications(self, sample, implications): def test_null_intersection_between_sample_and_implications(self, sample): """ Sample with none of implications' fields --> no change. """ before_inference = sample.__dict__ - sample.infer_columns(self.IMPLICATIONS_MAP) + sample.infer_attributes(self.IMPLICATIONS_MAP) assert before_inference == sample.__dict__ @pytest.mark.parametrize( argnames=["implier_value", "implications"], - argvalues=IMPLICATIONS.items(), - ids=lambda implier_and_implications: - "implier='{}', implications={}".format( - implier_and_implications[0], str(implier_and_implications[1]))) + argvalues=IMPLICATIONS.items()) def test_intersection_between_sample_and_implications( self, sample, implier_value, implications): """ Intersection between implications and sample fields --> append. """ @@ -64,7 +61,7 @@ def test_intersection_between_sample_and_implications( # Set the parameterized value for the implications source field. setattr(sample, self.IMPLIER_NAME, implier_value) - sample.infer_columns(self.IMPLICATIONS_MAP) + sample.infer_attributes(self.IMPLICATIONS_MAP) # Validate updates to sample based on column implications & inference. 
for implied_name, implied_value in implications.items(): @@ -85,7 +82,7 @@ def no_implied_values(): no_implied_values() setattr(sample, self.IMPLIER_NAME, unmapped_implier_value) - sample.infer_columns(self.IMPLICATIONS_MAP) + sample.infer_attributes(self.IMPLICATIONS_MAP) no_implied_values() @@ -246,7 +243,7 @@ def prj_data(self, request): "results_subdir": "results_pipeline", "submission_subdir": "submission"}, DATA_SOURCES_SECTION: self.DATA_SOURCES, - "derived_columns": [data_src]} + "derived_attributes": [data_src]} @named_param( @@ -261,7 +258,7 @@ def test_equivalence_between_implicit_and_explicit_prj( # Explicitly-passed object needs to at least be an AttributeDict. sample_data = AttributeDict( {SAMPLE_NAME_COLNAME: "arbitrary_sample", "prj": prj_data, - data_src_attr: src_key, "derived_columns": [data_src_attr]}) + data_src_attr: src_key, "derived_attributes": [data_src_attr]}) # Create the samples and make the calls under test. s = Sample(sample_data) @@ -291,8 +288,8 @@ def test_prefers_explicit_project_context(self, prj_data): assert new_src_val == getattr(s, DATA_SOURCE_COLNAME) - @named_param(argnames="exclude_derived_columns", argvalues=[False, True]) - def test_no_derived_columns(self, prj_data, exclude_derived_columns): + @named_param(argnames="exclude_derived_attributes", argvalues=[False, True]) + def test_no_derived_attributes(self, prj_data, exclude_derived_attributes): """ Passing Sample's project is equivalent to its inference. """ # Here we're disinterested in parameterization w.r.t. data source key, @@ -300,8 +297,8 @@ def test_no_derived_columns(self, prj_data, exclude_derived_columns): src_key = self.SOURCE_KEYS[0] # Explicitly-passed object needs to at least be an AttributeDict. - if exclude_derived_columns: - prj_data.pop("derived_columns") + if exclude_derived_attributes: + prj_data.pop("derived_attributes") sample_data = { SAMPLE_NAME_COLNAME: "arbitrary_sample", "prj": prj_data, DATA_SOURCE_COLNAME: src_key} @@ -317,7 +314,7 @@ def test_no_derived_columns(self, prj_data, exclude_derived_columns): # Check results. putative_new_attr = self.DATA_SOURCES[src_key] - if exclude_derived_columns: + if exclude_derived_attributes: # The value to which the source key maps won't have been added. 
assert not hasattr(s, putative_new_attr) assert putative_new_attr not in s diff --git a/tests/models/integration/test_Project_Sample_interaction.py b/tests/models/integration/test_Project_Sample_interaction.py index 9727d2a3..1a834937 100644 --- a/tests/models/integration/test_Project_Sample_interaction.py +++ b/tests/models/integration/test_Project_Sample_interaction.py @@ -31,21 +31,22 @@ NAME_ANNOTATIONS_FILE = "annotations.csv" SAMPLE_NAMES = ["WGBS_mm10", "ATAC_mm10", "WGBS_rn6", "ATAC_rn6"] -COLUMNS = [SAMPLE_NAME_COLNAME, "val1", "val2", "library"] +PROTOCOL_COLNAME = "protocol" +COLUMNS = [SAMPLE_NAME_COLNAME, "val1", "val2", PROTOCOL_COLNAME] VALUES1 = [random.randint(-5, 5) for _ in range(len(SAMPLE_NAMES))] VALUES2 = [random.randint(-5, 5) for _ in range(len(SAMPLE_NAMES))] -LIBRARIES = ["WGBS", "ATAC", "WGBS", "ATAC"] -DATA = list(zip(SAMPLE_NAMES, VALUES1, VALUES2, LIBRARIES)) +PROTOCOLS = ["WGBS", "ATAC", "WGBS", "ATAC"] +DATA = list(zip(SAMPLE_NAMES, VALUES1, VALUES2, PROTOCOLS)) DATA_FOR_SAMPLES = [ {SAMPLE_NAME_COLNAME: SAMPLE_NAMES}, - {"val1": VALUES1}, {"val2": VALUES2}, {"library": LIBRARIES}] + {"val1": VALUES1}, {"val2": VALUES2}, {PROTOCOL_COLNAME: PROTOCOLS}] PROJECT_CONFIG_DATA = {"metadata": {"sample_annotation": NAME_ANNOTATIONS_FILE}} -PROTOCOLS = ["WGBS", "ATAC"] def pytest_generate_tests(metafunc): """ Customization of test cases within this module. """ + protos = ["WGBS", "ATAC"] if metafunc.cls == BuildSheetTests: if "protocols" in metafunc.fixturenames: # Apply the test case to each of the possible combinations of @@ -53,10 +54,9 @@ def pytest_generate_tests(metafunc): metafunc.parametrize( argnames="protocols", argvalues=list(itertools.chain.from_iterable( - itertools.combinations(PROTOCOLS, x) - for x in range(1 + len(PROTOCOLS)))), - ids=lambda protos: - " protocols = {} ".format(",".join(protos))) + itertools.combinations(protos, x) + for x in range(1 + len(protos)))), + ids=lambda ps: " protocols = {} ".format(",".join(ps))) if "delimiter" in metafunc.fixturenames: metafunc.parametrize(argnames="delimiter", argvalues=[",", "\t"]) @@ -102,7 +102,7 @@ def samples_rawdata(): @pytest.fixture(scope="function") def sample_sheet(samples_rawdata): df = pd.DataFrame(samples_rawdata) - df.columns = [SAMPLE_NAME_COLNAME, "val1", "val2", "library"] + df.columns = [SAMPLE_NAME_COLNAME, "val1", "val2", PROTOCOL_COLNAME] return df @@ -195,7 +195,7 @@ def test_multiple_samples( # But the sheet permits filtering to specific protocol(s). 
exp_num_samples = len(SAMPLE_NAMES) if not protocols else \
-            sum(sum(1 for l in LIBRARIES if l == p) for p in protocols)
+            sum(sum(1 for p2 in PROTOCOLS if p2 == p1) for p1 in protocols)
         sheet = p.build_sheet(*protocols)
         assert exp_num_samples == len(sheet)
         if protocols:
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0456cf70..e9d539d9 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,13 @@
 """ Tests for utility functions """
 import copy
+import random
+import string
+import sys
+if sys.version_info < (3, 3):
+    from collections import Mapping
+else:
+    from collections.abc import Mapping
 
 import mock
 import pytest
@@ -8,7 +15,8 @@
 from peppy import AttributeDict, Project, Sample
 from peppy.const import SAMPLE_INDEPENDENT_PROJECT_SECTIONS, SAMPLE_NAME_COLNAME
 from peppy.utils import \
-    add_project_sample_constants, copy as pepcopy, grab_project_data
+    add_project_sample_constants, coll_like, copy as pepcopy, \
+    grab_project_data, has_null_value, non_null_value
 from tests.helpers import named_param, nonempty_powerset
@@ -38,8 +46,8 @@ def basic_project_data():
         "output_dir": "outdir",
         "results_subdir": "results_pipeline",
         "submission_subdir": "submission"},
-    "derived_columns": ["data_source"],
-    "implied_columns": {"organism": {"genomes": {
+    "derived_attributes": ["data_source"],
+    "implied_attributes": {"organism": {"genomes": {
         "mouse": "mm10", "rat": "rn6", "human": "hg38"}}},
     "trackhubs": []
 }
@@ -171,6 +179,92 @@ def test_name_collision(self, basic_sample, collision, old_val, new_val):
+
+
+def _randcoll(pool, dt):
+    """
+    Generate random collection of 1-10 elements.
+
+    :param Iterable pool: elements from which to choose
+    :param type dt: type of collection to create
+    :return Iterable[object]: collection of randomly generated elements
+    """
+    valid_types = [tuple, list, set, dict]
+    if dt not in valid_types:
+        raise TypeError("{} is an invalid type; choose from {}".
+                        format(str(dt), ", ".join(str(t) for t in valid_types)))
+    rs = [random.choice(pool) for _ in range(random.randint(1, 10))]
+    # Build the requested collection type, not just a list.
+    return dict(enumerate(rs)) if dt == dict else dt(rs)
+
+
+
+@pytest.mark.parametrize(
+    ["arg", "exp"],
+    [(random.randint(-sys.maxsize - 1, sys.maxsize), False),
+     (random.random(), False),
+     (random.choice(string.ascii_letters), False),
+     ([], True), (set(), True), (dict(), True), (tuple(), True),
+     (_randcoll(string.ascii_letters, list), True),
+     (_randcoll(string.ascii_letters, dict), True),
+     (_randcoll([int(d) for d in string.digits], tuple), True),
+     (_randcoll([int(d) for d in string.digits], set), True)]
+)
+def test_coll_like(arg, exp):
+    """ Test arbiter of whether an object is collection-like. """
+    assert exp == coll_like(arg)
+
+
+def _get_empty_attrdict(data):
+    ad = AttributeDict()
+    ad.add_entries(data)
+    return ad
+
+
+class NullValueHelperTests:
+    """ Tests of accuracy of null value arbiter. """
+
+    _DATA = {"a": 1, "b": [2]}
+
+    @pytest.mark.skip("Not implemented")
+    @pytest.fixture(
+        params=[lambda d: dict(d),
+                lambda d: AttributeDict().add_entries(d),
+                lambda d: _DummyProject(d)],
+        ids=["dict", AttributeDict.__name__, _DummyProject.__name__])
+    def kvs(self, request):
+        """ For test cases provide KV pair map of parameterized type. """
+        return request.param(self._DATA)
+
+    def test_missing_key_neither_null_nor_non_null(self, kvs):
+        """ A key not in a mapping has neither null nor non-null value. 
""" + k = "new_key" + assert k not in kvs + assert not has_null_value(k, kvs) + assert not non_null_value(k, kvs) + + @pytest.mark.parametrize("coll", [list(), set(), tuple(), dict()]) + def test_empty_collection_is_null(self, coll, kvs): + """ A key with an empty collection instance as its value is null. """ + ck = "empty" + assert ck not in kvs + kvs[ck] = coll + assert has_null_value(ck, kvs) + assert not non_null_value(ck, kvs) + + def test_None_is_null(self, kvs): + """ A key with None as value is null. """ + bad_key = "nv" + assert bad_key not in kvs + kvs[bad_key] = None + assert has_null_value(bad_key, kvs) + assert not non_null_value(bad_key, kvs) + + @pytest.mark.parametrize("k", _DATA.keys()) + def test_non_nulls(self, k, kvs): + """ Keys with non-None atomic or nonempty collection are non-null. """ + assert k in kvs + assert non_null_value(k, kvs) + + + def test_copy(): """ Test reference and equivalence comparison operators. """ class ExampleObject: