make flake8 happy

maxibor · Nov 10, 2020 · f2ff0be · f2ff0be
1 parent 4d4756c
commit f2ff0be
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 35 deletions.
diff --git a/.github/workflows/pydamage_ci.yml b/.github/workflows/pydamage_ci.yml
@@ -10,7 +10,7 @@ jobs:
     if: "!contains(github.event.head_commit.message, '[skip_ci]')"
     steps:
       - uses: actions/checkout@v2
-      - uses: conda-incubator/setup-miniconda@v1
+      - uses: conda-incubator/setup-miniconda@v2
         with:
           python-version: 3.7
           mamba-version: "*"

diff --git a/docs/source/output.md b/docs/source/output.md
@@ -4,34 +4,33 @@ Pydamage generates both a tabular and a visual output.
 
 The tabular output is a comma-separated file (`.csv`) with the following columns, for each analysed reference:
 
-  * `reference`: name of the reference genome/contig
-  * `pred_accuracy`: Predicted accuracy of Pydamage prediction, from the GLM modelling
-  * `null_model_p0`: parameter `p0` of the null model
-  * `null_model_p0_stdev`: standard error of the null model paramater `p0`
-  * `damage_model_p`: parameter `p` of the damage model
-  * `damage_model_p_stdev`: standard error of the parameter `p` of the damage model
-  * `damage_model_pmin`: paramater `p_min` of the damage model. *This is the modelled damage baseline*
-  * `damage_model_pmin_stdev`: standard error of the paramater `p_min` of the damage model
-  * `damage_model_pmax`: paramater `p_max` of the damage model. *This is the modelled amount of damage on the 5' end.*
-  * `damage_model_pmax_stdev`: standard error of the paramater `p_max` of the damage model
-  * `pvalue`: p-value calculated from the likelihood-ratio test-statistic using a chi-squared distribution
-  * `qvalue`: p-value corrected for multiple testing using Benjamini-Hochberg procedure. *Only computed when multiple references are used*
-  * `RMSE`: residual mean standard error of the model fit of the damage model
-  * `nb_reads_aligned`: number of aligned reads
-  * `coverage`: average coverage along the reference genome
-  * `CtoT-N`: Proportion of CtoT substitutions observed at position `N` from 5' end
-  * `GtoA-N`: Proportion of GtoA substitutions observed at position `N` from 5'
+* `reference`: name of the reference genome/contig
+* `pred_accuracy`: Predicted accuracy of Pydamage prediction, from the GLM modelling
+* `null_model_p0`: parameter `p0` of the null model
+* `null_model_p0_stdev`: standard error of the null model paramater `p0`
+* `damage_model_p`: parameter `p` of the damage model
+* `damage_model_p_stdev`: standard error of the parameter `p` of the damage model
+* `damage_model_pmin`: paramater `p_min` of the damage model. *This is the modelled damage baseline*
+* `damage_model_pmin_stdev`: standard error of the paramater `p_min` of the damage model
+* `damage_model_pmax`: paramater `p_max` of the damage model. *This is the modelled amount of damage on the 5' end.*
+* `damage_model_pmax_stdev`: standard error of the paramater `p_max` of the damage model
+* `pvalue`: p-value calculated from the likelihood-ratio test-statistic using a chi-squared distribution
+* `qvalue`: p-value corrected for multiple testing using Benjamini-Hochberg procedure. *Only computed when multiple references are used*
+* `RMSE`: residual mean standard error of the model fit of the damage model
+* `nb_reads_aligned`: number of aligned reads
+* `coverage`: average coverage along the reference genome
+* `CtoT-N`: Proportion of CtoT substitutions observed at position `N` from 5' end
+* `GtoA-N`: Proportion of GtoA substitutions observed at position `N` from 5'
 
 > To select contigs/references with damage, you will most likely want to look at two columns: `pred_accuracy > 0.9` and `qvalue <= 0.05`  
 
 The visual output are PNG files, one per reference contig. They show the frequency of observed C to T, and G to A transition at the 5' end of the sequencing data and overlay it with the fitted models for both the null and the damage model, including 95% confidence intervals. Furthermore, it provides a "residuals versus fitted" plot to help evaluate the fit of the pydamage damage model. Finally, the plot contains informtion on the average coverage along the reference and the p-value calculated from the likelihood-ratio test-statistic using a chi-squared distribution.
 
-> The visual output is only produced when using the `--plot` flag 
-
+> The visual output is only produced when using the `--plot` flag
 
 ## Example
 
-- [**Tabular ouput**](https://raw.githubusercontent.com/maxibor/pydamage/master/docs/assets/pydamage_results.csv)
-- **Visual output**
+* [**Tabular ouput**](https://raw.githubusercontent.com/maxibor/pydamage/master/docs/assets/pydamage_results.csv)
+* **Visual output**
 
-![](../img/NZ_JHCB02000011.1.png)
+![pydamage_plot](../img/NZ_JHCB02000011.1.png)
diff --git a/pydamage/damage.py b/pydamage/damage.py
@@ -44,7 +44,7 @@ def get_damage(self, wlen, show_al):
         all_g = []
         all_bases = []
         for al in self.alignments:
-            if al.is_reverse == False and al.is_unmapped == False:
+            if al.is_reverse is False and al.is_unmapped is False:
                 cigar = al.cigartuples
                 ref = al.get_reference_sequence()
                 quer = al.query_sequence
@@ -103,7 +103,8 @@ def check_model_fit(model_dict, wlen, verbose):
     if np.any(np.array(model_dict["base_cov"][:wlen]) == 0):
         if verbose:
             print(
-                f"Could not reliably fit a model to {model_dict['reference']} because of too few reads aligned"
+                f"Could not reliably fit a model to {model_dict['reference']}"
+                "because of too few reads aligned"
             )
         return False
     return model_dict
@@ -159,7 +160,8 @@ def test_damage(ref, bam, mode, wlen, show_al, process, verbose):
             print(f"Model fitting for {ref} failed")
             print(f"Model fitting error: {e}")
             print(
-                f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov} - reflen: {reflen}\n"
+                f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}"
+                " - reflen: {reflen}\n"
             )
         return False
 
@@ -203,13 +205,21 @@ def get_damage_group(ref, bam, mode, wlen, show_al, process):
 
 
 def test_damage_group(
-    ct_data, ga_data, cc_data, all_bases, nb_reads_aligned, cov, reflen, wlen, verbose
+    ct_data,
+    ga_data,
+    cc_data,
+    all_bases,
+    nb_reads_aligned,
+    cov,
+    reflen,
+    wlen,
+    verbose
 ):
     """Performs damage test
 
     Args:
-        ct_data (list of int): List of positions where CtoT transitions were observed
-        ga_data (list of int): List of positions where GtoA transitions were observed
+        ct_data (list of int): List of positions with CtoT transitions
+        ga_data (list of int): List of positions with GtoA transitions
         cc_data (list of int): List of positions where C in ref and query
         all_bases (list of int): List of positions where a base is aligned
         nb_reads_aligned(int): number of reads aligned
@@ -220,6 +230,7 @@ def test_damage_group(
     Returns:
         dict: Dictionary containing LR test results
     """
+    ref = "reference"
     try:
         if ct_data:
             model_A = models.damage_model()
@@ -235,7 +246,7 @@ def test_damage_group(
                 wlen=wlen,
                 verbose=verbose,
             )
-            test_res["reference"] = "reference"
+            test_res["reference"] = ref
             test_res["nb_reads_aligned"] = nb_reads_aligned
             test_res["coverage"] = cov
             test_res["reflen"] = reflen
@@ -247,6 +258,7 @@ def test_damage_group(
             print(f"Model fitting for {ref} failed")
             print(f"Model fitting error: {e}")
             print(
-                f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov} - reflen: {reflen}\n"
+                f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov} "
+                "- reflen: {reflen}\n"
             )
         return False
diff --git a/pydamage/main.py b/pydamage/main.py
@@ -6,7 +6,6 @@
 from pydamage import damage
 from pydamage.plot import damageplot
 from pydamage.accuracy_model import prepare_data, load_model, fit_model
-import pandas as pd
 import sys
 from tqdm import tqdm
 import warnings
@@ -26,10 +25,24 @@ def analyze(
 ):
 
     if group:
-        analyze_group(bam, wlen, show_al, process, outdir, plot, verbose, force)
+        analyze_group(bam,
+                      wlen,
+                      show_al,
+                      process,
+                      outdir,
+                      plot,
+                      verbose,
+                      force)
 
     else:
-        analyze_multi(bam, wlen, show_al, process, outdir, plot, verbose, force)
+        analyze_multi(bam,
+                      wlen,
+                      show_al,
+                      process,
+                      outdir,
+                      plot,
+                      verbose,
+                      force)
 
 
 def analyze_multi(
@@ -190,7 +203,8 @@ def analyze_group(
     )
     print("Estimating and testing Damage")
     with multiprocessing.Pool(proc) as p:
-        res = list(tqdm(p.imap(get_damage_group_partial, refs), total=len(refs)))
+        res = list(
+            tqdm(p.imap(get_damage_group_partial, refs), total=len(refs)))
     ct_data = []
     ga_data = []
     cc_data = []