From 3f8c6531c3f9005a91775433a6ff6afc5ca5a3ba Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Thu, 3 Jul 2025 20:23:51 -0700 Subject: [PATCH 01/19] Bumping version number to 1.5.0 --- configure.ac | 2 +- docs/content/quickstart.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 9916e10b..76c26ab7 100644 --- a/configure.ac +++ b/configure.ac @@ -14,7 +14,7 @@ dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl General Public License for more details. -AC_INIT([dnmtools], [1.4.4], [andrewds@usc.edu], +AC_INIT([dnmtools], [1.5.0], [andrewds@usc.edu], [dnmtools], [https://github.com/smithlabcode/dnmtools]) dnl the config.h is #included in the sources for version info AC_CONFIG_HEADERS([config.h]) diff --git a/docs/content/quickstart.md b/docs/content/quickstart.md index 306094b1..da638db0 100644 --- a/docs/content/quickstart.md +++ b/docs/content/quickstart.md @@ -70,14 +70,14 @@ would need to be activated when you want to use dnmtools. ### Configuration -* Download [dnmtools-1.4.4.tar.gz](https://github.com/smithlabcode/dnmtools/releases/download/v1.4.4/dnmtools-1.4.4.tar.gz). +* Download [dnmtools-1.5.0.tar.gz](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0.tar.gz). 
* Unpack the archive: ```console -$ tar -zxvf dnmtools-1.4.4.tar.gz +$ tar -zxvf dnmtools-1.5.0.tar.gz ``` * Move into the dnmtools directory and create a build directory: ```console -$ cd dnmtools-1.4.4 +$ cd dnmtools-1.5.0 $ mkdir build && cd build ``` * Run the configuration script: From d4ea788ed98d8b95941bbed38b85bc5621878236 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Thu, 3 Jul 2025 21:17:28 -0700 Subject: [PATCH 02/19] README.md: updates for installing v1.5.0 --- README.md | 296 ++++++++++++++++-------------------------------------- 1 file changed, 87 insertions(+), 209 deletions(-) diff --git a/README.md b/README.md index c6d9e93d..364b6115 100644 --- a/README.md +++ b/README.md @@ -6,228 +6,106 @@ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) DNMTools is a set of tools for analyzing DNA methylation data from -high-throughput sequencing experiments, especially whole genome -bisulfite sequencing (WGBS), but also reduced representation bisulfite -sequencing (RRBS). These tools focus on overcoming the computing -challenges imposed by the scale of genome-wide DNA methylation data, -which is usually the early parts of data analysis. +high-throughput sequencing experiments, especially whole genome bisulfite +sequencing (WGBS), but also reduced representation bisulfite sequencing +(RRBS). These tools focus on overcoming the computing challenges imposed by +the scale of genome-wide DNA methylation data, which is usually the early +parts of data analysis. -## Installing release 1.4.4 - -The documentation for DNMTools can be found -[here](https://dnmtools.readthedocs.io). But if you want to install -from source and you are reading this on GitHub or in a source tree you -unpacked, then keep reading. And if you are in a terminal, sorry for -all the formatting. - -### Required libraries - -* A recent compiler. Most users will be building and installing this - software with GCC. 
We require a compiler that fully supports C++17, - so we recommend using at least GCC 9 (released in 2019). There are - still many systems that install a very old version of GCC by - default, so if you have problems with building this software, that - might be the first thing to check. The clang LLVM compiler can - also be used with a recent enough version. -* The GNU Scientific Library. It can be installed using apt on Linux - (Ubuntu, Debian), using brew on macOS, or from source available - [here](http://www.gnu.org/software/gsl). -* The HTSlib library. This can be installed through brew on macOS, - through apt on Linux (Ubuntu, Debian), or from source downloadable - [here](https://github.com/samtools/htslib). - -All the above can also be installed using conda. If you use conda for -these dependencies, even if you are building dnmtools from the source -repo, it is easiest if all dependencies are available through conda. - -### Configuration - -* Download [dnmtools-1.4.4.tar.gz](https://github.com/smithlabcode/dnmtools/releases/download/v1.4.4/dnmtools-1.4.4.tar.gz). -* Unpack the archive: -```console -tar -zxvf dnmtools-1.4.4.tar.gz -``` -* Move into the dnmtools directory and create a build directory: -```console -cd dnmtools-1.4.4 && mkdir build && cd build -``` -* Run the configuration script: -```console -../configure -``` -If you do not want to install DNMTools system-wide, or if you do -not have admin privileges, specify a prefix directory: -```console -../configure --prefix=/some/reasonable/place -``` -If you installed HTSlib yourself in some non-standard directory, -you must specify the location like this: -```console -../configure CPPFLAGS='-I /path/to/htslib/headers' \ - LDFLAGS='-L/path/to/htslib/lib' -``` -Depending on how you obtained HTSlib, the headers may not be -in a directory at the same depth as the library file. 
- -### Building and installing the tools - -If you are still in the `build` directory, run `make` to compile the -tools, and then `make install` to install them: -```console -make && make install -``` -If your HTSlib (or some other library) is not installed system-wide, -then you might need to udpate your library path: -```console -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/path/to/htslib/lib -``` - -### Testing the program - -To test if everything was successful, simply run `dnmtools` without -any arguments and you should see the list of available commands: -```console -dnmtools -``` -There is a test suite for `dnmtools` and these test can be performed -as follows: -```console -make check -``` -This must be done from the build directory. Note that the tests -performed with `make check` are mostly regression tests that cover -prior issues rather than coverage tests to test all the functionality -of `dnmtools`. - -### Using a clone of the repo - -We strongly recommend using DNMTools through the latest stable release -under the releases section on GitHub or through a package as with -conda/mamba. Developers who wish to work on the latest commits, which -are unstable, can compile the source using `autogen.sh` which just -wraps `autoreconf`. +**Nanopore** As of v1.5.0, DNMTools has functionality to start analysis with a +BAM file from Nanopore sequencing with 5mC and 5hmC calls at CpG sites. ## Usage -Read the [documentation](https://dnmtools.readthedocs.io) for usage of -individual tools within DNMTools. - -## Installing and running `dnmtools` docker images - -The docker images of `dnmtools` are accessible through GitHub Container -registry. These are light-weight (~30 MB) images that let you run `dnmtools` -without worrying about the dependencies. 
- -### Installation - -To pull the image for the latest version, run: -```console -docker pull ghcr.io/smithlabcode/dnmtools -``` -To test the image installation, run: -```console -docker run ghcr.io/smithlabcode/dnmtools -``` -You should see the help page of `dnmtools`. - -For simpler reference, you can -re-tag the installed image as follows, but note that you would have to re-tag -the image whenever you pull an image for a new version. -```console -docker tag ghcr.io/smithlabcode/dnmtools:latest dnmtools:latest -``` - -You can also install the image for a particular vertion by running -```console -docker pull ghcr.io/smithlabcode/dnmtools:v[VERSION NUMBER] #(e.g. v1.4.4) -``` -Not all versions have corresponding images; you can find available images -[here](https://github.com/smithlabcode/dnmtools/pkgs/container/dnmtools). - -### Running the docker image - -To run the image, you can run (assuming you tagged the image as above) -```console -docker run -v /path/to/data:/data -w /data \ - dnmtools [DNMTOOLS COMMAND] [OPTIONS] [ARGUMENTS] -``` -In the above command, replace `/path/to/data` with the path to the directory you -want to mount, and it will be mounted as the `/data` directory in the container. -For example, if your genome data `genome.fa` is located in `./genome_data`, you -can execute `abismalidx` by running: -```console -docker run -v ./genome_data:/data -w /data \ - dnmtools abismalidx -v -t 4 genome.fa genome.idx -``` -In the above command, `-w /data` specifies the working directory in the -container, so the output `genome.idx` is saved in the `/data` directory, -which corresponds to the `./genome_data` directory in the host -machine. If you want to specify the output directory, use a command like below. 
-```console -docker run -v ./genome_data:/data -w /data \ - -v ./genome_index:/output \ - dnmtools abismalidx -v -t 4 genome.fa /output/genome.idx -``` -When you need to access multiple directories, it might be useful to use the -option `-v ./:/app -w /app`, which mounts the current directory -to the `/app` directory in the container, which is alo set as the working -directory. You can specify the paths in the same way you would from the -working directory in the host machine. For example: -```console -docker run -v ./:/app -w /app \ - dnmtools abismal -i genome_index/genome.idx -v -t 4 \ - -o mapped_reads/output.sam \ - reads/reads_1.fq reads/reads_1.fq -``` - -### Testing the install and use of docker image - -Run the following commands to test the installation and usage of the docker -image of `dnmtools`. -```console -docker pull ghcr.io/smithlabcode/dnmtools:latest -docker tag ghcr.io/smithlabcode/dnmtools:latest dnmtools:latest - -# Clone the repo to access test data -git clone git@github.com:smithlabcode/dnmtools.git -cd dnmtools - -# Run containers and save outputs in artifacts directory - -mkdir artifacts - -docker run -v ./:/app -w /app \ - dnmtools abismalidx -v -t 1 data/tRex1.fa artifacts/tRex1.idx - -docker run -v ./:/app -w /app \ - dnmtools simreads -seed 1 -o artifacts/simreads -n 10000 \ - -m 0.01 -b 0.98 data/tRex1.fa - -docker run -v ./:/app -w /app \ - dnmtools abismal -v -t 1 -i artifacts/tRex1.idx artifacts/simreads_{1,2}.fq -``` - -## Contacts and bug reports +The documentation for DNMTools can be found +[here](https://dnmtools.readthedocs.io). + +## Installation + +- **Linux** + The binary should work on almost any Linux system as recent as 2017: Ubuntu + 16.04+, Debian 9+, Fedora 24+, openSUSE Leap 42.3+, and RHEL/CentOS 8+. + [Download](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0-Linux.tar.gz). + +- **Mac** + The Mac binary should work on any Mac hardware and macOS-13 (Ventura) or + newer. 
+ [Download](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0-macOS.tar.gz). + +- **Conda** + ```console + conda install -c bioconda dnmtools + ``` + +- **Build from source** + Get the source here: + [dnmtools-1.5.0.tar.gz](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0.tar.gz). Dependencies: + [GSL](http://www.gnu.org/software/gsl), + [HTSlib](https://github.com/samtools/htslib), + [libdeflate](https://github.com/ebiggers/libdeflate) and + [ZLib](https://github.com/madler/zlib). Installing GSL and HTSlib is usually enough. + See below for system-specific details. + + Build DNMTools like this: + ```console + tar -xf dnmtools-1.5.0.tar.gz + cd dnmtools-1.5.0 + ./configure --prefix=$HOME + make + make install + ``` + + Get dependencies and a compiler on (these might change): + + Ubuntu/Debian + ```console + apt-get install build-essential htslib-dev libgsl-dev + ``` + + RedHat/Fedora + ```console + dnf install @c-development @development-tools htslib-devel gsl-devel awk + ``` + + Homebrew (see below) + ```console + brew install gcc htslib gsl + ``` + + Conda (see below) + ```console + conda create -n build-env -c conda-forge -c bioconda gcc gxx make autoconf automake htslib gsl && \ + conda activate build-env + ``` + + If you used Homebrew or Conda, you might need to set additional environment + variables or run configure differently. For example with Homebrew: + ```console + ./configure CPPFLAGS="-I$(brew --prefix)/include" LDFLAGS="-L$(brew --prefix)/lib" + ``` + +## Contact Andrew D. Smith andrewds@usc.edu ## Copyright and License Information -Copyright (C) 2022-2024 +Copyright (C) 2022-2025 + Andrew D. Smith and Guilherme de Sena Brandine Authors of DNMTools: Andrew D. 
Smith and Guilherme de Sena Brandine -Essential contributors: Ben Decato, Meng Zhou, Liz Ji, Terence Li, -Jenny Qu, Qiang Song, Fang Fang and Masaru Nakajima +Essential contributors: Ben Decato, Meng Zhou, Liz Ji, Terence Li, Jenny Qu, +Qiang Song, Fang Fang and Masaru Nakajima -This is free software: you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your -option) any later version. +This is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. -This software is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. +This software is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. 
From 972785b352992af2b63fccd20f0bd5f7ee16f195 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:11:02 -0700 Subject: [PATCH 03/19] src/smithlab_cpp: submodule update --- src/smithlab_cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smithlab_cpp b/src/smithlab_cpp index 57099cc6..a6227577 160000 --- a/src/smithlab_cpp +++ b/src/smithlab_cpp @@ -1 +1 @@ -Subproject commit 57099cc64a94707b0eb3327dcb850905d1ac54f3 +Subproject commit a622757713f518c1c33da86cb3adbad8e9e4ab50 From 8389ee5a2f7c140ea217b0d5bafd926578c6ef31 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:11:33 -0700 Subject: [PATCH 04/19] src/abismal: submodule update --- src/abismal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/abismal b/src/abismal index b63cd99e..a9c2c959 160000 --- a/src/abismal +++ b/src/abismal @@ -1 +1 @@ -Subproject commit b63cd99e99ba988657200d0117f7b3f8b4f1803e +Subproject commit a9c2c95994f83a577244b65317680d4fea0de2fc From c2c091e7fe540ff0d566028f63680b3e6355a023 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:12:09 -0700 Subject: [PATCH 05/19] README.md: updating obtain/install/build instructions --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 364b6115..c42d8fdb 100644 --- a/README.md +++ b/README.md @@ -23,28 +23,25 @@ The documentation for DNMTools can be found ## Installation - **Linux** - The binary should work on almost any Linux system as recent as 2017: Ubuntu - 16.04+, Debian 9+, Fedora 24+, openSUSE Leap 42.3+, and RHEL/CentOS 8+. - [Download](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0-Linux.tar.gz). + [binary](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0-Linux.tar.gz). + Should work on any Linux distribution since roughly 2017. 
- **Mac** - The Mac binary should work on any Mac hardware and macOS-13 (Ventura) or - newer. - [Download](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0-macOS.tar.gz). + [binary](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0-macOS.tar.gz). + Should work on any Mac hardware and macOS-13 (Ventura) or newer. - **Conda** ```console conda install -c bioconda dnmtools ``` -- **Build from source** - Get the source here: +- **Source** [dnmtools-1.5.0.tar.gz](https://github.com/smithlabcode/dnmtools/releases/download/v1.5.0/dnmtools-1.5.0.tar.gz). Dependencies: [GSL](http://www.gnu.org/software/gsl), [HTSlib](https://github.com/samtools/htslib), [libdeflate](https://github.com/ebiggers/libdeflate) and - [ZLib](https://github.com/madler/zlib). Installing GSL and HTSlib is usually enough. - See below for system-specific details. + [ZLib](https://github.com/madler/zlib). Installing HTSlib as a package + should also give you ZLib and libdeflate. System-specific details below. Build DNMTools like this: ```console @@ -55,7 +52,7 @@ The documentation for DNMTools can be found make install ``` - Get dependencies and a compiler on (these might change): + To get dependencies and a compiler on (these might change with OS/package updates): Ubuntu/Debian ```console @@ -67,19 +64,22 @@ The documentation for DNMTools can be found dnf install @c-development @development-tools htslib-devel gsl-devel awk ``` - Homebrew (see below) + Homebrew (see notes below) ```console brew install gcc htslib gsl ``` - Conda (see below) + Conda (see notes below) ```console - conda create -n build-env -c conda-forge -c bioconda gcc gxx make autoconf automake htslib gsl && \ + conda create -n build-env -c conda-forge -c bioconda \ + gcc gxx make autoconf automake htslib gsl zlib binutils && \ conda activate build-env ``` - If you used Homebrew or Conda, you might need to set additional environment - variables or run configure differently. 
For example with Homebrew: + Notes: If you use only Homebrew or only Conda to setup your environment, you + could need additional dependencies, and some of what I listed you might + already have. You might need to set additional environment variables or run + configure differently. For example with Homebrew: ```console ./configure CPPFLAGS="-I$(brew --prefix)/include" LDFLAGS="-L$(brew --prefix)/lib" ``` From 5c9ee8d7db598185df55fc610bc7a51e18b5c43e Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:29:23 -0700 Subject: [PATCH 06/19] data/md5sum.txt: updating test output file hashes for v1.5.0 --- data/md5sum.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data/md5sum.txt b/data/md5sum.txt index 9a4e2c82..7fa70efd 100644 --- a/data/md5sum.txt +++ b/data/md5sum.txt @@ -15,10 +15,10 @@ ec6a686617cad31e9f7a37a3d378e6ed tests/two_epialleles.states 93e38b20d162062a5d147c4290095a13 tests/mlml.out d947fe3d61ef7b1564558a69608f0e64 tests/methylome.pmd d41d8cd98f00b204e9800998ecf8427e tests/two_epialleles.amr -534ed1d8320e4b250c6d675bba23fac9 tests/reads.xcounts -838e61c5a3155db8075eeeec53d25efa tests/reads.unxcounts 001b9d966f62fa439b24cf2198cc3de5 tests/reads.counts.sym 2b8a0406015458be51b8b1c9e58b3602 tests/tRex1_promoters.roi.bed 9bdb361091d1c0626df30be8ba2c408c tests/reads.sam -660171a254c588f6251ef757a3b9bc7d tests/reads.fmt.sam -9629a89eeaf9b877253e0f4aa0654c83 tests/reads.fmt.srt.uniq.sam +00ca55777ac7d9cd87823bdf293a3d7f tests/reads.fmt.sam +98a7f3ae4bb296c32b6751e326977c51 tests/reads.fmt.srt.uniq.sam +fa21ade51680ef2752768f02e32eb2e8 tests/reads.xcounts +14e8f72fde8d0f669b17da6cf4a908b6 tests/reads.unxcounts From baa8cd3c50b2cc60f350fc8527b1c0d6861a8b97 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:30:35 -0700 Subject: [PATCH 07/19] most source files: updating the signature for argv to be non-const --- src/amrfinder/allelicmeth.cpp | 2 +- src/amrfinder/amrfinder.cpp | 2 +- 
src/amrfinder/amrtester.cpp | 2 +- src/analysis/bsrate.cpp | 2 +- src/analysis/cpgbins.cpp | 2 +- src/analysis/hmr-rep.cpp | 2 +- src/analysis/hmr.cpp | 2 +- src/analysis/hypermr.cpp | 2 +- src/analysis/levels.cpp | 2 +- src/analysis/metagene.cpp | 2 +- src/analysis/methcounts.cpp | 2 +- src/analysis/methentropy.cpp | 2 +- src/analysis/methstates.cpp | 2 +- src/analysis/multimethstat.cpp | 2 +- src/analysis/nanopore.cpp | 2 +- src/analysis/pmd.cpp | 2 +- src/analysis/roimethstat.cpp | 2 +- src/dnmtools.cpp | 106 +++++++++-------- src/mlml/mlml.cpp | 2 +- src/radmeth/dmr.cpp | 2 +- src/radmeth/methdiff.cpp | 2 +- src/radmeth/radmeth-adjust.cpp | 2 +- src/radmeth/radmeth-merge.cpp | 2 +- src/radmeth/radmeth.cpp | 2 +- src/utils/clean-hairpins.cpp | 2 +- src/utils/covered.cpp | 2 +- src/utils/fast-liftover.cpp | 2 +- src/utils/format-reads.cpp | 2 +- src/utils/guessprotocol.cpp | 2 +- src/utils/kmersites.cpp | 2 +- src/utils/lc-approx.cpp | 2 +- src/utils/lift-filter.cpp | 2 +- src/utils/merge-bsrate.cpp | 2 +- src/utils/merge-methcounts.cpp | 2 +- src/utils/recovered.cpp | 2 +- src/utils/selectsites.cpp | 208 +++++++++++++++++---------------- src/utils/symmetric-cpgs.cpp | 2 +- src/utils/uniq.cpp | 2 +- src/utils/unxcounts.cpp | 2 +- src/utils/xcounts.cpp | 2 +- 40 files changed, 203 insertions(+), 187 deletions(-) diff --git a/src/amrfinder/allelicmeth.cpp b/src/amrfinder/allelicmeth.cpp index dde3f906..4a65aca0 100644 --- a/src/amrfinder/allelicmeth.cpp +++ b/src/amrfinder/allelicmeth.cpp @@ -213,7 +213,7 @@ verify_chroms_available(const string &chrom_name, int -main_allelicmeth(int argc, const char **argv) { +main_allelicmeth(int argc, char *argv[]) { try { diff --git a/src/amrfinder/amrfinder.cpp b/src/amrfinder/amrfinder.cpp index 436966bf..0590d811 100644 --- a/src/amrfinder/amrfinder.cpp +++ b/src/amrfinder/amrfinder.cpp @@ -403,7 +403,7 @@ struct rename_amr { }; int -main_amrfinder(int argc, const char **argv) { +main_amrfinder(int argc, char *argv[]) { try { 
const std::string description = "identify regions of allele-specific methylation"; diff --git a/src/amrfinder/amrtester.cpp b/src/amrfinder/amrtester.cpp index 4d7aa1e3..c3cb3a76 100644 --- a/src/amrfinder/amrtester.cpp +++ b/src/amrfinder/amrtester.cpp @@ -187,7 +187,7 @@ ensure_regions_are_named(vector ®ions) { int -main_amrtester(int argc, const char **argv) { +main_amrtester(int argc, char *argv[]) { try { diff --git a/src/analysis/bsrate.cpp b/src/analysis/bsrate.cpp index fd058a12..e0324bff 100644 --- a/src/analysis/bsrate.cpp +++ b/src/analysis/bsrate.cpp @@ -468,7 +468,7 @@ write_per_read_histogram(const vector> &tab, const size_t n_hist_bins, } int -main_bsrate(int argc, const char **argv) { +main_bsrate(int argc, char *argv[]) { try { // assumed maximum length of a fragment static constexpr const size_t output_size = 10000; diff --git a/src/analysis/cpgbins.cpp b/src/analysis/cpgbins.cpp index 84498905..7c2ecdb4 100644 --- a/src/analysis/cpgbins.cpp +++ b/src/analysis/cpgbins.cpp @@ -195,7 +195,7 @@ process_chrom(const bool report_more_info, const string &chrom_name, } int -main_cpgbins(int argc, const char **argv) { +main_cpgbins(int argc, char *argv[]) { try { static const string description = R"""( Compute average site methylation levels in each non-overlapping diff --git a/src/analysis/hmr-rep.cpp b/src/analysis/hmr-rep.cpp index d052ced8..fb34bc4c 100644 --- a/src/analysis/hmr-rep.cpp +++ b/src/analysis/hmr-rep.cpp @@ -338,7 +338,7 @@ split_comma(const string &orig) { } int -main_hmr_rep(int argc, const char **argv) { +main_hmr_rep(int argc, char *argv[]) { try { diff --git a/src/analysis/hmr.cpp b/src/analysis/hmr.cpp index 9182b1a9..dda0c230 100644 --- a/src/analysis/hmr.cpp +++ b/src/analysis/hmr.cpp @@ -386,7 +386,7 @@ check_sorted_within_chroms(T first, const T last) { } int -main_hmr(int argc, const char **argv) { +main_hmr(int argc, char *argv[]) { try { diff --git a/src/analysis/hypermr.cpp b/src/analysis/hypermr.cpp index c9de0d83..e5855bcc 
100644 --- a/src/analysis/hypermr.cpp +++ b/src/analysis/hypermr.cpp @@ -212,7 +212,7 @@ initialize_transitions(vector> &trans) { } int -main_hypermr(int argc, const char **argv) { +main_hypermr(int argc, char *argv[]) { try { static const string description = "Identify regions of elevated methylation. Designed for " diff --git a/src/analysis/levels.cpp b/src/analysis/levels.cpp index 507db499..0f516e5c 100644 --- a/src/analysis/levels.cpp +++ b/src/analysis/levels.cpp @@ -83,7 +83,7 @@ guess_counts_file_format(const string &filename) { } int -main_levels(int argc, const char **argv) { +main_levels(int argc, char *argv[]) { try { bool verbose = false; bool relaxed_mode = false; diff --git a/src/analysis/metagene.cpp b/src/analysis/metagene.cpp index 205bd547..3529ffa5 100644 --- a/src/analysis/metagene.cpp +++ b/src/analysis/metagene.cpp @@ -86,7 +86,7 @@ collapse_bins(const uint32_t bin_size, vector &v) { } int -metagene(int argc, const char **argv) { +metagene(int argc, char *argv[]) { constexpr auto description = "Compute the information needed for metagene plots of DNA methylation \ levels. 
The columns in the output correspond to the fields calculated \ diff --git a/src/analysis/methcounts.cpp b/src/analysis/methcounts.cpp index 33b7d1a6..469d44c1 100644 --- a/src/analysis/methcounts.cpp +++ b/src/analysis/methcounts.cpp @@ -534,7 +534,7 @@ process_reads(const bool VERBOSE, const bool show_progress, } int -main_counts(int argc, const char **argv) { +main_counts(int argc, char *argv[]) { try { diff --git a/src/analysis/methentropy.cpp b/src/analysis/methentropy.cpp index a3589fa5..03fdfa68 100644 --- a/src/analysis/methentropy.cpp +++ b/src/analysis/methentropy.cpp @@ -286,7 +286,7 @@ process_chrom(const bool VERBOSE, const std::size_t cpg_window, } int -main_methentropy(int argc, const char **argv) { +main_methentropy(int argc, char *argv[]) { try { diff --git a/src/analysis/methstates.cpp b/src/analysis/methstates.cpp index 18e28572..4d18e007 100644 --- a/src/analysis/methstates.cpp +++ b/src/analysis/methstates.cpp @@ -180,7 +180,7 @@ get_chrom(const string &chrom_name, const vector &all_chroms, } int -main_methstates(int argc, const char **argv) { +main_methstates(int argc, char *argv[]) { try { const string description = "Convert mapped reads in SAM format into a format that indicates binary \ diff --git a/src/analysis/multimethstat.cpp b/src/analysis/multimethstat.cpp index a3d4c934..e03fe3ad 100644 --- a/src/analysis/multimethstat.cpp +++ b/src/analysis/multimethstat.cpp @@ -528,7 +528,7 @@ check_bed_format(const string ®ions_file) { int -main_multimethstat(int argc, const char **argv) { +main_multimethstat(int argc, char *argv[]) { try { diff --git a/src/analysis/nanopore.cpp b/src/analysis/nanopore.cpp index 7944afcc..d8886c5b 100644 --- a/src/analysis/nanopore.cpp +++ b/src/analysis/nanopore.cpp @@ -1127,7 +1127,7 @@ struct read_processor { }; int -main_nanocount(int argc, const char **argv) { +main_nanocount(int argc, char *argv[]) { try { diff --git a/src/analysis/pmd.cpp b/src/analysis/pmd.cpp index d18f3b1d..ba6dedeb 100644 --- 
a/src/analysis/pmd.cpp +++ b/src/analysis/pmd.cpp @@ -1140,7 +1140,7 @@ write_empty_summary(const string &summary_file) { int -main_pmd(int argc, const char **argv) { +main_pmd(int argc, char *argv[]) { try { static const size_t min_observations_for_inference = 100; diff --git a/src/analysis/roimethstat.cpp b/src/analysis/roimethstat.cpp index 55a6c7c0..1f7fdb6e 100644 --- a/src/analysis/roimethstat.cpp +++ b/src/analysis/roimethstat.cpp @@ -419,7 +419,7 @@ get_bed_columns(const string ®ions_file) { int -main_roimethstat(int argc, const char **argv) { +main_roimethstat(int argc, char *argv[]) { try { static const string description = R"""( Compute average site methylation levels in each interval from a given diff --git a/src/dnmtools.cpp b/src/dnmtools.cpp index ff5881d7..5e6185ff 100644 --- a/src/dnmtools.cpp +++ b/src/dnmtools.cpp @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -using std::begin; using std::cout; using std::end; using std::endl; @@ -34,13 +34,17 @@ using std::vector; static const string PROGRAM_NAME = "dnmtools"; +static const std::string license_text = R"( + +)"; + struct dnmtools_command { string tag; string description; - std::function fun; + std::function fun; auto - operator()(const int argc, const char **argv) const -> int { + operator()(const int argc, char *argv[]) const -> int { return fun(argc - 1, argv + 1); } }; @@ -57,87 +61,87 @@ operator<<(std::ostream &out, const dnmtools_command &cmd) -> std::ostream & { // ADS: not sure of best way to acquire these below beyond simply // declaring them here int -abismal(int argc, const char **argv); +abismal(int argc, char *argv[]); int -abismalidx(int argc, const char **argv); +abismalidx(int argc, char *argv[]); int -simreads(int argc, const char **argv); +simreads(int argc, char *argv[]); int -main_counts(int argc, const char **argv); +main_counts(int argc, char *argv[]); int -main_nanocount(int argc, const char **argv); +main_nanocount(int argc, char 
*argv[]); int -main_allelicmeth(int argc, const char **argv); +main_allelicmeth(int argc, char *argv[]); int -main_amrfinder(int argc, const char **argv); +main_amrfinder(int argc, char *argv[]); int -main_amrtester(int argc, const char **argv); +main_amrtester(int argc, char *argv[]); int -main_bsrate(int argc, const char **argv); +main_bsrate(int argc, char *argv[]); int -main_hmr(int argc, const char **argv); +main_hmr(int argc, char *argv[]); int -main_hmr_rep(int argc, const char **argv); +main_hmr_rep(int argc, char *argv[]); int -main_hypermr(int argc, const char **argv); +main_hypermr(int argc, char *argv[]); int -main_levels(int argc, const char **argv); +main_levels(int argc, char *argv[]); int -main_methentropy(int argc, const char **argv); +main_methentropy(int argc, char *argv[]); int -main_methstates(int argc, const char **argv); +main_methstates(int argc, char *argv[]); int -main_multimethstat(int argc, const char **argv); +main_multimethstat(int argc, char *argv[]); int -main_pmd(int argc, const char **argv); +main_pmd(int argc, char *argv[]); int -main_roimethstat(int argc, const char **argv); +main_roimethstat(int argc, char *argv[]); int -main_cpgbins(int argc, const char **argv); +main_cpgbins(int argc, char *argv[]); int -main_mlml(int argc, const char **argv); +main_mlml(int argc, char *argv[]); int -main_dmr(int argc, const char **argv); +main_dmr(int argc, char *argv[]); int -main_methdiff(int argc, const char **argv); +main_methdiff(int argc, char *argv[]); int -main_radmeth_adjust(int argc, const char **argv); +main_radmeth_adjust(int argc, char *argv[]); int -main_radmeth(int argc, const char **argv); +main_radmeth(int argc, char *argv[]); int -main_radmeth_merge(int argc, const char **argv); +main_radmeth_merge(int argc, char *argv[]); int -main_clean_hairpins(int argc, const char **argv); +main_clean_hairpins(int argc, char *argv[]); int -main_uniq(int argc, const char **argv); +main_uniq(int argc, char *argv[]); int 
-main_fast_liftover(int argc, const char **argv); +main_fast_liftover(int argc, char *argv[]); int -main_format(int argc, const char **argv); +main_format(int argc, char *argv[]); int -main_guessprotocol(int argc, const char **argv); +main_guessprotocol(int argc, char *argv[]); int -main_lift_filter(int argc, const char **argv); +main_lift_filter(int argc, char *argv[]); int -main_merge_bsrate(int argc, const char **argv); +main_merge_bsrate(int argc, char *argv[]); int -main_merge_methcounts(int argc, const char **argv); +main_merge_methcounts(int argc, char *argv[]); int -main_selectsites(int argc, const char **argv); +main_selectsites(int argc, char *argv[]); int -main_symmetric_cpgs(int argc, const char **argv); +main_symmetric_cpgs(int argc, char *argv[]); int -metagene(int argc, const char **argv); +metagene(int argc, char *argv[]); int -main_covered(int argc, const char **argv); +main_covered(int argc, char *argv[]); int -main_xcounts(int argc, const char **argv); +main_xcounts(int argc, char *argv[]); int -main_unxcounts(int argc, const char **argv); +main_unxcounts(int argc, char *argv[]); int -main_recovered(int argc, const char **argv); +main_recovered(int argc, char *argv[]); int -kmersites(int argc, const char **argv); +kmersites(int argc, char *argv[]); void print_help( @@ -155,7 +159,7 @@ print_help( } int -main(int argc, const char **argv) { +main(int argc, char *argv[]) { try { vector>> command_groups = { // clang-format off @@ -220,13 +224,21 @@ main(int argc, const char **argv) { return EXIT_SUCCESS; } + for (auto i = 0; i < argc; ++i) { + if (std::string(argv[i]) == "--license") { + std::cout << license_text; + return EXIT_SUCCESS; + } + } + const auto has_tag = [&](const dnmtools_command &a) { return a.tag == argv[1]; }; for (auto &g : command_groups) { - const auto the_cmd = find_if(begin(g.second), end(g.second), has_tag); - if (the_cmd != end(g.second)) + const auto the_cmd = + std::find_if(std::cbegin(g.second), std::cend(g.second), 
has_tag); + if (the_cmd != std::cend(g.second)) return (*the_cmd)(argc, argv); } diff --git a/src/mlml/mlml.cpp b/src/mlml/mlml.cpp index bd6d05a5..d67e2ace 100644 --- a/src/mlml/mlml.cpp +++ b/src/mlml/mlml.cpp @@ -737,7 +737,7 @@ process_two_types(const double alpha, } int -main_mlml(int argc, const char **argv) { +main_mlml(int argc, char *argv[]) { try { diff --git a/src/radmeth/dmr.cpp b/src/radmeth/dmr.cpp index 7bed35fc..00403e14 100644 --- a/src/radmeth/dmr.cpp +++ b/src/radmeth/dmr.cpp @@ -275,7 +275,7 @@ get_cpg_stats(const bool LOW_CUTOFF, const double sig_cutoff, int -main_dmr(int argc, const char **argv) { +main_dmr(int argc, char *argv[]) { try { diff --git a/src/radmeth/methdiff.cpp b/src/radmeth/methdiff.cpp index 9b2a5cc9..2fe12816 100644 --- a/src/radmeth/methdiff.cpp +++ b/src/radmeth/methdiff.cpp @@ -237,7 +237,7 @@ process_sites(const bool show_progress, bgzf_file &in_a, bgzf_file &in_b, } int -main_methdiff(int argc, const char **argv) { +main_methdiff(int argc, char *argv[]) { try { std::string outfile; double pseudocount = 1.0; diff --git a/src/radmeth/radmeth-adjust.cpp b/src/radmeth/radmeth-adjust.cpp index 2eb10050..03903f83 100644 --- a/src/radmeth/radmeth-adjust.cpp +++ b/src/radmeth/radmeth-adjust.cpp @@ -436,7 +436,7 @@ fdr(vector &loci) { int -main_radmeth_adjust(int argc, const char **argv) { +main_radmeth_adjust(int argc, char *argv[]) { try { diff --git a/src/radmeth/radmeth-merge.cpp b/src/radmeth/radmeth-merge.cpp index 6d6b3355..538e7a5d 100644 --- a/src/radmeth/radmeth-merge.cpp +++ b/src/radmeth/radmeth-merge.cpp @@ -154,7 +154,7 @@ merge(istream &cpg_stream, ostream &dmr_stream, double cutoff) { } int -main_radmeth_merge(int argc, const char **argv) { +main_radmeth_merge(int argc, char *argv[]) { try { diff --git a/src/radmeth/radmeth.cpp b/src/radmeth/radmeth.cpp index 11b173af..815e599a 100644 --- a/src/radmeth/radmeth.cpp +++ b/src/radmeth/radmeth.cpp @@ -275,7 +275,7 @@ verify_multiple_levels(const Regression 
&full_regression, * proportions and design matrix */ int -main_radmeth(int argc, const char **argv) { +main_radmeth(int argc, char *argv[]) { try { static const string description = "calculate differential methylation scores"; diff --git a/src/utils/clean-hairpins.cpp b/src/utils/clean-hairpins.cpp index 3902e173..24147870 100644 --- a/src/utils/clean-hairpins.cpp +++ b/src/utils/clean-hairpins.cpp @@ -330,7 +330,7 @@ clean_hairpin::analyze_reads(bgzf_file &in1, bgzf_file &in2, } int -main_clean_hairpins(int argc, const char **argv) { +main_clean_hairpins(int argc, char *argv[]) { static const string description = "fix and stat invdup/hairpin reads"; diff --git a/src/utils/covered.cpp b/src/utils/covered.cpp index 117a352c..7836ed15 100644 --- a/src/utils/covered.cpp +++ b/src/utils/covered.cpp @@ -53,7 +53,7 @@ get_n_reads(const kstring_t &line) { } int -main_covered(int argc, const char **argv) { +main_covered(int argc, char *argv[]) { try { size_t n_threads = 1; diff --git a/src/utils/fast-liftover.cpp b/src/utils/fast-liftover.cpp index 90d93649..cede0633 100644 --- a/src/utils/fast-liftover.cpp +++ b/src/utils/fast-liftover.cpp @@ -108,7 +108,7 @@ lift_site(const unordered_map &index, } int -main_fast_liftover(int argc, const char **argv) { +main_fast_liftover(int argc, char *argv[]) { try { string indexfile; string tofile; diff --git a/src/utils/format-reads.cpp b/src/utils/format-reads.cpp index c8ba480f..58e1535e 100644 --- a/src/utils/format-reads.cpp +++ b/src/utils/format-reads.cpp @@ -379,7 +379,7 @@ format(const string &cmd, const size_t n_threads, const string &inputfile, } int -main_format(int argc, const char **argv) { +main_format(int argc, char *argv[]) { try { size_t n_reads_to_check = 1000000; diff --git a/src/utils/guessprotocol.cpp b/src/utils/guessprotocol.cpp index 99b56d6c..a2ef5923 100644 --- a/src/utils/guessprotocol.cpp +++ b/src/utils/guessprotocol.cpp @@ -229,7 +229,7 @@ operator>>(bgzf_file &s, FASTQRecord &r) { } int 
-main_guessprotocol(int argc, const char **argv) { +main_guessprotocol(int argc, char *argv[]) { try { diff --git a/src/utils/kmersites.cpp b/src/utils/kmersites.cpp index afd68beb..19ab4fc4 100644 --- a/src/utils/kmersites.cpp +++ b/src/utils/kmersites.cpp @@ -99,7 +99,7 @@ process_chrom_with_named_lines(const string &kmer, const int offset, } auto -kmersites(const int argc, const char **argv) -> int { +kmersites(const int argc, char *argv[]) -> int { try { bool verbose = false; diff --git a/src/utils/lc-approx.cpp b/src/utils/lc-approx.cpp index a682fdb7..479ae690 100644 --- a/src/utils/lc-approx.cpp +++ b/src/utils/lc-approx.cpp @@ -83,7 +83,7 @@ get_approx_line_count(const bool VERBOSE, const string &filename, int -main_lc_approx(int argc, const char **argv) { +main_lc_approx(int argc, char *argv[]) { try { size_t n_samples = 100; diff --git a/src/utils/lift-filter.cpp b/src/utils/lift-filter.cpp index fec8aaea..7a3686fb 100644 --- a/src/utils/lift-filter.cpp +++ b/src/utils/lift-filter.cpp @@ -40,7 +40,7 @@ same_chrom_pos_strand(const MSite &a, const MSite &b) { } int -main_lift_filter(int argc, const char **argv) { +main_lift_filter(int argc, char *argv[]) { try{ string pfile; bool VERBOSE = false; diff --git a/src/utils/merge-bsrate.cpp b/src/utils/merge-bsrate.cpp index ecb9c8c3..74f8f9e1 100644 --- a/src/utils/merge-bsrate.cpp +++ b/src/utils/merge-bsrate.cpp @@ -60,7 +60,7 @@ bool readline(std::vector& infiles, } int -main_merge_bsrate(int argc, const char **argv) { +main_merge_bsrate(int argc, char *argv[]) { try { bool VERBOSE = false; diff --git a/src/utils/merge-methcounts.cpp b/src/utils/merge-methcounts.cpp index 767f0ef9..08470b83 100644 --- a/src/utils/merge-methcounts.cpp +++ b/src/utils/merge-methcounts.cpp @@ -343,7 +343,7 @@ get_chroms_order(const vector &filenames, output is in counts or fractions. 
*/ int -main_merge_methcounts(int argc, const char **argv) { +main_merge_methcounts(int argc, char *argv[]) { try { diff --git a/src/utils/recovered.cpp b/src/utils/recovered.cpp index c268c238..9775f9c4 100644 --- a/src/utils/recovered.cpp +++ b/src/utils/recovered.cpp @@ -323,7 +323,7 @@ process_sites(const bool verbose, const bool add_missing_chroms, int -main_recovered(int argc, const char **argv) { +main_recovered(int argc, char *argv[]) { try { bool verbose = false; diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp index 1539e654..9186e4f2 100644 --- a/src/utils/selectsites.cpp +++ b/src/utils/selectsites.cpp @@ -16,35 +16,33 @@ * GNU General Public License for more details. */ -#include -#include -#include +#include +#include +#include +#include #include #include -#include -#include #include +#include +#include #include -#include -#include +#include -#include "OptionParser.hpp" -#include "smithlab_utils.hpp" -#include "smithlab_os.hpp" #include "GenomicRegion.hpp" #include "MSite.hpp" +#include "OptionParser.hpp" +#include "smithlab_os.hpp" +#include "smithlab_utils.hpp" -using std::string; -using std::vector; -using std::cout; using std::cerr; +using std::cout; using std::endl; using std::ios_base; using std::runtime_error; -using std::ifstream; -using std::unordered_map; -using std::tuple; +using std::string; using std::tie; +using std::tuple; +using std::unordered_map; using bamxx::bgzf_file; @@ -55,13 +53,15 @@ struct quick_buf : public std::ostringstream, // ADS: By user ecatmur on SO; very fast. Seems to work... 
quick_buf() { // ...but this seems to depend on data layout - static_cast&>(*this).rdbuf(this); + static_cast &>(*this).rdbuf(this); } - void clear() { + void + clear() { // reset buffer pointers (member functions) setp(pbase(), pbase()); } - char const* c_str() { + char const * + c_str() { /* between c_str and insertion make sure to clear() */ *pptr() = '\0'; return pbase(); @@ -72,44 +72,46 @@ struct selectsites_summary { // command_line is the command used to produce this summary file and // the corresponding results - string command_line{}; + std::string command_line{}; // n_target_regions is the number of target regions provided as // input to the command - uint64_t n_target_regions{}; + std::uint64_t n_target_regions{}; // target_region_size is the sum of the sizes of each target region - uint64_t target_region_size{}; + std::uint64_t target_region_size{}; // n_target_regions_collapsed is the number of target regions after // having collapsed the input target regions merging those that // overlap - uint64_t n_target_regions_collapsed{}; + std::uint64_t n_target_regions_collapsed{}; // target_region_collapsed_size is the sum of the sizes of target // regions after collapsing - uint64_t target_region_collapsed_size{}; + std::uint64_t target_region_collapsed_size{}; // n_sites_total is the total number of sites available in the input // counts file. This value is displayed as zero if the command // specified to process the sites on disk. 
- uint64_t n_sites_total{}; + std::uint64_t n_sites_total{}; // n_sites_selected is the total number of sites in the output file - uint64_t n_sites_selected{}; + std::uint64_t n_sites_selected{}; - template static auto measure_target_regions(const T &t) - -> tuple { + template + static auto + measure_target_regions(const T &t) -> tuple { return { std::size(t), accumulate(cbegin(t), cend(t), 0ul, - [](const uint64_t a, const typename T::value_type &v) { + [](const std::uint64_t a, const typename T::value_type &v) { return a + v.get_width(); }), }; } - auto tostring() const -> string { + auto + tostring() const -> std::string { std::ostringstream oss; oss << "command_line: " << command_line << '\n' << "n_target_regions: " << n_target_regions << '\n' @@ -125,24 +127,26 @@ struct selectsites_summary { static auto write_stats_output(const selectsites_summary &summary, - const string &summary_file) -> void { + const std::string &summary_file) -> void { if (!summary_file.empty()) { std::ofstream out_summary(summary_file); - if (!out_summary) throw runtime_error("bad summary output file"); + if (!out_summary) + throw runtime_error("bad summary output file"); out_summary << summary.tostring() << endl; } } - static void -collapsebed(vector ®ions) { - size_t j = 0; - for (size_t i = 1; i < regions.size(); ++i) { +collapsebed(std::vector ®ions) { + std::size_t j = 0; + for (std::size_t i = 1; i < std::(regions); ++i) { if (regions[j].same_chrom(regions[i]) && regions[i].get_start() <= regions[j].get_end()) { regions[j].set_end(std::max(regions[j].get_end(), regions[i].get_end())); } - else { regions[++j] = regions[i]; } + else { + regions[++j] = regions[i]; + } } regions.erase(begin(regions) + j + 1, end(regions)); } @@ -160,22 +164,25 @@ contains(const GenomicRegion &r, const MSite &s) { } static auto -process_all_sites(const bool VERBOSE, const string &sites_file, - const unordered_map> ®ions, - bgzf_file &out) -> tuple { +process_all_sites( + const bool VERBOSE, const 
std::string &sites_file, + const unordered_map> ®ions, + bgzf_file &out) -> tuple { bgzf_file in(sites_file, "r"); - if (!in) throw runtime_error("cannot open file: " + sites_file); + if (!in) + throw runtime_error("cannot open file: " + sites_file); - uint64_t n_sites_total = 0u; - uint64_t n_sites_selected = 0u; + std::uint64_t n_sites_total = 0u; + std::uint64_t n_sites_selected = 0u; MSite the_site, prev_site; - vector::const_iterator i, i_lim; + std::vector::const_iterator i, i_lim; bool chrom_is_relevant = false; while (read_site(in, the_site)) { ++n_sites_total; if (the_site.chrom != prev_site.chrom) { - if (VERBOSE) cerr << "processing " << the_site.chrom << endl; + if (VERBOSE) + cerr << "processing " << the_site.chrom << endl; const auto r = regions.find(the_site.chrom); chrom_is_relevant = (r != cend(regions)); if (chrom_is_relevant) { @@ -197,17 +204,17 @@ process_all_sites(const bool VERBOSE, const string &sites_file, } static auto -get_sites_in_region(ifstream &site_in, const GenomicRegion ®ion, - bgzf_file &out) -> uint64_t { +get_sites_in_region(std::ifstream &site_in, const GenomicRegion ®ion, + bgzf_file &out) -> std::uint64_t { quick_buf buf; - const string chrom{region.get_chrom()}; - const size_t start_pos = region.get_start(); - const size_t end_pos = region.get_end(); + const std::string chrom{region.get_chrom()}; + const std::size_t start_pos = region.get_start(); + const std::size_t end_pos = region.get_end(); find_offset_for_msite(chrom, start_pos, site_in); MSite the_site; - uint64_t n_sites_selected = 0u; + std::uint64_t n_sites_selected = 0u; // ADS: should only happen once that "the_site.chrom < chrom" and // this is only needed because of end state of binary search on disk while (site_in >> the_site && @@ -218,72 +225,69 @@ get_sites_in_region(ifstream &site_in, const GenomicRegion ®ion, // struct is bad... 
buf.clear(); buf << the_site << '\n'; - if (!out.write(buf.c_str(), buf.tellp())) + if (!out.write(buf.data(), buf.tellp())) throw runtime_error("error writing output"); ++n_sites_selected; } return n_sites_selected; } - static auto -process_with_sites_on_disk(const string &sites_file, - vector ®ions, - bgzf_file &out) -> uint64_t { - ifstream in(sites_file); +process_with_sites_on_disk(const std::string &sites_file, + std::vector ®ions, + bgzf_file &out) -> std::uint64_t { + std::ifstream in(sites_file); if (!in) throw runtime_error("cannot open file: " + sites_file); - uint64_t n_sites_selected = 0ul; - for (auto i = 0u; i < size(regions) && in; ++i) + std::uint64_t n_sites_selected = 0ul; + for (auto i = 0u; i < std::size(regions) && in; ++i) n_sites_selected += get_sites_in_region(in, regions[i], out); return n_sites_selected; } - static void -regions_by_chrom(vector ®ions, - unordered_map > &lookup) { - for (auto &&r: regions) { - const string chrom_name(r.get_chrom()); +regions_by_chrom( + std::vector ®ions, + unordered_map> &lookup) { + for (auto &&r : regions) { + const std::string chrom_name(r.get_chrom()); if (lookup.find(chrom_name) == end(lookup)) - lookup[chrom_name] = vector(); + lookup[chrom_name] = std::vector(); lookup[chrom_name].push_back(r); } regions.clear(); regions.shrink_to_fit(); } - inline bool -file_exists(const string &filename) { - return (access(filename.c_str(), F_OK) == 0); +file_exists(const std::string &filename) { + return (access(filename.data(), F_OK) == 0); } - static bool -is_compressed_file(const string &filename) { - const bgzf_file f(filename.c_str(), "r"); +is_compressed_file(const std::string &filename) { + const bgzf_file f(filename.data(), "r"); htsFormat fmt; const int ret = hts_detect_format(f.f->fp, &fmt); - if (ret != 0) throw runtime_error("failed to detect format: " + filename); + if (ret != 0) + throw runtime_error("failed to detect format: " + filename); return fmt.compression != no_compression; } - static auto 
-get_command_line(const int argc, const char **const argv) -> string { - if (argc == 0) return string(); +get_command_line(const int argc, const char *const argv[]) -> std::string { + if (argc == 0) + return std::string(); std::ostringstream cmd; cmd << '"'; copy(argv, argv + (argc - 1), std::ostream_iterator(cmd, " ")); - cmd << argv[argc-1] << '"'; + cmd << argv[argc - 1] << '"'; return cmd.str(); } - int -main_selectsites(int argc, const char **argv) { +main_selectsites(int argc, char *argv[]) { try { @@ -291,10 +295,10 @@ main_selectsites(int argc, const char **argv) { bool keep_file_on_disk = false; bool compress_output = false; - string outfile("-"); - string summary_file; + std::string outfile("-"); + std::string summary_file; - const string description = + const std::string description = "Select sites inside a set of genomic intervals. " "Sites must be specified in methcounts format. " "Intervals must be specified in bed format."; @@ -302,16 +306,18 @@ main_selectsites(int argc, const char **argv) { /****************** COMMAND LINE OPTIONS ********************/ OptionParser opt_parse(strip_path(argv[0]), description, " ", 2); - opt_parse.add_opt("output", 'o', "output file (default: stdout)", - false, outfile); - opt_parse.add_opt("disk", 'd', "process sites on disk " + opt_parse.add_opt("output", 'o', "output file (default: stdout)", false, + outfile); + opt_parse.add_opt("disk", 'd', + "process sites on disk " "(fast if target intervals are few)", false, keep_file_on_disk); - opt_parse.add_opt("summary", 'S', "write summary to this file", false, summary_file); + opt_parse.add_opt("summary", 'S', "write summary to this file", false, + summary_file); opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output); opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE); opt_parse.set_show_defaults(); - vector leftover_args; + std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || 
opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl @@ -326,12 +332,12 @@ main_selectsites(int argc, const char **argv) { cerr << opt_parse.option_missing_message() << endl; return EXIT_SUCCESS; } - if (leftover_args.size() != 2) { + if (std::size(leftover_args) != 2) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; } - const string regions_file = leftover_args.front(); - const string sites_file = leftover_args.back(); + const std::string regions_file = leftover_args.front(); + const std::string sites_file = leftover_args.back(); /****************** END COMMAND LINE OPTIONS *****************/ selectsites_summary summary; @@ -346,30 +352,32 @@ main_selectsites(int argc, const char **argv) { cerr << "input file is so must be loaded" << endl; } - vector regions; + std::vector regions; ReadBEDFile(regions_file, regions); if (!check_sorted(regions)) throw runtime_error("regions not sorted in file: " + regions_file); std::tie(summary.n_target_regions, summary.target_region_size) = selectsites_summary::measure_target_regions(regions); - const size_t n_orig_regions = regions.size(); + const std::size_t n_orig_regions = std::size(regions); collapsebed(regions); - if (VERBOSE && n_orig_regions != regions.size()) + if (VERBOSE && n_orig_regions != std::size(regions)) cerr << "[number of regions merged due to overlap: " - << n_orig_regions - regions.size() << "]" << endl; + << n_orig_regions - std::size(regions) << "]" << endl; std::tie(summary.n_target_regions_collapsed, summary.target_region_collapsed_size) = selectsites_summary::measure_target_regions(regions); - unordered_map> regions_lookup; - if (!keep_file_on_disk) regions_by_chrom(regions, regions_lookup); + unordered_map> regions_lookup; + if (!keep_file_on_disk) + regions_by_chrom(regions, regions_lookup); // open the output file - const string output_mode = compress_output ? "w" : "wu"; + const std::string output_mode = compress_output ? 
"w" : "wu"; bamxx::bgzf_file out(outfile, output_mode); - if (!out) throw runtime_error("error opening output file: " + outfile); + if (!out) + throw runtime_error("error opening output file: " + outfile); if (keep_file_on_disk) summary.n_sites_selected = @@ -380,13 +388,9 @@ main_selectsites(int argc, const char **argv) { write_stats_output(summary, summary_file); } - catch (const runtime_error &e) { + catch (const std::exception &e) { cerr << e.what() << endl; return EXIT_FAILURE; } - catch (std::bad_alloc &ba) { - cerr << "ERROR: could not allocate memory" << endl; - return EXIT_FAILURE; - } return EXIT_SUCCESS; } diff --git a/src/utils/symmetric-cpgs.cpp b/src/utils/symmetric-cpgs.cpp index 5b7b9620..1884a8fa 100644 --- a/src/utils/symmetric-cpgs.cpp +++ b/src/utils/symmetric-cpgs.cpp @@ -127,7 +127,7 @@ process_sites(const bool verbose, T &in, T &out) { } int -main_symmetric_cpgs(int argc, const char **argv) { +main_symmetric_cpgs(int argc, char *argv[]) { try { // file types from HTSlib use "-" for the filename to go to stdout string outfile{"-"}; diff --git a/src/utils/uniq.cpp b/src/utils/uniq.cpp index 31c38be0..107b0da9 100644 --- a/src/utils/uniq.cpp +++ b/src/utils/uniq.cpp @@ -277,7 +277,7 @@ uniq(const bool add_dup_count, const uint32_t max_buffer_size, } int -main_uniq(int argc, const char **argv) { +main_uniq(int argc, char *argv[]) { try { uint32_t max_buffer_size = std::numeric_limits::max(); bool VERBOSE = false; diff --git a/src/utils/unxcounts.cpp b/src/utils/unxcounts.cpp index ee64f8c8..d3360d87 100644 --- a/src/utils/unxcounts.cpp +++ b/src/utils/unxcounts.cpp @@ -564,7 +564,7 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, } int -main_unxcounts(int argc, const char **argv) { +main_unxcounts(int argc, char *argv[]) { try { bool verbose = false; bool add_missing_chroms = false; diff --git a/src/utils/xcounts.cpp b/src/utils/xcounts.cpp index b240a0fc..4578187e 100644 --- a/src/utils/xcounts.cpp +++ 
b/src/utils/xcounts.cpp @@ -105,7 +105,7 @@ fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) { } int -main_xcounts(int argc, const char **argv) { +main_xcounts(int argc, char *argv[]) { try { bool verbose = false; bool gzip_output = false; From d78e18a77d549b5a8a6acea34ff00a3bc4cc904d Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:35:19 -0700 Subject: [PATCH 08/19] src/utils/selectsites.cpp: fixing a bug in function name --- src/utils/selectsites.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp index 9186e4f2..44c22ae0 100644 --- a/src/utils/selectsites.cpp +++ b/src/utils/selectsites.cpp @@ -61,8 +61,8 @@ struct quick_buf : public std::ostringstream, setp(pbase(), pbase()); } char const * - c_str() { - /* between c_str and insertion make sure to clear() */ + data() { + /* between data() and insertion make sure to clear() */ *pptr() = '\0'; return pbase(); } From 59e83af500cdd96fae31684b30dc225d35f28a52 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:36:34 -0700 Subject: [PATCH 09/19] src/utils/selectsites.cpp: fixing another typo bug --- src/utils/selectsites.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp index 44c22ae0..43b288a7 100644 --- a/src/utils/selectsites.cpp +++ b/src/utils/selectsites.cpp @@ -139,7 +139,7 @@ write_stats_output(const selectsites_summary &summary, static void collapsebed(std::vector ®ions) { std::size_t j = 0; - for (std::size_t i = 1; i < std::(regions); ++i) { + for (std::size_t i = 1; i < std::size(regions); ++i) { if (regions[j].same_chrom(regions[i]) && regions[i].get_start() <= regions[j].get_end()) { regions[j].set_end(std::max(regions[j].get_end(), regions[i].get_end())); From 8969aa67687ad7b361a2f93e2a52243644e79518 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:37:40 -0700 Subject: [PATCH 
10/19] .github/workflows/dnmtools_release_linux.yml: attempting to get the archive name mostly right without the zip --- .github/workflows/dnmtools_release_linux.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/dnmtools_release_linux.yml b/.github/workflows/dnmtools_release_linux.yml index 24967e2c..957b2207 100644 --- a/.github/workflows/dnmtools_release_linux.yml +++ b/.github/workflows/dnmtools_release_linux.yml @@ -19,6 +19,12 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive + - name: Get version number + id: get-vn + run: | + awk '/AC_INIT/ {print "vn="$2}' configure.ac | sed "s/\[//; s/\]//; s/,//" >> "$GITHUB_OUTPUT" + env: + GH_TOKEN: ${{ github.token }} - name: Configure and build env: SCRIPT: | @@ -54,17 +60,12 @@ jobs: autoreconf -i && \ mkdir build && cd build && \ ../configure --with-libdeflate && \ - make -j4 LDFLAGS="-static-libgcc -static-libstdc++ -s" + make -j4 LDFLAGS="-static-libgcc -static-libstdc++ -s" && \ + tar -cf dnmtools-${{ steps.get-vn.outputs.vn }}-Linux.tar.gz dnmtools run: | docker exec build-container bash -c "$SCRIPT" - - name: Get version number - id: get-vn - run: | - awk '/AC_INIT/ {print "vn="$2}' configure.ac | sed "s/\[//; s/\]//; s/,//" >> "$GITHUB_OUTPUT" - env: - GH_TOKEN: ${{ github.token }} - name: Upload the binary uses: actions/upload-artifact@v4 with: - name: dnmtools-${{ steps.get-vn.outputs.vn }}-Linux - path: build/dnmtools + name: dnmtools-${{ steps.get-vn.outputs.vn }}-Linux.tar.gz + path: build/dnmtools-${{ steps.get-vn.outputs.vn }}-Linux.tar.gz From 71a897b25c58d008ace117217c40da263a8ea86b Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 19:43:11 -0700 Subject: [PATCH 11/19] .github/workflows/dnmtools_release_macos.yml: attempting to put the binary directly into the desired tar.gz format --- .github/workflows/dnmtools_release_macos.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/.github/workflows/dnmtools_release_macos.yml b/.github/workflows/dnmtools_release_macos.yml index ea7e54e5..40a4bcf2 100644 --- a/.github/workflows/dnmtools_release_macos.yml +++ b/.github/workflows/dnmtools_release_macos.yml @@ -80,6 +80,11 @@ jobs: runs-on: macos-15 steps: - uses: actions/checkout@v4 + - name: Get version number + id: vn + run: awk '/AC_INIT/ {print "vn="$2}' configure.ac | sed "s/\[//; s/\]//; s/,//" >> "$GITHUB_OUTPUT" + env: + GH_TOKEN: ${{ github.token }} - name: Download artifacts uses: actions/download-artifact@v4 with: @@ -92,13 +97,9 @@ jobs: binaries/dnmtools-*/dnmtools_* \ -output dnmtools chmod +x dnmtools - - name: Get version number - id: vn - run: awk '/AC_INIT/ {print "vn="$2}' configure.ac | sed "s/\[//; s/\]//; s/,//" >> "$GITHUB_OUTPUT" - env: - GH_TOKEN: ${{ github.token }} + tar -cf dnmtools-${{ steps.get-vn.outputs.vn }}-macOS.tar.gz dnmtools - name: Upload the lipo binary uses: actions/upload-artifact@v4 with: - name: dnmtools-${{ steps.vn.outputs.vn }}-macOS + name: dnmtools-${{ steps.vn.outputs.vn }}-macOS.tar.gz path: dnmtools From 8e81bc8f85ecdc732b6b961f05bdabcd624ac30f Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 20:37:53 -0700 Subject: [PATCH 12/19] data/LICENSE: adding file with full license info --- data/LICENSE | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 data/LICENSE diff --git a/data/LICENSE b/data/LICENSE new file mode 100644 index 00000000..a88abdb4 --- /dev/null +++ b/data/LICENSE @@ -0,0 +1,127 @@ +LICENSES + +DNMTools + +Copyright (C) 2025 Andrew D Smith + +This is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. 
+ +This software is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +======================================================================== + +Additional licenses/copyrights that apply to DNMTools pre-built binaries, +which link statically to GSL (GNU Scientific Library), ZLib, HTSlib and +libdeflate. + +======================================================================== +GNU Scientific Library + +This copyright notice is taken files at: +https://savannah.gnu.org/projects/gsl + +Copyright (C) 1996-2025 Free Software Foundation, Inc. and the GSL developers + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 51 Franklin +Street, Fifth Floor, Boston, MA 02110-1301, USA. + +======================================================================= +ZLib + +This copyright notice is taken from: +https://github.com/madler/zlib/blob/develop/LICENSE + +Copyright notice: + +(C) 1995-2024 Jean-loup Gailly and Mark Adler + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the +use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a + product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +Jean-loup Gailly Mark Adler +jloup@gzip.org madler@alumni.caltech.edu + +======================================================================= +libdeflate + +This copyright notice is taken from: +https://github.com/ebiggers/libdeflate/blob/master/COPYING + +Copyright 2016 Eric Biggers +Copyright 2024 Google LLC + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +======================================================================= +HTSlib + +This copyright notice is taken from: +https://github.com/samtools/htslib/blob/develop/LICENSE + +The MIT/Expat License + +Copyright (C) 2012-2025 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From 2a753cecd89a6edc57c1ce0d5adb72034326364c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 20:40:42 -0700 Subject: [PATCH 13/19] data/make_full_license_info_header.sh : adding a script that will format the full license as a header for inclusion in released dnmtools binaries --- data/make_full_license_info_header.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 data/make_full_license_info_header.sh diff --git a/data/make_full_license_info_header.sh b/data/make_full_license_info_header.sh new file mode 100755 index 00000000..bb5ae09d --- /dev/null +++ b/data/make_full_license_info_header.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +input=$1 + +echo 'static const char *license_text = R"(' +cat "$input" +echo ')";' From cf83a2aa033161ab4ad9bf18ea683a4256876540 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 20:41:34 -0700 Subject: [PATCH 14/19] src/dnmtools.cpp: adding preprocessor control to include full license information so it will be available in released binaries --- src/dnmtools.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/dnmtools.cpp b/src/dnmtools.cpp index 5e6185ff..cb2b03fa 100644 --- a/src/dnmtools.cpp +++ b/src/dnmtools.cpp @@ -24,6 +24,10 @@ #include #include +#ifdef INCLUDE_FULL_LICENSE_INFO +#include +#endif + using std::cout; using std::end; using std::endl; @@ -34,10 +38,6 @@ using std::vector; static const string PROGRAM_NAME = "dnmtools"; -static const std::string license_text = R"( - -)"; - struct dnmtools_command { string tag; string description; @@ -148,6 +148,9 @@ print_help( const vector>> &command_groups) { cout << "Program: " << PROGRAM_NAME << "\n" << "Version: " << VERSION << "\n" +#ifdef INCLUDE_FULL_LICENSE_INFO + << "License: use --license for full license info\n" +#endif << "Usage: " << PROGRAM_NAME << " [options]\n" << "Commands:" << endl; for (auto &&g : command_groups) { @@ -224,12 +227,14 @@ main(int argc, char *argv[]) { return EXIT_SUCCESS; 
} +#ifdef INCLUDE_FULL_LICENSE_INFO for (auto i = 0; i < argc; ++i) { if (std::string(argv[i]) == "--license") { std::cout << license_text; return EXIT_SUCCESS; } } +#endif const auto has_tag = [&](const dnmtools_command &a) { return a.tag == argv[1]; From fba1e60749c234e47cc58f0930ca2aae6f88393c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 20:48:01 -0700 Subject: [PATCH 15/19] .github/workflows/dnmtools_release_linux.yml: updates to build in the license info when making a binary for release --- .github/workflows/dnmtools_release_linux.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/dnmtools_release_linux.yml b/.github/workflows/dnmtools_release_linux.yml index 957b2207..d4f12f78 100644 --- a/.github/workflows/dnmtools_release_linux.yml +++ b/.github/workflows/dnmtools_release_linux.yml @@ -60,6 +60,8 @@ jobs: autoreconf -i && \ mkdir build && cd build && \ ../configure --with-libdeflate && \ + ../data/make_full_license_info_header.sh ../data/LICENSE > license.h && \ + echo "#define INCLUDE_FULL_LICENSE_INFO 1" >> config.h && \ make -j4 LDFLAGS="-static-libgcc -static-libstdc++ -s" && \ tar -cf dnmtools-${{ steps.get-vn.outputs.vn }}-Linux.tar.gz dnmtools run: | From 2fb37ce62ad8dee604e6d25419ddf59ca02f5df2 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 5 Jul 2025 20:54:06 -0700 Subject: [PATCH 16/19] .github/workflows/dnmtools_release_macos.yml: updates to build in the license info when making a binary for release --- .github/workflows/dnmtools_release_macos.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/dnmtools_release_macos.yml b/.github/workflows/dnmtools_release_macos.yml index 40a4bcf2..90c23cf8 100644 --- a/.github/workflows/dnmtools_release_macos.yml +++ b/.github/workflows/dnmtools_release_macos.yml @@ -58,6 +58,8 @@ jobs: CXX=g++-14 \ LDFLAGS="-L/opt/dnmtools/lib -static-libgcc -static-libstdc++ -Wl,-dead_strip" \ CPPFLAGS="-I/opt/dnmtools/include" + 
../data/make_full_license_info_header.sh ../data/LICENSE > license.h + echo "#define INCLUDE_FULL_LICENSE_INFO 1" >> config.h make -j4 - name: Rename the binary run: mv build/dnmtools dnmtools_$(uname -m) From 48553dd886cf6246a15f99f83ba40957f863d9d8 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sun, 6 Jul 2025 11:04:34 -0700 Subject: [PATCH 17/19] .github/workflows/dnmtools_release_macos.yml: fixing a path --- .github/workflows/dnmtools_release_macos.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dnmtools_release_macos.yml b/.github/workflows/dnmtools_release_macos.yml index 90c23cf8..54f48513 100644 --- a/.github/workflows/dnmtools_release_macos.yml +++ b/.github/workflows/dnmtools_release_macos.yml @@ -99,9 +99,9 @@ jobs: binaries/dnmtools-*/dnmtools_* \ -output dnmtools chmod +x dnmtools - tar -cf dnmtools-${{ steps.get-vn.outputs.vn }}-macOS.tar.gz dnmtools + tar -cf dnmtools-${{ steps.vn.outputs.vn }}-macOS.tar.gz dnmtools - name: Upload the lipo binary uses: actions/upload-artifact@v4 with: name: dnmtools-${{ steps.vn.outputs.vn }}-macOS.tar.gz - path: dnmtools + path: dnmtools-${{ steps.vn.outputs.vn }}-macOS.tar.gz From 37745a1c5b13ccf99e1862733e8ee96ee2074aad Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sun, 6 Jul 2025 11:50:01 -0700 Subject: [PATCH 18/19] .github/workflows/dnmtools_distcheck_ubuntu.yml: making artifact for source archive --- .github/workflows/dnmtools_distcheck_ubuntu.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/dnmtools_distcheck_ubuntu.yml b/.github/workflows/dnmtools_distcheck_ubuntu.yml index 5cbd8a47..3000ac6a 100644 --- a/.github/workflows/dnmtools_distcheck_ubuntu.yml +++ b/.github/workflows/dnmtools_distcheck_ubuntu.yml @@ -14,6 +14,12 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive + - name: Get version number + id: vn + run: | + awk '/AC_INIT/ {print "vn="$2}' configure.ac | sed "s/\[//; s/\]//; s/,//" >> 
"$GITHUB_OUTPUT" + env: + GH_TOKEN: ${{ github.token }} - name: Update packages run: sudo apt-get update - name: Install dependencies @@ -22,5 +28,12 @@ jobs: run: ./autogen.sh - name: configure run: ./configure + - name: Generate the source archive + run: make dist + - name: Upload the archive + uses: actions/upload-artifact@v4 + with: + name: dnmtools-${{ steps.vn.outputs.vn }}.tar.gz + path: dnmtools-${{ steps.vn.outputs.vn }}.tar.gz - name: make distcheck run: make -j4 distcheck From cf0bc346cf9021289b7e77481d2addb1c1f3b2cc Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sun, 6 Jul 2025 11:59:53 -0700 Subject: [PATCH 19/19] docs/content/counts-nano.md: minor update --- docs/content/counts-nano.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/content/counts-nano.md b/docs/content/counts-nano.md index b071ba77..3de69a78 100644 --- a/docs/content/counts-nano.md +++ b/docs/content/counts-nano.md @@ -25,10 +25,10 @@ More documentation will come as this tool evolves, but for now: the number of reads because probabilities on modifications are used, so methylation levels for each site are expected values (the best estimates we can make), and do not use arbitrary cutoffs. -- Other commands in DNMTools have been modified to use this form of expected - methylation level, and behave as previously for bisulfite sequencing data, - but have updated behavior when the data is from nanopore. The user does not - need to specify the technology used. +- Several other commands in DNMTools have been modified to use this form of + expected methylation level, and behave as previously for bisulfite + sequencing data, but have updated behavior when the data is from + nanopore. The user does not need to specify the technology used. - Some commands need to use a `-relaxed` flag to work with the additional columns in the output from `counts-nano` compared with `counts`. For commands without this option, simply do `cut -f1-6` on the output of