Updating MAGNET to v1.2.0.

I updated the standalone MAGNET.sh and related files of the repo to v1.1.1, as in the current head of PIrANHA. I then made a variety of changes to the files, bumping them to v1.2.0 (new major-minor version).
justincbagley · Dec 22, 2020 · 5c81e13 · 5c81e13
1 parent dde49d1
commit 5c81e13
Show file tree

Hide file tree

Showing 6 changed files with 449 additions and 399 deletions.
diff --git a/bin/MAGNET-1.1.1/MAGNET b/bin/MAGNET-1.1.1/MAGNET
diff --git a/bin/MAGNET-1.1.1/R/rmGapSites.r b/bin/MAGNET-1.1.1/R/rmGapSites.r
@@ -11,7 +11,7 @@
 # Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700.                   #
 # Last update: March 6, 2019                                                             #
 # Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved.                         #
-# Please report bugs to <jbagley@jsu.edu>.                                              #
+# Please report bugs to <jbagley@jsu.edu>.                                               #
 #                                                                                        #
 # Description:                                                                           #
 # RSCRIPT THAT REMOVES GAP SITES FROM AN INPUT DNA SEQUENCE ALIGNMENT IN PHYLIP FORMAT   #
@@ -21,8 +21,8 @@
 
 ######################################## START ###########################################
 
-##--Load needed library, R code, or package stuff. Install package if not present.
-##--source("rmGapSites.R", chdir = TRUE)
+# Load needed library, R code, or package stuff. Install package if not present.
+# source("rmGapSites.R", chdir = TRUE)
 packages <- c("ape", "readr", "seqinr")
 if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
     install.packages(setdiff(packages, rownames(installed.packages())))
@@ -32,20 +32,19 @@ library(ape)
 library(readr)
 library(seqinr)
 
-##--Read in the data, output from first part of NEXUS2gphocs loop:
+# Read in the data, output from first part of NEXUS2gphocs loop:
 	sites <- read.dna("sites.phy", format="sequential")
 	gap_thresh <- read_file("gap_threshold.txt")
 
-##--Fix the gap threshold and then delete columns with the threshold level of gaps
-##--equivalent to at least 1 gap (i.e. any gaps at all):
+# Fix the gap threshold and then delete columns with the threshold level of gaps
+# equivalent to at least 1 gap (i.e. any gaps at all):
 	gap_thresh <- sub(pattern = "\\n", replacement = "", x = gap_thresh)
 	sites_nogaps <- del.colgapsonly(sites, threshold = gap_thresh, freq.only = FALSE)
 
-##--Write new alignment, with sites with gaps removed, to file:
-##--(writing to present working directory)...
+# Write new alignment, with sites with gaps removed, to file (in present working directory)...
 	write.dna(sites_nogaps, file="sites_nogaps.phy", format="sequential", nbcol=-1, colw=500000)
 
-	##--write.nexus(sites_nogaps, file="sites_nogaps.nex")
+	# write.nexus(sites_nogaps, file="sites_nogaps.nex")
 
 
 ######################################### END ############################################
diff --git a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh b/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh
@@ -9,7 +9,7 @@
   VERSION="v1.5.1"                                                                       #
 # Author: Justin C. Bagley                                                               #
 # Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700.                   #
-# Last update: December 11, 2020                                                         #
+# Last update: December 21, 2020                                                         #
 # Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved.                         #
 # Please report bugs to <jbagley@jsu.edu>.                                               #
 #                                                                                        #
@@ -152,27 +152,27 @@ checkMachineType
 
 
 ############ STEP #2: GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT
-##--Extract charset info from sets block at end of NEXUS file: 
+# Extract charset info from sets block at end of NEXUS file: 
 	MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" "$MY_NEXUS" | \
 	awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \
 	awk '{a[NR]=$0} END {for (i=1;i<NR;i++) print a[i];sub(/.$/,"",a[NR]);print a[NR]}' | \
 	sed 's/\,/\,'$CR'/g' | sed 's/^\ //g')";
 
-##--Count number of loci present in the NEXUS file, based on number of charsets defined.
-##--Also get corrected count starting from 0 for numbering loci below...
+# Count number of loci present in the NEXUS file, based on number of charsets defined.
+# Also get corrected count starting from 0 for numbering loci below...
 	MY_NLOCI="$(echo "$MY_NEXUS_CHARSETS" | wc -l | sed 's/\ //g')";
 	MY_CORR_NLOCI="$(calc "$MY_NLOCI" - 1)";
 
-##--This is the base name of the original nexus file, so you have it. This WILL work regardless of whether the NEXUS filename extension is written in lowercase or in all caps, ".NEX".
+# This is the base name of the original nexus file, so you have it. This WILL work regardless of whether the NEXUS filename extension is written in lowercase or in all caps, ".NEX".
 	MY_NEXUS_BASENAME="$(echo "$MY_NEXUS" | sed 's/\.\///g; s/\.[A-Za-z]\{3\}$//g')";
 
-##--Convert data file from NEXUS to fasta format using bioscripts.convert v0.4 Python package:
-##--However, if alignment is too long (>100,000 bp), then need to convert to fasta using my 
-##--script and then wrap to 60 characters with fold function (as suggested at stackexchange
-##--post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size).
-##--If this conversion failes because the alignment is too long, then the code to follow 
-##--will have nothing to work with. So, I am here adding a conditional quit if the fasta
-##--file is not generated.
+# Convert data file from NEXUS to fasta format using bioscripts.convert v0.4 Python package:
+# However, if alignment is too long (>100,000 bp), then need to convert to fasta using my 
+# script and then wrap to 60 characters with fold function (as suggested at stackexchange
+# post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size).
+# If this conversion fails because the alignment is too long, then the code to follow 
+# will have nothing to work with. So, I am here adding a conditional quit if the fasta
+# file is not generated.
 
 #---------ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2fasta SCRIPT HERE!!!!----------#
 
@@ -191,7 +191,7 @@ checkMachineType
 
 ############ STEP #3: PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO
 ############ MAKE A G-PhoCS-FORMATTED DATA FILE
-##--Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line:
+# Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line:
 echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt
 
 echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt
@@ -204,9 +204,9 @@ echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt
 			export setLower="$(echo "$j" | sed 's/\-.*$//g')";
 			export setUpper="$(echo "$j" | sed 's/[0-9]*\-//g' | sed 's/\,//g; s/\ //g')";
 
-			**/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta;
+			**/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta ;
 
-			**/fasta2phylip.pl ./sites.fasta > ./sites.phy;
+			**/fasta2phylip.pl ./sites.fasta > ./sites.phy ;
 
 			##--Need to make sure there is a space between the tip taxon name (10 characters as output
 			##--by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. Use

diff --git a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh b/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh
@@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 # the functionality in this boilerplate. This script will fail if they can
 # not be found.
 # -----------------------------------
-
 UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities.
 
 if [[ -f "${UTILS_LOCATION}" ]]; then
@@ -38,10 +37,8 @@ else
   exit 1
 fi
 
-
 # Source shared functions and variables
 # -----------------------------------
-
 FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions.
 VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables.
 
@@ -53,7 +50,6 @@ else
   exit 1
 fi
 
-
 # trapCleanup Function
 # -----------------------------------
 # Any actions that should be taken if the script is prematurely
@@ -144,11 +140,8 @@ echo "INFO      | $(date) |-----------------------------------------------------
 echo "INFO      | $(date) | Starting RAxMLRunChecker pipeline... "
 echo "INFO      | $(date) | Step #1: Set up workspace and check machine type. "
 ############ I. SET WORKING DIRECTORY AND CHECK MACHINE TYPE
-#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
 echoCDWorkingDir
-#echo "INFO      | $(date) |          Checking machine type... "
 checkMachineType
-#echo "INFO      | $(date) |               Found machine type ${machine}. "
 
 
 ############ II. RUN RAXML RUN CHECKER

diff --git a/bin/MAGNET-1.1.1/shell/getBootTrees.sh b/bin/MAGNET-1.1.1/shell/getBootTrees.sh
@@ -9,7 +9,7 @@
   VERSION="v1.0.1"                                                                       #
 # Author: Justin C. Bagley                                                               #
 # Date: Created by Justin Bagley on/before August 20, 2017.                              #
-# Last update: December 11, 2020                                                         #
+# Last update: December 21, 2020                                                         #
 # Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved.                         #
 # Please report bugs to <jbagley@jsu.edu>.                                               #
 #                                                                                        #
@@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 # the functionality in this boilerplate. This script will fail if they can
 # not be found.
 # -----------------------------------
-
 UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities.
 
 if [[ -f "${UTILS_LOCATION}" ]]; then
@@ -38,10 +37,8 @@ else
   exit 1
 fi
 
-
 # Source shared functions and variables
 # -----------------------------------
-
 FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions.
 VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables.
 
@@ -53,7 +50,6 @@ else
   exit 1
 fi
 
-
 # trapCleanup Function
 # -----------------------------------
 # Any actions that should be taken if the script is prematurely
@@ -144,11 +140,8 @@ echo "INFO      | $(date) |-----------------------------------------------------
 echo "INFO      | $(date) | Starting getBootTrees script... "
 echo "INFO      | $(date) | Step #1: Set up workspace and check machine type. "
 ############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE
-#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
 echoCDWorkingDir
-#echo "INFO      | $(date) |          Checking machine type... "
 checkMachineType
-#echo "INFO      | $(date) |               Found machine type ${machine}. "
 
 
 echo "INFO      | $(date) | Step #2: Run main getBootTrees script. "

diff --git a/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh b/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh
@@ -9,7 +9,7 @@
   VERSION="v1.0.1"                                                                       #
 # Author: Justin C. Bagley                                                               #
 # Date: Created by Justin Bagley on November 9, 2016.                                    #
-# Last update: December 11, 2020                                                         #
+# Last update: December 21, 2020                                                         #
 # Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved.                         #
 # Please report bugs to <jbagley@jsu.edu>.                                               #
 #                                                                                        #
@@ -139,16 +139,11 @@ echo "INFO      | $(date) |-----------------------------------------------------
 echo "INFO      | $(date) | phyNcharSumm, v1.0.1 December 2020  (part of PIrANHA v0.4a4)   "
 echo "INFO      | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. "
 echo "INFO      | $(date) |----------------------------------------------------------------"
-
-######################################## START ###########################################
 echo "INFO      | $(date) | Starting phyNcharSumm... "
 echo "INFO      | $(date) | Step #1: Set up workspace and check machine type. "
 ############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE
-#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
 echoCDWorkingDir
-#echo "INFO      | $(date) |          Checking machine type... "
 checkMachineType
-#echo "INFO      | $(date) |               Found machine type ${machine}. "
 
 
 echo "INFO      | $(date) | Step #2: Summarize number of characters in each PHYLIP DNA sequence alignment in current directory. "