diff --git a/bin/MAGNET-1.1.1/MAGNET b/bin/MAGNET-1.1.1/MAGNET index 49636bb6..1c38cd60 100755 --- a/bin/MAGNET-1.1.1/MAGNET +++ b/bin/MAGNET-1.1.1/MAGNET @@ -5,11 +5,11 @@ # |__) | | ' (__( | ) | ) (__( # # | # # # -# File: MAGNET.sh ~ MAny GeNE Trees, v1.1.1 # - export VERSION="v1.1.1" # +# File: MAGNET ~ MAny GeNE Trees, v1.2.0 # + export VERSION="v1.2.0" # # Author: Justin C. Bagley # # Date: Created by Justin Bagley on Mon, Aug 29 13:12:45 2016 -0700. # -# Last update: December 15, 2020 # +# Last update: December 21, 2020 # # Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # # Please report bugs to . # # # @@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # the functionality in this boilerplate. This script will fail if they can # not be found. # ----------------------------------- - UTILS_LOCATION="${SCRIPT_PATH}/../../lib/utils.sh" # Update this path to find the utilities. if [[ -f "${UTILS_LOCATION}" ]]; then @@ -38,10 +37,8 @@ else exit 1 fi - # Source shared functions and variables # ----------------------------------- - FUNCS_LOCATION="${SCRIPT_PATH}/../../lib/sharedFunctions.sh" # Update this path to find the shared functions. VARS_LOCATION="${SCRIPT_PATH}/../../lib/sharedVariables.sh" # Update this path to find the shared variables. @@ -53,7 +50,6 @@ else exit 1 fi - # trapCleanup Function # ----------------------------------- # Any actions that should be taken if the script is prematurely @@ -136,7 +132,7 @@ MAGNET () { ########################################################################################## echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | MAGNET, v1.1.1 December 2020 " +echo "INFO | $(date) | MAGNET, v1.2.0 December 2020 " echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. 
" echo "INFO | $(date) |----------------------------------------------------------------" @@ -161,11 +157,8 @@ echo "INFO | $(date) | - option = ${MY_INDIV_MISSING_DAT echo "INFO | $(date) | - Outgroup taxon, = ${MY_OUTGROUP} " echo "INFO | $(date) | - RAxML output name = ${MY_OUTPUT_NAME} " echo "INFO | $(date) | - Resume switch (--resume) = ${MY_RESUME_SWITCH} " - echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " ############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -#echoCDWorkingDir echoShortPWD MY_WORKING_DIR="$(pwd)" checkMachineType @@ -184,9 +177,9 @@ if [[ "${machine}" = "Linux" ]]; then fi -echo "INFO | $(date) | Step #2: Input single NEXUS or G-PhoCS file, or multiple PHYLIP files. " -echo "INFO | $(date) | For -f 1 or -f 2f '.gphocs' input file present, continue; else convert NEXUS file to " -echo "INFO | $(date) | G-PhoCS format using NEXUS2gphocs code. If -f 3, then run multiple PHYLIP files in " +echo "INFO | $(date) | Step #2: Input single NEXUS (or G-PhoCS-formatted) file, or multiple PHYLIP files. " +echo "INFO | $(date) | For -f 1 or -f 2, if '.gphocs' input file present, continue; else convert NEXUS file " +echo "INFO | $(date) | to G-PhoCS format using NEXUS2gphocs code. If -f 3, then run multiple PHYLIP files in " echo "INFO | $(date) | RAxML." shopt -s nullglob if [[ -n $(find . 
-name "*.gphocs" -type f) ]]; then @@ -203,35 +196,35 @@ fi NEXUS2gphocs_function () { ############ GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT - ##--Extract charset info from sets block at end of NEXUS file: + # Extract charset info from sets block at end of NEXUS file: MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" $MY_NEXUS | \ awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \ awk '{a[NR]=$0} END {for (i=1;i100,000 bp), then need to convert to FASTA using my - ##--script and then wrap to 60 characters with fold function (as suggested at stackexchange - ##--post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). - ##--If this conversion failes because the alignment is too long, then the code to follow - ##--will have nothing to work with. So, I am here adding a conditional quit if the FASTA - ##--file is not generated. + # Convert data file from NEXUS to FASTA format using bioscripts.convert v0.4 Python package: + # However, if alignment is too long (>100,000 bp), then need to convert to FASTA using my + # script and then wrap to 60 characters with fold function (as suggested at stackexchange + # post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). + # If this conversion fails because the alignment is too long, then the code to follow + # will have nothing to work with. So, I am here adding a conditional quit if the FASTA + # file is not generated. #---------TODO: ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2FASTA SCRIPT HERE!!!!----------# convbioseq fasta $MY_NEXUS > "$MY_NEXUS_BASENAME".fasta ; MY_FASTA="$(echo "$MY_NEXUS_BASENAME".fasta | sed 's/\.\///g; s/\.nex//g')"; - ##--The line above creates a file with the name basename.fasta, where basename is the base name of the original .nex file. For example, "hypostomus_str.nex" would be converted to "hypostomus_str.fasta". 
- ##--Check to make sure the FASTA was created; if so, echo info, if not, echo warning and quit: + # The line above creates a file with the name basename.fasta, where basename is the base name of the original .nex file. For example, "hypostomus_str.nex" would be converted to "hypostomus_str.fasta". + # Check to make sure the FASTA was created; if so, echo info, if not, echo warning and quit: if [[ -s "$MY_NEXUS_BASENAME".fasta ]]; then echo "INFO | $(date) | Input NEXUS was successfully converted to FASTA format. Moving forward... " else @@ -241,7 +234,7 @@ fi ############ PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO MAKE A ############ A G-PhoCS-FORMATTED DATA FILE - ##--Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: + # Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt ; echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt ; @@ -258,27 +251,27 @@ fi **/fasta2phylip.pl ./sites.fasta > ./sites.phy ; - ##--Need to make sure there is a space between the tip taxon name (10 characters as output - ##--by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. Use - ##--a perl search and replace for this: + # Need to make sure there is a space between the tip taxon name (10 characters as output + # by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. Use + # a perl search and replace for this: perl -p -i -e 's/^([A-Za-z0-9\-\_\ ]{10})/$1\ /g' ./sites.phy ; - ##--If .phy file from NEXUS charset $j has gaps in alignment, then call - ##--rmGapSites.R R script to remove all column positions with gaps from - ##--alignment and output new, gapless PHYLIP file named "./sites_nogaps.phy". - ##--If charset $j does not have gaps, go to next line of loop. 
We do the - ##--above by first creating a temporary file containing all lines in - ##--sites.phy with the gap character: + # If .phy file from NEXUS charset $j has gaps in alignment, then call + # rmGapSites.R R script to remove all column positions with gaps from + # alignment and output new, gapless PHYLIP file named "./sites_nogaps.phy". + # If charset $j does not have gaps, go to next line of loop. We do the + # above by first creating a temporary file containing all lines in + # sites.phy with the gap character: grep -n "-" ./sites.phy > ./gaptest.tmp ; - ##--Next, we test for nonzero testfile, indicating presence of gaps in $j, - ##--using UNIX test operator "-s" (returns true if file size is not zero). - ##--If fails, cat sites.phy into file with same name as nogaps file that - ##--is output by rmGapSites.R and move forward: + # Next, we test for nonzero testfile, indicating presence of gaps in $j, + # using UNIX test operator "-s" (returns true if file size is not zero). + # If fails, cat sites.phy into file with same name as nogaps file that + # is output by rmGapSites.R and move forward: if [ -s ./gaptest.tmp ]; then echo "Removing column sites in locus"$count" with gaps. " - R CMD BATCH **/rmGapSites.R ; + R CMD BATCH **/rmGapSites.R else echo "" cat ./sites.phy > ./sites_nogaps.phy ; @@ -322,8 +315,6 @@ if [[ -n $(find . -name "*.nex" -type f) ]]; then NEXUS2gphocs_function - - else echo "INFO | $(date) | No NEXUS files in current working directory. Continuing... " fi @@ -382,9 +373,9 @@ if [[ "$MY_RESUME_SWITCH" = "0" ]]; then MY_N_PHYLIP_FILES="$(ls $MY_PHYLIP_ALIGNMENTS | wc -l | perl -pe 's/\t//g')"; - ##--Loop through the input .phy files and do the following for each file: (A) generate one - ##--folder per .phy file with the same name as the file, only minus the extension, then - ##--(B) move input .phy file into corresponding folder. 
+ # Loop through the input .phy files and do the following for each file: (A) generate one + # folder per .phy file with the same name as the file, only minus the extension, then + # (B) move input .phy file into corresponding folder. ( for i in $MY_PHYLIP_ALIGNMENTS; do mkdir "$(ls ${i} | sed 's/\.phy$//g')" ; @@ -407,7 +398,7 @@ if [[ "$MY_RESUME_SWITCH" = "0" ]]; then elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then if [[ "$MY_NUM_RUN_FOLDERS" = "$MY_N_PHYLIP_FILES" ]]; then - echo "IMPORTANT!| $(date) | Resuming a previous/existing run in current working dir. Skipping MultiRAxMLPrepper, using available run folders... " + echo "IMPORTANT${EP}| $(date) | Resuming a previous/existing run in current working dir. Skipping MultiRAxMLPrepper, using available run folders... " echo "INFO | $(date) | Folder check passed: number of run folders matches number of PHYLIP alignments. " else echo "WARNING | $(date) | Folder check FAILED: number of run folders does NOT match the number of PHYLIP alignments. There may be errors. " @@ -422,45 +413,45 @@ if [[ "$MY_RESUME_SWITCH" = "0" ]]; then echo "INFO | $(date) | Step #5: Estimate best maximum-likelihood (ML) gene trees. " echo "INFO | $(date) | Looping through and analyzing contents of each run folder in RAxML... " - ##--Each folder is set with the locus name corresponding to the locus' position in the - ##--original .gphocs alignment (which, if output by pyRAD, is simply in the order in which - ##--the loci were logged to file by pyRAD, no special order). Also, each folder contains - ##--one .phy file carrying the same basename as the folder name, e.g. "locus0.phy". 
So, - ##--all we need to do here is loop through each folder and call RAxML to run using its - ##--contents as the input file, as follows: + # Each folder is set with the locus name corresponding to the locus' position in the + # original .gphocs alignment (which, if output by pyRAD, is simply in the order in which + # the loci were logged to file by pyRAD, no special order). Also, each folder contains + # one .phy file carrying the same basename as the folder name, e.g. "locus0.phy". So, + # all we need to do here is loop through each folder and call RAxML to run using its + # contents as the input file, as follows: ( for i in ./*/; do - echo "$i" - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print 
random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then + echo "$i" + cd "$i"; + LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; # NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + cd ..; fi - - cd ..; done ) - ##--NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... - ##--Here: adding loop code to move all .phy files remaining in the current working - ##--directory, after Step #3 of the pipeline, to a new folder called "phylip_files". 
This - ##--is done here because if the phylip_files folder is present at the end of Step #3, - ##--then RAxML will also try to estimate a gene tree for .phy file(s) in this folder during - ##--Step #5 of the pipeline above. + # Here: adding loop code to move all .phy files remaining in the current working + # directory, after Step #3 of the pipeline, to a new folder called "phylip_files". This + # is done here because if the phylip_files folder is present at the end of Step #3, + # then RAxML will also try to estimate a gene tree for .phy file(s) in this folder during + # Step #5 of the pipeline above. mkdir ./phylip_files/ ; ( for i in $MY_PHYLIP_ALIGNMENTS; do @@ -472,51 +463,52 @@ echo "INFO | $(date) | Looping through and analyzing contents of each run f elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then echo "INFO | $(date) | Step #3: Resuming gene tree estimation. Run on remaining/incomplete run folders, skip those with completed RAxML runs. " - ( for i in ./*/; do - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; - - if [[ "$MY_OUTPUT_NAME" = "raxml_out" ]] && [[ ! 
-s ./RAxML_info.raxml_out ]]; then - echo "$i" - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - - elif [[ "$MY_OUTPUT_NAME" != "raxml_out" ]] && [[ ! 
-s ./RAxML_info."$MY_OUTPUT_NAME" ]]; then - echo "$i" - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then + cd "$i"; + LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; + # + if [[ "$MY_OUTPUT_NAME" = "raxml_out" ]] && [[ ! 
-s ./RAxML_info.raxml_out ]]; then + echo "$i" + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + # + elif [[ "$MY_OUTPUT_NAME" != "raxml_out" ]] && [[ ! 
-s ./RAxML_info."$MY_OUTPUT_NAME" ]]; then + echo "$i" + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi fi + cd ..; fi - cd ..; done ) @@ -546,14 +538,14 @@ fi ls **/RAxML_bestTree.raxml_out > geneTrees.list ; - ##--Assign gene tree list to variable + # Assign gene tree list to variable MY_GENE_TREE_LIST="$(cat ./geneTrees.list)"; ############ ORGANIZE GENE TREES INTO ONE LOCATION - ##--Place all inferred gene trees into a single "gene_trees" folder in the current - ##--working directory. However, all the gene tree files have the same name. So, in order - ##--to do this, we have to give each gene tree a name that matches the corresponding run - ##--folder, i.e. locus. We can rename each file right after downloading it. + # Place all inferred gene trees into a single "gene_trees" folder in the current + # working directory. However, all the gene tree files have the same name. So, in order + # to do this, we have to give each gene tree a name that matches the corresponding run + # folder, i.e. locus. 
We can rename each file right after downloading it. mkdir ./gene_trees/ ; echo "INFO | $(date) | Copying *ALL* ML gene trees to 'gene_trees' folder in current directory for post-processing..." @@ -582,14 +574,14 @@ fi ls **/RAxML_bootstrap.raxml_out > bootTrees.list ; - ##--Assign bootstrap tree list to variable + # Assign bootstrap tree list to variable MY_BOOT_TREE_LIST="$(cat ./bootTrees.list)"; ############ ORGANIZE BOOTSTRAP TREES INTO ONE LOCATION - ##--Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in - ##--working directory. However, all the boot tree files have the same name. So, in order - ##--to do this, we have to give each boot tree file a name that matches the corresponding - ##--run folder, i.e. locus. We can rename each file right after downloading it. + # Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in + # working directory. However, all the boot tree files have the same name. So, in order + # to do this, we have to give each boot tree file a name that matches the corresponding + # run folder, i.e. locus. We can rename each file right after downloading it. mkdir ./bootstrap_trees ; echo "INFO | $(date) | Copying *ALL* ML bootstrap trees to 'bootstrap_trees' folder in current directory for post-processing..." @@ -619,7 +611,7 @@ fi echo "INFO | $(date) | Organizing bipartitions trees (with bootstrap proportion labels) and making final output file containing all bipartitions trees... " ls **/RAxML_bipartitions.raxml_out > bipartTrees.list ; - ##--Assign bootstrap tree list to variable + # Assign bipartitions tree list to variable MY_BIPART_TREE_LIST="$(cat ./bipartTrees.list)"; ############ ORGANIZE BIPARTITIONS TREES INTO ONE LOCATION @@ -676,9 +668,7 @@ echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; echoCDWorkingDir MY_WORKING_DIR="$(pwd)" -#echo "INFO | $(date) | Checking machine type... 
" checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " ## Set raxml executable name based on machine type: if [[ "${machine}" = "Mac" ]]; then @@ -690,8 +680,8 @@ fi echo "INFO | $(date) | Step #2: Input single NEXUS or G-PhoCS file, or multiple PHYLIP files. " -echo "INFO | $(date) | For -f 1 or -f 2f '.gphocs' input file present, continue; else convert NEXUS file to " -echo "INFO | $(date) | G-PhoCS format using NEXUS2gphocs code. If -f 3, then run multiple PHYLIP files in " +echo "INFO | $(date) | For -f 1 or -f 2, if '.gphocs' input file present, continue; else convert NEXUS file " +echo "INFO | $(date) | to G-PhoCS format using NEXUS2gphocs code. If -f 3, then run multiple PHYLIP files in " echo "INFO | $(date) | RAxML." @@ -706,9 +696,9 @@ echo "INFO | $(date) | Step #3: Make run folders. " MY_N_PHYLIP_FILES="$(ls $MY_PHYLIP_ALIGNMENTS | wc -l | perl -pe 's/\t//g')"; - ##--Loop through the input .phy files and do the following for each file: (A) generate one - ##--folder per .phy file with the same name as the file, only minus the extension, then - ##--(B) move input .phy file into corresponding folder. + # Loop through the input .phy files and do the following for each file: (A) generate one + # folder per .phy file with the same name as the file, only minus the extension, then + # (B) move input .phy file into corresponding folder. ( for i in $MY_PHYLIP_ALIGNMENTS; do mkdir "$(ls ${i} | sed 's/\.phy$//g')" ; @@ -731,7 +721,7 @@ echo "INFO | $(date) | Step #3: Make run folders. " elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then if [[ "$MY_NUM_RUN_FOLDERS" = "$MY_N_PHYLIP_FILES" ]]; then - echo "IMPORTANT!| $(date) | Resuming a previous/existing run in current working dir. Skipping MultiRAxMLPrepper, using available run folders... " + echo "IMPORTANT${EP}| $(date) | Resuming a previous/existing run in current working dir. Skipping MultiRAxMLPrepper, using available run folders... 
" echo "INFO | $(date) | Folder check passed: number of run folders matches number of PHYLIP alignments. " else echo "WARNING | $(date) | Folder check FAILED: number of run folders does NOT match the number of PHYLIP alignments. There may be errors. " @@ -745,45 +735,45 @@ if [[ "$MY_RESUME_SWITCH" = "0" ]]; then echo "INFO | $(date) | Step #4: Estimate best maximum-likelihood (ML) gene trees. " echo "INFO | $(date) | Looping through and analyzing contents of each run folder in RAxML... " - ##--Each folder is set with the locus name corresponding to the locus' position in the - ##--original .gphocs alignment (which, if output by pyRAD, is simply in the order in which - ##--the loci were logged to file by pyRAD, no special order). Also, each folder contains - ##--one .phy file carrying the same basename as the folder name, e.g. "locus0.phy". So, - ##--all we need to do here is loop through each folder and call RAxML to run using its - ##--contents as the input file, as follows: + # Each folder is set with the locus name corresponding to the locus' position in the + # original .gphocs alignment (which, if output by pyRAD, is simply in the order in which + # the loci were logged to file by pyRAD, no special order). Also, each folder contains + # one .phy file carrying the same basename as the folder name, e.g. "locus0.phy". 
So, + # all we need to do here is loop through each folder and call RAxML to run using its + # contents as the input file, as follows: ( for i in ./*/; do - echo "$i" - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then + echo "$i" + cd "$i"; + LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; # NOTE: not 
currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + cd ..; fi - - cd ..; done ) - ##--NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... - ##--Here: adding loop code to move all .phy files remaining in the current working - ##--directory, after Step #3 of the pipeline, to a new folder called "phylip_files". This - ##--is done here because if the phylip_files folder is present at the end of Step #3, - ##--then RAxML will also try to estimate a gene tree for .phy file(s) in this folder during - ##--Step #5 of the pipeline above. + # Here: adding loop code to move all .phy files remaining in the current working + # directory, after Step #3 of the pipeline, to a new folder called "phylip_files". This + # is done here because if the phylip_files folder is present at the end of Step #3, + # then RAxML will also try to estimate a gene tree for .phy file(s) in this folder during + # Step #5 of the pipeline above. 
mkdir ./phylip_files ( for i in $MY_PHYLIP_ALIGNMENTS; do @@ -798,29 +788,30 @@ echo "INFO | $(date) | Step #3: Resuming gene tree estimation. Run on remai ( for i in ./*/; do - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; - - if [[ ! -s ./RAxML_info.raxml_out ]]; then - echo "$i" - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x $(python -c "import random; print random.randint(10000,100000000000)") -p $(python -c "import random; print random.randint(10000,100000000000)") -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then + cd "$i"; + 
LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; # NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... + # + if [[ ! -s ./RAxML_info.raxml_out ]]; then + echo "$i" + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME + fi + # + if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then + "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME + fi fi + cd ..; fi - cd ..; done ) @@ -849,14 +840,14 @@ fi ls **/RAxML_bestTree.raxml_out > geneTrees.list ; - ##--Assign gene tree list to variable + # Assign gene tree list to variable MY_GENE_TREE_LIST="$(cat ./geneTrees.list)"; ############ ORGANIZE GENE TREES INTO ONE LOCATION - ##--Place all inferred gene trees into a single "gene_trees" folder in the current - ##--working directory. However, all the gene tree files have the same name. So, in order - ##--to do this, we have to give each gene tree a name that matches the corresponding run - ##--folder, i.e. locus. We can rename each file right after downloading it. + # Place all inferred gene trees into a single "gene_trees" folder in the current + # working directory. However, all the gene tree files have the same name. 
So, in order + # to do this, we have to give each gene tree a name that matches the corresponding run + # folder, i.e. locus. We can rename each file right after downloading it. mkdir ./gene_trees/ ; echo "INFO | $(date) | Copying *ALL* ML gene trees to 'gene_trees' folder in current directory for post-processing..." @@ -885,14 +876,14 @@ fi ls **/RAxML_bootstrap.raxml_out > bootTrees.list ; - ##--Assign bootstrap tree list to variable + # Assign bootstrap tree list to variable MY_BOOT_TREE_LIST="$(cat ./bootTrees.list)"; ############ ORGANIZE BOOTSTRAP TREES INTO ONE LOCATION - ##--Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in - ##--working directory. However, all the boot tree files have the same name. So, in order - ##--to do this, we have to give each boot tree file a name that matches the corresponding - ##--run folder, i.e. locus. We can rename each file right after downloading it. + # Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in + # working directory. However, all the boot tree files have the same name. So, in order + # to do this, we have to give each boot tree file a name that matches the corresponding + # run folder, i.e. locus. We can rename each file right after downloading it. mkdir ./bootstrap_trees/ ; echo "INFO | $(date) | Copying *ALL* ML bootstrap trees to 'bootstrap_trees' folder in current directory for post-processing..." @@ -922,11 +913,11 @@ fi echo "INFO | $(date) | Organizing bipartitions trees (with bootstrap proportion labels) and making final output file containing all bipartitions trees... 
" ls **/RAxML_bipartitions.raxml_out > bipartTrees.list ; - ##--Assign bootstrap tree list to variable + # Assign bootstrap tree list to variable MY_BIPART_TREE_LIST="$(cat ./bipartTrees.list)"; ############ ORGANIZE BIPARTITIONS TREES INTO ONE LOCATION - mkdir ./bipartitions_trees/ + mkdir ./bipartitions_trees echo "INFO | $(date) | Copying *ALL* RAxML bootstrap bipartitions trees to 'bipartitions_trees' folder in current directory for post-processing..." ( @@ -980,59 +971,64 @@ MY_RAXML_MODEL=GTRGAMMA MY_INDIV_MISSING_DATA=1 ############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... +USAGE=" +Usage: $(basename "$0") [OPTION]... ${bold}Options:${reset} - -f fileType (def: 1; 1 = single , 2 = multiple PHYLIP files) starting file - type; if 1, script expects as stdin a single NEXUS or G-PhoCS in the - current directory; if 2, then script expects multiple PHYLIP files in current dir - -i inputNEXUS (def: NULL) input NEXUS file - -e executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable, accessible from command - line on user's machine - -b numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates - -r raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) - -s simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies simple DNA - substitution model that will override any other model and apply to all DNA partitions - -g gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless >1000 - individuals; takes float proportion value) gap threshold value - -m indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing data setting - -o outgroup (def: NULL) outgroup given as single taxon name (tip label) or comma- - separted list - -h help text (also: -help) echo this help text and exit - -H verbose help text (also: -Help) echo verbose help text and exit - -V version (also: --version) echo version of this script and exit - -R resume (also: --resume) short and long options 
allowing user to resume a previous - MAGNET run in current working directory - -d debug (def: 0, off; 1, on also: --debug) run function in Bash debug mode + -f, --filetype fileType (def: 1; also: 2) starting file type; if 1, script expects as + stdin a single input NEXUS file in the current directory; if 2, then + script expects multiple input PHYLIP files in current directory + -i, --input inputNEXUS (def: NULL) input NEXUS file (mandatory for -f 1) + -e, --exec executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable available + from user's command line interface + -b, --boot numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates + -r, --raxmlmodel raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) + -s, --simplemodel simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies + simple DNA substitution model that will override any other model (even + across partitions) + -g, --gapthresh gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless + >1000 individuals; takes float proportion value) gap threshold value + -m, --missing indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing + data setting + -o, --outgroup outgroup (def: NULL) outgroup given as single taxon name (tip label) or + comma-separated list + -h, --help echo this help text and exit + -H, --Help echo verbose help text and exit + -V, --version echo version and exit + -R, --resume resume (def: 0, off; 1, on) option allowing user to resume a previous + MAGNET run in the current working directory + -d, --debug debug (def: 0, off; 1, on) run function in Bash debug mode ${bold}OVERVIEW${reset} The goal of MAGNET is to infer a maximum-likelihood (ML) gene tree in RAxML for each of - multiple loci, starting from one or multiple input files containing aligned DNA sequences.
- If supplied with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 - or -i -f1 options), then each locus is split into a separate PHYLIP alignment - file, and RAxML (Stamatakis 2014) is run to infer gene trees for each locus. If a NEXUS - datafile is supplied, it is converted into G-PhoCS format (Gronau et al. 2011) while splitting - loci into separate interleaved sequence blocks based on information provided in a sets - block at the end of the NEXUS file (e.g. defined using 'charset' commands), which is mandatory. - However, if -f2, then the program will run in current directory, assuming it contains multiple - PHYLIP-formatted alignment files. Under this scenario, MAGNET will skip directly to running - the PHYLIP files in RAxML using user-specified options. + multiple loci, starting from one or multiple DNA sequence alignment input files. If supplied + with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 or -i + -f1 options), then each locus is split into a separate PHYLIP alignment file, and RAxML + (Stamatakis 2014) is run to infer gene trees for each locus. If a NEXUS datafile is supplied, + it is converted into G-PhoCS format (Gronau et al. 2011) while splitting loci into separate + interleaved sequence blocks based on information provided in a sets block at the end of the + NEXUS file (e.g. defined using 'charset' commands), which is mandatory. However, if -f2, then + the program will run in current directory, assuming it contains multiple PHYLIP-formatted + alignment files. Under this scenario, MAGNET will skip directly to running the PHYLIP files + in RAxML using user-specified options. Sequence names may not include hyphen characters, or there could be issues. For detailed information on MAGNET and its various dependencies, see 'README.md' file in the distribution - folder; however, it is key that the dependencies are available from the command line interface. 
- Among the most important options is -r or --resume (off by default), which tells MAGNET to - resume previous run(s) in current directory, including detecting incomplete run folders and - running RAxML there without overwriting results from previously finished runs. + folder; however, it is key that dependencies are available from the command line interface. + Among the most important options is (-R|--resume, off by default), which tells the + program to resume a previous MAGNET run in current directory, including detecting incomplete + RAxML run folders, and running RAxML without overwriting results from the previous run(s). ${bold}Usage examples:${reset} Call the program using PIrANHA, as follows: - piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap replicates - with gaps allowed and missing data allowed - and the GTRGAMMA model - piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler - HKY85 substitution model for all loci - piranha -f MAGNET -h Show this help text and exit + piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap pseudo- + replicates, gaps allowed, missing + data allowed, and the GTRGAMMA model + piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler + HKY85 substitution model for all loci + piranha -f MAGNET -f 2 -e raxmlHPC -b 100 -s HKY85 -g 1 -m 1 Same as above, but using raxmlHPC + executable + piranha -f MAGNET -h Show this help text and exit ${bold}CITATION${reset} Bagley, J.C. 2020. PIrANHA v0.4a4. GitHub repository, Available at: @@ -1048,73 +1044,97 @@ USAGE="Usage: $(basename "$0") [OPTION]... Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " -VERBOSE_USAGE="Usage: $(basename "$0") [OPTION]... +VERBOSE_USAGE=" +Usage: $(basename "$0") [OPTION]...
${bold}Options:${reset} - -f fileType (def: 1; 1 = single , 2 = multiple PHYLIP files) starting file - type; if 1, script expects as stdin a single NEXUS or G-PhoCS in the - current directory; if 2, then script expects multiple PHYLIP files in current dir - -i inputNEXUS (def: NULL) input NEXUS file - -e executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable, accessible from command - line on user's machine - -b numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates - -r raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) - -s simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies simple DNA - substitution model that will override any other model and apply to all DNA partitions - -g gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless >1000 - individuals; takes float proportion value) gap threshold value - -m indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing data setting - -o outgroup (def: NULL) outgroup given as single taxon name (tip label) or comma- - separated list - -h help text (also: -help) echo this help text and exit - -H verbose help text (also: -Help) echo verbose help text and exit - -V version (also: --version) echo version of this script and exit - -R resume (also: --resume) short and long options allowing user to resume a previous - MAGNET run in current working directory - -d debug (def: 0, off; 1, on also: --debug) run function in Bash debug mode + -f, --filetype fileType (def: 1; also: 2) starting file type; if 1, script expects as + stdin a single input NEXUS file in the current directory; if 2, then + script expects multiple input PHYLIP files in current directory + -i, --input inputNEXUS (def: NULL) input NEXUS file (mandatory for -f 1) + -e, --exec executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable available + from user's command line interface + -b, --boot numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates + 
-r, --raxmlmodel raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) + -s, --simplemodel simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies + simple DNA substitution model that will override any other model (even + across partitions) + -g, --gapthresh gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless + >1000 individuals; takes float proportion value) gap threshold value + -m, --missing indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing + data setting + -o, --outgroup outgroup (def: NULL) outgroup given as single taxon name (tip label) or + comma-separated list + -h, --help echo this help text and exit + -H, --Help echo verbose help text and exit + -V, --version echo version and exit + -R, --resume resume (def: 0, off; 1, on) option allowing user to resume a previous + MAGNET run in the current working directory + -d, --debug debug (def: 0, off; 1, on) run function in Bash debug mode ${bold}OVERVIEW${reset} The goal of MAGNET is to infer a maximum-likelihood (ML) gene tree in RAxML for each of - multiple loci, starting from one or multiple input files containing aligned DNA sequences. - If supplied with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 - or -i -f1 options), then each locus is split into a separate PHYLIP alignment - file, and RAxML (Stamatakis 2014) is run to infer gene trees for each locus. If a NEXUS - datafile is supplied, it is converted into G-PhoCS format (Gronau et al. 2011) while splitting - loci into separate interleaved sequence blocks based on information provided in a sets - block at the end of the NEXUS file (e.g. defined using 'charset' commands), which is mandatory. - However, if -f2, then the program will run in current directory, assuming it contains multiple - PHYLIP-formatted alignment files. Under this scenario, MAGNET will skip directly to running - the PHYLIP files in RAxML using user-specified options.
+ multiple loci, starting from one or multiple DNA sequence alignment input files. If supplied + with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 or -i + -f1 options), then each locus is split into a separate PHYLIP alignment file, and RAxML + (Stamatakis 2014) is run to infer gene trees for each locus. If a NEXUS datafile is supplied, + it is converted into G-PhoCS format (Gronau et al. 2011) while splitting loci into separate + interleaved sequence blocks based on information provided in a sets block at the end of the + NEXUS file (e.g. defined using 'charset' commands), which is mandatory. However, if -f2, then + the program will run in current directory, assuming it contains multiple PHYLIP-formatted + alignment files. Under this scenario, MAGNET will skip directly to running the PHYLIP files + in RAxML using user-specified options. Sequence names may not include hyphen characters, or there could be issues. For detailed information on MAGNET and its various dependencies, see 'README.md' file in the distribution - folder; however, it is key that the dependencies are available from the command line interface. - Among the most important options is -r or --resume (off by default), which tells MAGNET to - resume previous run(s) in current directory, including detecting incomplete run folders and - running RAxML there without overwriting results from previously finished runs. + folder; however, it is key that dependencies are available from the command line interface. + Among the most important options is (-R|--resume, off by default), which tells the + program to resume a previous MAGNET run in current directory, including detecting incomplete + RAxML run folders, and running RAxML without overwriting results from the previous run(s). ${bold}DETAILS${reset} - The -f flag specifies the starting fileType. If -f 1, then the mandatory input is the name - or path to the corresponding starting file, which is passed using the -i flag.
- If -f 2, then mandatory input is the name or path to the working directory (type '.' for current - directory, or supply a relative or absolute path). + The -f flag (also --filetype) specifies the starting fileType. If -f 1, then the mandatory + input is the name or path to the corresponding starting file, which is + passed using the -i|--input flag. If -f 2, then mandatory input is the name or path to + the working directory (type '.' for current directory, or supply a relative or absolute + path). - The -i flag passess the name of the input NEXUS file, parameter, to the program. + The -i flag (also --input) passes the name of the input NEXUS file, parameter, + to the program. + + The -e flag (also --exec) sets the name of the RAxML executable that will be called. The + default executable name is 'raxml', but the user may wish to change this to something + specific to their install or parallelization needs (e.g. 'raxmlHPC-PTHREADS-SSE3'). The + default setting should work on local machine or supercomputing cluster installs. However, + this should be tested beforehand by entering 'raxml' at the command prompt. On some + versions of Linux this yields the following error message: - The -e flag sets the name of the RAxML executable that will be called. The user may wish to - change this to something specific to their install, or to something generic like 'raxml'. - The default setting should work on local machine or supercomputing cluster installs. + 'raxml: error while loading shared libraries: libmpi.so.12: cannot open shared object + file: No such file or directory'. + + If this occurs, then Open MPI related libraries are installed in a non-standard location + and you will need to add this location to your LD_LIBRARY_PATH, e.g.: + + 'export LD_LIBRARY_PATH=/usr/local/openmpi-1.8.1/lib:$LD_LIBRARY_PATH' - The -b flag sets the number of boostrap pseudoreplicates for RAxML to perform while estimating - the gene tree for each locus.
The default is 100; remove bootstrapping by setting to 0. + See the following URL for more insight into this problem: https://stackoverflow.com/ + questions/14769599/mpi-error-loading-shared-libraries. However, simply using a different + raxml executable that does not rely on these libraries will also immediately solve the + problem. In my experience, just setting MAGNET to call the 'raxmlHPC' executable immed- + iately solves this issue on Mac and Linux (so also try simply running MAGNET with '-e + raxmlHPC' or '--exec raxmlHPC'). + + The -b flag sets the number of bootstrap pseudoreplicates for RAxML to perform while + estimating the gene tree for each locus. The default is 100; remove bootstrapping by + setting to 0. The -r flag sets the RAxML model for each locus. This uses the full default GTRGAMMA model, - and at present it is not possible to vary the model across loci. If you want to use HKY - or K80, you will need to use the -s flag (below). + and at present it is not possible to vary the model across loci. If you want to use HKY + or K80, you will need to use the -s flag (below). The -s flag sets a simple RAxML model for each locus/partition, which will override any - model set using the -r flag above and apply to all partitions. In the current version of - RAxML, it is possible to specify the JC69, K80, and HKY85 models as overrides. By default, - this option is turned off and the model set under the -r flag is used instead. + model set using the -r flag above and apply to all partitions. In the current version of + RAxML, it is possible to specify the JC69, K80, and HKY85 models as overrides. By default, + this option is turned off and the model set under the -r flag is used instead. The following two options are available **ONLY** if you are starting from a NEXUS input file: @@ -1134,26 +1154,29 @@ VERBOSE_USAGE="Usage: $(basename "$0") [OPTION]... have varying numbers of individuals for different loci.
The -o flag sets the outgroup exactly the same way as that described in the RAxML v8 user's - manual, as a single name or as a comma-separated list with no spaces between taxon names. - The first name in the list is prioritized, e.g. when members of the list are not monophyletic. + manual, as a single name or as a comma-separated list with no spaces between taxon names. + The first name in the list is prioritized, e.g. when members of the list are not mono- + phyletic. -R | --resume is among the most important options available in MAGNET because it tells the - program to resume a previous run in current directory, including to detect incomplete run - subfolders and run RAxML there without overwriting results from run folders with finished - runs. The default setting is to run without this option. + program to resume a previous run in current directory, including to detect incomplete run + subfolders and run RAxML there without overwriting results from run folders with finished + runs. The default setting is to run without this option. The -d flag runs this function in Bash debug mode (set -xv), which is intended for debugging - for development purposes. If you find a bug, please contact the author at jbagley@jsu.edu. + for development purposes. If you find a bug, please contact the author at jbagley@jsu.edu. 
${bold}Usage examples:${reset} Call the program using PIrANHA, as follows: - piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap replicates - with gaps allowed and missing data allowed - and the GTRGAMMA model - piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler - HKY85 substitution model for all loci - piranha -f MAGNET -h Show this help text and exit + piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap pseudo- + replicates, gaps allowed, missing + data allowed, and the GTRGAMMA model + piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler + HKY85 substitution model for all loci + piranha -f MAGNET -f 2 -e raxmlHPC -b 100 -s HKY85 -g 1 -m 1 Same as above, but using raxmlHPC + executable + piranha -f MAGNET -H Show this help text and exit ${bold}CITATION${reset} Bagley, J.C. 2020. PIrANHA v0.4a4. GitHub repository, Available at: @@ -1198,114 +1221,161 @@ fi # echo "$MY_ARGS" -############ CAPTURE ARGUMENTS, SEND TO FILE FOR PARSING +############ CLEAN WORKING DIR, CAPTURE ARGUMENTS, SEND TO FILE FOR PARSING if [[ -s ./args.tmp ]]; then rm ./args.tmp ; fi ; if [[ -s ./args.txt ]]; then rm ./args.txt ; fi ; - ALL_MY_ARGUMENTS="$(echo "$@")" echo "$ALL_MY_ARGUMENTS" > ./args.txt perl -p -i -e $'s/\-/\n\-/g' ./args.txt - perl -p -i -e 's/^\-$//g' ./args.txt + perl -p -i -e $'s/\-filetype/\-\-filetype/g' ./args.txt + perl -p -i -e $'s/\-input/\-\-input/g' ./args.txt + perl -p -i -e $'s/\-exec/\-\-exec/g' ./args.txt +# perl -p -i -e $'s/\-part/\-\-part/g' ./args.txt + perl -p -i -e $'s/\-boot/\-\-boot/g' ./args.txt + perl -p -i -e $'s/\-raxmlmodel/\-\-raxmlmodel/g' ./args.txt + perl -p -i -e $'s/\-simplemodel/\-\-simplemodel/g' ./args.txt + perl -p -i -e $'s/\-outgroup/\-\-outgroup/g' ./args.txt + perl -p -i -e $'s/\-name/\-\-name/g' ./args.txt perl -p -i -e $'s/\-resume/\-\-resume/g' ./args.txt perl -p -i -e $'s/\-debug/\-\-debug/g' ./args.txt - #perl -p -i -e $'s/\ 
/\n/g' ./args.txt - #wc -l ./args.txt | perl -pe 's/\.\/args\.txt.*//g' | perl -pe 's/\ //g' +############ MANUALLY PARSE THE OPTIONS FROM ARGS + ### SET OPTIONS TO DEFAULT VALUES, EXCEPT WHERE VALUES WERE READ IN FROM USER ARGS - if [[ "$(grep -h '\-f' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-f' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-filetype' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then STARTING_FILE_TYPE=1 ; - else + elif [[ "$(grep -h '\-f' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-filetype' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-f' ./args.txt | perl -pe 's/\-f//g' | perl -pe 's/\ //g')"; STARTING_FILE_TYPE="$MY_ARG" ; + elif [[ "$(grep -h '\-f' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-filetype' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-filetype' ./args.txt | perl -pe 's/\-\-filetype//g' | perl -pe 's/\ //g')"; + STARTING_FILE_TYPE="$MY_ARG" ; fi # - if [[ "$(grep -h '\-i' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-i' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-input' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_NEXUS=NULL ; - else + elif [[ "$(grep -h '\-i' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-input' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-i' ./args.txt | perl -pe 's/\-i//g' | perl -pe 's/\ //g')"; MY_NEXUS="$MY_ARG" ; + elif [[ "$(grep -h '\-i' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-input' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-input' ./args.txt | perl -pe 's/\-\-input//g' | perl -pe 's/\ //g')"; + MY_NEXUS="$MY_ARG" ; fi # - if [[ "$(grep -h '\-e' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then 
+ if [[ "$(grep -h '\-e' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-exec' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_RAXML_EXECUTABLE=raxml ; - else + elif [[ "$(grep -h '\-e' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-exec' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-e' ./args.txt | perl -pe 's/\-e//g' | perl -pe 's/\ //g')"; MY_RAXML_EXECUTABLE="$MY_ARG" ; + elif [[ "$(grep -h '\-e' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-exec' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-exec' ./args.txt | perl -pe 's/\-\-exec//g' | perl -pe 's/\ //g')"; + MY_RAXML_EXECUTABLE="$MY_ARG" ; fi # - if [[ "$(grep -h '\-b' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then +# if [[ "$(grep -h '\-p' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-part' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then +# MY_PARTITIONS_FILE=partitions.txt ; +# elif [[ "$(grep -h '\-p' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-part' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then +# MY_ARG="$(grep -h '\-p' ./args.txt | perl -pe 's/\-p//g' | perl -pe 's/\ //g')"; +# MY_PARTITIONS_FILE="$MY_ARG" ; +# elif [[ "$(grep -h '\-p' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-part' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then +# MY_ARG="$(grep -h '\-\-part' ./args.txt | perl -pe 's/\-\-part//g' | perl -pe 's/\ //g')"; +# MY_PARTITIONS_FILE="$MY_ARG" ; +# fi +# # + if [[ "$(grep -h '\-b' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-boot' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_NUM_BOOTREPS=100 ; - else + elif [[ "$(grep -h '\-b' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-boot' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then 
MY_ARG="$(grep -h '\-b' ./args.txt | perl -pe 's/\-b//g' | perl -pe 's/\ //g')"; - MY_NUM_BOOTREPS="$MY_ARG" ; + MY_NUM_BOOTREPS="$MY_ARG" ; # the -b value is the bootstrap replicate count + elif [[ "$(grep -h '\-b' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-boot' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-boot' ./args.txt | perl -pe 's/\-\-boot//g' | perl -pe 's/\ //g')"; + MY_NUM_BOOTREPS="$MY_ARG" ; fi # - if [[ "$(grep -h '^\-r' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '^\-r' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-raxmlmodel' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_RAXML_MODEL=GTRGAMMA ; - else - MY_ARG="$(grep -h '^\-r' ./args.txt | perl -pe 's/\-r//g' | perl -pe 's/\ //g')"; + elif [[ "$(grep -h '^\-r' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-raxmlmodel' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + MY_ARG="$(grep -h '^\-r' ./args.txt | perl -pe 's/\-r//g' | perl -pe 's/\ //g')"; # anchored so that a long-option line such as the resume flag cannot match the short model flag + MY_RAXML_MODEL="$MY_ARG" ; + elif [[ "$(grep -h '\-\-raxmlmodel' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-raxmlmodel' ./args.txt | perl -pe 's/\-\-raxmlmodel//g' | perl -pe 's/\ //g')"; MY_RAXML_MODEL="$MY_ARG" ; fi # - if [[ "$(grep -h '\-s' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-s' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-simplemodel' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_SIMPLE_MODEL=NULL ; - else + elif [[ "$(grep -h '\-s' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-simplemodel' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-s' ./args.txt | perl -pe 's/\-s//g' | perl -pe 's/\ //g')"; MY_SIMPLE_MODEL="$MY_ARG" ; + elif [[ "$(grep -h '\-s' ./args.txt | wc -l | perl
-pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-simplemodel' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-simplemodel' ./args.txt | perl -pe 's/\-\-simplemodel//g' | perl -pe 's/\ //g')"; + MY_SIMPLE_MODEL="$MY_ARG" ; fi # - if [[ "$(grep -h '\-g' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-g' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-gapthresh' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_GAP_THRESHOLD=0.001 ; - else + elif [[ "$(grep -h '\-g' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-gapthresh' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-g' ./args.txt | perl -pe 's/\-g//g' | perl -pe 's/\ //g')"; MY_GAP_THRESHOLD="$MY_ARG" ; + elif [[ "$(grep -h '\-g' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-gapthresh' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-gapthresh' ./args.txt | perl -pe 's/\-\-gapthresh//g' | perl -pe 's/\ //g')"; + MY_GAP_THRESHOLD="$MY_ARG" ; fi # - if [[ "$(grep -h '\-m' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-m' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-missing' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_INDIV_MISSING_DATA=1 ; - else + elif [[ "$(grep -h '\-m' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-missing' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-m' ./args.txt | perl -pe 's/\-m//g' | perl -pe 's/\ //g')"; MY_INDIV_MISSING_DATA="$MY_ARG" ; + elif [[ "$(grep -h '\-m' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-missing' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-missing' ./args.txt | perl -pe 's/\-\-missing//g' | perl -pe 's/\ //g')"; + MY_INDIV_MISSING_DATA="$MY_ARG" ; fi # - 
if [[ "$(grep -h '\-o' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-o' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-outgroup' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_OUTGROUP=NULL ; - else + elif [[ "$(grep -h '\-o' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-outgroup' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-o' ./args.txt | perl -pe 's/\-o//g' | perl -pe 's/\ //g')"; MY_OUTGROUP="$MY_ARG" ; + elif [[ "$(grep -h '\-o' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-outgroup' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-outgroup' ./args.txt | perl -pe 's/\-\-outgroup//g' | perl -pe 's/\ //g')"; + MY_OUTGROUP="$MY_ARG" ; fi # - if [[ "$(grep -h '\-n' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-n' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-name' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_OUTPUT_NAME=raxml_out ; - else + elif [[ "$(grep -h '\-n' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-name' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-n' ./args.txt | perl -pe 's/\-n//g' | perl -pe 's/\ //g')"; MY_OUTPUT_NAME="$MY_ARG" ; + elif [[ "$(grep -h '\-n' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-name' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-name' ./args.txt | perl -pe 's/\-\-name//g' | perl -pe 's/\ //g')"; + MY_OUTPUT_NAME="$MY_ARG" ; fi # - if [[ "$(grep -h '\-R' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-r' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-resume' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_RESUME_SWITCH=0 ; - else - MY_RESUME_SWITCH=1 ; - fi - if [[ 
"$(grep -h '\-\-resume' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then - MY_RESUME_SWITCH=0 ; - else - MY_RESUME_SWITCH=1 ; + elif [[ "$(grep -h '\-r' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-resume' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + MY_ARG="$(grep -h '\-r' ./args.txt | perl -pe 's/\-r//g' | perl -pe 's/\ //g')"; + MY_RESUME_SWITCH="$MY_ARG" ; + elif [[ "$(grep -h '\-r' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-resume' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-resume' ./args.txt | perl -pe 's/\-\-resume//g' | perl -pe 's/\ //g')"; + MY_RESUME_SWITCH="$MY_ARG" ; + if [[ -z "$MY_VERBOSE_OUT_SWITCH" ]] && [[ "$MY_VERBOSE_OUT_SWITCH" != "0" ]] && [[ "$MY_VERBOSE_OUT_SWITCH" != "1" ]]; then MY_VERBOSE_OUT_SWITCH=1 ; fi fi # - if [[ "$(grep -h '\-d' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then + if [[ "$(grep -h '\-d' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]] && [[ "$(grep -h '\-\-debug' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_DEBUG_MODE_SWITCH=0 ; - else + elif [[ "$(grep -h '\-d' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-debug' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then MY_ARG="$(grep -h '\-d' ./args.txt | perl -pe 's/\-d//g' | perl -pe 's/\ //g')"; - if [[ "$MY_ARG" = "1" ]]; then MY_DEBUG_MODE_SWITCH=1 ; fi - if [[ "$MY_ARG" != "1" ]] && [[ "$MY_ARG" != "0" ]]; then MY_DEBUG_MODE_SWITCH=1 ; fi - fi - if [[ "$(grep -h '\-\-debug' ./args.txt | wc -l | perl -pe 's/\ //g')" = "0" ]]; then - MY_DEBUG_MODE_SWITCH=0 ; - else - MY_DEBUG_MODE_SWITCH=1 + MY_DEBUG_MODE_SWITCH="$MY_ARG" ; + elif [[ "$(grep -h '\-d' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]] && [[ "$(grep -h '\-\-debug' ./args.txt | wc -l | perl -pe 's/\ //g')" != "0" ]]; then + MY_ARG="$(grep -h '\-\-debug' ./args.txt | perl -pe 's/\-\-debug//g' | perl -pe 's/\ //g')"; + 
MY_DEBUG_MODE_SWITCH="$MY_ARG" ; + if [[ -z "$MY_DEBUG_MODE_SWITCH" ]] && [[ "$MY_DEBUG_MODE_SWITCH" != "0" ]] && [[ "$MY_DEBUG_MODE_SWITCH" != "1" ]]; then MY_DEBUG_MODE_SWITCH=1 ; fi fi +# + # ############# ############# ############# # ## TIME TO RUN THE SCRIPT ## diff --git a/bin/MAGNET-1.1.1/R/rmGapSites.r b/bin/MAGNET-1.1.1/R/rmGapSites.r index b3818d1d..af09307e 100644 --- a/bin/MAGNET-1.1.1/R/rmGapSites.r +++ b/bin/MAGNET-1.1.1/R/rmGapSites.r @@ -11,7 +11,7 @@ # Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. # # Last update: March 6, 2019 # # Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # +# Please report bugs to . # # # # Description: # # RSCRIPT THAT REMOVES GAP SITES FROM AN INPUT DNA SEQUENCE ALIGNMENT IN PHYLIP FORMAT # @@ -21,8 +21,8 @@ ######################################## START ########################################### -##--Load needed library, R code, or package stuff. Install package if not present. -##--source("rmGapSites.R", chdir = TRUE) +# Load needed library, R code, or package stuff. Install package if not present. +# source("rmGapSites.R", chdir = TRUE) packages <- c("ape", "readr", "seqinr") if (length(setdiff(packages, rownames(installed.packages()))) > 0) { install.packages(setdiff(packages, rownames(installed.packages()))) @@ -32,20 +32,19 @@ library(ape) library(readr) library(seqinr) -##--Read in the data, output from first part of NEXUS2gphocs loop: +# Read in the data, output from first part of NEXUS2gphocs loop: sites <- read.dna("sites.phy", format="sequential") gap_thresh <- read_file("gap_threshold.txt") -##--Fix the gap threshold and then delete columns with the threshold level of gaps -##--equivalent to at least 1 gap (i.e. any gaps at all): +# Fix the gap threshold and then delete columns with the threshold level of gaps +# equivalent to at least 1 gap (i.e. 
any gaps at all): gap_thresh <- sub(pattern = "\\n", replacement = "", x = gap_thresh) sites_nogaps <- del.colgapsonly(sites, threshold = gap_thresh, freq.only = FALSE) -##--Write new alignment, with sites with gaps removed, to file: -##--(writing to present working directory)... +# Write new alignment, with sites with gaps removed, to file in present working directory)... write.dna(sites_nogaps, file="sites_nogaps.phy", format="sequential", nbcol=-1, colw=500000) - ##--write.nexus(sites_nogaps, file="sites_nogaps.nex") + # write.nexus(sites_nogaps, file="sites_nogaps.nex") ######################################### END ############################################ diff --git a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh b/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh index deb876b9..4ad8dfd3 100644 --- a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh +++ b/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh @@ -9,7 +9,7 @@ VERSION="v1.5.1" # # Author: Justin C. Bagley # # Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. # -# Last update: December 11, 2020 # +# Last update: December 21, 2020 # # Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # # Please report bugs to . # # # @@ -152,27 +152,27 @@ checkMachineType ############ STEP #2: GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT -##--Extract charset info from sets block at end of NEXUS file: +# Extract charset info from sets block at end of NEXUS file: MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" "$MY_NEXUS" | \ awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \ awk '{a[NR]=$0} END {for (i=1;i100,000 bp), then need to convert to fasta using my -##--script and then wrap to 60 characters with fold function (as suggested at stackexchange -##--post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). -##--If this conversion failes because the alignment is too long, then the code to follow -##--will have nothing to work with. 
So, I am here adding a conditional quit if the fasta -##--file is not generated. +# Convert data file from NEXUS to fasta format using bioscripts.convert v0.4 Python package: +# However, if alignment is too long (>100,000 bp), then need to convert to fasta using my +# script and then wrap to 60 characters with fold function (as suggested at stackexchange +# post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). +# If this conversion failes because the alignment is too long, then the code to follow +# will have nothing to work with. So, I am here adding a conditional quit if the fasta +# file is not generated. #---------ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2fasta SCRIPT HERE!!!!----------# @@ -191,7 +191,7 @@ checkMachineType ############ STEP #3: PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO ############ MAKE A G-PhoCS-FORMATTED DATA FILE -##--Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: +# Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt @@ -204,9 +204,9 @@ echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt export setLower="$(echo "$j" | sed 's/\-.*$//g')"; export setUpper="$(echo "$j" | sed 's/[0-9]*\-//g' | sed 's/\,//g; s/\ //g')"; - **/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta; + **/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta ; - **/fasta2phylip.pl ./sites.fasta > ./sites.phy; + **/fasta2phylip.pl ./sites.fasta > ./sites.phy ; ##--Need to make sure there is a space between the tip taxon name (10 characters as output ##--by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. 
Use diff --git a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh b/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh index a8f1c56a..f5fdcac5 100644 --- a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh +++ b/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh @@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # the functionality in this boilerplate. This script will fail if they can # not be found. # ----------------------------------- - UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities. if [[ -f "${UTILS_LOCATION}" ]]; then @@ -38,10 +37,8 @@ else exit 1 fi - # Source shared functions and variables # ----------------------------------- - FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions. VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables. @@ -53,7 +50,6 @@ else exit 1 fi - # trapCleanup Function # ----------------------------------- # Any actions that should be taken if the script is prematurely @@ -144,11 +140,8 @@ echo "INFO | $(date) |----------------------------------------------------- echo "INFO | $(date) | Starting RAxMLRunChecker pipeline... " echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " ############ I. SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " ############ II. RUN RAXML RUN CHECKER diff --git a/bin/MAGNET-1.1.1/shell/getBootTrees.sh b/bin/MAGNET-1.1.1/shell/getBootTrees.sh index 5f3d6b09..5341ab3a 100644 --- a/bin/MAGNET-1.1.1/shell/getBootTrees.sh +++ b/bin/MAGNET-1.1.1/shell/getBootTrees.sh @@ -9,7 +9,7 @@ VERSION="v1.0.1" # # Author: Justin C. Bagley # # Date: Created by Justin Bagley on/before August 20, 2017. 
# -# Last update: December 11, 2020 # +# Last update: December 21, 2020 # # Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved. # # Please report bugs to . # # # @@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # the functionality in this boilerplate. This script will fail if they can # not be found. # ----------------------------------- - UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities. if [[ -f "${UTILS_LOCATION}" ]]; then @@ -38,10 +37,8 @@ else exit 1 fi - # Source shared functions and variables # ----------------------------------- - FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions. VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables. @@ -53,7 +50,6 @@ else exit 1 fi - # trapCleanup Function # ----------------------------------- # Any actions that should be taken if the script is prematurely @@ -144,11 +140,8 @@ echo "INFO | $(date) |----------------------------------------------------- echo "INFO | $(date) | Starting getBootTrees script... " echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " ############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " echo "INFO | $(date) | Step #2: Run main getBootTrees script. " diff --git a/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh b/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh index 029163d6..bcbcbb4a 100644 --- a/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh +++ b/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh @@ -9,7 +9,7 @@ VERSION="v1.0.1" # # Author: Justin C. Bagley # # Date: Created by Justin Bagley on November 9, 2016. 
# -# Last update: December 11, 2020 # +# Last update: December 21, 2020 # # Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # # Please report bugs to . # # # @@ -139,16 +139,11 @@ echo "INFO | $(date) |----------------------------------------------------- echo "INFO | $(date) | phyNcharSumm, v1.0.1 December 2020 (part of PIrANHA v0.4a4) " echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### echo "INFO | $(date) | Starting phyNcharSumm... " echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " ############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " echo "INFO | $(date) | Step #2: Summarize number of characters in each PHYLIP DNA sequence alignment in current directory. "