diff --git a/bin/MAGNET-1.1.1/MAGNET b/bin/MAGNET-1.1.1/MAGNET deleted file mode 100755 index 1c38cd60..00000000 --- a/bin/MAGNET-1.1.1/MAGNET +++ /dev/null @@ -1,1414 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: MAGNET ~ MAny GeNE Trees, v1.2.0 # - export VERSION="v1.2.0" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on Mon, Aug 29 13:12:45 2016 -0700. # -# Last update: December 21, 2020 # -# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL PIPELINE FOR AUTOMATING ESTIMATION OF A MAXIMUM-LIKELIHOOD (ML) GENE TREE IN # -# RAxML FOR EACH OF MANY LOCI IN A RAD-seq, UCE, OR OTHER MULTILOCUS SEQUENCE DATASET # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- -UTILS_LOCATION="${SCRIPT_PATH}/../../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - -# Source shared functions and variables -# ----------------------------------- -FUNCS_LOCATION="${SCRIPT_PATH}/../../lib/sharedFunctions.sh" # Update this path to find the shared functions. -VARS_LOCATION="${SCRIPT_PATH}/../../lib/sharedVariables.sh" # Update this path to find the shared variables. 
- -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -trapCleanup () { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -safeExit () { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -MAGNET () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | MAGNET, v1.2.0 December 2020 " -echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - - -############################## IF -f 1: SINGLE FILE RUN ################################## -########################################################################################## - -####### -if [[ "$STARTING_FILE_TYPE" = "1" ]] && [[ "$MY_NEXUS" != "NULL" ]]; then - -######################################## START ########################################### -echo "INFO | $(date) | Starting MAGNET pipeline... 
" -echo "INFO | $(date) | Running with the following options: " -echo "INFO | $(date) | - NEXUS file, = ${MY_NEXUS} " -echo "INFO | $(date) | - Starting = ${STARTING_FILE_TYPE} " -echo "INFO | $(date) | - RAxML = ${MY_RAXML_EXECUTABLE} " -echo "INFO | $(date) | - Bootstrap reps, = ${MY_NUM_BOOTREPS} " -echo "INFO | $(date) | - RAxML model, = ${MY_RAXML_MODEL} " -echo "INFO | $(date) | - option = ${MY_SIMPLE_MODEL} " -echo "INFO | $(date) | - option = ${MY_GAP_THRESHOLD} " -echo "INFO | $(date) | - option = ${MY_INDIV_MISSING_DATA} " -echo "INFO | $(date) | - Outgroup taxon, = ${MY_OUTGROUP} " -echo "INFO | $(date) | - RAxML output name = ${MY_OUTPUT_NAME} " -echo "INFO | $(date) | - Resume switch (--resume) = ${MY_RESUME_SWITCH} " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -echoShortPWD -MY_WORKING_DIR="$(pwd)" -checkMachineType - -###### -## START DEBUG MODE -if [[ "$MY_DEBUG_MODE_SWITCH" != "0" ]]; then set -xv; fi - - -## Set raxml executable name based on machine type: -if [[ "${machine}" = "Mac" ]]; then - MY_RAXML_EXECUTABLE=raxml -fi -if [[ "${machine}" = "Linux" ]]; then - MY_RAXML_EXECUTABLE=raxmlHPC-SSE3 -fi - - -echo "INFO | $(date) | Step #2: Input single NEXUS (or G-PhoCS-formatted) file, or multiple PHYLIP files. " -echo "INFO | $(date) | For -f 1 or -f 2, if '.gphocs' input file present, continue; else convert NEXUS file " -echo "INFO | $(date) | to G-PhoCS format using NEXUS2gphocs code. If -f 3, then run multiple PHYLIP files in " -echo "INFO | $(date) | RAxML." -shopt -s nullglob -if [[ -n $(find . -name "*.gphocs" -type f) ]]; then - echo "INFO | $(date) | Found '.gphocs' input file... " - MY_GPHOCS_DATA_FILE=./*.gphocs ; ## Assign G-PhoCS-formatted genomic/SNP data file (originally produced/output by pyRAD) in run directory to variable. -else - echo "WARNING | $(date) | No '.gphocs' input file in current working directory... 
" - echo "INFO | $(date) | Attempting to convert NEXUS file, if present, to GPho-CS format... " -fi - - - #################################### NEXUS2gphocs.sh ##################################### - - NEXUS2gphocs_function () { - - ############ GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT - # Extract charset info from sets block at end of NEXUS file: - MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" $MY_NEXUS | \ - awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \ - awk '{a[NR]=$0} END {for (i=1;i100,000 bp), then need to convert to FASTA using my - # script and then wrap to 60 characters with fold function (as suggested at stackexchange - # post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). - # If this conversion failes because the alignment is too long, then the code to follow - # will have nothing to work with. So, I am here adding a conditional quit if the FASTA - # file is not generated. - - #---------TODO: ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2FASTA SCRIPT HERE!!!!----------# - - convbioseq fasta $MY_NEXUS > "$MY_NEXUS_BASENAME".fasta ; - MY_FASTA="$(echo "$MY_NEXUS_BASENAME".fasta | sed 's/\.\///g; s/\.nex//g')"; - - # The line above creates a file with the name basename.fasta, where basename is the base name of the original .nex file. For example, "hypostomus_str.nex" would be converted to "hypostomus_str.fasta". - # Check to make sure the FASTA was created; if so, echo info, if not, echo warning and quit: - if [[ -s "$MY_NEXUS_BASENAME".fasta ]]; then - echo "INFO | $(date) | Input NEXUS was successfully converted to FASTA format. Moving forward... " - else - echo "WARNING | $(date) | NEXUS to FASTA file conversion FAILED. Quitting... 
" - exit 1 - fi - - ############ PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO MAKE A - ############ A G-PhoCS-FORMATTED DATA FILE - # Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: - echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt ; - - echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt ; - count=0 - ( - for j in ${MY_NEXUS_CHARSETS}; do - echo "$j" - charRange="$(echo ${j} | sed 's/\,//g')"; - echo "$charRange" - setLower="$(echo ${j} | sed 's/\-.*$//g')"; - setUpper="$(echo ${j} | sed 's/[0-9]*\-//g' | sed 's/\,//g; s/\ //g')"; - - **/selectSites.pl -s $charRange $MY_FASTA > ./sites.fasta ; - - **/fasta2phylip.pl ./sites.fasta > ./sites.phy ; - - # Need to make sure there is a space between the tip taxon name (10 characters as output - # by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. Use - # a perl search and replace for this: - - perl -p -i -e 's/^([A-Za-z0-9\-\_\ ]{10})/$1\ /g' ./sites.phy ; - - # If .phy file from NEXUS charset $j has gaps in alignment, then call - # rmGapSites.R R script to remove all column positions with gaps from - # alignment and output new, gapless PHYLIP file named "./sites_nogaps.phy". - # If charset $j does not have gaps, go to next line of loop. We do the - # above by first creating a temporary file containing all lines in - # sites.phy with the gap character: - grep -n "-" ./sites.phy > ./gaptest.tmp ; - - # Next, we test for nonzero testfile, indicating presence of gaps in $j, - # using UNIX test operator "-s" (returns true if file size is not zero). - # If fails, cat sites.phy into file with same name as nogaps file that - # is output by rmGapSites.R and move forward: - if [ -s ./gaptest.tmp ]; then - echo "Removing column sites in locus"$count" with gaps. 
" - R CMD BATCH **/rmGapSites.R - else - echo "" - cat ./sites.phy > ./sites_nogaps.phy ; - fi - - phylip_header="$(head -n1 ./sites_nogaps.phy)"; - locus_ntax="$(head -n1 ./sites_nogaps.phy | sed 's/[\ ]*[.0-9]*$//g')"; - locus_nchar="$(head -n1 ./sites_nogaps.phy | sed 's/[0-9]*\ //g')"; - - - if [ $MY_INDIV_MISSING_DATA = "0" ]; then - sed '1d' ./sites_nogaps.phy | egrep -v 'NNNNNNNNNN|nnnnnnnnnn' > ./cleanLocus.tmp ; - cleanLocus_ntax="$(cat ./cleanLocus.tmp | wc -l)"; - echo locus"$((count++))" $cleanLocus_ntax $locus_nchar > ./locus_top.tmp ; - cat ./locus_top.tmp ./cleanLocus.tmp >> ./gphocs_body.txt ; - else - echo locus"$((count++))" $locus_ntax $locus_nchar > ./locus_top.tmp ; - cat ./locus_top.tmp ./sites_nogaps.phy >> ./gphocs_body.txt ; - fi - - if [[ -s ./sites.fasta ]] && [[ -s ./sites.phy ]] && [[ ! -z ./*.tmp ]] && [[ -s ./sites_nogaps.phy ]]; then - rm ./sites.fasta ./sites.phy ./*.tmp ; - rm ./sites_nogaps.phy ; - fi - done - ) - - grep -v "^[0-9]*\ [0-9]*.*$" ./gphocs_body.txt > ./gphocs_body_fix.txt ; - sed 's/locus/'$CR'locus/g' ./gphocs_body_fix.txt > ./gphocs_body_fix2.txt ; - cat ./gphocs_top.txt ./gphocs_body_fix2.txt > $MY_NEXUS_BASENAME.gphocs ; - - ############ CLEANUP: REMOVE UNNECESSARY FILES - if [[ -s ./gphocs_top.txt ]]; then rm ./gphocs_top.txt ; fi - if [[ -s ./gap_threshold.txt ]]; then rm ./gap_threshold.txt ; fi - if [[ ! -z ./gphocs_body* ]]; then rm ./gphocs_body* ; fi - -} - -shopt -s nullglob -if [[ -n $(find . -name "*.nex" -type f) ]]; then - -NEXUS2gphocs_function - -else - echo "INFO | $(date) | No NEXUS files in current working directory. Continuing... " -fi - -shopt -s nullglob -if [[ -n $(find . -name "*.gphocs" -type f) ]]; then - echo "INFO | $(date) | MAGNET successfully created a '.gphocs' input file from the existing NEXUS file... " - MY_GPHOCS_DATA_FILE=./*.gphocs ; ## Assign G-PhoCS-formatted genomic/SNP data file (originally produced/output by pyRAD) in run directory to variable. 
-else - echo "WARNING | $(date) | Failed to convert NEXUS file into G-PhoCS format... " - echo "INFO | $(date) | Quitting." - exit -fi - - - ################################# gphocs2multiPhylip.sh ################################## - - MY_NLOCI="$(head -n1 $MY_GPHOCS_DATA_FILE)"; - -echo "INFO | $(date) | Step #3: Make alignments for each locus. " -echo "INFO | $(date) | In a single loop, using info from '.gphocs' file to split each locus block \ -into a separate PHYLIP-formatted alignment file using gphocs2multiPhylip code... " - ( - for (( i=0; i<=$(calc $MY_NLOCI-1); i++ )); do - echo "$i" - MY_NTAX="$(grep -n "locus$i\ " $MY_GPHOCS_DATA_FILE | \ - awk -F"locus$i " '{print $NF}' | sed 's/\ [0-9]*//g')"; - - MY_NCHAR="$(grep -n "locus$i\ " $MY_GPHOCS_DATA_FILE | \ - awk -F"locus$i [0-9]*\ " '{print $NF}')"; - - awk "/locus"$i"\ / {for(j=1; j<="$MY_NTAX"; j++) {getline; print}}" $MY_GPHOCS_DATA_FILE > ./locus"$i".tmp ; - - echo "$MY_NTAX $MY_NCHAR" > ./locus"$i"_header.tmp ; - - cat ./locus"$i"_header.tmp ./locus"$i".tmp > ./locus"$i".phy ; - done - ) - - ############ CLEANUP: REMOVE UNNECESSARY OR TEMPORARY FILES - if [[ ! -z ./*.tmp ]]; then rm ./*.tmp ; fi - - if [[ -n $(find . -name "*.phy" -type f) ]]; then - MY_PHYLIP_ALIGNMENTS=./*.phy ; ## Assign PHYLIP-formatted genomic/SNP data files (e.g. output by gphocs2multiPhylip.sh shell script) in run directory to variable. - else - echo "..." - fi - - - - ################################# MultiRAxMLPrepper.sh ################################## - -echo "INFO | $(date) | Step #4: Make and check run folders. " - -if [[ "$MY_RESUME_SWITCH" = "0" ]]; then - - MY_N_PHYLIP_FILES="$(ls $MY_PHYLIP_ALIGNMENTS | wc -l | perl -pe 's/\t//g')"; - - # Loop through the input .phy files and do the following for each file: (A) generate one - # folder per .phy file with the same name as the file, only minus the extension, then - # (B) move input .phy file into corresponding folder. 
- ( - for i in $MY_PHYLIP_ALIGNMENTS; do - mkdir "$(ls ${i} | sed 's/\.phy$//g')" ; - cp "$i" ./"$(ls ${i} | sed 's/\.phy$//g')" ; - done - ) - - ##### Setup and run check on the number of run folders created by the program: - MY_FILECOUNT="$(find . -type f | wc -l)"; - MY_DIRCOUNT="$(find . -type d | wc -l)"; - MY_NUM_RUN_FOLDERS="$(ls ./*/*.phy | wc -l | perl -pe 's/\t//g; s/\ //g')"; - - echo "INFO | $(date) | Number of run folders created: $MY_NUM_RUN_FOLDERS " - - if [[ "$MY_NUM_RUN_FOLDERS" = "$MY_N_PHYLIP_FILES" ]]; then - echo "INFO | $(date) | Folder check passed: number of run folders matches number of PHYLIP alignments. " - else - echo "WARNING | $(date) | Folder check FAILED: number of run folders does NOT match the number of PHYLIP alignments. This may cause errors. " - fi - -elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then - if [[ "$MY_NUM_RUN_FOLDERS" = "$MY_N_PHYLIP_FILES" ]]; then - echo "IMPORTANT${EP}| $(date) | Resuming a previous/existing run in current working dir. Skipping MultiRAxMLPrepper, using available run folders... " - echo "INFO | $(date) | Folder check passed: number of run folders matches number of PHYLIP alignments. " - else - echo "WARNING | $(date) | Folder check FAILED: number of run folders does NOT match the number of PHYLIP alignments. There may be errors. " - fi - -fi - - - ################################### RAxMLRunner.sh ####################################### - -if [[ "$MY_RESUME_SWITCH" = "0" ]]; then - -echo "INFO | $(date) | Step #5: Estimate best maximum-likelihood (ML) gene trees. " -echo "INFO | $(date) | Looping through and analyzing contents of each run folder in RAxML... " - # Each folder is set with the locus name corresponding to the locus' position in the - # original .gphocs alignment (which, if output by pyRAD, is simply in the order in which - # the loci were logged to file by pyRAD, no special order). Also, each folder contains - # one .phy file carrying the same basename as the folder name, e.g. "locus0.phy". 
So, - # all we need to do here is loop through each folder and call RAxML to run using its - # contents as the input file, as follows: - ( - for i in ./*/; do - if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then - echo "$i" - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; # NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - cd ..; - fi - done - ) - - - # Here: adding loop code to move all .phy files remaining in the current working - # directory, after Step #3 of the pipeline, to a new folder called "phylip_files". 
This - # is done here because if the phylip_files folder is present at the end of Step #3, - # then RAxML will also try to estimate a gene tree for .phy file(s) in this folder during - # Step #5 of the pipeline above. - mkdir ./phylip_files/ ; - ( - for i in $MY_PHYLIP_ALIGNMENTS; do - echo "$i" - mv "$i" ./phylip_files/ ; - done - ) - -elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then - -echo "INFO | $(date) | Step #3: Resuming gene tree estimation. Run on remaining/incomplete run folders, skip those with completed RAxML runs. " - ( - for i in ./*/; do - if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; - # - if [[ "$MY_OUTPUT_NAME" = "raxml_out" ]] && [[ ! -s ./RAxML_info.raxml_out ]]; then - echo "$i" - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - # - 
elif [[ "$MY_OUTPUT_NAME" != "raxml_out" ]] && [[ ! -s ./RAxML_info."$MY_OUTPUT_NAME" ]]; then - echo "$i" - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - fi - cd ..; - fi - done - ) - - if [[ ! -s ./phylip_files/ ]]; then - mkdir ./phylip_files/ ; - fi - ( - for i in $MY_PHYLIP_ALIGNMENTS; do - echo "$i" - mv "$i" ./phylip_files/ ; - done - ) -fi - - - - -if [[ "$MY_RESUME_SWITCH" = "0" ]]; then - echo "INFO | $(date) | Step #6: RAxML post-processing analyses. " -elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then - echo "INFO | $(date) | Step #4: RAxML post-processing analyses. " -fi - - ################################## getGeneTrees.sh ####################################### - echo "INFO | $(date) | Organizing gene trees and making final output file containing all trees... " - echo "INFO | $(date) | Making list of ML gene trees generated by RAxML... 
" - - ls **/RAxML_bestTree.raxml_out > geneTrees.list ; - - # Assign gene tree list to variable - MY_GENE_TREE_LIST="$(cat ./geneTrees.list)"; - - ############ ORGANIZE GENE TREES INTO ONE LOCATION - # Place all inferred gene trees into a single "gene_trees" folder in the current - # working directory. However, all the gene tree files have the same name. So, in order - # to do this, we have to give each gene tree a name that matches the corresponding run - # folder, i.e. locus. We can rename each file right after downloading it. - mkdir ./gene_trees/ ; - - echo "INFO | $(date) | Copying *ALL* ML gene trees to 'gene_trees' folder in current directory for post-processing..." - ( - for j in ${MY_GENE_TREE_LIST}; do - echo "$j" - cp "$j" ./gene_trees/ ; - MY_LOCUS_NAME="$(echo $j | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./gene_trees/RAxML_bestTree.raxml_out ./gene_trees/"$MY_LOCUS_NAME"_RAxML_best.tre ; - if [[ -s ./gene_trees/RAxML_bestTree.raxml_out ]]; then rm ./gene_trees/RAxML_bestTree.raxml_out ; fi - done - ) - - echo "INFO | $(date) | Making final output file 'besttrees.tre' containing best ML trees from all runs/loci..." - ( - for k in ./gene_trees/*; do - echo "$k" - cat "$k" >> ./besttrees.tre ; - done - ) - - - ################################## getBootTrees.sh ####################################### - echo "INFO | $(date) | Organizing bootstrap trees and making final output file containing all trees... " - echo "INFO | $(date) | Making list of ML bootstrap trees generated by RAxML... " - - ls **/RAxML_bootstrap.raxml_out > bootTrees.list ; - - # Assign bootstrap tree list to variable - MY_BOOT_TREE_LIST="$(cat ./bootTrees.list)"; - - ############ ORGANIZE BOOTSTRAP TREES INTO ONE LOCATION - # Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in - # working directory. However, all the boot tree files have the same name. 
So, in order - # to do this, we have to give each boot tree file a name that matches the corresponding - # run folder, i.e. locus. We can rename each file right after downloading it. - mkdir ./bootstrap_trees ; - - echo "INFO | $(date) | Copying *ALL* ML bootstrap trees to 'bootstrap_trees' folder in current directory for post-processing..." - ( - for l in ${MY_BOOT_TREE_LIST}; do - echo "$l" - cp "$l" ./bootstrap_trees/ ; - MY_LOCUS_NAME="$(echo $l | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./bootstrap_trees/RAxML_bootstrap.raxml_out ./bootstrap_trees/"$MY_LOCUS_NAME"_RAxML_boot.tre ; - if [[ -s ./bootstrap_trees/RAxML_bootstrap.raxml_out ]]; then rm ./bootstrap_trees/RAxML_bootstrap.raxml_out ; fi - done - ) - - echo "INFO | $(date) | Making final output file 'boottrees.tre' containing best ML trees from all runs/loci..." - ( - for m in ./bootstrap_trees/*; do - echo "$m" - cat "$m" >> ./boottrees.tre ; - done - ) - - echo "INFO | $(date) | Making final list of ML bootstrap trees ('final_bootTrees.list') in bootstrap_trees directory..." - ls ./bootstrap_trees/*.tre > final_bootTrees.list ; - - - ################################## getBipartTrees.sh ####################################### - echo "INFO | $(date) | Organizing bipartitions trees (with bootstrap proportion labels) and making final output file containing all bipartitions trees... " - ls **/RAxML_bipartitions.raxml_out > bipartTrees.list ; - - # Assign bootstrap tree list to variable - MY_BIPART_TREE_LIST="$(cat ./bipartTrees.list)"; - - ############ ORGANIZE BIPARTITIONS TREES INTO ONE LOCATION - mkdir ./bipartitions_trees - - echo "INFO | $(date) | Copying *ALL* RAxML bootstrap bipartitions trees to 'bipartitions_trees' folder in current directory for post-processing..." 
- ( - for l in ${MY_BIPART_TREE_LIST}; do - echo "$l" - cp "$l" ./bipartitions_trees/ ; - MY_LOCUS_NAME="$(echo $l | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./bipartitions_trees/RAxML_bipartitions.raxml_out ./bipartitions_trees/"$MY_LOCUS_NAME"_RAxML_bipartitions.tre ; - if [[ -s ./bipartitions_trees/RAxML_bipartitions.raxml_out ]]; then rm ./bipartitions_trees/RAxML_bipartitions.raxml_out ; fi - done - ) - - echo "INFO | $(date) | Making final output file 'biparttrees.tre' containing RAxML bipartitions trees from all runs/loci..." - ( - for m in ./bipartitions_trees/*; do - echo "$m" - cat "$m" >> ./biparttrees.tre ; - done - ) - - echo "INFO | $(date) | Making final list of RAxML bipartitions trees ('final_bipartTrees.list') in bipartitions_trees directory..." - ls ./bipartitions_trees/*.tre > final_bipartTrees.list ; - - -fi -####### - - -############################### IF -f 2: MULTI PHYLIP RUN ################################ -########################################################################################## - -if [[ "$STARTING_FILE_TYPE" = "2" ]]; then - -######################################## START ########################################### -echo "INFO | $(date) | Starting MAGNET pipeline... 
" -echo "INFO | $(date) | Running with the following options: " -echo "INFO | $(date) | - NEXUS file, = ${MY_NEXUS} " -echo "INFO | $(date) | - Starting = ${STARTING_FILE_TYPE} " -echo "INFO | $(date) | - RAxML = ${MY_RAXML_EXECUTABLE} " -echo "INFO | $(date) | - Bootstrap reps, = ${MY_NUM_BOOTREPS} " -echo "INFO | $(date) | - RAxML model, = ${MY_RAXML_MODEL} " -echo "INFO | $(date) | - option = ${MY_SIMPLE_MODEL} " -echo "INFO | $(date) | - option = ${MY_GAP_THRESHOLD} " -echo "INFO | $(date) | - option = ${MY_INDIV_MISSING_DATA} " -echo "INFO | $(date) | - Outgroup taxon, = ${MY_OUTGROUP} " -echo "INFO | $(date) | - RAxML output name = ${MY_OUTPUT_NAME} " -echo "INFO | $(date) | - Resume switch (--resume) = ${MY_RESUME_SWITCH} " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -echoCDWorkingDir -MY_WORKING_DIR="$(pwd)" -checkMachineType - -## Set raxml executable name based on machine type: -if [[ "${machine}" = "Mac" ]]; then - MY_RAXML_EXECUTABLE=raxml -fi -if [[ "${machine}" = "Linux" ]]; then - MY_RAXML_EXECUTABLE=raxmlHPC-SSE3 -fi - - -echo "INFO | $(date) | Step #2: Input single NEXUS or G-PhoCS file, or multiple PHYLIP files. " -echo "INFO | $(date) | For -f 1 or -f 2, if '.gphocs' input file present, continue; else convert NEXUS file " -echo "INFO | $(date) | to G-PhoCS format using NEXUS2gphocs code. If -f 3, then run multiple PHYLIP files in " -echo "INFO | $(date) | RAxML." - - - MY_PHYLIP_ALIGNMENTS=./*.phy ; ## Assign PHYLIP-formatted multilocus gene / genomic/SNP / RAD locus sequence alignment files (e.g. output by gphocs2multiPhylip.sh shell script) in run directory to variable. - - - ################################# MultiRAxMLPrepper.sh ################################## - -if [[ "$MY_RESUME_SWITCH" = "0" ]]; then - -echo "INFO | $(date) | Step #3: Make run folders. 
" - - MY_N_PHYLIP_FILES="$(ls $MY_PHYLIP_ALIGNMENTS | wc -l | perl -pe 's/\t//g')"; - - # Loop through the input .phy files and do the following for each file: (A) generate one - # folder per .phy file with the same name as the file, only minus the extension, then - # (B) move input .phy file into corresponding folder. - ( - for i in $MY_PHYLIP_ALIGNMENTS; do - mkdir "$(ls ${i} | sed 's/\.phy$//g')" ; - cp "$i" ./"$(ls ${i} | sed 's/\.phy$//g')" ; - done - ) - - ##### Setup and run check on the number of run folders created by the program: - MY_FILECOUNT="$(find . -type f | wc -l)"; - MY_DIRCOUNT="$(find . -type d | wc -l)"; - MY_NUM_RUN_FOLDERS="$(ls ./*/*.phy | wc -l | perl -pe 's/\t//g; s/\ //g')"; - - echo "INFO | $(date) | Number of run folders created: $MY_NUM_RUN_FOLDERS " - - if [[ "$MY_NUM_RUN_FOLDERS" = "$MY_N_PHYLIP_FILES" ]]; then - echo "INFO | $(date) | Folder check passed: number of run folders matches number of PHYLIP alignments. " - else - echo "WARNING | $(date) | Folder check FAILED: number of run folders does NOT match the number of PHYLIP alignments. This may cause errors. " - fi - -elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then - if [[ "$MY_NUM_RUN_FOLDERS" = "$MY_N_PHYLIP_FILES" ]]; then - echo "IMPORTANT${EP}| $(date) | Resuming a previous/existing run in current working dir. Skipping MultiRAxMLPrepper, using available run folders... " - echo "INFO | $(date) | Folder check passed: number of run folders matches number of PHYLIP alignments. " - else - echo "WARNING | $(date) | Folder check FAILED: number of run folders does NOT match the number of PHYLIP alignments. There may be errors. " - fi - -fi - - ################################### RAxMLRunner.sh ####################################### - -if [[ "$MY_RESUME_SWITCH" = "0" ]]; then - -echo "INFO | $(date) | Step #4: Estimate best maximum-likelihood (ML) gene trees. " -echo "INFO | $(date) | Looping through and analyzing contents of each run folder in RAxML... 
" - # Each folder is set with the locus name corresponding to the locus' position in the - # original .gphocs alignment (which, if output by pyRAD, is simply in the order in which - # the loci were logged to file by pyRAD, no special order). Also, each folder contains - # one .phy file carrying the same basename as the folder name, e.g. "locus0.phy". So, - # all we need to do here is loop through each folder and call RAxML to run using its - # contents as the input file, as follows: - ( - for i in ./*/; do - if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then - echo "$i" - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; # NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - cd ..; - fi - done - ) - - - # Here: 
adding loop code to move all .phy files remaining in the current working - # directory, after Step #3 of the pipeline, to a new folder called "phylip_files". This - # is done here because if the phylip_files folder is present at the end of Step #3, - # then RAxML will also try to estimate a gene tree for .phy file(s) in this folder during - # Step #5 of the pipeline above. - mkdir ./phylip_files - ( - for i in $MY_PHYLIP_ALIGNMENTS; do - echo "$i" - mv "$i" ./phylip_files/ ; - done - ) - -elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then - -echo "INFO | $(date) | Step #3: Resuming gene tree estimation. Run on remaining/incomplete run folders, skip those with completed RAxML runs. " - - ( - for i in ./*/; do - if [[ "$i" != "./bad_genes/" ]] && [[ "$i" != "./R/" ]] && [[ "$i" != "./shell/" ]] && [[ "$i" != "./perl/" ]] && [[ "$i" != "./orig_phylip/" ]] && [[ "$i" != "./phylip/" ]] && [[ "$i" != "./orig_fasta/" ]] && [[ "$i" != "./fasta/" ]] && [[ "$i" != "./phylip_files/" ]]; then - cd "$i"; - LOCUS_NAME="$(echo $i | sed 's/\.\///g; s/\/$//g')"; # NOTE: not currently using $LOCUS_NAME here, but leave for now, bc may need to use it later... - # - if [[ ! 
-s ./RAxML_info.raxml_out ]]; then - echo "$i" - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" = "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" = "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -n $MY_OUTPUT_NAME - fi - # - if [[ "$MY_OUTGROUP" != "NULL" ]] && [[ "$MY_SIMPLE_MODEL" != "NULL" ]]; then - "$MY_RAXML_EXECUTABLE" -f a -x "$RANDOM""$RANDOM" -p "$RANDOM""$RANDOM" -# $MY_NUM_BOOTREPS -m $MY_RAXML_MODEL -s ./*.phy --$MY_SIMPLE_MODEL -o $MY_OUTGROUP -n $MY_OUTPUT_NAME - fi - fi - cd ..; - fi - done - ) - - if [[ ! -s ./phylip_files/ ]]; then - mkdir ./phylip_files/ ; - fi - ( - for i in $MY_PHYLIP_ALIGNMENTS; do - echo "$i" - mv "$i" ./phylip_files/ ; - done - ) -fi - - - -if [[ "$MY_RESUME_SWITCH" = "0" ]]; then - echo "INFO | $(date) | Step #5: RAxML post-processing analyses. " -elif [[ "$MY_RESUME_SWITCH" = "1" ]]; then - echo "INFO | $(date) | Step #4: RAxML post-processing analyses. " -fi - - ################################## getGeneTrees.sh ####################################### - echo "INFO | $(date) | Organizing gene trees and making final output file containing all trees... " - echo "INFO | $(date) | Making list of ML gene trees generated by RAxML... 
" - - ls **/RAxML_bestTree.raxml_out > geneTrees.list ; - - # Assign gene tree list to variable - MY_GENE_TREE_LIST="$(cat ./geneTrees.list)"; - - ############ ORGANIZE GENE TREES INTO ONE LOCATION - # Place all inferred gene trees into a single "gene_trees" folder in the current - # working directory. However, all the gene tree files have the same name. So, in order - # to do this, we have to give each gene tree a name that matches the corresponding run - # folder, i.e. locus. We can rename each file right after downloading it. - mkdir ./gene_trees/ ; - - echo "INFO | $(date) | Copying *ALL* ML gene trees to 'gene_trees' folder in current directory for post-processing..." - ( - for j in ${MY_GENE_TREE_LIST}; do - echo "$j" - cp "$j" ./gene_trees/ ; - MY_LOCUS_NAME="$(echo $j | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./gene_trees/RAxML_bestTree.raxml_out ./gene_trees/"$MY_LOCUS_NAME"_RAxML_best.tre ; - if [[ -s ./gene_trees/RAxML_bestTree.raxml_out ]]; then rm ./gene_trees/RAxML_bestTree.raxml_out ; fi - done - ) - - echo "INFO | $(date) | Making final output file 'besttrees.tre' containing best ML trees from all runs/loci..." - ( - for k in ./gene_trees/*; do - echo "$k" - cat "$k" >> ./besttrees.tre ; - done - ) - - - ################################## getBootTrees.sh ####################################### - echo "INFO | $(date) | Organizing bootstrap trees and making final output file containing all trees... " - echo "INFO | $(date) | Making list of ML bootstrap trees generated by RAxML... " - - ls **/RAxML_bootstrap.raxml_out > bootTrees.list ; - - # Assign bootstrap tree list to variable - MY_BOOT_TREE_LIST="$(cat ./bootTrees.list)"; - - ############ ORGANIZE BOOTSTRAP TREES INTO ONE LOCATION - # Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in - # working directory. However, all the boot tree files have the same name. 
So, in order - # to do this, we have to give each boot tree file a name that matches the corresponding - # run folder, i.e. locus. We can rename each file right after downloading it. - mkdir ./bootstrap_trees/ ; - - echo "INFO | $(date) | Copying *ALL* ML bootstrap trees to 'bootstrap_trees' folder in current directory for post-processing..." - ( - for l in ${MY_BOOT_TREE_LIST}; do - echo "$l" - cp "$l" ./bootstrap_trees/ ; - MY_LOCUS_NAME="$(echo $l | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./bootstrap_trees/RAxML_bootstrap.raxml_out ./bootstrap_trees/"$MY_LOCUS_NAME"_RAxML_boot.tre ; - if [[ -s ./bootstrap_trees/RAxML_bootstrap.raxml_out ]]; then rm ./bootstrap_trees/RAxML_bootstrap.raxml_out ; fi - done - ) - - echo "INFO | $(date) | Making final output file 'boottrees.tre' containing best ML trees from all runs/loci..." - ( - for m in ./bootstrap_trees/*; do - echo "$m" - cat "$m" >> ./boottrees.tre ; - done - ) - - echo "INFO | $(date) | Making final list of ML bootstrap trees ('final_bootTrees.list') in bootstrap_trees directory..." - ls ./bootstrap_trees/*.tre > final_bootTrees.list ; - - - ################################## getBipartTrees.sh ####################################### - echo "INFO | $(date) | Organizing bipartitions trees (with bootstrap proportion labels) and making final output file containing all bipartitions trees... " - ls **/RAxML_bipartitions.raxml_out > bipartTrees.list ; - - # Assign bootstrap tree list to variable - MY_BIPART_TREE_LIST="$(cat ./bipartTrees.list)"; - - ############ ORGANIZE BIPARTITIONS TREES INTO ONE LOCATION - mkdir ./bipartitions_trees - - echo "INFO | $(date) | Copying *ALL* RAxML bootstrap bipartitions trees to 'bipartitions_trees' folder in current directory for post-processing..." 
- ( - for l in ${MY_BIPART_TREE_LIST}; do - echo "$l" - cp "$l" ./bipartitions_trees/ ; - MY_LOCUS_NAME="$(echo $l | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./bipartitions_trees/RAxML_bipartitions.raxml_out ./bipartitions_trees/"$MY_LOCUS_NAME"_RAxML_bipartitions.tre ; - if [[ -s ./bipartitions_trees/RAxML_bipartitions.raxml_out ]]; then rm ./bipartitions_trees/RAxML_bipartitions.raxml_out ; fi - done - ) - - echo "INFO | $(date) | Making final output file 'biparttrees.tre' containing RAxML bipartitions trees from all runs/loci..." - ( - for m in ./bipartitions_trees/*; do - echo "$m" - cat "$m" >> ./biparttrees.tre ; - done - ) - - echo "INFO | $(date) | Making final list of RAxML bipartitions trees ('final_bipartTrees.list') in bipartitions_trees directory..." - ls ./bipartitions_trees/*.tre > final_bipartTrees.list ; - -fi -####### - - ## Remove arguments file generated when parsing the options: - if [[ -s ./args.txt ]]; then rm ./args.txt ; fi - - -echo "INFO | $(date) | Done." -echo "----------------------------------------------------------------------------------------------------------" -echo "" - -## END DEBUG MODE -if [[ "$MY_DEBUG_MODE_SWITCH" != "0" ]]; then set +xv; fi -###### - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTION DEFAULTS FOR USAGE TEXT: -MY_RAXML_EXECUTABLE=raxml -MY_NUM_BOOTREPS=100 -MY_GAP_THRESHOLD=0.001 -MY_RAXML_MODEL=GTRGAMMA -MY_INDIV_MISSING_DATA=1 - -############ CREATE USAGE & HELP TEXTS -USAGE=" -Usage: $(basename "$0") [OPTION]... 
- - ${bold}Options:${reset} - -f, --filetype fileType (def: 1; also: 2) starting file type; if 1, script expects as - stdin a single input NEXUS file in the current directory; if 2, then - script expects multiple input PHYLIP files in current directory - -i, --input inputNEXUS (def: NULL) input NEXUS file (mandatory for -f 1) - -e, --exec executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable available - from user's command line interface - -b, --boot numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates - -r, --raxmlmodel raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) - -s, --simplemodel simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies - simple DNA substitution model that will override any other model (even - across partitions) - -g, --gapthresh gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless - >1000 individuals; takes float proportion value) gap threshold value - -m, --missing indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing - data setting - -o, --outgroup outgroup (def: NULL) outgroup given as single taxon name (tip label) or - comma-separted list - -h, --help echo this help text and exit - -H, --Help echo verbose help text and exit - -V, --version echo version and exit - -R, --resume resume (def: 0, off; 1, on) option allowing user to resume a previous - MAGNET run in the current working directory - -d, --debug debug (def: 0, off; 1, on) run function in Bash debug mode - - ${bold}OVERVIEW${reset} - The goal of MAGNET is to infer a maximum-likelihood (ML) gene tree in RAxML for each of - multiple loci, starting from one or multiple DNA sequence alignment input files. If supplied - with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 or -i - -f1 options), then each locus is split into a separate PHYLIP alignment file, and RAxML - (Stamatakis 2014) is run to infer gene trees for each locus. 
If a NEXUS datafile is supplied, - it is converted into G-PhoCS format (Gronau et al. 2011) while splitting loci into separate - interleaved sequence blocks based on information provided in a sets block at the end of the - NEXUS file (e.g. defined using 'charset' commands), which is mandatory. However, if -f2, then - the program will run in current directory, assuming it contains multiple PHYLIP-formatted - alignment files. Under this scenario, MAGNET will skip directly to running the PHYLIP files - in RAxML using user-specified options. - Sequence names may not include hyphen characters, or there could be issues. For detailed - information on MAGNET and its various dependencies, see 'README.md' file in the distribution - folder; however, it is key that dependencies are available from the command line interface. - Among the most important options is (-r|--resume, off by default), which tells the - program to resume a previous MAGNET run in current directory, including detecting incomplete - RAxML run folders, and running RAxML without overwriting results from the previous run(s). - - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap pseudo- - replicates, gaps allowed, missing - data allowed, and the GTRGAMMA model - piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler - HKY85 substitution model for all loci - piranha -f MAGNET -f 2 -e raxmlHPC -b 100 -s HKY85 -g 1 -m 1 Same as above, but using raxmlHPC - executable - piranha -f MAGNET -h Show this help text and exit - - ${bold}CITATION${reset} - Bagley, J.C. 2020. PIrANHA v0.4a4. GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Gronau, I., Hubisz, M.J., Gulko, B., Danko, C.G., Siepel, A. 2011. Bayesian inference of - ancient human demography from individual genome sequences. Nature Genetics, 43, 1031-1034. - Stamatakis, A. 2014. 
RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. - Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. -" - -VERBOSE_USAGE=" -Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -f, --filetype fileType (def: 1; also: 2) starting file type; if 1, script expects as - stdin a single input NEXUS file in the current directory; if 2, then - script expects multiple input PHYLIP files in current directory - -i, --input inputNEXUS (def: NULL) input NEXUS file (mandatory for -f 1) - -e, --exec executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable available - from user's command line interface - -b, --boot numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates - -r, --raxmlmodel raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) - -s, --simplemodel simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies - simple DNA substitution model that will override any other model (even - across partitions) - -g, --gapthresh gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless - >1000 individuals; takes float proportion value) gap threshold value - -m, --missing indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing - data setting - -o, --outgroup outgroup (def: NULL) outgroup given as single taxon name (tip label) or - comma-separted list - -h, --help echo this help text and exit - -H, --Help echo verbose help text and exit - -V, --version echo version and exit - -R, --resume resume (def: 0, off; 1, on) option allowing user to resume a previous - MAGNET run in the current working directory - -d, --debug debug (def: 0, off; 1, on) run function in Bash debug mode - - ${bold}OVERVIEW${reset} - The goal of MAGNET is to infer a maximum-likelihood (ML) gene tree in RAxML for each of - multiple loci, starting from one or multiple DNA 
sequence alignment input files. If supplied - with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 or -i - -f1 options), then each locus is split into a separate PHYLIP alignment file, and RAxML - (Stamatakis 2014) is run to infer gene trees for each locus. If a NEXUS datafile is supplied, - it is converted into G-PhoCS format (Gronau et al. 2011) while splitting loci into separate - interleaved sequence blocks based on information provided in a sets block at the end of the - NEXUS file (e.g. defined using 'charset' commands), which is mandatory. However, if -f2, then - the program will run in current directory, assuming it contains multiple PHYLIP-formatted - alignment files. Under this scenario, MAGNET will skip directly to running the PHYLIP files - in RAxML using user-specified options. - Sequence names may not include hyphen characters, or there could be issues. For detailed - information on MAGNET and its various dependencies, see 'README.md' file in the distribution - folder; however, it is key that dependencies are available from the command line interface. - Among the most important options is (-r|--resume, off by default), which tells the - program to resume a previous MAGNET run in current directory, including detecting incomplete - RAxML run folders, and running RAxML without overwriting results from the previous run(s). - - ${bold}DETAILS${reset} - The -f flag (also --filetype) specifies the starting fileType. If -f 1, then the mandatory - input is the name or path to the corresponding starting file, which is - passed using the -i|--input flag. If -f 2, then mandatory input is the name or path to - the working directory (type '.' for current directory, or supply a relative or absolute - path). - - The -i flag (also --input) passess the name of the input NEXUS file, parameter, - to the program. - - The -e flag (also --exec) sets the name of the RAxML executable that will be called. 
The - default executable name is 'raxml', but the user may wish to change this to something - specific to their install or parallelization needs (e.g. 'raxmlHPC-PTHREADS-SSE3'). The - default setting should work on local machine or supercomputing cluster installs. However, - this should be tested beforehand by entering 'raxml' at the command prompt. On some - version fo Linux this yields the following error message: - - 'raxml: error while loading shared libraries: libmpi.so.12: cannot open shared object - file: No such file or directory'. - - If this occurs, then Open MPI related libraries are installed in a non-standard location - and you will need to add this location to your LD_LIBRARY_PATH, e.g.: - - 'export LD_LIBRARY_PATH=/usr/local/openmpi-1.8.1/lib:$LD_LIBRARY_PATH' - - See the following URL: for more insight into this problem: https://stackoverflow.com/ - questions/14769599/mpi-error-loading-shared-libraries. However, simply using a different - raxml executable that does not rely on these libararies will also immediately solve the - problem. In my experience, just setting MAGNET to call the 'raxmlHPC' executable immed- - iately solves this issue on Mac and Linux (so also try simply running MAGNET with '-e - raxmlHPC' or '--exec raxmlHPC'). - - The -b flag sets the number of boostrap pseudoreplicates for RAxML to perform while - estimating the gene tree for each locus. The default is 100; remove bootstrapping by - setting to 0. - - The -r flag sets the RAxML model for each locus. This uses the full default GTRGAMMA model, - and at present it is not possible to vary the model across loci. If you want to use HKY - or K80, you will need to use the -s flag (below). - - The -s flag sets a simple RAxML model for each locus/partition, which will override any - model set using the -r flag above and apply to all partitions. In the current version of - RAxML, it is possible to specify the JC69, K80, and HKY85 models as overrides. 
By default, - this option is turned off and the model set under the -r flag is used instead. - - The following two options are available **ONLY** if you are starting from a NEXUS input file: - - The -g flag supplies a 'gap threshold' to an R script, which deletes all column sites in - the DNA alignment with a proportion of gap characters '-' at or above the threshold value. - If no gap threshold is specified, all sites with gaps are removed by default. If end goal - is to produce a file for G-PhoCS, you will want to leave at the default. - However, if the next step in your pipeline involves converting from .gphocs to other data - formats, you will likely want to set = 1 (e.g. before converting to PHYLIP - format for RAxML). - - The -m flag allows users to choose their level of tolerance for individuals with missing - data. The default is = 1, allowing individuals with runs of 10 or more - missing nucleotide characters ('N') to be kept in the alignment. Alternatively, setting - = 0 removes all such individuals from each locus; thus, while the input - file would have had the same number of individuals across loci, the resulting file could - have varying numbers of individuals for different loci. - - The -o flag sets the outgroup exactly the same way as that described in the RAxML v8 user's - manual, as a single name or as a comma-separated list with no spaces between taxon names. - The first name in the list is prioritized, e.g. when members of the list are not mono- - phyletic. - - -R | --resume is among the most important options available in MAGNET because it tells the - program to resume a previous run in current directory, including to detect incomplete run - subfolders and run RAxML there without overwriting results from run folders with finished - runs. The default setting is to run without this option. - - The -d flag runs this function in Bash debug mode (set -xv), which is intended for debugging - for development purposes. 
If you find a bug, please contact the author at jbagley@jsu.edu. - - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap pseudo- - replicates, gaps allowed, missing - data allowed, and the GTRGAMMA model - piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler - HKY85 substitution model for all loci - piranha -f MAGNET -f 2 -e raxmlHPC -b 100 -s HKY85 -g 1 -m 1 Same as above, but using raxmlHPC - executable - piranha -f MAGNET -H Show this help text and exit - - ${bold}CITATION${reset} - Bagley, J.C. 2020. PIrANHA v0.4a4. GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Gronau, I., Hubisz, M.J., Gulko, B., Danko, C.G., Siepel, A. 2011. Bayesian inference of - ancient human demography from individual genome sequences. Nature Genetics, 43, 1031-1034. - Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. - Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. 
"

############ HELP / VERSION REQUESTS
# With no arguments at all, or when the first argument asks for help or the
# version, print the requested text and exit before doing any work. Both the
# spellings historically accepted here (-help, -Help, -v) and the ones listed
# in the usage text (--help, --Help, -V, --version) are recognised.
if [[ -z "$*" ]]; then
	echo "$USAGE"
	exit
fi

case "$1" in
	-h|-help|--help)
		echo "$USAGE"
		exit
		;;
	-H|-Help|--Help)
		echo "$VERBOSE_USAGE"
		exit
		;;
	-v|-V|--version)
		echo "$(basename "$0") $VERSION";
		exit
		;;
esac

############ CLEAN WORKING DIR
# Earlier versions parsed options through scratch files in the working
# directory; remove any that a previous run left behind.
# NOTE(review): ./args.txt is no longer written. The only other reference
# visible from here is the conditional 'rm ./args.txt' at the end of MAGNET;
# confirm nothing else in the pipeline reads that file.
	if [[ -s ./args.tmp ]]; then rm ./args.tmp ; fi ;
	if [[ -s ./args.txt ]]; then rm ./args.txt ; fi ;

############ SET OPTIONS TO DEFAULT VALUES
	STARTING_FILE_TYPE=1 ;
	MY_NEXUS=NULL ;
	MY_RAXML_EXECUTABLE=raxml ;
	MY_NUM_BOOTREPS=100 ;
	MY_RAXML_MODEL=GTRGAMMA ;
	MY_SIMPLE_MODEL=NULL ;
	MY_GAP_THRESHOLD=0.001 ;
	MY_INDIV_MISSING_DATA=1 ;
	MY_OUTGROUP=NULL ;
	MY_OUTPUT_NAME=raxml_out ;
	MY_RESUME_SWITCH=0 ;
	MY_DEBUG_MODE_SWITCH=0 ;

############ PARSE THE OPTIONS FROM THE POSITIONAL PARAMETERS
# Arguments are consumed one at a time directly from "$@", so option values
# are taken verbatim: hyphens inside a value (e.g. 'raxmlHPC-PTHREADS-SSE3' or
# a file name such as 'my-data.nex') are preserved.
#
# Accepted spellings for every option:
#   -x VALUE   -xVALUE   --long VALUE   --long=VALUE   -long VALUE
# -R/--resume and -d/--debug are switches: a following 0 or 1 is used as the
# value if present, otherwise the switch is turned on (1).
	while [[ $# -gt 0 ]]; do
		MY_OPT="$1" ;
		MY_ARG="" ;
		MY_HAS_ARG=0 ;
		shift ;

		# Step 1: normalise the spelling and peel off any attached value.
		case "$MY_OPT" in
			--?*=*)
				MY_ARG="${MY_OPT#*=}" ;
				MY_OPT="${MY_OPT%%=*}" ;
				MY_HAS_ARG=1 ;
				;;
			--?*)
				;;
			-filetype|-input|-exec|-boot|-raxmlmodel|-simplemodel|-gapthresh|-missing|-outgroup|-name|-resume|-debug)
				# Single-dash long form: treat as the double-dash option.
				MY_OPT="-${MY_OPT}" ;
				;;
			-[fiebrsgmonRd]?*)
				# Short option with its value attached, e.g. '-f2'.
				MY_ARG="${MY_OPT:2}" ;
				MY_OPT="${MY_OPT:0:2}" ;
				MY_HAS_ARG=1 ;
				;;
		esac

		# Step 2: fetch the value from the next argument when none was attached.
		case "$MY_OPT" in
			-R|--resume|-d|--debug)
				if [[ "$MY_HAS_ARG" = "0" ]]; then
					if [[ "${1:-}" = "0" ]] || [[ "${1:-}" = "1" ]]; then
						MY_ARG="$1" ;
						shift ;
					else
						MY_ARG=1 ;
					fi
				fi
				;;
			-f|--filetype|-i|--input|-e|--exec|-b|--boot|-r|--raxmlmodel|-s|--simplemodel|-g|--gapthresh|-m|--missing|-o|--outgroup|-n|--name)
				if [[ "$MY_HAS_ARG" = "0" ]]; then
					if [[ $# -eq 0 ]]; then
						echo "ERROR | $(date) | Option '$MY_OPT' requires a value. Exiting... " >&2
						exit 1
					fi
					MY_ARG="$1" ;
					shift ;
				fi
				;;
		esac

		# Step 3: store the value in the variable the pipeline reads.
		case "$MY_OPT" in
			-f|--filetype)     STARTING_FILE_TYPE="$MY_ARG" ;;
			-i|--input)        MY_NEXUS="$MY_ARG" ;;
			-e|--exec)         MY_RAXML_EXECUTABLE="$MY_ARG" ;;
			-b|--boot)         MY_NUM_BOOTREPS="$MY_ARG" ;;
			-r|--raxmlmodel)   MY_RAXML_MODEL="$MY_ARG" ;;
			-s|--simplemodel)  MY_SIMPLE_MODEL="$MY_ARG" ;;
			-g|--gapthresh)    MY_GAP_THRESHOLD="$MY_ARG" ;;
			-m|--missing)      MY_INDIV_MISSING_DATA="$MY_ARG" ;;
			-o|--outgroup)     MY_OUTGROUP="$MY_ARG" ;;
			-n|--name)         MY_OUTPUT_NAME="$MY_ARG" ;;
			-R|--resume)       MY_RESUME_SWITCH="$MY_ARG" ;;
			-d|--debug)        MY_DEBUG_MODE_SWITCH="$MY_ARG" ;;
			*)
				# Unknown arguments were silently ignored before; keep going,
				# but tell the user so typos do not pass unnoticed.
				echo "WARNING | $(date) | Ignoring unrecognized argument: '$MY_OPT' " >&2
				;;
		esac
	done
#

############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. -# checkDependencies - -# Run the script -MAGNET - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/R/rmGapSites.r b/bin/MAGNET-1.1.1/R/rmGapSites.r deleted file mode 100644 index af09307e..00000000 --- a/bin/MAGNET-1.1.1/R/rmGapSites.r +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env Rscript - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: rmGapSites Rscript # -# VERSION="v1.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. # -# Last update: March 6, 2019 # -# Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. # -# Please report bugs to . 
# -# # -# Description: # -# RSCRIPT THAT REMOVES GAP SITES FROM AN INPUT DNA SEQUENCE ALIGNMENT IN PHYLIP FORMAT # -# NAMED 'sites.phy' (SPECIFIC TO THE MAGNET PIPELINE) # -# # -########################################################################################## - -######################################## START ########################################### - -# Load needed library, R code, or package stuff. Install package if not present. -# source("rmGapSites.R", chdir = TRUE) -packages <- c("ape", "readr", "seqinr") -if (length(setdiff(packages, rownames(installed.packages()))) > 0) { - install.packages(setdiff(packages, rownames(installed.packages()))) -} - -library(ape) -library(readr) -library(seqinr) - -# Read in the data, output from first part of NEXUS2gphocs loop: - sites <- read.dna("sites.phy", format="sequential") - gap_thresh <- read_file("gap_threshold.txt") - -# Fix the gap threshold and then delete columns with the threshold level of gaps -# equivalent to at least 1 gap (i.e. any gaps at all): - gap_thresh <- sub(pattern = "\\n", replacement = "", x = gap_thresh) - sites_nogaps <- del.colgapsonly(sites, threshold = gap_thresh, freq.only = FALSE) - -# Write new alignment, with sites with gaps removed, to file in present working directory)... 
- write.dna(sites_nogaps, file="sites_nogaps.phy", format="sequential", nbcol=-1, colw=500000) - - # write.nexus(sites_nogaps, file="sites_nogaps.nex") - - -######################################### END ############################################ diff --git a/bin/MAGNET-1.1.1/README.md b/bin/MAGNET-1.1.1/README.md deleted file mode 100644 index d02433c6..00000000 --- a/bin/MAGNET-1.1.1/README.md +++ /dev/null @@ -1,289 +0,0 @@ - - -[![Codacy Badge](https://api.codacy.com/project/badge/Grade/2ae9ac0297a54d469fd8b847f798b53f)](https://www.codacy.com/app/justincbagley/MAGNET?utm_source=github.com&utm_medium=referral&utm_content=justincbagley/MAGNET&utm_campaign=Badge_Grade) [![License](http://img.shields.io/badge/license-GPL%20%28%3E=%202%29-green.svg?style=flat)](LICENSE) - -Shell script pipeline for inferring ML gene trees for many loci (e.g. RAD loci, UCEs) - -## LICENSE - -All code within the PIrANHA repository, including MAGNET v1.1.1 pipeline code, is available "AS IS" under a 3-clause BSD license. See the [LICENSE](LICENSE) file for more information. - -## CITATION - -If you use scripts from this repository as part of your published research, then I require you to cite the PIrANHA repository and/or MAGNET package as follows (also see DOI information below): - - Bagley, J.C. 2020. PIrANHA v0.4-alpha3. GitHub repository, Available at: http://github.com/justincbagley/piranha/. - - Bagley, J.C. 2020. MAGNET v1.1.1. GitHub package, Available at: http://github.com/justincbagley/MAGNET. - -Alternatively, please provide the following link to this software program in your manuscript: - - http://github.com/justincbagley/MAGNET - -**Example citations using the above URL:** - "We estimated a gene tree for each RAD locus in RAxML v8 (Stamatakis 2014) using - the MAGNET v1.1.1 pipeline (http://github.com/justincbagley/MAGNET). 
Each RAxML run - specified the GTRGAMMA model and coestimated the maximum-likelihood phylogeny and - bootstrap proportions from 500 bootstrap pseudoreplicates." - -## DOI - -The DOI for MAGNET, via Zenodo, is as follows: [![DOI](https://zenodo.org/badge/66839898.svg)](https://zenodo.org/badge/latestdoi/66839898). Here is an example of citing MAGNET using the DOI: - - Bagley, J.C. 2020. MAGNET v1.1.1. GitHub package, Available at: https://doi.org/10.5281/zenodo.596774. - -## INTRODUCTION - -The estimation of species-level phylogenies, or "species trees" is a fundamental goal in evolutionary biology. However, while "gene trees" estimated from different loci provide insight into the varying evolutionary histories of different parts of the genome, gene trees are random realizations of a stochastic evolutionary process. Thus, gene trees often exhibit conflicting topologies, being incongruent with each other and incongruent with the underlying species tree due to a variety of genetic and biological processes (e.g. gene flow, incomplete lineage sorting, introgression, selection). - -With the advent of recent advances in DNA sequencing technologies, biologists now commonly sequence data from multiple loci, and even hundreds to thousands of loci can quickly be sequenced using massively parallel sequencing on NGS sequencing platforms. Full-likelihood or Bayesian algorithms for inferring species trees and population-level parameters from multiple loci, such as \*BEAST and SNAPP, are computationally burdensome and may be difficult to apply to large amounts of data or distantly related taxa (or other cases that complicate obtaining MCMC convergence). By contrast, a number of recently developed and widely used "summary-statistics" approaches rely on sets of gene trees to infer a species tree for a set of taxa (reviewed by Chifman and Kubatko, 2014; Mirarab and Warnow, 2015). 
These methods are specifically designed to estimate gene trees or use gene trees input by the user, which are treated as observed data points analyzed in a distance-based or coalescent algorithm. Moreover, summary-statistics approaches to species tree inference tend to be accurate and typically much faster than full-data approaches (e.g. Mirarab et al., 2014; Chifman and Kubatko, 2014). Examples of species tree software in this category include programs such as BUCKy (Larget et al., 2010), STEM (Liu et al., 2010), spedeSTEM, NJst (Liu and Yu, 2011), ASTRAL and ASTRAL-II (Mirarab and Warnow, 2015), and ASTRID (Vachaspati and Warnow, 2015). Phylogenetic network models implemented in recent software like SplitsTree and SNaQ also improve network inference by analyzing sets of gene trees. - -Despite the importance of gene trees in species tree and network inference, few resources have been specifically designed to aid rapid estimation of gene trees for different loci. MAGNET (MAny GeNE Trees) is a shell script pipeline within the PIrANHA (PhylogenetIcs ANd PHylogeogrAphy) repository (https://github.com/justincbagley/piranha) that helps fill this gap by automating the inference of a maximum-likelihood (ML) gene tree for each locus in a multilocus dataset. Here, the term "locus" is used loosely to refer to a DNA alignment of homologous nucleotide characters including both variable and invariant DNA sites. The MAGNET package was originally coded up to aid analyses of RAD loci generated by massively parallel sequencing of ddRAD-seq genomic libraries (Peterson et al. 2012). However, MAGNET can be used to estimate gene trees from any multilocus dataset in the appropriate format, and three starting file types are supported: single NEXUS, single G-PhoCS, or multiple PHYLIP files. - -## HARDWARE AND SETUP - -:computer: MAGNET focuses on allowing users to automate the workflow necessary for quickly estimating many gene trees for many loci on their local machine. 
- -:thumbsup: No special hardware or setup is necessary, unless the user is interested in estimating gene trees on a remote supercomputing cluster (see below). - -## SOFTWARE DEPENDENCIES - -MAGNET v1.1.1 is a software package composed of `shell`, `R`, and Perl scripts and also calls several software programs that it relies on as dependencies. These dependencies are described in some detail in README files for different scripts in the package. However, here I provide a list of them, with asterisks preceding those already included with the MAGNET distribution: - -- Perl (available at: https://www.perl.org/get.html). -- Naoki Takebayashi's file conversion Perl scripts (available at: http://raven.iab.alaska.edu/~ntakebay/teaching/programming/perl-scripts/perl-scripts.html). -- Python (available at: https://www.python.org/downloads/). -- bioscripts.convert v0.4 Python package (available at: https://pypi.python.org/pypi/bioscripts.convert/0.4; also see README for `NEXUS2gphocs.sh`). -- RAxML, installed and running on local machine (available at: http://sco.h-its.org/exelixis/web/software/raxml/index.html). - -Users must install all software not included in MAGNET, and ensure that it is available via the command line on their local machine. On the user's local machine, Perl should be available by simply typing "`perl`" at the command line; Python should be available by typing "`python`" at the command line; and the bioscripts.convert package should be available by typing "convbioseq" at the command line. Also, RAxML should be compiled using SSE3 install commands, so that RAxML can be called by simply typing "`raxmlHPC-SSE3`" on the command line. For detailed instructions for setting up RAxML this way, refer to the newest RAxML user manual (available at: http://sco.h-its.org/exelixis/resource/download/NewManual.pdf). - -## INPUT FILE FORMAT - -MAGNET assumes that you are starting from multilocus DNA sequence data in one of three formats. 
The *First format* that is supported is that of a single datafile in G-PhoCS (Gronau et al. 2011) format, with the extension ".gphocs". The *Second format* that is supported is NEXUS format, with data in a single file having the extension '.nex'. For genomic data in aligned sequence format, such as aligned RAD tags (e.g. ddRAD-seq contigs) or other SNP data derived from genotyping-by-sequencing (GBS) methods, the user should assemble the data, call SNPs, and output SNP sequence data files in .gphocs or .nex format prior to running MAGNET. This can easily be done by running pyRAD or ipyrad (Eaton 2014) while calling for output in all formats (\*; you'll get .gphocs and .nex files). - -However, this may not always be possible, and .gphocs format is not yet among the most popular file formats in phylogenomics/population genomics. Thus, I have added a ```NEXUS2gphocs.sh``` shell script utility within MAGNET (in the "shell" folder) that will convert a sequential NEXUS file into .gphocs format for you. An example NEXUS file "example.nex" is included in the distribution. Feel free to use the `NEXUS2gphocs.sh` utility script independently of MAGNET to convert from NEXUS to .gphocs format. However, when doing this, *make sure to follow the usage guidelines below*. - -The *Third format* that is supported in MAGNET is that of DNA sequence alignments for multiple loci contained in separate PHYLIP files for each locus. - -Users must specify the input fileType with the `-f` flag. Options are 1 for a single G-PhoCS- or NEXUS-formatted inputFile, or 2 for the multiple PHYLIP option. If `-f 1`, then the program will expect as standard input (stdin) the name of the inputFile. However, if `-f 2`, then you should supply MAGNET with the path to the desired working directory; often, this will simply be the current working directory, in which case the user can simply type \".\" for workingDir, but the relative or absolute path to the workingDir will also work fine. 
- -## PIPELINE - -Apart from input file conversion steps, the MAGNET pipeline works by calling five different scripts, in series, each designed to conduct a task whose output is processed in the next step of the pipeline. First, the ```gphocs2multiPhylip.sh``` shell script is used to extract loci from the input file and place each locus in a PHYLIP-formatted file with extension ".phy". Second, a shell script named ```MultiRAxMLPrepper.sh``` is used to place the .phy files into separate "run folders" and prepare them to be run in RAxML. Third, a script named ```RAxMLRunner.sh``` is called to run RAxML on the contents of each run folder. In a "clean-up" step, MAGNET moves all .phy files remaining in the working directory to a new folder, "phylip\_files", which is created within the working directory. - -After running the MAGNET pipeline, the shell script ```getGeneTrees.sh``` automates post-processing of the gene trees output by RAxML, including organizing all inferred gene trees into a single "gene\_trees" folder in the working directory, and combining the individual 'best' gene trees resulting from each run into a single file named "besttrees.tre". Also, if bootstrap pseudoreplicates were performed and the bootstrap tree files are detected, then the ```getBootTrees.sh``` script conducts similar processing on the bootstrap trees for each locus, which are collated, renamed, and given a list file containing the name of each file. Given the directory of bootstrap trees resulting from a MAGNET run ("bootstrap\_trees") can take up substantial disk space (>200 MB), users may wish to compress this directory to a zip file, for example using `$ zip -r bootstrap_trees.zip bootstrap_trees/` at the conclusion of a run. - -A new feature of MAGNET (as of December 2018) is the `--resume` flag, a long option allowing the user to resume a previous MAGNET run in a working directory where MAGNET was previously run (specified to stdin as workingDir). 
- -## USAGE - -Additional input file and usage information is available in the usage or help texts. To get regular usage info for MAGNET, type `$ ./MAGNET.sh`, `$ ./MAGNET.sh -h .`, or `./MAGNET.sh -help` while in the MAGNET directory. However, it is more useful (particularly when running for the first time) to get *verbose usage info* for MAGNET, including detailed descriptions of each option; do this by typing `$ ./MAGNET.sh -H .` or `./MAGNET.sh -Help` (capital `-H` flag) at the command line while in the MAGNET directory. The verbose usage text is as follows: - -```sh -$ piranha -f MAGNET --args='-H' -. -. -. -Usage: MAGNET [OPTION]... - - Options: - -f fileType (def: 1; 1 = single , 2 = multiple PHYLIP files) starting file - type; if 1, script expects as stdin a single NEXUS or G-PhoCS in the - current directory; if 2, then script expects multiple PHYLIP files in current dir - -i inputNEXUS (def: NULL) input NEXUS file - -e executable (def: $MY_RAXML_EXECUTABLE) name of RAxML executable, accessible from command - line on user's machine - -b numBootstraps (def: $MY_NUM_BOOTREPS) RAxML bootstrap pseudoreplicates - -r raxmlModel (def: $MY_RAXML_MODEL; other: GTRGAMMAI, GTRCAT, GTRCATI) - -s simpleModel (def: $MY_SIMPLE_MODEL; other: JC69, K80, HKY85) specifies simple DNA - substitution model that will override any other model and apply to all DNA partitions - -g gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless >1000 - individuals; takes float proportion value) gap threshold value - -m indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) missing data setting - -o outgroup (def: NULL) outgroup given as single taxon name (tip label) or comma- - separted list - -h help text (also: -help) echo this help text and exit - -H verbose help text (also: -Help) echo verbose help text and exit - -V version (also: --version) echo version of this script and exit - -R resume (also: --resume) short and long options allowing user to resume a 
previous - MAGNET run in current working directory - -d debug (def: 0, off; 1, on also: --debug) run function in Bash debug mode - - OVERVIEW - The goal of MAGNET is to infer a maximum-likelihood (ML) gene tree in RAxML for each of - multiple loci, starting from one or multiple input files containing aligned DNA sequences. - If supplied with a single G-PhoCS ('*.gphocs') or NEXUS ('*.nex') data file (using -f1 - or -i -f1 options), then each locus is split into a separate PHYLIP alignment - file, and RAxML (Stamatakis 2014) is run to infer gene trees for each locus. If a NEXUS - datafile is supplied, it is converted into G-PhoCS format (Gronau et al. 2011) while splitting - loci into separate interleaved sequence blocks based on information provided in a sets - block at the end of the NEXUS file (e.g. defined using 'charset' commands), which is mandatory. - However, if -f2, then the program will run in current directory, assuming it contains multiple - PHYLIP-formatted alignment files. Under this scenario, MAGNET will skip directly to running - the PHYLIP files in RAxML using user-specified options. - Sequence names may not include hyphen characters, or there could be issues. For detailed - information on MAGNET and its various dependencies, see 'README.md' file in the distribution - folder; however, it is key that the dependencies are available from the command line interface. - Among the most important options is -r or --resume (off by default), which tells MAGNET to - resume previous run(s) in current directory, including detecting incomplete run folders and - running RAxML there without overwriting results from previously finished runs. - - DETAILS - The -f flag specifies the starting fileType. If -f 1, then the mandatory input is the name - or path to the corresponding starting file, which is passed using the -i flag. - If -f 2, then mandatory input is the name or path to the working directory (type '.' 
for current - directory, or supply a relative or absolute path). - - The -i flag passess the name of the input NEXUS file, parameter, to the program. - - The -e flag sets the name of the RAxML executable that will be called. The user may wish to - change this to something specific to their install, or to something generic like 'raxml'. - The default setting should work on local machine or supercomputing cluster installs. - - The -b flag sets the number of boostrap pseudoreplicates for RAxML to perform while estimating - the gene tree for each locus. The default is 100; remove bootstrapping by setting to 0. - - The -r flag sets the RAxML model for each locus. This uses the full default GTRGAMMA model, - and at present it is not possible to vary the model across loci. If you want to use HKY - or K80, you will need to use the -s flag (below). - - The -s flag sets a simple RAxML model for each locus/partition, which will override any - model set using the -r flag above and apply to all partitions. In the current version of - RAxML, it is possible to specify the JC69, K80, and HKY85 models as overrides. By default, - this option is turned off and the model set under the -r flag is used instead. - - The following two options are available **ONLY** if you are starting from a NEXUS input file: - - The -g flag supplies a 'gap threshold' to an R script, which deletes all column sites in - the DNA alignment with a proportion of gap characters '-' at or above the threshold value. - If no gap threshold is specified, all sites with gaps are removed by default. If end goal - is to produce a file for G-PhoCS, you will want to leave at the default. - However, if the next step in your pipeline involves converting from .gphocs to other data - formats, you will likely want to set = 1 (e.g. before converting to PHYLIP - format for RAxML). - - The -m flag allows users to choose their level of tolerance for individuals with missing - data. 
The default is = 1, allowing individuals with runs of 10 or more - missing nucleotide characters ('N') to be kept in the alignment. Alternatively, setting - = 0 removes all such individuals from each locus; thus, while the input - file would have had the same number of individuals across loci, the resulting file could - have varying numbers of individuals for different loci. - - The -o flag sets the outgroup exactly the same way as that described in the RAxML v8 user's - manual, as a single name or as a comma-separated list with no spaces between taxon names. - The first name in the list is prioritized, e.g. when members of the list are not monophyletic. - - -R | --resume is among the most important options available in MAGNET because it tells the - program to resume a previous run in current directory, including to detect incomplete run - subfolders and run RAxML there without overwriting results from run folders with finished - runs. The default setting is to run without this option. - - The -d flag runs this function in Bash debug mode (set -xv), which is intended for debugging - for development purposes. If you find a bug, please contact the author at jbagley@jsu.edu. - - Usage examples: - Call the program using PIrANHA, as follows: - - piranha -f MAGNET -f 2 -b 100 -g 1 -m 1 Run MAGNET with 100 bootstrap replicates - with gaps allowed and missing data allowed - and the GTRGAMMA model - piranha -f MAGNET -f 2 -b 100 -s HKY85 -g 1 -m 1 Same as above, but using the simpler - HKY85 substitution model for all loci - piranha -f MAGNET -h Show this help text and exit - - CITATION - Bagley, J.C. 2020. PIrANHA v0.4a4. GitHub repository, Available at: - . - - REFERENCES - Gronau, I., Hubisz, M.J., Gulko, B., Danko, C.G., Siepel, A. 2011. Bayesian inference of - ancient human demography from individual genome sequences. Nature Genetics, 43, 1031-1034. - Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. 
Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. - Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. -``` - -### NOTES ON NEXUS2gphocs USAGE - -- You may use ```NEXUS2gphocs.sh``` as a standalone script for converting prior to running G-PhoCS on your data. -- However, in its current form, you must move ```NEXUS2gphocs.sh``` (out of the shell folder) *and* rmGapSites.r (out of the R folder) into the MAGNET directory in order to run NEXUS2gphocs as a standalone script (this assumes the target is also located in the MAGNET dir). You could also move both scripts into another working directory containing your target . -- You can get the usage info for ```NEXUS2gphocs.sh```, in similar fashion to that above, by typing ```./NEXUS2gphocs.sh```, ```./NEXUS2gphocs.sh -h .```, or ```./NEXUS2gphocs.sh -help``` into the command line, and then pressing enter. The ```NEXUS2gphocs``` usage text is sufficiently similar to the latter part of the MAGNET usage printed above that it doesn't bear repeating here. - -### USAGE EXAMPLES - -**Below I give some examples of how to use the software under the two most common scenarios:** - -**SCENARIO 1.** If your data contain very little missing data and, in particular, they contain no individuals with all missing data for a locus, then it should be fine to run MAGNET using the default options on either a single input file (-f 1) or multiple PHYLIP input files (-f 2), as follows: - -```sh -##--Scenario 1, generic usage: -piranha -f MAGNET --args='-i -f1' -piranha -f MAGNET --args='-f2' ## multiple PHYLIP input files case. - -##--Examples: -cd ~/Downloads/MAGNET-master/ -piranha -f MAGNET --args='-i example.nex -f1' -piranha -f MAGNET --args='-f2' ## multiple PHYLIP input files case. -``` - -**SCENARIO 2.** If your data are relatively lower quality data (e.g. 
from NGS runs) and you have lots of missing data, including individuals with all missing data for a locus (as is common for RAD tag/SNP data), then RAxML will not run properly under the default MAGNET options. You will likely get up to ~10 messages like "ERROR: Sequence XXXXX consists entirely of undetermined values which will be treated as missing data", followed by a summary like this: "ERROR: Found 10 sequences that consist entirely of undetermined values, exiting...", and RAxML will quit. The rest of the pipeline will be affected, for example the final summary gene tree file will make no sense because it will simply include a concatenation of all files in the working directory. - -To avoid the above issues caused by large amounts of missing data, you should run MAGNET while **setting the -m flag to 0** (indivMissingData=0) to specify that individuals with missing data are NOT allowed: - -```sh -##--Scenario 2, all params except indivMissingData set to default options: -piranha -f MAGNET --args='-i -f1 -m0' -piranha -f MAGNET --args='-f2 -m0' ## multiple PHYLIP input files case. - -##--Example: -cd ~/Downloads/MAGNET-master/ -piranha -f MAGNET --args='-i example.nex -f1 -m0' -piranha -f MAGNET --args='-f2 -m0' ## multiple PHYLIP input files case. -``` - -In addition to the above, here are illustrations of varying the **RAxML options**: - -```sh -##--Scenario 1, GTRCAT model, instead of the default GTRGAMMA model: -piranha -f MAGNET --args='-i -f1 -rGTRCAT' -piranha -f MAGNET --args='-f2 -rGTRCAT' ## multiple PHYLIP input files case. - -##--Scenario 1, adding name of an outgroup taxon: -piranha -f MAGNET --args='-i -f1 -rGTRCAT -o outgroup' -piranha -f MAGNET --args='-f2 -rGTRCAT -o outgroup' ## multiple PHYLIP input files case. - -##--Scenario 1, overriding -r model with HKY85 and adding an outgroup: -piranha -f MAGNET --args='-i -f1 -rGTRCAT -sHKY85 -o outgroup' -piranha -f MAGNET --args='-f2 -rGTRCAT -sHKY85 -o outgroup' ## multiple PHYLIP input files case. 
- -##--Scenario 2, 500 bootstrap reps per locus, instead of the default 100: -piranha -f MAGNET --args='-i -f1 -b500 -m0' -piranha -f MAGNET --args='-f2 -b500 -m0' ## multiple PHYLIP input files case. - -##--Scenario 2, *zero* bootstrap reps per locus: -piranha -f MAGNET --args='-i -f1 -b0 -m0' -piranha -f MAGNET --args='-f2 -b0 -m0' ## multiple PHYLIP input files case. -``` - -## ACKNOWLEDGEMENTS - -I gratefully acknowledge Naoki Takebayashi, who wrote and freely provided some Perl scripts I have used in PIrANHA and MAGNET. I also thank the Brigham Young University Fulton Supercomputing Lab (FSL) for providing computational resources used during the development of this software. J.C.B. received stipend support from a Ciência Sem Fronteiras (Science Without Borders) postdoctoral fellowship from the Brazilian Conselho Nacional de Desenvolvimento Científico e Tecnológico (CNPq; Processo 314724/2014-1). Lab and computer space were also supplied by The University of Alabama, during an internship in the Lozier Lab in the UA Department of Biological Sciences. - -## REFERENCES - -- Chifman J, Kubatko L (2014) Quartet inference from SNP data under the coalescent model. Bioinformatics, 30, pages 3317–3324. -- Eaton DAR (2014) PyRAD: assembly of de novo RADseq loci for phylogenetic analyses. Bioinformatics, 30, 1844–1849. -- Gronau I, Hubisz MJ, Gulko B, Danko CG, Siepel A (2011) Bayesian inference of ancient human demography from individual genome sequences. Nature Genetics, 43, 1031-1034. -- Larget BR, Kotha SK, Dewey CN, Ané C (2010) BUCKy: gene tree/species tree reconciliation with Bayesian concordance analysis. Bioinformatics, 26(22):2910-2911. -- Liu L, Yu L, Edwards SV (2010) A maximum pseudo-likelihood approach for estimating species trees under the coalescent model. BMC Evol Biol, 10(1):302. -- Liu L, Yu L (2011) Estimating species trees from unrooted gene trees. Syst Biol, 60(5):661-667. 
-- Mirarab S, Warnow T (2015) ASTRAL-II: coalescent-based species tree estimation with many hundreds of taxa and thousands of genes. Bioinformatics, 30:44-52. -- Peterson BK, Weber JN, Kay EH, Fisher HS, Hoekstra HE (2012) Double digest RADseq: an inexpensive method for de novo SNP discovery and genotyping in model and non-model species. PLoS One, 7, e37135. -- Stamatakis A (2014) RAxML version 8: a tool for phylogenetic analysis and post-analysis of large phylogenies. Bioinformatics, 30.9, 1312-1313. -- Vachaspati P, Warnow T (2015) ASTRID: Accurate Species TRees from Internode Distances. BMC Genomics, 16(Suppl 10):S3. - -July 31, 2020 -Justin C. Bagley, St. Louis, MO, USA diff --git a/bin/MAGNET-1.1.1/perl/fasta2phylip.pl b/bin/MAGNET-1.1.1/perl/fasta2phylip.pl deleted file mode 100644 index 0275cc96..00000000 --- a/bin/MAGNET-1.1.1/perl/fasta2phylip.pl +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/perl - -# Converts an aligned fasta (aa or dna) seq file to phylip format - -my $usage = "Usage: $0 [-h] [-v] [-c numChar] [infile]\n" . - " -h: help\n" . - " -c: long seq names are shortened to 10 char, default: 7 chars from the\n". - " beggining is combined with the last 3 chars. You can change the\n". - " behavior by this option. E.g., -c 10 will shorten the long name\n" . - " by taking the first 10 characters of the name.\n". - " -v: verbose (print name conversion in STDERR)\n" . - " infile should be an aligned fasta, " . - "STDIN is used if no infile is given\n"; - -use IO::File; -use Getopt::Std; -getopts('hvc:') || die "$usage\n"; - -die "$usage\n" if (defined ($opt_h)); - -my $totNumChar = 10; # number of characters allowed for name in phylip -my $numFrontChar = 7; # When the name is too long, this amount of characters - # are used from the beginning of the name, and the rest - # are from the end of the name. 
-if (defined ($opt_c)) { - if ($opt_c <= $totNumChar && $opt_c >= 0) { - $numFrontChar = $opt_c; - } else { - die "Error: give an integer between 0 and $totNumChar (ends inclusive)". - " for -c.\n"; - } -} - -my $tmpFH = IO::File->new_tmpfile || die "Unable to make new temp file: $!"; - -my $firstLine = 1; -my $maxLen = 0; -my $numTaxa = 0; -my $name; - -while(<>){ - - chomp; - s/^\s+//; s/\s$//; - next if (/^$/); - - if (s/^>\s*//) { - - if ($firstLine == 0) { - if ($seqLen != $maxLen && $maxLen != 0) { - warn "WARN: The $numTaxa-th species ($name) have ", - "different seq length\n"; - warn "Previous Seq Len: $maxLen, This Seq Len: $seqLen\n"; - } - print $tmpFH "\n"; # end of the previous sequence - } else { - $firstLine = 0; - } - - $maxLen = $seqLen if ($seqLen > $maxLen); $seqLen = 0; - $numTaxa ++; - - $name = $_; - if (CharLen($_) <=10) { - printf $tmpFH "%-10s", $_; - } else { - $shortName = TrimName($_); - print STDERR "$_ => $shortName\n" if (defined ($opt_v)); - printf $tmpFH "%10s", $shortName; - } - } else { - $seqLen += CharLen ($_); - print $tmpFH $_; - } -} - -print $tmpFH "\n"; - -### print out to the STDOUT -print "$numTaxa $maxLen\n"; - -seek ($tmpFH, 0, 0) || die "seek: $!"; -my $line; -while (defined ($line = $tmpFH->getline())) { - chomp ($line); - print "$line"; - $missingBases = $maxLen - (CharLen($line) - $totNumChar); - while ($missingBases > 0) { - print "-"; - $missingBases--; - } - print "\n"; -} - -sub CharLen { # returns number of characters in a string - my $string = shift; - return scalar(split (//, $string)); -} - -sub TrimName { # trim a long name - my $name = shift; - my @charArray = split (//, $name); - - if ($totNumChar == $numFrontChar) { - return join ('', splice (@charArray, 0, $numFrontChar)); - } else { - return join ('', splice (@charArray, 0, $numFrontChar), - splice (@charArray, - ($totNumChar - $numFrontChar))); - } -} - diff --git a/bin/MAGNET-1.1.1/perl/phylip2fasta.pl b/bin/MAGNET-1.1.1/perl/phylip2fasta.pl deleted 
file mode 100644 index 54d1988b..00000000 --- a/bin/MAGNET-1.1.1/perl/phylip2fasta.pl +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/perl - -# Converts an aligned fasta (aa or dna) seq file to phylip format - -my $usage = "Usage: $0 [-h] [-v] [infile]\n" . - " -h help\n" . - " -v verbose (print name conversion in STDERR)\n" . - " infile should be a phylip or paml format (one liner), " . - "STDIN is used if no infile is given\n"; - -use IO::File; -use Getopt::Std; -getopts('hv') || die "$usage\n"; - -die "$usage\n" if (defined ($opt_h)); - -my $totNumChar = 10; # number of characters allowed for name in phylip -my $numFrontChar = 7; # When the name is too long, this amount of characters - # are used from the beginning of the name, and the rest - # are from the end of the name. - -while(<>){ - - next if ($. == 1); # skip the first line - - chomp; - s/^\s+//; s/\s$//; - next if (/^$/); - - my @line = split (/\s+/); - - my @nameChar = split (//, $line[0]); - - if (@nameChar > $totNumChar) { - if ( /^(.{$totNumChar})/ ) { - $name = $1; - } - } else { - $name = $line[0]; - } - - s/$name//; - s/^\s+//; - - print ">$name\n$_\n"; -} diff --git a/bin/MAGNET-1.1.1/perl/selectSites.pl b/bin/MAGNET-1.1.1/perl/selectSites.pl deleted file mode 100644 index b3ac2180..00000000 --- a/bin/MAGNET-1.1.1/perl/selectSites.pl +++ /dev/null @@ -1,771 +0,0 @@ -#!/usr/bin/perl - -my $usage="Usage: $0 [-g] [-n replacementChar] [-x n] [-s siteList] [-f siteListFile] [-r [1,2,3]] [-cd] [-i splicingData] fastaFile\n" . - " -g : replace unwanted sites with - instead of removing them\n" . - " -n : replace unwanted sites with replacementChar\n" . - " -x n: Remove all gap sites after sites are selected. If n=1, all\n". - " gap-only-sites are removed. If n=3, the gap-only-sites are\n". - " removed by multiple of n(=3). With 7 neighboring gap-only sites,\n". - " 6 of them will be removed, and 1 site will be left. n can be any integer.\n". 
- " -s : specify the site lists, do not put any spaces between elements\n" . - " example siteList : 1-4,8,90-\n" . - " -r '1,2' : first and 2nd position of the codon triplets are selected\n". - " -f : read site list from a file.\n". - " -c : site list is codon number\n" . - " -i : individual splicing data for each sequences are given\n" . - " -e : with -i, sequence not listed in the splicingData is excluded\n" . - " -d : delete (instead of select) the sites specified\n" . - "\n". - " For -f option, you can use spaces, tab, comma, or new-line delimited\n". - " site numbers. You can use the range specifiers (see -s), but be\n". - " CAREFUL not to use spaces around '-'; e.g. 1-4 is ok, but 1 - 4 is not.\n". - " In this file, you can also add comments. For each line, any characters\n". - " after the 1st # is considered as comments and ignored.\n". - " Example of site list file:\n\n" . - "# This file can be given to -f\n". - " 1-4, 8 # COMMENT: first 5 bases\n". - "90-\n". - "# end of file\n"; - -my $sep = "\t"; # if you use tab in the sequence name, change this to - # other characters such as "," -my $lineWidth = 70; # used to break the long sequences into lines for FASTA out - -use Getopt::Std; -getopts('hx:i:ecgn:df:s:r:') || die "$usage\n"; - -if (defined($opt_h)) { - die "$usage\n"; -} - -die "$usage\n" if (@ARGV > 1); - -@ARGV = ('-') unless @ARGV; # take STDIN when no arg. -my $dnaFile = shift @ARGV; - -# initialize the @seqArray, @seqNameArray, and $maxSeqLen -my @dat = ReadInFASTA($dnaFile); -my $numSeq = @dat; -my $maxLen = MaxSeqLen(@dat); - -if (defined($opt_c)) { - $maxLen = int (($maxLen + 2) / 3); -} - -my $replaceChar = "-"; # for -g, this character is used to replace -if (defined($opt_g) && defined($opt_n)) { - die "ERROR: -g and -n can not be specified simultaneously. ". 
- "Choose either -g or -n"; -} elsif (defined($opt_g)) { - $replaceChar = '-'; -} elsif (defined($opt_n)) { - $replaceChar = $opt_n; -} - -if (defined($opt_i)) { - my @result = Splice(@dat); - PrintFASTA(@result); - exit (0); -} - -if (defined($opt_r)) { - if (defined($opt_s) || defined($opt_f) || defined($opt_c)) { - die "When -r is specified, -c, -f, or -s can't be used\n"; - } - @index = RepeatIndex($maxLen, $opt_r); -} elsif (defined($opt_s)) { - if (defined($opt_f)) { - die "-s and -f can't be used at the same time\n"; - } - @index = MkSelIndex($maxLen, $opt_s); -} elsif (defined($opt_f)) { - @index = MkIndexFromFile($maxLen, $opt_f); -} else { - @index = 0..($maxLen - 1); -} - - -if (defined($opt_d)) { - @allSites = 0..($maxLen - 1) ; - @index = InANotInB (\@allSites, \@index); -} - -if (defined($opt_c)) { - @index = CodonToBaseIndex(@index); -} -# @dat = AdjustSeqLength(@dat); - -@index = sort {$a <=> $b} (@index); -@index = ExtractUnique (@index); - -@dat = Sites (\@dat, \@index); - -if (defined ($opt_x)) { - @dat = RemoveGapOnlySites(\@dat, $opt_x); -} - -PrintFASTA (@dat); - -exit(0); - -#### functions - -sub RepeatIndex { - my ($max, $optionR) = @_; - $optionR =~ s/\s+//g; - if ($optionR !~ /^[123,]+$/) { - print STDERR "$usage"; - die "\nERROR: give comma delimited integer (1, 2, or 3) as the " . - " argument of -r.\ne.g. 
-r '2,3'\n"; - } - my @repList = split (",", $optionR); - my @result = (); - for my $i (1..$max) { - if (MemberQ(($i - 1) % 3 + 1, \@repList) == 1) { - push @result, $i-1; - } - } - return(@result); -} - -sub MkSelIndex { - my ($max, $siteList) = @_; - $siteList =~ s/^\s+//; - $siteList =~ s/\s+$//; - - my @sites = split(/\s*,\s*/, $siteList); - - my @result = (); - foreach my $item (@sites) { - if ($item =~ /^(\d+)-(\d+)$/) { - die "ERROR: 1st number is larger than 2nd in $item\n" if ($1 > $2); - $beginPos = $1 - 1; - $endPos = $2 - 1; - } elsif ($item =~ /^-(\d+)$/) { - $beginPos = 0; - $endPos = $1 - 1; - } elsif ($item =~ /^(\d+)-$/) { - $beginPos = $1 - 1; - $endPos = $max-1; - } elsif ($item =~ /^(\d+)$/) { - $beginPos = $1 - 1; - $endPos = $1 - 1; - } else { - die "$siteList given as the list of sites. " . - "Make sure it is comma delimitted, and each element is " . - " one of the forms: 23-26, 29, -10, 40-\n"; - } - push (@result, $beginPos..$endPos); - } - return(@result); -} - -sub MkIndexFromFile { - my ($max, $file) = @_; - open (IN,"<$file") || die "Can't open the file $file\n"; - my @result=(); - while() { - chomp(); - s/#.*$//; # remove comments - - s/^\s+//; s/\s+$//; - next if (/^$/); - s/,/\t/g; # convert commans to tab - s/\s+/\t/g; - unless(/^[\d\t-]+$/) { - warn "This line contains non-numeric, skipped:\n$_\n"; - next; - } - my @line = split; - push @result, @line; - } - my $siteString = join ",", @result; - @result = MkSelIndex($max, $siteString); - - # change unit offset index to 0-offset. -# for $i (0..$#result) { -# if ($max < $i) { -# warn "site $i is too large. 
should be less than $max\n"; -# } -# $result[$i] -= 1; -# } - return @result; -} - -# convert codon index (0 offset) to nucleotide index (0 offset) -sub CodonToBaseIndex { - my @list = @_; - my @result=(); - for my $i (@list) { - push (@result, $i*3, $i*3+1, $i*3+2); - } - return (@result); -} - -# takes an arg; name of a file from which data are read Then read in -# the data and make an array. Each element of this array corresponds -# to a sequence, name tab data. -sub ReadInFASTA { - my $infile = shift; - my @line; - my $i = -1; - my @result = (); - my @seqName = (); - my @seqDat = (); - - open (INFILE, "<$infile") || die "Can't open $infile\n"; - - while () { - chomp; - if (/^>/) { # name line in fasta format - $i++; - s/^>\s*//; s/^\s+//; s/\s+$//; - $seqName[$i] = $_; - $seqDat[$i] = ""; - } else { - s/^\s+//; s/\s+$//; - s/\s+//g; # get rid of any spaces - next if (/^$/); # skip empty line - s/[uU]/T/g; # change U to T - $seqDat[$i] = $seqDat[$i] . uc($_); - } - - # checking no occurence of internal separator $sep. - die ("ERROR: \"$sep\" is an internal separator. Line $. of " . - "the input FASTA file contains this charcter. Make sure this " . - "separator character is not used in your data file or modify " . - "variable \$sep in this script to some other character.\n") - if (/$sep/); - - } - close(INFILE); - - foreach my $i (0..$#seqName) { - $result[$i] = $seqName[$i] . $sep . 
$seqDat[$i]; - } - return (@result); -} - -sub GetSeqDat { - my @data = @_; - my @line; - my @result = (); - - foreach my $i (@data) { - @line = split (/$sep/, $i); - push @result, $line[1]; - } - - return (@result) -} - -sub GetSeqName { - my @data = @_; - my @line; - my @result = (); - - foreach my $i (@data) { - @line = split (/$sep/, $i); - push @result, $line[0]; - } - return (@result) -} - -sub SelectSites { - my ($arrayRef, $indexRef) = @_; - unless (@_ == 2 && ref($arrayRef) eq 'ARRAY' && ref($indexRef) eq 'ARRAY'){ - die "args to SelectSites() should be ARRAY REF, ARRAY REF\n"; - } - - my $maxIndex = @$arrayRef -1; - my @result = (); - foreach my $posi (@$indexRef) { - if ($maxIndex < $posi) { - push @result, "?"; - } else { - push @result, $$arrayRef[$posi]; - } - } - return @result; -} - -# 1st argument is a ref to an array with each element is a DNA sequence -# a ref to a vector of indices. -sub ReplaceOtherSitesWChar { - my ($arrayRef, $indexRef, $repChar) = @_; - unless (@_ == 3 && ref($arrayRef) eq 'ARRAY' && ref($indexRef) eq 'ARRAY'){ - die "args to ReplaceOtherSitesWChar() should be ARRAY REF, ARRAY REF\n"; - } - - my $maxIndex = @$arrayRef -1; - - my @allSites = 0..($maxIndex) ; - my @index = InANotInB (\@allSites, $indexRef); # making the complement set - - warn "WARN: some selected sites don't exists\n" - if (Max(@$indexRef) > $maxIndex); - - my @result = @$arrayRef; - foreach my $posi (@index) { - $result[$posi] = $repChar; - } - if ($debug) { - print join "", "debug: ", @$arrayRef, "\n"; - print join "", "debug: ", @result, "\n\n"; - } - return @result; -} - -sub Sites { - my ($datRef, $indexRef) = @_; - my @seqDat = GetSeqDat(@$datRef); - my @seqName = GetSeqName(@$datRef); - my @result = (); - - # make 2 dimensional matrix - foreach $seqNumber (0..$#seqDat) { - my @tmpArray = split(//, $seqDat[$seqNumber]); - my @thisSeq = (defined($opt_g) || defined($opt_n)) ? 
- ReplaceOtherSitesWChar(\@tmpArray, $indexRef, $replaceChar) : - SelectSites(\@tmpArray, $indexRef); - my $thisLine = $seqName[$seqNumber] . "\t" . (join("", @thisSeq)); - push @result, $thisLine; - } - return (@result); -} - - -sub PrintFASTA { - my @seqName = GetSeqName(@_); - my @seqDat = GetSeqDat(@_); - for my $i (0..$#seqDat) { - # print ">$seqName[$i]\n$seqDat[$i]\n"; - print ">$seqName[$i]\n"; - my $seq = $seqDat[$i]; - for (my $pos=0 ; $pos < length ($seq) ; $pos += $lineWidth) { - print substr($seq, $pos, $lineWidth), "\n"; - } - } -} - -sub MaxSeqLen { - my @data = GetSeqDat(@_); - my $maxLen = 0; - foreach $i (@data) { - my $len = CharLen($i); - $maxLen = $len if ($len > $maxLen); - } - return ($maxLen); -} - -# take std seq data (name\tseq), and attach "?" for the shorter sequences -sub AdjustSeqLength { - my @data = @_; - my @seqDat = GetSeqDat(@_); - my @seqName = GetSeqName(@_); - my $maxLen = MaxSeqLen(@_); - - foreach $i (0 .. $#seqDat) { - my $thisLen = CharLen ($seqDat[$i]); - if ($thisLen == $maxLen) { - ; # do nothing - } elsif ($thisLen < $maxLen) { - my $diff = $maxLen - $thisLen; - warn "WARN: $seqName[$i] shorter. " . - "$diff '?' (missing character) were added at the end\n"; - for ($j=0; $j < $diff; $j++) { - $data[$i] = $data[$i] . "?"; - } - } else { - die "ERROR: the length of sequence $seqName[$i] is $thisLen, " . - "longer than \$maxLen = $maxLen. Weird!!"; - } - } - return (@data); -} - -sub RemoveGapOnlySites { - my ($seqDatARef, $multipleOf) = @_; - my @seqDat = GetSeqDat(@$seqDatARef); - my @seqName = GetSeqName(@$seqDatARef); - my $maxLen = MaxSeqLen(@$seqDatARef); - my @gapSites = (); - my @notGapSites = (); - my ($posi, $seqNumber); - my @seqMat = (); - - # make 2 dimensional matrix - foreach $seqNumber (0..$#seqDat) { - my @tmpArray = split(//, $seqDat[$seqNumber]); - # Check the length - if (@tmpArray != $maxLen) { - die "ERROR: the sequence $seqName[$i] is not same length " . - "as \$maxLen = $maxLen. 
Weird!!"; - } - push @seqMat, [ @tmpArray ]; - } - - # now identify the all gap sites - for $posi (0 .. ($maxLen-1)) { - my $gap = 1; - for $seqNumber (0 .. $#seqMat){ - if ($seqMat[$seqNumber][$posi] !~ /^[-\?]$/) { - $gap = 0; - last; - } - } - if ($gap == 1) { # all sequences have a gap at these sites - push (@gapSites, $posi+1); # now unit-offset - } else { # there are some non-gap character at these sites - push (@notGapSites, $posi+1); - } - } - - my @rmSites = (); # removing multiples of $multipleOf - for(my $i = 0; $i < @gapSites - $multipleOf + 1; $i++) { - my $rmFlag = 1; - for(my $j = 1; $j < $multipleOf; $j++) { - if ($gapSites[$i] + $j != $gapSites[$i+$j]) { - $rmFlag = 0; # we don't want to remove this $i - $j=$multipleOf; # get out of inner loop - } - } - if ($rmFlag == 1) { - push @rmSites, @gapSites[$i..($i+$multipleOf-1)]; - $i += $multipleOf - 1; - } - } - - my @allSites = 1..($maxLen) ; - my @selIndex = InANotInB (\@allSites, \@rmSites); - @selIndex = To0Offset(@selIndex); # convert to 0-ffset - - # select sites and make results - my @result = (); - for $seqNumber (0 .. $#seqMat) { - my @thisSeq = SelectSites($seqMat[$seqNumber], \@selIndex); - my $line = $seqName[$seqNumber] . $sep . (join("", @thisSeq)); - push (@result, $line); - } - -# if (@rmSites > 0) { -# warn ("Following sites consist of all gaps, removed from analysis\n"); -# print STDERR join(" ", @rmSites); -# print STDERR "\n"; -# } - return (@result); -} - -# convert 1-offset index array to 0-offset array -sub To0Offset { - my @result = map {$_ - 1} @_; - return @result; -} - -# count the number of characters in a string -sub CharLen { - my $string = shift; - my @charString = split (//, $string); - return scalar(@charString); -} - -# this function take two scalars and return the larger value -sub larger { - my ($a, $b) = @_; - - return (($a > $b) ? 
$a : $b); -} - -sub InANotInB { - my ($aRef, $bRef) =@_; - my %seen = (); - my @aonly =(); - - foreach my $item (@$bRef) { $seen{$item} = 1}; - foreach my $item (@$aRef) { - push (@aonly, $item) unless $seen{$item}; - } - return (@aonly); -} - -sub ExtractUnique { - my %seen=(); - my @unique = (); - - foreach my $item (@_) { - push (@unique, $item) unless $seen{$item}++; - } - return @unique; -} - -sub Max { - my $max = shift; - foreach $item (@_) { - if ($item > $max) { - $max = $item; - } - } - return $max; -} - -sub MemberQ { - my ($x, $arrRef) = @_; - foreach my $item (@$arrRef) { - if ($x eq $item) { - return 1; - } - } - return 0; -} - -sub sortByColumn { -# numerical sort by a column, return an array -# sortbyColumn ($col_num, $order, @record) -# @record is an array with each element representing a space delimited record -# example -# ("473 p1 S 0:06 -bash", "541 p2 SW 0:00 ps-a", ....) -# $col_num -- the column by which the record is sorted by (left-most col is 0) -# $order can be "a" (ascending) or "d" (descending), -# sort column can be hyphnated numbers (e.g. 
10-4-150) - - local $col_num = shift(@_); - local $order = shift(@_); - local @record = @_ ; - local ($sortCol); - - ## test if the sort column is hyphnated or plain number - local $sortMethod = "number"; - foreach $sortCol (@record) { - if ( (split(/\s+/,$sortCol))[$col_num] =~ /\d+-\d+/ ) { - $sortMethod = "hyphnated"; - last ; - } - } - - return sort $sortMethod @record; - -## two sub-sub-routines - sub number { - # $col_num, $order are the given arguments - # the left-most column is 0 - local $first = (split(/\s+/, $a))[$col_num]; - local $second = (split(/\s+/, $b))[$col_num]; -# argument errors not well trapped here - ($first,$second) = ($second, $first) if ($order eq "d"); - - return ($first <=> $second); - } - -#probably I don't need the "sub number" - sub hyphnated { - # $col_num, $order are the given arguments - local ($each_number, $cmp_result, @temp_swap); - - ## separte the hyphnated numbers and put them in the following arrays - local @first = split(/-/, ((split(/\s+/, $a))[$col_num])); - local @second = split(/-/, ((split(/\s+/, $b))[$col_num])); - - ## ascending (default) or descending order - if ($order eq "d") { - @temp_swap = @first; - @first = @second; - @second = @temp_swap; - } - - ## comparison of array elements - for ($each_number = 0; $each_number <= - (($#first < $#second) ? $#first : $#second) ; $each_number++) { - $cmp_result = ($first[$each_number] <=> $second[$each_number]); - last if ($cmp_result); - } - - ## if the size of two arrays differ - if ( ($cmp_result == 0) && ($#first != $#second) ) { - return (($#first < $#second) ? 
-1 : 1); - } else { - return $cmp_result; - } - } -} - - -#### not used -sub Bootstrap { - my @data = @_; - my @seqDat = GetSeqDat(@_); - my @seqName = GetSeqName(@_); - - my $maxLen = MaxSeqLen(@data); - my ($tmpOutFile, $tmpSeqFileName); - - # getting tmpfilenames - do { $tmpSeqFileName = tmpnam() } - until my $fh = IO::File->new($tmpSeqFileName, O_RDWR|O_CREAT|O_EXCL); - close $fh; - do { $tmpOutFile = tmpnam() } - until $fh = IO::File->new($tmpOutFile, O_RDWR|O_CREAT|O_EXCL); - close $fh; - - print "$tmpSeqFileName\n$tmpOutFile\n"; - -# END { unlink($tmpSeqFileName) -# or die "Couldn't unlink $tmpSeqFileName : $!" } -# END { unlink($tmpOutFile) or die "Couldn't unlink $tmpOutFile : $!" } - - # prepare PAUP cmd - if (defined($opt_p)) { - $setting = $opt_p; - } else { - $setting = "set criterion=distance; dset distance=k2p"; - } - my $paupCmd = "execute $tmpSeqFileName; $setting; " . - "log start file=$tmpOutFile replace=yes ; showdist; " . - "log stop; quit WarnTSave=no;"; - warn "PAUP commands:\n$paupCmd\n"; - - my @sampledDat = SampleSites($maxLen, @seqDat); - WriteNEXUS ($tmpSeqFileName, \@seqName, \@sampledDat); - - open (PAUP, "|paup -n"); - print PAUP $paupCmd; - close(PAUP); - - if (defined ($opt_s)) { - open (GETDIST, "$EXTRACT_PAIR_DIST_EXE -s $opt_s $tmpOutFile|"); - } else { - open (GETDIST, "$EXTRACT_PAIR_DIST_EXE $tmpOutFile|"); - } - my @dist =(); - while() { - my @line = split; - if ($. == 1) { - if ($line[$#line] ne "dist") { - warn "## WARN ## using the last column ($line[$#line]) of " . - "output from $EXTRACT_PAIR_DIST_EXE as the distance\n"; - } - } - push @dist, $line[$#line]; - } - -# if (@dist != @seqName * (@seqName - 1) / 2) { -# warn "## DANGER ## PAUP didn't out put correct number of " . 
-# "pairwise dists\n" -# } - -} - -# set criterion=distance;dset distance=TamNei Rates=gamma Shape=2.3333 Pinvar=0.1300 -#echo "execute ../$(IN_BASENAME).nx;set criterion=distance;dset distance=TamNei Rates=gamma Shape=2.3333 Pinvar=0.1300; log start file=$@.tmp; showdist; log stop;quit WarnTSave=no;" |paup -# $(EXTRACT_PAIR_DIST) -s seqNames $@.tmp > $@ - -# note this function take only @seqDat -sub SampleSites { - my $maxLen = shift; - my @seqDat = @_; - - my @randSites = RandIntArray($maxLen, $maxLen-1); - - for my $seqNumber (0 .. $#seqDat) { - my @line = split (//, $seqDat[$seqNumber]); - @line = SelectSites (\@line, \@randSites); - my $randomized = join ("", @line); - push @result, $randomized; - } - return (@result); -} - -# rand integers between 0 and $max (both ends inclusive) -sub RandIntArray { - my ($size, $max) = @_; - my @result = (); - - for my $i (0 .. $size - 1) { - push @result, int(rand ($max + 1)); # rand returns [0, $max + 1) - } - return (@result); -} - -sub WriteNEXUS { - my ($fileName, $nameArrayRef, $datArrayRef) = @_; - - my @nameArray = @$nameArrayRef; - my @datArray = @$datArrayRef; - die "Error in WriteNEXUS\n" if (@nameArray != @datArray); - my $numSeq = @nameArray; - my $seqLen = CharLen($datArray[0]); - - my $type = "nucleotide"; - if (defined ($opt_a)) { - $type = "aminoacid"; - } - - open (FP, ">$fileName") || die "Can't open a tmpFile $fileName"; - - print FP "#NEXUS\nBegin data;\n" . - " Dimensions ntax=$numSeq nchar=$seqLen;\n" . - " Format datatype=$type gap=- missing=? matchchar=.;\n" . - " Matrix\n"; - - for my $i (0 .. 
$numSeq - 1) { - print FP "\'$nameArray[$i]\' $datArray[$i]\n"; - } - print FP " ;\nEnd;\n"; - - close(FP); - return (0); -} - -### for splicing -sub Splice { - my @dat = @_; - my @seqDat = GetSeqDat(@dat); - my @seqName = GetSeqName(@dat); - - open SPLICE, "<$opt_i" || die "Can't open $opt_i\n"; - my %splice = ReadSplicingData(\*SPLICE); - close (SPLICE); - -# for $k (keys (%splice)) { print "debug: $k => $splice{$k}\n"; }; - - my @result = (); - for my $i (0..$#seqName) { - my $name = $seqName[$i]; - if (exists ($splice{$name})) { - my $len = CharLen($seqDat[$i]); - if (defined($opt_c)) { - $len = int (($len + 2) / 3); - } - my @index = MkSelIndex($len, $splice{$name}); - if (defined($opt_d)) { # delete the selected sites - my @allSites = 0..($maxLen - 1) ; - @index = InANotInB (\@allSites, \@index); - } - my @tmpDat = ($dat[$i]); - @tmpDat = Sites(\@tmpDat, \@index); - push @result, @tmpDat[0]; - } else { - if (! defined ($opt_e)) { - push @result, $dat[$i]; - } - } - } - return(@result); -} - -sub ReadSplicingData { - my $fh = shift; - my %splice = (); - while(<$fh>) { - chomp; - s/^\s+//; s/\s+$//; - s/#.*$//; - next if (/^$/); - my @line = split (/\t/); - $splice{$line[0]} = $line[1]; - } - - foreach my $key (keys(%splice)) { - print STDERR "INFO: $key => $splice{$key}\n"; - } - return(%splice); -} diff --git a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs b/bin/MAGNET-1.1.1/shell/NEXUS2gphocs deleted file mode 100755 index 00bc2f47..00000000 --- a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs +++ /dev/null @@ -1,405 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: NEXUS2gphocs.sh # - VERSION="v1.5.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. # -# Last update: December 11, 2020 # -# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . 
# -# # -# Description: # -# SHELL SCRIPT THAT AUTOMATES SUBSAMPLING EACH OF ONE TO MULTIPLE PHYLIP ALIGNMENT # -# FILES DOWN TO ONE (RANDOM) SEQUENCE PER SPECIES (FOR SPECIES TREE ANALYSIS) # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- - -UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - - -# Source shared functions and variables -# ----------------------------------- - -FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions. -VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. 
In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. -# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. 
-# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function NEXUS2gphocs () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | NEXUS2gphocs, v1.5.1 December 2020 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting NEXUS2gphocs analysis... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " -checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " - - -############ STEP #2: GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT -##--Extract charset info from sets block at end of NEXUS file: - MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" "$MY_NEXUS" | \ - awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \ - awk '{a[NR]=$0} END {for (i=1;i100,000 bp), then need to convert to fasta using my -##--script and then wrap to 60 characters with fold function (as suggested at stackexchange -##--post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). -##--If this conversion failes because the alignment is too long, then the code to follow -##--will have nothing to work with. So, I am here adding a conditional quit if the fasta -##--file is not generated. 
- -#---------ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2fasta SCRIPT HERE!!!!----------# - - convbioseq fasta $MY_NEXUS > "$MY_NEXUS_BASENAME".fasta ; - MY_FASTA="$(echo "$MY_NEXUS_BASENAME".fasta | sed 's/\.\///g; s/\.nex//g')"; - - ##--The line above creates a file with the name basename.fasta, where basename is the base name of the original .nex file. For example, "hypostomus_str.nex" would be converted to "hypostomus_str.fasta". - ##--Check to make sure the fasta was created; if so, echo info, if not, echo warning and quit: - if [[ -s "$MY_NEXUS_BASENAME".fasta ]]; then - echo "INFO | $(date) | Input NEXUS was successfully converted to fasta format. Moving forward... " - else - echo "WARNING | $(date) | NEXUS to fasta file conversion FAILED. Quitting... " - exit 1 - fi - - -############ STEP #3: PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO -############ MAKE A G-PhoCS-FORMATTED DATA FILE -##--Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: -echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt - -echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt - count=0 - ( - for j in ${MY_NEXUS_CHARSETS}; do - echo "$j"; - charRange="$(echo "$j" | sed 's/\,//g')"; - echo "$charRange"; - export setLower="$(echo "$j" | sed 's/\-.*$//g')"; - export setUpper="$(echo "$j" | sed 's/[0-9]*\-//g' | sed 's/\,//g; s/\ //g')"; - - **/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta; - - **/fasta2phylip.pl ./sites.fasta > ./sites.phy; - - ##--Need to make sure there is a space between the tip taxon name (10 characters as output - ##--by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. 
Use - ##--a perl search and replace for this: - - perl -p -i -e 's/^([A-Za-z0-9\-\_\ ]{10})/$1\ /g' ./sites.phy ; - - ##--If .phy file from NEXUS charset $j has gaps in alignment, then call - ##--rmGapSites.R R script to remove all column positions with gaps from - ##--alignment and output new, gapless phylip file named "./sites_nogaps.phy". - ##--If charset $j does not have gaps, go to next line of loop. We do the - ##--above by first creating a temporary file containing all lines in - ##--sites.phy with the gap character: - grep -n "-" ./sites.phy > ./gaptest.tmp ; - - ##--Next, we test for nonzero testfile, indicating presence of gaps in $j, - ##--using UNIX test operator "-s" (returns true if file size is not zero). - ##--If fails, cat sites.phy into file with same name as nogaps file that - ##--is output by rmGapSites.R and move forward: - if [ -s ./gaptest.tmp ]; then - echo "Removing column sites in locus"$count" with gaps. " - R CMD BATCH **/rmGapSites.R ; - else - echo "" - cat ./sites.phy > ./sites_nogaps.phy ; - fi - - phylip_header="$(head -n1 ./sites_nogaps.phy)" ; - locus_ntax="$(head -n1 ./sites_nogaps.phy | sed 's/[\ ]*[.0-9]*$//g')" ; - locus_nchar="$(head -n1 ./sites_nogaps.phy | sed 's/[0-9]*\ //g')" ; - - if [ $MY_INDIV_MISSING_DATA == 0 ]; then - sed '1d' ./sites_nogaps.phy | egrep -v 'NNNNNNNNNN|nnnnnnnnnn' > ./cleanLocus.tmp ; - cleanLocus_ntax="$(cat ./cleanLocus.tmp | wc -l)" ; - echo locus"$((count++))" "$cleanLocus_ntax" "$locus_nchar" > ./locus_top.tmp ; - cat ./locus_top.tmp ./cleanLocus.tmp >> ./gphocs_body.txt ; - else - echo locus"$((count++))" "$locus_ntax" "$locus_nchar" > ./locus_top.tmp ; - cat ./locus_top.tmp ./sites_nogaps.phy >> ./gphocs_body.txt ; - fi - - rm ./sites.fasta ./sites.phy ./*.tmp ; - rm ./sites_nogaps.phy ; - done - ) - - grep -v "^[0-9]*\ [0-9]*.*$" ./gphocs_body.txt > ./gphocs_body_fix.txt ; - cat ./gphocs_top.txt ./gphocs_body_fix.txt > "$MY_NEXUS_BASENAME".gphocs ; - - ############ STEP #4: CLEANUP: REMOVE 
UNNECESSARY FILES - rm ./gphocs_top.txt ; - rm ./gap_threshold.txt ; - rm ./gphocs_body.txt ; - -#echo "INFO | $(date) | Successfully created a '.gphocs' input file from the existing NEXUS file... " -#echo "INFO | $(date) | Bye. -#" - -echo "----------------------------------------------------------------------------------------------------------" -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -MY_NEXUS=NULL -MY_GAP_THRESHOLD=0.001 -MY_INDIV_MISSING_DATA=1 - -############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -i inputNEXUS (def: NULL) input NEXUS file - -g gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless >1000 - individuals; takes float proportion value) - -m indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) - -h help text (also: --help) echo this help text and exit - -V version (also: --version) echo version and exit - - ${bold}OVERVIEW${reset} - Reads in a single NEXUS datafile and converts it to '.gphocs' format for G-PhoCS software - (Gronau et al. 2011). Sequence names may not include hyphen characters, or there will be - issues. For best results, update to R v3.3.1 or higher. - - ${bold}DETAILS${reset} - The -i flag passess the name of the input NEXUS file, parameter, to the program. - - The -g flag supplies a 'gap threshold' to an R script, which deletes all column sites in - the DNA alignment with a proportion of gap characters '-' at or above the threshold value. - If no gap threshold is specified, all sites with gaps are removed by default. If end goal - is to produce a file for G-PhoCS, you will want to leave at the default. 
- However, if the next step in your pipeline involves converting from .gphocs to other data - formats, you will likely want to set = 1 (e.g. before converting to phylip - format for RAxML). - - The -m flag allows users to choose their level of tolerance for individuals with missing - data. The default is = 1, allowing individuals with runs of 10 or more - missing nucleotide characters ('N') to be kept in the alignment. Alternatively, setting - = 0 removes all such individuals from each locus; thus, while the input - file would have had the same number of individuals across loci, the resulting file could - have varying numbers of individuals for different loci. - - Dependencies: This script has the same dependencies as MAGNET v1.0.0, which it is distributed - with. See the MAGNET help text or README for more information. However a list of dependencies - includes Perl, R, Python, the bioscripts.convert v0.4 Python package, and Naoki Takebayashi - Perl scripts 'fasta2phylip.pl' and 'selectSites.pl' given correct permissions in MAGNET folder, - or available from command line (in your path). Tested with Perl v5+ and R v3.3.3–v3.5.1. - - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f MAGNET-1.0.0/NEXUS2gphocs --args='-i -g1 -m1' - piranha -f MAGNET-1.0.0/NEXUS2gphocs --args='-h' - - ${bold}CITATION${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Gronau, I., Hubisz, M.J., Gulko, B., Danko, C.G., Siepel, A. 2011. Bayesian inference of - ancient human demography from individual genome sequences. Nature Genetics, 43, 1031-1034. - - Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. - Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. 
-" - -if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then - echo "$USAGE" - exit -fi - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - -############ PARSE THE OPTIONS -while getopts 'i:g:m:' opt ; do - case $opt in -## NEXUS2gphocs options: - i) MY_NEXUS=$OPTARG ;; - g) MY_GAP_THRESHOLD=$OPTARG ;; - m) MY_INDIV_MISSING_DATA=$OPTARG ;; -## Missing and illegal options: - :) printf "Missing argument for -%s\n" "$OPTARG" >&2 - echo "$USAGE" >&2 - exit 1 ;; - \?) printf "Illegal option: -%s\n" "$OPTARG" >&2 - echo "$USAGE" >&2 - exit 1 ;; - esac -done - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -NEXUS2gphocs - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh b/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh deleted file mode 100644 index 4ad8dfd3..00000000 --- a/bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh +++ /dev/null @@ -1,408 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: NEXUS2gphocs.sh # - VERSION="v1.5.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. # -# Last update: December 21, 2020 # -# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT AUTOMATES SUBSAMPLING EACH OF ONE TO MULTIPLE PHYLIP ALIGNMENT # -# FILES DOWN TO ONE (RANDOM) SEQUENCE PER SPECIES (FOR SPECIES TREE ANALYSIS) # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- - -UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - - -# Source shared functions and variables -# ----------------------------------- - -FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions. 
-VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function NEXUS2gphocs () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | NEXUS2gphocs, v1.5.1 December 2020 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting NEXUS2gphocs analysis... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " -checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. 
" - - -############ STEP #2: GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT -# Extract charset info from sets block at end of NEXUS file: - MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" "$MY_NEXUS" | \ - awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \ - awk '{a[NR]=$0} END {for (i=1;i100,000 bp), then need to convert to fasta using my -# script and then wrap to 60 characters with fold function (as suggested at stackexchange -# post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size). -# If this conversion failes because the alignment is too long, then the code to follow -# will have nothing to work with. So, I am here adding a conditional quit if the fasta -# file is not generated. - -#---------ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2fasta SCRIPT HERE!!!!----------# - - convbioseq fasta $MY_NEXUS > "$MY_NEXUS_BASENAME".fasta ; - MY_FASTA="$(echo "$MY_NEXUS_BASENAME".fasta | sed 's/\.\///g; s/\.nex//g')"; - - ##--The line above creates a file with the name basename.fasta, where basename is the base name of the original .nex file. For example, "hypostomus_str.nex" would be converted to "hypostomus_str.fasta". - ##--Check to make sure the fasta was created; if so, echo info, if not, echo warning and quit: - if [[ -s "$MY_NEXUS_BASENAME".fasta ]]; then - echo "INFO | $(date) | Input NEXUS was successfully converted to fasta format. Moving forward... " - else - echo "WARNING | $(date) | NEXUS to fasta file conversion FAILED. Quitting... 
" - exit 1 - fi - - -############ STEP #3: PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO -############ MAKE A G-PhoCS-FORMATTED DATA FILE -# Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line: -echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt - -echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt - count=0 - ( - for j in ${MY_NEXUS_CHARSETS}; do - echo "$j"; - charRange="$(echo "$j" | sed 's/\,//g')"; - echo "$charRange"; - export setLower="$(echo "$j" | sed 's/\-.*$//g')"; - export setUpper="$(echo "$j" | sed 's/[0-9]*\-//g' | sed 's/\,//g; s/\ //g')"; - - **/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta ; - - **/fasta2phylip.pl ./sites.fasta > ./sites.phy ; - - ##--Need to make sure there is a space between the tip taxon name (10 characters as output - ##--by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. Use - ##--a perl search and replace for this: - - perl -p -i -e 's/^([A-Za-z0-9\-\_\ ]{10})/$1\ /g' ./sites.phy ; - - ##--If .phy file from NEXUS charset $j has gaps in alignment, then call - ##--rmGapSites.R R script to remove all column positions with gaps from - ##--alignment and output new, gapless phylip file named "./sites_nogaps.phy". - ##--If charset $j does not have gaps, go to next line of loop. We do the - ##--above by first creating a temporary file containing all lines in - ##--sites.phy with the gap character: - grep -n "-" ./sites.phy > ./gaptest.tmp ; - - ##--Next, we test for nonzero testfile, indicating presence of gaps in $j, - ##--using UNIX test operator "-s" (returns true if file size is not zero). - ##--If fails, cat sites.phy into file with same name as nogaps file that - ##--is output by rmGapSites.R and move forward: - if [ -s ./gaptest.tmp ]; then - echo "Removing column sites in locus${count} with gaps. 
" - R CMD BATCH **/rmGapSites.R ; - else - echo "" - cat ./sites.phy > ./sites_nogaps.phy ; - fi - - phylip_header="$(head -n1 ./sites_nogaps.phy)" ; - locus_ntax="$(head -n1 ./sites_nogaps.phy | sed 's/[\ ]*[.0-9]*$//g')" ; - locus_nchar="$(head -n1 ./sites_nogaps.phy | sed 's/[0-9]*\ //g')" ; - - if [[ "$MY_INDIV_MISSING_DATA" == 0 ]]; then - sed '1d' ./sites_nogaps.phy | egrep -v 'NNNNNNNNNN|nnnnnnnnnn' > ./cleanLocus.tmp ; - cleanLocus_ntax="$(cat ./cleanLocus.tmp | wc -l)" ; - echo locus"$((count++))" "$cleanLocus_ntax" "$locus_nchar" > ./locus_top.tmp ; - cat ./locus_top.tmp ./cleanLocus.tmp >> ./gphocs_body.txt ; - else - echo locus"$((count++))" "$locus_ntax" "$locus_nchar" > ./locus_top.tmp ; - cat ./locus_top.tmp ./sites_nogaps.phy >> ./gphocs_body.txt ; - fi - - rm ./sites.fasta ./sites.phy ; - if [[ "$(ls -1 ./*.tmp 2>/dev/null | wc -l | sed 's/\ //g')" != "0" ]]; then - rm ./*.tmp ; - fi - rm ./sites_nogaps.phy ; - done - ) - - grep -v "^[0-9]*\ [0-9]*.*$" ./gphocs_body.txt > ./gphocs_body_fix.txt ; - cat ./gphocs_top.txt ./gphocs_body_fix.txt > "$MY_NEXUS_BASENAME".gphocs ; - - ############ STEP #4: CLEANUP: REMOVE UNNECESSARY FILES - if [[ -s ./gphocs_top.txt ]]; then rm ./gphocs_top.txt ; fi - if [[ -s ./gap_threshold.txt ]]; then rm ./gap_threshold.txt ; fi - if [[ -s ./gphocs_body.txt ]]; then rm ./gphocs_body.txt ; fi - -#echo "INFO | $(date) | Successfully created a '.gphocs' input file from the existing NEXUS file... " -#echo "INFO | $(date) | Bye. 
-#" - -echo "----------------------------------------------------------------------------------------------------------" -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -MY_NEXUS=NULL -MY_GAP_THRESHOLD=0.001 -MY_INDIV_MISSING_DATA=1 - -############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -i inputNEXUS (def: NULL) input NEXUS file - -g gapThreshold (def: $MY_GAP_THRESHOLD=essentially zero gaps allowed unless >1000 - individuals; takes float proportion value) - -m indivMissingData (def: $MY_INDIV_MISSING_DATA=allowed; 0=removed) - -h help text (also: --help) echo this help text and exit - -V version (also: --version) echo version and exit - - ${bold}OVERVIEW${reset} - Reads in a single NEXUS datafile and converts it to '.gphocs' format for G-PhoCS software - (Gronau et al. 2011). Sequence names may not include hyphen characters, or there will be - issues. For best results, update to R v3.3.1 or higher. - - ${bold}DETAILS${reset} - The -i flag passess the name of the input NEXUS file, parameter, to the program. - - The -g flag supplies a 'gap threshold' to an R script, which deletes all column sites in - the DNA alignment with a proportion of gap characters '-' at or above the threshold value. - If no gap threshold is specified, all sites with gaps are removed by default. If end goal - is to produce a file for G-PhoCS, you will want to leave at the default. - However, if the next step in your pipeline involves converting from .gphocs to other data - formats, you will likely want to set = 1 (e.g. before converting to phylip - format for RAxML). - - The -m flag allows users to choose their level of tolerance for individuals with missing - data. 
The default is = 1, allowing individuals with runs of 10 or more - missing nucleotide characters ('N') to be kept in the alignment. Alternatively, setting - = 0 removes all such individuals from each locus; thus, while the input - file would have had the same number of individuals across loci, the resulting file could - have varying numbers of individuals for different loci. - - Dependencies: This script has the same dependencies as MAGNET v1.0.0, which it is distributed - with. See the MAGNET help text or README for more information. However a list of dependencies - includes Perl, R, Python, the bioscripts.convert v0.4 Python package, and Naoki Takebayashi - Perl scripts 'fasta2phylip.pl' and 'selectSites.pl' given correct permissions in MAGNET folder, - or available from command line (in your path). Tested with Perl v5+ and R v3.3.3–v3.5.1. - - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f MAGNET-1.0.0/NEXUS2gphocs --args='-i -g1 -m1' - piranha -f MAGNET-1.0.0/NEXUS2gphocs --args='-h' - - ${bold}CITATION${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Gronau, I., Hubisz, M.J., Gulko, B., Danko, C.G., Siepel, A. 2011. Bayesian inference of - ancient human demography from individual genome sequences. Nature Genetics, 43, 1031-1034. - - Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. - Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. 
-" - -if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then - echo "$USAGE" - exit -fi - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - -############ PARSE THE OPTIONS -while getopts 'i:g:m:' opt ; do - case $opt in -## NEXUS2gphocs options: - i) MY_NEXUS=$OPTARG ;; - g) MY_GAP_THRESHOLD=$OPTARG ;; - m) MY_INDIV_MISSING_DATA=$OPTARG ;; -## Missing and illegal options: - :) printf "Missing argument for -%s\n" "$OPTARG" >&2 - echo "$USAGE" >&2 - exit 1 ;; - \?) printf "Illegal option: -%s\n" "$OPTARG" >&2 - echo "$USAGE" >&2 - exit 1 ;; - esac -done - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -NEXUS2gphocs - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker b/bin/MAGNET-1.1.1/shell/RAxMLRunChecker deleted file mode 100755 index 4f53c808..00000000 --- a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker +++ /dev/null @@ -1,341 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: RAxMLRunChecker.sh # - VERSION="v1.3.0" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before November 29, 2018. # -# Last update: March 14, 2019 # -# Copyright (c) 2018-2019 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT COUNTS NUMBER OF LOCI/PARTITIONS WITH COMPLETED RAxML RUNS DURING # -# OR AFTER A RUN OF THE MAGNET PIPELINE, AND COLLATES RUN INFORMATION # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- - -UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - - -# Source shared functions and variables -# ----------------------------------- - -FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions. 
-VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function RAxMLRunChecker () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | RAxMLRunChecker, v1.3.0 March 2019 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2018-2019 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting RAxMLRunChecker pipeline... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ I. SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " -checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " - - -############ II. 
RUN RAXML RUN CHECKER -echo "INFO | $(date) | Step #2: Check RAxML runs in subfolders in current directory (assumed to be a MAGNET run folder). " - - echo "INFO | $(date) | Estimating numbers (no.) of loci and RAxML runs... " - MY_N_LOCI_FOLD="$(ls -d ./locus*/ | wc -l | sed 's/^[\ ]*//g')"; - MY_N_COMPLETED="$(ls ./locus*/RAxML_info.raxml_out | wc -l | sed 's/^[\ ]*//g')"; - MY_N_REMAINING="$(calc $MY_N_LOCI_FOLD - $MY_N_COMPLETED)"; - - echo "INFO | $(date) | Total no. RAxML runs: $TAB$TAB$MY_N_LOCI_FOLD " - echo "INFO | $(date) | No. completed RAxML runs: $TAB$MY_N_COMPLETED " - echo "INFO | $(date) | No. remaining RAxML runs: $TAB$MY_N_REMAINING " - - if [[ -s ./completed_run_info.tmp ]]; then - rm ./completed_run_info.tmp ; - fi - if [[ -s ./completed_run_info.txt ]]; then - rm ./completed_run_info.txt ; - fi - if [[ -s ./remaining_run_info.tmp ]]; then - rm ./remaining_run_info.tmp ; - fi - if [[ -s ./remaining_run_info.txt ]]; then - rm ./remaining_run_info.txt ; - fi - - echo "INFO | $(date) | Saving RAxML run info to file... " - - count=1 - echo "INFO | $(date) | ... $count / $MY_N_LOCI_FOLD ..." -( - for i in ./locus*/; do - MY_LOCUS="$(echo $i | sed 's/\.\///g; s/\///g; s/\ //g')"; - MY_COUNT_HUND_CHECK="$(calc $count / 100 | sed 's/^[0-9]*\.//g; s/^[0]\{1\}//g')" - if [[ "$MY_COUNT_HUND_CHECK" -eq "0" ]]; then - echo "INFO | $(date) | ... $count / $MY_N_LOCI_FOLD ..." - fi - if [[ "$count" -eq "$MY_N_LOCI_FOLD" ]]; then - echo "INFO | $(date) | ... $MY_N_LOCI_FOLD / $MY_N_LOCI_FOLD ..." 
- fi - cd "$i"; - if [[ -s RAxML_bipartitions.raxml_out ]]; then - - MY_ALIGN_PATT="$(grep -h '^Alignment\ Patterns\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')"; - MY_SUBST_MODEL="$(grep -h '^Substitution\ Matrix\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')"; - MY_OPTIM_LIKE="$(grep -h 'Final\ ML\ Optimization\ Likelihood\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')"; - MY_ML_RUN_TIME="$(grep -h 'Overall\ execution\ time\ ' ./RAxML_info.raxml_out | sed 's/^Overall\ execution\ time\ [A-Za-z\ ]*\:\ //g; s/or\ .*//g')"; - - echo "$count$TAB$MY_LOCUS$TAB$MY_ALIGN_PATT$TAB$MY_SUBST_MODEL$TAB$MY_OPTIM_LIKE$TAB$MY_ML_RUN_TIME$TAB complete" >> ../completed_run_info.tmp ; - fi - if [[ ! -s RAxML_bipartitions.raxml_out ]]; then - - echo "$count$TAB$MY_LOCUS$TAB$MY_ALIGN_PATT$TAB$MY_SUBST_MODEL$TAB incomplete" >> ../remaining_run_info.tmp ; - - fi - cd ..; - echo "$((count++))" > ./count.tmp ; - done -) - - - echo "No$TAB Locus$TAB No. Patterns$TAB Subst. Model$TAB Likelihood$TAB ML Run Time$TAB Status" > ./header.tmp ; - echo "No$TAB Locus$TAB No. Patterns$TAB Subst. Model$TAB Status" > ./rem_header.tmp ; - cat ./header.tmp ./completed_run_info.tmp > ./completed_run_info.txt ; - cat ./rem_header.tmp ./remaining_run_info.tmp > ./remaining_run_info.txt ; - - - echo "INFO | $(date) | Editing final RAxML run information files... " - if [[ "${machine}" = "Mac" ]]; then - sed -i '' 's/\ //g' ./completed_run_info.txt ; - sed -i '' 's/\ //g' ./remaining_run_info.txt ; - fi - if [[ "${machine}" = "Linux" ]]; then - sed -i 's/\ //g' ./completed_run_info.txt ; - sed -i 's/\ //g' ./remaining_run_info.txt ; - fi - - -############ III. CLEAN UP WORKSPACE BY REMOVING TEMPORARY FILES. -echo "INFO | $(date) | Step #3: Clean up workspace. " -echo "INFO | $(date) | Cleaning up workspace by removing temporary files generated during run... 
" - - if [[ "$(ls -1 ./*.tmp 2>/dev/null | wc -l | sed 's/\ //g')" != "0" ]]; then - rm ./*.tmp ; - fi - -echo "----------------------------------------------------------------------------------------------------------" -echo "output file(s): ./completed_run_info.txt " -echo " ./remaining_run_info.txt " -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -# None at this time. - -############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -h help text (also: --help) echo this help text and exit - -V version (also: --version) echo version of this script and exit - - ${bold}OVERVIEW${reset} - THIS SCRIPT was designed to run in a current working directory where the MAGNET pipeline - in PIrANHA v0.4a4 (Bagley 2019) is being run, or has completed a run, to estimate maximum- - likelihood (ML) gene trees in RAxML v8+ (Stamatakis 2014) for a set of loci from DNA - sequence data. Given such a workspace, this script counts the number of loci or data - partitions (each assigned a separate RAxML run in MAGNET) with completed RAxML runs and - collates run information. Output files include a summary of completed runs and a summary - of ongoing runs. - This program runs on UNIX-like and Linux systems using commonly distributed utility - software, with usage obtained by running the script with the -h flag. It has been tested - on macOS High Sierra (v10.13+) and Mojave but should work on many earlier versions or - Linux (tested on CentOS 6/7). There are no other dependencies. - - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f RAxMLRunChecker Run the software - piranha -f RAxMLRunChecker --args='-h' Print this help text - - ${bold}CITATION${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. 
GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on Wed, Mar 6 09:57:26 CST 2019. - Copyright (c) 2019 Justin C. Bagley. All rights reserved. -" - -if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then - echo "$USAGE" - exit -fi - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -RAxMLRunChecker - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh b/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh deleted file mode 100644 index f5fdcac5..00000000 --- a/bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh +++ /dev/null @@ -1,334 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: RAxMLRunChecker.sh # - VERSION="v1.3.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before November 29, 2018. # -# Last update: December 11, 2020 # -# Copyright (c) 2018-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT COUNTS NUMBER OF LOCI/PARTITIONS WITH COMPLETED RAxML RUNS DURING # -# OR AFTER A RUN OF THE MAGNET PIPELINE, AND COLLATES RUN INFORMATION # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- -UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - -# Source shared functions and variables -# ----------------------------------- -FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions. 
-VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function RAxMLRunChecker () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | RAxMLRunChecker, v1.3.1 December 2020 (part of PIrANHA v0.4a4)" -echo "INFO | $(date) | Copyright (c) 2018-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting RAxMLRunChecker pipeline... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ I. SET WORKING DIRECTORY AND CHECK MACHINE TYPE -echoCDWorkingDir -checkMachineType - - -############ II. RUN RAXML RUN CHECKER -echo "INFO | $(date) | Step #2: Check RAxML runs in subfolders in current directory (assumed to be a MAGNET run folder). " - - echo "INFO | $(date) | Estimating numbers (no.) of loci and RAxML runs... 
" - MY_N_LOCI_FOLD="$(ls -d ./locus*/ | wc -l | sed 's/^[\ ]*//g')"; - MY_N_COMPLETED="$(ls ./locus*/RAxML_info.raxml_out | wc -l | sed 's/^[\ ]*//g')"; - MY_N_REMAINING="$(calc "$MY_N_LOCI_FOLD" - "$MY_N_COMPLETED")"; - - echo "INFO | $(date) | Total no. RAxML runs: $TAB$TAB$MY_N_LOCI_FOLD " - echo "INFO | $(date) | No. completed RAxML runs: $TAB$MY_N_COMPLETED " - echo "INFO | $(date) | No. remaining RAxML runs: $TAB$MY_N_REMAINING " - - if [[ -s ./completed_run_info.tmp ]]; then - rm ./completed_run_info.tmp ; - fi - if [[ -s ./completed_run_info.txt ]]; then - rm ./completed_run_info.txt ; - fi - if [[ -s ./remaining_run_info.tmp ]]; then - rm ./remaining_run_info.tmp ; - fi - if [[ -s ./remaining_run_info.txt ]]; then - rm ./remaining_run_info.txt ; - fi - - echo "INFO | $(date) | Saving RAxML run info to file... " - - count=1 - echo "INFO | $(date) | ... $count / $MY_N_LOCI_FOLD ..." -( - for i in ./locus*/; do - MY_LOCUS="$(echo "$i" | sed 's/\.\///g; s/\///g; s/\ //g')"; - MY_COUNT_HUND_CHECK="$(calc $count / 100 | sed 's/^[0-9]*\.//g; s/^[0]\{1\}//g')"; - if [[ "$MY_COUNT_HUND_CHECK" -eq "0" ]]; then - echo "INFO | $(date) | ... $count / $MY_N_LOCI_FOLD ..." - fi - if [[ "$count" -eq "$MY_N_LOCI_FOLD" ]]; then - echo "INFO | $(date) | ... $MY_N_LOCI_FOLD / $MY_N_LOCI_FOLD ..." 
- fi - cd "$i"; - if [[ -s RAxML_bipartitions.raxml_out ]]; then - - MY_ALIGN_PATT="$(grep -h '^Alignment\ Patterns\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')"; - MY_SUBST_MODEL="$(grep -h '^Substitution\ Matrix\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')"; - MY_OPTIM_LIKE="$(grep -h 'Final\ ML\ Optimization\ Likelihood\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')"; - MY_ML_RUN_TIME="$(grep -h 'Overall\ execution\ time\ ' ./RAxML_info.raxml_out | sed 's/^Overall\ execution\ time\ [A-Za-z\ ]*\:\ //g; s/or\ .*//g')"; - - echo "$count$TAB$MY_LOCUS$TAB$MY_ALIGN_PATT$TAB$MY_SUBST_MODEL$TAB$MY_OPTIM_LIKE$TAB$MY_ML_RUN_TIME$TAB complete" >> ../completed_run_info.tmp; - fi - if [[ ! -s RAxML_bipartitions.raxml_out ]]; then - - echo "$count$TAB$MY_LOCUS$TAB$MY_ALIGN_PATT$TAB$MY_SUBST_MODEL$TAB incomplete" >> ../remaining_run_info.tmp ; - - fi - cd ..; - echo "$((count++))" > ./count.tmp ; - done -) - - - echo "No$TAB Locus$TAB No. Patterns$TAB Subst. Model$TAB Likelihood$TAB ML Run Time$TAB Status" > ./header.tmp ; - echo "No$TAB Locus$TAB No. Patterns$TAB Subst. Model$TAB Status" > ./rem_header.tmp ; - cat ./header.tmp ./completed_run_info.tmp > ./completed_run_info.txt ; - cat ./rem_header.tmp ./remaining_run_info.tmp > ./remaining_run_info.txt ; - - - echo "INFO | $(date) | Editing final RAxML run information files... " - if [[ "${machine}" = "Mac" ]]; then - sed -i '' 's/\ //g' ./completed_run_info.txt ; - sed -i '' 's/\ //g' ./remaining_run_info.txt ; - fi - if [[ "${machine}" = "Linux" ]]; then - sed -i 's/\ //g' ./completed_run_info.txt ; - sed -i 's/\ //g' ./remaining_run_info.txt ; - fi - - -############ III. CLEAN UP WORKSPACE BY REMOVING TEMPORARY FILES. -echo "INFO | $(date) | Step #3: Clean up workspace. " -echo "INFO | $(date) | Cleaning up workspace by removing temporary files generated during run... 
" - - if [[ "$(ls -1 ./*.tmp 2>/dev/null | wc -l | sed 's/\ //g')" != "0" ]]; then - rm ./*.tmp ; - fi - -echo "----------------------------------------------------------------------------------------------------------" -echo "output file(s): ./completed_run_info.txt " -echo " ./remaining_run_info.txt " -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -# None at this time. - -############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -h help text (also: --help) echo this help text and exit - -V version (also: --version) echo version of this script and exit - - ${bold}OVERVIEW${reset} - THIS SCRIPT was designed to run in a current working directory where the MAGNET pipeline - in PIrANHA v0.4a4 (Bagley 2019) is being run, or has completed a run, to estimate maximum- - likelihood (ML) gene trees in RAxML v8+ (Stamatakis 2014) for a set of loci from DNA - sequence data. Given such a workspace, this script counts the number of loci or data - partitions (each assigned a separate RAxML run in MAGNET) with completed RAxML runs and - collates run information. Output files include a summary of completed runs and a summary - of ongoing runs. - This program runs on UNIX-like and Linux systems using commonly distributed utility - software, with usage obtained by running the script with the -h flag. It has been tested - on macOS High Sierra (v10.13+) and Mojave but should work on many earlier versions or - Linux (tested on CentOS 6/7). There are no other dependencies. - - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f RAxMLRunChecker Run the software - piranha -f RAxMLRunChecker --args='-h' Print this help text - - ${bold}CITATION${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. 
GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on Wed, Mar 6 09:57:26 CST 2019. - Copyright (c) 2019 Justin C. Bagley. All rights reserved. -" - -if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then - echo "$USAGE" - exit -fi - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -RAxMLRunChecker - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/getBootTrees b/bin/MAGNET-1.1.1/shell/getBootTrees deleted file mode 100755 index bd44649e..00000000 --- a/bin/MAGNET-1.1.1/shell/getBootTrees +++ /dev/null @@ -1,302 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: getBootTrees.sh # - VERSION="v1.0.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before August 20, 2017. # -# Last update: December 11, 2020 # -# Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT AUTOMATES ORGANIZING BOOTSTRAP TREES OUTPUT BY RAxML RUNS CONDUCTED # -# IN CURRENT WORKING DIRECTORY USING MAGNET # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- - -UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - - -# Source shared functions and variables -# ----------------------------------- - -FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions. 
-VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function getBootTrees () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | getBootTrees, v1.0.1 December 2020 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting getBootTrees script... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " -checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " - - -echo "INFO | $(date) | Step #2: Run main getBootTrees script. 
" -echo "INFO | $(date) | Organizing bootstrap trees and making final output file containing all trees... " -echo "INFO | $(date) | Making list of ML bootstrap trees generated by RAxML... " - - ls **/RAxML_bootstrap.raxml_out > bootTrees.list ; - - ##--Assign bootstrap tree list to variable - MY_BOOT_TREE_LIST="$(cat ./bootTrees.list)"; - - ############ ORGANIZE BOOTSTRAP TREES INTO ONE LOCATION - ##--Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in - ##--working directory. However, all the boot tree files have the same name. So, in order - ##--to do this, we have to give each boot tree file a name that matches the corresponding - ##--run folder, i.e. locus. We can rename each file right after downloading it. - - mkdir ./bootstrap_trees/ ; - - echo "INFO | $(date) | Copying *ALL* ML bootstrap trees to 'bootstrap_trees' folder in current directory for post-processing..." - ( - for j in ${MY_BOOT_TREE_LIST}; do - echo "$j"; - cp "$j" ./bootstrap_trees/ ; - MY_LOCUS_NAME="$(echo $j | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./bootstrap_trees/RAxML_bootstrap.raxml_out ./bootstrap_trees/"$MY_LOCUS_NAME"_RAxML_boot.tre ; - rm ./bootstrap_trees/RAxML_bootstrap.raxml_out ; - done - ) - - echo "INFO | $(date) | Making final output file containing best ML trees from all runs/loci..." - ( - for k in ./bootstrap_trees/*; do - echo "$k"; - cat "$k" >> ./boottrees.tre ; - done - ) - - echo "INFO | $(date) | Making final list of ML bootstrap trees in bootstrap_trees directory..." - ls ./bootstrap_trees/*.tre > final_bootTrees.list ; - -#echo "INFO | $(date) | Done collating ML bootstrap trees from all RAxML runs (independent sub-folders of pwd) run using getBootTrees.sh." -#echo "INFO | $(date) | Bye. 
-#" -echo "----------------------------------------------------------------------------------------------------------" -echo "output file(s)/folder(s): ./boottrees.tre " -echo " ./final_bootTrees.list " -echo " ./bootstrap_trees/ " -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -# None at this time. - -############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -h help text (also: --help) echo this help text and exit - -V version (also: --version) echo version of this script and exit - - ${bold}OVERVIEW${reset} - THIS SCRIPT was designed to run in a current working directory where the MAGNET pipeline - in PIrANHA v0.4a4 (Bagley 2019) has been run to estimate maximum-likelihood (ML) gene trees - in RAxML v8+ (Stamatakis 2014) for a set of loci from DNA sequence data. Given such a - workspace, this script organizes the bootstrap trees resulting from all RAxML runs, in - subfolders of the current directory. The getBootTrees function is already run during the - MAGNET pipeline, by default, so users will likely not need to run getBootTrees from this - standalone function. However, this function may be useful in summarizing bootstrap trees - from a set of RAxML subfolders when MAGNET has not been run. For example, if you wrote your - own code to run RAxML in multiple subfolders of a given directory, then you could use - getBootTrees to summarize the bootstrap trees and quit. - This program runs on UNIX-like and Linux systems using commonly distributed utility - software, with usage obtained by running the script with the -h flag. It has been tested - on macOS High Sierra (v10.13+) and Mojave but should work on many earlier versions or - Linux (tested on CentOS 6/7). There are no other dependencies. 
- - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f getBootTrees Run the software - piranha -f getBootTrees --args='-h' Print this help text - - ${bold}CITATION${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on Wed, Mar 6 09:57:26 CST 2019. - Copyright (c) 2019 Justin C. Bagley. All rights reserved. -" - -if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then - echo "$USAGE" - exit -fi - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -getBootTrees - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/getBootTrees.sh b/bin/MAGNET-1.1.1/shell/getBootTrees.sh deleted file mode 100644 index 5341ab3a..00000000 --- a/bin/MAGNET-1.1.1/shell/getBootTrees.sh +++ /dev/null @@ -1,295 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: getBootTrees.sh # - VERSION="v1.0.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on/before August 20, 2017. # -# Last update: December 21, 2020 # -# Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT AUTOMATES ORGANIZING BOOTSTRAP TREES OUTPUT BY RAxML RUNS CONDUCTED # -# IN CURRENT WORKING DIRECTORY USING MAGNET # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- -UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - -# Source shared functions and variables -# ----------------------------------- -FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions. 
-VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function getBootTrees () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | getBootTrees, v1.0.1 December 2020 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting getBootTrees script... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -echoCDWorkingDir -checkMachineType - - -echo "INFO | $(date) | Step #2: Run main getBootTrees script. " -echo "INFO | $(date) | Organizing bootstrap trees and making final output file containing all trees... " -echo "INFO | $(date) | Making list of ML bootstrap trees generated by RAxML... 
" - - ls **/RAxML_bootstrap.raxml_out > bootTrees.list; - - ##--Assign bootstrap tree list to variable - MY_BOOT_TREE_LIST="$(cat ./bootTrees.list)"; - - ############ ORGANIZE BOOTSTRAP TREES INTO ONE LOCATION - ##--Place all inferred bootstrap tree files into a single "bootstrap_trees" folder in - ##--working directory. However, all the boot tree files have the same name. So, in order - ##--to do this, we have to give each boot tree file a name that matches the corresponding - ##--run folder, i.e. locus. We can rename each file right after downloading it. - - mkdir ./bootstrap_trees/ ; - - echo "INFO | $(date) | Copying *ALL* ML bootstrap trees to 'bootstrap_trees' folder in current directory for post-processing..." - ( - for j in ${MY_BOOT_TREE_LIST}; do - echo "$j"; - cp "$j" ./bootstrap_trees/ ; - MY_LOCUS_NAME="$(echo "$j" | sed 's/\/[A-Za-z.\_\-]*//g')"; - cp ./bootstrap_trees/RAxML_bootstrap.raxml_out ./bootstrap_trees/"$MY_LOCUS_NAME"_RAxML_boot.tre ; - rm ./bootstrap_trees/RAxML_bootstrap.raxml_out ; - done - ) - - echo "INFO | $(date) | Making final output file containing best ML trees from all runs/loci..." - ( - for k in ./bootstrap_trees/*; do - echo "$k"; - cat "$k" >> ./boottrees.tre ; - done - ) - - echo "INFO | $(date) | Making final list of ML bootstrap trees in bootstrap_trees directory..." - ls ./bootstrap_trees/*.tre > final_bootTrees.list ; - -#echo "INFO | $(date) | Done collating ML bootstrap trees from all RAxML runs (independent sub-folders of pwd) run using getBootTrees.sh." -#echo "INFO | $(date) | Bye. 
-#" -echo "----------------------------------------------------------------------------------------------------------" -echo "output file(s)/folder(s): ./boottrees.tre " -echo " ./final_bootTrees.list " -echo " ./bootstrap_trees/ " -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -# None at this time. - -############ CREATE USAGE & HELP TEXTS -USAGE="Usage: $(basename "$0") [OPTION]... - - ${bold}Options:${reset} - -h help text (also: --help) echo this help text and exit - -V version (also: --version) echo version of this script and exit - - ${bold}OVERVIEW${reset} - THIS SCRIPT was designed to run in a current working directory where the MAGNET pipeline - in PIrANHA v0.4a4 (Bagley 2019) has been run to estimate maximum-likelihood (ML) gene trees - in RAxML v8+ (Stamatakis 2014) for a set of loci from DNA sequence data. Given such a - workspace, this script organizes the bootstrap trees resulting from all RAxML runs, in - subfolders of the current directory. The getBootTrees function is already run during the - MAGNET pipeline, by default, so users will likely not need to run getBootTrees from this - standalone function. However, this function may be useful in summarizing bootstrap trees - from a set of RAxML subfolders when MAGNET has not been run. For example, if you wrote your - own code to run RAxML in multiple subfolders of a given directory, then you could use - getBootTrees to summarize the bootstrap trees and quit. - This program runs on UNIX-like and Linux systems using commonly distributed utility - software, with usage obtained by running the script with the -h flag. It has been tested - on macOS High Sierra (v10.13+) and Mojave but should work on many earlier versions or - Linux (tested on CentOS 6/7). There are no other dependencies. 
- - ${bold}Usage examples:${reset} - Call the program using PIrANHA, as follows: - - piranha -f getBootTrees Run the software - piranha -f getBootTrees --args='-h' Print this help text - - ${bold}CITATION${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - - ${bold}REFERENCES${reset} - Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at: - . - Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of - large phylogenies. Bioinformatics, 30, 1312-1313. - - Created by Justin Bagley on Wed, Mar 6 09:57:26 CST 2019. - Copyright (c) 2019 Justin C. Bagley. All rights reserved. -" - -if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then - echo "$USAGE" - exit -fi - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -getBootTrees - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/phyNcharSumm b/bin/MAGNET-1.1.1/shell/phyNcharSumm deleted file mode 100755 index 029163d6..00000000 --- a/bin/MAGNET-1.1.1/shell/phyNcharSumm +++ /dev/null @@ -1,226 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: phyNcharSumm.sh # - VERSION="v1.0.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on November 9, 2016. # -# Last update: December 11, 2020 # -# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT SUMMARIZES THE NUMBER OF CHARACTERS IN EACH OF N PHYLIP DNA SEQUENCE # -# ALIGNMENTS IN CURRENT WORKING DIRECTORY AND SAVES THIS INFORMATION TO FILE # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- - -UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." - exit 1 -fi - - -# Source shared functions and variables -# ----------------------------------- - -FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions. 
-VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. 
-# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function phyNcharSumm () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | phyNcharSumm, v1.0.1 December 2020 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" - -######################################## START ########################################### -echo "INFO | $(date) | Starting phyNcharSumm... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")"; -echoCDWorkingDir -#echo "INFO | $(date) | Checking machine type... " -checkMachineType -#echo "INFO | $(date) | Found machine type ${machine}. " - - -echo "INFO | $(date) | Step #2: Summarize number of characters in each PHYLIP DNA sequence alignment in current directory. 
" -###### Starting from a folder containing multiple PHYLIP alignment files (e.g. as generated -## by MAGNET.sh for indiv. SNP loci), this script uses a for loop to echo all alignment -## names (containing locus or taxon information) to file, and then echoes the number of -## characters (Nchar) recursively to a file named "nchar.txt" in the working directory. - -echo "INFO | $(date) | Saving number of characters for each alignment in file named './nchar.txt'. " -( - for i in ./*.phy; do - echo "$i" >> ./phyalign_names.txt; - echo "$(head -n1 "$i" | awk -F"[0-9]*\ " '{print $NF}')" >> ./nchar.txt ; - done; -) - -## I would like to build on this by calculating statistics from the nchar list from within -## the shell. - -echo "----------------------------------------------------------------------------------------------------------" -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -# None at this time. - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. 
-set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. -# checkDependencies - -# Run the script -phyNcharSumm - -# Exit cleanly -safeExit diff --git a/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh b/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh deleted file mode 100644 index bcbcbb4a..00000000 --- a/bin/MAGNET-1.1.1/shell/phyNcharSumm.sh +++ /dev/null @@ -1,221 +0,0 @@ -#!/bin/sh - -########################################################################################## -# __ o __ __ __ |__ __ # -# |__) | | ' (__( | ) | ) (__( # -# | # -# # -# File: phyNcharSumm.sh # - VERSION="v1.0.1" # -# Author: Justin C. Bagley # -# Date: Created by Justin Bagley on November 9, 2016. # -# Last update: December 21, 2020 # -# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. # -# Please report bugs to . # -# # -# Description: # -# SHELL SCRIPT THAT SUMMARIZES THE NUMBER OF CHARACTERS IN EACH OF N PHYLIP DNA SEQUENCE # -# ALIGNMENTS IN CURRENT WORKING DIRECTORY AND SAVES THIS INFORMATION TO FILE # -# # -########################################################################################## - -# Provide a variable with the location of this script. -SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -# Source Scripting Utilities -# ----------------------------------- -# These shared utilities provide many functions which are needed to provide -# the functionality in this boilerplate. This script will fail if they can -# not be found. -# ----------------------------------- - -UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities. - -if [[ -f "${UTILS_LOCATION}" ]]; then - source "${UTILS_LOCATION}" -else - echo "Please find the file util.sh and add a reference to it in this script. Exiting..." 
- exit 1 -fi - - -# Source shared functions and variables -# ----------------------------------- - -FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions. -VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables. - -if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then - source "${FUNCS_LOCATION}" ; - source "${VARS_LOCATION}" ; -else - echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... " - exit 1 -fi - - -# trapCleanup Function -# ----------------------------------- -# Any actions that should be taken if the script is prematurely -# exited. Always call this function at the top of your script. -# ----------------------------------- -function trapCleanup() { - echo "" - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - die "Exit trapped. In function: '${FUNCNAME[*]}'" -} - -# safeExit -# ----------------------------------- -# Non destructive exit for when script exits naturally. -# Usage: Add this function at the end of every script. -# ----------------------------------- -function safeExit() { - # Delete temp files, if any - if is_dir "${tmpDir}"; then - rm -r "${tmpDir}" - fi - trap - INT TERM EXIT - exit -} - -# Set Flags -# ----------------------------------- -# Flags which can be overridden by user input. -# Default values are below -# ----------------------------------- -quiet=false -printLog=false -verbose=false -force=false -strict=false -debug=false -args=() - -# Set Temp Directory -# ----------------------------------- -# Create temp directory with three random numbers and the process ID -# in the name. This directory is removed automatically at exit. -# ----------------------------------- -tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$" -(umask 077 && mkdir "${tmpDir}") || { - die "Could not create temporary directory! 
Exiting." -} - -# Logging -# ----------------------------------- -# Log is only used when the '-l' flag is set. -# -# To never save a logfile change variable to '/dev/null' -# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log -# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log -# ----------------------------------- -logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log" - -# Check for Dependencies -# ----------------------------------- -# Arrays containing package dependencies needed to execute this script. -# The script will fail if dependencies are not installed. For Mac users, -# most dependencies can be installed automatically using the package -# manager 'Homebrew'. Mac applications will be installed using -# Homebrew Casks. Ruby and gems via RVM. -# ----------------------------------- -export homebrewDependencies=() -export caskDependencies=() -export gemDependencies=() - - - - -function phyNcharSumm () { - -######################################## START ########################################### -########################################################################################## - -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | phyNcharSumm, v1.0.1 December 2020 (part of PIrANHA v0.4a4) " -echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. " -echo "INFO | $(date) |----------------------------------------------------------------" -echo "INFO | $(date) | Starting phyNcharSumm... " -echo "INFO | $(date) | Step #1: Set up workspace and check machine type. " -############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE -echoCDWorkingDir -checkMachineType - - -echo "INFO | $(date) | Step #2: Summarize number of characters in each PHYLIP DNA sequence alignment in current directory. " -###### Starting from a folder containing multiple PHYLIP alignment files (e.g. as generated -## by MAGNET.sh for indiv. 
SNP loci), this script uses a for loop to echo all alignment -## names (containing locus or taxon information) to file, and then echoes the number of -## characters (Nchar) recursively to a file named "nchar.txt" in the working directory. - -echo "INFO | $(date) | Saving number of characters for each alignment in file named './nchar.txt'. " -( - for i in ./*.phy; do - echo "$i" >> ./phyalign_names.txt; - echo "$(head -n1 "$i" | awk -F"[0-9]*\ " '{print $NF}')" >> ./nchar.txt ; - done; -) - -## I would like to build on this by calculating statistics from the nchar list from within -## the shell. - -echo "----------------------------------------------------------------------------------------------------------" -echo "" - - -########################################################################################## -######################################### END ############################################ - -} - - - -############ SCRIPT OPTIONS -## OPTION DEFAULTS ## -# None at this time. - -if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then - echo "$(basename "$0") $VERSION"; - exit -fi - - -# ############# ############# ############# -# ## TIME TO RUN THE SCRIPT ## -# ## ## -# ## You shouldn't need to edit anything ## -# ## beneath this line ## -# ## ## -# ############# ############# ############# - -# Trap bad exits with your cleanup function -trap trapCleanup EXIT INT TERM - -# Set IFS to preferred implementation -IFS=$'\n\t' - -# Exit on error. Append '||true' when you run the script if you expect an error. -set -o errexit - -# Run in debug mode, if set -if ${debug}; then set -x ; fi - -# Exit on empty variable -if ${strict}; then set -o nounset ; fi - -# Bash will remember & return the highest exitcode in a chain of pipes. -# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example. -set -o pipefail - -# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed. 
-# checkDependencies - -# Run the script -phyNcharSumm - -# Exit cleanly -safeExit