Skip to content

Commit

Permalink
Finalizing MAGNET v1.2.0 file upload
Browse files Browse the repository at this point in the history
Finalizing MAGNET v1.2.0 file upload
  • Loading branch information
justincbagley committed Dec 23, 2020
1 parent 0175a94 commit 46e0ba5
Show file tree
Hide file tree
Showing 15 changed files with 4,132 additions and 0 deletions.
405 changes: 405 additions & 0 deletions bin/MAGNET-1.2.0/NEXUS2gphocs

Large diffs are not rendered by default.

408 changes: 408 additions & 0 deletions bin/MAGNET-1.2.0/NEXUS2gphocs.sh

Large diffs are not rendered by default.

341 changes: 341 additions & 0 deletions bin/MAGNET-1.2.0/RAxMLRunChecker
Original file line number Diff line number Diff line change
@@ -0,0 +1,341 @@
#!/bin/sh

##########################################################################################
# __ o __ __ __ |__ __ #
# |__) | | ' (__( | ) | ) (__( #
# | #
# #
# File: RAxMLRunChecker.sh #
VERSION="v1.3.0" #
# Author: Justin C. Bagley #
# Date: Created by Justin Bagley on/before November 29, 2018. #
# Last update: March 14, 2019 #
# Copyright (c) 2018-2019 Justin C. Bagley. All rights reserved. #
# Please report bugs to <jbagley@jsu.edu>. #
# #
# Description: #
# SHELL SCRIPT THAT COUNTS NUMBER OF LOCI/PARTITIONS WITH COMPLETED RAxML RUNS DURING #
# OR AFTER A RUN OF THE MAGNET PIPELINE, AND COLLATES RUN INFORMATION #
# #
##########################################################################################

# Provide a variable with the location of this script.
SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Source Scripting Utilities
# -----------------------------------
# These shared utilities provide many functions which are needed to provide
# the functionality in this boilerplate. This script will fail if they can
# not be found.
# -----------------------------------

UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities.

if [[ -f "${UTILS_LOCATION}" ]]; then
source "${UTILS_LOCATION}"
else
echo "Please find the file util.sh and add a reference to it in this script. Exiting..."
exit 1
fi


# Source shared functions and variables
# -----------------------------------

FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions.
VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables.

if [[ -f "${FUNCS_LOCATION}" ]] && [[ -f "${VARS_LOCATION}" ]]; then
source "${FUNCS_LOCATION}" ;
source "${VARS_LOCATION}" ;
else
echo "Please find the files sharedFunctions.sh and sharedVariables.sh and add references to them in this script. Exiting... "
exit 1
fi


# trapCleanup Function
# -----------------------------------
# Any actions that should be taken if the script is prematurely
# exited. Always call this function at the top of your script.
# -----------------------------------
function trapCleanup() {
echo ""
# Delete temp files, if any
if is_dir "${tmpDir}"; then
rm -r "${tmpDir}"
fi
die "Exit trapped. In function: '${FUNCNAME[*]}'"
}

# safeExit
# -----------------------------------
# Non destructive exit for when script exits naturally.
# Usage: Add this function at the end of every script.
# -----------------------------------
function safeExit() {
# Delete temp files, if any
if is_dir "${tmpDir}"; then
rm -r "${tmpDir}"
fi
trap - INT TERM EXIT
exit
}

# Set Flags
# -----------------------------------
# Flags which can be overridden by user input.
# Default values are below
# -----------------------------------
quiet=false
printLog=false
verbose=false
force=false
strict=false
debug=false
args=()

# Set Temp Directory
# -----------------------------------
# Create temp directory with three random numbers and the process ID
# in the name. This directory is removed automatically at exit.
# -----------------------------------
tmpDir="/tmp/${SCRIPT_NAME}.$RANDOM.$RANDOM.$RANDOM.$$"
(umask 077 && mkdir "${tmpDir}") || {
die "Could not create temporary directory! Exiting."
}

# Logging
# -----------------------------------
# Log is only used when the '-l' flag is set.
#
# To never save a logfile change variable to '/dev/null'
# Save to Desktop use: $HOME/Desktop/${SCRIPT_BASENAME}.log
# Save to standard user log location use: $HOME/Library/Logs/${SCRIPT_BASENAME}.log
# -----------------------------------
logFile="$HOME/Library/Logs/${SCRIPT_BASENAME}.log"

# Check for Dependencies
# -----------------------------------
# Arrays containing package dependencies needed to execute this script.
# The script will fail if dependencies are not installed. For Mac users,
# most dependencies can be installed automatically using the package
# manager 'Homebrew'. Mac applications will be installed using
# Homebrew Casks. Ruby and gems via RVM.
# -----------------------------------
export homebrewDependencies=()
export caskDependencies=()
export gemDependencies=()




function RAxMLRunChecker () {

######################################## START ###########################################
##########################################################################################

echo "INFO | $(date) |----------------------------------------------------------------"
echo "INFO | $(date) | RAxMLRunChecker, v1.3.0 March 2019 (part of PIrANHA v0.4a4) "
echo "INFO | $(date) | Copyright (c) 2018-2019 Justin C. Bagley. All rights reserved. "
echo "INFO | $(date) |----------------------------------------------------------------"

######################################## START ###########################################
echo "INFO | $(date) | Starting RAxMLRunChecker pipeline... "
echo "INFO | $(date) | Step #1: Set up workspace and check machine type. "
############ I. SET WORKING DIRECTORY AND CHECK MACHINE TYPE
#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
echoCDWorkingDir
#echo "INFO | $(date) | Checking machine type... "
checkMachineType
#echo "INFO | $(date) | Found machine type ${machine}. "


############ II. RUN RAXML RUN CHECKER
echo "INFO | $(date) | Step #2: Check RAxML runs in subfolders in current directory (assumed to be a MAGNET run folder). "

echo "INFO | $(date) | Estimating numbers (no.) of loci and RAxML runs... "
MY_N_LOCI_FOLD="$(ls -d ./locus*/ | wc -l | sed 's/^[\ ]*//g')";
MY_N_COMPLETED="$(ls ./locus*/RAxML_info.raxml_out | wc -l | sed 's/^[\ ]*//g')";
MY_N_REMAINING="$(calc $MY_N_LOCI_FOLD - $MY_N_COMPLETED)";

echo "INFO | $(date) | Total no. RAxML runs: $TAB$TAB$MY_N_LOCI_FOLD "
echo "INFO | $(date) | No. completed RAxML runs: $TAB$MY_N_COMPLETED "
echo "INFO | $(date) | No. remaining RAxML runs: $TAB$MY_N_REMAINING "

if [[ -s ./completed_run_info.tmp ]]; then
rm ./completed_run_info.tmp ;
fi
if [[ -s ./completed_run_info.txt ]]; then
rm ./completed_run_info.txt ;
fi
if [[ -s ./remaining_run_info.tmp ]]; then
rm ./remaining_run_info.tmp ;
fi
if [[ -s ./remaining_run_info.txt ]]; then
rm ./remaining_run_info.txt ;
fi

echo "INFO | $(date) | Saving RAxML run info to file... "

count=1
echo "INFO | $(date) | ... $count / $MY_N_LOCI_FOLD ..."
(
for i in ./locus*/; do
MY_LOCUS="$(echo $i | sed 's/\.\///g; s/\///g; s/\ //g')";
MY_COUNT_HUND_CHECK="$(calc $count / 100 | sed 's/^[0-9]*\.//g; s/^[0]\{1\}//g')"
if [[ "$MY_COUNT_HUND_CHECK" -eq "0" ]]; then
echo "INFO | $(date) | ... $count / $MY_N_LOCI_FOLD ..."
fi
if [[ "$count" -eq "$MY_N_LOCI_FOLD" ]]; then
echo "INFO | $(date) | ... $MY_N_LOCI_FOLD / $MY_N_LOCI_FOLD ..."
fi
cd "$i";
if [[ -s RAxML_bipartitions.raxml_out ]]; then

MY_ALIGN_PATT="$(grep -h '^Alignment\ Patterns\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')";
MY_SUBST_MODEL="$(grep -h '^Substitution\ Matrix\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')";
MY_OPTIM_LIKE="$(grep -h 'Final\ ML\ Optimization\ Likelihood\:\ ' ./RAxML_info.raxml_out | sed 's/^.*\:\ //g')";
MY_ML_RUN_TIME="$(grep -h 'Overall\ execution\ time\ ' ./RAxML_info.raxml_out | sed 's/^Overall\ execution\ time\ [A-Za-z\ ]*\:\ //g; s/or\ .*//g')";

echo "$count$TAB$MY_LOCUS$TAB$MY_ALIGN_PATT$TAB$MY_SUBST_MODEL$TAB$MY_OPTIM_LIKE$TAB$MY_ML_RUN_TIME$TAB complete" >> ../completed_run_info.tmp ;
fi
if [[ ! -s RAxML_bipartitions.raxml_out ]]; then

echo "$count$TAB$MY_LOCUS$TAB$MY_ALIGN_PATT$TAB$MY_SUBST_MODEL$TAB incomplete" >> ../remaining_run_info.tmp ;

fi
cd ..;
echo "$((count++))" > ./count.tmp ;
done
)


echo "No$TAB Locus$TAB No. Patterns$TAB Subst. Model$TAB Likelihood$TAB ML Run Time$TAB Status" > ./header.tmp ;
echo "No$TAB Locus$TAB No. Patterns$TAB Subst. Model$TAB Status" > ./rem_header.tmp ;
cat ./header.tmp ./completed_run_info.tmp > ./completed_run_info.txt ;
cat ./rem_header.tmp ./remaining_run_info.tmp > ./remaining_run_info.txt ;


echo "INFO | $(date) | Editing final RAxML run information files... "
if [[ "${machine}" = "Mac" ]]; then
sed -i '' 's/\ //g' ./completed_run_info.txt ;
sed -i '' 's/\ //g' ./remaining_run_info.txt ;
fi
if [[ "${machine}" = "Linux" ]]; then
sed -i 's/\ //g' ./completed_run_info.txt ;
sed -i 's/\ //g' ./remaining_run_info.txt ;
fi


############ III. CLEAN UP WORKSPACE BY REMOVING TEMPORARY FILES.
echo "INFO | $(date) | Step #3: Clean up workspace. "
echo "INFO | $(date) | Cleaning up workspace by removing temporary files generated during run... "

if [[ "$(ls -1 ./*.tmp 2>/dev/null | wc -l | sed 's/\ //g')" != "0" ]]; then
rm ./*.tmp ;
fi

echo "----------------------------------------------------------------------------------------------------------"
echo "output file(s): ./completed_run_info.txt "
echo " ./remaining_run_info.txt "
echo ""


##########################################################################################
######################################### END ############################################

}



############ SCRIPT OPTIONS
## OPTION DEFAULTS ##
# None at this time.

############ CREATE USAGE & HELP TEXTS
USAGE="Usage: $(basename "$0") [OPTION]...
${bold}Options:${reset}
-h help text (also: --help) echo this help text and exit
-V version (also: --version) echo version of this script and exit
${bold}OVERVIEW${reset}
THIS SCRIPT was designed to run in a current working directory where the MAGNET pipeline
in PIrANHA v0.4a4 (Bagley 2019) is being run, or has completed a run, to estimate maximum-
likelihood (ML) gene trees in RAxML v8+ (Stamatakis 2014) for a set of loci from DNA
sequence data. Given such a workspace, this script counts the number of loci or data
partitions (each assigned a separate RAxML run in MAGNET) with completed RAxML runs and
collates run information. Output files include a summary of completed runs and a summary
of ongoing runs.
This program runs on UNIX-like and Linux systems using commonly distributed utility
software, with usage obtained by running the script with the -h flag. It has been tested
on macOS High Sierra (v10.13+) and Mojave but should work on many earlier versions or
Linux (tested on CentOS 6/7). There are no other dependencies.
${bold}Usage examples:${reset}
Call the program using PIrANHA, as follows:
piranha -f RAxMLRunChecker Run the software
piranha -f RAxMLRunChecker --args='-h' Print this help text
${bold}CITATION${reset}
Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at:
<https://github.com/justincbagley/PIrANHA>.
${bold}REFERENCES${reset}
Bagley, J.C. 2019. PIrANHA v0.4a4. GitHub repository, Available at:
<https://github.com/justincbagley/PIrANHA>.
Stamatakis, A. 2014. RAxML version 8: a tool for phylogenetic analysis and post-analysis of
large phylogenies. Bioinformatics, 30, 1312-1313.
Created by Justin Bagley on Wed, Mar 6 09:57:26 CST 2019.
Copyright (c) 2019 Justin C. Bagley. All rights reserved.
"

if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
echo "$USAGE"
exit
fi

if [[ "$1" == "-V" ]] || [[ "$1" == "--version" ]]; then
echo "$(basename "$0") $VERSION";
exit
fi


# ############# ############# #############
# ## TIME TO RUN THE SCRIPT ##
# ## ##
# ## You shouldn't need to edit anything ##
# ## beneath this line ##
# ## ##
# ############# ############# #############

# Trap bad exits with your cleanup function
trap trapCleanup EXIT INT TERM

# Set IFS to preferred implementation
IFS=$'\n\t'

# Exit on error. Append '||true' when you run the script if you expect an error.
set -o errexit

# Run in debug mode, if set
if ${debug}; then set -x ; fi

# Exit on empty variable
if ${strict}; then set -o nounset ; fi

# Bash will remember & return the highest exitcode in a chain of pipes.
# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`, for example.
set -o pipefail

# Invoke the checkDependenices function to test for Bash packages. Uncomment if needed.
# checkDependencies

# Run the script
RAxMLRunChecker

# Exit cleanly
safeExit

0 comments on commit 46e0ba5

Please sign in to comment.