Skip to content

Commit

Permalink
Updating MAGNET to v1.2.0.
Browse files Browse the repository at this point in the history
I updated the standalone MAGNET.sh and related files of the repo to v1.1.1, matching the current head of PIrANHA. I then made a variety of changes to the files, bringing them to v1.2.0 (new major-minor version).
  • Loading branch information
justincbagley committed Dec 22, 2020
1 parent dde49d1 commit 5c81e13
Show file tree
Hide file tree
Showing 6 changed files with 449 additions and 399 deletions.
778 changes: 424 additions & 354 deletions bin/MAGNET-1.1.1/MAGNET

Large diffs are not rendered by default.

17 changes: 8 additions & 9 deletions bin/MAGNET-1.1.1/R/rmGapSites.r
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. #
# Last update: March 6, 2019 #
# Copyright (c) 2016-2019 Justin C. Bagley. All rights reserved. #
# Please report bugs to <jbagley@jsu.edu>. #
# Please report bugs to <jbagley@jsu.edu>. #
# #
# Description: #
# RSCRIPT THAT REMOVES GAP SITES FROM AN INPUT DNA SEQUENCE ALIGNMENT IN PHYLIP FORMAT #
Expand All @@ -21,8 +21,8 @@

######################################## START ###########################################

##--Load needed library, R code, or package stuff. Install package if not present.
##--source("rmGapSites.R", chdir = TRUE)
# Load needed library, R code, or package stuff. Install package if not present.
# source("rmGapSites.R", chdir = TRUE)
packages <- c("ape", "readr", "seqinr")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
install.packages(setdiff(packages, rownames(installed.packages())))
Expand All @@ -32,20 +32,19 @@ library(ape)
library(readr)
library(seqinr)

##--Read in the data, output from first part of NEXUS2gphocs loop:
# Read in the data, output from first part of NEXUS2gphocs loop:
sites <- read.dna("sites.phy", format="sequential")
gap_thresh <- read_file("gap_threshold.txt")

##--Fix the gap threshold and then delete columns with the threshold level of gaps
##--equivalent to at least 1 gap (i.e. any gaps at all):
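# Strip the newline from the gap threshold read from file, then delete alignment columns
# whose gap frequency reaches the threshold (a threshold equivalent to at least 1 gap
# removes every column containing any gaps at all):
# NOTE(review): gap_thresh is still a character string when passed as `threshold`;
# ape documents this argument as numeric -- consider as.numeric(). TODO confirm.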
gap_thresh <- sub(pattern = "\\n", replacement = "", x = gap_thresh)
sites_nogaps <- del.colgapsonly(sites, threshold = gap_thresh, freq.only = FALSE)

##--Write new alignment, with sites with gaps removed, to file:
##--(writing to present working directory)...
# Write new alignment, with sites with gaps removed, to file (in present working directory)...
write.dna(sites_nogaps, file="sites_nogaps.phy", format="sequential", nbcol=-1, colw=500000)

##--write.nexus(sites_nogaps, file="sites_nogaps.nex")
# write.nexus(sites_nogaps, file="sites_nogaps.nex")


######################################### END ############################################
30 changes: 15 additions & 15 deletions bin/MAGNET-1.1.1/shell/NEXUS2gphocs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
VERSION="v1.5.1" #
# Author: Justin C. Bagley #
# Date: Created by Justin Bagley on/before Aug 29 13:12:45 2016 -0700. #
# Last update: December 11, 2020 #
# Last update: December 21, 2020 #
# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. #
# Please report bugs to <jbagley@jsu.edu>. #
# #
Expand Down Expand Up @@ -152,27 +152,27 @@ checkMachineType


############ STEP #2: GET NEXUS FILE & DATA CHARACTERISTICS, CONVERT NEXUS TO FASTA FORMAT
##--Extract charset info from sets block at end of NEXUS file:
# Extract charset info from sets block at end of NEXUS file:
MY_NEXUS_CHARSETS="$(egrep "charset|CHARSET" "$MY_NEXUS" | \
awk -F"=" '{print $NF}' | sed 's/\;/\,/g' | \
awk '{a[NR]=$0} END {for (i=1;i<NR;i++) print a[i];sub(/.$/,"",a[NR]);print a[NR]}' | \
sed 's/\,/\,'$CR'/g' | sed 's/^\ //g')";

##--Count number of loci present in the NEXUS file, based on number of charsets defined.
##--Also get corrected count starting from 0 for numbering loci below...
# Count number of loci present in the NEXUS file, based on number of charsets defined.
# Also get corrected count starting from 0 for numbering loci below...
MY_NLOCI="$(echo "$MY_NEXUS_CHARSETS" | wc -l | sed 's/\ //g')";
MY_CORR_NLOCI="$(calc "$MY_NLOCI" - 1)";

##--This is the base name of the original nexus file, so you have it. This WILL work regardless of whether the NEXUS filename extension is written in lowercase or in all caps, ".NEX".
# This is the base name of the original nexus file, so you have it. This WILL work regardless of whether the NEXUS filename extension is written in lowercase or in all caps, ".NEX".
MY_NEXUS_BASENAME="$(echo "$MY_NEXUS" | sed 's/\.\///g; s/\.[A-Za-z]\{3\}$//g')";

##--Convert data file from NEXUS to fasta format using bioscripts.convert v0.4 Python package:
##--However, if alignment is too long (>100,000 bp), then need to convert to fasta using my
##--script and then wrap to 60 characters with fold function (as suggested at stackexchange
##--post URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size).
##--If this conversion failes because the alignment is too long, then the code to follow
##--will have nothing to work with. So, I am here adding a conditional quit if the fasta
##--file is not generated.
# Convert data file from NEXUS to fasta format using the bioscripts.convert v0.4 Python package.
# However, if the alignment is too long (>100,000 bp), then we need to convert to fasta using my
# script and then wrap to 60 characters with the fold function (as suggested in the stackexchange
# post at URL: https://unix.stackexchange.com/questions/25173/how-can-i-wrap-text-at-a-certain-column-size).
# If this conversion fails because the alignment is too long, then the code that follows
# will have nothing to work with. So, I am adding a conditional quit here in case the fasta
# file is not generated.

#---------ADD IF/THEN CONDITIONAL AND MY OWN NEXUS2fasta SCRIPT HERE!!!!----------#

Expand All @@ -191,7 +191,7 @@ checkMachineType

############ STEP #3: PUT COMPONENTS OF ORIGINAL NEXUS FILE AND THE FASTA FILE TOGETHER TO
############ MAKE A G-PhoCS-FORMATTED DATA FILE
##--Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line:
# Make top (first line) of the G-Phocs format file, which should have the number of loci on the first line:
echo "$MY_NLOCI" | sed 's/[\ ]*//g' > gphocs_top.txt

echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt
Expand All @@ -204,9 +204,9 @@ echo "$MY_GAP_THRESHOLD" > ./gap_threshold.txt
export setLower="$(echo "$j" | sed 's/\-.*$//g')";
export setUpper="$(echo "$j" | sed 's/[0-9]*\-//g' | sed 's/\,//g; s/\ //g')";

**/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta;
**/selectSites.pl -s "$charRange" "$MY_FASTA" > ./sites.fasta ;

**/fasta2phylip.pl ./sites.fasta > ./sites.phy;
**/fasta2phylip.pl ./sites.fasta > ./sites.phy ;

##--Need to make sure there is a space between the tip taxon name (10 characters as output
##--by the fasta2phylip.pl Perl script) and the corresponding sequence, for all tips. Use
Expand Down
7 changes: 0 additions & 7 deletions bin/MAGNET-1.1.1/shell/RAxMLRunChecker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# the functionality in this boilerplate. This script will fail if they can
# not be found.
# -----------------------------------

UTILS_LOCATION="${SCRIPT_PATH}/../lib/utils.sh" # Update this path to find the utilities.

if [[ -f "${UTILS_LOCATION}" ]]; then
Expand All @@ -38,10 +37,8 @@ else
exit 1
fi


# Source shared functions and variables
# -----------------------------------

FUNCS_LOCATION="${SCRIPT_PATH}/../lib/sharedFunctions.sh" # Update this path to find the shared functions.
VARS_LOCATION="${SCRIPT_PATH}/../lib/sharedVariables.sh" # Update this path to find the shared variables.

Expand All @@ -53,7 +50,6 @@ else
exit 1
fi


# trapCleanup Function
# -----------------------------------
# Any actions that should be taken if the script is prematurely
Expand Down Expand Up @@ -144,11 +140,8 @@ echo "INFO | $(date) |-----------------------------------------------------
echo "INFO | $(date) | Starting RAxMLRunChecker pipeline... "
echo "INFO | $(date) | Step #1: Set up workspace and check machine type. "
############ I. SET WORKING DIRECTORY AND CHECK MACHINE TYPE
#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
echoCDWorkingDir
#echo "INFO | $(date) | Checking machine type... "
checkMachineType
#echo "INFO | $(date) | Found machine type ${machine}. "


############ II. RUN RAXML RUN CHECKER
Expand Down
9 changes: 1 addition & 8 deletions bin/MAGNET-1.1.1/shell/getBootTrees.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
VERSION="v1.0.1" #
# Author: Justin C. Bagley #
# Date: Created by Justin Bagley on/before August 20, 2017. #
# Last update: December 11, 2020 #
# Last update: December 21, 2020 #
# Copyright (c) 2017-2020 Justin C. Bagley. All rights reserved. #
# Please report bugs to <jbagley@jsu.edu>. #
# #
Expand All @@ -28,7 +28,6 @@ SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# the functionality in this boilerplate. This script will fail if they can
# not be found.
# -----------------------------------

UTILS_LOCATION="${SCRIPT_PATH}/../../../lib/utils.sh" # Update this path to find the utilities.

if [[ -f "${UTILS_LOCATION}" ]]; then
Expand All @@ -38,10 +37,8 @@ else
exit 1
fi


# Source shared functions and variables
# -----------------------------------

FUNCS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedFunctions.sh" # Update this path to find the shared functions.
VARS_LOCATION="${SCRIPT_PATH}/../../../lib/sharedVariables.sh" # Update this path to find the shared variables.

Expand All @@ -53,7 +50,6 @@ else
exit 1
fi


# trapCleanup Function
# -----------------------------------
# Any actions that should be taken if the script is prematurely
Expand Down Expand Up @@ -144,11 +140,8 @@ echo "INFO | $(date) |-----------------------------------------------------
echo "INFO | $(date) | Starting getBootTrees script... "
echo "INFO | $(date) | Step #1: Set up workspace and check machine type. "
############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE
#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
echoCDWorkingDir
#echo "INFO | $(date) | Checking machine type... "
checkMachineType
#echo "INFO | $(date) | Found machine type ${machine}. "


echo "INFO | $(date) | Step #2: Run main getBootTrees script. "
Expand Down
7 changes: 1 addition & 6 deletions bin/MAGNET-1.1.1/shell/phyNcharSumm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
VERSION="v1.0.1" #
# Author: Justin C. Bagley #
# Date: Created by Justin Bagley on November 9, 2016. #
# Last update: December 11, 2020 #
# Last update: December 21, 2020 #
# Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. #
# Please report bugs to <jbagley@jsu.edu>. #
# #
Expand Down Expand Up @@ -139,16 +139,11 @@ echo "INFO | $(date) |-----------------------------------------------------
echo "INFO | $(date) | phyNcharSumm, v1.0.1 December 2020 (part of PIrANHA v0.4a4) "
echo "INFO | $(date) | Copyright (c) 2016-2020 Justin C. Bagley. All rights reserved. "
echo "INFO | $(date) |----------------------------------------------------------------"

######################################## START ###########################################
echo "INFO | $(date) | Starting phyNcharSumm... "
echo "INFO | $(date) | Step #1: Set up workspace and check machine type. "
############ SET WORKING DIRECTORY AND CHECK MACHINE TYPE
#USER_SPEC_PATH="$(printf '%q\n' "$(pwd)")";
echoCDWorkingDir
#echo "INFO | $(date) | Checking machine type... "
checkMachineType
#echo "INFO | $(date) | Found machine type ${machine}. "


echo "INFO | $(date) | Step #2: Summarize number of characters in each PHYLIP DNA sequence alignment in current directory. "
Expand Down

0 comments on commit 5c81e13

Please sign in to comment.