Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

-fully implemented optimized support for LG4 model

-added RAxML-specific memory allocation routines
-added lockless threaded malloc files; the library does not yet appear to perform properly,
 so the files have been added but use of the library is disabled
  • Loading branch information...
commit 5684b6b63fc34b9de36f6d2edaef09db721d2f46 1 parent 9c943c8
@stamatak authored
Showing with 8,962 additions and 1,205 deletions.
  1. +3 −2 Makefile.AVX.HYBRID.gcc
  2. +3 −2 Makefile.AVX.MPI.gcc
  3. +4 −2 Makefile.AVX.PTHREADS.gcc
  4. +3 −2 Makefile.AVX.PTHREADS.mac
  5. +5 −3 Makefile.AVX.gcc
  6. +3 −2 Makefile.AVX.mac
  7. +3 −2 Makefile.HYBRID.gcc
  8. +3 −2 Makefile.MPI.gcc
  9. +3 −2 Makefile.PTHREADS.gcc
  10. +3 −2 Makefile.QuartetMPI.gcc
  11. +3 −2 Makefile.SSE3.HYBRID.gcc
  12. +3 −2 Makefile.SSE3.MPI.gcc
  13. +3 −2 Makefile.SSE3.PTHREADS.gcc
  14. +3 −2 Makefile.SSE3.QuartetMPI.gcc
  15. +3 −2 Makefile.SSE3.gcc
  16. +3 −2 Makefile.gcc
  17. +22 −22 ancestralStates.c
  18. +499 −8 avxLikelihood.c
  19. +314 −228 axml.c
  20. +5 −1 axml.h
  21. +120 −118 bipartitionList.c
  22. +24 −24 classify.c
  23. +27 −0 compiler.h
  24. +114 −8 evaluateGenericSpecial.c
  25. +17 −17 evaluatePartialGenericSpecial.c
  26. +37 −37 fastDNAparsimony.c
  27. +15 −15 fastSearch.c
  28. +48 −0 gcc.h
  29. +2 −0  globalVariables.h
  30. +6,064 −0 ll_alloc.c
  31. +320 −0 ll_asm.h
  32. +144 −0 ll_list.h
  33. +159 −92 makenewzGenericSpecial.c
  34. +108 −0 mem_alloc.c
  35. +17 −0 mem_alloc.h
  36. +30 −29 models.c
  37. +37 −37 multiple.c
  38. +463 −214 newviewGenericSpecial.c
  39. +219 −217 optimizeModel.c
  40. +40 −40 parsePartitions.c
  41. +15 −15 rapidBootstrap.c
  42. +4 −4 rogueEPA.c
  43. +20 −20 searchAlgo.c
  44. +16 −16 topologies.c
  45. +11 −10 treeIO.c
View
5 Makefile.AVX.HYBRID.gcc
@@ -8,11 +8,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o mem_alloc.o
all : raxmlHPC-HYBRID-AVX
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-HYBRID-AVX : $(objs)
$(CC) -o raxmlHPC-HYBRID-AVX $(objs) $(LIBRARIES)
@@ -43,6 +43,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-HYBRID-AVX
View
5 Makefile.AVX.MPI.gcc
@@ -8,11 +8,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o mem_alloc.o
all : raxmlHPC-MPI-AVX
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-MPI-AVX : $(objs)
$(CC) -o raxmlHPC-MPI-AVX $(objs) $(LIBRARIES)
@@ -43,6 +43,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-MPI-AVX
View
6 Makefile.AVX.PTHREADS.gcc
@@ -10,11 +10,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o mem_alloc.o #ll_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h #compiler.h gcc.h ll_asm.h ll_list.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-PTHREADS-AVX $(objs) $(LIBRARIES)
@@ -45,6 +45,8 @@ makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+#ll_alloc.o : ll_alloc.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-PTHREADS-AVX
View
5 Makefile.AVX.PTHREADS.mac
@@ -10,11 +10,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-PTHREADS-AVX $(objs) $(LIBRARIES)
@@ -45,6 +45,7 @@ makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-PTHREADS-AVX
View
8 Makefile.AVX.gcc
@@ -3,17 +3,17 @@
CC = gcc
-CFLAGS = -D__SIM_SSE3 -msse3 -D_GNU_SOURCE -O2 -fomit-frame-pointer -funroll-loops -D__AVX #-Wall -pedantic -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
+CFLAGS = -D__SIM_SSE3 -msse3 -D_GNU_SOURCE -O2 -fomit-frame-pointer -funroll-loops -D__AVX #-Wall -Wunused-parameter -Wredundant-decls -Wreturn-type -Wswitch-default -Wunused-value -Wimplicit -Wimplicit-function-declaration -Wimplicit-int -Wimport -Wunused -Wunused-function -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value -Wunused-variable -Wformat -Wformat-nonliteral -Wparentheses -Wsequence-point -Wuninitialized -Wundef -Wbad-function-cast
LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o mem_alloc.o #ll_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h # compiler.h gcc.h ll_asm.h ll_list.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-AVX $(objs) $(LIBRARIES)
@@ -44,6 +44,8 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+#ll_alloc.o : ll_alloc.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-AVX
View
5 Makefile.AVX.mac
@@ -9,11 +9,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o avxLikelihood.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-AVX $(objs) $(LIBRARIES)
@@ -44,6 +44,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-AVX
View
5 Makefile.HYBRID.gcc
@@ -10,11 +10,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC-HYBRID
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-HYBRID : $(objs)
$(CC) -o raxmlHPC-HYBRID $(objs) $(LIBRARIES)
@@ -40,6 +40,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-HYBRID
View
5 Makefile.MPI.gcc
@@ -10,11 +10,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC-MPI
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-MPI : $(objs)
$(CC) -o raxmlHPC-MPI $(objs) $(LIBRARIES)
@@ -40,6 +40,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-MPI
View
5 Makefile.PTHREADS.gcc
@@ -11,11 +11,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC-PTHREADS
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-PTHREADS : $(objs)
$(CC) -o raxmlHPC-PTHREADS $(objs) $(LIBRARIES)
@@ -41,6 +41,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-PTHREADS
View
5 Makefile.QuartetMPI.gcc
@@ -10,11 +10,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-QUARTET-MPI $(objs) $(LIBRARIES)
@@ -40,6 +40,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-QUARTET-MPI
View
5 Makefile.SSE3.HYBRID.gcc
@@ -8,11 +8,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC-HYBRID-SSE3
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-HYBRID-SSE3 : $(objs)
$(CC) -o raxmlHPC-HYBRID-SSE3 $(objs) $(LIBRARIES)
@@ -38,6 +38,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-HYBRID-SSE3
View
5 Makefile.SSE3.MPI.gcc
@@ -8,11 +8,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC-MPI-SSE3
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC-MPI-SSE3 : $(objs)
$(CC) -o raxmlHPC-MPI-SSE3 $(objs) $(LIBRARIES)
@@ -38,6 +38,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-MPI-SSE3
View
5 Makefile.SSE3.PTHREADS.gcc
@@ -10,11 +10,11 @@ LIBRARIES = -lm -pthread
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-PTHREADS-SSE3 $(objs) $(LIBRARIES)
@@ -40,6 +40,7 @@ fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-PTHREADS-SSE3
View
5 Makefile.SSE3.QuartetMPI.gcc
@@ -9,11 +9,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-SSE3-QUARTET-MPI $(objs) $(LIBRARIES)
@@ -39,6 +39,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-SSE3-QUARTET-MPI
View
5 Makefile.SSE3.gcc
@@ -9,11 +9,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC-SSE3 $(objs) $(LIBRARIES)
@@ -39,6 +39,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC-SSE3
View
5 Makefile.gcc
@@ -10,11 +10,11 @@ LIBRARIES = -lm
RM = rm -f
-objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o
+objs = axml.o optimizeModel.o multiple.o searchAlgo.o topologies.o parsePartitions.o treeIO.o models.o bipartitionList.o rapidBootstrap.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o classify.o fastDNAparsimony.o fastSearch.o leaveDropping.o rogueEPA.o ancestralStates.o mem_alloc.o
all : raxmlHPC
-GLOBAL_DEPS = axml.h globalVariables.h
+GLOBAL_DEPS = axml.h globalVariables.h mem_alloc.h
raxmlHPC : $(objs)
$(CC) -o raxmlHPC $(objs) $(LIBRARIES)
@@ -40,6 +40,7 @@ fastSearch.o : fastSearch.c $(GLOBAL_DEPS)
leaveDropping.o : leaveDropping.c $(GLOBAL_DEPS)
rogueEPA.o : rogueEPA.c $(GLOBAL_DEPS)
ancestralStates.o : ancestralStates.c $(GLOBAL_DEPS)
+mem_alloc.o : mem_alloc.c $(GLOBAL_DEPS)
clean :
$(RM) *.o raxmlHPC
View
44 ancestralStates.c
@@ -44,7 +44,7 @@
#include <string.h>
#include "axml.h"
-
+#include "mem_alloc.h"
extern char workdir[1024];
extern char run_id[128];
@@ -128,7 +128,7 @@ static void ancestralCat(double *v, double *sumBuffer, double *diagptable, int i
double
*ancestral = &sumBuffer[numStates * i],
sum = 0.0,
- *term = (double*)malloc(sizeof(double) * numStates);
+ *term = (double*)rax_malloc(sizeof(double) * numStates);
for(l = 0; l < numStates; l++)
{
@@ -146,7 +146,7 @@ static void ancestralCat(double *v, double *sumBuffer, double *diagptable, int i
for(l = 0; l < numStates; l++)
ancestral[l] = term[l] / sum;
- free(term);
+ rax_free(term);
}
static void newviewFlexCat_Ancestral(int tipCase, double *extEV,
@@ -156,7 +156,7 @@ static void newviewFlexCat_Ancestral(int tipCase, double *extEV,
int n, double *left, double *right, const int numStates, double *diagptable, double *sumBuffer)
{
double
- *x3 = (double *)malloc(sizeof(double) * numStates),
+ *x3 = (double *)rax_malloc(sizeof(double) * numStates),
*le, *ri, *v, *vl, *vr,
ump_x1, ump_x2, x1px2;
int
@@ -295,7 +295,7 @@ static void newviewFlexCat_Ancestral(int tipCase, double *extEV,
assert(0);
}
- free(x3);
+ rax_free(x3);
@@ -315,7 +315,7 @@ static void ancestralGamma(double *_v, double *sumBuffer, double *diagptable, in
*v,
*ancestral = &sumBuffer[gammaStates * i],
sum = 0.0,
- *term = (double*)malloc(sizeof(double) * numStates);
+ *term = (double*)rax_malloc(sizeof(double) * numStates);
for(l = 0; l < numStates; l++)
term[l] = 0.0;
@@ -340,7 +340,7 @@ static void ancestralGamma(double *_v, double *sumBuffer, double *diagptable, in
for(l = 0; l < numStates; l++)
ancestral[l] = term[l] / sum;
- free(term);
+ rax_free(term);
}
@@ -352,7 +352,7 @@ static void newviewFlexGamma_Ancestral(int tipCase,
{
double
*v,
- *x3 = (double*)malloc(sizeof(double) * 4 * numStates);
+ *x3 = (double*)rax_malloc(sizeof(double) * 4 * numStates);
double x1px2;
int i, j, l, k, scale;
double *vl, *vr, al, ar;
@@ -490,7 +490,7 @@ static void newviewFlexGamma_Ancestral(int tipCase,
- free(x3);
+ rax_free(x3);
}
void newviewIterativeAncestral(tree *tr)
@@ -570,7 +570,7 @@ void newviewIterativeAncestral(tree *tr)
case CAT:
{
double
- *diagptable = (double*)malloc_aligned(tr->partitionData[model].numberOfCategories * states * states * sizeof(double));
+ *diagptable = (double*)rax_malloc_aligned(tr->partitionData[model].numberOfCategories * states * states * sizeof(double));
makeP_Flex(qz, rz, tr->partitionData[model].perSiteRates,
tr->partitionData[model].EI,
@@ -589,14 +589,14 @@ void newviewIterativeAncestral(tree *tr)
tipX1, tipX2, width, left, right, states, diagptable,
tr->partitionData[model].sumBuffer);
- free(diagptable);
+ rax_free(diagptable);
}
break;
case GAMMA:
case GAMMA_I:
{
double
- *diagptable = (double*)malloc_aligned(4 * states * states * sizeof(double));
+ *diagptable = (double*)rax_malloc_aligned(4 * states * states * sizeof(double));
makeP_Flex(qz, rz, tr->partitionData[model].gammaRates,
tr->partitionData[model].EI,
@@ -616,7 +616,7 @@ void newviewIterativeAncestral(tree *tr)
tipX1, tipX2,
width, left, right, states, diagptable, tr->partitionData[model].sumBuffer);
- free(diagptable);
+ rax_free(diagptable);
}
break;
default:
@@ -744,8 +744,8 @@ static void computeAncestralRec(tree *tr, nodeptr p, int *counter, FILE *probsFi
globalIndex = 0;
ancestralState
- *a = (ancestralState *)malloc(sizeof(ancestralState) * tr->cdta->endsite),
- *unsortedA = (ancestralState *)malloc(sizeof(ancestralState) * tr->rdta->sites);
+ *a = (ancestralState *)rax_malloc(sizeof(ancestralState) * tr->cdta->endsite),
+ *unsortedA = (ancestralState *)rax_malloc(sizeof(ancestralState) * tr->rdta->sites);
if(!atRoot)
{
@@ -812,7 +812,7 @@ static void computeAncestralRec(tree *tr, nodeptr p, int *counter, FILE *probsFi
c;
a[globalIndex].states = states;
- a[globalIndex].probs = (double *)malloc(sizeof(double) * states);
+ a[globalIndex].probs = (double *)rax_malloc(sizeof(double) * states);
for(l = 0; l < states; l++)
{
@@ -860,7 +860,7 @@ static void computeAncestralRec(tree *tr, nodeptr p, int *counter, FILE *probsFi
unsortedA[unsorted].states = a[sorted].states;
unsortedA[unsorted].c = a[sorted].c;
- unsortedA[unsorted].probs = (double*)malloc(sizeof(double) * unsortedA[unsorted].states);
+ unsortedA[unsorted].probs = (double*)rax_malloc(sizeof(double) * unsortedA[unsorted].states);
memcpy(unsortedA[unsorted].probs, a[sorted].probs, sizeof(double) * a[sorted].states);
}
}
@@ -883,13 +883,13 @@ static void computeAncestralRec(tree *tr, nodeptr p, int *counter, FILE *probsFi
int j;
for(j = 0; j < tr->rdta->sites; j++)
- free(unsortedA[j].probs);
+ rax_free(unsortedA[j].probs);
for(j = 0; j < tr->cdta->endsite; j++)
- free(a[j].probs);
+ rax_free(a[j].probs);
}
- free(a);
- free(unsortedA);
+ rax_free(a);
+ rax_free(unsortedA);
}
static char *ancestralTreeRec(char *treestr, tree *tr, nodeptr p)
@@ -952,7 +952,7 @@ void computeAncestralStates(tree *tr, double referenceLikelihood)
*statesFile;
#ifdef _USE_PTHREADS
- tr->ancestralStates = (double*)malloc(getContiguousVectorLength(tr) * sizeof(double));
+ tr->ancestralStates = (double*)rax_malloc(getContiguousVectorLength(tr) * sizeof(double));
#endif
strcpy(ancestralProbsFileName, workdir);
View
507 avxLikelihood.c
@@ -839,13 +839,10 @@ void newviewGTRCAT_AVX(int tipCase, double *EV, int *cptr,
*le,
*ri,
*x1,
- *x2,
- *x3;
+ *x2;
int
i,
- j,
- scale,
addScale = 0;
__m256d
@@ -1022,7 +1019,6 @@ void newviewGTRCAT_AVX_GAPPED_SAVE(int tipCase, double *EV, int *cptr,
int
i,
- j,
scaleGap = 0,
addScale = 0;
@@ -1314,7 +1310,7 @@ void newviewGTRCATPROT_AVX(int tipCase, double *extEV,
double
*le, *ri, *v, *vl, *vr;
- int i, l, j, scale, addScale = 0;
+ int i, l, scale, addScale = 0;
#ifdef _FMA
int k;
@@ -1654,7 +1650,6 @@ void newviewGTRCATPROT_AVX_GAPPED_SAVE(int tipCase, double *extEV,
int
i,
l,
- j,
scale,
addScale = 0,
scaleGap = 0;
@@ -2651,6 +2646,503 @@ void newviewGTRGAMMAPROT_AVX(int tipCase,
*scalerIncrement = addScale;
}
+/*
+ * AVX kernel computing the conditional likelihood vector x3 of a node from
+ * its two children for 20-state data with four rate categories, where every
+ * category k uses its own eigenvector matrix extEV[k] and its own tip table
+ * tipVector[k] (LG4 variant of newviewGTRGAMMAPROT_AVX).
+ *
+ * Layout used by the code below:
+ *   - x1, x2, x3 : 80 doubles per site (4 categories x 20 states), site i
+ *                  starts at index 80 * i.
+ *   - left, right: read as 4 blocks of 20 x 20 doubles
+ *                  (index k * 400 + l * 20 + m).
+ *   - extEV[k]   : 20 x 20 doubles, row l starts at index 20 * l.
+ *   - tipVector[k]: 20 doubles per tip code; the lookup tables built here
+ *                  hold 23 codes, so tipX1[i] / tipX2[i] must be < 23.
+ *
+ * tipCase selects where the children come from:
+ *   TIP_TIP     - both children are tips (tipX1, tipX2); no scaling check.
+ *   TIP_INNER   - first child is a tip (tipX1), second is the vector x2.
+ *   INNER_INNER - both children are the vectors x1 and x2.
+ *
+ * Scaling (TIP_INNER and INNER_INNER only): when the absolute value of all
+ * 80 entries of a site is below minlikelihood, the site is multiplied by
+ * twotothe256 and the event is recorded - as wgt[i] added to the running
+ * sum when useFastScaling is set, otherwise as ex3[i] += 1.
+ * *scalerIncrement is written (with that sum) only when useFastScaling is
+ * set.
+ *
+ * All vectors are accessed with aligned 256-bit loads/stores, so the
+ * buffers must be 32-byte aligned.
+ *
+ * NOTE(review): hadd3() is defined elsewhere; the code relies on it
+ * returning the horizontal sum of the four lanes in every lane - confirm.
+ */
+void newviewGTRGAMMAPROT_AVX_LG4(int tipCase,
+                                 double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
+                                 int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n,
+                                 double *left, double *right, int *wgt, int *scalerIncrement, const boolean useFastScaling)
+{
+  double
+    *uX1,
+    *uX2,
+    *v,
+    x1px2,
+    *vl,
+    *vr;
+
+  int
+    i,
+    j,
+    l,
+    k,
+    scale,
+    addScale = 0;
+
+
+#ifndef GCC_VERSION
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif
+
+  /* mask for _mm256_maskstore_pd that selects only the lowest lane, i.e. a
+     single double is written; the mask type differs between compiler versions */
+#if GCC_VERSION < 40500
+  __m256d
+    bitmask = _mm256_set_pd(0,0,0,-1);
+#else
+  __m256i
+    bitmask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
+#endif
+
+  switch(tipCase)
+    {
+    case TIP_TIP:
+      {
+
+        /* lookup tables: 23 tip codes x 80 (category, state) entries */
+        double
+          umpX1[1840] __attribute__ ((aligned (BYTE_ALIGNMENT))),
+          umpX2[1840] __attribute__ ((aligned (BYTE_ALIGNMENT)));
+
+
+        /* for every tip code i and every row k of left/right, store the dot
+           product of the tip vector of category k / 20 with that row */
+        for(i = 0; i < 23; i++)
+          {
+            for(k = 0; k < 80; k++)
+              {
+                double
+                  *ll = &left[k * 20],
+                  *rr = &right[k * 20];
+
+                __m256d
+                  umpX1v = _mm256_setzero_pd(),
+                  umpX2v = _mm256_setzero_pd();
+
+                v = &(tipVector[k / 20][20 * i]);
+
+                for(l = 0; l < 20; l+=4)
+                  {
+                    __m256d vv = _mm256_load_pd(&v[l]);
+#ifdef _FMA
+                    __m256d llv = _mm256_load_pd(&ll[l]);
+                    umpX1v = FMAMACC(umpX1v,vv,llv);
+                    __m256d rrv = _mm256_load_pd(&rr[l]);
+                    umpX2v = FMAMACC(umpX2v,vv,rrv);
+#else
+                    umpX1v = _mm256_add_pd(umpX1v,_mm256_mul_pd(vv,_mm256_load_pd(&ll[l])));
+                    umpX2v = _mm256_add_pd(umpX2v,_mm256_mul_pd(vv,_mm256_load_pd(&rr[l])));
+#endif
+                  }
+
+                umpX1v = hadd3(umpX1v);
+                umpX2v = hadd3(umpX2v);
+                _mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
+                _mm256_maskstore_pd(&umpX2[80 * i + k], bitmask, umpX2v);
+              }
+          }
+
+        for(i = 0; i < n; i++)
+          {
+            uX1 = &umpX1[80 * tipX1[i]];
+            uX2 = &umpX2[80 * tipX2[i]];
+
+            /* j is the rate category; it selects the eigenvector matrix extEV[j] */
+            for(j = 0; j < 4; j++)
+              {
+                __m256d vv[5];
+
+                v = &x3[i * 80 + j * 20];
+
+                vv[0] = _mm256_setzero_pd();
+                vv[1] = _mm256_setzero_pd();
+                vv[2] = _mm256_setzero_pd();
+                vv[3] = _mm256_setzero_pd();
+                vv[4] = _mm256_setzero_pd();
+
+                /* accumulate x1px2 * (row k of extEV[j]) into the five
+                   4-double accumulators covering the 20 states */
+                for(k = 0; k < 20; k++)
+                  {
+                    x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
+
+                    __m256d x1px2v = _mm256_set1_pd(x1px2);
+
+                    __m256d extEvv = _mm256_load_pd(&extEV[j][20 * k]);
+#ifdef _FMA
+                    vv[0] = FMAMACC(vv[0],x1px2v,extEvv);
+#else
+                    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v,extEvv));
+#endif
+                    _mm256_store_pd(&v[0],vv[0]);
+
+                    extEvv = _mm256_load_pd(&extEV[j][20 * k + 4]);
+#ifdef _FMA
+                    vv[1] = FMAMACC(vv[1],x1px2v,extEvv);
+#else
+                    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v,extEvv));
+#endif
+                    _mm256_store_pd(&v[4],vv[1]);
+
+                    extEvv = _mm256_load_pd(&extEV[j][20 * k + 8]);
+#ifdef _FMA
+                    vv[2] = FMAMACC(vv[2],x1px2v,extEvv);
+#else
+                    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v,extEvv));
+#endif
+                    _mm256_store_pd(&v[8],vv[2]);
+
+                    extEvv = _mm256_load_pd(&extEV[j][20 * k + 12]);
+#ifdef _FMA
+                    vv[3] = FMAMACC(vv[3],x1px2v,extEvv);
+#else
+                    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v,extEvv));
+#endif
+                    _mm256_store_pd(&v[12],vv[3]);
+
+                    extEvv = _mm256_load_pd(&extEV[j][20 * k + 16]);
+#ifdef _FMA
+                    vv[4] = FMAMACC(vv[4],x1px2v,extEvv);
+#else
+                    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v,extEvv));
+#endif
+                    _mm256_store_pd(&v[16],vv[4]);
+                  }
+              }
+          }
+      }
+      break;
+    case TIP_INNER:
+      {
+
+        /* umpX1: tip lookup table (23 codes x 80 entries);
+           ump_x2: the 20 dot products of the inner child for one category */
+        double
+          umpX1[1840] __attribute__ ((aligned (BYTE_ALIGNMENT))),
+          ump_x2[20] __attribute__ ((aligned (BYTE_ALIGNMENT)));
+
+        for(i = 0; i < 23; i++)
+          {
+            for(k = 0; k < 80; k++)
+              {
+                __m256d umpX1v = _mm256_setzero_pd();
+
+                v = &(tipVector[k / 20][20 * i]);
+
+                for(l = 0; l < 20; l+=4)
+                  {
+                    __m256d vv = _mm256_load_pd(&v[l]);
+                    __m256d leftv = _mm256_load_pd(&left[k * 20 + l]);
+#ifdef _FMA
+
+                    umpX1v = FMAMACC(umpX1v, vv, leftv);
+#else
+                    umpX1v = _mm256_add_pd(umpX1v, _mm256_mul_pd(vv, leftv));
+#endif
+                  }
+                umpX1v = hadd3(umpX1v);
+                _mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
+              }
+          }
+
+        for (i = 0; i < n; i++)
+          {
+            uX1 = &umpX1[80 * tipX1[i]];
+
+            /* k is the rate category */
+            for(k = 0; k < 4; k++)
+              {
+                v = &(x2[80 * i + k * 20]);
+
+                /* ump_x2[l] = dot product of the x2 block with row l of
+                   the right matrix of category k */
+                for(l = 0; l < 20; l++)
+                  {
+                    __m256d ump_x2v = _mm256_setzero_pd();
+
+                    __m256d vv = _mm256_load_pd(&v[0]);
+                    __m256d rightv = _mm256_load_pd(&right[k*400+l*20+0]);
+#ifdef _FMA
+                    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
+#else
+                    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
+#endif
+
+                    vv = _mm256_load_pd(&v[4]);
+                    rightv = _mm256_load_pd(&right[k*400+l*20+4]);
+#ifdef _FMA
+                    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
+#else
+                    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
+#endif
+
+                    vv = _mm256_load_pd(&v[8]);
+                    rightv = _mm256_load_pd(&right[k*400+l*20+8]);
+#ifdef _FMA
+                    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
+#else
+                    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
+#endif
+
+                    vv = _mm256_load_pd(&v[12]);
+                    rightv = _mm256_load_pd(&right[k*400+l*20+12]);
+#ifdef _FMA
+                    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
+#else
+                    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
+#endif
+
+                    vv = _mm256_load_pd(&v[16]);
+                    rightv = _mm256_load_pd(&right[k*400+l*20+16]);
+#ifdef _FMA
+                    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
+#else
+                    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
+#endif
+
+                    ump_x2v = hadd3(ump_x2v);
+                    _mm256_maskstore_pd(&ump_x2[l], bitmask, ump_x2v);
+                  }
+
+                v = &(x3[80 * i + 20 * k]);
+
+
+                __m256d vv[5];
+
+                vv[0] = _mm256_setzero_pd();
+                vv[1] = _mm256_setzero_pd();
+                vv[2] = _mm256_setzero_pd();
+                vv[3] = _mm256_setzero_pd();
+                vv[4] = _mm256_setzero_pd();
+
+                for(l = 0; l < 20; l++)
+                  {
+                    x1px2 = uX1[k * 20 + l] * ump_x2[l];
+                    __m256d x1px2v = _mm256_set1_pd(x1px2);
+
+                    /* extEV is an array of four matrix pointers, so the
+                       category index k must be applied before the element
+                       offset in both the FMA and the non-FMA path */
+#ifdef _FMA
+                    __m256d ev = _mm256_load_pd(&extEV[k][l * 20 + 0]);
+                    vv[0] = FMAMACC(vv[0],x1px2v, ev);
+#else
+                    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 0])));
+#endif
+                    _mm256_store_pd(&v[0],vv[0]);
+
+#ifdef _FMA
+                    ev = _mm256_load_pd(&extEV[k][l * 20 + 4]);
+                    vv[1] = FMAMACC(vv[1],x1px2v, ev);
+#else
+                    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 4])));
+#endif
+                    _mm256_store_pd(&v[4],vv[1]);
+
+#ifdef _FMA
+                    ev = _mm256_load_pd(&extEV[k][l * 20 + 8]);
+                    vv[2] = FMAMACC(vv[2],x1px2v, ev);
+#else
+                    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 8])));
+#endif
+                    _mm256_store_pd(&v[8],vv[2]);
+
+#ifdef _FMA
+                    ev = _mm256_load_pd(&extEV[k][l * 20 + 12]);
+                    vv[3] = FMAMACC(vv[3],x1px2v, ev);
+#else
+                    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 12])));
+#endif
+                    _mm256_store_pd(&v[12],vv[3]);
+
+
+#ifdef _FMA
+                    ev = _mm256_load_pd(&extEV[k][l * 20 + 16]);
+                    vv[4] = FMAMACC(vv[4],x1px2v, ev);
+#else
+                    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 16])));
+#endif
+                    _mm256_store_pd(&v[16],vv[4]);
+
+                  }
+              }
+
+            /* scaling check over all 80 entries of the site: stop at the
+               first group of four that is not entirely below minlikelihood */
+            v = &x3[80 * i];
+            __m256d minlikelihood_avx = _mm256_set1_pd(minlikelihood);
+            scale = 1;
+            for(l = 0; scale && (l < 80); l += 4)
+              {
+                __m256d vv = _mm256_load_pd(&v[l]);
+                __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
+                vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
+                if(_mm256_movemask_pd(vv_abs) != 15)
+                  scale = 0;
+              }
+
+            if(scale)
+              {
+                __m256d twotothe256v = _mm256_set_pd(twotothe256,twotothe256,twotothe256,twotothe256);
+                for(l = 0; l < 80; l += 4)
+                  {
+                    __m256d vv = _mm256_load_pd(&v[l]);
+                    _mm256_store_pd(&v[l],_mm256_mul_pd(vv,twotothe256v));
+                  }
+                if(useFastScaling)
+                  addScale += wgt[i];
+                else
+                  ex3[i] += 1;
+              }
+          }
+      }
+      break;
+    case INNER_INNER:
+      for(i = 0; i < n; i++)
+        {
+          scale = 1;
+
+          /* k is the rate category */
+          for(k = 0; k < 4; k++)
+            {
+              vl = &(x1[80 * i + 20 * k]);
+              vr = &(x2[80 * i + 20 * k]);
+              v = &(x3[80 * i + 20 * k]);
+
+              __m256d vv[5];
+
+              vv[0] = _mm256_setzero_pd();
+              vv[1] = _mm256_setzero_pd();
+              vv[2] = _mm256_setzero_pd();
+              vv[3] = _mm256_setzero_pd();
+              vv[4] = _mm256_setzero_pd();
+
+              for(l = 0; l < 20; l++)
+                {
+                  /* al / ar accumulate the dot products of the x1 / x2
+                     blocks with row l of the left / right matrices */
+                  __m256d al = _mm256_setzero_pd();
+                  __m256d ar = _mm256_setzero_pd();
+
+                  __m256d leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 0]);
+                  __m256d rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 0]);
+                  __m256d vlv = _mm256_load_pd(&vl[0]);
+                  __m256d vrv = _mm256_load_pd(&vr[0]);
+
+#ifdef _FMA
+
+                  al = FMAMACC(al, vlv, leftv);
+                  ar = FMAMACC(ar, vrv, rightv);
+#else
+                  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
+                  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
+#endif
+
+                  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 4]);
+                  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 4]);
+                  vlv = _mm256_load_pd(&vl[4]);
+                  vrv = _mm256_load_pd(&vr[4]);
+#ifdef _FMA
+
+                  al = FMAMACC(al, vlv, leftv);
+                  ar = FMAMACC(ar, vrv, rightv);
+#else
+                  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
+                  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
+#endif
+
+                  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 8]);
+                  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 8]);
+                  vlv = _mm256_load_pd(&vl[8]);
+                  vrv = _mm256_load_pd(&vr[8]);
+#ifdef _FMA
+
+                  al = FMAMACC(al, vlv, leftv);
+                  ar = FMAMACC(ar, vrv, rightv);
+#else
+                  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
+                  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
+#endif
+
+                  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 12]);
+                  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 12]);
+                  vlv = _mm256_load_pd(&vl[12]);
+                  vrv = _mm256_load_pd(&vr[12]);
+#ifdef _FMA
+
+                  al = FMAMACC(al, vlv, leftv);
+                  ar = FMAMACC(ar, vrv, rightv);
+#else
+                  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
+                  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
+#endif
+
+                  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 16]);
+                  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 16]);
+                  vlv = _mm256_load_pd(&vl[16]);
+                  vrv = _mm256_load_pd(&vr[16]);
+
+#ifdef _FMA
+                  al = FMAMACC(al, vlv, leftv);
+                  ar = FMAMACC(ar, vrv, rightv);
+#else
+                  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
+                  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
+#endif
+
+                  /**************************************************************************************************************/
+
+                  al = hadd3(al);
+                  ar = hadd3(ar);
+                  al = _mm256_mul_pd(ar,al);
+
+                  /************************************************************************************************************/
+                  /* extEV is an array of four matrix pointers, so the
+                     category index k must be applied before the element
+                     offset in both the FMA and the non-FMA path */
+#ifdef _FMA
+                  __m256d ev = _mm256_load_pd(&extEV[k][20 * l + 0]);
+                  vv[0] = FMAMACC(vv[0], al, ev);
+#else
+                  vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 0])));
+#endif
+                  _mm256_store_pd(&v[0],vv[0]);
+
+#ifdef _FMA
+                  ev = _mm256_load_pd(&extEV[k][20 * l + 4]);
+                  vv[1] = FMAMACC(vv[1], al, ev);
+#else
+                  vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 4])));
+#endif
+                  _mm256_store_pd(&v[4],vv[1]);
+
+#ifdef _FMA
+                  ev = _mm256_load_pd(&extEV[k][20 * l + 8]);
+                  vv[2] = FMAMACC(vv[2], al, ev);
+#else
+                  vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 8])));
+#endif
+                  _mm256_store_pd(&v[8],vv[2]);
+
+#ifdef _FMA
+                  ev = _mm256_load_pd(&extEV[k][20 * l + 12]);
+                  vv[3] = FMAMACC(vv[3], al, ev);
+#else
+                  vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 12])));
+#endif
+                  _mm256_store_pd(&v[12],vv[3]);
+
+#ifdef _FMA
+                  ev = _mm256_load_pd(&extEV[k][20 * l + 16]);
+                  vv[4] = FMAMACC(vv[4], al, ev);
+#else
+                  vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 16])));
+#endif
+                  _mm256_store_pd(&v[16],vv[4]);
+                }
+            }
+
+          /* scaling check over all 80 entries of the site: stop at the
+             first group of four that is not entirely below minlikelihood */
+          v = &(x3[80 * i]);
+          scale = 1;
+          __m256d minlikelihood_avx = _mm256_set1_pd(minlikelihood);
+
+          for(l = 0; scale && (l < 80); l += 4)
+            {
+              __m256d vv = _mm256_load_pd(&v[l]);
+              __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
+              vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
+              if(_mm256_movemask_pd(vv_abs) != 15)
+                scale = 0;
+            }
+
+          if(scale)
+            {
+              __m256d twotothe256v = _mm256_set_pd(twotothe256,twotothe256,twotothe256,twotothe256);
+              for(l = 0; l < 80; l += 4)
+                {
+                  __m256d vv = _mm256_load_pd(&v[l]);
+                  _mm256_store_pd(&v[l],_mm256_mul_pd(vv,twotothe256v));
+                }
+              if(useFastScaling)
+                addScale += wgt[i];
+              else
+                ex3[i] += 1;
+            }
+        }
+      break;
+    default:
+      assert(0);
+    }
+
+  if(useFastScaling)
+    *scalerIncrement = addScale;
+}
+
+
void newviewGTRGAMMAPROT_AVX_GAPPED_SAVE(int tipCase,
double *x1_start, double *x2_start, double *x3_start, double *extEV, double *tipVector,
int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n,
@@ -2661,7 +3153,6 @@ void newviewGTRGAMMAPROT_AVX_GAPPED_SAVE(int tipCase,
double
*x1 = x1_start,
*x2 = x2_start,
- *x3 = x3_start,
*x3_ptr = x3_start,
*x2_ptr = x2_start,
*x1_ptr = x1_start,
View
542 axml.c
@@ -72,10 +72,11 @@
#endif
#include "axml.h"
+#include "mem_alloc.h"
#include "globalVariables.h"
-#define _PORTABLE_PTHREADS
+//#define _PORTABLE_PTHREADS
/***************** UTILITY FUNCTIONS **************************/
@@ -92,44 +93,6 @@ double FABS(double x)
return fabs(x);
}
-void *malloc_aligned(size_t size)
-{
- void
- *ptr = (void *)NULL;
-
- int
- res;
-
-#if (defined(__APPLE__) && (!defined(__AVX)))
- /*
- malloc on MACs always returns
- a 16-byte aligned pointer
- */
-
- ptr = malloc(size);
-
- if(ptr == (void*)NULL)
- assert(0);
-
-#else
- res = posix_memalign( &ptr, BYTE_ALIGNMENT, size );
-
- if(res != 0)
- assert(0);
-#endif
-
- /*
- to ensure that the allocated pages are mapped
- correctly on the distributed shared memory system:
-
- for(i=0; i<N; i++)
- // or i+=PAGE_SIZE
- huge[i] = 0.0; // mapping takes place here!
-
- */
-
- return ptr;
-}
@@ -615,7 +578,7 @@ static void rax_getline_insptr_valid(char **lineptr, size_t *n, size_t ins_ptr )
*n += n_inc;
- *lineptr = (char*)realloc((void*)(*lineptr), *n * sizeof(char));
+ *lineptr = (char*)rax_realloc((void*)(*lineptr), *n * sizeof(char));
assert(*lineptr != 0);
}
@@ -770,7 +733,7 @@ static void getnums (rawdata *rdta, analdef *adef)
assert(sites == sequenceLength);
if(line)
- free(line);
+ rax_free(line);
rewind(INFILE);
@@ -828,10 +791,10 @@ static void getyspace (rawdata *rdta)
int i;
unsigned char *y0;
- rdta->y = (unsigned char **) malloc((rdta->numsp + 1) * sizeof(unsigned char *));
+ rdta->y = (unsigned char **) rax_malloc((rdta->numsp + 1) * sizeof(unsigned char *));
assert(rdta->y);
- y0 = (unsigned char *) malloc(((size_t)(rdta->numsp + 1)) * size * sizeof(unsigned char));
+ y0 = (unsigned char *) rax_malloc(((size_t)(rdta->numsp + 1)) * size * sizeof(unsigned char));
assert(y0);
rdta->y0 = y0;
@@ -891,13 +854,13 @@ static boolean setupTree (tree *tr, analdef *adef)
tr->maxCategories = MAX(4, adef->categories);
- tr->partitionContributions = (double *)malloc(sizeof(double) * tr->NumberOfModels);
+ tr->partitionContributions = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels);
for(i = 0; i < tr->NumberOfModels; i++)
tr->partitionContributions[i] = -1.0;
- tr->perPartitionLH = (double *)malloc(sizeof(double) * tr->NumberOfModels);
- tr->storedPerPartitionLH = (double *)malloc(sizeof(double) * tr->NumberOfModels);
+ tr->perPartitionLH = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels);
+ tr->storedPerPartitionLH = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels);
for(i = 0; i < tr->NumberOfModels; i++)
{
@@ -923,13 +886,13 @@ static boolean setupTree (tree *tr, analdef *adef)
if(!adef->readTaxaOnly)
{
- tr->yVector = (unsigned char **) malloc((tr->mxtips + 1) * sizeof(unsigned char *));
+ tr->yVector = (unsigned char **) rax_malloc((tr->mxtips + 1) * sizeof(unsigned char *));
- tr->fracchanges = (double *)malloc(tr->NumberOfModels * sizeof(double));
+ tr->fracchanges = (double *)rax_malloc(tr->NumberOfModels * sizeof(double));
- tr->likelihoods = (double *)malloc(adef->multipleRuns * sizeof(double));
+ tr->likelihoods = (double *)rax_malloc(adef->multipleRuns * sizeof(double));
}
tr->numberOfTrees = -1;
@@ -938,7 +901,7 @@ static boolean setupTree (tree *tr, analdef *adef)
tr->treeStringLength = tr->mxtips * (nmlngth+128) + 256 + tr->mxtips * 2;
- tr->tree_string = (char*)calloc(tr->treeStringLength, sizeof(char));
+ tr->tree_string = (char*)rax_calloc(tr->treeStringLength, sizeof(char));
/*TODO, must that be so long ?*/
@@ -946,7 +909,7 @@ static boolean setupTree (tree *tr, analdef *adef)
{
tr->td[0].count = 0;
- tr->td[0].ti = (traversalInfo *)malloc(sizeof(traversalInfo) * tr->mxtips);
+ tr->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * tr->mxtips);
for(i = 0; i < tr->NumberOfModels; i++)
tr->fracchanges[i] = -1.0;
@@ -955,18 +918,18 @@ static boolean setupTree (tree *tr, analdef *adef)
tr->fracchange = -1.0;
- tr->constraintVector = (int *)malloc((2 * tr->mxtips) * sizeof(int));
+ tr->constraintVector = (int *)rax_malloc((2 * tr->mxtips) * sizeof(int));
- tr->nameList = (char **)malloc(sizeof(char *) * (tips + 1));
+ tr->nameList = (char **)rax_malloc(sizeof(char *) * (tips + 1));
}
- if (!(p0 = (nodeptr) malloc((tips + 3*inter) * sizeof(node))))
+ if (!(p0 = (nodeptr) rax_malloc((tips + 3*inter) * sizeof(node))))
{
printf("ERROR: Unable to obtain sufficient tree memory\n");
return FALSE;
}
- if (!(tr->nodep = (nodeptr *) malloc((2*tr->mxtips) * sizeof(nodeptr))))
+ if (!(tr->nodep = (nodeptr *) rax_malloc((2*tr->mxtips) * sizeof(nodeptr))))
{
printf("ERROR: Unable to obtain sufficient tree memory, too\n");
return FALSE;
@@ -1237,7 +1200,7 @@ static boolean getdata(analdef *adef, rawdata *rdta, tree *tr)
buffer[my_i] = '\0';
len = strlen(buffer) + 1;
checkTaxonName(buffer, len);
- tr->nameList[i] = (char *)malloc(sizeof(char) * len);
+ tr->nameList[i] = (char *)rax_malloc(sizeof(char) * len);
strcpy(tr->nameList[i], buffer);
}
@@ -1563,7 +1526,7 @@ static void parseFasta(analdef *adef, rawdata *rdta, tree *tr)
buffer[nameCount] = '\0';
nameLength = strlen(buffer) + 1;
checkTaxonName(buffer, nameLength);
- tr->nameList[taxa] = (char *)malloc(sizeof(char) * nameLength);
+ tr->nameList[taxa] = (char *)rax_malloc(sizeof(char) * nameLength);
strcpy(tr->nameList[taxa], buffer);
sites = 0;
@@ -1633,7 +1596,7 @@ static void parseFasta(analdef *adef, rawdata *rdta, tree *tr)
assert(sites == sequenceLength);
if(line)
- free(line);
+ rax_free(line);
}
@@ -1707,7 +1670,7 @@ static void inputweights (rawdata *rdta)
{
int i, w, fres;
FILE *weightFile;
- int *wv = (int *)malloc(sizeof(int) * rdta->sites);
+ int *wv = (int *)rax_malloc(sizeof(int) * rdta->sites);
weightFile = myfopen(weightFileName, "rb");
@@ -1736,7 +1699,7 @@ static void inputweights (rawdata *rdta)
rdta->wgt[i] = wv[i - 1];
fclose(weightFile);
- free(wv);
+ rax_free(wv);
}
@@ -1756,15 +1719,15 @@ static void getinput(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
if(!adef->readTaxaOnly)
{
- rdta->wgt = (int *) malloc((rdta->sites + 1) * sizeof(int));
- cdta->alias = (int *) malloc((rdta->sites + 1) * sizeof(int));
- cdta->aliaswgt = (int *) malloc((rdta->sites + 1) * sizeof(int));
- cdta->rateCategory = (int *) malloc((rdta->sites + 1) * sizeof(int));
- tr->model = (int *) calloc((rdta->sites + 1), sizeof(int));
- tr->initialDataVector = (int *) malloc((rdta->sites + 1) * sizeof(int));
- tr->extendedDataVector = (int *) malloc((rdta->sites + 1) * sizeof(int));
- cdta->patrat = (double *) malloc((rdta->sites + 1) * sizeof(double));
- cdta->patratStored = (double *) malloc((rdta->sites + 1) * sizeof(double));
+ rdta->wgt = (int *) rax_malloc((rdta->sites + 1) * sizeof(int));
+ cdta->alias = (int *) rax_malloc((rdta->sites + 1) * sizeof(int));
+ cdta->aliaswgt = (int *) rax_malloc((rdta->sites + 1) * sizeof(int));
+ cdta->rateCategory = (int *) rax_malloc((rdta->sites + 1) * sizeof(int));
+ tr->model = (int *) rax_calloc((rdta->sites + 1), sizeof(int));
+ tr->initialDataVector = (int *) rax_malloc((rdta->sites + 1) * sizeof(int));
+ tr->extendedDataVector = (int *) rax_malloc((rdta->sites + 1) * sizeof(int));
+ cdta->patrat = (double *) rax_malloc((rdta->sites + 1) * sizeof(double));
+ cdta->patratStored = (double *) rax_malloc((rdta->sites + 1) * sizeof(double));
@@ -1802,8 +1765,8 @@ static void getinput(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
int
dataType = -1;
- tr->initialPartitionData = (pInfo*)malloc(sizeof(pInfo));
- tr->initialPartitionData[0].partitionName = (char*)malloc(128 * sizeof(char));
+ tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo));
+ tr->initialPartitionData[0].partitionName = (char*)rax_malloc(128 * sizeof(char));
strcpy(tr->initialPartitionData[0].partitionName, "No Name Provided");
tr->initialPartitionData[0].protModels = adef->proteinMatrix;
@@ -1852,11 +1815,11 @@ static void getinput(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
{
memcpy(tr->extendedDataVector, tr->initialDataVector, (rdta->sites + 1) * sizeof(int));
- tr->extendedPartitionData =(pInfo*)malloc(sizeof(pInfo) * tr->NumberOfModels);
+ tr->extendedPartitionData =(pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels);
for(i = 0; i < tr->NumberOfModels; i++)
{
- tr->extendedPartitionData[i].partitionName = (char*)malloc((strlen(tr->initialPartitionData[i].partitionName) + 1) * sizeof(char));
+ tr->extendedPartitionData[i].partitionName = (char*)rax_malloc((strlen(tr->initialPartitionData[i].partitionName) + 1) * sizeof(char));
strcpy(tr->extendedPartitionData[i].partitionName, tr->initialPartitionData[i].partitionName);
strcpy(tr->extendedPartitionData[i].proteinSubstitutionFileName, tr->initialPartitionData[i].proteinSubstitutionFileName);
tr->extendedPartitionData[i].dataType = tr->initialPartitionData[i].dataType;
@@ -1883,7 +1846,7 @@ static void getinput(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
- tr->executeModel = (boolean *)malloc(sizeof(boolean) * tr->NumberOfModels);
+ tr->executeModel = (boolean *)rax_malloc(sizeof(boolean) * tr->NumberOfModels);
for(i = 0; i < tr->NumberOfModels; i++)
tr->executeModel[i] = TRUE;
@@ -2120,7 +2083,7 @@ static unsigned char buildStates(int secModel, unsigned char v1, unsigned char v
static void adaptRdataToSecondary(tree *tr, rawdata *rdta)
{
- int *alias = (int*)calloc(rdta->sites, sizeof(int));
+ int *alias = (int*)rax_calloc(rdta->sites, sizeof(int));
int i, j, realPosition;
for(i = 0; i < rdta->sites; i++)
@@ -2170,7 +2133,7 @@ static void adaptRdataToSecondary(tree *tr, rawdata *rdta)
rdta->y[j][i+1] = rdta->y[j][alias[i]+1];
}
- free(alias);
+ rax_free(alias);
}
static void sitesort(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef)
@@ -2261,15 +2224,15 @@ static void sitecombcrunch (rawdata *rdta, cruncheddata *cdta, tree *tr, analdef
*aliasModel = (int*)NULL,
*aliasSuperModel = (int*)NULL;
- tr->origNumSitePerModel = (int*)calloc(tr->NumberOfModels, sizeof(int));
+ tr->origNumSitePerModel = (int*)rax_calloc(tr->NumberOfModels, sizeof(int));
for(i = 1; i <= rdta->sites; i++)
tr->origNumSitePerModel[tr->model[i]]++;
if(adef->useMultipleModel)
{
- aliasSuperModel = (int*)malloc(sizeof(int) * (rdta->sites + 1));
- aliasModel = (int*)malloc(sizeof(int) * (rdta->sites + 1));
+ aliasSuperModel = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
+ aliasModel = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
}
i = 0;
@@ -2280,8 +2243,8 @@ static void sitecombcrunch (rawdata *rdta, cruncheddata *cdta, tree *tr, analdef
{
int i;
- tr->patternPosition = (int*)malloc(sizeof(int) * rdta->sites);
- tr->columnPosition = (int*)malloc(sizeof(int) * rdta->sites);
+ tr->patternPosition = (int*)rax_malloc(sizeof(int) * rdta->sites);
+ tr->columnPosition = (int*)rax_malloc(sizeof(int) * rdta->sites);
for(i = 0; i < rdta->sites; i++)
{
@@ -2380,8 +2343,8 @@ static void sitecombcrunch (rawdata *rdta, cruncheddata *cdta, tree *tr, analdef
if(adef->useMultipleModel)
{
- free(aliasModel);
- free(aliasSuperModel);
+ rax_free(aliasModel);
+ rax_free(aliasSuperModel);
}
}
@@ -2407,15 +2370,15 @@ static boolean makevalues(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *
int i, j, model, fullSites = 0, modelCounter;
unsigned char
- *y = (unsigned char *)malloc(((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)),
- *yBUF = (unsigned char *)malloc( ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char));
+ *y = (unsigned char *)rax_malloc(((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)),
+ *yBUF = (unsigned char *)rax_malloc( ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char));
for (i = 1; i <= rdta->numsp; i++)
for (j = 0; j < cdta->endsite; j++)
y[(((size_t)(i - 1)) * ((size_t)cdta->endsite)) + j] = rdta->y[i][cdta->alias[j]];
- free(rdta->y0);
- free(rdta->y);
+ rax_free(rdta->y0);
+ rax_free(rdta->y);
rdta->y0 = y;
memcpy(yBUF, y, ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char));
@@ -2479,10 +2442,10 @@ static boolean makevalues(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *
tr->rdta = rdta;
tr->cdta = cdta;
- tr->invariant = (int *)malloc(cdta->endsite * sizeof(int));
- tr->originalDataVector = (int *)malloc(cdta->endsite * sizeof(int));
- tr->originalModel = (int *)malloc(cdta->endsite * sizeof(int));
- tr->originalWeights = (int *)malloc(cdta->endsite * sizeof(int));
+ tr->invariant = (int *)rax_malloc(cdta->endsite * sizeof(int));
+ tr->originalDataVector = (int *)rax_malloc(cdta->endsite * sizeof(int));
+ tr->originalModel = (int *)rax_malloc(cdta->endsite * sizeof(int));
+ tr->originalWeights = (int *)rax_malloc(cdta->endsite * sizeof(int));
memcpy(tr->originalModel, tr->model, cdta->endsite * sizeof(int));
memcpy(tr->originalDataVector, tr->dataVector, cdta->endsite * sizeof(int));
@@ -2523,9 +2486,9 @@ static void checkSequences(tree *tr, rawdata *rdta, analdef *adef)
{
int n = tr->mxtips + 1;
int i, j;
- int *omissionList = (int *)calloc(n, sizeof(int));
- int *undeterminedList = (int *)calloc((rdta->sites + 1), sizeof(int));
- int *modelList = (int *)malloc((rdta->sites + 1)* sizeof(int));
+ int *omissionList = (int *)rax_calloc(n, sizeof(int));
+ int *undeterminedList = (int *)rax_calloc((rdta->sites + 1), sizeof(int));
+ int *modelList = (int *)rax_malloc((rdta->sites + 1)* sizeof(int));
int count = 0;
int countNameDuplicates = 0;
int countUndeterminedColumns = 0;
@@ -2867,9 +2830,9 @@ static void checkSequences(tree *tr, rawdata *rdta, analdef *adef)
}
}
- free(undeterminedList);
- free(omissionList);
- free(modelList);
+ rax_free(undeterminedList);
+ rax_free(omissionList);
+ rax_free(modelList);
}
@@ -2940,7 +2903,7 @@ static void splitMultiGene(tree *tr, rawdata *rdta)
{
int i, l;
int n = rdta->sites + 1;
- int *modelFilter = (int *)malloc(sizeof(int) * n);
+ int *modelFilter = (int *)rax_malloc(sizeof(int) * n);
int length, k;
unsigned char *tip;
FILE *outf;
@@ -2998,7 +2961,7 @@ static void splitMultiGene(tree *tr, rawdata *rdta)
printf("Wrote individual gene/partition alignment to file %s\n", outFileName);
}
- free(modelFilter);
+ rax_free(modelFilter);
printf("Wrote all %d individual gene/partition alignments\n", tr->NumberOfModels);
printf("Exiting normally\n");
}
@@ -3066,17 +3029,17 @@ static void allocPartitions(tree *tr)
if(tr->useFastScaling)
- tr->partitionData[i].globalScaler = (unsigned int *)calloc(2 * tr->mxtips, sizeof(unsigned int));
+ tr->partitionData[i].globalScaler = (unsigned int *)rax_calloc(2 * tr->mxtips, sizeof(unsigned int));
- tr->partitionData[i].left = (double *)malloc_aligned(pl->leftLength * (maxCategories + 1) * sizeof(double));
- tr->partitionData[i].right = (double *)malloc_aligned(pl->rightLength * (maxCategories + 1) * sizeof(double));
- tr->partitionData[i].EIGN = (double*)malloc(pl->eignLength * sizeof(double));
- tr->partitionData[i].EV = (double*)malloc_aligned(pl->evLength * sizeof(double));
- tr->partitionData[i].EI = (double*)malloc(pl->eiLength * sizeof(double));
- tr->partitionData[i].substRates = (double *)malloc(pl->substRatesLength * sizeof(double));
- tr->partitionData[i].frequencies = (double*)malloc(pl->frequenciesLength * sizeof(double));
- tr->partitionData[i].tipVector = (double *)malloc_aligned(pl->tipVectorLength * sizeof(double));
+ tr->partitionData[i].left = (double *)rax_malloc_aligned(pl->leftLength * (maxCategories + 1) * sizeof(double));
+ tr->partitionData[i].right = (double *)rax_malloc_aligned(pl->rightLength * (maxCategories + 1) * sizeof(double));
+ tr->partitionData[i].EIGN = (double*)rax_malloc(pl->eignLength * sizeof(double));
+ tr->partitionData[i].EV = (double*)rax_malloc_aligned(pl->evLength * sizeof(double));
+ tr->partitionData[i].EI = (double*)rax_malloc(pl->eiLength * sizeof(double));
+ tr->partitionData[i].substRates = (double *)rax_malloc(pl->substRatesLength * sizeof(double));
+ tr->partitionData[i].frequencies = (double*)rax_malloc(pl->frequenciesLength * sizeof(double));
+ tr->partitionData[i].tipVector = (double *)rax_malloc_aligned(pl->tipVectorLength * sizeof(double));
if(tr->partitionData[i].protModels == LG4)
@@ -3086,35 +3049,35 @@ static void allocPartitions(tree *tr)
for(k = 0; k < 4; k++)
{
- tr->partitionData[i].EIGN_LG4[k] = (double*)malloc(pl->eignLength * sizeof(double));
- tr->partitionData[i].EV_LG4[k] = (double*)malloc_aligned(pl->evLength * sizeof(double));
- tr->partitionData[i].EI_LG4[k] = (double*)malloc(pl->eiLength * sizeof(double));
- tr->partitionData[i].substRates_LG4[k] = (double *)malloc(pl->substRatesLength * sizeof(double));
- tr->partitionData[i].frequencies_LG4[k] = (double*)malloc(pl->frequenciesLength * sizeof(double));
- tr->partitionData[i].tipVector_LG4[k] = (double *)malloc_aligned(pl->tipVectorLength * sizeof(double));
+ tr->partitionData[i].EIGN_LG4[k] = (double*)rax_malloc(pl->eignLength * sizeof(double));
+ tr->partitionData[i].EV_LG4[k] = (double*)rax_malloc_aligned(pl->evLength * sizeof(double));
+ tr->partitionData[i].EI_LG4[k] = (double*)rax_malloc(pl->eiLength * sizeof(double));
+ tr->partitionData[i].substRates_LG4[k] = (double *)rax_malloc(pl->substRatesLength * sizeof(double));
+ tr->partitionData[i].frequencies_LG4[k] = (double*)rax_malloc(pl->frequenciesLength * sizeof(double));
+ tr->partitionData[i].tipVector_LG4[k] = (double *)rax_malloc_aligned(pl->tipVectorLength * sizeof(double));
}
}
- tr->partitionData[i].symmetryVector = (int *)malloc(pl->symmetryVectorLength * sizeof(int));
- tr->partitionData[i].frequencyGrouping = (int *)malloc(pl->frequencyGroupingLength * sizeof(int));
- tr->partitionData[i].perSiteRates = (double *)malloc(sizeof(double) * tr->maxCategories);
- tr->partitionData[i].unscaled_perSiteRates = (double *)malloc(sizeof(double) * tr->maxCategories);
+ tr->partitionData[i].symmetryVector = (int *)rax_malloc(pl->symmetryVectorLength * sizeof(int));
+ tr->partitionData[i].frequencyGrouping = (int *)rax_malloc(pl->frequencyGroupingLength * sizeof(int));
+ tr->partitionData[i].perSiteRates = (double *)rax_malloc(sizeof(double) * tr->maxCategories);
+ tr->partitionData[i].unscaled_perSiteRates = (double *)rax_malloc(sizeof(double) * tr->maxCategories);
tr->partitionData[i].nonGTR = FALSE;
- tr->partitionData[i].gammaRates = (double*)malloc(sizeof(double) * 4);
- tr->partitionData[i].yVector = (unsigned char **)malloc(sizeof(unsigned char*) * (tr->mxtips + 1));
+ tr->partitionData[i].gammaRates = (double*)rax_malloc(sizeof(double) * 4);
+ tr->partitionData[i].yVector = (unsigned char **)rax_malloc(sizeof(unsigned char*) * (tr->mxtips + 1));
- tr->partitionData[i].xVector = (double **)malloc(sizeof(double*) * tr->innerNodes);
- tr->partitionData[i].xSpaceVector = (size_t *)calloc(tr->innerNodes, sizeof(size_t));
+ tr->partitionData[i].xVector = (double **)rax_malloc(sizeof(double*) * tr->innerNodes);
+ tr->partitionData[i].xSpaceVector = (size_t *)rax_calloc(tr->innerNodes, sizeof(size_t));
- tr->partitionData[i].expVector = (int **)malloc(sizeof(int*) * tr->innerNodes);
- tr->partitionData[i].expSpaceVector = (size_t *)calloc(tr->innerNodes, sizeof(size_t));
+ tr->partitionData[i].expVector = (int **)rax_malloc(sizeof(int*) * tr->innerNodes);
+ tr->partitionData[i].expSpaceVector = (size_t *)rax_calloc(tr->innerNodes, sizeof(size_t));
tr->partitionData[i].mxtips = tr->mxtips;
@@ -3162,14 +3125,14 @@ static void allocNodex (tree *tr)
tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1;
- tr->partitionData[model].gapVector = (unsigned int*)calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int));
+ tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int));
tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int);
/* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */
- tr->partitionData[model].gapColumn = (double *)malloc_aligned(((size_t)tr->innerNodes) *
+ tr->partitionData[model].gapColumn = (double *)rax_malloc_aligned(((size_t)tr->innerNodes) *
((size_t)4) *
((size_t)(tr->partitionData[model].states)) *
sizeof(double));
@@ -3182,10 +3145,10 @@ static void allocNodex (tree *tr)
tr->partitionData[model].gapVector[tr->partitionData[model].gapVectorLength * j + i / 32] |= mask32[i % 32];
}
- tr->perSiteLL = (double *)malloc((size_t)tr->cdta->endsite * sizeof(double));
+ tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double));
assert(tr->perSiteLL != NULL);
- tr->sumBuffer = (double *)malloc_aligned(memoryRequirements * sizeof(double));
+ tr->sumBuffer = (double *)rax_malloc_aligned(memoryRequirements * sizeof(double));
assert(tr->sumBuffer != NULL);
offset = 0;
@@ -3740,12 +3703,12 @@ static void parseOutgroups(char *outgr, tree *tr)
tr->numberOfOutgroups = count;
- tr->outgroups = (char **)malloc(sizeof(char *) * count);
+ tr->outgroups = (char **)rax_malloc(sizeof(char *) * count);
for(i = 0; i < tr->numberOfOutgroups; i++)
- tr->outgroups[i] = (char *)malloc(sizeof(char) * nmlngth);
+ tr->outgroups[i] = (char *)rax_malloc(sizeof(char) * nmlngth);
- tr->outgroupNums = (int *)malloc(sizeof(int) * count);
+ tr->outgroupNums = (int *)rax_malloc(sizeof(int) * count);
i = 0;
k = 0;
@@ -4573,10 +4536,10 @@ static void get_args(int argc, char *argv[], analdef *adef, tree *tr)
case 'o':
{
char *outgroups;
- outgroups = (char*)malloc(sizeof(char) * (strlen(optarg) + 1));
+ outgroups = (char*)rax_malloc(sizeof(char) * (strlen(optarg) + 1));
strcpy(outgroups, optarg);
parseOutgroups(outgroups, tr);
- free(outgroups);
+ rax_free(outgroups);
adef->outgroup = TRUE;
}
break;
@@ -5515,8 +5478,26 @@ void printBaseFrequencies(tree *tr)
printBothOpen("Partition: %d with name: %s\n", model, tr->partitionData[model].partitionName);
printBothOpen("Base frequencies: ");
- for(i = 0; i < tr->partitionData[model].states; i++)
- printBothOpen("%1.3f ", tr->partitionData[model].frequencies[i]);
+ if(tr->partitionData[model].protModels == LG4)
+ {
+ int
+ k;
+
+ printBothOpen("\n");
+
+ for(k = 0; k < 4; k++)
+ {
+ printBothOpen("LG4 %d: ", k);
+ for(i = 0; i < tr->partitionData[model].states; i++)
+ printBothOpen("%1.3f ", tr->partitionData[model].frequencies_LG4[k][i]);
+ printBothOpen("\n");
+ }
+ }
+ else
+ {
+ for(i = 0; i < tr->partitionData[model].states; i++)
+ printBothOpen("%1.3f ", tr->partitionData[model].frequencies[i]);
+ }
printBothOpen("\n\n");
}
@@ -5688,7 +5669,7 @@ static void printModelAndProgramInfo(tree *tr, analdef *adef, int argc, char *ar
if(!adef->readTaxaOnly)
{
printBoth(infoFile, "All free model parameters will be estimated by RAxML\n");
if(tr->rateHetModel == GAMMA || tr->rateHetModel == GAMMA_I)
@@ -6239,13 +6220,29 @@ void printModelParams(tree *tr, analdef *adef)
{
case AA_DATA:
{
- char *freqNames[20] = {"A", "R", "N ","D", "C", "Q", "E", "G",
+ char *freqNames[20] = {"A", "R", "N","D", "C", "Q", "E", "G",
"H", "I", "L", "K", "M", "F", "P", "S",
"T", "W", "Y", "V"};
- printRatesRest(20, r, freqNames);
- printBothOpen("\n");
- printFreqs(20, f, freqNames);
+ if(tr->partitionData[model].protModels == LG4)
+ {
+ int
+ k;
+
+ for(k = 0; k < 4; k++)
+ {
+ printBothOpen("LGM %d\n", k);
+ printRatesRest(20, tr->partitionData[model].substRates_LG4[k], freqNames);
+ printBothOpen("\n");
+ printFreqs(20, tr->partitionData[model].frequencies_LG4[k], freqNames);
+ }
+ }
+ else
+ {
+ printRatesRest(20, r, freqNames);
+ printBothOpen("\n");
+ printFreqs(20, f, freqNames);
+ }
}
break;
case GENERIC_32:
@@ -6499,8 +6496,8 @@ static void finalizeInfoFile(tree *tr, analdef *adef)
printBothOpen("\n");
printBothOpen("Number of free parameters for AIC-TEST(BR-LEN): %d\n", paramsBrLen);
printBothOpen("Number of free parameters for AIC-TEST(NO-BR-LEN): %d\n", params);
printBothOpen("\n\n");
@@ -6763,26 +6760,26 @@ static void initPartition(tree *tr, tree *localTree, int tid)
localTree->nameList = tr->nameList;
localTree->numBranches = tr->numBranches;
- localTree->lhs = (double*)malloc(sizeof(double) * localTree->originalCrunchedLength);
- localTree->executeModel = (boolean*)malloc(sizeof(boolean) * localTree->NumberOfModels);
- localTree->perPartitionLH = (double*)malloc(sizeof(double) * localTree->NumberOfModels);
- localTree->storedPerPartitionLH = (double*)malloc(sizeof(double) * localTree->NumberOfModels);
+ localTree->lhs = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength);
+ localTree->executeModel = (boolean*)rax_malloc(sizeof(boolean) * localTree->NumberOfModels);
+ localTree->perPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
+ localTree->storedPerPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
- localTree->fracchanges = (double*)malloc(sizeof(double) * localTree->NumberOfModels);
+ localTree->fracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
- localTree->partitionContributions = (double*)malloc(sizeof(double) * localTree->NumberOfModels);
+ localTree->partitionContributions = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels);
- localTree->partitionData = (pInfo*)malloc(sizeof(pInfo) * localTree->NumberOfModels);
+ localTree->partitionData = (pInfo*)rax_malloc(sizeof(pInfo) * localTree->NumberOfModels);
/* extend for multi-branch */
localTree->td[0].count = 0;
- localTree->td[0].ti = (traversalInfo *)malloc(sizeof(traversalInfo) * localTree->mxtips);
+ localTree->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * localTree->mxtips);
- localTree->cdta = (cruncheddata*)malloc(sizeof(cruncheddata));
- localTree->cdta->patrat = (double*)malloc(sizeof(double) * localTree->originalCrunchedLength);
- localTree->cdta->patratStored = (double*)malloc(sizeof(double) * localTree->originalCrunchedLength);
+ localTree->cdta = (cruncheddata*)rax_malloc(sizeof(cruncheddata));
+ localTree->cdta->patrat = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength);
+ localTree->cdta->patratStored = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength);
localTree->discreteRateCategories = tr->discreteRateCategories;
@@ -6835,13 +6832,13 @@ static void allocNodex(tree *tr, int tid, int n)
tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1;
- tr->partitionData[model].gapVector = (unsigned int*)calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int));
+ tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int));
tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int);
/* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */
- tr->partitionData[model].gapColumn = (double *)malloc_aligned(
+ tr->partitionData[model].gapColumn = (double *)rax_malloc_aligned(
((size_t)(tr->innerNodes)) *
((size_t)(4)) *
((size_t)(tr->partitionData[model].states)) *
@@ -6855,26 +6852,26 @@ static void allocNodex(tree *tr, int tid, int n)
if(tid == 0)
{
- tr->perSiteLL = (double *)malloc((size_t)tr->cdta->endsite * sizeof(double));
+ tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double));
assert(tr->perSiteLL != NULL);
}
- tr->sumBuffer = (double *)malloc_aligned(memoryRequirements * sizeof(double));
+ tr->sumBuffer = (double *)rax_malloc_aligned(memoryRequirements * sizeof(double));
assert(tr->sumBuffer != NULL);
- tr->y_ptr = (unsigned char *)malloc(myLength * (size_t)(tr->mxtips) * sizeof(unsigned char));
+ tr->y_ptr = (unsigned char *)rax_malloc(myLength * (size_t)(tr->mxtips) * sizeof(unsigned char));
assert(tr->y_ptr != NULL);
- tr->perSiteLLPtr = (double*) malloc(myLength * sizeof(double));
+ tr->perSiteLLPtr = (double*) rax_malloc(myLength * sizeof(double));
assert(tr->perSiteLLPtr != NULL);
- tr->wgtPtr = (int*) malloc(myLength * sizeof(int));
+ tr->wgtPtr = (int*) rax_malloc(myLength * sizeof(int));
assert(tr->wgtPtr != NULL);
- tr->invariantPtr = (int*) malloc(myLength * sizeof(int));
+ tr->invariantPtr = (int*) rax_malloc(myLength * sizeof(int));
assert(tr->invariantPtr != NULL);
- tr->rateCategoryPtr = (int*) malloc(myLength * sizeof(int));
+ tr->rateCategoryPtr = (int*) rax_malloc(myLength * sizeof(int));
assert(tr->rateCategoryPtr != NULL);
}
@@ -6927,6 +6924,25 @@ static void broadcastPerSiteRates(tree *tr, tree *localTree)
}
+static void copyLG4(tree *localTree, tree *tr, int model, const partitionLengths *pl)
+{ /* Copy the LG4-specific arrays of partition `model` from tr into localTree.
+   * Does nothing unless tr marks that partition's protein model as LG4.
+   * Element counts are taken from pl; every array is copied as doubles.
+   * Nothing is allocated here, so the destination arrays must already exist. */
+ if(tr->partitionData[model].protModels == LG4)
+ {
+ int
+ k;
+ /* one set of arrays per index k = 0..3; each set is copied in full */
+ for(k = 0; k < 4; k++)
+ {
+ memcpy(localTree->partitionData[model].EIGN_LG4[k], tr->partitionData[model].EIGN_LG4[k], pl->eignLength * sizeof(double));
+ memcpy(localTree->partitionData[model].EV_LG4[k], tr->partitionData[model].EV_LG4[k], pl->evLength * sizeof(double));
+ memcpy(localTree->partitionData[model].EI_LG4[k], tr->partitionData[model].EI_LG4[k], pl->eiLength * sizeof(double));
+ memcpy(localTree->partitionData[model].substRates_LG4[k], tr->partitionData[model].substRates_LG4[k], pl->substRatesLength * sizeof(double));
+ memcpy(localTree->partitionData[model].frequencies_LG4[k], tr->partitionData[model].frequencies_LG4[k], pl->frequenciesLength * sizeof(double));
+ memcpy(localTree->partitionData[model].tipVector_LG4[k], tr->partitionData[model].tipVector_LG4[k], pl->tipVectorLength * sizeof(double));
+ }
+ }
+}
+
static void execFunction(tree *tr, tree *localTree, int tid, int n)
{
double volatile result;
@@ -7066,7 +7082,7 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
-
+ copyLG4(localTree, tr, model, pl);
}
}
break;
@@ -7084,7 +7100,7 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double));
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
-
+ copyLG4(localTree, tr, model, pl);
}
}
@@ -7186,7 +7202,7 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double));
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
-
+ copyLG4(localTree, tr, model, pl);
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
localTree->partitionData[model].alpha = tr->partitionData[model].alpha;
@@ -7211,7 +7227,7 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double));
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
-
+ copyLG4(localTree, tr, model, pl);
memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4);
localTree->partitionData[model].alpha = tr->partitionData[model].alpha;
@@ -7334,6 +7350,7 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double));
memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double));
+ copyLG4(localTree, tr, model, pl);
}
}
@@ -7356,20 +7373,20 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
if(localTree->perPartitionEPA)
{
- localTree->readPartition = (int *)malloc(sizeof(int) * (size_t)localTree->numberOfTipsForInsertion);
+ localTree->readPartition = (int *)rax_malloc(sizeof(int) * (size_t)localTree->numberOfTipsForInsertion);
memcpy(localTree->readPartition, tr->readPartition, sizeof(int) * (size_t)localTree->numberOfTipsForInsertion);
}
}
- localTree->temporarySumBuffer = (double *)malloc_aligned(sizeof(double) * localTree->contiguousVectorLength);
- localTree->temporaryVector = (double *)malloc_aligned(sizeof(double) * localTree->contiguousVectorLength);
+ localTree->temporarySumBuffer = (double *)rax_malloc_aligned(sizeof(double) * localTree->contiguousVectorLength);
+ localTree->temporaryVector = (double *)rax_malloc_aligned(sizeof(double) * localTree->contiguousVectorLength);
- localTree->temporaryScaling = (int *)malloc(sizeof(int) * localTree->contiguousScalingLength);
+ localTree->temporaryScaling = (int *)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
- localTree->contiguousWgt = (int*)malloc(sizeof(int) * localTree->contiguousScalingLength);
- localTree->contiguousInvariant = (int*)malloc(sizeof(int) * localTree->contiguousScalingLength);
+ localTree->contiguousWgt = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
+ localTree->contiguousInvariant = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
memcpy(localTree->contiguousWgt , tr->cdta->aliaswgt, sizeof(int) * localTree->contiguousScalingLength);
@@ -7379,7 +7396,7 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
broadcastPerSiteRates(tr, localTree);
- localTree->contiguousRateCategory = (int*)malloc(sizeof(int) * localTree->contiguousScalingLength);
+ localTree->contiguousRateCategory = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength);
memcpy(localTree->contiguousRateCategory, tr->cdta->rateCategory, sizeof(int) * localTree->contiguousScalingLength);
@@ -7556,9 +7573,9 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
{
/* i is child of j */
List
- *elem = (List*) malloc(sizeof(List));
+ *elem = (List*) rax_malloc(sizeof(List));
- elem->value = calloc(1, sizeof(int));
+ elem->value = rax_calloc(1, sizeof(int));
*(int*)elem->value = i;
@@ -7729,9 +7746,9 @@ static void execFunction(tree *tr, tree *localTree, int tid, int n)
density = 0.0;
tr->bipStatusLen = newSectionEnd - tr->sectionEnd;
- free(tr->bipStatus);
+ rax_free(tr->bipStatus);
/* printf("%d\n" ,tr->bipStatusLen); */
- tr->bipStatus = (int*)calloc(tr->bipStatusLen, sizeof(int));
+ tr->bipStatus = (int*)rax_calloc(tr->bipStatusLen, sizeof(int));
tr->sectionEnd = newSectionEnd;
continue;
}
@@ -7898,7 +7915,7 @@ static void *likelihoodThread(void *tData)
threadData *td = (threadData*)tData;
tree
*tr = td->tr,
- *localTree = (tree *)malloc(sizeof(tree));
+ *localTree = (tree *)rax_malloc(sizeof(tree));
int
myCycle = 0;
@@ -7943,21 +7960,23 @@ static void startPthreads(tree *tr)
pthread_mutex_init(&mutex , (pthread_mutexattr_t *)NULL);
- threads = (pthread_t *)malloc(NumberOfThreads * sizeof(pthread_t));
- tData = (threadData *)malloc(NumberOfThreads * sizeof(threadData));
- reductionBuffer = (volatile double *)malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
- reductionBufferTwo = (volatile double *)malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
- reductionBufferThree = (volatile double *)malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
- reductionBufferParsimony = (volatile int *)malloc(sizeof(volatile int) * NumberOfThreads);
+ threads = (pthread_t *)rax_malloc(NumberOfThreads * sizeof(pthread_t));
+ tData = (threadData *)rax_malloc(NumberOfThreads * sizeof(threadData));
+
+
+ reductionBuffer = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
+ reductionBufferTwo = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
+ reductionBufferThree = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels);
+ reductionBufferParsimony = (volatile int *)rax_malloc(sizeof(volatile int) * NumberOfThreads);
- barrierBuffer = (volatile char *)malloc(sizeof(volatile char) * NumberOfThreads);
+ barrierBuffer = (volatile char *)rax_malloc(sizeof(volatile char) * NumberOfThreads);
for(t = 0; t < NumberOfThreads; t++)
barrierBuffer[t] = 0;
- branchInfos = (volatile branchInfo **)malloc(sizeof(volatile branchInfo *) * NumberOfThreads);
+ branchInfos = (volatile branchInfo **)rax_malloc(sizeof(volatile branchInfo *) * NumberOfThreads);
for(t = 1; t < NumberOfThreads; t++)
{
@@ -8023,7 +8042,7 @@ static void computeLHTest(tree *tr, analdef *adef, char *bootStrapFileName)
*treeFile = getNumberOfTrees(tr, bootStrapFileName, adef);
double
- *bestVector = (double*)malloc(sizeof(double) * tr->cdta->endsite);
+ *bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite);
for(i = 0; i < tr->cdta->endsite; i++)
weightSum += (double)(tr->cdta->aliaswgt[i]);
@@ -8089,7 +8108,7 @@ static void computeLHTest(tree *tr, analdef *adef, char *bootStrapFileName)
}
- free(bestVector);
+ rax_free(bestVector);
fclose(treeFile);
exit(0);
}
@@ -8104,7 +8123,7 @@ static void computePerSiteLLs(tree *tr, analdef *adef, char *bootStrapFileName)
*tlf = myfopen(perSiteLLsFileName, "wb");
double
- *unsortedSites = (double*)malloc(sizeof(double) * tr->rdta->sites);
+ *unsortedSites = (double*)rax_malloc(sizeof(double) * tr->rdta->sites);
@@ -8164,7 +8183,7 @@ static void computePerSiteLLs(tree *tr, analdef *adef, char *bootStrapFileName)
fclose(treeFile);
- free(unsortedSites);
+ rax_free(unsortedSites);
fclose(tlf);
}
@@ -8235,11 +8254,11 @@ static void computeAllLHs(tree *tr, analdef *adef, char *bootStrapFileName)
INFILE = getNumberOfTrees(tr, bootStrapFileName, adef);
- bestT = (bestlist *) malloc(sizeof(bestlist));
+ bestT = (bestlist *) rax_malloc(sizeof(bestlist));
bestT->ninit = 0;
initBestTree(bestT, 1, tr->mxtips);