Permalink
Browse files

-some general code cleanup

-added the big tree plausibility checking option -f R.
 This option allows one to assess the plausibility of a very large tree that can no longer
 be inspected visually, by computing its RF distance to many smaller (better-quality) trees
 that each contain a subset of the taxa of the large tree.
 To invoke: ./raxmlHPC-SSE -f R -m GTRCAT -t largeTree -z smallReferenceTrees -n T1
 where the file "largeTree" contains the huge tree and the file "smallReferenceTrees" contains
 the smaller reference trees against which the large tree is checked.
  • Loading branch information...
1 parent 9a145ad commit 3e33815164a9ae161e8768590e04de3e1d2dd459 @stamatak committed Apr 15, 2013
Showing with 490 additions and 72 deletions.
  1. +2 −6 ancestralStates.c
  2. +33 −21 axml.c
  3. +4 −2 axml.h
  4. +435 −6 bipartitionList.c
  5. +0 −3 evaluateGenericSpecial.c
  6. +2 −5 fastDNAparsimony.c
  7. +2 −3 fastSearch.c
  8. +3 −7 leaveDropping.c
  9. +3 −8 makenewzGenericSpecial.c
  10. +2 −2 multiple.c
  11. +2 −4 optimizeModel.c
  12. +2 −5 searchAlgo.c
View
@@ -523,15 +523,11 @@ void newviewIterativeAncestral(tree *tr)
*tipX1 = (unsigned char *)NULL,
*tipX2 = (unsigned char *)NULL;
- size_t
- rateHet,
+ size_t
states = (size_t)tr->partitionData[model].states,
width = tr->partitionData[model].width;
- if(tr->rateHetModel == CAT)
- rateHet = 1;
- else
- rateHet = 4;
+
switch(tInfo->tipCase)
{
View
@@ -3021,13 +3021,7 @@ static void allocPartitions(tree *tr)
{
const partitionLengths
*pl = getPartitionLengths(&(tr->partitionData[i]));
-
- size_t
- k,
- width = tr->partitionData[i].width;
-
-
-
+
if(tr->useFastScaling)
tr->partitionData[i].globalScaler = (unsigned int *)rax_calloc(2 * tr->mxtips, sizeof(unsigned int));
@@ -3845,6 +3839,11 @@ static void printMinusFUsage(void)
printf(" if the trees have node labales represented as integer support values the program will also compute two flavors of\n");
printf(" the weighted Robinson-Foulds (WRF) distance\n");
+ printf(" \"-f R\": compute all pairwise Robinson-Foulds (RF) distances between a large reference tree passed via \"-t\" \n");
+ printf(" and many smaller trees (that must have a subset of the taxa of the large tree) passed via \"-z\".\n");
+ printf(" This option is intended for checking the plausibility of very large phylogenies that can not be inspected\n");
+ printf(" visually any more.\n");
+
printf(" \"-f s\": split up a multi-gene partitioned alignment into the respective subalignments \n");
printf(" \"-f S\": compute site-specific placement bias using a leave one out test inspired by the evolutionary placement algorithm\n");
@@ -3898,7 +3897,7 @@ static void printREADME(void)
printf(" [-b bootstrapRandomNumberSeed] [-B wcCriterionThreshold]\n");
printf(" [-c numberOfCategories] [-d] [-D]\n");
printf(" [-e likelihoodEpsilon] [-E excludeFileName]\n");
- printf(" [-f a|A|b|B|c|C|d|e|E|F|g|G|h|H|j|J|m|n|N|o|p|q|r|s|S|t|T|u|v|V|w|W|x|y] [-F]\n");
+ printf(" [-f a|A|b|B|c|C|d|e|E|F|g|G|h|H|j|J|m|n|N|o|p|q|r|R|s|S|t|T|u|v|V|w|W|x|y] [-F]\n");
printf(" [-g groupingFileName] [-G placementThreshold] [-h]\n");
printf(" [-i initialRearrangementSetting] [-I autoFC|autoMR|autoMRE|autoMRE_IGN]\n");
printf(" [-j] [-J MR|MR_DROP|MRE|STRICT|STRICT_DROP|T_<PERCENT>] [-k] [-K] [-M]\n");
@@ -4759,6 +4758,10 @@ static void get_args(int argc, char *argv[], analdef *adef, tree *tr)
adef->readTaxaOnly = TRUE;
adef->mode = COMPUTE_RF_DISTANCE;
break;
+ case 'R':
+ adef->readTaxaOnly = TRUE;
+ adef->mode = PLAUSIBILITY_CHECKER;
+ break;
case 's':
adef->mode = SPLIT_MULTI_GENE;
break;
@@ -5648,6 +5651,9 @@ static void printModelAndProgramInfo(tree *tr, analdef *adef, int argc, char *ar
case ANCESTRAL_SEQUENCE_TEST:
printBoth(infoFile, "\nRAxML ancestral sequence test for Jiajie\n\n");
break;
+ case PLAUSIBILITY_CHECKER:
+ printBoth(infoFile, "\nRAxML large-tree plausibility-checker\n\n");
+ break;
default:
assert(0);
}
@@ -8376,13 +8382,11 @@ static void computeELW(tree *tr, analdef *adef, char *bootStrapFileName)
*treeFile = getNumberOfTrees(tr, bootStrapFileName, adef);
int
- position = 0,
bestIndex = -1,
i,
k,
*originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)),
- *originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)),
- *countBest;
+ *originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int));
long
startSeed;
@@ -8424,8 +8428,6 @@ static void computeELW(tree *tr, analdef *adef, char *bootStrapFileName)
for(k = 0; k < tr->numberOfTrees; k++)
lhweights[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double));
- countBest = (int*)rax_calloc(adef->multipleRuns, sizeof(int));
-
/* read in the first tree and optimize ML params on it */
treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE);
@@ -8454,7 +8456,6 @@ static void computeELW(tree *tr, analdef *adef, char *bootStrapFileName)
for(i = 0; i < tr->numberOfTrees; i++)
{
- position = 0;
/* read in new tree */
@@ -8807,9 +8808,10 @@ static int sortLex(const void *a, const void *b)
}
-static void extractTaxaFromTopology(tree *tr, rawdata *rdta, cruncheddata *cdta)
+static void extractTaxaFromTopology(tree *tr, rawdata *rdta, cruncheddata *cdta, char fileName[1024])
{
- FILE *f = myfopen(bootStrapFile, "rb");
+ FILE
+ *f = myfopen(fileName, "rb");
char
**nameList,
@@ -9088,9 +9090,10 @@ void readBinaryModel(tree *tr)
masterBarrier(THREAD_COPY_RATE_CATS, tr);
#else
{
- int
- model,
+ size_t
i;
+ int
+ model;
for(model = 0; model < tr->NumberOfModels; model++)
{
@@ -9775,7 +9778,7 @@ static void thoroughTreeOptimization(tree *tr, analdef *adef, rawdata *rdta, cru
printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);
}
-static void ancestralSequenceTest(tree *tr, analdef *adef)
+static void ancestralSequenceTest(tree *tr)
{
FILE
*f = myfopen(quartetGroupingFileName, "r");
@@ -9982,7 +9985,12 @@ int main (int argc, char *argv[])
if(adef->readTaxaOnly)
- extractTaxaFromTopology(tr, rdta, cdta);
+ {
+ if(adef->mode == PLAUSIBILITY_CHECKER)
+ extractTaxaFromTopology(tr, rdta, cdta, tree_file);
+ else
+ extractTaxaFromTopology(tr, rdta, cdta, bootStrapFile);
+ }
getinput(adef, rdta, cdta, tr);
@@ -10319,7 +10327,11 @@ int main (int argc, char *argv[])
modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
- ancestralSequenceTest(tr, adef);
+ ancestralSequenceTest(tr);
+ break;
+ case PLAUSIBILITY_CHECKER:
+ plausibilityChecker(tr, adef);
+ exit(0);
break;
default:
assert(0);
View
@@ -161,8 +161,8 @@
#define PointGamma(prob,alpha,beta) PointChi2(prob,2.0*(alpha))/(2.0*(beta))
#define programName "RAxML"
-#define programVersion "7.4.7"
-#define programDate "April 10 2013"
+#define programVersion "7.4.8"
+#define programDate "April 15 2013"
#define TREE_EVALUATION 0
@@ -191,6 +191,7 @@
#define THOROUGH_OPTIMIZATION 32
#define OPTIMIZE_BR_LEN_SCALER 33
#define ANCESTRAL_SEQUENCE_TEST 34
+#define PLAUSIBILITY_CHECKER 25
#define M_GTRCAT 1
#define M_GTRGAMMA 2
@@ -1102,6 +1103,7 @@ extern void resetBranches ( tree *tr );
extern void scaleBranches(tree *tr, boolean fromFile);
extern void modOpt ( tree *tr, analdef *adef , boolean resetModel, double likelihoodEpsilon);
+extern void plausibilityChecker(tree *tr, analdef *adef);
extern void parsePartitions ( analdef *adef, rawdata *rdta, tree *tr);
extern void computeBOOTRAPID (tree *tr, analdef *adef, long *radiusSeed);
Oops, something went wrong.

0 comments on commit 3e33815

Please sign in to comment.