diff --git a/tutorials/cont_traits/data/diprotodontia_continuous.nex b/tutorials/cont_traits/data/diprotodontia_continuous.nex new file mode 100644 index 00000000..3274503d --- /dev/null +++ b/tutorials/cont_traits/data/diprotodontia_continuous.nex @@ -0,0 +1,106 @@ +#NEXUS +[Data written by write.nexus.data.R, Tue Jun 10 15:02:17 2025] +BEGIN DATA; + DIMENSIONS NTAX=97 NCHAR=1; + FORMAT DATATYPE=CONTINUOUS MISSING=? GAP=- INTERLEAVE=NO; + MATRIX [ln (body mass (kg))] + acrobates_pygmaeus -4.3428059215206 + aepyprymnus_rufescens 1.03673688495002 + ailurops_ursinus 1.94591014905531 + bettongia_gaimardi 0.325700139639302 + bettongia_lesueur 0.36967902319398 + bettongia_penicillata 0.0963777915222527 + bettongia_tropica 0.169194100886454 + burramys_parvus -3.11677060193105 + cercartetus_caudatus -3.77008950953948 + cercartetus_concinnus -3.2188758248682 + cercartetus_lepidus -4.60367130986435 + cercartetus_nanus -2.81341071676004 + dactylopsila_trivirgata -0.899667172070274 + dendrolagus_dorianus 2.25978197066874 + dendrolagus_goodfellowi 2.00148000021012 + dendrolagus_lumholtzi 1.89461234337939 + dendrolagus_matschiei 1.95727390770563 + distoechurus_pennatus -2.95420652672549 + dorcopsis_hageni 1.70474627405495 + dorcopsulus_vanheurni 0.636767287123741 + gymnobelideus_leadbeateri -2.01245601689921 + hemibelideus_lemuroides 0.0392207131532813 + hypsiprymnodon_moschatus -0.62689138478288 + lagorchestes_conspicillatus 1.00814220998695 + lagorchestes_hirsutus 0.306432675976722 + lagostrophus_fasciatus 0.659321488075848 + lasiorhinus_krefftii 3.35454592881834 + lasiorhinus_latifrons 3.26437677597863 + macropus_agilis 2.63245522647221 + macropus_antilopinus 3.38939129662852 + macropus_dorsalis 2.42036457308855 + macropus_eugenii 1.61685831303758 + macropus_fuliginosus 3.29239948348687 + macropus_giganteus 3.72460717482746 + macropus_irma 2.07944154167984 + macropus_parma 1.43587231014343 + macropus_parryi 2.53606147625598 + macropus_robustus 3.38939129662852 + macropus_rufogriseus 2.80487438573775 + macropus_rufus 3.57596814169475 + onychogalea_fraenata 1.65365095086042 + onychogalea_unguifera 1.75785791755237 + petauroides_volans 0.131905070879939 + petaurus_abidi -1.2801341652915 + petaurus_breviceps -2.05924683439432 + petaurus_norfolcensis -1.30011666487884 + petrogale_assimilis 1.53015225146172 + petrogale_brachyotis 1.38629436111989 + petrogale_burbidgei 0.229523158278249 + petrogale_concinna 0.280732983363574 + petrogale_herberti 1.86652674780105 + petrogale_inornata 1.51633968445958 + petrogale_lateralis 1.66467203156154 + petrogale_mareeba 1.42310833424261 + petrogale_penicillata 1.86652674780105 + petrogale_persephone 1.72019722830809 + petrogale_purpureicollis 1.66467203156154 + petrogale_rothschildi 1.51431591492238 + petrogale_xanthopus 1.98100077921117 + petropseudes_dahli 0.629690740428164 + phalanger_carmelitae 0.607284234779769 + phalanger_gymnotis 0.8754687373539 + phalanger_lullulae 0.65167393235337 + phalanger_orientalis 0.830188032820743 + phalanger_sericeus 0.694646056683681 + phalanger_vestitus 0.615185639090233 + phascolarctos_cinereus 1.73110140935257 + potorous_gilbertii 0.0837297483314996 + potorous_longipes 0.555338080147092 + potorous_tridactylus 0.0532279217885611 + pseudocheirus_peregrinus -0.15163717466296 + pseudochirops_albertisii -0.242262662260921 + pseudochirops_archeri 0.112399682488911 + pseudochirops_corinnae 0.116003675756306 + pseudochirops_cupreus 0.567629308030175 + pseudochirulus_canescens -0.356674943938732 + pseudochirulus_caroli -0.785218610780435 + pseudochirulus_forbesi -0.447600464710949 + pseudochirulus_herbertensis 0.093799949070671 + pseudochirulus_mayeri -1.87650069768315 + setonix_brachyurus 1.03714460324006 + spilocuscus_maculatus 1.42436778176936 + spilocuscus_rufoniger 1.79175946922805 + strigocuscus_celebensis 0 + strigocuscus_pelengensis 0 + tarsipes_rostratus -4.60517018598809 + thylogale_billardierii 1.69247858326036 + thylogale_browni 1.70074007084089 + thylogale_brunii 1.56778223716527 + thylogale_calabyi 1.46467337793464 + thylogale_stigmatica 1.45994785501401 + thylogale_thetis 1.52605086869767 + trichosurus_caninus 1.13624841369115 + trichosurus_vulpecula 0.693730510390013 + vombatus_ursinus 3.24843462710975 + wallabia_bicolor 2.69546973113021 + wyulda_squamicaudata 0.587347679673312 + ; +END; + diff --git a/tutorials/cont_traits/data/diprotodontia_discrete_diet.nex b/tutorials/cont_traits/data/diprotodontia_discrete_diet.nex new file mode 100644 index 00000000..719751c0 --- /dev/null +++ b/tutorials/cont_traits/data/diprotodontia_discrete_diet.nex @@ -0,0 +1,113 @@ +#NEXUS +[Data written by write.nexus.data.R, Tue Jun 10 15:02:15 2025] +BEGIN DATA; + DIMENSIONS NTAX=97 NCHAR=1; + FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=NO symbols="01"; + MATRIX [herbivory] + acrobates_pygmaeus 0 + aepyprymnus_rufescens 1 + ailurops_ursinus 1 + bettongia_gaimardi 1 + bettongia_lesueur 1 + bettongia_penicillata 1 + bettongia_tropica 1 + burramys_parvus 0 + cercartetus_caudatus 0 + cercartetus_concinnus 0 + cercartetus_lepidus 0 + cercartetus_nanus 0 + dactylopsila_trivirgata 0 + dendrolagus_dorianus 1 + dendrolagus_goodfellowi 1 + dendrolagus_lumholtzi 1 + dendrolagus_matschiei 1 + distoechurus_pennatus 0 + dorcopsis_hageni 1 + dorcopsulus_vanheurni 1 + gymnobelideus_leadbeateri 0 + hemibelideus_lemuroides 1 + hypsiprymnodon_moschatus 0 + lagorchestes_conspicillatus 1 + lagorchestes_hirsutus 1 + lagostrophus_fasciatus 1 + lasiorhinus_krefftii 1 + lasiorhinus_latifrons 1 + macropus_agilis 1 + macropus_antilopinus 1 + macropus_dorsalis 1 + macropus_eugenii 1 + macropus_fuliginosus 1 + macropus_giganteus 1 + macropus_irma 1 + macropus_parma 1 + macropus_parryi 1 + macropus_robustus 1 + macropus_rufogriseus 1 + macropus_rufus 1 + onychogalea_fraenata 1 + onychogalea_unguifera 1 + petauroides_volans 1 + petaurus_abidi 0 + petaurus_breviceps 0 + petaurus_norfolcensis 0 + petrogale_assimilis 1 + petrogale_brachyotis 1 + petrogale_burbidgei 1 + petrogale_concinna 1 + petrogale_herberti 1 + petrogale_inornata 1 + petrogale_lateralis 1 + petrogale_mareeba 0 + petrogale_penicillata 1 + petrogale_persephone 1 + petrogale_purpureicollis 1 + petrogale_rothschildi 1 + petrogale_xanthopus 1 + petropseudes_dahli 1 + phalanger_carmelitae 0 + phalanger_gymnotis 1 + phalanger_lullulae 0 + phalanger_orientalis 0 + phalanger_sericeus 0 + phalanger_vestitus 0 + phascolarctos_cinereus 1 + potorous_gilbertii 0 + potorous_longipes 0 + potorous_tridactylus 0 + pseudocheirus_peregrinus 1 + pseudochirops_albertisii 1 + pseudochirops_archeri 1 + pseudochirops_corinnae 1 + pseudochirops_cupreus 1 + pseudochirulus_canescens 1 + pseudochirulus_caroli 1 + pseudochirulus_forbesi 1 + pseudochirulus_herbertensis 1 + pseudochirulus_mayeri 1 + setonix_brachyurus 1 + spilocuscus_maculatus 1 + spilocuscus_rufoniger 1 + strigocuscus_celebensis 1 + strigocuscus_pelengensis 0 + tarsipes_rostratus 1 + thylogale_billardierii 1 + thylogale_browni 1 + thylogale_brunii 1 + thylogale_calabyi 1 + thylogale_stigmatica 1 + thylogale_thetis 1 + trichosurus_caninus 1 + trichosurus_vulpecula 1 + vombatus_ursinus 1 + wallabia_bicolor 1 + wyulda_squamicaudata 0 + ; +END; + + + +[ + herbivory + 0: non-herbivory; plant consumption <90% + 1: herbivory; plant consumption >=90% +] \ No newline at end of file diff --git a/tutorials/cont_traits/data/diprotodontia_discrete_island.nex b/tutorials/cont_traits/data/diprotodontia_discrete_island.nex new file mode 100644 index 00000000..935efcb1 --- /dev/null +++ b/tutorials/cont_traits/data/diprotodontia_discrete_island.nex @@ -0,0 +1,114 @@ +#NEXUS +[Data written by write.nexus.data.R, Tue Jun 10 15:02:15 2025] +BEGIN DATA; + DIMENSIONS NTAX=97 NCHAR=1; + FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=NO symbols="012"; + MATRIX [island_endemicity] + acrobates_pygmaeus 0 + aepyprymnus_rufescens 0 + ailurops_ursinus 2 + bettongia_gaimardi 0 + bettongia_lesueur 0 + bettongia_penicillata 0 + bettongia_tropica 0 + burramys_parvus 0 + cercartetus_caudatus 0 + cercartetus_concinnus 0 + cercartetus_lepidus 0 + cercartetus_nanus 0 + dactylopsila_trivirgata 0 + dendrolagus_dorianus 1 + dendrolagus_goodfellowi 1 + dendrolagus_lumholtzi 0 + dendrolagus_matschiei 1 + distoechurus_pennatus 1 + dorcopsis_hageni 1 + dorcopsulus_vanheurni 1 + gymnobelideus_leadbeateri 0 + hemibelideus_lemuroides 0 + hypsiprymnodon_moschatus 0 + lagorchestes_conspicillatus 0 + lagorchestes_hirsutus 0 + lagostrophus_fasciatus 0 + lasiorhinus_krefftii 0 + lasiorhinus_latifrons 0 + macropus_agilis 0 + macropus_antilopinus 0 + macropus_dorsalis 0 + macropus_eugenii 0 + macropus_fuliginosus 0 + macropus_giganteus 0 + macropus_irma 0 + macropus_parma 0 + macropus_parryi 0 + macropus_robustus 0 + macropus_rufogriseus 0 + macropus_rufus 0 + onychogalea_fraenata 0 + onychogalea_unguifera 0 + petauroides_volans 0 + petaurus_abidi 1 + petaurus_breviceps 0 + petaurus_norfolcensis 0 + petrogale_assimilis 0 + petrogale_brachyotis 0 + petrogale_burbidgei 0 + petrogale_concinna 0 + petrogale_herberti 0 + petrogale_inornata 0 + petrogale_lateralis 0 + petrogale_mareeba 0 + petrogale_penicillata 0 + petrogale_persephone 0 + petrogale_purpureicollis 0 + petrogale_rothschildi 0 + petrogale_xanthopus 0 + petropseudes_dahli 0 + phalanger_carmelitae 1 + phalanger_gymnotis 1 + phalanger_lullulae 2 + phalanger_orientalis 1 + phalanger_sericeus 1 + phalanger_vestitus 1 + phascolarctos_cinereus 0 + potorous_gilbertii 0 + potorous_longipes 0 + potorous_tridactylus 0 + pseudocheirus_peregrinus 0 + pseudochirops_albertisii 1 + pseudochirops_archeri 0 + pseudochirops_corinnae 1 + pseudochirops_cupreus 1 + pseudochirulus_canescens 1 + pseudochirulus_caroli 1 + pseudochirulus_forbesi 1 + pseudochirulus_herbertensis 0 + pseudochirulus_mayeri 1 + setonix_brachyurus 0 + spilocuscus_maculatus 0 + spilocuscus_rufoniger 1 + strigocuscus_celebensis 2 + strigocuscus_pelengensis 2 + tarsipes_rostratus 0 + thylogale_billardierii 0 + thylogale_browni 1 + thylogale_brunii 1 + thylogale_calabyi 1 + thylogale_stigmatica 0 + thylogale_thetis 0 + trichosurus_caninus 0 + trichosurus_vulpecula 0 + vombatus_ursinus 0 + wallabia_bicolor 0 + wyulda_squamicaudata 0 + ; +END; + + + +[ + island_endemicity + 0: Occurs on mainland + 1: Occurs on large land bridge islands + 2: Occurs only on isolated islands +] \ No newline at end of file diff --git a/tutorials/cont_traits/data/diprotodontia_tree.nex b/tutorials/cont_traits/data/diprotodontia_tree.nex new file mode 100644 index 00000000..2bb9be91 --- /dev/null +++ b/tutorials/cont_traits/data/diprotodontia_tree.nex @@ -0,0 +1,207 @@ +#NEXUS +[R-package APE, Tue Jun 10 15:18:57 2025] + +BEGIN TAXA; + DIMENSIONS NTAX = 97; + TAXLABELS + petrogale_inornata + petrogale_mareeba + petrogale_assimilis + petrogale_herberti + petrogale_penicillata + petrogale_purpureicollis + petrogale_lateralis + petrogale_rothschildi + petrogale_xanthopus + petrogale_concinna + petrogale_brachyotis + petrogale_burbidgei + petrogale_persephone + dendrolagus_goodfellowi + dendrolagus_dorianus + dendrolagus_matschiei + dendrolagus_lumholtzi + thylogale_stigmatica + thylogale_brunii + thylogale_browni + thylogale_calabyi + thylogale_thetis + thylogale_billardierii + macropus_rufogriseus + macropus_parma + macropus_parryi + macropus_agilis + macropus_dorsalis + macropus_antilopinus + macropus_robustus + macropus_irma + macropus_rufus + macropus_giganteus + macropus_fuliginosus + macropus_eugenii + wallabia_bicolor + lagorchestes_conspicillatus + lagorchestes_hirsutus + onychogalea_fraenata + onychogalea_unguifera + setonix_brachyurus + dorcopsulus_vanheurni + dorcopsis_hageni + lagostrophus_fasciatus + bettongia_penicillata + bettongia_tropica + bettongia_gaimardi + bettongia_lesueur + aepyprymnus_rufescens + potorous_tridactylus + potorous_gilbertii + potorous_longipes + hypsiprymnodon_moschatus + pseudochirops_albertisii + pseudochirops_cupreus + pseudochirops_corinnae + pseudochirops_archeri + petropseudes_dahli + hemibelideus_lemuroides + petauroides_volans + pseudochirulus_forbesi + pseudochirulus_canescens + pseudochirulus_caroli + pseudochirulus_mayeri + pseudochirulus_herbertensis + pseudocheirus_peregrinus + petaurus_breviceps + petaurus_norfolcensis + petaurus_abidi + gymnobelideus_leadbeateri + dactylopsila_trivirgata + tarsipes_rostratus + distoechurus_pennatus + acrobates_pygmaeus + phalanger_orientalis + phalanger_carmelitae + phalanger_sericeus + phalanger_lullulae + phalanger_vestitus + phalanger_gymnotis + spilocuscus_rufoniger + spilocuscus_maculatus + strigocuscus_pelengensis + strigocuscus_celebensis + ailurops_ursinus + trichosurus_vulpecula + trichosurus_caninus + wyulda_squamicaudata + cercartetus_concinnus + cercartetus_nanus + cercartetus_lepidus + cercartetus_caudatus + burramys_parvus + lasiorhinus_latifrons + vombatus_ursinus + lasiorhinus_krefftii + phascolarctos_cinereus + ; +END; +BEGIN TREES; + TRANSLATE + 1 petrogale_inornata, + 2 petrogale_mareeba, + 3 petrogale_assimilis, + 4 petrogale_herberti, + 5 petrogale_penicillata, + 6 petrogale_purpureicollis, + 7 petrogale_lateralis, + 8 petrogale_rothschildi, + 9 petrogale_xanthopus, + 10 petrogale_concinna, + 11 petrogale_brachyotis, + 12 petrogale_burbidgei, + 13 petrogale_persephone, + 14 dendrolagus_goodfellowi, + 15 dendrolagus_dorianus, + 16 dendrolagus_matschiei, + 17 dendrolagus_lumholtzi, + 18 thylogale_stigmatica, + 19 thylogale_brunii, + 20 thylogale_browni, + 21 thylogale_calabyi, + 22 thylogale_thetis, + 23 thylogale_billardierii, + 24 macropus_rufogriseus, + 25 macropus_parma, + 26 macropus_parryi, + 27 macropus_agilis, + 28 macropus_dorsalis, + 29 macropus_antilopinus, + 30 macropus_robustus, + 31 macropus_irma, + 32 macropus_rufus, + 33 macropus_giganteus, + 34 macropus_fuliginosus, + 35 macropus_eugenii, + 36 wallabia_bicolor, + 37 lagorchestes_conspicillatus, + 38 lagorchestes_hirsutus, + 39 onychogalea_fraenata, + 40 onychogalea_unguifera, + 41 setonix_brachyurus, + 42 dorcopsulus_vanheurni, + 43 dorcopsis_hageni, + 44 lagostrophus_fasciatus, + 45 bettongia_penicillata, + 46 bettongia_tropica, + 47 bettongia_gaimardi, + 48 bettongia_lesueur, + 49 aepyprymnus_rufescens, + 50 potorous_tridactylus, + 51 potorous_gilbertii, + 52 potorous_longipes, + 53 hypsiprymnodon_moschatus, + 54 pseudochirops_albertisii, + 55 pseudochirops_cupreus, + 56 pseudochirops_corinnae, + 57 pseudochirops_archeri, + 58 petropseudes_dahli, + 59 hemibelideus_lemuroides, + 60 petauroides_volans, + 61 pseudochirulus_forbesi, + 62 pseudochirulus_canescens, + 63 pseudochirulus_caroli, + 64 pseudochirulus_mayeri, + 65 pseudochirulus_herbertensis, + 66 pseudocheirus_peregrinus, + 67 petaurus_breviceps, + 68 petaurus_norfolcensis, + 69 petaurus_abidi, + 70 gymnobelideus_leadbeateri, + 71 dactylopsila_trivirgata, + 72 tarsipes_rostratus, + 73 distoechurus_pennatus, + 74 acrobates_pygmaeus, + 75 phalanger_orientalis, + 76 phalanger_carmelitae, + 77 phalanger_sericeus, + 78 phalanger_lullulae, + 79 phalanger_vestitus, + 80 phalanger_gymnotis, + 81 spilocuscus_rufoniger, + 82 spilocuscus_maculatus, + 83 strigocuscus_pelengensis, + 84 strigocuscus_celebensis, + 85 ailurops_ursinus, + 86 trichosurus_vulpecula, + 87 trichosurus_caninus, + 88 wyulda_squamicaudata, + 89 cercartetus_concinnus, + 90 cercartetus_nanus, + 91 cercartetus_lepidus, + 92 cercartetus_caudatus, + 93 burramys_parvus, + 94 lasiorhinus_latifrons, + 95 vombatus_ursinus, + 96 lasiorhinus_krefftii, + 97 phascolarctos_cinereus + ; + TREE * UNTITLED = [&R] (((((((((((((((((((1:0.02604920405,2:0.02604920405):0.005842311197,3:0.03189151525):0.03039073806,(4:0.03226670955,5:0.03226670955):0.03001554376):0.03974379589,6:0.1020260492):0.009460256204,7:0.1114863054):0.009969448464,8:0.1214557539):0.08723267406,9:0.2087152275):0.01975129978,((10:0.1099051294,11:0.1099051294):0.01543656536,12:0.1253416948):0.1030980329):0.01851851852,13:0.2469582462):0.0612370692,(((14:0.153615265,15:0.153615265):0.02066248593,16:0.174277751):0.06970574047,17:0.2439834915):0.06421182398):0.05416197674,(((18:0.1859087742,((19:0.08259634454,20:0.08259634454):0.02291365171,21:0.1054831967):0.08042557753):0.04293294742,22:0.2288417216):0.07067052581,23:0.299539047):0.06284504476):0.03186471566,((((((((24:0.1879455432,25:0.1879455432):0.01396258777,((26:0.1478801522,27:0.1478801522):0.02060888674,28:0.168489039):0.03341909203):0.04033338693,(((29:0.07222490218,30:0.07222490218):0.127566061,31:0.1997909632):0.02728198531,32:0.2270729485):0.01516856944):0.01393578818,(33:0.1284504476,34:0.1284504476):0.1277536581):0.008870665166,35:0.2650747709):0.038618213,36:0.3036929839):0.02921155598,(37:0.2392667631,38:0.2392667631):0.09363777671):0.03808222115,((39:0.1876775473,40:0.1876775473):0.1466205714,41:0.3342981187):0.03671544193):0.02323524682):0.03709063622,(42:0.2350056279,43:0.2350324275):0.1963070161):0.1429490272,44:0.5742616712):0.07281449322,(((((45:0.05633274374,46:0.05633274374):0.04368333601,47:0.1000160798):0.1388754891,48:0.2388915688):0.1809776491,49:0.419869218):0.131050008,((50:0.2205338479,51:0.2205338479):0.1684086402,52:0.3889424881):0.161976738):0.09613013882):0.1347751514,53:0.7818245163):0.1400546712,((((((((54:0.2396151578,55:0.2396151578):0.07554805167,56:0.3151632095):0.04218255883,(57:0.3344857158,58:0.3344857158):0.02283325293):0.203221311,(59:0.3227474942,60:0.3227474942):0.2377927855):0.05547515678,((((61:0.1227153347,62:0.1227153347):0.01254220936,(63:0.1030980329,64:0.1030980329):0.03215951118):0.1735005628,65:0.3087581069):0.1521144879,66:0.4608725947):0.1551428418):0.1366511229,((((67:0.07426167122,68:0.07426167122):0.1040360187,69:0.1782976899):0.4202712119,70:0.5985957013):0.04030658734,71:0.6389022887):0.1137374712):0.0430133462,72:0.7956799057):0.05625234496,(73:0.6263064801,74:0.6263064801):0.2256525701):0.06994693681):0.01648174948,((((((((((75:0.09972128424,76:0.09972128424):0.06370263172,77:0.163423916):0.0397169963,78:0.2031409123):0.02454842686,79:0.2276893391):0.0484804631,80:0.2761698022):0.08503510747,(81:0.1125046899,82:0.1125046899):0.2487002198):0.01549016455,83:0.3766950742):0.1350163478,(84:0.3402744278,85:0.3402744278):0.1714369942):0.07410087367,((86:0.126118883,87:0.126118883):0.2077772418,88:0.3338961248):0.2519161709):0.2969662861,((((89:0.2741866324,90:0.2741866324):0.03408908185,91:0.3082757142):0.0794607922,92:0.3877097068):0.2988958568,93:0.6866055636):0.1961730182):0.05560915474):0.0615854639,(((94:0.1803880581,95:0.1803880581):0.07442246878,96:0.2548373265):0.5176073324,97:0.7724178592):0.2275553412):0; +END; diff --git a/tutorials/cont_traits/figures/state_dependent_OU.pdf b/tutorials/cont_traits/figures/state_dependent_OU.pdf new file mode 100644 index 00000000..2244ef81 Binary files /dev/null and b/tutorials/cont_traits/figures/state_dependent_OU.pdf differ diff --git a/tutorials/cont_traits/figures/state_dependent_ou_gm.pdf b/tutorials/cont_traits/figures/state_dependent_ou_gm.pdf new file mode 100644 index 00000000..ddc5c4b4 Binary files /dev/null and b/tutorials/cont_traits/figures/state_dependent_ou_gm.pdf differ diff --git a/tutorials/cont_traits/modules/simple_overview.md b/tutorials/cont_traits/modules/simple_overview.md index 6b434978..0fe25655 100644 --- a/tutorials/cont_traits/modules/simple_overview.md +++ b/tutorials/cont_traits/modules/simple_overview.md @@ -69,3 +69,15 @@ You can find examples and more information in the {% page_ref cont_traits/simple Optimal traits may change as species evolve over the adaptive landscape. Relaxing the assumption that the optimal value is fixed across the tree requires specifying a model that describes how theta varies. In the {% page_ref cont_traits/relaxed_ou %} tutorial, we provide an example of relaxing the assumption that optima are homogeneously across branches using a ''random local clock'' model, which is spiritually similar to the one described in {% citet Uyeda2014 %}. + + +{% subsubsection (3) Detecting state-dependent evolutionary optima and rates of evolution/adaptation %} + +*Does the continuous character adapt differently for species in specific discrete selective regimes?* + +For species in the same discrete selective regime across the phylogeny, a continuous character can converge towards the same optimal value. +Similarly, the rates of evolution/adaptation in some selective regimes may be higher than the others. +We can test hypotheses of discrete regime-driven continuous character evolution explicitly using the state-dependent Ornstein-Uhlenbeck model. + +In the {% page_ref cont_traits/state_dependent_ou %} tutorial, we provide an example of how to test hypotheses of state-dependent continuous character evolution using a joint-inference approach. + diff --git a/tutorials/cont_traits/modules/state_dependent_OU_exercise_1.md b/tutorials/cont_traits/modules/state_dependent_OU_exercise_1.md new file mode 100644 index 00000000..adb167d6 --- /dev/null +++ b/tutorials/cont_traits/modules/state_dependent_OU_exercise_1.md @@ -0,0 +1,2 @@ +{% subsection Exercise 1 %} +- coming soon! \ No newline at end of file diff --git a/tutorials/cont_traits/modules/state_dependent_OU_parameter_estimation.md b/tutorials/cont_traits/modules/state_dependent_OU_parameter_estimation.md new file mode 100644 index 00000000..b2216ac0 --- /dev/null +++ b/tutorials/cont_traits/modules/state_dependent_OU_parameter_estimation.md @@ -0,0 +1,366 @@ +{% section Testing hypotheses of state-dependent evolution with the OU Model %} + +The relaxed OU model detects shifts of the optimum on a phylogeny without assuming what caused (or is correlated with) the shifts. +Sometimes however, we are interested in whether, or how, some specific variables contributed to the continuous character evolution. +Here, we use the state-dependent OU model to infer (1) discrete character history and (2) discrete-state-dependent continuous character evolution. +Similar to {% citet Beaulieu2012 %}, this model supports multiple $\alpha$, $\sigma^2$, and $\theta$, allowing us to infer changes in the optimal value as well as the rates of evolution/adaptation. + +Conventionally, a sequential approach is used to infer discrete state-dependent continuous character evolution -- we first reconstruct the discrete character history, then infer continuous character evolution on the discrete character map(s) (e.g., {% citet Hansen1997 %}). +Alternatively, a joint approach can be used to infer evolution of both characters simultaneously while taking their dependence into account (e.g., {% citet Boyko2023 %}). +The state-dependent OU model in RevBayes can perform both. +In this tutorial, we will focus on the joint approach, where the discrete character history is treated as a random variable and can be modified using a data-augmentation approach. + +{% figure fig_ou_sd_gm %} + +{% figcaption %} +The graphical model representation of the state-dependent Ornstein-Uhlenbeck (OU) process using a joint-inference approach. For more information about graphical model representations see {% citet Hoehna2014b %}. +{% endfigcaption %} +{% endfigure %} + +In this tutorial, we use a time-calibrated phylogeny of 97 Diprotodontia from { % citet something % } and datasets of (log) body size and herbivory from {% cite PHYLACINE %} to test diet dependency in body size evolution. + +⇨ The full state-dependent OU-model specification is in the file called `mcmc_state_dependent_OU.Rev`. + +{% subsection Read the data %} + +We begin by deciding which of the characters to use. +Here, we assume we are analyzing the first discrete character (diet) and first continuous character (body mass). +``` +char_disc <- 1 +char_cont <- 1 +``` + +Now, we read in the phylogenetic tree. +``` +tree <- readTrees("data/diprotodontia_tree.nex")[1] +``` +We also want to keep track of the number of branches for our relaxed clock model. +``` +ntips <- tree.ntips() +nbranches <- 2 * ntips - 2 +``` + +Next, we read in the discrete character data. +We have to exclude all other characters that we are not interested in and only include our focal trait. +This can be done in RevBayes using the member methods `.excludeAll()` and `.includeCharacter()`. +``` +disc <- readContinuousCharacterData("data/primates_disc_traits.nex") +disc.excludeAll() +disc.includeCharacter( char_disc ) +num_disc_states <- disc.getStateDescriptions().size() +``` + +Similarly, we read in the continuous character data. +``` +cont <- readContinuousCharacterData("data/primates_cont_traits.nex") +cont.excludeAll() +cont.includeCharacter( char_cont ) +``` + +We initialize a variable for our vector of +moves and monitors. +We add one adaptable variance multivariate normal (AVMVN) kernel for each of the discrete and continuous models to improve mixing of MCMC traces. +``` +moves = VectorMoves() +monitors = VectorMonitors() +avmvn_disc = mvAVMVN(weight=20, waitBeforeLearning=500, waitBeforeUsing=1000) +avmvn_cont = mvAVMVN(weight=20, waitBeforeLearning=500, waitBeforeUsing=1000) +``` + +{% subsection Specify the model %} + +{% subsubsection The discrete character model %} + +We create a vector of transition rates drawn from a Dirichlet prior. +``` +num_type_transition <- num_disc_states * (num_disc_states - 1) +rates ~ dnDirichlet( rep(1, num_type_transition) ) +moves.append( mvBetaSimplex( rates, weight=2 ) ) +moves.append( mvDirichletSimplex( rates, weight=2 ) ) +avmvn_disc.addVariable( rates ) +``` + +We build the rate matrix and set the rescaled argument as TRUE. +``` +Q := fnFreeK( rates, rescaled=TRUE ) +``` + +Next, we assign a prior to the rescaling factor of the rate matrix. +``` +lambda ~ dnExponential(200) +moves.append( mvScale(lambda, weight=1.0) ) +avmvn_rates.addVariable(lambda) +``` +After including all parameters in the discrete model to avmvn_disc, we add avmvn_disc to our moves vector. +``` +moves.append( avmvn_disc ) +``` + +We set up the CTMC model. +Note that it is a different from the one used for stochastic mapping. +``` +X ~ dnPhyloCTMCDASiteIID(tree, Q, branchRates=lambda, type="Standard", nSites=1) +X.clamp(disc) +``` + +We include proposals for the discrete character history. +Node proposal changes the state of a randomly chosen node, as well as its parent branch and daughter branches. +Path proposal changes the state along a branch while the states of the parent node and the child node do not change. +``` +moves.append( mvCharacterHistory(ctmc=X, qmap_site=Q, graph="node", proposal="rejection", weight=100.0) ) +moves.append( mvCharacterHistory(ctmc=X, qmap_site=Q, graph="branch", proposal="rejection", weight=50.0) ) +``` + +We keep track of the number of transitions. +``` +for(i in 1:nbranches) { + num_changes[i] := sum(X.numCharacterChanges(i)) +} +total_num_changes := sum(num_changes) + +char_hist := X.characterHistories() +``` + +{% subsubsection The OU model %} +First, we set up the optimum parameter. +for (i in 1:num_disc_states){ + theta[i] ~ dnUniform(-10, 10) + moves.append(mvSlide(theta[i], weight = 1.0) ) + avmvn_ou.addVariable(theta[i]) +} + + +We retrieve some information from our data before proceeding. +``` +root_age <- tree.rootAge() +v_emp <- cont.var(char_cont) +``` + +There are two options to assign priors to $\alpha$ and $\sigma^2$. +On one hand, we can directly put priors on the parameters. +``` +for (i in 1:num_disc_states){ + alpha[i] ~ dnExponential(root_age/2/ln(2)) + moves.append(mvScale(alpha[i], weight = 1.0) ) + avmvn_cont.addVariable(alpha[i]) + + sigma2[i] ~ dnLognormal(ln(sqrt(v_emp), 0.587405)) + moves.append(mvScale(sigma2[i], weight = 3.0) ) + avmvn_cont.addVariable(sigma2[i]) +} +``` + +Alternatively, we can assign a hyperprior for phylogenetic half-life ($ln(2)/\alpha$) and stationary variance ($sigma^2/2\alpha$) respectively, and transform the parameters to $\alpha$ and $\sigma^2$. +The advantage of this option is that phylogenetic half-life and stationary variance have clearer biological meanings and can be interpreted intuitively. +``` +for (i in 1:num_disc_states){ + t_half[i] ~ dnLognormal(ln(root_age/2), 0.587405) + moves.append(mvScale(t_half[i], weight = 1.0) ) + avmvn_cont.addVariable(t_half[i]) + alpha[i] := abs(ln(2)/t_half[i]) + + Vy[i] ~ dnLognormal(ln(v_emp), 0.587405) + moves.append(mvScale(Vy[i], weight = 3.0) ) + avmvn_cont.addVariable(Vy[i]) + sigma2[i] := Vy[i] * 2 * alpha[i] +} +``` +Note that the prior distributions of the two options are _not_ equivalent. + + + +All paramters in the OU model go to avmvn_cont. +``` +moves.append( avmvn_ou ) +``` + +{% The state-dependent OU model %} +Now that we have specified the state-dependent parameters, we can draw the discrete model and the character data from the corresponding phylogenetic OU model. +``` +Y ~ dnPhyloOUSD(char_hist, theta=theta, rootTreatment="optimum", alpha=alpha, sigma=sigma2^0.5) +Y.clamp(cont) +``` + +Noting that $y$ is the observed data ({% ref fig_ou_relaxed_gm %}), we clamp the `cont` to this stochastic node. +``` +Y.clamp(data) +``` + +Finally, we create a workspace object for the entire model with `model()`. +Remember that workspace objects are initialized with the `=` operator, and are not themselves part of the Bayesian graphical model. +The `model()` function traverses the entire model graph and finds all the nodes in the model that we specified. +This object provides a convenient way to refer to the whole model object, rather than just a single DAG node. + +``` +mymodel = model(Y) +``` + +{% subsection Running an MCMC analysis %} + +{% subsubsection Specify Monitors %} + +For our MCMC analysis, we need to set up a vector of *monitors* to record the states of our Markov chain. The monitor functions are all called `mn*`, where `*` is the wildcard representing the monitor type. +First, we will initialize the model monitor using the `mnModel` function. +This creates a new monitor variable that will output the states for all model parameters when passed into a MCMC function. +``` +monitors.append( mnModel(filename="output/trace.log", printgen=10) ) +``` +Additionally, create a screen monitor that will report the states of +specified variables to the screen with `mnScreen`: +``` +monitors.append( mnScreen(printgen=1000, theta) ) +``` +Finally, we include a monitor for the character history on each branch. +``` +monitors.append( mnFile( char_hist, filename="output/char_hist.trees", printgen=10 ) ) +``` + +{% subsubsection Initialize and Run the MCMC Simulation %} + +With a fully specified model, a set of monitors, and a set of moves, we +can now set up the MCMC algorithm that will sample parameter values in +proportion to their posterior probability. The `mcmc()` function will +create our MCMC object: +``` +mymcmc = mcmc(mymodel, monitors, moves, nruns=2, combine="mixed") +``` +Now, run the MCMC: +``` +mymcmc.run(generations=50000, burnin=200) +``` + +Since the character history output is in simmap format, we will convert it to ancestral map format in R and summarize it in RevBayes. +We hope to simplify the process in the future. +Start R in the main directory for this analysis and then type the following commands: +```R +source("scripts/plot_helper_state_dependent_OU.R") + +tree <- read.nexus("data/diprotodontia_tree.nex") +simmap_to_ancStates("output/char_hist.trees", "output/anc_states.log", tree) + +``` + +Now, go back to RevBayes and type the following commands: +```rb +file_in <- "output/anc_states.log" +file_out <- "output/anc_states.tre" + +anc_states = readAncestralStateTrace(file_in) +anc_tree = ancestralStateTree(tree=tree, + ancestral_state_trace_vector=anc_states, + include_start_states=false, + file=file_out, + summary_statistic="MAP", + reconstruction="marginal", + burnin=0.1, + nStates=3, + site=1) +# nStates should always be >= 3 even if your character is binary +q() +``` + +Finally, open R again to plot the objects: +```R +library(RevGadgets) +source("scripts/plot_helper_state_dependent_OU.R") + +tree <- readTrees("data/diprotodontia_tree.nex") + +# plot the objects +ase <- processAncStates("output/anc_states.tre", + state_labels=c("0"="non-herbivore", "1"="herbivore")) + +p1 <- plotAncStatesMAP(t = ase, + tip_labels = FALSE, + node_color_as = "state", + node_color = c("non-herbivore"="#88ccee", "herbivore"="#999933"), + node_size = c(1, 3), + tip_states = TRUE, + tip_states_size = 2, + state_transparency = 0.7, +) + + # modify legend location using ggplot2 + theme(legend.position.inside = c(0.6,0.8)) + +p2 <- plotAncStatesPie(t = ase, + pie_colors = c("non-herbivore"="#88ccee", "herbivore"="#999933"), + node_pie_size = 3, + tip_pies = TRUE, + tip_pie_size = 2 + state_transparency = 0.7, + tip_labels = FALSE, +) + + # modify legend location using ggplot2 + theme(legend.position.inside = c(0.6,0.8)) + + +char_hist <- read.table("output/char_hist.trees", header=TRUE) +simmaps <- read.simmap(text=char_hist$char_hist, format="phylip") +stoc_map <- processStochMaps(tree, + simmap = simmaps, + states=c("0", "1")) + +colnames(stoc_map)[6] = "non-herbivore" +colnames(stoc_map)[7] = "herbivore" + +p3 <- plotStochMaps(tree, maps=stoc_map, + tip_labels = FALSE, + tree_layout = "rectangular", + color_by = "MAP", + colors = c("non-herbivore" = "#88ccee", + "herbivore" = "#999933"), + line_width = 0.5) + + theme(legend.position.inside = c(0.6,0.8)) + +trace <- readTrace("output/trace.log", burnin = 0.1) +color_diet <- c("#88ccee", "#999933") +names(color_diet) <- c("t_half[1]", "t_half[2]") +p4 <- plotTrace(trace, vars = c("t_half[1]", "t_half[2]"), color = color_diet)[[1]] + + ggtitle("Phylogenetic half-life") + + theme(legend.position = "none") + + xlab("Time (tree height)") + + ylab("Posterior probability density") + +names(color_diet) <- c("Vy[1]", "Vy[2]") +p5 <- plotTrace(trace, vars = c("Vy[1]", "Vy[2]"), color = color_diet)[[1]] + + ggtitle("Stationary variance") + + theme(legend.position = "none", + axis.title.y = element_blank()) + + xlab("ln(body mass (kg))^2") + + xlim(0, 100) + +names(color_diet) <- c("theta[1]", "theta[2]") +p6 <- plotTrace(trace, vars = c("theta[1]", "theta[2]"), color = color_diet)[[1]] + + ggtitle("Optimum") + + theme(axis.title.y = element_blank()) + + xlab("ln(body mass (kg))") + + theme(legend.position = "none", + axis.title.y = element_blank()) + +legend <- get_legend2(p6 + theme(legend.position = "left", + legend.box.margin = margin(0, 0, 0, 12)) + + guides(color = guide_legend(title='Diet'), + fill=guide_legend(title='Diet')) + + scale_fill_discrete(name = "Diet") + + scale_color_manual(values=c("#88ccee", "#999933"), + name="Diet", + labels=c("non-herbivore", "herbivore"))) + +row1 <- cowplot::plot_grid(p1, p2, p3, ncol=3) +row2_left <- cowplot::plot_grid(p4, p5, p6, ncol=3) +row2 <- cowplot::plot_grid(row2_left, legend, ncol=2, rel_widths = c(1, 0.3)) +plot <- cowplot::plot_grid(row1, row2, ncol=1, rel_heights=c(3,2)) + + +``` + +{% figure fig_state_dependent_OU %} + +{% figcaption %} +**Top row: Estimated discrete character history on the phylogeny. Bottom row: Estimated state-dependent OU parameters.** +Here we show the results of our example analysis. +{% endfigcaption %} +{% endfigure %} + + + diff --git a/tutorials/cont_traits/scripts/mcmc_state_dependent_OU.Rev b/tutorials/cont_traits/scripts/mcmc_state_dependent_OU.Rev new file mode 100644 index 00000000..7df81a09 --- /dev/null +++ b/tutorials/cont_traits/scripts/mcmc_state_dependent_OU.Rev @@ -0,0 +1,137 @@ +################################################################################ +# +# RevBayes Example: The state-dependent Ornstein-Uhlenbeck model under +# a joint-inference approach +# +# +# authors: Priscilla Lau and Sebastian Höhna +# +################################################################################ + +####################### +# Reading in the Data # +####################### + +### Select the trait to analyze +char_disc <- 1 +char_cont <- 1 + +### Read in the trees +tree <- readTrees("data/diprotodontia_tree.nex")[1] + +ntips <- tree.ntips() +nbranches <- 2 * ntips - 2 +root_age <- tree.rootAge() + +### Read in the character data +disc <- readDiscreteCharacterData("data/diprotodontia_discrete_diet.nex") +disc.excludeAll() +disc.includeCharacter( char_disc ) +num_disc_states <- disc.getStateDescriptions().size() + +cont <- readContinuousCharacterData("data/diprotodontia_continuous.nex") +cont.excludeAll() +cont.includeCharacter( char_cont ) + +# Create some vector for the moves and monitors of this analysis +moves = VectorMoves() +monitors = VectorMonitors() +avmvn_disc = mvAVMVN(weight=20, waitBeforeLearning=500, waitBeforeUsing=1000) +avmvn_cont = mvAVMVN(weight=20, waitBeforeLearning=500, waitBeforeUsing=1000) + +############################## +# Specify the discrete model # +############################## +num_type_transition <- num_disc_states * (num_disc_states - 1) +rates ~ dnDirichlet( rep(1, num_type_transition) ) +moves.append( mvBetaSimplex( rates, weight=2 ) ) +moves.append( mvDirichletSimplex( rates, weight=2 ) ) +avmvn_disc.addVariable( rates ) + +Q := fnFreeK( rates, rescaled=TRUE ) + +lambda ~ dnExponential(200) +moves.append( mvScale(lambda, weight=1.0) ) +avmvn_disc.addVariable(lambda) + +moves.append( avmvn_disc ) + +X ~ dnPhyloCTMCDASiteIID(tree, Q, branchRates=lambda, type="Standard", nSites=1) +X.clamp(disc) + +moves.append( mvCharacterHistory(ctmc=X, qmap_site=Q, graph="node", proposal="rejection", weight=100.0) ) +moves.append( mvCharacterHistory(ctmc=X, qmap_site=Q, graph="branch", proposal="rejection", weight=50.0) ) + +for(i in 1:nbranches) { + num_changes[i] := sum(X.numCharacterChanges(i)) +} +total_num_changes := sum(num_changes) + +char_hist := X.characterHistories() + +############################## +# Specify the continuous model # +############################## +root_age <- tree.rootAge() +v_emp <- cont.var(char_cont) + +for (i in 1:num_disc_states){ + theta[i] ~ dnUniform(-10, 10) + moves.append(mvSlide(theta[i], weight = 1.0) ) + avmvn_cont.addVariable(theta[i]) +} + +# The hyperprior option +for (i in 1:num_disc_states){ + t_half[i] ~ dnLognormal(ln(root_age/2), 0.587405) + moves.append(mvScale(t_half[i], weight = 1.0) ) + avmvn_cont.addVariable(t_half[i]) + alpha[i] := abs(ln(2)/t_half[i]) + + Vy[i] ~ dnLognormal(ln(v_emp), 0.587405) + moves.append(mvScale(Vy[i], weight = 3.0) ) + avmvn_cont.addVariable(Vy[i]) + sigma2[i] := Vy[i] * 2 * alpha[i] +} + + +# The direct prior option +# for (i in 1:num_disc_states){ +# alpha[i] ~ dnExponential(root_age/2/ln(2)) +# moves.append(mvScale(alpha[i], weight = 1.0) ) +# avmvn_cont.addVariable(alpha[i]) +# +# sigma2[i] ~ dnLognormal(ln(sqrt(v_emp), 0.587405)) +# moves.append(mvScale(sigma2[i], weight = 3.0) ) +# avmvn_cont.addVariable(sigma2[i]) +# } + +moves.append( avmvn_cont ) + + +############# +# The Model # +############# +Y ~ dnPhyloOUSD(char_hist, theta=theta, rootTreatment="optimum", alpha=alpha, sigma=sigma2^0.5) +Y.clamp(cont) + +mymodel = model(Y) + +### set up the monitors that will output parameter values to file and screen +monitors.append( mnModel(filename="output/trace.log", printgen=10) ) +monitors.append( mnScreen(printgen=1000, theta) ) +monitors.append( mnFile( char_hist, filename="output/char_hist.trees", printgen=10 ) ) + + +################ +# The Analysis # +################ + +### workspace mcmc ### +mymcmc = mcmc(mymodel, monitors, moves, nruns=2, combine="mixed") + +### run the MCMC ### +mymcmc.run(generations=10000, tuningInterval=200) + +## quit ## +q() \ No newline at end of file diff --git a/tutorials/cont_traits/scripts/plot_helper_state_dependent_OU.R b/tutorials/cont_traits/scripts/plot_helper_state_dependent_OU.R new file mode 100644 index 00000000..b2cb70ab --- /dev/null +++ b/tutorials/cont_traits/scripts/plot_helper_state_dependent_OU.R @@ -0,0 +1,1298 @@ +library(ape) +library(phytools) +library(tidyverse) + +processStochMaps <- function(tree, + paths = NULL, + simmap = NULL, + states, + num_intervals = 1000, + verbose = TRUE, + ...) { + + # pull tree from list object if necessary + if (inherits(tree,"list")) { + if (length(tree) == 1){ + tree <- tree[[1]] + } else {stop("tree should contain only one tree object")} + } + + if (inherits(tree,"list")) { + if (length(tree) == 1){ + tree <- tree[[1]] + } else {stop("tree should contain only one tree object")} + } + + # compute the number of states + nstates <- length(states) + + # create the index map + map <- matchNodes(tree@phylo) + + # either paths or simmap must be provided + if ( !is.null(paths) ) { # samples in files + + # read traces + samples <- readTrace(paths, verbose = verbose, ...) + + # combine multiple samples together + if ( length(samples) > 1 ) { + samples <- combineTraces(samples) + samples <- samples[[1]] + } else { + samples <- samples[[1]] + } + + # compute the number of samples + nsamples <- nrow(samples) + + } else if ( !is.null(simmap) ) { # samples in phytools format + + message("Reformatting simmap objects") + + # make the samples + samples <- as.data.frame(do.call(rbind, lapply(simmap, function(simmap) { + sapply(simmap$maps, function(edge) { + edge <- rev(edge) + return(paste0("{", paste0(paste0(names(edge),",", edge), collapse = ":"),"}")) + }) + }))) + + # add a root edge + root_edge_samples <- sapply(simmap, function(map) { + return(paste0("{", tail(names(map$maps[[1]]), n = 1), ",0}")) + }) + samples <- cbind(samples, root_edge_samples) + + # get the nodes + nodes <- c(tree@phylo$edge[,2], ape::Ntip(tree@phylo) + 1) + colnames(samples) <- map$Rev[nodes] + + # compute the number of samples + nsamples <- length(simmap) + + } else { + stop("Please provide either a paths or simmap argument.") + } + + message("Processing stochastic maps") + + # get the number of branches + # including the root branch + num_branches <- length(tree@phylo$edge.length) + 1 + root_index <- ape::Ntip(tree@phylo) + 1 + + # get the dt + root_age <- max(ape::branching.times(tree@phylo)) + if (!is.null(tree@phylo$root.edge)) { + root_age <- root_age + tree@phylo$root.edge + } else { + tree@phylo$root.edge <- 0 + } + dt <- root_age / num_intervals + + # loop over branches + dfs <- vector("list", num_branches) + + if (verbose) { pb <- txtProgressBar(min = 0, max = num_branches, initial = 0) } + + for(i in 1:num_branches) { + + # get the branch indexes + R_index <- map$R[i] + Rev_index <- as.character(map[R_index,2]) + + # get the time points + if ( R_index == root_index ) { + this_edge_length <- tree@phylo$root.edge + } else { + this_edge_length <- tree@phylo$edge.length[tree@phylo$edge[,2] == R_index] + } + these_pts <- seq(0, this_edge_length, by = dt) + + # get the samples + branch_samples <- samples[,Rev_index] + branch_samples <- gsub("{", "", branch_samples, fixed = TRUE) + branch_samples <- gsub("}", "", branch_samples, fixed = TRUE) + + # split the per event + branch_samples <- strsplit(branch_samples, ":") + + # get the times per state + branch_samples <- lapply(branch_samples, function(sample) { + sample <- do.call(rbind, strsplit(sample, ",")) + sample_states <- sample[,1] + sample_times <- as.numeric(sample[,2]) + names(sample_times) <- sample_states + # sample_times <- rev(sample_times) # turn this on for plotting the output from (old) tensorphylo runs + return(cumsum(sample_times)) + }) + + # get the state per interval + if ( this_edge_length == 0 ) { + branch_states_per_interval <- t(t(match(names(unlist(branch_samples)), states))) + } else { + branch_states_per_interval <- do.call(rbind, lapply(branch_samples, function(sample) { + match(names(sample)[findInterval(these_pts, sample) + 1], states) + })) + } + + # compute probability of each state per interval + branch_prob_per_state <- apply(branch_states_per_interval, 2, tabulate, nbins = nstates) / nsamples + rownames(branch_prob_per_state) <- states + + # now do the vertical segments + vert_prob_per_state <- t(branch_prob_per_state[,ncol(branch_prob_per_state), drop = FALSE]) + + # make the df + this_df <- data.frame(index = Rev_index, bl = this_edge_length, x0 = these_pts, x1 = c(these_pts[-1], this_edge_length), vert = FALSE) + this_df <- cbind(this_df, t(branch_prob_per_state)) + vert_df <- cbind(data.frame(index = Rev_index, bl = this_edge_length, x0 = this_edge_length, x1 = this_edge_length, vert = TRUE), vert_prob_per_state) + this_df <- rbind(this_df, vert_df) + + # store + dfs[[i]] <- this_df + + if (verbose) { setTxtProgressBar(pb,i) } + + } + if (verbose) { close(pb) } + + # combine the branches + dfs <- do.call(rbind, dfs) + + # get node instead of index + # node is R's standard numbering for nodes + # index is RevBayes specific + colnames(map) <- c("node", "index") + map$index <- as.character(map$index) + dfs <- dplyr::full_join(map,dfs, by = "index") + dfs$index <- NULL + + return(dfs) + +} + +plotStochMaps <- function(tree, + maps, + colors = "default", + color_by = "prob", + tree_layout = "rectangular", + line_width = 1, + tip_labels = TRUE, + tip_labels_italics = FALSE, + tip_labels_formatted = FALSE, + tip_labels_remove_underscore = TRUE, + tip_labels_color = "black", + tip_labels_size = 3, + tip_labels_offset = 0, + timeline = FALSE, + geo_units = list("epochs", "periods"), + geo = timeline, + time_bars = timeline, + label_sampled_ancs = FALSE, + ...) { + + # pull tree from list object if necessary + if (inherits(tree,"list")) { + if (length(tree) == 1){ + tree <- tree[[1]] + } else {stop("tree should contain only one tree object")} + } + + if (inherits(tree,"list")) { + if (length(tree) == 1){ + tree <- tree[[1]] + } else {stop("tree should contain only one tree object")} + } + + p <- plotTreeFull( + tree = list(list(tree)), + tree_layout = tree_layout, + line_width = line_width, + + tip_labels = tip_labels, + tip_labels_italics = tip_labels_italics, + tip_labels_formatted = tip_labels_formatted, + tip_labels_remove_underscore = tip_labels_remove_underscore, + tip_labels_color = tip_labels_color, + tip_labels_size = tip_labels_size, + tip_labels_offset = tip_labels_offset, + + timeline = timeline, + geo_units = geo_units, + geo = timeline, + time_bars = timeline, + + label_sampled_ancs = label_sampled_ancs, + + node_age_bars = FALSE, + age_bars_color = "blue", + age_bars_colored_by = NULL, + + node_labels = NULL, + node_labels_color = "black", + node_labels_size = 3, + node_labels_offset = 0, + + node_pp = FALSE, + node_pp_shape = 16, + node_pp_color = "black", + node_pp_size = "variable", + + branch_color = "black", + color_branch_by = NULL, + + tip_age_bars = FALSE, + lineend = "square", + ... + ) + + if (colors[1] != "default") { + # error checking + if (is.null(names(colors))) + {stop("colors must be a NAMED vector of colors where names correspond to the character states")} + states <- names(colors) + } else { + states <- colnames(maps)[-c(1:5)] + colors <- colFun(length(states)) + names(colors) <- states + } + + dat <- dplyr::left_join(maps, p$data, by = "node") + + #set up colors + if (color_by == "MAP") { + max <- apply(dat[, states], MARGIN = 1, which.max) + seg_col <- colors[unlist(max)] + dat$seg_col <- seg_col + names(seg_col) <- seg_col + } else if (color_by == "prob") { + rgbcols <- col2rgb(colors) + rgb_values_per_seg <- t(rgbcols %*% t(dat[,states])) + seg_col <- tolower(grDevices::rgb(red = rgb_values_per_seg[ ,1], + green = rgb_values_per_seg[ ,2], + blue = rgb_values_per_seg[ ,3], + maxColorValue = 255)) + dat$seg_col <- seg_col + names(seg_col) <- seg_col + } + + # horizontal segments + dat_horiz <- dat[dat$vert == FALSE,] + + seg_horiz <- data.frame( + x = dat_horiz$x - dat_horiz$x0, + xend = dat_horiz$x - dat_horiz$x1, + y = dat_horiz$y, + yend = dat_horiz$y, + col = dat_horiz$seg_col + ) + + #vertical segments + dat_vert <- dat[dat$vert == TRUE,] + + m <- match(x = dat_vert$parent, dat_vert$node) + dat_vert$y_parent <- dat_vert[m, "y"] + dat_vert$x_parent <- dat_vert[m, "x"] + + seg_vert <- data.frame( + x = dat_vert$x_parent, + xend = dat_vert$x_parent, + y = dat_vert$y, + yend = dat_vert$y_parent, + col = dat_vert$seg_col + ) + + # plot! + + p + ggplot2::geom_segment( + data = seg_horiz, + ggplot2::aes( + x = x, + y = y, + xend = xend, + yend = yend, + color = col + ), + lineend = "square", + size = line_width, + ) + + ggplot2::geom_segment( + data = seg_vert, + ggplot2::aes( + x = x, + y = y, + xend = xend, + yend = yend, + color = col + ), + lineend = "square", + size = line_width, + + ) + + ggplot2::scale_color_manual(values = seg_col, + breaks = colors, + name = "State", + labels = names(colors)) + +} + +plotTreeFull <- function(tree, + + timeline, + geo, + geo_units, + time_bars, + + node_age_bars, + tip_age_bars, + age_bars_color, + age_bars_colored_by, + + node_labels, + node_labels_color, + node_labels_size, + node_labels_offset, + + tip_labels, + tip_labels_italics, + tip_labels_formatted, + tip_labels_remove_underscore, + tip_labels_color, + tip_labels_size, + tip_labels_offset, + + label_sampled_ancs, + + node_pp, + node_pp_shape, + node_pp_color, + node_pp_size, + + branch_color, + color_branch_by, + line_width, + + tree_layout, + ...) { + # enforce argument matching + if (!is.list(tree)) + stop("tree should be a list of lists of treedata objects") + if (!methods::is(tree[[1]][[1]], "treedata")) + stop("tree should be a list of lists of treedata objects") + vars <- colnames(tree[[1]][[1]]@data) + if (is.logical(timeline) == FALSE) + stop("timeline should be TRUE or FALSE") + if (is.logical(node_age_bars) == FALSE) + stop("node_age_bars should be TRUE or FALSE") + if (any(.isColor(age_bars_color) == FALSE)) + stop("age_bars_color should be valid color(s)") + if (is.null(age_bars_colored_by) == FALSE & + any(vars %in% age_bars_colored_by) == FALSE) + stop("age_bars_colored_by should be a column in your tidytree object") + if (is.null(node_labels) == FALSE & + any(vars %in% node_labels) == FALSE) + stop("node_labels should be NULL or a column in your tidytree object") + if (is.null(node_labels_color) == FALSE & + .isColor(node_labels_color) == FALSE) + stop("node_labels_color should be NULL or a recognized color") + if (is.logical(tip_labels) == FALSE) + stop("tip_labels should be TRUE or FALSE") + if (is.logical(tip_labels_italics) == FALSE) + stop("tip_labels_italics should be TRUE or FALSE") + if (is.logical(tip_labels_formatted) == FALSE) + stop("tip_labels_formatted should be TRUE or FALSE") + if (tip_labels_italics == TRUE & tip_labels_formatted == TRUE) + stop("tip_labels_italics and tip_labels_formatted may not both be TRUE") + if (.isColor(tip_labels_color) == FALSE) + stop("tip_labels_color should be a recognized color") + if (!methods::is(node_pp,"logical")) + stop("node_pp should be TRUE or FALSE") + if (node_pp) { + if (length(node_pp_color) > 2) + stop("node_pp_color should be of length 1 or 2") + if (.isColor(node_pp_color) == FALSE) + stop("node_pp_color should be a recognized color") + if (node_pp_shape %in% 0:25 == FALSE) + stop("node_pp_shape should be a recognized shape + (value between 0 and 25)") + if (is.numeric(node_pp_size) == FALSE & + node_pp_size != "variable") + stop("node_pp_size should be numeric or 'variable'") + } + if (is.logical(tip_age_bars) == FALSE) + stop("tip_age_bars should be TRUE or FALSE") + if (length(branch_color) == 1 & + !.isColor(branch_color)) + stop("branch_color should be a recognized color") + if (length(branch_color) == 2) { + if (.isColor(branch_color[1] == FALSE) & + .isColor(branch_color[2]) == FALSE) + stop("Neither values of branch_color are a recognized color") + if (.isColor(branch_color[1] == FALSE) & + .isColor(branch_color[2])) + stop("branch_color[1] is not a recognized color") + if (.isColor(branch_color[1]) & + .isColor(branch_color[2]) == FALSE) + stop("branch_color[2] is not a recognized color") + } else if (length(branch_color) > 2) + stop("only 2 colors may be specified in branch_color") + if (is.null(color_branch_by) == FALSE & + any(vars %in% color_branch_by) == FALSE) + stop("color_branch_by should be NULL or a column in your tidytree object") + if (is.numeric(line_width) == FALSE) + stop ("line_width should be numeric") + if (is.logical(label_sampled_ancs) == FALSE) + stop("label_sampled_ancs should be TRUE or FALSE") + if (is.list(geo_units)) { + if (length(geo_units) != 2) + stop( + "geo_units should be 'periods', 'epochs', 'stages', 'eons', + 'eras', or a list of two of those units, such as: + list('epochs','periods')" + ) + if (geo_units[[1]] %in% + c('periods', 'epochs', 'stages', 'eons', 'eras') == FALSE) + stop( + "geo_units should be 'periods', 'epochs', 'stages', 'eons', + 'eras', or a list of two of those units, such as: + list('epochs','periods')" + ) + if (geo_units[[2]] %in% + c('periods', 'epochs', 'stages', 'eons', 'eras') == FALSE) + stop( + "geo_units should be 'periods', 'epochs', 'stages', 'eons', + 'eras', or a list of two of those units, such as: + list('epochs','periods')" + ) + } else { + if (geo_units %in% + c('periods', 'epochs', 'stages', 'eons', 'eras') == FALSE) + stop( + "geo_units should be 'periods', 'epochs', 'stages', 'eons', + 'eras', or a list of two of those units, such as: + list('epochs','periods')" + ) + } + if (is.numeric(tip_labels_offset) == FALSE) + stop ("tip_labels_offset should be a number") + if (is.numeric(node_labels_offset) == FALSE) + stop ("node_labels_offset should be a number") + tree_layout <- + match.arg( + tree_layout, + choices = c( + 'rectangular', + 'slanted', + 'ellipse', + 'cladogram', + 'roundrect', + 'fan', + 'circular', + 'inward_circular', + 'radial', + 'equal_angle', + 'daylight', + 'ape' + ) + ) + if (tree_layout != "rectangular") { + if (timeline == TRUE) { + stop("timeline is only compatible with + tree_layout = 'rectangular'") + } + } + # grab single tree from input + phy <- tree[[1]][[1]] + + ### fix for trees with sampled ancestors ### + phylo <- phy@phylo + node_map <- .matchNodesTreeData(phy, phylo) + if ("sampled_ancestor" %in% colnames(phy@data)) { + phy@data$node <- + as.character(node_map[match(as.numeric(phy@data$index), + node_map$Rev), ]$R) + } + + ### set up tree layout ### + + if (tree_layout == "cladogram") { + tree_layout <- "rectangular" + BL <- "none" + } else { + BL <- "branch.length" + } + + # initiate plot + if (is.null(color_branch_by)) { + pp <- ggtree::ggtree( + phy, + right = FALSE, + size = line_width, + color = branch_color, + branch.length = BL, + layout = tree_layout, + ... + ) + } else if (!is.null(color_branch_by)) { + pp <- ggtree::ggtree( + phy, + right = FALSE, + size = line_width, + branch.length = BL, + layout = tree_layout, + ... + ) + } + + #### parameter compatibility checks ### + if (length(node_pp_color) == 2 & + length(branch_color) == 2) + stop( + "You may only include variable colors for either node_pp_label or + branch_color, not for both" + ) + + #check that if user wants node_age_bars, there are dated intervals in file + if (node_age_bars == TRUE) { + if (!"age_0.95_HPD" %in% colnames(phy@data)) + stop( + "You specified node_age_bars, but there is no age_0.95_HPD column + in the treedata object." + ) + } + + # get dimensions + n_node <- treeio::Nnode(phy) + tree_height <- max(phytools::nodeHeights(phy@phylo)) + ntips <- sum(pp$data$isTip) + + # reformat labels if necessary + if (tip_labels_remove_underscore) { + pp$data$label <- gsub("_", " ", pp$data$label) + } + + # check that if user wants to label sampled ancs, + # there are sampled ancs in the files + if (label_sampled_ancs == TRUE & + "sampled_ancestor" %in% colnames(pp$data)) { + sampled_ancs <- + pp$data[!pp$data$isTip & !is.na(pp$data$sampled_ancestor),] + if (nrow(sampled_ancs) < 1) { + label_sampled_acs <- FALSE + } + } + + # add timeline + if (timeline == TRUE) { + warning("Plotting with default axis label (Age (Ma))") + if (node_age_bars == FALSE) { + minmax <- phytools::nodeHeights(phy@phylo) + max_age <- tree_height + } else { + pp$data$age_0.95_HPD <- lapply(pp$data$age_0.95_HPD, function(z) { + if (any(is.null(z)) || + any(is.na(z))) { + return(c(NA, NA)) + } else { + return(as.numeric(z)) + } + }) + minmax <- t(matrix(unlist(pp$data$age_0.95_HPD), nrow = 2)) + max_age <- max(minmax, na.rm = TRUE) + } + + interval <- max_age / 5 + dx <- max_age %% interval + + # add geo + tick_height <- ntips / 100 + if (geo == TRUE) { + #determine whether to include quaternary + if (tree_height > 50) { + skipit <- c("Quaternary", "Holocene", "Late Pleistocene") + } else { + skipit <- c("Holocene", "Late Pleistocene") + } + # add deep timescale + if (length(geo_units) == 1) { + pp <- pp + deeptime::coord_geo( + dat = geo_units, + pos = lapply(seq_len(length(geo_units)), function(x) + "bottom"), + size = lapply(seq_len(length(geo_units)), function(x) + tip_labels_size), + xlim = c(-max(minmax, na.rm = TRUE), tree_height / + 2), + ylim = c(-tick_height * 5, ntips * + 1.1), + height = grid::unit(4, "line"), + skip = skipit, + abbrv = FALSE, + rot = 90, + center_end_labels = TRUE, + bord = c("right", "top", "bottom"), + neg = TRUE + ) + } else if (length(geo_units) == 2) { + pp <- pp + deeptime::coord_geo( + dat = geo_units, + pos = lapply(seq_len(length(geo_units)), function(x) + "bottom"), + size = lapply(seq_len(length(geo_units)), function(x) + tip_labels_size), + xlim = c(-max(minmax, na.rm = TRUE), tree_height / + 2), + ylim = c(-tick_height * 5, ntips * + 1.1), + skip = skipit, + center_end_labels = TRUE, + bord = c("right", "top", "bottom"), + neg = TRUE + ) + } + } + #add axis title + pp <- pp + ggplot2::scale_x_continuous( + name = "Age (Ma)", + expand = c(0, 0), + limits = c(-max_age, tree_height / + 2), + breaks = -rev(seq(0, max_age + + dx, interval)), + labels = rev(seq(0, max_age + + dx, interval)) + ) + pp <- ggtree::revts(pp) + + # add ma ticks and labels + xline <- pretty(c(0, max_age))[pretty(c(0, max_age)) < max_age] + df <- + data.frame( + x = -xline, + y = rep(-tick_height * 5, length(xline)), + vx = -xline, + vy = rep(-tick_height * 5 + tick_height, length(xline)) + ) + + pp <- + pp + ggplot2::geom_segment(ggplot2::aes( + x = 0, + y = -tick_height * 5, + xend = -max_age, + yend = -tick_height * 5 + )) + + ggplot2::geom_segment(data = df, ggplot2::aes( + x = x, + y = y, + xend = vx, + yend = vy + )) + + ggplot2::annotate( + "text", + x = -rev(xline), + y = -tick_height * 5 + tick_height * 2, + label = rev(xline), + size = tip_labels_size + ) + + # add vertical gray bars + if (time_bars) { + if (geo) { + if ("epochs" %in% geo_units) { + x_pos <- -rev(c(0, deeptime::getScaleData("epochs")$max_age)) + } else { + x_pos <- -rev(c(0, deeptime::getScaleData("periods")$max_age)) + } + } else if (!geo) { + x_pos <- -rev(xline) + } + for (k in 2:(length(x_pos))) { + box_col <- "gray92" + if (k %% 2 == 1) + box_col <- "white" + box <- + ggplot2::geom_rect( + xmin = x_pos[k - 1], + xmax = x_pos[k], + ymin = -tick_height * 5, + ymax = ntips, + fill = box_col + ) + pp <- gginnards::append_layers(pp, box, position = "bottom") + } + } + if (tip_labels) { + # recenter legend + tot <- max_age + tree_height / 2 + pp <- + pp + + ggplot2::theme(axis.title.x = + ggplot2::element_text(hjust = max_age / (2 * tot))) + } + } + + # processing for node_age_bars and tip_age_bars + if (node_age_bars == TRUE) { + # Encountered problems with using geom_range to plot age HPDs in ggtree. It + # appears that geom_range incorrectly rotates the HPD relative to the height + # of the node unnecessarily. My guess for this would be because older + # version of ggtree primarily supported length measurements, and not + # height measurements so the new capability to handle height might contain + # a "reflection" bug. + # For example, suppose a node has height 3 with HPD [2, 7]. You can think of + # this offset as h + [2-h, 7-h]. ggtree seems to "rotate" this + # causing the HPD to appear as [-1, 4]. Figtree displays this correctly. + # + # See this excellent trick by Tauana: + # https://groups.google.com/forum/#!msg/bioc-ggtree/wuAlY9phL9Q/L7efezPgDAAJ + # Adapted this code to also plot fossil tip uncertainty in red + pp$data$age_0.95_HPD <- + lapply(pp$data$age_0.95_HPD, function(z) { + if (any(is.null(z)) || + any(is.na(z))) { + return(c(NA, NA)) + } else { + return(as.numeric(z)) + } + }) + + minmax <- t(matrix(unlist(pp$data$age_0.95_HPD), nrow = 2)) + bar_df <- + data.frame( + node_id = as.integer(pp$data$node), + isTip = pp$data$isTip, + as.data.frame(minmax) + ) + names(bar_df) <- c("node_id", "isTip", "min", "max") + if (tip_age_bars == TRUE) { + tip_df <- dplyr::filter(bar_df, isTip == TRUE & !is.na(min)) + tip_df <- + dplyr::left_join(tip_df, pp$data, by = c("node_id" = "node")) + tip_df <- dplyr::select(tip_df, node_id, min, max, y) + } + if (is.null(age_bars_colored_by) == TRUE) { + # plot age densities + bar_df <- + dplyr::left_join(bar_df, pp$data, by = c("node_id" = "node")) + bar_df <- dplyr::select(bar_df, node_id, min, max, y) + pp <- + pp + ggplot2::geom_segment( + ggplot2::aes( + x = -min, + y = y, + xend = -max, + yend = y + ), + data = bar_df, + color = age_bars_color, + size = 1.5, + alpha = 0.8 + ) + } else if (is.null(age_bars_colored_by) == FALSE) { + if (length(age_bars_color) == 1) { + age_bars_color <- colFun(2)[2:1] + } + + if ("sampled_ancestor" %in% colnames(pp$data) == TRUE) { + sampled_tip_probs <- + 1 - as.numeric(pp$data$sampled_ancestor[pp$data$isTip == TRUE]) + sampled_tip_probs[is.na(sampled_tip_probs)] <- 0 + } else { + sampled_tip_probs <- rep(1, sum(pp$data$isTip)) + } + + pp$data$olena <- + c(sampled_tip_probs, + as.numeric(.convertAndRound(L = + unlist(pp$data[pp$data$isTip == FALSE, + age_bars_colored_by])))) + + bar_df <- + dplyr::left_join(bar_df, pp$data, by = c("node_id" = "node")) + bar_df <- + dplyr::select(bar_df, node_id, min, max, y, olena, isTip = isTip.x) + if (tip_age_bars == FALSE) { + bar_df <- dplyr::filter(bar_df, isTip == FALSE) + } + pp <- + pp + ggplot2::geom_segment( + ggplot2::aes( + x = -min, + y = y, + xend = -max, + yend = y, + color = olena + ), + data = bar_df, + size = 1.5, + alpha = 0.8 + ) + + ggplot2::scale_color_gradient( + low = age_bars_color[1], + high = age_bars_color[2], + name = .titleFormat(age_bars_colored_by), + breaks = pretty(pp$data$olena) + ) + } + } + + # label sampled ancestors + if (label_sampled_ancs == TRUE & + "sampled_ancestor" %in% colnames(pp$data)) { + sampled_ancs <- + pp$data[!pp$data$isTip & !is.na(pp$data$sampled_ancestor),] + space_labels <- ntips / 30 + if (tip_labels_italics == TRUE) { + pp <- + pp + ggplot2::annotate( + "text", + x = sampled_ancs$x, + y = sampled_ancs$y, + label = seq_len(nrow(sampled_ancs)), + vjust = -.5, + size = tip_labels_size, + color = tip_labels_color + ) + + ggplot2::annotate( + "text", + x = rep(-max( + unlist(pp$data$age_0.95_HPD), na.rm = TRUE + ), + times = nrow(sampled_ancs)), + y = seq( + from = ntips - space_labels, + by = -space_labels, + length.out = nrow(sampled_ancs) + ), + label = paste0(seq_len(nrow(sampled_ancs)), + ": italic(`", + sampled_ancs$label, + "`)"), + size = tip_labels_size, + color = tip_labels_color, + hjust = 0, + parse = TRUE + ) + } else if (tip_labels_italics == TRUE) { + pp <- + pp + ggplot2::annotate( + "text", + x = sampled_ancs$x, + y = sampled_ancs$y, + label = seq_len(nrow(sampled_ancs)), + vjust = -.5, + size = tip_labels_size, + color = tip_labels_color + ) + + ggplot2::annotate( + "text", + x = rep(-max( + unlist(pp$data$age_0.95_HPD), na.rm = TRUE + ), + times = nrow(sampled_ancs)), + y = seq( + from = ntips - space_labels, + by = -space_labels, + length.out = nrow(sampled_ancs) + ), + label = paste0(seq_len(nrow(sampled_ancs)), + ": ", + sampled_ancs$label), + size = tip_labels_size, + color = tip_labels_color, + hjust = 0, + parse = TRUE + ) + } else { + pp <- + pp + ggplot2::annotate( + "text", + x = sampled_ancs$x, + y = sampled_ancs$y, + label = seq_len(nrow(sampled_ancs)), + vjust = -.5, + size = tip_labels_size, + color = tip_labels_color + ) + + ggplot2::annotate( + "text", + x = rep(-max( + unlist(pp$data$age_0.95_HPD), na.rm = TRUE + ), + times = nrow(sampled_ancs)), + y = seq( + from = ntips - space_labels, + by = -space_labels, + length.out = nrow(sampled_ancs) + ), + label = paste0(seq_len(nrow(sampled_ancs)), + ": ", + sampled_ancs$label), + size = tip_labels_size, + color = tip_labels_color, + hjust = 0 + ) + } + + t_height <- ntips / 200 + df <- data.frame( + x = sampled_ancs$x, + vx = sampled_ancs$x, + y = sampled_ancs$y + t_height, + vy = sampled_ancs$y - t_height + ) + pp <- + pp + ggplot2::geom_segment(data = df, ggplot2::aes( + x = x, + y = y, + xend = vx, + yend = vy + )) + + } + + # add node labels (text) + if (is.null(node_labels) == FALSE) { + # catch some funkiness from importing an unrooted tree + if (node_labels == "posterior") { + pp$data[grep("]:", unlist(pp$data[, node_labels])), node_labels] <- + NA + } + pp$data$kula <- + c(rep(NA, times = ntips), + .convertAndRound(L = + unlist(pp$data[pp$data$isTip == FALSE, + node_labels]))) + + # change any NAs that got converted to characters back to NA + pp$data$kula[pp$data$kula == "NA"] <- NA + pp <- pp + ggtree::geom_text( + ggplot2::aes(label = kula), + color = node_labels_color, + nudge_x = node_labels_offset, + hjust = 0, + size = node_labels_size + ) + } + + # add tip labels (text) + if (tip_labels == TRUE) { + if (tip_age_bars == TRUE) { + pp$data$extant <- !pp$data$node %in% tip_df$node_id + } else { + pp$data$extant <- TRUE + } + if (tip_labels_italics == TRUE) { + pp <- pp + ggtree::geom_tiplab( + ggplot2::aes( + subset = extant & isTip, + label = paste0('italic(`', label, '`)') + ), + size = tip_labels_size, + offset = tip_labels_offset, + color = tip_labels_color, + parse = TRUE + ) + if (tip_age_bars == TRUE) { + new_tip_df <- + dplyr::left_join(tip_df, + pp$data[, c("label", "node")], + by = c("node_id" = "node")) + pp <- + pp + ggplot2::annotate( + "text", + x = -new_tip_df$min + tip_labels_offset, + y = new_tip_df$y, + label = paste0('italic(`', new_tip_df$label, '`)'), + hjust = 0, + color = tip_labels_color, + size = tip_labels_size, + parse = TRUE + ) + } + } else if (tip_labels_formatted == TRUE ) { + pp <- pp + ggtree::geom_tiplab( + ggplot2::aes(subset = extant & isTip, + label = label), + size = tip_labels_size, + offset = tip_labels_offset, + color = tip_labels_color, + parse = TRUE + ) + if (tip_age_bars == TRUE) { + new_tip_df <- + dplyr::left_join(tip_df, + pp$data[, c("label", "node")], + by = c("node_id" = "node")) + pp <- + pp + ggplot2::annotate( + "text", + x = -new_tip_df$min + tip_labels_offset, + y = new_tip_df$y, + label = new_tip_df$label, + hjust = 0, + color = tip_labels_color, + size = tip_labels_size, + parse = TRUE + ) + } + } else { + pp <- pp + ggtree::geom_tiplab( + ggplot2::aes(subset = extant & isTip, + label = label), + size = tip_labels_size, + offset = tip_labels_offset, + color = tip_labels_color + ) + if (tip_age_bars == TRUE) { + new_tip_df <- + dplyr::left_join(tip_df, + pp$data[, c("label", "node")], + by = c("node_id" = "node")) + pp <- + pp + ggplot2::annotate( + "text", + x = -new_tip_df$min + tip_labels_offset, + y = new_tip_df$y, + label = new_tip_df$label, + hjust = 0, + color = tip_labels_color, + size = tip_labels_size + ) + } + } + } + + # add node PP (symbols) + if (node_pp == TRUE) { + # reformat posterior + pp$data$posterior <- as.numeric(pp$data$posterior) + + if (length(node_pp_color) == 1 & node_pp_size == "variable") { + pp <- pp + ggtree::geom_nodepoint(color = node_pp_color, + ggplot2::aes(size = posterior), + shape = node_pp_shape) + + ggplot2::scale_size_continuous(name = "Posterior") + } else if (length(node_pp_color) == 2 & + node_pp_size != "variable") { + pp <- pp + ggtree::geom_nodepoint(size = node_pp_size, + ggplot2::aes(color = posterior), + shape = node_pp_shape) + + ggplot2::scale_color_gradient( + name = "Posterior", + low = node_pp_color[1], + high = node_pp_color[2], + breaks = pretty(pp$data$posterior) + ) + } + } + + # add branch coloration by variable + if (is.null(color_branch_by) == FALSE) { + #set default colors if none provided + if (length(branch_color) != 2) { + branch_color <- c("#005ac8", "#fa7850") + } + col_num <- which(colnames(pp$data) == color_branch_by) + pp$data[, col_num] <- + as.numeric(as.data.frame(pp$data)[, col_num]) + name <- .titleFormat(color_branch_by) + pp <- pp + + ggplot2::aes(color = as.data.frame(pp$data)[, col_num]) + + ggplot2::scale_color_gradient( + low = branch_color[1], + high = branch_color[2], + breaks = pretty(as.data.frame(pp$data)[, col_num]), + name = name + ) + } + + # readjust axis for non-timeline plots + if (timeline == FALSE & BL != "none") { + if (node_age_bars == FALSE) { + xlim_min <- -tree_height + } else { + xlim_min <- -max(t(matrix( + unlist(pp$data$age_0.95_HPD), + nrow = 2 + )), na.rm = TRUE) + } + + if (tip_labels == TRUE) { + xlim_max <- tree_height / 2 + } else { + xlim_max <- 0 + } + + pp <- pp + ggtree::xlim(xlim_min, xlim_max) + pp <- ggtree::revts(pp) + + } + + # readjust axis for cladograms + if (timeline == FALSE & BL == "none") { + xlim_min <- range(pp$data$x)[1] + + if (tip_labels == TRUE) { + xlim_max <- range(pp$data$x)[2] * 1.5 + } else { + xlim_max <- range(pp$data$x)[2] + } + + pp <- pp + ggtree::xlim(xlim_min, xlim_max) + + } + + return(pp) +} + +.isColor <- function(var) { + if (is.null(var)) { + return(FALSE) + } else { + t <- try(col2rgb(var), silent = TRUE) + if (length(t) == 1 && methods::is(t, "try-error")) { + return(FALSE) + } + else + return(TRUE) + } +} + +.matchNodesTreeData <- function(treedata, phy) { + # get some useful info + num_sampled_anc <- sum(phy$node.label != "") + num_tips <- length(phy$tip.label) + num_nodes <- phy$Nnode + sampled_ancs <- which(tabulate(phy$edge[, 1]) == 1) + tip_indexes <- 1:(num_tips + num_sampled_anc) + node_indexes <- (num_tips + num_sampled_anc) + num_nodes:1 + + node_map <- + data.frame(R = 1:(num_tips + num_nodes), + Rev = NA, + visits = 0) + current_node <- num_tips + 1 + k <- 1 + t <- 1 + + while (TRUE) { + # compute the number of descendants of this tip + current_num_descendants <- sum(phy$edge[, 1] == current_node) + + if (current_node <= num_tips) { + treedata_node <- + which(as.character(treedata@data$node) == current_node) + node_map$Rev[node_map$R == current_node] <- + as.numeric(treedata@data[treedata_node, ]$index) + current_node <- phy$edge[phy$edge[, 2] == current_node, 1] + t <- t + 1 + + } else if (current_node %in% sampled_ancs) { + if (node_map$visits[node_map$R == current_node] == 0) { + node_map$Rev[node_map$R == current_node] <- node_indexes[k] + k <- k + 1 + } + node_map$visits[node_map$R == current_node] <- + node_map$visits[node_map$R == current_node] + 1 + + if (node_map$visits[node_map$R == current_node] == 1) { + # go left + current_node <- phy$edge[phy$edge[, 1] == current_node, 2][1] + } else if (node_map$visits[node_map$R == current_node] == 2) { + # go down + if (current_node == num_tips + 1) { + break + } else { + current_node <- phy$edge[phy$edge[, 2] == current_node, 1] + } + } + + } else { + if (node_map$visits[node_map$R == current_node] == 0) { + node_map$Rev[node_map$R == current_node] <- node_indexes[k] + k <- k + 1 + } + node_map$visits[node_map$R == current_node] <- + node_map$visits[node_map$R == current_node] + 1 + + num_visits <- node_map$visits[node_map$R == current_node] + + if (num_visits <= current_num_descendants) { + # go to next descendant + current_node <- + phy$edge[phy$edge[, 1] == current_node, 2][current_num_descendants - + num_visits + 1] + } else if (num_visits > current_num_descendants) { + # go down + if (current_node == num_tips + 1) { + break + } else { + current_node <- phy$edge[phy$edge[, 2] == current_node, 1] + } + } + + } + + } + + return(node_map[, 1:2]) + +} + +get_legend2 <- function(plot, legend = NULL) { + if (is.ggplot(plot)) { + gt <- ggplotGrob(plot) + } else { + if (is.grob(plot)) { + gt <- plot + } else { + stop("Plot object is neither a ggplot nor a grob.") + } + } + pattern <- "guide-box" + if (!is.null(legend)) { + pattern <- paste0(pattern, "-", legend) + } + indices <- grep(pattern, gt$layout$name) + not_empty <- !vapply( + gt$grobs[indices], + inherits, what = "zeroGrob", + FUN.VALUE = logical(1) + ) + indices <- indices[not_empty] + if (length(indices) > 0) { + return(gt$grobs[[indices[1]]]) + } + return(NULL) +} + +simmap_to_ancStates <- function(input_path, output_path, tree){ + index_to_rev <- RevGadgets::matchNodes(tree) + + char_hist <- read.table(input_path, header=TRUE) + simmaps <- read.simmap(text=char_hist$char_hist, format="phylip") + + df_rev <- data.frame() + + for (row_num in 1:length(simmaps)){ + simmap <- simmaps[[row_num]] + + # Iteration column + df_rev[row_num, 1] <- row_num-1 + + for (i in 1:(length(simmap$maps))){ + ape_node <- which(index_to_rev[,2]==i) + ape_edge <- which(simmap$edge[,2]==ape_node) + map <- simmap$maps[[ape_edge]] + df_rev[row_num, i+1] <- names(map[length(map)]) + } + df_rev[row_num, length(simmap$maps)+2] <- names(map[1]) + } + + header <- paste0("end_", as.character(1:(length(simmap$maps)+1))) + colnames(df_rev) <- c("Iteration", header) + write_tsv(df_rev, output_path) +} \ No newline at end of file diff --git a/tutorials/cont_traits/state_dependent_ou.md b/tutorials/cont_traits/state_dependent_ou.md new file mode 100644 index 00000000..2ec78c01 --- /dev/null +++ b/tutorials/cont_traits/state_dependent_ou.md @@ -0,0 +1,34 @@ +--- +title: State-dependent Ornstein-Uhlenbeck Models +subtitle: Detecting state-dependent evolutionary optima and rates of adaptation (work in progress) +authors: Priscilla Lau and Sebastian Höhna +level: 6 +order: 1.9 +prerequisites: +- intro +- intro/revgadgets +- mcmc +- cont_traits/cont_trait_intro +- cont_traits/simple_ou +- cont_traits/relaxed_ou + +index: true +redirect: false +include_all: false +include_files: +- data/diprotodontia_tree.nex +- data/diprotodontia_discrete_diet.nex +- data/diprotodontia_discrete_island.nex +- data/diprotodontia_continuous.nex +- scripts/mcmc_state_dependent_OU.Rev +- scripts/plot_state_dependent_OU.R +- scripts/plot_helper_state_dependent_OU.R +--- + +{% section Inferring adaptation of a continuous character to various discrete selective regimes %} + +This tutorial demonstrates how to specify an Ornstein-Uhlenbeck model where all parameters ($\alpha$, $\sigma^2$, and $\theta$) are allowed to vary depending on the state of a discrete character on a time-calibrated phylogeny {% cite LauInprep %}. We provide the probabilistic graphical model representation of each component for this tutorial. After specifying the model, you will estimate the parameters of the Ornstein-Uhlenbeck process for each state. + +{% include_relative modules/state_dependent_OU_parameter_estimation.md %} + +{% include_relative modules/state_dependent_OU_exercise_1.md %}