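This notebook migrates the experiments listed below after the BEAST2 package detection was changed to also look at the "required" field. For each experiment, the evolutionary model is re-parsed from its BEAST2 configuration file, and a new version of the experiment is created whenever the number of detected model components differs from the stored one.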

In [21]:
import json
from io import BytesIO

from tqdm import tqdm

import phylodata
from phylodata.data_types import FileType
from phylodata.maintenance.create_new_version import create_new_version

from phylodata.process.evolutionary_model.parse_evolutionary_model import (
    parse_evolutionary_model,
)

In [2]:
# Identifiers of the experiments to re-check in this migration. Each entry is
# passed to `phylodata.load_experiment` by the loop that iterates over this list.
# NOTE(review): the IDs appear to follow an "<author>-<year>-<keyword>-<suffix>"
# pattern — confirm against the phylodata ID scheme before adding entries by hand.
EXPERIMENT_IDS = [
    "barrett-2024-mosaic-epo4",
    "wilinski-2024-congruity-6pcr",
    "wilinski-2024-congruity-fcth",
    "imfeld-2024-diversification-xtpf",
    "imfeld-2024-diversification-slkl",
    "imfeld-2024-diversification-wu1h",
    "imfeld-2024-diversification-i5i3",
    "magalhaes-2024-complete-njnd",
    "wutke-2024-phylogenomics-5qh4",
    "wutke-2024-phylogenomics-6eyi",
    "wutke-2024-phylogenomics-1p3a",
    "wutke-2024-phylogenomics-jeno",
    "wutke-2024-phylogenomics-oqvj",
    "wutke-2024-phylogenomics-ck2k",
    "wutke-2024-phylogenomics-b462",
    "wutke-2024-phylogenomics-a4w7",
    "wutke-2024-phylogenomics-0wb6",
    "wutke-2024-phylogenomics-05x2",
    "vieira-2024-mitochondrial-cg9a",
    "serrano-2024-targeted-zc3e",
    "serrano-2024-targeted-yd14",
    "titus-2024-topology-qk0s",
    "miri-2024-first-g6o6",
    "rozo-2024-polarization-9ofb",
    "harrington-2024-dispersal-xw9b",
    "harrington-2024-dispersal-wn6u",
    "harrington-2024-dispersal-we1f",
    "tanoyo-2024-systematics-frn3",
    "tanoyo-2024-systematics-5flw",
    "tanoyo-2024-systematics-cskv",
    "tanoyo-2024-systematics-3ytw",
    "mcguire-2023-species-cddh",
    "mcguire-2023-species-iyhy",
    "mcguire-2023-species-y2km",
    "pyrcz-2023-phylogeny-53vj",
    "letsch-2023-jumping-rg34",
    "thomas-2023-multiple-elyn",
    "auderset-2023-subgrouping-nq9a",
    "auderset-2023-subgrouping-uj71",
    "auderset-2023-subgrouping-eb04",
    "auderset-2023-subgrouping-l8go",
    "rez-2023-species-ejki",
    "rez-2023-species-3b6y",
    "meyer-2023-morrison-zdih",
    "mathers-2023-hybridisation-vwt0",
    "leach-2023-repeated-zn4e",
    "puentes-2023-pre-y0hv",
    "forni-2022-macroevolutionary-r34r",
    "bler-2022-phylogenetic-k54f",
    "davis-2022-population-f3j9",
    "kim-2022-phylogenomics-w5kg",
    "buckingham-2022-population-1mid",
    "buckingham-2022-population-wksf",
    "magalhaes-2022-phylogeny-iwk9",
    "obiol-2022-palaeoceanographic-7e7s",
    "obiol-2022-palaeoceanographic-cf5c",
    "obiol-2022-palaeoceanographic-l2ba",
    "obiol-2022-palaeoceanographic-fpjv",
    "obiol-2022-palaeoceanographic-u1tx",
    "obiol-2022-palaeoceanographic-rmgd",
    "brownstein-2022-hidden-zhs4",
    "brownstein-2022-hidden-nk9h",
    "johnston-2022-cophylogeny-2u2d",
    "darlim-2022-impact-umzp",
    "darlim-2022-impact-em28",
    "sancho-2022-tracking-8zg6",
    "takahashi-2022-stable-rt26",
    "gable-2022-genomic-n5f4",
    "gable-2022-genomic-nugh",
    "gable-2022-genomic-fm05",
    "anzas-2022-bayesian-p68i",
    "anest-2021-evolving-n92s",
    "johnson-2021-systematics-dbeu",
    "johnson-2021-systematics-3ch0",
    "johnson-2021-systematics-ex86",
    "brock-2021-color-a2u8",
    "leach-2021-phylogenomic-aphe",
    "leach-2021-phylogenomic-ahe1",
    "mil-2021-new-786l",
    "mil-2021-new-j2s4",
    "pela-2021-subterranean-zego",
    "pela-2021-subterranean-iao8",
    "blow-2021-molecular-l62k",
    "near-2021-phylogeny-dfe7",
    "near-2021-phylogeny-hiv3",
    "arbour-2021-little-vfxn",
    "arbour-2021-little-33ff",
    "catanach-2021-systematics-6ujn",
    "catanach-2021-systematics-yiig",
    "liu-2021-testing-me58",
    "hara-2021-relict-bi5r",
    "sidlauskas-2021-total-gcfr",
    "savelyev-2020-bayesian-88zc",
    "vianna-2020-genome-okrw",
    "vianna-2020-genome-mze8",
    "corrales-2020-allopatric-v6pl",
    "corrales-2020-allopatric-w7lu",
    "stervander-2020-molecular-0h8r",
    "chaves-2020-evolutionary-w9n2",
    "chaves-2020-evolutionary-vubq",
    "chaves-2020-evolutionary-5dgl",
    "chaves-2020-evolutionary-8byo",
    "chaves-2020-evolutionary-odm3",
    "chaves-2020-evolutionary-9cs6",
    "chaves-2020-evolutionary-jjat",
    "chaves-2020-evolutionary-fmyh",
    "chaves-2020-evolutionary-slrw",
    "chaves-2020-evolutionary-0vrl",
    "deli-2020-subterranean-9yvn",
    "buckley-2020-multiple-mpex",
    "burridge-2020-migration-nbmn",
    "tsang-2020-dispersal-9tzw",
    "oliver-2020-oligocene-cv1m",
    "linck-2020-speciation-ncxu",
    "imfeld-2020-mitochondrial-stuu",
    "kornilios-2019-genome-3ijw",
    "slater-2019-hierarchy-xv5v",
    "hancock-2019-phylogeography-4z1v",
    "brandrud-2019-phylogenomic-73ad",
    "brandrud-2019-phylogenomic-iyvq",
    "brandrud-2019-phylogenomic-q6h8",
    "presslee-2019-palaeoproteomics-btvn",
    "presslee-2019-palaeoproteomics-mjp2",
    "munro-2019-climate-6tvf",
    "piatkowski-2019-functional-7jsc",
    "nen-2019-postglacial-qh0e",
    "nen-2019-postglacial-n1bf",
    "economo-2019-evolution-vn9j",
    "leach-2019-exploring-rb8o",
    "leach-2019-exploring-1px9",
    "stervander-2019-origin-1z13",
    "stervander-2019-origin-avfu",
    "stervander-2019-origin-702n",
    "stange-2018-bayesian-q044",
    "stange-2018-bayesian-krv3",
    "stange-2018-bayesian-2uhx",
    "clucas-2018-comparative-sqdr",
    "clucas-2018-comparative-ic87",
    "toljagic-2018-millions-wb2s",
    "jorge-2018-getting-7s4n",
    "j-2018-allegory-iwn6",
    "marburger-2018-whole-8zqj",
    "zaher-2018-origin-11vu",
    "blanchet-2018-related-q4yw",
    "salariato-2018-reinstatement-b34a",
    "singhal-2018-population-v2of",
    "carnicero-2018-phylogeography-zpjw",
    "carnicero-2018-phylogeography-riax",
    "varela-2018-phylogeny-uvh2",
    "trotta-2018-community-a38u",
    "grear-2018-inferring-p7zg",
    "grear-2018-inferring-vv5k",
    "grear-2018-inferring-ltdj",
    "grear-2018-inferring-86ph",
    "grear-2018-inferring-plz2",
    "lajeunesse-2018-systematic-307m",
    "sessa-2018-evolution-l9k2",
    "sessa-2018-evolution-8p92",
    "shu-2017-multi-7trv",
    "sklen-2017-phylogeny-qwwe",
    "sklen-2017-phylogeny-nda2",
    "sklen-2017-phylogeny-l1mi",
    "salariato-2017-climatic-4as2",
    "salariato-2017-climatic-5lpn",
    "slater-2017-independent-xt7u",
    "saladin-2017-fossils-lfr1",
    "saladin-2017-fossils-008y",
    "saladin-2017-fossils-wtv8",
    "saladin-2017-fossils-s3c2",
    "saladin-2017-fossils-q2ha",
    "saladin-2017-fossils-ju2l",
    "saladin-2017-fossils-kgu9",
    "saladin-2017-fossils-jfg5",
    "saladin-2017-fossils-yv8r",
    "saladin-2017-fossils-ao96",
    "bagley-2016-phylogeography-18cm",
    "bagley-2016-phylogeography-tsuj",
    "close-2016-mosaicism-kq3y",
    "close-2016-mosaicism-6hm9",
    "close-2016-mosaicism-d9zx",
    "close-2016-mosaicism-zkjh",
    "rawlence-2016-human-852c",
    "rawlence-2016-human-ojb6",
    "salariato-2016-diversification-hhr5",
    "salariato-2016-diversification-6296",
    "salariato-2016-diversification-059x",
    "cole-2016-nearctic-fgy6",
    "foote-2016-genome-10ig",
    "foote-2016-genome-hcha",
    "foote-2016-genome-8xm0",
    "foote-2016-genome-ydzw",
    "foote-2016-genome-1k2i",
    "v-2016-new-g4qu",
    "looney-2016-tropics-5f11",
    "zarza-2016-hidden-0uci",
    "tornabene-2016-repeated-k72b",
    "bapst-2016-topology-7lg1",
    "bapst-2016-topology-d1nj",
    "vuataz-2016-molecular-9dq5",
    "jr-2015-multilocus-qbd4",
    "jr-2015-multilocus-t1nb",
    "winger-2015-inferring-g4kz",
    "paun-2015-processes-sqks",
    "close-2015-evidence-mmit",
    "morin-2015-geographic-orf9",
    "colombo-2015-diversity-4yr1",
    "colombo-2015-diversity-5jap",
    "colombo-2015-diversity-r5ri",
    "colombo-2015-diversity-cf8w",
    "colombo-2015-diversity-i28o",
    "colombo-2015-diversity-gwrh",
    "colombo-2015-diversity-inb1",
    "colombo-2015-diversity-xdg9",
    "colombo-2015-diversity-8kcc",
    "colombo-2015-diversity-ne95",
    "colombo-2015-diversity-4xoe",
    "colombo-2015-diversity-xvrj",
    "hodges-2014-phylogeography-573d",
    "gohli-2014-evolutionary-gtlx",
    "gohli-2014-evolutionary-scpc",
    "hammer-2014-multigene-sfjz",
    "georges-2013-contemporary-sr8q",
]

In [None]:
# Migration loop: for every listed experiment, download its BEAST2
# configuration, re-parse the evolutionary model from it, and create a new
# experiment version when the number of parsed components differs from the
# number currently stored on the experiment.
for experiment_id in tqdm(EXPERIMENT_IDS):
    experiment = phylodata.load_experiment(
        experiment_id, files_to_download=[FileType.BEAST2_CONFIGURATION], directory="../data"
    )

    # Read the downloaded configuration into memory and hand the parser a
    # file-like object over its bytes.
    xml_file = experiment.get_file_of_type(FileType.BEAST2_CONFIGURATION)
    xml_bytes = BytesIO(xml_file.local_path.read_bytes())

    models = parse_evolutionary_model(xml_bytes)

    # NOTE(review): only the *number* of components is compared. A re-parse
    # that yields the same count but different components would not trigger
    # a new version — confirm that this is sufficient for this migration.
    if len(models) != len(experiment.evolutionary_model):
        # Report through tqdm.write instead of print so the messages are
        # emitted without breaking up the active progress bar. The text is
        # the same as the previous print calls produced.
        tqdm.write(experiment_id)
        tqdm.write(str(models))
        tqdm.write(f"(old: {experiment.evolutionary_model!s} )")

        experiment.evolutionary_model = models
        create_new_version(experiment)

 37%|█████████████████████████████████████████▏                                                                    | 83/222 [00:12<00:06, 20.01it/s]

near-2021-phylogeny-dfe7
[EvolutionaryModelComponent(name='SA', type=<ModelType.TREE_PRIOR: 'treePrior'>, parameters={})]
(old: [] )


 41%|█████████████████████████████████████████████▌                                                                | 92/222 [00:27<01:11,  1.82it/s]

savelyev-2020-bayesian-88zc
[EvolutionaryModelComponent(name='SA', type=<ModelType.TREE_PRIOR: 'treePrior'>, parameters={})]
(old: [] )


 52%|████████████████████████████████████████████████████████▉                                                    | 116/222 [00:39<00:20,  5.14it/s]

slater-2019-hierarchy-xv5v
[EvolutionaryModelComponent(name='SA', type=<ModelType.TREE_PRIOR: 'treePrior'>, parameters={})]
(old: [] )


 52%|████████████████████████████████████████████████████████▉                                                    | 116/222 [00:54<00:20,  5.14it/s]