Skip to content

Commit

Permalink
Refine tskit metadata (#742)
Browse files Browse the repository at this point in the history
* DiploidMetadata.[alive|preserved|first_generation] are now converted to bool,
as intended.

Add link to vignette from DiploidPopulation.dump_tables_to_tskit docs.

Pass ModelParams to DiploidPopulation.dump_tables_to_tskit in the
tests.

Remove unused docstring from _dump_tables_to_tskit.

Breaking change: all non-self arguments to
DiploidPopulation.dump_tables_to_tskit are now kw-only.

Add top-level tree-sequence/table collection metadata. Closes #725

Allow user-specified population table metadata. Closes #740

* Update docstring for DiploidPopulation.dump_tables_to_tskit
  • Loading branch information
molpopgen committed Jun 7, 2021
1 parent 9371bbb commit e8c3fec
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 47 deletions.
32 changes: 30 additions & 2 deletions fwdpy11/_types/diploid_population.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from typing import IO, Dict, Iterable, Iterator, List, Optional, Tuple, Union

import demes
import fwdpy11._types
import fwdpy11.tskit_tools._dump_tables_to_tskit
import numpy as np
import tskit

from .._fwdpy11 import DiploidGenotype, DiploidMetadata, ll_DiploidPopulation
from .model_params import ModelParams
from .population_mixin import PopulationMixin
from .table_collection import TableCollection

Expand Down Expand Up @@ -176,12 +178,27 @@ def deme_sizes(self, as_dict=False) -> Union[np.ndarray, Dict]:
return {i: j for i, j in zip(deme_sizes[0], deme_sizes[1])}

def dump_tables_to_tskit(
self, parameters: Optional[Dict] = None, *, destructive=False
self,
*,
model_params: Optional[Union[ModelParams, Dict[str, ModelParams]]] = None,
demes_graph: Optional[demes.Graph] = None,
population_metadata: Optional[Dict[int, object]] = None,
parameters: Optional[Dict] = None,
destructive=False
):
"""
Dump the population's TableCollection into
a tskit TreeSequence
:param model_params: Model parameters to be stored as top-level metadata
:type model_params: :class:`fwdpy11.ModelParams` or :class:`dict`
:param demes_graph: A demographic model specified via `demes`.
:type demes_graph: :class:`demes.Graph`
:param population_metadata: A mapping from integer id of a deme/population to metadata
:type population_metadata: dict
:param parameters: The simulation parameters for the provenance table.
:type parameters: None or dict
Expand All @@ -190,6 +207,8 @@ def dump_tables_to_tskit(
:rtype: tskit.TreeSequence
For examples, see :ref:`tskitconvert_vignette`.
.. warning::
If `destructive` is `True`, further operations on the
Expand All @@ -214,9 +233,18 @@ def dump_tables_to_tskit(
Added `destructive` option.
.. versionchanged:: 0.15.0
Added `model_params`, `demes_graph`, `population_metadata` keyword args.
"""
return fwdpy11.tskit_tools._dump_tables_to_tskit._dump_tables_to_tskit(
self, parameters, destructive=destructive
self,
model_params=model_params,
demes_graph=demes_graph,
population_metadata=population_metadata,
parameters=parameters,
destructive=destructive,
)

def dump_to_file(self, filename: str):
Expand Down
65 changes: 38 additions & 27 deletions fwdpy11/tskit_tools/_dump_tables_to_tskit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,25 @@
import json
import typing

import demes
import fwdpy11.tskit_tools
import fwdpy11.tskit_tools.metadata_schema
import numpy as np
import tskit
from fwdpy11._types.model_params import ModelParams


def _initializePopulationTable(
    node_view, population_metadata: typing.Optional[typing.Dict[int, object]], tc
):
    """
    Set the population metadata schema on ``tc`` and add one population
    row for each distinct value found in ``node_view["deme"]``.

    :param node_view: Object indexable by ``"deme"``, giving the deme
                      label of each node.
    :param population_metadata: Optional mapping from deme id to the
                                metadata to store for that deme.
                                Demes missing from the mapping (or all
                                demes, if this is ``None``) get the default
                                metadata ``{"name": "deme<id>"}``.
    :type population_metadata: None or dict
    :param tc: The table collection whose population table is filled.
               It is modified in place.

    Rows are added in ascending order of deme id.
    """
    tc.populations.metadata_schema = (
        fwdpy11.tskit_tools.metadata_schema.PopulationMetadata
    )
    # np.unique already returns its values sorted, so no extra sort is needed.
    # NOTE(review): a row's index equals its deme id only if the deme ids
    # present are contiguous from 0 -- confirm against callers.
    for i in np.unique(node_view["deme"]):
        if population_metadata is not None and i in population_metadata:
            tc.populations.add_row(metadata=population_metadata[i])
        else:
            tc.populations.add_row(metadata={"name": "deme" + str(i)})


def _initializeIndividualTable(self, tc):
Expand Down Expand Up @@ -109,31 +116,16 @@ def _dump_mutation_site_and_site_tables(self, tc: tskit.TableCollection) -> None


def _dump_tables_to_tskit(
self, parameters: typing.Optional[typing.Dict] = None, *, destructive=False
self,
*,
model_params: typing.Optional[
typing.Union[ModelParams, typing.Dict[str, ModelParams]]
] = None,
demes_graph: typing.Optional[demes.Graph] = None,
population_metadata: typing.Optional[typing.Dict[int, object]] = None,
parameters: typing.Optional[typing.Dict] = None,
destructive=False,
):
"""
Dump the population's TableCollection into
an tskit TreeSequence
:param parameters: The simulation parameters for the provenance table.
:type parameters: None or dict
:rtype: tskit.TreeSequence
.. versionchanged:: 0.8.2
Added `parameters`.
Generate provenance information for return value.
The provenance information is validated using
:func:`tskit.validate_provenance`, which may
raise an exception.
.. versionchanged:: 0.10.0
Use tskit metadata schema.
Mutation time is now stored in the tskit.MutationTable column.
Origin time of mutations is part of the metadata.
"""
from .._fwdpy11 import gsl_version, pybind11_version

environment = tskit.provenance.get_environment(
Expand Down Expand Up @@ -162,6 +154,25 @@ def _dump_tables_to_tskit(

tc = tskit.TableCollection(self.tables.genome_length)

tc.metadata_schema = fwdpy11.tskit_tools.metadata_schema.TopLevelMetadata

# Populate the required fields
top_level_metadata = {"generation": self.generation}

if model_params is not None:
try:
top_level_metadata["model_params"] = str(model_params.asdict())
except:
mp = {}
for key, value in model_params.items():
mp[key] = str(value.asdict())
top_level_metadata["model_params"] = mp

if demes_graph is not None:
top_level_metadata["demes_graph"] = demes_graph.asdict()

tc.metadata = top_level_metadata

if destructive is True:
self._clear_haploid_genomes()

Expand All @@ -171,7 +182,7 @@ def _dump_tables_to_tskit(
# other than -1 in an tskit.NodeTable will
# raise an exception if the PopulationTable
# isn't set up.
_initializePopulationTable(node_view, tc)
_initializePopulationTable(node_view, population_metadata, tc)
node_to_individual = _initializeIndividualTable(self, tc)
individual = [-1 for i in range(len(node_view))]
for k, v in node_to_individual.items():
Expand Down
18 changes: 4 additions & 14 deletions fwdpy11/tskit_tools/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ class DiploidMetadata(object):
sex: int
deme: int
label: int
alive: bool
preserved: bool
first_generation: bool
alive: bool = attr.ib(converter=bool)
preserved: bool = attr.ib(converter=bool)
first_generation: bool = attr.ib(converter=bool)
parents: typing.List[int]
geography: typing.List[float]
nodes: typing.List[int]
Expand Down Expand Up @@ -129,17 +129,7 @@ def decode_individual_metadata(
rv.append(DiploidMetadata.from_table_row(ind))
except:
ind = tc.individuals[_rows]
alive = ind.flags & INDIVIDUAL_IS_ALIVE
preserved = ind.flags & INDIVIDUAL_IS_PRESERVED
first_generation = ind.flags & INDIVIDUAL_IS_FIRST_GENERATION
rv.append(
DiploidMetadata(
**ind.metadata,
alive=alive,
preserved=preserved,
first_generation=first_generation
)
)
rv.append(DiploidMetadata.from_table_row(ind))

return rv

Expand Down
35 changes: 34 additions & 1 deletion fwdpy11/tskit_tools/metadata_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,42 @@
import copy
import typing

import fwdpy11
import tskit

import fwdpy11
# JSON metadata schema for the top level of a table collection/tree sequence.
# Only "generation" is required; "model_params", "seed" and "demes_graph"
# are optional.
# The multi-line descriptions below are built by implicit string-literal
# concatenation, so every fragment except the last must end with a space.
TopLevelMetadata = tskit.metadata.MetadataSchema(
    {
        "codec": "json",
        "title": "Top-level metadata for table collection/tree sequence.",
        "type": "object",
        "properties": {
            "generation": {
                "type": "integer",
                "description": "The value of pop.generation at the time data "
                "were exported to tskit",
            },
            "model_params": {
                "type": ["string", "object"],
                "description": "One or more fwdpy11.ModelParams instances.",
            },
            "seed": {
                "type": "integer",
                "description": "Random number seed. "
                "This is optional because a random number generator "
                "may be called prior to simulation, thus making the "
                "initial seed not capable of reproducing the simulation",
            },
            "demes_graph": {
                "type": "object",
                "description": "A demographic model specified using demes. "
                "This information will be redundant with that stored in model_params, "
                "but it may be useful as it allows reconstruction of the YAML file "
                "from the tree sequence.",
            },
        },
        "required": ["generation"],
    }
)

IndividualDiploidMetadata = tskit.metadata.MetadataSchema(
{
Expand Down
9 changes: 6 additions & 3 deletions tests/test_metadata_roundtrips_via_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_metadata_roundtrip_single_sim(rng, pdict, pop):

fwdpy11.evolvets(rng, pop, params, 100, r)

ts = pop.dump_tables_to_tskit()
ts = pop.dump_tables_to_tskit(model_params=params)

# add neutral mutations w/no metadata
ts = msprime.sim_mutations(ts, rate=1.0, random_seed=654321)
Expand Down Expand Up @@ -114,6 +114,9 @@ def test_metadata_roundtrip_single_sim(rng, pdict, pop):
alive = 0
for row in range(ts.tables.individuals.num_rows):
i = fwdpy11.tskit_tools.decode_individual_metadata(ts.tables, row)[0]
assert type(i.alive) == bool
assert type(i.preserved) == bool
assert type(i.first_generation) == bool
if i.alive:
alive += 1
for n in i.nodes:
Expand Down Expand Up @@ -147,7 +150,7 @@ def test_metadata_roundtrip_single_sim_with_first_gen_preserved(rng, pdict, pop)

fwdpy11.evolvets(rng, pop, params, 100, r, preserve_first_generation=True)

ts = pop.dump_tables_to_tskit()
ts = pop.dump_tables_to_tskit(model_params=params)
assert len(ts.tables.individuals) == 2 * pop.N + 2
first = 0
preserved = 0
Expand Down Expand Up @@ -183,7 +186,7 @@ def test_metadata_roundtrip_single_deme_sim_with_parameters(rng, pdict, pop, inc
fwdpy11.evolvets(rng, pop, params, 100)

ts = pop.dump_tables_to_tskit(
{"params_dict": str(params.asdict()), "script": inception}
parameters={"params_dict": str(params.asdict()), "script": inception}
)

provenance = json.loads(ts.provenance(0).record)
Expand Down

0 comments on commit e8c3fec

Please sign in to comment.