This notebook compares the results from Liam Revell's tutorial with those from `terracotta`, both as a sanity check and to help diagnose issues that arise with asymmetric migration rates.

In [1]:
import tskit
import newick
import pandas as pd

In [2]:
def from_newick(
    string, *, min_edge_length=0, span=1, time_units=None, node_name_key=None
) -> tskit.TreeSequence:
    """
    Convert a newick string into a tree sequence holding a single tree.

    Every leaf of the newick tree is flagged as a sample
    (``tskit.NODE_IS_SAMPLE``). Non-empty newick names and comments are stored
    in the JSON node metadata, so a name can be read back with e.g.
    ``ts.node(0).metadata["name"]``.

    :param str string: The newick text; it must describe exactly one tree.
    :param float min_edge_length: Lower bound applied to every edge length: any
        shorter edge is replaced by this value. tskit cannot represent zero or
        negative edge lengths, so pass a small positive value when the newick
        contains such edges.
    :param float span: Span of the tree, which is also the
        :attr:`~TreeSequence.sequence_length` of the result.
    :param str time_units: Value for the :attr:`~TreeSequence.time_units`
        property of the result. When ``None`` (the default) the property is
        left untouched, i.e. :attr:`tskit.TIME_UNITS_UNKNOWN`.
    :param str node_name_key: Metadata key under which node names are stored.
        ``None`` (the default) means ``"name"``.
    :return: A tree sequence consisting of a single tree.
    :raises ValueError: If the string contains no tree or more than one tree,
        or if an edge length is still ``<= 0`` after applying
        ``min_edge_length``.
    """
    parsed = newick.loads(string)
    if len(parsed) == 0:
        raise ValueError("Newick string was empty")
    if len(parsed) > 1:
        raise ValueError("Only one tree can be imported from a newick string")
    tree = parsed[0]

    tables = tskit.TableCollection(span)
    if time_units is not None:
        tables.time_units = time_units
    if node_name_key is None:
        node_name_key = "name"

    nodes = tables.nodes
    name_property = {
        "type": ["string"],
        "description": "Name from newick file",
    }
    comment_property = {
        "type": ["string"],
        "description": "Comment from newick file",
    }
    nodes.metadata_schema = tskit.MetadataSchema(
        {
            "codec": "json",
            "type": "object",
            "properties": {
                node_name_key: name_property,
                "comment": comment_property,
            },
        }
    )

    # newick node object -> row id in the tskit node table
    tskit_ids = {}

    def _ensure_node(newick_node, time):
        # Return the tskit id for ``newick_node``, adding a row on first sight.
        if newick_node in tskit_ids:
            return tskit_ids[newick_node]
        is_leaf = len(newick_node.descendants) == 0
        metadata = {}
        if newick_node.name:
            metadata[node_name_key] = newick_node.name
        if newick_node.comment:
            metadata["comment"] = newick_node.comment
        new_id = tables.nodes.add_row(
            flags=tskit.NODE_IS_SAMPLE if is_leaf else 0,
            time=time,
            metadata=metadata,
        )
        tskit_ids[newick_node] = new_id
        return new_id

    # The root is provisionally placed at time 0; each child is placed below
    # its parent (more negative time) by the child's edge length.
    root = next(tree.walk())
    _ensure_node(root, 0)
    for parent in tree.walk():
        parent_id = tskit_ids[parent]
        for child in parent.descendants:
            length = max(child.length, min_edge_length)
            if length <= 0:
                raise ValueError(
                    "tskit tree sequences cannot contain edges with lengths"
                    " <= 0. Set min_edge_length to force lengths to a"
                    " minimum size"
                )
            child_time = nodes[parent_id].time - length
            child_id = _ensure_node(child, child_time)
            tables.edges.add_row(0, span, parent_id, child_id)

    # Shift all times so the youngest node ends up at ``root.length`` (the
    # root's own newick branch length is added on top of the usual
    # zero-at-the-youngest-leaf shift).
    provisional_nodes = tables.nodes.copy()
    youngest = min(tables.nodes.time)
    tables.nodes.clear()
    for row in provisional_nodes:
        shifted_time = row.time - youngest + root.length
        tables.nodes.append(row.replace(time=shifted_time))
    tables.sort()
    return tables.tree_sequence()

# Read the tutorial tree. The context manager closes the file handle, and
# strip() removes surrounding whitespace (e.g. the trailing newline) without
# blindly dropping whatever the last character happens to be.
with open("tutorial_tree.newick", "r") as newick_file:
    newick_tree = newick_file.read().strip()

# Convert to a tskit tree sequence and store it where the later
# `trees_dir_path="trees"` run looks for it. The "trees" directory must exist.
tree = from_newick(newick_tree)
tree.dump("trees/0.trees")

In [6]:
feeding_mode = pd.read_csv("feeding_mode.csv")

# Look up each species' tskit sample id through a single name -> id mapping
# instead of re-scanning the whole frame once per sample.
name_to_id = {
    tree.node(sample).metadata["name"]: sample for sample in tree.samples()
}
feeding_mode["id"] = feeding_mode["species"].map(name_to_id)

# Fail with a readable message if a species has no matching sample in the
# tree; otherwise the integer cast below fails on NaN with an opaque error.
unmatched = feeding_mode.loc[feeding_mode["id"].isna(), "species"]
if not unmatched.empty:
    raise ValueError(
        "Species in feeding_mode.csv with no matching sample in the tree: "
        f"{unmatched.tolist()}"
    )
feeding_mode["id"] = feeding_mode["id"].astype(int)

# Deme coding: 1 for "suction" feeders, 0 for every other feeding mode.
feeding_mode["deme"] = (feeding_mode["feed_mode"] == "suction").astype(int)
print(feeding_mode)

                     species feed_mode  Max_TL_cm   id  deme
0              Albula_vulpes   suction      104.0  118     1
1          Anguilla_anguilla   suction       50.0   26     1
2           Anguilla_bicolor   suction      120.0   27     1
3          Anguilla_japonica   suction      150.0   28     1
4          Anguilla_rostrata   suction      152.0   25     1
..                       ...       ...        ...  ...   ...
56         Serrivomer_sector      bite       76.0   82     0
57    Simenchelys_parasitica   suction       61.0   34     1
58        Uroconger_lepturus   suction       52.0   51     1
59  Uropterygius_micropterus      bite       30.0  101     0
60      Venefica_proboscidea      bite      100.0   17     0

[61 rows x 5 columns]


In [7]:
# Sample table: tskit node id, deme assignment and species label per sample.
sample_columns = ["id", "deme", "species"]
samples = feeding_mode[sample_columns]
samples.to_csv("samples.tsv", sep="\t", index=False)

# Deme table: two demes (ids 0 and 1), each listing the other as its neighbour.
demes = pd.DataFrame(
    {
        "id": [0, 1],
        "xcoord": [0, 1],
        "ycoord": [0, 1],
        "type": [0, 1],
        "neighbours": [1, 0],
    }
)
demes.to_csv("demes.tsv", sep="\t", index=False)

NameError: name 'tct' is not defined

In [1]:
import importlib
import os
import sys

# Location of the local terracotta checkout. Set the TERRACOTTA_PATH
# environment variable to use a different checkout; the default is the
# original author's path.
TERRACOTTA_PATH = os.environ.get(
    "TERRACOTTA_PATH", "/Users/jameskitchens/Documents/GitHub/terracotta"
)
# Guard against piling up duplicate entries when this cell is re-run.
if TERRACOTTA_PATH not in sys.path:
    sys.path.append(TERRACOTTA_PATH)

import terracotta as tct

# Reload so edits to the checkout are picked up without restarting the kernel.
importlib.reload(tct)

<module 'terracotta' from '/Users/jameskitchens/Documents/GitHub/terracotta/terracotta/__init__.py'>

In [2]:

# Needs the `demes` and `samples` DataFrames built in the earlier cell; the
# recorded NameError shows this cell was last run in a kernel where they had
# not been defined.
world_map = tct.WorldMap(
    demes,
    samples,
    asymmetric=True,
)
world_map.draw(
    figsize=(5, 5),
    color_demes=True,
    color_connections=True,
)

NameError: name 'demes' is not defined

In [67]:
# Run terracotta on the files written by the earlier cells (demes.tsv,
# samples.tsv and the trees/ directory) with asymmetric=True, then print
# whatever the call returns.
run_output = tct.run(
    demes_path="demes.tsv",
    samples_path="samples.tsv",
    trees_dir_path="trees",
    asymmetric=True,
)
print(run_output)

[12.66778935 12.66778935] 0.0
[1.20804733e-03 1.42427784e+01] 0.0
[0.00438447 0.00067238] 0.0
[0.16581626 4.41220692] 0.0
[1.42427784e+01 1.20804733e-03] 0.0
[0.04063502 0.00995804] 0.0
[3.10437529e+00 1.71698145e-03] 0.0
[0.00995804 0.04063502] 0.0
[0.00308486 0.00308486] 0.0
[1.31171454e-01 7.25488812e-05] 0.0
[0.26497362 2.76108854] 0.0
[0.03214495 0.20961139] 0.0
[5.81194282e+01 1.15932782e-04] 0.0
[ 0.06493458 11.26696508] 0.0
[5.98026793e-04 2.87712033e+01] 0.0
[0.00787746 2.18420081] 0.0
[0.00885686 0.00013035] 0.0
[3.32852037e-04 8.15688997e-05] 0.0
[2.34190657e-04 1.72784506e+00] 0.0
[6.27101046 0.01415323] 0.0
[45.97625189  0.67663385] 0.0
[0.0159129  1.08125781] 0.0
[1.11961255e-02 2.02430697e+01] 0.0
[8.15688997e-05 3.32852037e-04] 0.0
[5.16924895e+01 1.03112751e-04] 0.0
[40.89212493  3.92429807] 0.0
[ 0.11666631 32.34833334] 0.0
[0.00095564 0.42342664] 0.0
[0.20961139 0.03214495] 0.0
[1.78913576e-02 6.45263107e-05] 0.0
[0.0226168  0.00217047] 0.0
[0.01415323 6.27101046] 0.