In [7]:
import sys
# NOTE(review): these are absolute paths to local checkouts on one machine.
# On any other machine they must be edited (or the two packages made importable
# some other way) before the imports below can succeed.
sys.path.append("/Users/jameskitchens/Documents/GitHub/sparg2.0")
sys.path.append("/Users/jameskitchens/Documents/GitHub/tskit_arg_visualizer")

import sparg
import tskit_arg_visualizer as viz
import importlib
# Reload sparg so that edits made to the local checkout are picked up when this
# cell is re-run without restarting the kernel.
importlib.reload(sparg)
import tskit
import msprime
import math
import numpy as np
import pandas as pd
import random
import warnings
import matplotlib.pyplot as plt

In [2]:
# Load the stored tree sequence (rep1) that the rest of the notebook works from.
# The path is relative, so it resolves against the kernel's working directory.
ts = tskit.load("args/orig_args/rep1.trees")

## Preparing the ARG for analysis

We select a subset of samples and simplify the ARG using a custom function to avoid losing recombination nodes. We then chop the ARG at 10,000 generations in the past as we are not interested in deeper times.

In [4]:
# Time (in generations before present) at which the ARG is chopped below.
cutoff = 10000

# Seed NumPy's global RNG so the same 10 sample nodes are drawn on every run.
np.random.seed(1)
samples = list(
    np.random.choice(ts.samples(), 10, replace=False)
)

# First pass: restrict the ARG to the chosen samples with tskit's simplify,
# keeping unary nodes and leaving the sample flags as they are.
ts_sim, map_sim = ts.simplify(
    samples=samples,
    map_nodes=True,
    keep_input_roots=False,
    keep_unary=True,
    update_sample_flags=False,
)

# Draw 10 random ancestors (with locations) from the subsetted ARG.
# NOTE(review): this passes the literal cutoff=2000 rather than the `cutoff`
# variable (10000) defined above; confirm the difference is intentional.
random_ancestors = sparg.generate_random_ancestors_dataframe(
    ts=ts_sim,
    number_of_ancestors=10,
    cutoff=2000,
    include_locations=True,
    seed=1,
)

# Second pass: sparg's own simplification with recombination nodes flagged,
# followed by chopping the resulting ARG at `cutoff`.
ts_final, maps_final = sparg.simplify_with_recombination(
    ts=ts_sim,
    flag_recomb=True,
)
ts_chopped = sparg.chop_arg(ts=ts_final, time=cutoff)

## Calculating spatial estimates

WARNING: Initializing the sparg.SpatialARG can take some time depending on the size of your ARG. This calculates a number of spatial parameters for your ARG given the locations of your sample nodes. These are stored as attributes of the sparg.SpatialARG and include:

- ts
- locations_of_individuals
- paths_shared_time_matrix
- paths
- node_paths_shared_times
- node_paths
- inverted_paths_shared_time_matrix
- root_locations
- path_dispersal_distances
- dispersal_rate_matrix
- fishers_information_1
- fishers_information_2

In [5]:
# Build the SpatialARG from the chopped ARG. With verbose=True the constructor
# prints a progress bar and per-stage elapsed times (see the cell output).
spatial_arg = sparg.SpatialARG(ts=ts_chopped, verbose=True)

Prepared input parameters - Section Elapsed Time: 0.0 - Total Elapsed Time: 0.0


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3329/3329 [00:07<00:00, 419.31it/s]


Calculated covariance matrix - Section Elapsed Time: 8.05 - Total Elapsed Time: 8.05
Inverted covariance matrix - Section Elapsed Time: 0.37 - Total Elapsed Time: 8.43
Created root locations vector - Section Elapsed Time: 1.59 - Total Elapsed Time: 10.02
Estimated dispersal rate - Section Elapsed Time: 0.0 - Total Elapsed Time: 10.02
Calculated Fisher's information matrices - Section Elapsed Time: 0.0 - Total Elapsed Time: 10.02
Completed building SpatialARG object - Total Elapsed Time: 10.02


## Estimating the locations of ancestors using different methods

We can estimate the locations of ancestors using the full ARG, the local tree, or a midpoint method (similar to Wohns et al. 2022).

In [8]:
# Run the three estimators in sequence. Each call receives the current
# `random_ancestors` dataframe and its return value replaces it, so the result
# of one estimator is the input to the next.
# (Presumably each call adds its own estimate columns; confirm in sparg.)

# 1) Estimate using the full ARG.
random_ancestors = sparg.estimate_locations_of_ancestors_in_dataframe_using_arg(
    df=random_ancestors,
    spatial_arg=spatial_arg,
)

# 2) Estimate using the local tree.
random_ancestors = sparg.estimate_locations_of_ancestors_in_dataframe_using_tree(
    df=random_ancestors,
    spatial_arg=spatial_arg,
)

# 3) Estimate using the midpoint method, with simplify=True.
random_ancestors = sparg.estimate_locations_of_ancestors_in_dataframe_using_midpoint(
    df=random_ancestors,
    spatial_arg=spatial_arg,
    simplify=True,
)