In [1]:
import sys
import numpy as np

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd

import tskit

In [2]:
# Tree sequence file to analyse.
treefile = "../param_grid/abc_median/sim_4177974911520.trees"
# Path with everything from the last "." onward removed.
outbase = treefile.rpartition(".")[0]

ts = tskit.load(treefile)
# User-supplied metadata stored under the top-level 'SLiM' metadata entry.
params = ts.metadata['SLiM']['user_metadata']

# Per-individual arrays (one entry / row per individual in the tree sequence).
indiv_times, indiv_pops, indiv_locs = (
    ts.individual_times,
    ts.individual_populations,
    ts.individual_locations,
)

In [3]:
# Show the output prefix derived from the tree-file path (path minus its last ".suffix").
print(outbase)

../param_grid/abc_median/sim_4177974911520


In [4]:
# "Modern" individuals: time below 100 and assigned to population 1.
modern = (indiv_times < 100) & (indiv_pops == 1)

# Second location coordinate (column 1) of the modern individuals only.
modern_loc2 = indiv_locs[modern, 1]

# Two focal individuals: the first modern individual sitting at the largest
# column-1 coordinate, followed by the first one at the smallest.
target_indivs = [
    np.flatnonzero(modern & (indiv_locs[:, 1] == extreme))[0]
    for extreme in (modern_loc2.max(), modern_loc2.min())
]



In [5]:
# One sample set per target individual, made of that individual's nodes.
target_node_sets = [ts.individual(n).nodes for n in target_indivs]

# Node-mode statistic: for every node, the summary function receives the
# number of samples from each sample set found beneath that node and halves it.
# The result has one row per node and one column per target individual
# (it is indexed by node id and accumulated per target in the next step).
node_anc = ts.sample_count_stat(
    target_node_sets,
    lambda x: x / 2,  # 2 for diploidy
    # Output dimension must equal the number of sample sets, so derive it
    # from the list instead of hard-coding 2.
    len(target_node_sets),
    polarised=True,
    strict=False,  # NOTE(review): presumably needed because f(all samples) != 0 - confirm
    mode='node',
)

In [6]:
# Per-individual ancestry: sum the per-node values over the nodes belonging to
# each individual. Rows are individuals, columns are the target individuals.
indiv_anc = np.zeros((ts.num_individuals, len(target_indivs)))

# Individual id of every node; negative means the node has no individual.
node_individual = ts.tables.nodes.individual
has_individual = node_individual >= 0

# Unbuffered in-place add so that several nodes mapping to the same individual
# all contribute (plain fancy-index `+=` would keep only one of them).
# This replaces a Python-level loop over every node in the tree sequence.
np.add.at(indiv_anc, node_individual[has_individual], node_anc[has_individual])

In [7]:
# Distinct individual times, in ascending order.
tts = sorted(set(indiv_times))

In [8]:
# Long-format columns: one entry per (target individual, candidate ancestor).
all_times = []
all_locs1 = []
all_locs2 = []
all_anc_props = []
all_inds = []

# Loop-invariant mask, computed once instead of once per (target, time) pair.
in_pop1 = indiv_pops == 1  # exclude dummy indiv in pop 2

for k, ind in enumerate(target_indivs):
    for target_time in tts:
        # Population-1 individuals recorded at exactly this time.
        anc_indivs = np.flatnonzero((indiv_times == target_time) & in_pop1)
        assert len(anc_indivs) > 0, (
            f"no population-1 individuals found at time {target_time}"
        )
        num_anc = len(anc_indivs)

        # Column k of indiv_anc holds the values for the k-th target individual.
        all_anc_props.extend(indiv_anc[anc_indivs, k])
        all_locs1.extend(indiv_locs[anc_indivs, 0])
        all_locs2.extend(indiv_locs[anc_indivs, 1])
        # Repeat the time and target id so every column has the same length.
        all_times.extend(np.repeat(target_time, num_anc))
        all_inds.extend(np.repeat(ind, num_anc))

In [9]:
# Assemble the accumulated columns into one table and save it for plotting.
output = pd.DataFrame(
    {
        'ind': all_inds,
        'anc_prop': all_anc_props,
        'loc1': all_locs1,
        'loc2': all_locs2,
        'time': all_times,
    }
)
output.to_csv("data_for_ancestry_plot.csv", index=False)

In [10]:
# Quick look at the raw location array (numpy abbreviates the middle rows).
print(indiv_locs)

[[ 61.06250371 213.80919301   0.        ]
 [ 61.06250371 213.80919301   0.        ]
 [ 61.06250371 213.80919301   0.        ]
 ...
 [158.04148297  58.62651557   0.        ]
 [158.04148297  58.62651557   0.        ]
 [158.04148297  58.62651557   0.        ]]


In [11]:
# Save time plus the first two location coordinates for every individual
# in the tree sequence (no population or time filtering is applied here).
indiv_output = pd.DataFrame(
    {
        'time': indiv_times,
        'loc1': indiv_locs[:, 0],
        'loc2': indiv_locs[:, 1],
    }
)
print(indiv_output)
indiv_output.to_csv("data_for_all_inds.csv", index=False)

          time        loc1        loc2
0          0.0   61.062504  213.809193
1          0.0   61.062504  213.809193
2          0.0   61.062504  213.809193
3          0.0   61.062504  213.809193
4          0.0   61.006831  213.425618
...        ...         ...         ...
300727  2250.0  158.639161   56.979526
300728  2250.0  158.041483   58.626516
300729  2250.0  158.041483   58.626516
300730  2250.0  158.041483   58.626516
300731  2250.0  158.041483   58.626516

[300732 rows x 3 columns]
