In [1]:
# Extracts the species abundance distribution (SAD) of each best-fitting simulation and writes the combined result to a CSV file.

In [16]:
import os
from math import isclose

import pandas as pd
from pycoalescence import CoalescenceTree


In [4]:

# Names that sim_type_detection() searches for (as substrings) in a simulation's file path
# in order to work out which simulation the file belongs to.
tetrapod_groups = {"amniote", "amphibian"}

intervals = {
	"artinskian",
	"asselian",
	"bashkirian",
	"gzhelian",
	"kasimovian",
	"kungurian",
	"moscovian",
	"sakmarian",
}

In [63]:
# Configuration: input/output locations and simulation variables.
# NOTE: every location below is an absolute path on the original machine / external drive,
# so these need editing before the notebook can run anywhere else.

# Scenario label. Set to "" instead to analyse the normal (non-fragmented) simulations.
fragmented = "fragmented"

# Simulation variables
speciation_rates = [1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

# Input table of best-fitting parameters, and the output file for the extracted SADs
best_fitting_csv = "/Users/samthompson/Documents/PhD/PaleoSampling/Results/Sim6/single_param_best.csv"
best_fitting_sad_csv = "/Users/samthompson/Documents/PhD/PaleoSampling/Results/single_param_best_sad.csv"

# Directories holding the simulation output databases and the input data
results_dir_fragmented = "/Volumes/Seagate 3TB/Paleo/Results/PaleoMain/Sim6"
results_dir_main = "/Volumes/Seagate 3TB/Paleo/Results/PaleoMain/Sim5"
data_dir = "/Volumes/Seagate 3TB/Paleo/Data"

In [37]:
# Load the CSV of best-fitting simulation parameters into a DataFrame. Later cells read the
# deme, sigma, interval, percent_cover, tetrapod_group, scenario and speciation_rate columns.
best_fitting = pd.read_csv(best_fitting_csv)

In [10]:
def sim_type_detection(fine_map_file):
	"""
	Detects the simulation type from a file path (the type was not saved anywhere more sensible).

	Every combination of a name from the module-level ``intervals`` set and a name from the
	module-level ``tetrapod_groups`` set is tried; the first combination for which both names
	occur as substrings of the path is returned.

	:param fine_map_file: the path to check
	:return: a tuple of (interval, tetrapod group)
	:raises ValueError: if no interval/tetrapod group combination occurs in the path
	"""
	candidates = (
		(interval, tet_group)
		for interval in intervals
		for tet_group in tetrapod_groups
		if interval in fine_map_file and tet_group in fine_map_file
	)
	detected = next(candidates, None)
	if detected is None:
		raise ValueError("No type detected! Filename: {}.".format(fine_map_file))
	return detected

In [56]:
# Turn each row of the best-fitting table into a plain dict of simulation parameters.
# The interval and tetrapod group names are lower-cased so that they can be compared with
# the lower-case names returned by sim_type_detection().
best_fitting_vars = [
	{
		"deme": row["deme"],
		"interval": row["interval"].lower(),
		"percent_cover": row["percent_cover"],
		"tetrapod_group": row["tetrapod_group"].lower(),
		"scenario": row["scenario"],
		"sigma": row["sigma"],
		"speciation_rate": row["speciation_rate"],
	}
	for _, row in best_fitting.iterrows()
]

In [50]:
def _find_best_fitting_sims(results_dir, scenario="fragmented", sigma_tol=0.01):
	"""
	Finds the simulation databases in a directory whose parameters match a best-fitting parameter set.

	A database matches when some entry of the module-level ``best_fitting_vars`` has the same deme,
	the same interval and tetrapod group (detected from the simulation's sample file path), the given
	scenario, and a sigma within ``sigma_tol`` of the simulation's sigma.

	:param results_dir: the directory to search for ``.db`` files
	:param scenario: the scenario name that the best-fitting entry must have
	:param sigma_tol: the absolute tolerance used when comparing sigma values
	:return: a list of the paths (``CoalescenceTree.file``) of the matching databases
	"""
	matched_paths = []
	# os.listdir() returns names in arbitrary order; sort so that the result is reproducible
	for database in sorted(os.listdir(results_dir)):
		# Require the .db extension: a substring test would also admit names such as "x.db-journal"
		if not database.endswith(".db"):
			continue
		ct = CoalescenceTree(os.path.join(results_dir, database))
		params = ct.get_simulation_parameters()
		interval, tet_group = sim_type_detection(params["sample_file"])
		is_best_fit = any(
			x["deme"] == params["deme"]
			and x["interval"] == interval
			and x["tetrapod_group"] == tet_group
			and x["scenario"] == scenario
			and isclose(params["sigma"], x["sigma"], abs_tol=sigma_tol)
			for x in best_fitting_vars
		)
		if is_best_fit:
			matched_paths.append(ct.file)
	return matched_paths


# NOTE(review): only the percent cover of the first best-fitting entry is used to choose the
# sub-directory of the fragmented results - confirm that all entries share the same value.
pc = best_fitting_vars[0]["percent_cover"]
sim_path_list = _find_best_fitting_sims(os.path.join(results_dir_fragmented, str(pc)))
# NOTE(review): the main results are also filtered on scenario "fragmented" (the helper's default),
# exactly as before - confirm that this is intended rather than a copy-paste leftover.
sim_path_list += _find_best_fitting_sims(results_dir_main)

In [55]:
# Display the collected best-fitting parameter sets for inspection.
# NOTE(review): the saved output below has no "speciation_rate" key, so it appears to predate
# the current version of the cell that builds this list - re-run to refresh it.
best_fitting_vars

[{'deme': 134,
  'interval': 'kungurian',
  'percent_cover': 20,
  'tetrapod_group': 'amniote',
  'scenario': 'fragmented',
  'sigma': 19.340737},
 {'deme': 170,
  'interval': 'kungurian',
  'percent_cover': 20,
  'tetrapod_group': 'amphibian',
  'scenario': 'fragmented',
  'sigma': 5.829867},
 {'deme': 134,
  'interval': 'sakmarian',
  'percent_cover': 20,
  'tetrapod_group': 'amniote',
  'scenario': 'fragmented',
  'sigma': 19.340737},
 {'deme': 170,
  'interval': 'sakmarian',
  'percent_cover': 20,
  'tetrapod_group': 'amphibian',
  'scenario': 'fragmented',
  'sigma': 5.829867},
 {'deme': 134,
  'interval': 'artinskian',
  'percent_cover': 20,
  'tetrapod_group': 'amniote',
  'scenario': 'fragmented',
  'sigma': 19.340737},
 {'deme': 170,
  'interval': 'artinskian',
  'percent_cover': 20,
  'tetrapod_group': 'amphibian',
  'scenario': 'fragmented',
  'sigma': 5.829867},
 {'deme': 134,
  'interval': 'asselian',
  'percent_cover': 20,
  'tetrapod_group': 'amniote',
  'scenario': 'fra

In [60]:
# Build one record per species for every best-fitting simulation, holding the species id, its
# abundance, and the interval and tetrapod group of the simulation it came from.
output = []
# NOTE(review): the speciation rate of the first best-fitting entry is applied to every
# simulation - confirm that all entries share the same speciation rate.
spec_rate = best_fitting_vars[0]["speciation_rate"]
for sim in sim_path_list:
	ct = CoalescenceTree(sim)
	params = ct.get_simulation_parameters()
	interval, tet_group = sim_type_detection(params["sample_file"])
	# Pick the first community that was generated with the wanted speciation rate. The rates are
	# compared with a relative tolerance because one comes from a CSV file and the other from the
	# database, so they need not be bit-identical floats. None (rather than 0) marks "not found"
	# so that no possible reference value is mistaken for a failure.
	best_ref = next(
		(
			ref
			for ref in ct.get_community_references()
			if isclose(spec_rate, ct.get_community_parameters(ref)["speciation_rate"], rel_tol=1e-9)
		),
		None,
	)
	if best_ref is None:
		raise ValueError("No community with speciation rate {} found in {}.".format(spec_rate, sim))
	for species_id, abundance in ct.get_species_abundances(reference=best_ref):
		output.append({"species_id" : species_id, "abundance" : abundance, "interval" : interval,
					   "tet_group" : tet_group})

In [62]:
# Display the collected records (species_id, abundance, interval, tet_group) for inspection.
output

[{'species_id': 0,
  'abundance': 0,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 1,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 2,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 3,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 4,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 5,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 6,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 7,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 8,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 9,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'},
 {'species_id': 10,
  'abundance': 1,
  'interval': 'kungurian',
  'tet_group': 'amniote'}

In [64]:
# Convert the list of per-species records into a DataFrame and save it as a CSV file.
# to_csv() is called with its defaults, so the DataFrame's row index is also written,
# as an unnamed first column.
output_df = pd.DataFrame(output)
output_df.to_csv(best_fitting_sad_csv)