# Initial main simulation analysis

- Performs the preliminary analysis for the first set of main simulations: removes incomplete runs, calculates the biodiversity metrics and exports the summary csv files.

In [6]:
import os
from collections import OrderedDict
from pycoalescence import CoalescenceTree
import pandas as pd
import shutil
import sqlite3

In [7]:
# For dev use only - auto-reloading of modules
%load_ext autoreload
%aimport pycoalescence.coalescence_tree
from pycoalescence.coalescence_tree import check_sql_table_exist
from pycoalescence.helper import update_parameter_names
%autoreload 1

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Names of the time intervals; matched as substrings of file paths to identify a simulation.
intervals = {
    "artinskian",
    "asselian",
    "bashkirian",
    "gzhelian",
    "kasimovian",
    "kungurian",
    "moscovian",
    "sakmarian",
}

# Names of the tetrapod groups; matched as substrings of file paths in the same way.
tetrapod_groups = {"amniote", "amphibian"}

# Maximum density for every (interval, tetrapod group) pair, used as the numerator
# of the downsampling rate applied to each simulation.
max_density = OrderedDict(
    [
        (("artinskian", "amniote"), 15),
        (("artinskian", "amphibian"), 15),
        (("asselian", "amniote"), 6),
        (("asselian", "amphibian"), 6),
        (("bashkirian", "amniote"), 1),
        (("bashkirian", "amphibian"), 23),
        (("gzhelian", "amniote"), 7),
        (("gzhelian", "amphibian"), 7),
        (("kasimovian", "amniote"), 7),
        (("kasimovian", "amphibian"), 4),
        (("kungurian", "amniote"), 15),
        (("kungurian", "amphibian"), 17),
        (("moscovian", "amniote"), 4),
        (("moscovian", "amphibian"), 21),
        (("sakmarian", "amniote"), 6),
        (("sakmarian", "amphibian"), 15),
    ]
)

In [9]:
def sim_type_detection(fine_map_file):
	"""
	Detects the simulation type from a file path (the type was not saved anywhere more sensible).

	The path is searched for the name of an interval and the name of a tetrapod group as
	plain substrings; the first pair for which both names occur in the path is returned.

	:param fine_map_file: the file path to search for the interval and tetrapod group names
	:raises ValueError: if no (interval, tetrapod group) pair is found in the path
	:return: a tuple of (interval, tetrapod group)
	"""
	candidates = (
		(interval_name, group_name)
		for interval_name in intervals
		for group_name in tetrapod_groups
		if interval_name in fine_map_file and group_name in fine_map_file
	)
	# Only the first matching pair is wanted, so stop at the first item produced.
	for detected in candidates:
		return detected
	raise ValueError("No type detected! Filename: {}.".format(fine_map_file))

In [10]:
# Input/output locations and run settings. The paths are absolute and machine-specific.

# Label used in the names of the exported csv files.
# Presumably "fragmented" should be used instead when analysing the normal simulations:
# fragmented = "fragmented"
fragmented = "clustered"

# Root folders on the Linux machine; the macOS equivalents are kept below for reference.
local_dir = "/home/sam/Documents/PhD/PaleoSampling"
ext_dir = "/run/media/sam/Media/Paleo"
# local_dir = "/Users/samthompson/Documents/PhD/PaleoSampling/"
# ext_dir = "/Volumes/Seagate 3TB/Paleo/"

# Folder containing the simulation databases, and the local folder the csv outputs are copied to.
data_dir = os.path.join(ext_dir, "Data")
results_dir = os.path.join(ext_dir, "Results", "PaleoMain", "Clustered1")
dst_folder = os.path.join(local_dir, "Results", "Clustered1")

# Speciation rates applied to every simulation.
speciation_rates = [1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

In [11]:
# Delete all simulations which haven't completed yet.
# A database is treated as incomplete when opening it as a CoalescenceTree raises an IOError.
for file in os.listdir(results_dir):
	# Match on the extension rather than on a substring, so that files which merely contain
	# ".db" in their name (e.g. SQLite "*.db-journal" side files) are never opened or deleted.
	if not file.endswith(".db"):
		continue
	db_path = os.path.join(results_dir, file)
	try:
		CoalescenceTree(db_path)
	except IOError:
		print("Removing incomplete simulation {}.".format(file))
		os.remove(db_path)

In [13]:
# Calculate the biodiversity metrics - can take a bit of time
for file in os.listdir(results_dir):
    # Match on the extension rather than on a substring, so that files which merely contain
    # ".db" in their name (e.g. SQLite "*.db-journal" side files) are not opened as simulations.
    if not file.endswith(".db"):
        continue
    t = CoalescenceTree(os.path.join(results_dir, file))
    # Undo an earlier downsample (if one was stored) and wipe the existing data before
    # recalculating - presumably so that the cell can be re-run on the same databases.
    if check_sql_table_exist(t.database, "SPECIES_LIST_ORIGINAL"):
        t.revert_downsample()
    t.wipe_data()
    sim_params = t.get_simulation_parameters()
    # The simulation type is detected once from the sample file path and reused below.
    (interval, tet_group) = sim_type_detection(sim_params["sample_file"])
    deme = sim_params["deme"]
    sample_size = sim_params["sample_size"]
    # Downsample in proportion to the maximum density for this interval and tetrapod group.
    downsample_rate = max_density[(interval, tet_group)] / (deme * sample_size)
    t.downsample(downsample_rate)
    fragments_csv = os.path.join(
        data_dir, "configs", "fragments_{}_{}.csv".format(interval, tet_group)
    )
    t.set_speciation_parameters(
        record_spatial=True,
        record_fragments=fragments_csv,
        speciation_rates=speciation_rates,
    )
    t.clear_calculations()
    t.apply()
    comparison_db = os.path.join(data_dir, "databases", "{}_{}.db".format(interval, tet_group))
    t.import_comparison_data(comparison_db)
    # Disabled step, kept for reference: t.adjust_data()
    t._clear_goodness_of_fit()
    t.calculate_fragment_richness()
    # Disabled step, kept for reference: t.calculate_alpha_diversity()
    # NOTE(review): alpha diversity is read back later in this notebook - confirm that
    # calculate_beta_diversity() also produces it.
    t.calculate_beta_diversity()
    t.calculate_goodness_of_fit()

In [14]:
# Sample from the simulations
def _fetch_actual_metric(tree, community_reference, metric):
	"""
	Fetches the 'actual' value of a metric for the whole landscape from BIODIVERSITY_METRICS.

	:param tree: the CoalescenceTree whose database cursor is queried
	:param community_reference: the community reference to select
	:param metric: the value of the metric column to select (e.g. 'fragment_richness')
	:raises IndexError: if the table contains no matching row
	:return: the first column of the first matching row
	"""
	rows = tree.cursor.execute(
		"SELECT actual FROM BIODIVERSITY_METRICS WHERE"
		" community_reference==? AND fragment=='whole' AND "
		"metric==?",
		(community_reference, metric),
	).fetchall()
	if not rows:
		raise IndexError(
			"No actual '{}' value found for community reference {}.".format(metric, community_reference)
		)
	return rows[0][0]


# One record per (simulation, speciation rate)
tmp = []
# One record per (simulation, speciation rate, distance)
distance_sim_tmp = []
# One record per (simulation, speciation rate, fragment)
fragment_abundances = []
for file in os.listdir(results_dir):
	# Match on the extension rather than on a substring, so that files which merely contain
	# ".db" in their name (e.g. SQLite "*.db-journal" side files) are not opened as simulations.
	if not file.endswith(".db"):
		continue
	t = CoalescenceTree(os.path.join(results_dir, file), logging_level=30)
	# Drop any existing distance similarity table before it is calculated again.
	if check_sql_table_exist(t.database, "SPECIES_DISTANCE_SIMILARITY"):
		t.cursor.execute("DROP TABLE IF EXISTS SPECIES_DISTANCE_SIMILARITY")
	t.calculate_species_distance_similarity()
	# The simulation parameters and type do not depend on the speciation rate,
	# so they are read once per database instead of once per rate.
	sim_params = t.get_simulation_parameters()
	(interval, tet_group) = sim_type_detection(sim_params["sample_file"])
	# Percent cover is fixed at 1.0; the original cell had a commented-out call to
	# percent_cover_detection(sim_params["fine_map_file"]) here instead.
	pc = 1.0
	for sr in speciation_rates:
		ref = t.get_community_reference(speciation_rate=sr, time=0.0, fragments=True)
		spec_r = _fetch_actual_metric(t, ref, "fragment_richness")
		b = _fetch_actual_metric(t, ref, "beta_diversity")
		a = _fetch_actual_metric(t, ref, "alpha_diversity")
		species_richness = t.get_species_richness(ref)
		beta = t.get_beta_diversity(ref)
		alpha = t.get_alpha_diversity(ref)
		goodness_fit = t.get_goodness_of_fit(reference=ref)
		total_ind = t.get_number_individuals(community_reference=ref)
		# Columns shared by all three output tables.
		sim_record = {
			"interval": interval,
			"tetrapod_group": tet_group,
			"sigma": sim_params["sigma"],
			"speciation_rate": sr,
			"deme": sim_params["deme"],
		}
		# Columns shared by the summary and the distance similarity tables.
		community_record = dict(
			sim_record,
			richness=species_richness,
			beta_diversity=beta,
			alpha_diversity=alpha,
			gof=goodness_fit,
			actual_richness=spec_r,
			actual_beta=b,
			actual_alpha=a,
		)
		tmp.append(dict(community_record, percent_cover=pc, simulated_individuals=total_ind))
		distance_sim = t.get_species_distance_similarity(ref)
		for distance, no_ind in distance_sim:
			distance_sim_tmp.append(
				dict(community_record, distance=distance, no_individuals=no_ind, percent_cover=pc)
			)
		for fragment in t.get_fragment_list(ref):
			r = t.get_fragment_richness(fragment, ref)
			fragment_abundances.append(
				dict(sim_record, richness=r, fragment=fragment, percent_cover=pc)
			)

df = pd.DataFrame(tmp)
df_distance_sim = pd.DataFrame(distance_sim_tmp)
df_fragment_abundances = pd.DataFrame(fragment_abundances)

In [15]:
# Write the three result tables to csv files in the results folder.
# The file names carry the run label held in `fragmented`.
csv_name_templates = (
	"results_{}.csv",
	"results_distance_sim_{}.csv",
	"results_fragment_abundances_{}.csv",
)
src_csv, src_csv2, src_csv3 = [
	os.path.join(results_dir, template.format(fragmented)) for template in csv_name_templates
]
# Only the summary table is written without its index column.
df.to_csv(src_csv, index=False)
df_distance_sim.to_csv(src_csv2)
df_fragment_abundances.to_csv(src_csv3)

In [16]:
# Copy the output csvs to the local results folder - change fragmented variable as appropriate
# (the files in the simulation results folder are left in place).
if not os.path.exists(dst_folder):
	os.makedirs(dst_folder)
dst_name_templates = (
	"results_{}.csv",
	"results_distance_sim_{}.csv",
	"results_fragment_abundances_{}.csv",
)
dst_csv, dst_csv2, dst_csv3 = [
	os.path.join(dst_folder, template.format(fragmented)) for template in dst_name_templates
]
shutil.copy2(src_csv, dst_csv)
shutil.copy2(src_csv2, dst_csv2)
shutil.copy2(src_csv3, dst_csv3)

'/home/sam/Documents/PhD/PaleoSampling/Results/Clustered1/results_fragment_abundances_clustered.csv'

In [17]:
# Display the run label that is used in the names of the csv files
fragmented

'clustered'