# Initial analysis script before all simulations are completed

- item
- item2

In [1]:
import os
from pycoalescence import CoalescenceTree
# from PyCoalescence.helper import update_parameter_names
import pandas as pd
import shutil
import sqlite3
from pycoalescence.coalescence_tree import check_sql_table_exist
from pycoalescence.helper import update_parameter_names

In [2]:
def percent_cover_detection(fine_map_file):
	"""
	Work out a simulation's percentage cover from the path of its fine map.

	The path is searched for the markers "_0.1_", "_0.2_" and "_0.5_" (in that order) and the
	value belonging to the first marker that occurs in the path is returned.

	:param fine_map_file: path of the fine map file to inspect
	:return: the percentage cover (0.1, 0.2 or 0.5) encoded in the path
	:raises ValueError: if none of the known markers occurs in the path
	"""
	candidates = (0.1, 0.2, 0.5)
	found = [cover for cover in candidates if "_{}_".format(cover) in fine_map_file]
	if not found:
		raise ValueError("No percentage cover detected!")
	return found[0]

In [3]:
def sim_type_detection(fine_map_file):
	"""
	Work out the simulation type from the path of its fine map (the type is not stored
	anywhere else).

	The known types are tried in the order "early_carboniferous", "late_carboniferous",
	"permian"; the first one that occurs in the path is returned.

	:param fine_map_file: path of the fine map file to inspect
	:return: the name of the simulation type found in the path
	:raises ValueError: if none of the known types occurs in the path
	"""
	known_types = ("early_carboniferous", "late_carboniferous", "permian")
	detected = next((name for name in known_types if name in fine_map_file), None)
	if detected is None:
		raise ValueError("No type detected!")
	return detected

In [4]:
# Input/output locations and the speciation rates applied to every simulation.
# NOTE(review): both directories are absolute paths on the original machine - adjust before re-running.
# Set fragmented to "" instead to analyse the normal (non-fragmented) simulations.
fragmented = "fragmented"
data_dir = "/Volumes/Seagate 3TB/Paleo/Data"
# The fragmented suffix is appended directly to the results folder name.
results_dir = "/Users/samthompson/Documents/PhD/PaleoSampling/Results/CoalescenceSimulations{}".format(fragmented)
speciation_rates = [1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

In [9]:
# Delete all simulations which haven't completed yet.
# WARNING: destructive - every database that CoalescenceTree fails to open with an IOError
# is removed from disk.
for file in os.listdir(results_dir):
	# Match on the extension rather than on a substring: a substring test would also select
	# names such as "x.db-journal" or "x.db.bak", which could then be deleted by mistake.
	if not file.endswith(".db"):
		continue
	file_path = os.path.join(results_dir, file)
	try:
		# Only the attempt to open matters here; the tree object itself is not needed.
		CoalescenceTree(file_path)
	except IOError:
		os.remove(file_path)

In [10]:
# Update the parameter names stored in each database (only needs to be run once).
# A database whose simulation parameters cannot be read (sqlite3.OperationalError) is passed
# to update_parameter_names; any other failure is reported and the loop moves on.
for file in os.listdir(results_dir):
	# Match on the extension rather than on a substring, so that names such as
	# "x.db-journal" are never treated as simulation databases.
	if not file.endswith(".db"):
		continue
	try:
		file_path = os.path.join(results_dir, file)
		# The inner try is deliberately nested: errors raised by update_parameter_names
		# must still be caught by the outer handler below.
		try:
			t = CoalescenceTree(file_path, logging_level=50)
			t.get_simulation_parameters()
		except sqlite3.OperationalError:
			update_parameter_names(file_path)
	except Exception as e:
		print("Error in {}: {}".format(file, e))

In [12]:
# Calculate the biodiversity metrics -  can take a bit of time
for file in os.listdir(results_dir):
	# Match on the extension rather than on a substring, so that names such as
	# "x.db-journal" are never opened as simulation databases.
	if not file.endswith(".db"):
		continue
	t = CoalescenceTree(os.path.join(results_dir, file))
	sim_params = t.get_simulation_parameters()
	# The landscape type is detected once from the fine map path; it selects both the
	# fragment config and the comparison database used below.
	landscape_type = sim_type_detection(sim_params["fine_map_file"])
	fragments_csv = os.path.join(data_dir, "configs", "fragments_{}.csv".format(landscape_type))
	comparison_db = os.path.join(data_dir, "databases", "{}.db".format(landscape_type))
	t.set_speciation_params(record_spatial=True,
							record_fragments=fragments_csv,
							speciation_rates=speciation_rates)
	# Existing calculated data is wiped before the speciation parameters are applied.
	t.wipe_data()
	t.apply()
	t.import_comparison_data(comparison_db)
	# t.adjust_data()
	t._clear_goodness_of_fit()
	t.calculate_fragment_richness()
	# t.calculate_alpha_diversity()
	t.calculate_beta_diversity()
	t.calculate_goodness_of_fit()

In [5]:
# Sample from the simulations
def _actual_metric(tree, community_reference, metric):
	"""
	Fetch the 'actual' value of a whole-landscape metric from BIODIVERSITY_METRICS.

	:param tree: an opened CoalescenceTree; its cursor is used to run the query
	:param community_reference: the community reference to select
	:param metric: the metric name, e.g. "fragment_richness"
	:return: the 'actual' column of the first matching row
	:raises IndexError: if no row matches the community reference and metric
	"""
	rows = tree.cursor.execute("SELECT actual FROM BIODIVERSITY_METRICS WHERE"
							   " community_reference==? AND fragment=='whole' AND "
							   "metric==?",
							   (community_reference, metric)).fetchall()
	if not rows:
		# Same exception type as indexing an empty result, but with enough context to debug.
		raise IndexError("No '{}' entry for community reference {} in "
						 "BIODIVERSITY_METRICS".format(metric, community_reference))
	return rows[0][0]


tmp = []  # one record per (simulation, speciation rate)
distance_sim_tmp = []  # one record per (simulation, speciation rate, distance)
fragment_abundances = []  # one record per (simulation, speciation rate, fragment)
for file in os.listdir(results_dir):
	# Match on the extension rather than on a substring, so that names such as
	# "x.db-journal" are never opened as simulation databases.
	if not file.endswith(".db"):
		continue
	print(file)
	t = CoalescenceTree(os.path.join(results_dir, file), logging_level=10)
	# Drop any previous distance-similarity table before it is recalculated.
	if check_sql_table_exist(t.database, "SPECIES_DISTANCE_SIMILARITY"):
		t.cursor.execute("DROP TABLE IF EXISTS SPECIES_DISTANCE_SIMILARITY")
	t.calculate_species_distance_similarity()
	# These depend only on the simulation, not on the speciation rate, so read them once per file.
	sim_params = t.get_simulation_parameters()
	sim_type = sim_type_detection(sim_params["fine_map_file"])
	pc = percent_cover_detection(sim_params["fine_map_file"])
	for sr in speciation_rates:
		ref = t.get_community_reference(speciation_rate=sr, time=0.0, fragments=True)
		spec_r = _actual_metric(t, ref, "fragment_richness")
		b = _actual_metric(t, ref, "beta_diversity")
		a = _actual_metric(t, ref, "alpha_diversity")
		species_richness = t.get_richness(ref)
		beta = t.get_beta_diversity(ref)
		alpha = t.get_alpha_diversity(ref)
		goodness_fit = t.get_goodness_of_fit(reference=ref)
		# Fields shared by the summary and distance records. percent_cover is added last in
		# each record so that the key order matches the original output.
		shared = {"type": sim_type, "sigma": sim_params["sigma"], "speciation_rate": sr,
				  "deme": sim_params["deme"], "richness": species_richness,
				  "beta_diversity": beta, "alpha_diversity": alpha,
				  "gof": goodness_fit, "actual_richness": spec_r, "actual_beta": b,
				  "actual_alpha": a}
		tmp.append(dict(shared, percent_cover=pc))
		distance_sim = t.get_species_distance_similarity(ref)
		for distance, no_ind in distance_sim:
			distance_sim_tmp.append(dict(shared, distance=distance, no_individuals=no_ind,
										 percent_cover=pc))
		for fragment in t.get_fragment_list(ref):
			r = t.get_fragment_richness(fragment, ref)
			fragment_abundances.append({"type": sim_type, "sigma": sim_params["sigma"],
										"speciation_rate": sr, "deme": sim_params["deme"],
										"richness": r, "fragment": fragment,
										"percent_cover": pc})

df = pd.DataFrame(tmp)
df_distance_sim = pd.DataFrame(distance_sim_tmp)
df_fragment_abundances = pd.DataFrame(fragment_abundances)

data_0_0.db
data_0_1.db


data_0_10.db
data_0_11.db


data_0_12.db


data_0_13.db


data_0_14.db
data_0_17.db


data_0_18.db
data_0_19.db


data_0_20.db
data_0_22.db


data_0_23.db
data_0_3.db


data_0_4.db
data_0_5.db


data_0_8.db
data_0_9.db


No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

data_1_0.db
data_1_1.db
data_1_10.db
data_1_11.db
data_1_12.db
data_1_13.db


No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

data_1_14.db
data_1_15.db
data_1_16.db
data_1_17.db


No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

data_1_18.db
data_1_19.db
data_1_2.db
data_1_20.db


No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

data_1_21.db
data_1_22.db
data_1_23.db
data_1_24.db
data_1_3.db


No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 2 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

data_1_4.db
data_1_5.db
data_1_6.db
data_1_7.db
data_1_8.db


No distances found for 3 - likely no species exist with more than one location.

No distances found for 4 - likely no species exist with more than one location.

No distances found for 5 - likely no species exist with more than one location.

data_1_9.db
data_2_1.db
data_2_10.db


data_2_11.db
data_2_12.db


data_2_14.db
data_2_15.db


data_2_17.db
data_2_18.db


data_2_2.db
data_2_22.db


data_2_23.db
data_2_24.db


data_2_3.db
data_2_4.db


data_2_5.db
data_2_6.db


data_2_7.db
data_2_8.db


data_2_9.db


In [6]:
# Write the three result tables to csv files inside the results directory.
src_csv, src_csv2, src_csv3 = [
	os.path.join(results_dir, pattern.format(fragmented))
	for pattern in ("results_{}.csv",
					"results_distance_sim_{}.csv",
					"results_fragment_abundances_{}.csv")
]
# NOTE(review): only the summary table is written without its index; the other two files
# keep the index column - confirm this is intended before changing it.
df.to_csv(src_csv, index=False)
df_distance_sim.to_csv(src_csv2)
df_fragment_abundances.to_csv(src_csv3)

In [7]:
# Copy the output csvs (the originals stay in the results directory) into the main results
# folder - change the fragmented variable as appropriate.
dst_folder = "/Users/samthompson/Documents/PhD/PaleoSampling/Results"
dst_csv, dst_csv2, dst_csv3 = [
	os.path.join(dst_folder, pattern.format(fragmented))
	for pattern in ("results_{}.csv",
					"results_distance_sim_{}.csv",
					"results_fragment_abundances_{}.csv")
]
shutil.copy2(src_csv, dst_csv)
shutil.copy2(src_csv2, dst_csv2)
# Left as the final bare expression so the cell still displays the last destination path.
shutil.copy2(src_csv3, dst_csv3)

'/Users/samthompson/Documents/PhD/PaleoSampling/Results/results_fragment_abundances_fragmented.csv'