# Spatially implicit simulations of early tetrapod diversity

Simulations are non-spatial (spatially implicit) so that a baseline of diversity can be achieved.

Simulations are dictated by three parameters: the speciation rate, the local density and the metacommunity size. The same speciation rate is used to generate both the local and the metacommunity.

In [1]:
import os
import pandas as pd
import numpy as np
from math import ceil
import random

In [2]:
# For dev use only - auto-reloading of modules
%load_ext autoreload
%aimport pycoalescence.coalescence_tree
%aimport pycoalescence.simulation
from pycoalescence.coalescence_tree import check_sql_table_exist
from pycoalescence import Simulation, CoalescenceTree
%autoreload 1

In [3]:
intervals = ['artinskian',
 'asselian',
 'bashkirian',
 'gzhelian',
 'kasimovian',
 'kungurian',
 'moscovian',
 'sakmarian']

tetrapod_groups = ['amniote', 'amphibian']

immigration_rates = 10.0**-np.linspace(0.1, 4, 20)
speciation_rates = [10**-x for x in range(1, 9)]

In [4]:
# The location to store simulations in
output_dir = "/Volumes/Seagate 3TB/Paleo/Results/nse_03"
# Location with the folder of databases containing the real data
data_dir = "/Volumes/Seagate 3TB/Paleo/Data/"
# The location of the csv containing location data for the fossil sites
csv_directory = "../MainSimulationR/input"
# The location to output the results csv to
output_csv_directory = "../../Results/nse1"
if not os.path.exists(output_csv_directory):
	os.makedirs(output_csv_directory)

In [5]:
# Read the data for each paleo coord
info_per_pcoord = pd.read_csv(os.path.join(csv_directory, "info_per_pcoord_main.csv"))
info_per_pcoord['lat'] = np.NaN
info_per_pcoord['long'] = np.NaN
# split into lat and long for shapefile
for index, row in info_per_pcoord.iterrows():
	lat, long = row["pcoords"].split(",")
	info_per_pcoord.loc[index, 'lat'] = pd.to_numeric(lat) # plus some small modifier\n",
	info_per_pcoord.loc[index, 'long'] = pd.to_numeric(long)
info_per_pcoord["interval"] = [x.lower() for x in info_per_pcoord["interval"]]
total_per_interval = info_per_pcoord.groupby(["interval", "tetrapod_group"],
										   squeeze=True)["individuals_total"].sum().reset_index()
local_community_size = max(total_per_interval.individuals_total)

In [6]:
job_type = 0
from pycoalescence import Simulation, CoalescenceTree
for interval in intervals:
	print(interval)
	for tet_group in tetrapod_groups:
		job_type += 1
		sampling_number = total_per_interval[(total_per_interval.tetrapod_group == tet_group) &
											 (total_per_interval.interval == interval)].individuals_total
		for seed in range(1, 11, 1):
			sim = Simulation(logging_level=40)
			sim.set_simulation_parameters(seed=seed, job_type=job_type, output_directory=output_dir,
										  min_speciation_rate=0.0001, spatial=False, deme=int(sampling_number))
			sim.set_speciation_rates(immigration_rates.tolist())
			try:
				sim.run()
			except IOError:
				pass
			tree = CoalescenceTree(sim, logging_level=40)
			# tree.wipe_data()
			tree.set_speciation_parameters(speciation_rates=immigration_rates)
			# tree.add_metacommunity_parameters(10000, 0.1, metacommunity_option="simulated")
			for metacommunity_size in [10**x for x in range(4, 9)]:
				for speciation_rate in speciation_rates:
					tree.add_metacommunity_parameters(metacommunity_size=metacommunity_size,
													  metacommunity_speciation_rate=speciation_rate,
													  metacommunity_option="analytical")
			tree.apply()

artinskian


necsimError: Keyboard interrupt detected.

### Get the SADs and total species richnesses from the simulations

In [8]:
job_type = 0
output = []
output_sad = []
from pycoalescence import CoalescenceTree
for interval in intervals:
	print(interval)
	for tet_group in tetrapod_groups:
		job_type += 1
		for seed in range(1, 11, 1):
			sim = os.path.join(output_dir, "data_{}_{}.db".format(job_type, seed))
			tree = CoalescenceTree(sim, logging_level=40)
			for reference in tree.get_community_references():
				params = tree.get_community_parameters(reference)
				metacommunity_reference = params["metacommunity_reference"]
				if metacommunity_reference == 0:
					continue
				meta_params = tree.get_metacommunity_parameters(metacommunity_reference)
				# Put this back if you want SADs as well
				sad = tree.get_species_abundances(reference=reference)
				for species_id, abundance in sad:
					output_sad.append({"interval" : interval, "seed" : seed, "tetrapod_group" : tet_group,
									   "speciation_rate" : params["speciation_rate"],
									   "metacommunity_size" : meta_params["metacommunity_size"],
									   "metacommunity_speciation_rate": meta_params["speciation_rate"],
									   "species_id" : species_id,
									   "abundance" : abundance})
				output.append({"interval" : interval, "seed" : seed, "tetrapod_group" : tet_group,
							   "speciation_rate" : params["speciation_rate"],
							   "metacommunity_size" : meta_params["metacommunity_size"],
							   "metacommunity_speciation_rate": meta_params["speciation_rate"],
							   "species_richness" : tree.get_species_richness(reference)})

artinskian


KeyboardInterrupt: 

In [61]:
# Output the results to a csv
output_df = pd.DataFrame(output)
output_df.to_csv(os.path.join(output_csv_directory, "nse_richness1.csv"))

In [64]:
output_sad_df = pd.DataFrame(output_sad)
output_sad_df.to_csv(os.path.join(output_csv_directory, "nse_abundances1.csv"))

In [9]:
# Now randomly select to get the species richness for each fragment
import sqlite3

from pycoalescence.sqlite_connection import check_sql_table_exist

job_type = 0
output_richness = []
output_alpha = []
output_fragment_richness = []
from pycoalescence import CoalescenceTree
for interval in intervals:
	print(interval)
	for tet_group in tetrapod_groups:
		job_type += 1
		interval_group_fragments = info_per_pcoord[(info_per_pcoord["interval"] == interval) & (info_per_pcoord["tetrapod_group"] == tet_group)]
		for seed in range(1, 11, 1):
			sim = os.path.join(output_dir, "data_{}_{}.db".format(job_type, seed))
			# print("data_{}_{}.db".format(job_type, seed))
			tree = CoalescenceTree(sim, logging_level=40)
			# Drop existing tables
			tree._check_database()
			tree.cursor.execute("DROP TABLE IF EXISTS FRAGMENT_RICHNESS")
			tree.cursor.execute("DROP TABLE IF EXISTS BIODIVERSITY_METRICS")
			tree.cursor.execute("DROP TABLE IF EXISTS ALPHA_DIVERSITY")
			tree.cursor.execute("DROP TABLE IF EXISTS BETA_DIVERSITY")
			tree.database.commit()
			tmp_richness = []
			for reference in tree.get_community_references():
				params = tree.get_community_parameters(reference)
				metacommunity_reference = params["metacommunity_reference"]
				if metacommunity_reference == 0:
					meta_params = {"metacommunity_size" : 0, "speciation_rate" : 0.0}
				else:
					meta_params = tree.get_metacommunity_parameters(metacommunity_reference)
				sad = tree.get_species_abundances(reference=reference)
				individuals = []
				for species_id, abundance in sad:
					for i in range(abundance):
						individuals.append(species_id)
				for index, row in interval_group_fragments.iterrows():
					fragment_name = row["fragment_name"]
					no_inds = row["individuals_total"]
					fragment_richnesses = []
					# Shuffle 10 times
					for i in range(10):
						fragment_abundances = []
						if no_inds > len(individuals):
							no_inds -= 1
						if no_inds > len(individuals):
							raise RuntimeError("Number of individuals, {} > {} in {}, {}, {}".format(len(individuals), no_inds, 
																									 interval, tet_group, fragment_name))
						richness = len(set(random.sample(individuals, no_inds)))
						fragment_richnesses.append(richness)
					mean_richness = sum(fragment_richnesses)/len(fragment_richnesses)
					tmp_richness.append({"interval" : interval, "tetrapod_group" : tet_group,
											"seed" : seed, "reference" : reference,
										 	"speciation_rate" : params["speciation_rate"],
											"metacommunity_size" : meta_params["metacommunity_size"],
											"metacommunity_speciation_rate": meta_params["speciation_rate"],
											"fragment" : fragment_name, "richness" : mean_richness})
			output_richness.extend(tmp_richness)
			# Now output to a fragment richness table
			tmp_create = "CREATE TABLE FRAGMENT_RICHNESS (ref INT PRIMARY KEY NOT NULL, fragment TEXT NOT NULL," \
				 " community_reference INT NOT NULL,  richness INT NOT NULL)"
	# Now try and create FRAGMENT_RICHNESS
			try:
				tree.cursor.execute(tmp_create)
				tree.database.commit()
			except sqlite3.OperationalError:
				raise sqlite3.OperationalError("Could not create FRAGMENT_RICHNESS table")
			# self.fragments.extend(([]*len(times)-1))
			ref = 0
			db_output = []
			for each in tmp_richness:
				ref += 1
				db_output.append([ref, each["fragment"], each["reference"], each["richness"]])
			tree.cursor.executemany("INSERT INTO FRAGMENT_RICHNESS VALUES(?, ?, ?, ?)", db_output)
			tree.database.commit()
			tree.fragments = [list(x) for x in tree.cursor.execute("SELECT fragment, community_reference, richness"
																   " FROM FRAGMENT_RICHNESS").fetchall()]
			# Move fragment richnesses into BIODIVERSITY METRICS
			ref = tree.check_biodiversity_table_exists()
			tmp_fragments = []
			for x in db_output:
				ref += 1
				tmp = [ref, "fragment_richness"]
				tmp.extend(x[1:])
				tmp_fragments.append(tmp)
			tree.cursor.executemany("INSERT INTO BIODIVERSITY_METRICS VALUES(?,?,?,?,?, NULL, NULL)", tmp_fragments)
			tree.database.commit()
			if check_sql_table_exist(tree.database, "ALPHA_DIVERSITY"):
				tree.cursor.execute("DROP TABLE ALPHA_DIVERSITY")
				tree.database.commit()
			tmp_create = "CREATE TABLE ALPHA_DIVERSITY (reference INT PRIMARY KEY NOT NULL, alpha_diversity INT NOT NULL)"
			try:
				tree.cursor.execute(tmp_create)
				tree.database.commit()
			except sqlite3.OperationalError:
				raise sqlite3.OperationalError("Could not create ALPHA_DIVERSITY table")
			alpha_tmp = {}
			for _, _, reference, richness in db_output:
				if reference in alpha_tmp.keys():
					alpha_tmp[reference].append(richness)
				else:
					alpha_tmp[reference] = [richness]
			alpha_out = []
			for k, v in alpha_tmp.items():
				alpha = sum(v)/len(v)
				alpha_out.append([k, alpha])
			tree.cursor.executemany("INSERT INTO ALPHA_DIVERSITY VALUES(?,?)", alpha_out)
			ref = tree.check_biodiversity_table_exists() + 1
			output = [[i + ref, "alpha_diversity", "whole", x[0], x[1]] for i, x in enumerate(alpha_out)]
			tree.cursor.executemany("INSERT INTO BIODIVERSITY_METRICS VALUES(?,?,?,?,?, NULL, NULL)", output)
			tree.database.commit()
			tree.calculate_beta_diversity()
			for reference in tree.get_community_references():
				alpha = tree.get_alpha_diversity(reference)
				beta = tree.get_beta_diversity(reference)
				params = tree.get_community_parameters(reference)
				metacommunity_reference = params["metacommunity_reference"]
				if metacommunity_reference == 0:
					meta_params = {"metacommunity_size" : 0, "speciation_rate" : 0.0}
				else:
					meta_params = tree.get_metacommunity_parameters(metacommunity_reference)
				output_alpha.append({"interval" : interval, "tetrapod_group" : tet_group,
									 "seed" : seed, "reference" : reference,
									 "speciation_rate" : params["speciation_rate"],
									 "metacommunity_size" : meta_params["metacommunity_size"],
									 "metacommunity_speciation_rate": meta_params["speciation_rate"],
									 "alpha" : alpha, "beta" : beta})
			# tree.import_comparison_data(os.path.join(data_dir, "databases", "{}_{}.db".format(interval, tet_group)))
			# tree.calculate_goodness_of_fit()
			# break
		# break
	# break
	

artinskian


asselian


bashkirian


gzhelian


kasimovian


kungurian


moscovian


sakmarian


In [10]:
output_alpha_df = pd.DataFrame(output_alpha)
output_alpha_df.to_csv(os.path.join(output_csv_directory, "nse_metrics1.csv"))

In [11]:
output_fragment_richness_df = pd.DataFrame(output_richness)
# This is to reduce the file size
output_fragment_richness_df = output_fragment_richness_df[output_fragment_richness_df.metacommunity_size > 0]
output_fragment_richness_df.to_csv(os.path.join(output_csv_directory, "nse_fragment_richness1.csv"))

In [58]:
# Read in the real data
input_fragment_richness = pd.read_csv(os.path.join(csv_directory, "info_per_pcoord_main.csv"))
input_fragment_richness.rename(index=str, columns={"fragment_name" : "fragment", "individuals_total" : "real_richness", 
												   "Unnamed: 0":"a"}, inplace=True)
input_fragment_richness.interval = input_fragment_richness.interval.str.lower()
input_fragment_richness.drop(["pcoords", "collections", "species_total"], axis=1, inplace=True)
input_fragment_richness.drop(["a"], axis=1, inplace=True)
combined_fragment_richness = pd.merge(input_fragment_richness, output_fragment_richness_df, on=["fragment", "interval", "tetrapod_group"])
combined_fragment_richness["richness_error"] = combined_fragment_richness
combined_fragment_richness.head()

Unnamed: 0,fragment,interval,tetrapod_group,real_richness,metacommunity_size,metacommunity_speciation_rate,reference,richness,seed,speciation_rate
0,109702,gzhelian,amniote,1,10000.0,0.1,21,1.0,1,0.0001
1,109702,gzhelian,amniote,1,10000.0,0.1,22,1.0,1,0.00016
2,109702,gzhelian,amniote,1,10000.0,0.1,23,1.0,1,0.000257
3,109702,gzhelian,amniote,1,10000.0,0.1,24,1.0,1,0.000413
4,109702,gzhelian,amniote,1,10000.0,0.1,25,1.0,1,0.000662
