# MAP PREPARATION FOR SPATIALLY-EXPLICIT NEUTRAL MODELS 

This version uses the occurrences-squared metric

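This file requires
- Scotese shapefiles (`wcnt.shp`) for continental boundaries at different times (280, 320 and 340 mya)
- interval_data.csv, which lists each interval together with the Scotese map folder (`map_file`) and `midpoint` used for it
- info_per_pcoord_main.csv for location data for each fragment (output of R script)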

# This file outputs
- Raster files for each interval for continental extents
- Raster files for sampling regimes across the world
- Shapefiles for all sites at each interval

In [1]:
import math
import numpy as np
import os
import pandas as pd
import shutil
import sys
sys.path.append("../")
from data_preparation import rasterize, extract_from_shapefile, \
	create_shapefile_from_points, find_distance_lat_long, randomly_clear_landscape, add_masks
from pycoalescence import Map

In [7]:
# Data set up
# Set data_directory to the root of the data directory; the scotese, paleo_maps and configs
# paths used in the rest of this notebook are all built relative to it.
# Alternative location used on another machine:
# data_directory = "/Volumes/Seagate 3TB/Paleo/Data/"
data_directory = "/run/media/sam/Media/Paleo/Data"
# Alternative relative location:
# data_directory = "../../../Data/"
# Directory holding the csv inputs (interval_data.csv, info_per_pcoord_main.csv); max_individuals.csv
# is also written here.
csv_directory = "../../MainSimulationR/input"

In [3]:
# Load the interval table and normalise the interval names to lower case.
interval_csv_path = os.path.join(csv_directory, "interval_data.csv")
interval_csv = pd.read_csv(interval_csv_path)
interval_csv["interval"] = interval_csv["interval"].map(lambda name: name.lower())

In [23]:
# For every Scotese map folder referenced by the interval table: copy the features whose "TYPE"
# field is "CM" into a new shapefile (wcnt_altered.shp) and convert that shapefile to a raster.
scotese_root = os.path.join(data_directory, "scotese")
main_map_directory = os.path.join(data_directory, "paleo_maps", "main")
for folder in set(interval_csv.map_file):
    source_shapefile = os.path.join(scotese_root, folder, "wcnt.shp")
    if not os.path.exists(source_shapefile):
        raise IOError("file {} does not exist".format(source_shapefile))
    filtered_shapefile = os.path.join(scotese_root, folder, "wcnt_altered.shp")
    # Keep only the "CM" features
    extract_from_shapefile(source_shapefile, filtered_shapefile, field="TYPE", field_value="CM")
    # The raster is named after the map folder; per-interval copies are made separately
    continent_raster = os.path.join(main_map_directory, "{}.tif".format(folder))
    rasterize(filtered_shapefile, continent_raster, pixel_size=0.01)

In [29]:
# Give every interval its own copy of the raster generated for its map_file. Duplicating the
# files is slightly wasteful, but it means each interval can be looked up by name alone.
main_map_directory = os.path.join(data_directory, "paleo_maps", "main")
for map_name, interval_name in zip(interval_csv["map_file"], interval_csv["interval"]):
    shutil.copyfile(
        os.path.join(main_map_directory, "{}.tif".format(map_name)),
        os.path.join(main_map_directory, "{}.tif".format(interval_name)),
    )

In [4]:
# Import the fragment coordinates from csv.
info_per_pcoord = pd.read_csv(os.path.join(csv_directory, "info_per_pcoord_main.csv"))
# Each "pcoords" entry is a "lat,long" string; split it into numeric lat and long columns for the
# shapefile. The two-name unpacking raises ValueError for any entry that does not have exactly
# two comma-separated parts.
coordinate_pairs = [pcoords.split(",") for pcoords in info_per_pcoord["pcoords"]]
info_per_pcoord["lat"] = pd.Series([float(lat) for lat, _ in coordinate_pairs],
                                   index=info_per_pcoord.index, dtype=float)
info_per_pcoord["long"] = pd.Series([float(long) for _, long in coordinate_pairs],
                                    index=info_per_pcoord.index, dtype=float)

In [11]:
# For every interval / tetrapod group pair, write a point shapefile of the fossil sites and
# rasterise it into a spatial sampling mask.
# The mask is rasterised from the "prop_ind" field (presumably that value is written at each
# site and 0 elsewhere - confirm against `rasterize`). The fragment csv cell later overwrites the
# values at the fossil sites.
# NOTE(review): these files are written under paleo_maps/main, whereas the later cells read
# paleomask_*.tif from paleo_maps - confirm which location is intended.
main_map_directory = os.path.join(data_directory, "paleo_maps", "main")
tetrapod_groups = info_per_pcoord['tetrapod_group'].unique()
for interval in info_per_pcoord['interval'].unique():
    interval_label = interval.replace(" ", "_").lower()
    for tet_group in tetrapod_groups:
        in_selection = ((info_per_pcoord.interval == interval) &
                        (info_per_pcoord.tetrapod_group == tet_group))
        site_df = info_per_pcoord[in_selection]
        # the point shapefile and mask raster defining our spatial sampling
        mask_shp = os.path.join(main_map_directory,
                                "pointsmask_{}_{}.shp".format(interval_label, tet_group))
        mask_raster = os.path.join(main_map_directory,
                                   "paleomask_{}_{}.tif".format(interval_label, tet_group))
        create_shapefile_from_points(site_df, mask_shp)
        rasterize(mask_shp, mask_raster, pixel_size=0.01, field="prop_ind")

In [5]:
# Unique interval labels (lower case, spaces replaced by underscores, matching the mask file
# names) and unique tetrapod groups.
interval_list = {name.replace(" ", "_").lower() for name in info_per_pcoord["interval"]}
tetrapod_g_list = set(info_per_pcoord["tetrapod_group"])

In [8]:
# Record, for each (interval, tetrapod group) mask raster, its origin and resolution as
# [uly_mask, ulx_mask, y_res, x_res], taken from the last four values of Map.get_dimensions().
mask_origins = {}
for interval in interval_list:
    for tet_group in tetrapod_g_list:
        mask_key = (interval, tet_group)
        mask_file_name = "paleomask_{}_{}.tif".format(interval, tet_group)
        paleo_mask = Map(os.path.join(data_directory, "paleo_maps", mask_file_name))
        # The first four dimension values are not needed here
        (_, _, _, _, x_res, y_res, ulx_mask, uly_mask) = paleo_mask.get_dimensions()
        mask_origins[mask_key] = [uly_mask, ulx_mask, y_res, x_res]

In [9]:
# Lower case the interval names in info_per_pcoord
info_per_pcoord["interval"] = [x.lower() for x in info_per_pcoord["interval"]]
# Largest individuals_total within each (interval, tetrapod_group) pair, one row per pair.
# The deprecated `squeeze` argument (removed in pandas 2.0) is not passed to groupby.
interval_max_ind = (
    info_per_pcoord.groupby(["interval", "tetrapod_group"])["individuals_total"]
    .max()
    .reset_index()
)
# The DataFrame index is written as the first, unnamed csv column.
interval_max_ind.to_csv(os.path.join(csv_directory, "max_individuals.csv"))

In [10]:
# Map each (interval, tetrapod group) pair to its maximum individuals_total, and collect the
# distinct intervals and tetrapod groups that appear in interval_max_ind.
max_density_dict = {
    (interval_name, group_name): max_individuals
    for interval_name, group_name, max_individuals in zip(
        interval_max_ind["interval"],
        interval_max_ind["tetrapod_group"],
        interval_max_ind["individuals_total"],
    )
}
all_intervals = set(interval_max_ind["interval"])
all_tetrapod_groups = set(interval_max_ind["tetrapod_group"])


In [11]:
# Display the mask origins: (interval, tetrapod group) -> [uly_mask, ulx_mask, y_res, x_res]
mask_origins

{('artinskian', 'amphibian'): [36.075, -34.855, -0.01, 0.01],
 ('artinskian', 'amniote'): [12.475, -35.265, -0.01, 0.01],
 ('gzhelian', 'amphibian'): [5.335, -36.975, -0.01, 0.01],
 ('gzhelian', 'amniote'): [5.335, -36.145, -0.01, 0.01],
 ('asselian', 'amphibian'): [27.285, -36.825, -0.01, 0.01],
 ('asselian', 'amniote'): [11.295, -36.575, -0.01, 0.01],
 ('kasimovian', 'amphibian'): [4.665, -36.335, -0.01, 0.01],
 ('kasimovian', 'amniote'): [2.8649999999999998,
  -28.544999999999998,
  -0.01,
  0.01],
 ('moscovian', 'amphibian'): [3.6149999999999998, -36.595, -0.01, 0.01],
 ('moscovian', 'amniote'): [0.915, -27.105, -0.01, 0.01],
 ('sakmarian', 'amphibian'): [25.855, -36.825, -0.01, 0.01],
 ('sakmarian', 'amniote'): [11.295, -36.575, -0.01, 0.01],
 ('bashkirian', 'amphibian'): [3.215, -44.515, -0.01, 0.01],
 ('bashkirian', 'amniote'): [-12.655, -27.105, -0.01, 0.01],
 ('kungurian', 'amphibian'): [36.075, -34.855, -0.01, 0.01],
 ('kungurian', 'amniote'): [30.615000000000002, -35.265, -0

In [11]:
# Display the fragment table sorted by the lat column
info_per_pcoord.sort_values(["lat"])

Unnamed: 0.1,Unnamed: 0,fragment_name,pcoords,interval,tetrapod_group,collections,species_total,individuals_total,max_ind,prop_ind,lat,long
349,350,27263,"-54.82,63.12",asselian,amphibian,1,2,2,6,0.333333,-54.82,63.12
350,351,27263,"-54.82,63.12",sakmarian,amphibian,1,2,2,15,0.133333,-54.82,63.12
51,52,156530,"-52.7,60.53",artinskian,amphibian,1,1,1,15,0.066667,-52.70,60.53
254,255,180810,"-27.02,-10.29",kungurian,amphibian,1,2,2,17,0.117647,-27.02,-10.29
253,254,180810,"-27.02,-10.29",kungurian,amniote,1,1,1,15,0.066667,-27.02,-10.29
252,253,180809,"-26.76,-10.25",kungurian,amphibian,1,1,1,17,0.058824,-26.76,-10.25
528,529,81525,"-12.7,-27.05",bashkirian,amniote,1,1,1,1,1.000000,-12.70,-27.05
530,531,81525,"-12.7,-27.05",moscovian,amniote,1,1,1,4,0.250000,-12.70,-27.05
529,530,81525,"-12.7,-27.05",bashkirian,amphibian,1,3,3,23,0.130435,-12.70,-27.05
531,532,81525,"-12.7,-27.05",moscovian,amphibian,1,3,3,21,0.142857,-12.70,-27.05


In [12]:
# Create the fragment csv for defining fragments within the simulation.
# One csv is written for each interval / tetrapod group pair that has fossil sites.
# The sampling masks are rewritten at the same time: the fragment cell coordinates can't be
# calculated until the mask file origin is known, so the mask values at the fossil sites are
# updated here rather than when the masks are first rasterised.
config_directory = os.path.join(data_directory, "configs")
os.makedirs(config_directory, exist_ok=True)
fragment_columns = ['fragment', 'x_min', 'y_min', 'x_max', 'y_max', 'no_individuals']
for interval in interval_list:
    for tet_group in tetrapod_g_list:
        # NOTE(review): `interval` has spaces replaced by underscores, whereas the "interval"
        # column is only lower-cased, so a multi-word interval name would never match here -
        # confirm that none exist.
        subset_df = info_per_pcoord[(info_per_pcoord["interval"] == interval) &
                                    (info_per_pcoord["tetrapod_group"] == tet_group)]
        if subset_df.empty:
            # No fossil sites for this pair: nothing to write and no need to open the mask
            continue
        # Look the maximum up once per pair and reduce it to a scalar, so that a plain number
        # (not a one-element Series) is written into the mask raster.
        max_ind_rows = interval_max_ind.loc[(interval_max_ind["interval"] == interval) &
                                            (interval_max_ind["tetrapod_group"] == tet_group),
                                            "individuals_total"]
        if max_ind_rows.empty:
            raise ValueError("No maximum number of individuals found for {}, {}".format(interval,
                                                                                         tet_group))
        this_max_ind = max_ind_rows.iloc[0]
        # mask_origins entries are [uly_mask, ulx_mask, y_res, x_res]
        origin_lat, origin_long, _, x_res = mask_origins[(interval, tet_group)]
        # NOTE(review): the masks are read from paleo_maps, but the cell that rasterises them
        # writes to paleo_maps/main - confirm which location is intended.
        fragment_map = Map(file=os.path.join(data_directory, "paleo_maps",
                                             "paleomask_{}_{}.tif".format(interval, tet_group)))
        fragment_map.open()
        fragment_rows = []
        for _, row in subset_df.iterrows():
            # offsets[0] is used as the y (row) offset and offsets[1] as the x (column) offset
            offsets = find_distance_lat_long(origin_lat, origin_long, row.lat, row.long, res=x_res)
            x_min = int(math.floor(offsets[1]))
            y_min = int(math.floor(offsets[0]))
            fragment_rows.append({'fragment': row.fragment_name,
                                  'x_min': x_min,
                                  'y_min': y_min,
                                  'x_max': int(math.floor(offsets[1] + 1)),
                                  'y_max': int(math.floor(offsets[0] + 1)),
                                  'no_individuals': row.individuals_total})
            # Replace the mask value at this site with its relative sampling value
            fragment_map.data[y_min, x_min] = (1 + row.individuals_total) / this_max_ind
        # Rows are collected as records and written in one go; this avoids chained assignment
        # (df.col[index] = value), which is silently lost under pandas Copy-on-Write.
        fragment_locations = pd.DataFrame(fragment_rows, columns=fragment_columns)
        fragment_locations.to_csv(os.path.join(config_directory,
                                               "fragments_{}_{}.csv".format(interval, tet_group)),
                                  index=False, header=False)
        fragment_map.write()

In [15]:
# Fix the fossil sites that are actually in the sea according to the paleomap: wherever a sample
# mask is above zero, the matching cell of the interval map is raised to at least 1.
for interval in interval_list:
    interval_map = Map(os.path.join(data_directory, "paleo_maps", "{}.tif".format(interval)))
    for tet_group in tetrapod_g_list:
        sample_map = Map(os.path.join(data_directory, "paleo_maps",
                                      "paleomask_{}_{}.tif".format(interval, tet_group)))
        sample_map.open()
        # 1 where the mask is sampled, 0 elsewhere
        sampled_cells = np.array(sample_map.data > 0.0).astype(int)
        x_size, y_size = sample_map.get_x_y()
        # position of the sample mask within the interval map
        x_off, y_off = sample_map.calculate_offset(interval_map)
        current_values = interval_map.get_subset(x_off, y_off, x_size, y_size, no_data_value=0)
        interval_map.write_subset(np.maximum(current_values, sampled_cells), x_off, y_off)

In [10]:
# Now double check that these all make sense: for every fragment in each fragment csv, the mask
# raster cell at (x_min, y_min) should hold (1 + no_individuals) / max individuals for that
# interval / tetrapod group pair (within a relative tolerance of 1e-2).
mismatch_count = 0
for interval in interval_list:
    for tet_group in tetrapod_g_list:
        df = pd.read_csv(os.path.join(data_directory, "configs", "fragments_{}_{}.csv".format(interval, tet_group)),
                         header=None)
        df.columns = ['fragment', 'x_min', 'y_min', 'x_max', 'y_max', 'no_individuals']
        mask_raster = Map(os.path.join(data_directory, "paleo_maps",
                                       "paleomask_{}_{}.tif".format(interval, tet_group)))
        # The result of this first read is unused; the call is kept in case get_cached_subset
        # relies on it to load the raster - TODO confirm and remove if unnecessary.
        mask_raster.get_cached_subset(0, 0, 1, 1)
        # Reduce the maximum to a scalar so that math.isclose compares plain numbers rather than
        # a one-element Series.
        max_ind_rows = interval_max_ind.loc[(interval_max_ind["interval"] == interval) &
                                            (interval_max_ind["tetrapod_group"] == tet_group),
                                            "individuals_total"]
        if max_ind_rows.empty:
            raise ValueError("No maximum number of individuals found for {}, {}".format(interval,
                                                                                         tet_group))
        this_max_ind = max_ind_rows.iloc[0]
        for _, row in df.iterrows():
            expected_value = (1 + row.no_individuals) / this_max_ind
            mask_value = mask_raster.get_cached_subset(row.x_min, row.y_min, 1, 1)[0][0]
            if not math.isclose(mask_value, expected_value, rel_tol=1e-2):
                mismatch_count += 1
                # Report the same expected value that was used in the comparison
                print("Mask value not correct: {} != {}"
                      " for location {}, {} - fragment {}".format(mask_value,
                                                                  expected_value,
                                                                  row.x_min,
                                                                  row.y_min,
                                                                  row.fragment))
# Only claim success when no mismatch was reported
if mismatch_count == 0:
    print("All maps verified")
else:
    print("{} mask values did not match".format(mismatch_count))

All maps verified


In [27]:
# Now verify the samplemaps: within the area covered by each sample mask, the interval map
# summed over the sampled (non-zero mask) cells must equal the number of sampled cells.
for interval in interval_list:
    interval_map = Map(os.path.join(data_directory, "paleo_maps", "{}.tif".format(interval)))
    for tet_group in tetrapod_g_list:
        sample_map = Map(os.path.join(data_directory, "paleo_maps",
                                      "paleomask_{}_{}.tif".format(interval, tet_group)))
        sample_map.open()
        # True where the mask is zero, i.e. the cells to ignore
        unsampled = sample_map.data == 0.0
        x_size, y_size = sample_map.get_x_y()
        offset_x, offset_y = sample_map.calculate_offset(interval_map)
        interval_values = interval_map.get_subset(offset_x, offset_y, x_size, y_size, no_data_value=0)
        density = np.ma.array(interval_values, mask=unsampled)
        tot_subset = np.sum(~unsampled)
        tot_density = density.sum()
        if tot_subset != tot_density:
            print("Total from subset: {}".format(tot_subset))
            print("Total from density: {}".format(tot_density))
            raise ValueError("Zero density in sampled region found in {}, {}.".format(interval, tet_group))
print("All maps verified")

All maps verified


### Generate the fragmented landscapes

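Generate 20, 40 and 80% fragmented landscapes (`proportion_cover` values of 0.2, 0.4 and 0.8) for times after 305 mya. Note that the code below selects intervals whose `midpoint` is less than 307 and currently only processes the Kasimovian.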

In [29]:
# Create one fragmented copy of the interval map per proportion cover, skipping any file that
# already exists, and remember which intervals were fragmented.
fragmented_directory = os.path.join(data_directory, "paleo_maps", "fragmented")
if not os.path.exists(fragmented_directory):
    os.mkdir(fragmented_directory)
fragmented_intervals = set()
for proportion_cover in [0.2, 0.4, 0.8]:
    for row_label, row in interval_csv.iterrows():
        # only intervals with a midpoint below 307 are considered
        if not row["midpoint"] < 307:
            continue
        interval = row["interval"]
        # currently restricted to the kasimovian
        if interval != "kasimovian":
            continue
        print("Interval: {}".format(interval))
        fragmented_intervals.add(interval)
        fragmented_map = os.path.join(fragmented_directory,
                                      "{}_{}_fragmented.tif".format(interval, proportion_cover))
        if os.path.exists(fragmented_map):
            continue
        source_map = os.path.join(data_directory, "paleo_maps", "{}.tif".format(interval))
        randomly_clear_landscape(source_map, fragmented_map, proportion_cover)

Interval: kasimovian
Interval: kasimovian
Interval: kasimovian


In [30]:
# Fix the fossil sites that are actually in the sea according to the paleomap, or have been
# randomly removed: wherever a sample mask is above zero, the matching cell of the fragmented
# map is raised to at least 1.
fragmented_directory = os.path.join(data_directory, "paleo_maps", "fragmented")
for proportion_cover in [0.2, 0.4, 0.8]:
    print(proportion_cover)
    for interval in interval_list:
        if interval not in fragmented_intervals:
            continue
        interval_map = Map(os.path.join(fragmented_directory,
                                        "{}_{}_fragmented.tif".format(interval, proportion_cover)))
        for tet_group in tetrapod_g_list:
            sample_map = Map(os.path.join(data_directory, "paleo_maps",
                                          "paleomask_{}_{}.tif".format(interval, tet_group)))
            sample_map.open()
            # 1 where the mask is sampled, 0 elsewhere
            sampled_cells = np.array(sample_map.data > 0.0).astype(int)
            x_size, y_size = sample_map.get_x_y()
            x_off, y_off = sample_map.calculate_offset(interval_map)
            current_values = interval_map.get_subset(x_off, y_off, x_size, y_size, no_data_value=0)
            interval_map.write_subset(np.maximum(current_values, sampled_cells), x_off, y_off)

0.2
0.4
0.8


In [31]:
# Now verify the samplemaps against the fragmented maps: within the area covered by each sample
# mask, the fragmented map summed over the sampled (non-zero mask) cells must equal the number
# of sampled cells.
fragmented_directory = os.path.join(data_directory, "paleo_maps", "fragmented")
for proportion_cover in [0.2, 0.4, 0.8]:
    for interval in interval_list:
        if interval not in fragmented_intervals:
            continue
        interval_map = Map(os.path.join(fragmented_directory,
                                        "{}_{}_fragmented.tif".format(interval, proportion_cover)))
        for tet_group in tetrapod_g_list:
            sample_map = Map(os.path.join(data_directory, "paleo_maps",
                                          "paleomask_{}_{}.tif".format(interval, tet_group)))
            sample_map.open()
            # True where the mask is zero, i.e. the cells to ignore
            unsampled = sample_map.data == 0.0
            x_size, y_size = sample_map.get_x_y()
            offset_x, offset_y = sample_map.calculate_offset(interval_map)
            interval_values = interval_map.get_subset(offset_x, offset_y, x_size, y_size,
                                                      no_data_value=0)
            density = np.ma.array(interval_values, mask=unsampled)
            tot_subset = np.sum(~unsampled)
            tot_density = density.sum()
            if tot_subset != tot_density:
                print("Total from subset: {}".format(tot_subset))
                print("Total from density: {}".format(tot_density))
                raise ValueError("Zero density in sampled region found in {}, {}"
                                 " with {} proportion cover.".format(interval, tet_group, proportion_cover))
print("All maps verified")

All maps verified
