In [1]:
from biogeodataframe import BioGeoDataFrame
from osgeo import gdal
import geopandas as gpd
from rioxarray.merge import merge_arrays
from geocube.api.core import make_geocube
import numpy as np
from multiprocessing import Pool

In [2]:
# --- Notebook configuration -------------------------------------------------
# Working CRS for every layer: BC Albers
CRS = 'EPSG:3005'
# Buffer applied around each point, in units of CRS
BUFFER_DISTANCE = 2_000
# Raster cell size handed to make_geocube, in units of CRS
GEOCUBE_RES = 100
# Upper bound on the number of occurrence rows sampled
N_SAMPLES = 5_000
# Worker count for the (currently commented-out) multiprocessing extraction
N_CORES = 8

In [3]:
# Load the species occurrence CSV as a geodataframe, then keep only the
# georeferenced rows (neither coordinate column is an empty string)
species_tmp = gpd.read_file('../data/black_bear_occurrences.csv')
has_lat = species_tmp['decimalLatitude'] != ''
has_lon = species_tmp['decimalLongitude'] != ''
species_tmp = species_tmp[has_lat & has_lon]

In [4]:
# Down-sample the occurrences to at most N_SAMPLES rows, then convert the
# geopandas frame to a BioGeoDataFrame, giving access to useful methods
SAMPLE_SEED = 42  # fixed seed so a fresh Restart & Run All draws the same rows
# Both operands are plain ints, so the built-in min is enough (np.nanmin's NaN handling is never needed)
N = min(N_SAMPLES, species_tmp.shape[0])
species_tmp = species_tmp.sample(N, random_state=SAMPLE_SEED)

species = BioGeoDataFrame(species_tmp)
# Build point geometries from the lon/lat columns (declared as EPSG:4326), then reproject to the working CRS
species = species.set_geometry(gpd.points_from_xy(
        species['decimalLongitude'], species['decimalLatitude'])).set_crs(4326)
species = species.to_crs(CRS)

  super().__setattr__(attr, val)


In [5]:
# Read the biogeoclimatic (BEC) zones and reproject them to the working CRS.
# Only ZONE (the variable species' distributions will be predicted with) and
# the geometry are retained.
bec_columns = ['ZONE', 'geometry']
bec_tmp = gpd.read_file('../data/bec').to_crs(CRS)[bec_columns]

In [6]:
# A categorical variable has to be encoded numerically before it can be rasterized,
# and the numbers later need mapping back to their string labels.
# For that, collect every distinct ZONE label (in order of first appearance).
bec_zones = bec_tmp['ZONE'].unique().tolist()
categorical_enums = dict(ZONE=bec_zones)

In [7]:
# Rasterize the BEC geodataframe into a rioxarray-backed dataset
# (resolution is expressed in the units of the target CRS)
bec = make_geocube(
    vector_data=bec_tmp,
    resolution=(GEOCUBE_RES, -GEOCUBE_RES),
    categorical_enums=categorical_enums,
)

In [8]:
# print(np.unique(bec['ZONE']))
# print(np.unique(bec['ZONE'].astype(int)))

In [9]:
# Convert numeric back to categorical string
######################################### DO NOT DELETE ######################################### 
# zone_string = bec['ZONE_categories'][bec['ZONE'].astype(int)].drop('ZONE_categories')
# bec['ZONE'] = zone_string

In [10]:
# Generate pseudo-absences: request as many as there are rows in `species`,
# passing the BEC polygons as the region
pres_abs = species.add_pseudo_absences(
    amount=len(species),
    region_poly=bec_tmp,
)

EPSG:3005 EPSG:3005
28 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
22 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
11 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
7 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
2 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
1 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
1 pseudo-absence points remaining.
EPSG:3005 EPSG:3005


  super().__setattr__(attr, val)


In [11]:
# From a list of candidate raster tiles, keep the ones that intersect the
# species occurrence points and are therefore required.
# For simplicity the candidate list holds a single raster, bec.
rasters = pres_abs.which_rasters(
    BUFFER_DISTANCE,
    [bec],
)

In [12]:
# Load the list of raster tiles into memory.
# bec is already loaded, so it is used directly here for simplicity. With tiles on disk this would be something like:
# rasters = [rioxarray.open_rasterio(x) for x in rasters]
# merged_raster = merge_arrays(rasters)
merged_raster = bec

In [13]:
# Buffer each point by BUFFER_DISTANCE so it intersects adjacent raster cells.
# The result goes into a separate 'buffered_geometry' column; the 'geometry' column is not reassigned.
# NOTE(review): cap_style=3 is presumably shapely's "square" cap style -- confirm against the installed shapely version
pres_abs['buffered_geometry'] = pres_abs['geometry'].buffer(BUFFER_DISTANCE, cap_style=3)

In [14]:
# merged_raster.rio.clip(geometries=pres_abs['buffered_geometry'][0]).dims
# [x.dims for x in merged_raster.rio.clip(geometries=pres_abs['buffered_geometry'])]

In [15]:
# from itertools import repeat

# chunks = np.array_split(
#         pres_abs.sample(np.minimum(N_SAMPLES, pres_abs.shape[0])-1).reset_index(drop=True), N_CORES)

# with Pool(N_CORES) as pool:
#     data = pool.starmap(pres_abs.extract_values, zip(
#         chunks, repeat(merged_raster)))
#     pool.close()

In [20]:
# For each occurrence point, build a 3D tensor from the merged raster, then
# join the pieces returned by extract_values into a single array
vals = np.concatenate(pres_abs.extract_values(merged_raster))

[                                           occurrenceID            eventDate  \
528   https://www.inaturalist.org/observations/12172...  2022-06-10T17:04:00   
7                                                   NaN                  NaN   
3                                                   NaN                  NaN   
5                                                   NaN                  NaN   
440   https://www.inaturalist.org/observations/12615...  2022-07-13T20:14:55   
20                                                  NaN                  NaN   
4                                                   NaN                  NaN   
982   https://www.inaturalist.org/observations/97620459  2021-10-08T13:47:36   
2425  https://www.inaturalist.org/observations/19029147  2016-06-05T07:46:00   
34                                                  NaN                  NaN   
23                                                  NaN                  NaN   
21                                     

<xarray.Dataset>
Dimensions:          (y: 139, x: 161, ZONE_categories: 17)
Coordinates:
  * y                (y) float64 3.55e+05 3.65e+05 ... 1.725e+06 1.735e+06
  * x                (x) float64 1.875e+06 1.865e+06 ... 2.85e+05 2.75e+05
  * ZONE_categories  (ZONE_categories) object 'BAFA' 'BG' ... 'SWB' 'nodata'
    spatial_ref      int64 0
Data variables:
    ZONE             (y, x) int16 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 0
<xarray.Dataset>
Dimensions:          (y: 139, x: 161, ZONE_categories: 17)
Coordinates:
  * y                (y) float64 3.55e+05 3.65e+05 ... 1.725e+06 1.735e+06
  * x                (x) float64 1.875e+06 1.865e+06 ... 2.85e+05 2.75e+05
  * ZONE_categories  (ZONE_categories) object 'BAFA' 'BG' ... 'SWB' 'nodata'
    spatial_ref      int64 0
Data variables:
    ZONE             (y, x) int16 -1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1 -1 -1 0
<xarray.Dataset>
Dimensions:          (y: 139, x: 161, ZONE_categories: 17)
Coordinates:
  * y                (y) float6



0 [[6 6]
 [0 6]]
0 [[6 6]
 [6 6]]
0 [[15  2]
 [15  2]]
0 [[15 15]
 [ 2  2]]
0 [[10  5]
 [ 4  4]]
0 [[11  8]
 [ 8  8]]
0 0 [[5 5]
 [5 5]]
[[6 6]
 [0 0]]
0 [[5 5]
 [5 5]]
0 [[0 0]
 [0 0]]
0 [[14 14]
 [14 14]]
0 [[5 5]
 [5 5]]
0 [[ 5  5]
 [ 5 10]]
0 [[ 5 10]
 [ 3  5]]
0 0 [[6 7]
 [7 7]]
[[ 8  8]
 [14  8]]
0 [[2 2]
 [2 2]]
0 [[ 7 11]
 [ 6  6]]
0 [[ 2 15]
 [15  0]]
0 [[2 2]
 [2 2]]
0 [[6 7]
 [6 6]]
0 [[13 13]
 [13 13]]


In [17]:
# Import required packages
import tensorflow as tf
import keras
from keras import layers
import pandas as pd

In [21]:
# Keep only the samples whose extracted array exists and does not contain 'nodata'
usable = [v for v in vals if v['arr'] is not None and 'nodata' not in v['arr']]

# Stack features and presence labels from the same filtered samples, in the same order
x_train = np.stack([v['arr'] for v in usable])
y_train = np.stack([v['presence'] for v in usable])

# Distinct values found in x_train, plus the indices that rebuild x_train from them
original, int_array = np.unique(x_train, return_inverse=True)

# x_train = int_array
# original

In [22]:
# Small feed-forward classifier, assembled layer by layer:
# flatten -> 4-unit ReLU hidden layer -> 2-unit softmax output
model = tf.keras.models.Sequential()
# model.add(tf.keras.layers.Input(shape=(1,)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(4, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

# len(model.weights)

2023-12-05 00:01:48.307424: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2023-12-05 00:01:48.307462: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-12-05 00:01:48.307467: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-12-05 00:01:48.307553: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-05 00:01:48.307756: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [23]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# accuracy reported as the metric
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [24]:
# Train on the stacked samples; the object returned by fit is kept as `m`
m = model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=32,
)

Epoch 1/100


2023-12-05 00:01:52.081395: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [None]:
# Display the training history held on `m`, the object returned by model.fit
m.history