In [143]:
from biogeodataframe import BioGeoDataFrame
from osgeo import gdal
import geopandas as gpd
from rioxarray.merge import merge_arrays
from geocube.api.core import make_geocube

In [145]:
# Shared configuration for the cells below.
CRS = "EPSG:3005"        # BC Albers; the species points and BEC polygons are both reprojected to it
BUFFER_DISTANCE = 5_000  # distance used when buffering points, in units of CRS
GEOCUBE_RES = 1_000      # raster cell size handed to make_geocube, in units of CRS

In [146]:
# Load the species occurrence records as a geodataframe, then keep only the
# georeferenced rows, i.e. those where neither coordinate is an empty string.
species_tmp = gpd.read_file('../data/black_bear_occurrences.csv')
species_tmp = species_tmp.loc[
    lambda rows: (rows['decimalLatitude'] != '') & (rows['decimalLongitude'] != '')
]

In [147]:
# Convert the filtered occurrences to a BioGeoDataFrame (this gives access to the
# add_pseudo_absences / which_rasters / extract_values methods used below) and
# draw a random subsample of the records.
SAMPLE_SIZE = 500  # number of occurrence records to keep
SAMPLE_SEED = 42   # fixed seed so a fresh Restart & Run All draws the same records

species = BioGeoDataFrame(species_tmp).sample(
    n=min(SAMPLE_SIZE, len(species_tmp)),  # sampling more rows than exist would raise
    random_state=SAMPLE_SEED,
)
# The coordinate columns are longitude/latitude (EPSG:4326): build point geometry
# from them, declare that CRS, then reproject to the working CRS.
species = species.set_geometry(
    gpd.points_from_xy(species['decimalLongitude'], species['decimalLatitude'])
).set_crs(4326)
species = species.to_crs(CRS)

  super().__setattr__(attr, val)


In [148]:
# Biogeoclimatic (BEC) zones: read the layer, reproject it to the working CRS and
# keep only ZONE (the predictor used for the species distribution) plus geometry.
bec_tmp = (
    gpd.read_file('../data/bec')
    .to_crs(CRS)
    [['ZONE', 'geometry']]
)

In [149]:
# A categorical column has to be rasterised as numbers, and those numbers are
# mapped back to strings afterwards. Collect every distinct zone label (in order
# of first appearance) so make_geocube knows the full set of categories.
bec_zones = bec_tmp['ZONE'].unique().tolist()
categorical_enums = dict(ZONE=bec_zones)

In [150]:
# Rasterise the BEC polygons into an xarray/rioxarray dataset.
# The resolution tuple is expressed in the units of the target CRS.
bec = make_geocube(
    vector_data=bec_tmp,
    resolution=(GEOCUBE_RES, -GEOCUBE_RES),
    categorical_enums=categorical_enums,
)

In [151]:
# Map the rasterised integer codes back to their zone labels: use the codes to
# index into the 'ZONE_categories' coordinate, then drop that coordinate so only
# the y/x grid remains.
# NOTE(review): this overwrites bec['ZONE'] with strings, so re-running the cell
# without re-running make_geocube will presumably fail at astype(int).
zone_codes = bec['ZONE'].astype(int)
zone_string = bec['ZONE_categories'][zone_codes].drop('ZONE_categories')
bec['ZONE'] = zone_string

In [153]:
# Generate one pseudo-absence per presence record, using the BEC polygons as the
# region polygon.
pres_abs = species.add_pseudo_absences(
    amount=len(species),
    region_poly=bec_tmp,
)

EPSG:3005 EPSG:3005
271 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
142 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
69 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
29 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
11 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
8 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
4 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
2 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
2 pseudo-absence points remaining.
EPSG:3005 EPSG:3005
1 pseudo-absence points remaining.
EPSG:3005 EPSG:3005


  super().__setattr__(attr, val)


In [154]:
# Given a list of raster tiles, find which ones intersect the (buffered) species
# occurrence points and are therefore required.
# A single raster, bec, is passed here for simplicity.
# NOTE(review): the result is not used below (merged_raster is set to bec directly).
rasters = pres_abs.which_rasters(BUFFER_DISTANCE, [bec])

In [155]:
# Load the required raster tiles into memory and mosaic them into one raster.
# bec is already in memory, so it is used directly. With tiles on disk this step
# would look something like the following (requires `import rioxarray`):
# rasters = [rioxarray.open_rasterio(path) for path in rasters]
# merged_raster = merge_arrays(rasters)
merged_raster = bec

In [156]:
# Buffer each point so it intersects adjacent raster cells.
# cap_style=3 requests square caps, so each buffered point is a square extending
# BUFFER_DISTANCE from the point rather than a circle.
pres_abs['buffered_geometry'] = pres_abs['geometry'].buffer(BUFFER_DISTANCE, cap_style=3)

In [157]:
# For each presence / pseudo-absence point, extract the raster values around it
# (one tensor of values per point).
# NOTE(review): presumably sampled within each point's 'buffered_geometry' window;
# confirm against BioGeoDataFrame.extract_values.
vals = pres_abs.extract_values(merged_raster)

<xarray.Dataset>
Dimensions:          (y: 1377, x: 1598, ZONE_categories: 17)
Coordinates:
  * y                (y) float64 3.595e+05 3.605e+05 ... 1.734e+06 1.736e+06
  * x                (x) float64 1.87e+06 1.87e+06 ... 2.745e+05 2.735e+05
  * ZONE_categories  (ZONE_categories) object 'BAFA' 'BG' ... 'SWB' 'nodata'
    spatial_ref      int64 0
Data variables:
    ZONE             (y, x) object 'nodata' 'nodata' 'nodata' ... 'BAFA' 'BAFA'
['ZONE']
Extracting values: iteration 2186
ZONE: [['ESSF' 'ESSF' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH']
 ['ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH']
 ['ESSF' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH']
 ['ESSF' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH']
 ['ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH']
 ['ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ESSF']
 ['ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ICH' 'ESSF']
 ['ICH' 'ICH' 'IC

In [158]:
# Import required packages
import tensorflow as tf
import keras
from keras import layers
import numpy as np
import pandas as pd

In [159]:
# Stack the extracted windows into one array and drop the singleton axis 1.
x = np.squeeze(np.asarray(vals), axis=1)
# Labels: the 'presence' column as a 2-D (n_samples, 1) array.
y_train = pres_abs[['presence']].to_numpy()

In [160]:
# Integer-encode the zone labels against ONE vocabulary shared by all samples.
# Encoding each sample on its own (np.unique per window) gives code 0 to whichever
# label sorts first in that window, so the same integer would mean different zones
# in different rows and uniform windows of different zones would all encode to 0.
# zone_vocab[code] recovers the original label for any code in x_train.
zone_vocab, zone_codes = np.unique(x, return_inverse=True)
# One flat row of codes per sample. The explicit reshape keeps the layout
# independent of whether np.unique returns the inverse flattened or input-shaped.
x_train = np.asarray(zone_codes).reshape(len(x), -1)

In [165]:
# Small feed-forward classifier: a 4-unit ReLU hidden layer followed by a 2-way
# softmax output. No Input layer is declared, so the input shape is inferred the
# first time the model sees data.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(4, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

In [162]:
# y_train.shape
# x_train.squeeze(axis=0)

In [166]:
# Configure training: Adam optimizer, sparse categorical cross-entropy (the labels
# are integer class ids, matching the 2-unit softmax output), and accuracy as the
# reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [168]:
# Train for 100 epochs in mini-batches of 128; `m` holds the object returned by fit.
m = model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=128,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78