In [None]:
from biogeodataframe import BioGeoDataFrame
from osgeo import gdal
import geopandas as gpd
from rioxarray.merge import merge_arrays
from geocube.api.core import make_geocube
import numpy as np

In [None]:
# Notebook-wide settings.
# Target projected CRS for every layer: EPSG:3005 (BC Albers).
CRS = 'EPSG:3005'
# Distance by which each point is buffered, expressed in the units of CRS.
BUFFER_DISTANCE = 10_000
# Cell size handed to make_geocube when rasterising the zone polygons.
GEOCUBE_RES = 5_000
# Upper bound on the number of occurrence records drawn for the analysis.
N_SAMPLES = 50

In [None]:
# Load the occurrence table, then keep only records that carry both
# coordinates. The coordinate columns are compared against the empty string,
# i.e. they are treated as text at this stage.
species_tmp = gpd.read_file('../data/black_bear_occurrences.csv')
has_latitude = species_tmp['decimalLatitude'] != ''
has_longitude = species_tmp['decimalLongitude'] != ''
species_tmp = species_tmp[has_latitude & has_longitude]

In [None]:
# Wrap the occurrences in a BioGeoDataFrame (gives access to the helper
# methods used below) and draw a reproducible random subset.
# SAMPLE_SEED pins the draw so that Restart & Run All yields the same points.
# NOTE(review): assumes BioGeoDataFrame.sample forwards `random_state` the way
# pandas.DataFrame.sample does - confirm against the class definition.
SAMPLE_SEED = 42

# Never request more rows than exist; plain min() is enough for two ints.
N = min(N_SAMPLES, species_tmp.shape[0])
species = BioGeoDataFrame(species_tmp).sample(N, random_state=SAMPLE_SEED)

# Build point geometries from the lon/lat columns (WGS84, EPSG:4326) ...
species = species.set_geometry(gpd.points_from_xy(
        species['decimalLongitude'], species['decimalLatitude'])).set_crs(4326)
# ... then reproject into the notebook's working CRS.
species = species.to_crs(CRS)

In [None]:
# Read the biogeoclimatic (BEC) zone layer and bring it into the working CRS.
bec_reprojected = gpd.read_file('../data/bec').to_crs(CRS)
# Only the ZONE attribute (the predictor for the species' distribution) and the
# geometry are needed downstream.
bec_tmp = bec_reprojected[['ZONE', 'geometry']]

In [None]:
# Rasterising needs numeric values, so each ZONE string is encoded as a number
# and decoded again afterwards. Collect every distinct zone name (in order of
# first appearance) so it can serve as the lookup table for that encoding.
bec_zones = bec_tmp['ZONE'].drop_duplicates().to_list()
categorical_enums = dict(ZONE=bec_zones)

In [None]:
# Rasterise the BEC polygons into an xarray Dataset via geocube.
# Resolution is in the units of the target CRS and is given as (Y, X).
# The Y step is negative so rows run north -> south (the conventional
# north-up raster layout) and the X step is positive so columns run
# west -> east. The previous (+res, -res) ordering had the two signs swapped.
bec = make_geocube(
    vector_data=bec_tmp,
    resolution=(-GEOCUBE_RES, GEOCUBE_RES),
    categorical_enums=categorical_enums,
)

In [None]:
# print(np.unique(bec['ZONE']))
# print(np.unique(bec['ZONE'].astype(int)))

In [None]:
# Convert the numeric zone codes back to their categorical strings by indexing
# the ZONE_categories lookup with the integer codes.
# The result overwrites bec['ZONE'], so guard on dtype: on a second execution
# ZONE already holds strings and astype(int) would raise.
if np.issubdtype(bec['ZONE'].dtype, np.number):
    zone_string = bec['ZONE_categories'][bec['ZONE'].astype(int)].drop('ZONE_categories')
    bec['ZONE'] = zone_string
else:
    # Already decoded by an earlier run of this cell; keep the name defined.
    zone_string = bec['ZONE']

In [None]:
# Generate pseudo-absence records: request as many as there are sampled
# presences, using the BEC layer as the region polygon.
n_absences = species.shape[0]
pres_abs = species.add_pseudo_absences(amount=n_absences, region_poly=bec_tmp)

In [None]:
# From a list of candidate raster tiles, ask which ones are needed for the
# (buffered) presence/absence points. For simplicity the candidate list holds
# just the single in-memory raster, bec.
candidate_rasters = [bec]
rasters = pres_abs.which_rasters(BUFFER_DISTANCE, candidate_rasters)

In [None]:
# Load the required raster tiles into memory and merge them into one raster.
# With tiles on disk this step would look something like:
# rasters = [rioxarray.open_rasterio(x) for x in rasters]
# merged_raster = merge_arrays(rasters)
# Here bec is already in memory, so it simply stands in for the merged raster.
merged_raster = bec

In [None]:
# Buffer each point so that it intersects the adjacent raster cells.
# cap_style=3 selects the square cap style for the buffer.
square_buffers = pres_abs['geometry'].buffer(BUFFER_DISTANCE, cap_style=3)
pres_abs['buffered_geometry'] = square_buffers

In [None]:
# merged_raster.rio.clip(geometries=pres_abs['buffered_geometry'][0]).dims
# [x.dims for x in merged_raster.rio.clip(geometries=pres_abs['buffered_geometry'])]

In [None]:
# Sanity check: displays whether the raster and the points share the same CRS.
merged_raster.rio.crs == pres_abs.crs

In [None]:
# For each presence/absence point, extract the raster values around it.
# NOTE(review): the cells below read each element of vals as a mapping with
# 'arr' (the extracted window, or None) and 'presence' (the label); the
# "3D tensor" shape of 'arr' is taken from the original comment - confirm
# against BioGeoDataFrame.extract_values.
vals = pres_abs.extract_values(merged_raster)

In [None]:
# Import required packages
import tensorflow as tf
import keras
from keras import layers
import pandas as pd

In [None]:
# Keep only samples whose raster window was extracted and contains no 'nodata'
# cells. Filtering once guarantees features and labels stay aligned.
valid_samples = [v for v in vals if v['arr'] is not None and 'nodata' not in v['arr']]
if not valid_samples:
    raise ValueError('No usable samples: every extracted window was missing or contained nodata')

zone_windows = np.stack([v['arr'] for v in valid_samples])
y_train = np.stack([v['presence'] for v in valid_samples])

# The windows hold zone names (strings), which Dense layers cannot consume.
# Encode each distinct name as an integer code; `original[code]` recovers the
# name. The inverse index is reshaped explicitly because NumPy 1.x returns it
# flattened, whereas the model needs it in the shape of the stacked windows.
original, int_array = np.unique(zone_windows, return_inverse=True)
int_array = int_array.reshape(zone_windows.shape)

# NOTE(review): integer codes impose an arbitrary ordering on the zones;
# consider one-hot encoding or an Embedding layer.
x_train = int_array

In [None]:
# Display the stacked feature array for a quick visual inspection.
x_train

In [None]:
# Small feed-forward classifier, assembled layer by layer:
#   Flatten -> Dense(4, relu) -> Dense(2, softmax)
# The two softmax outputs correspond to the two label classes.
# (An explicit tf.keras.layers.Input(shape=(1,)) was tried and left disabled.)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(4, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

# len(model.weights)

In [None]:
# Training configuration: RMSprop optimiser, sparse categorical cross-entropy
# (labels are supplied as integer class ids), accuracy as the reported metric.
compile_options = {
    'optimizer': 'rmsprop',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)


In [None]:
# Report the dtype the model will actually receive. The earlier form cast to
# float64 first, which always reported float64 (once per row) and raised
# outright when x_train still held zone-name strings.
np.asarray(x_train).dtype


In [None]:
# Fit the classifier; `m` keeps the object returned by fit().
fit_options = dict(batch_size=32, epochs=100)
m = model.fit(x_train, y_train, **fit_options)