In [1]:
from biogeodataframe import BioGeoDataFrame
from osgeo import gdal
import geopandas as gpd
from rioxarray.merge import merge_arrays
from geocube.api.core import make_geocube
import numpy as np
from multiprocessing import Pool

In [2]:
# Notebook-wide configuration. Distances are expressed in units of CRS.
CRS = 'EPSG:3005'        # target projection: BC Albers
GEOCUBE_RES = 500        # cell size handed to make_geocube
BUFFER_DISTANCE = 2000   # buffer applied around each point
N_SAMPLES = 50           # upper bound on the number of occurrence rows sampled

In [3]:
# Load the species occurrence table, then drop rows that are not georeferenced,
# i.e. rows whose latitude or longitude field is the empty string.
species_tmp = gpd.read_file('../data/black_bear_occurrences.csv')
has_latitude = species_tmp['decimalLatitude'].ne('')
has_longitude = species_tmp['decimalLongitude'].ne('')
species_tmp = species_tmp[has_latitude & has_longitude]

In [4]:
# Down-sample the occurrence records and wrap them in a BioGeoDataFrame, which
# provides the helper methods used in later cells.
# A fixed random_state keeps the sample (and everything derived from it)
# repeatable across "Restart Kernel -> Run All".
SAMPLE_SEED = 42
N = min(N_SAMPLES, species_tmp.shape[0])  # cannot sample more rows than exist
species_tmp = species_tmp.sample(N, random_state=SAMPLE_SEED)

species = BioGeoDataFrame(species_tmp)
# Build point geometries from the longitude/latitude columns and tag them as
# EPSG:4326 before reprojecting to the working CRS.
species = species.set_geometry(
    gpd.points_from_xy(species['decimalLongitude'], species['decimalLatitude'])
).set_crs(4326)
species = species.to_crs(CRS)

  super().__setattr__(attr, val)


In [5]:
# Read the biogeoclimatic (BEC) zone polygons and reproject them to the working CRS.
# Only ZONE (the variable used to predict the species' distribution) and the
# geometry column are retained.
bec_columns = ['ZONE', 'geometry']
bec_tmp = gpd.read_file('../data/bec')
bec_tmp = bec_tmp.to_crs(CRS)[bec_columns]

In [6]:
# A categorical variable has to be encoded as numbers before it can be rasterized,
# so the numeric codes must later be mapped back to their string labels.
# To make that possible, collect the distinct ZONE labels in order of first appearance.
bec_zones = bec_tmp['ZONE'].drop_duplicates().tolist()
categorical_enums = {'ZONE': bec_zones}

In [7]:
# Rasterize the BEC geodataframe into a rioxarray raster.
# The resolution tuple is expressed in units of the target CRS.
cell_size = (GEOCUBE_RES, -GEOCUBE_RES)
bec = make_geocube(
    vector_data=bec_tmp,
    resolution=cell_size,
    categorical_enums=categorical_enums,
)

In [8]:
# print(np.unique(bec['ZONE']))
# print(np.unique(bec['ZONE'].astype(int)))

In [9]:
# Convert numeric back to categorical string
######################################### DO NOT DELETE ######################################### 
# zone_string = bec['ZONE_categories'][bec['ZONE'].astype(int)].drop('ZONE_categories')
# bec['ZONE'] = zone_string

In [10]:
# Create as many pseudo-absence points as there are presence records,
# passing the BEC polygons as the region polygon.
n_presences = species.shape[0]
pres_abs = species.add_pseudo_absences(amount=n_presences, region_poly=bec_tmp)

54 pseudo-absence points remaining.
30 pseudo-absence points remaining.
17 pseudo-absence points remaining.
11 pseudo-absence points remaining.
8 pseudo-absence points remaining.
4 pseudo-absence points remaining.
1 pseudo-absence points remaining.


  super().__setattr__(attr, val)


In [11]:
# From a list of candidate raster tiles, work out which ones intersect the
# (buffered) points and are therefore needed.
# For simplicity the candidate list holds just the single in-memory raster, bec.
candidate_rasters = [bec]
rasters = pres_abs.list_rasters(BUFFER_DISTANCE, candidate_rasters)

In [12]:
# Load the required raster tiles into memory and merge them into one raster.
# bec is already in memory, so nothing is actually loaded here. With tiles on
# disk this step would look something like (needs `import rioxarray`):
# tiles = [rioxarray.open_rasterio(path) for path in rasters]
# merged_raster = merge_arrays(tiles)
merged_raster = bec

In [13]:
# Buffer every point by BUFFER_DISTANCE so that it intersects adjacent raster cells.
# cap_style=3 requests square caps, so each buffered point is a square polygon.
point_geometry = pres_abs['geometry']
pres_abs['buffered_geometry'] = point_geometry.buffer(BUFFER_DISTANCE, cap_style=3)

In [14]:
# Extract raster values around every presence/pseudo-absence point (one 3D tensor
# per point, per the original author's note), then join the per-point results
# into a single array.
vals_per_point = pres_abs.extract_values(merged_raster)
vals = np.concatenate(vals_per_point)

                                           occurrenceID  ...                                  buffered_geometry
2                                                   NaN  ...  POLYGON ((1037260.231 1281814.026, 1037456.265...
208   https://www.inaturalist.org/observations/13452...  ...  POLYGON ((1246675.788 512867.728, 1246871.823 ...
9                                                   NaN  ...  POLYGON ((649001.383 1523915.301, 649197.418 1...
18                                                  NaN  ...  POLYGON ((1276774.310 758924.060, 1276970.345 ...
3                                                   NaN  ...  POLYGON ((1343558.733 1426959.944, 1343754.768...
1665  https://www.inaturalist.org/observations/60406528  ...  POLYGON ((1698682.640 704973.050, 1698878.674 ...
42                                                  NaN  ...  POLYGON ((783024.230 1502626.726, 783220.264 1...
232   https://www.inaturalist.org/observations/13284...  ...  POLYGON ((822645.803 1099631.934, 822841.8

In [None]:
# Import required packages
import tensorflow as tf
import keras
from keras import layers
import pandas as pd
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

In [None]:
# Keep only samples that have an array and whose array contains no 'nodata'
# entry, then stack the arrays and their presence labels into training inputs.
usable_samples = [
    sample for sample in vals
    if sample['arr'] is not None and 'nodata' not in sample['arr']
]
x_train = np.stack([sample['arr'] for sample in usable_samples])
y_train = np.stack([sample['presence'] for sample in usable_samples])

# original, int_array = np.unique(x_train, return_inverse=True)
#
# x_train = int_array
# original

In [None]:
# Earlier dense-only baseline, kept for reference:
# model = tf.keras.models.Sequential([
#   # tf.keras.layers.Input(shape=(1,)),
#   tf.keras.layers.Flatten(),
#   tf.keras.layers.Dense(4, activation='relu'),
#   tf.keras.layers.Dense(4, activation='relu'),
#   tf.keras.layers.Dense(2, activation='softmax')
# ])

# Small convolutional classifier over single-channel raster patches.
# Every layer is taken from tf.keras so that it comes from the same Keras
# implementation as the Sequential container; adding layers from the standalone
# `keras` package to a tf.keras model fails on some TensorFlow/Keras version pairs.
# NOTE(review): INPUT_SHAPE must match the per-sample shape of x_train — confirm
# that (41, 41, 1) still agrees with the current BUFFER_DISTANCE / GEOCUBE_RES.
INPUT_SHAPE = (41, 41, 1)
# Number of output classes; presumably presence vs. pseudo-absence — verify
# against the values stored under 'presence'.
N_CLASSES = 2

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), input_shape=INPUT_SHAPE))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))  # downsample each dimension by a factor of 2

model.add(tf.keras.layers.Conv2D(32, (3, 3)))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64))
model.add(tf.keras.layers.Activation('relu'))

model.add(tf.keras.layers.Dropout(0.5))

# One output unit per class (not per layer); softmax turns them into class probabilities.
model.add(tf.keras.layers.Dense(N_CLASSES))
model.add(tf.keras.layers.Activation('softmax'))
# len(model.weights)

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit the model on the training arrays; `m` keeps whatever model.fit returns.
BATCH_SIZE = 128
EPOCHS = 100
m = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)