In [None]:
from biogeodataframe import BioGeoDataFrame
from osgeo import gdal
import geopandas as gpd
from rioxarray.merge import merge_arrays
from geocube.api.core import make_geocube
import numpy as np
import math

In [None]:
# Notebook-wide settings
CRS = 'EPSG:3005'   # target projection: BC Albers
GEOCUBE_RES = 500   # raster cell size handed to make_geocube, in units of CRS
N_SAMPLES = 5_000   # upper bound on the number of occurrence rows sampled

In [None]:
# Load the raw occurrence table, then keep only the rows that carry both
# coordinates (rows with a blank latitude or longitude are not georeferenced)
species_tmp = gpd.read_file('../data/black_bear_occurrences.csv')
has_latitude = species_tmp['decimalLatitude'] != ''
has_longitude = species_tmp['decimalLongitude'] != ''
species_tmp = species_tmp[has_latitude & has_longitude]

In [None]:
# Down-sample the occurrences to at most N_SAMPLES rows.
# A fixed seed keeps the sampled subset (and everything derived from it)
# reproducible across kernel restarts.
SAMPLE_SEED = 42
N = min(N_SAMPLES, species_tmp.shape[0])
species_tmp = species_tmp.sample(n=N, random_state=SAMPLE_SEED)

# Convert the sampled frame to a BioGeoDataFrame, giving access to useful methods
species = BioGeoDataFrame(species_tmp)

# Build point geometries from the longitude/latitude columns, tag them as
# EPSG:4326, then reproject to the working CRS
species = species.set_geometry(gpd.points_from_xy(
        species['decimalLongitude'], species['decimalLatitude'])).set_crs(4326)
species = species.to_crs(CRS)

In [None]:
# Load the biogeoclimatic (BEC) zone polygons and reproject them to the working CRS.
# Only the ZONE, SUBZONE and geometry columns are kept; ZONE is what species'
# distributions will be predicted with.
bec_columns = ['ZONE', 'SUBZONE', 'geometry']
bec_tmp = gpd.read_file('../data/bec').to_crs(CRS)[bec_columns]

In [None]:
# Categorical columns have to be encoded as numbers before they can be
# rasterised, so the numeric codes must later be mapped back to strings.
# Record the distinct labels of each categorical column for that purpose.
bec_zones = bec_tmp['ZONE'].drop_duplicates().tolist()
bec_subzones = bec_tmp['SUBZONE'].drop_duplicates().tolist()
categorical_enums = dict(ZONE=bec_zones, SUBZONE=bec_subzones)

In [None]:
# Rasterise the BEC geodataframe into a rioxarray raster.
# The resolution values are expressed in the units of the target CRS.
bec = make_geocube(
    vector_data=bec_tmp,
    resolution=(GEOCUBE_RES, -GEOCUBE_RES),
    categorical_enums=categorical_enums,
)

In [None]:
# print(np.unique(bec['ZONE']))
# print(np.unique(bec['ZONE'].astype(int)))

In [None]:
# Convert numeric back to categorical string
######################################### DO NOT DELETE ######################################### 
# zone_string = bec['ZONE_categories'][bec['ZONE'].astype(int)].drop('ZONE_categories')
# bec['ZONE'] = zone_string

In [None]:
# Create pseudo-absences: request as many as there are presence records,
# passing the BEC polygons as region_poly
n_presences = species.shape[0]
pres_abs = species.add_pseudo_absences(amount=n_presences, region_poly=bec_tmp)

In [None]:
# Distance around each point used for raster extraction, in units of the CRS.
# rio.resolution() returns (x_res, y_res) and the y resolution is signed
# (negative for north-up rasters), so take its magnitude to guarantee a
# positive distance.
# NOTE(review): 31.5 cells per side presumably produces the 64-pixel window
# that the model's input_shape expects — confirm against extract_values.
BUFFER_DISTANCE = abs(bec.rio.resolution()[1]) * 31.5

In [None]:
# From a list of candidate raster tiles, find the ones that intersect the
# species occurrence points and are therefore required.
# A single raster, bec, is the only candidate here for simplicity.
candidate_rasters = [bec]
rasters = pres_abs.list_rasters(BUFFER_DISTANCE, candidate_rasters)

In [None]:
# Load the required raster tiles into memory and merge them into one raster.
# With real tiles on disk this step would look something like:
# rasters = [rioxarray.open_rasterio(x) for x in rasters]
# merged_raster = merge_arrays(rasters)
# Here bec is already loaded and is the only raster, so it is used directly.
merged_raster = bec

In [None]:
# Buffer each point so it intersects adjacent raster cells
# pres_abs['buffered_geometry'] = pres_abs['geometry'].buffer(BUFFER_DISTANCE, cap_style=3)

In [None]:
# Build a 3D tensor for each occurrence point by extracting raster values
# around it, then join the per-point results into a single array
vals = np.concatenate(
    pres_abs.extract_values(raster=merged_raster, distance=BUFFER_DISTANCE)
)

In [None]:
# Import required packages
import tensorflow as tf
import keras
from keras import layers
import pandas as pd
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

In [None]:
# listt = []
# listt.append(np.array((((1, 1), (2, 2)), ((1, 1), (2, 2)))))
# arr2 = np.array((((2, 2), (4, 4)), ((4, 4), (4, 4))))

# # np.stack((listt, arr2)).shape
# np.stack((listt))
# # arr2

In [None]:
# # [x['presence'] for x in vals if None not in x['arr'][0] and 'nodata' not in x['arr'][1]].__len__()
# l = [
#     x["arr"]
#     for x in vals
# ]
# vals
# # [x.shape for x in l]

In [None]:
# np.array((((2, 2), (3, 3)), ((2, 2), (3, 3)), ((2, 2), (3, 3))))
# np.zeros((255,255,3))
# x_train[0].transpose().shape

In [None]:
from matplotlib.pyplot import imshow

# Sanity-check preview: show index 1 along the last axis of sample 10's
# transposed "arr" tensor. The sample is indexed first so only that one array
# is transposed, rather than transposing every entry of vals to display one.
imshow(vals[10]["arr"].transpose()[:, :, 1])


In [None]:
# Assemble model inputs and labels: one transposed "arr" tensor and one
# "presence" value per entry of vals, each stacked along a new leading axis.
# No filtering of missing / nodata entries is applied at this point.
sample_tensors = [sample["arr"].transpose() for sample in vals]
sample_labels = [sample["presence"] for sample in vals]
x_data = np.stack(sample_tensors)
y_data = np.stack(sample_labels)

In [None]:
# Positional 80/20 split: the first 80% of samples (rounded up) are used for
# training and the remainder is held out for validation.
# The held-out slices run to the end of the array; stopping at -1 would
# silently drop the final sample from the test set.
# NOTE(review): there is no shuffling before the split — if vals is ordered
# (e.g. presences before pseudo-absences) the two sets will have different
# class balances; confirm the ordering produced by extract_values.
split_index = math.ceil(x_data.shape[0] * 0.8)

x_train, x_test = x_data[:split_index], x_data[split_index:]
y_train, y_test = y_data[:split_index], y_data[split_index:]

In [33]:
# Small convolutional classifier over 64x64 inputs with 2 channels.
# (A dense-only alternative -- Flatten -> Dense(4, relu) -> Dense(4, relu) ->
# Dense(2, softmax) -- was previously kept commented out in this cell.)
model = tf.keras.models.Sequential([
    # First convolution block
    Conv2D(32, (2, 2), input_shape=(64, 64, 2)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),  # downsample each dimension by a factor of 2

    # Second convolution block
    Conv2D(32, (2, 2)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Fully connected head
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),

    # Two output units followed by softmax -- presumably one per
    # presence/absence class; verify against the values in y_data
    Dense(2),
    Activation('softmax'),
])

In [34]:
# Configure training: Adam optimiser, sparse categorical cross-entropy loss,
# and accuracy reported as the metric
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [35]:
# Train for 10 epochs in batches of 32, passing the held-out split as
# validation data; the value returned by fit is kept in m
m = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
