# Analyzing model performance

In [None]:
import torch
import os
import json
import math

import numpy as np
import pandas as pd

import preproc

In [None]:
# run on the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Load the dataset

In [None]:
# directory holding the preprocessed splits and the bounds file
data_dir = 'dataset/preprocessed'

# read the three splits; the file order matches the unpacking order
train, valid, test = (
    np.load(os.path.join(data_dir, fname))
    for fname in ('train.npy', 'valid.npy', 'test.npy')
)

# load the bounds as well (they are handed to preproc.unprocess further down,
# and their keys double as the column names)
bounds_path = os.path.join(data_dir, 'bounds.json')
bounds = {}
with open(bounds_path) as file:
    bounds = json.load(file)

# a list of all the columns in the dataset
cols = list(bounds)

# split into inputs and labels: the first WINDOW_SIZE steps along axis 1 are
# the input, the step at index WINDOW_SIZE is the label
x_train, y_train = (
    torch.Tensor(train[:, :preproc.WINDOW_SIZE, :]),
    torch.Tensor(train[:, preproc.WINDOW_SIZE, :]),
)

x_valid, y_valid = (
    torch.Tensor(valid[:, :preproc.WINDOW_SIZE, :]),
    torch.Tensor(valid[:, preproc.WINDOW_SIZE, :]),
)

## Load the model

In [None]:
from lstm_model import FirePredictor

# initialize it
# NOTE(review): hidden_dim / n_layers presumably have to match the architecture
# that produced models/single_lstm.json — confirm against the training script
hidden_dim = 1024
n_layers = 4
model = FirePredictor(train.shape[-1], preproc.WINDOW_SIZE, hidden_dim, n_layers)

# switch to evaluation mode; assuming FirePredictor is a torch.nn.Module, this
# makes layers such as dropout and batch norm use their inference behaviour
model.eval()
# load the saved weights
model.from_json('models/single_lstm.json')
# the inputs are moved to `device` before every forward pass, so the model has
# to live on the same device; without this the call fails on CUDA machines.
# (assigned rather than left as a bare expression so the cell prints nothing)
model = model.to(device)

## Run the model

In [None]:
# inference only, so turn off gradient computation for these forward passes
with torch.no_grad():
    p_valid = model(x_valid.to(device))
    # score only as many training windows as there are validation windows
    p_train = model(x_train[:len(x_valid)].to(device))

## Undo the preprocessing on the results

In [None]:
# training predictions: tensor -> dataframe -> preprocessing undone
train_p_frame = preproc.unprocess(
    pd.DataFrame(p_train.detach().cpu().numpy(), columns=cols),
    bounds,
)
display(train_p_frame.shape)
display(train_p_frame.head())

# same conversion for the ground truth values
# NOTE(review): p_train was computed on only the first len(x_valid) training
# windows, whereas y_train holds every training label, so these two frames
# can differ in length
train_y_frame = preproc.unprocess(
    pd.DataFrame(y_train.detach().cpu().numpy(), columns=cols),
    bounds,
)
display(train_y_frame.shape)
display(train_y_frame.head())

In [None]:
# validation predictions: tensor -> dataframe -> preprocessing undone
valid_p_frame = preproc.unprocess(
    pd.DataFrame(p_valid.detach().cpu().numpy(), columns=cols),
    bounds,
)
display(valid_p_frame.shape)
display(valid_p_frame.head())

# and the matching validation ground truth
valid_y_frame = preproc.unprocess(
    pd.DataFrame(y_valid.detach().cpu().numpy(), columns=cols),
    bounds,
)
display(valid_y_frame.shape)
display(valid_y_frame.head())

# Make the graph

In [None]:
import folium
import geojson
from datetime import datetime

## Convert data to GeoJSON

In [None]:
# converts a dataframe into one geojson.MultiPoint
def get_points(df):
    """Build a single geojson.MultiPoint from a dataframe's coordinates.

    df must provide 'longitude' and 'latitude' columns; every row contributes
    one (longitude, latitude) pair, in row order.
    """
    lon_lat_pairs = [(lon, lat) for lon, lat in zip(df['longitude'], df['latitude'])]
    return geojson.MultiPoint(lon_lat_pairs)

# build the marker geometry for every frame (ground truth and predictions,
# training and validation), in the same order as the names on the left
train_y_markers, train_p_markers, valid_y_markers, valid_p_markers = map(
    get_points,
    (train_y_frame, train_p_frame, valid_y_frame, valid_p_frame),
)

## Make the graph itself

In [None]:
def _add_marker_layer(group, markers, name, color):
    """Draw `markers` as filled circles of one color inside `group`.

    group   -- folium.FeatureGroup that receives the layer
    markers -- GeoJSON geometry to draw (the MultiPoints built by get_points)
    name    -- name given to the folium.GeoJson layer
    color   -- fill color, used by both the marker and the style function
    """
    folium.GeoJson(
        markers,
        name=name,
        marker=folium.Circle(radius=100, fill_color=color, fill_opacity=0.5, color="black", weight=1),
        style_function=lambda x: {
            "fillColor": color,
            'radius': 10000
        },
        highlight_function=lambda x: {"fillOpacity": 0.8}
    ).add_to(group)


# define the map (initial view at latitude -30, longitude 140)
m = folium.Map(
    [-30.0, 140.0],
    zoom_start=4,
    tiles='OpenTopoMap'
)

# create groups so that we can control what stuff is visible
train_y_group = folium.FeatureGroup(name="Training Ground Truth")
m.add_child(train_y_group)
train_p_group = folium.FeatureGroup(name="Training Predictions")
m.add_child(train_p_group)

valid_y_group = folium.FeatureGroup(name="Validation Ground Truth")
m.add_child(valid_y_group)
valid_p_group = folium.FeatureGroup(name="Validation Predictions")
m.add_child(valid_p_group)

# one layer per data set; the color is the only visual difference between them
_add_marker_layer(train_y_group, train_y_markers, "Fire Pixels", 'blue')
_add_marker_layer(train_p_group, train_p_markers, "Predicted Pixels", 'green')
_add_marker_layer(valid_y_group, valid_y_markers, "Fire Pixels", 'red')
_add_marker_layer(valid_p_group, valid_p_markers, "Predicted Pixels", 'orange')

# add a layer control to the map so that you can turn each of these categories on and off
# automatically adds all of the folium.FeatureGroup's on the map to the controls
folium.LayerControl(collapsed=False).add_to(m)

# display the map (must stay the last expression of the cell to be rendered)
m

In [None]:
# export the page generated by folium as a standalone HTML file
m.save(outfile='graphs/model_comparison.html')

## Compute stats for the predictions

Mean spatial and temporal distance: compute the average distance (in kilometers) and the average absolute timestamp difference between each validation prediction and its ground-truth value.

In [None]:
def geo_dist(lat1, lon1, lat2, lon2):
    """Return the distance in kilometers between two latitude/longitude points.

    All four arguments are given in degrees. The haversine formula is used
    (https://andrew.hedges.name/experiments/haversine/) with an approximate
    earth radius of 6373 km.
    """
    earth_radius_km = 6373

    # coordinate differences, converted to radians
    delta_lon = math.radians(lon2 - lon1)
    delta_lat = math.radians(lat2 - lat1)

    # haversine term of the angle between the two points
    sin_half_dlat = math.sin(delta_lat/2)
    sin_half_dlon = math.sin(delta_lon/2)
    hav = sin_half_dlat**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * (sin_half_dlon**2)

    # angle in radians, then scaled by the radius to get the arc length
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(1-hav))
    return earth_radius_km * central_angle

# sanity-check the formula on a known pair of points
(lat1, lon1), (lat2, lon2) = (38.898556, -77.037852), (38.897147, -77.043934)

# this should be 0.549
geo_dist(lat1, lon1, lat2, lon2)

In [None]:
# get time deltas: absolute difference between the ground-truth and predicted
# 'timestamp' columns (units are whatever preproc.unprocess restores — TODO confirm)
d_times = (valid_y_frame['timestamp'] - valid_p_frame['timestamp']).abs()

# get distances (in kilometers) between each ground-truth point and the
# prediction carrying the same index label.
# The ground-truth longitude is passed for the first point; passing the
# predicted longitude for both points would zero out the longitude difference.
distances = pd.Series([
    geo_dist(
        valid_y_frame.loc[i, 'latitude'],
        valid_y_frame.loc[i, 'longitude'],
        valid_p_frame.loc[i, 'latitude'],
        valid_p_frame.loc[i, 'longitude'],
    )
    for i in valid_y_frame.index
])
print(f'{d_times.mean()=}\n{distances.mean()=}')

In [None]:
# size of the geographical region covered by the dataset, taken from the
# min/max latitude and longitude stored in the bounds
lat1, lat2 = bounds['latitude']['min'], bounds['latitude']['max']
lon1, lon2 = bounds['longitude']['min'], bounds['longitude']['max']

# corner-to-corner distance
geo_dist(lat1, lon1, lat2, lon2)