# Graphing model predictions against actual values

In [1]:
import torch
import os
import json
import math

import numpy as np
import pandas as pd

import preproc

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## read the dataset

In [3]:
data_dir = 'dataset/reduced_preprocessed'
train = np.load(os.path.join(data_dir, 'train.npy'))
valid = np.load(os.path.join(data_dir, 'valid.npy'))
test = np.load(os.path.join(data_dir, 'test.npy'))
# load the bounds as well
bounds_path = os.path.join(data_dir, 'bounds.json')
bounds = {}
with open(bounds_path)as file:
    bounds = json.load(file)

cols = list(bounds.keys())

# split into inputs and labels
x_train = torch.Tensor(train[:,:64,:])
y_train = torch.Tensor(train[:,64,:])

x_valid = torch.Tensor(valid[:,:64,:])
y_valid = torch.Tensor(valid[:,64,:])

x_test = torch.Tensor(test[:,:64,:])
y_test = torch.Tensor(test[:,64,:])

## Load the model

In [4]:
from lstm_model import FirePredictor

hidden_dim = 1024
n_layers = 4
model = FirePredictor(train.shape[-1], preproc.WINDOW_SIZE, hidden_dim, n_layers)
model.eval()
model.from_json('models/reduced_lstm.json')

## Run the model

In [5]:
with torch.no_grad():
    p_valid = model(x_valid.to(device))
    p_train = model(x_train[0:x_valid.shape[0]].to(device))

## un-preprocess the results

In [6]:
train_p_frame = pd.DataFrame(p_train.cpu().detach().numpy(), columns=cols)
train_p_frame = preproc.unprocess(train_p_frame, bounds)
display(train_p_frame.shape)
display(train_p_frame.head())

train_y_frame = pd.DataFrame(y_train.cpu().detach().numpy(), columns=cols)
train_y_frame = preproc.unprocess(train_y_frame, bounds)
display(train_y_frame.shape)
display(train_y_frame.head())

(161, 3)

Unnamed: 0,latitude,longitude,timestamp
0,-14.956472,131.076531,1565131000.0
1,-15.212038,131.101844,1564859000.0
2,-14.889662,131.690436,1564676000.0
3,-14.645594,131.571813,1564631000.0
4,-14.165296,131.322979,1564682000.0


(484, 3)

Unnamed: 0,latitude,longitude,timestamp
0,-15.4381,130.5266,1564662000.0
1,-15.1529,131.1778,1564662000.0
2,-15.0855,130.5753,1564662000.0
3,-14.3149,130.6678,1564662000.0
4,-13.029,132.9311,1564662000.0


In [7]:
valid_p_frame = pd.DataFrame(p_valid.cpu().detach().numpy(), columns=cols)
valid_p_frame = preproc.unprocess(valid_p_frame, bounds)
display(valid_p_frame.shape)
display(valid_p_frame.head())

valid_y_frame = pd.DataFrame(y_valid.detach().numpy(), columns=cols)
valid_y_frame = preproc.unprocess(valid_y_frame, bounds)
display(valid_y_frame.shape)
display(valid_y_frame.head())

(161, 3)

Unnamed: 0,latitude,longitude,timestamp
0,-13.195839,134.196302,1567519000.0
1,-13.157678,134.004862,1567488000.0
2,-13.102028,134.31921,1567543000.0
3,-12.994351,133.988489,1567489000.0
4,-12.872519,133.703639,1567512000.0


(161, 3)

Unnamed: 0,latitude,longitude,timestamp
0,-12.4988,133.8459,1567338000.0
1,-12.2855,134.6376,1567338000.0
2,-12.2702,134.5508,1567338000.0
3,-12.1738,134.4555,1567338000.0
4,-11.7896,132.7161,1567338000.0


In [8]:
display(valid_y_frame.std(axis=0))
display(valid_p_frame.std(axis=0))

latitude          1.350285
longitude         2.277625
timestamp    308088.618063
dtype: float64

latitude          0.820840
longitude         0.776884
timestamp    268747.940125
dtype: float64

# Make the graph

In [9]:
import folium
import geojson
from datetime import datetime

Convert data to GeoJSON

In [10]:
def get_points(df):
    points = list(zip(df['longitude'], df['latitude']))
    return geojson.MultiPoint(points)

train_y_markers = get_points(train_y_frame)
train_p_markers = get_points(train_p_frame)
valid_y_markers = get_points(valid_y_frame)
valid_p_markers = get_points(valid_p_frame)

In [11]:
m = folium.Map(
    [-30.0, 140.0],
    zoom_start=4,
    tiles='OpenTopoMap'
)

# add training y markers to the map
folium.GeoJson(
    train_y_markers,
    name="Fire Pixels",
    marker=folium.Circle(radius=100, fill_color = 'blue', fill_opacity=0.5, color="black", weight=1),
    style_function=lambda x: {
        "fillColor": 'blue',
        'radius': 3000
    },
    highlight_function=lambda x: {"fillOpacity": 0.8}
).add_to(m)

# add training predicted markers to the map
folium.GeoJson(
    train_p_markers,
    name="Predicted Pixels",
    marker=folium.Circle(radius=100, fill_color = 'green', fill_opacity=0.5, color="black", weight=1),
    style_function=lambda x: {
        "fillColor": 'green',
        'radius': 3000
    },
    highlight_function=lambda x: {"fillOpacity": 0.8}
).add_to(m)

# add validation y markers to the map
folium.GeoJson(
    valid_y_markers,
    name="Fire Pixels",
    marker=folium.Circle(radius=100, fill_color = 'red', fill_opacity=0.5, color="black", weight=1),
    style_function=lambda x: {
        "fillColor": 'red',
        'radius': 3000
    },
    highlight_function=lambda x: {"fillOpacity": 0.8}
).add_to(m)

# add validation predicted markers to the map
folium.GeoJson(
    valid_p_markers,
    name="Predicted Pixels",
    marker=folium.Circle(radius=100, fill_color = 'orange', fill_opacity=0.5, color="black", weight=1),
    style_function=lambda x: {
        "fillColor": 'orange',
        'radius': 3000
    },
    highlight_function=lambda x: {"fillOpacity": 0.8}
).add_to(m)


# folium.GeoJson(rect).add_to(m)
m

In [12]:
m.save('graphs/reduced_comparison.html')

## Compute stats for the predictions

Mean spatial & temporal distance: Compute average spatial & temporal distances between predictions and ground truth

In [13]:
def geo_dist(lat1, lon1, lat2, lon2):
    # compute distance in km between 2 points
    # https://andrew.hedges.name/experiments/haversine/
    R = 6373 # approximate radius of the earth in kilometers

    # get lat an lon diff in radians
    dlon = math.radians(lon2 - lon1)
    dlat = math.radians(lat2 - lat1)

    a = (math.sin(dlat/2))**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * (math.sin(dlon/2)**2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    d = R * c
    return d

# test the formula
lat1, lon1 = 38.898556, -77.037852
lat2, lon2 = 38.897147, -77.043934

# this should be 0.549
geo_dist(lat1, lon1, lat2, lon2)

0.5493281835413389

In [14]:
# average time difference
d_times = (valid_y_frame['timestamp'] - valid_p_frame['timestamp']).abs()

# average distance
distances = []
for i,_ in valid_y_frame.iterrows():
    lat1 = valid_y_frame.loc[i, 'latitude']
    lon1 = valid_y_frame.loc[i, 'longitude']

    lat2 = valid_p_frame.loc[i, 'latitude']
    lon2 = valid_p_frame.loc[i, 'longitude']

    distances.append(geo_dist(lat1, lon2, lat2, lon2))

distances = pd.Series(distances)
print(f'{d_times.mean()=}\n{distances.mean()=}')

d_times.mean()=486571.7907528626
distances.mean()=99.19492057878864


In [15]:
# get the size of the geographical region that the dataset covers
lat1 = bounds['latitude']['min']
lat2 = bounds['latitude']['max']

lon1 = bounds['longitude']['min']
lon2 = bounds['longitude']['max']

geo_dist(lat1, lon1, lat2, lon2)

1115.5403727190117