# Keras MLP

In [None]:
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import umap
from keras import metrics
from keras.layers import BatchNormalization, Dense, Dropout
from keras.models import Sequential

import preprocess
from confusion import confusion_matrix_dataframe, plot_confusion_matrix

## Load data

In [None]:
# Location of the full dataset; point FILE at wherever the parquet file
# lives on this machine.
FILE = '../../finalproject/wildfires-shuffled.parquet'

# load_dataset returns two pairs, unpacked here as (X, Y) for the training
# split and (X, Y) for the test split.
train_split, test_split = preprocess.load_dataset(FILE)
X_train, Y_train = train_split
X_test, Y_test = test_split

## Train MLP

In [None]:
# Feed-forward classifier: four 512-unit ReLU hidden layers, with batch
# normalisation inserted after the second one, followed by a softmax output
# layer that has one unit per column of Y_train.
n_features = X_train.shape[1]
n_classes = Y_train.shape[1]

model = Sequential([
    Dense(512, activation='relu', input_dim=n_features),
    Dense(512, activation='relu'),
    BatchNormalization(axis=1),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Categorical cross-entropy pairs with the softmax output.
# NOTE(review): this assumes Y_train is one-hot encoded -- confirm against
# preprocess.load_dataset.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [None]:
# Fit for 5 epochs on mini-batches of 256 rows, reserving 22.2% of the
# training rows for validation. The returned object is kept in `history`.
history = model.fit(
    x=X_train.values,
    y=Y_train.values,
    batch_size=256,
    epochs=5,
    validation_split=0.222,
)

## Predict and report results

In [None]:
# Rebind `model` to the network stored on disk; from this point on the
# notebook uses the saved model rather than the one fitted above.
MODEL_PATH = '../models/mlp-4x512-with-bnorm.h5'
model = keras.models.load_model(MODEL_PATH)

In [None]:
# Model outputs for every row of the test set. The underlying array is
# passed (not the DataFrame) so inference is fed the same way as training,
# which calls fit() on X_train.values.
Y_pred = model.predict(X_test.values)

In [None]:
# Default figure size for the confusion-matrix plot.
sns.set(rc={'figure.figsize': (10, 8)})

# Build the confusion matrix from true vs. predicted labels, then draw it.
confusion_df = confusion_matrix_dataframe(Y_test, Y_pred)
ax = plot_confusion_matrix(confusion_df)

# Uncomment to export the figure as a PDF:
# ax.figure.savefig('mlp_confusion_mtx.pdf', bbox_inches='tight')

## UMAP

Reload the saved model from file here (the same file that is loaded in the previous section).

In [None]:
# Load the saved network from disk and bind it to `model` for the UMAP
# cells below.
model = keras.models.load_model(
    filepath='../models/mlp-4x512-with-bnorm.h5',
)

In [None]:
# If umap-learn is not installed on this system, uncomment and run the next line:
# !sudo pip3 install umap-learn

In [None]:
# Feature columns kept for the raw-data UMAP embedding.
UMAP_COLS = [
    "fire_size",
    "fire_year",
    "discovery_date",
    "burn_time",
    "burn_time_notna",
    "latitude",
    "longitude",
]

# Upper bound on how many test rows are embedded.
UMAP_SAMPLE_SIZE = 25000
# Fixed seed so the same rows are drawn (and the figures are comparable)
# every time the notebook is re-run.
UMAP_SAMPLE_SEED = 0

# DataFrame.sample raises when asked for more rows than exist (sampling is
# without replacement), so cap the request at the size of the test set.
Xsamp = X_test.sample(
    n=min(UMAP_SAMPLE_SIZE, len(X_test)),
    random_state=UMAP_SAMPLE_SEED,
)
# Labels for exactly the sampled rows, aligned by index.
Ysamp = Y_test.loc[Xsamp.index]
# Keep only the UMAP feature columns; the sampled index is preserved so the
# same rows can be looked up in X_test again later.
Xsamp = Xsamp.filter(items=UMAP_COLS)

### On raw data

In [None]:
# Embed the sampled raw feature columns with a default-configured UMAP
# reducer; the first two output columns are plotted below.
raw_reducer = umap.UMAP()
embedding = raw_reducer.fit_transform(Xsamp)

In [None]:
# Scatter plot of the raw-data UMAP embedding, coloured by class.
# The class count is taken from the label frame instead of being hard-coded,
# so the colorbar stays correct if the number of label columns changes.
n_classes = Ysamp.shape[1]
# Class id of each sampled row = position of the largest entry in its label row.
class_ids = np.argmax(Ysamp.values, axis=1)

plt.figure(figsize=(13, 10))
sns.set(style='white', context='paper')
plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=class_ids,
    cmap='Spectral_r',
    s=3,
    # Pin the colour range to the full set of class ids so colours and
    # colorbar labels still line up when a class is absent from the sample.
    vmin=0,
    vmax=n_classes - 1,
)
plt.gca().set_aspect('equal', 'datalim')
# One colorbar band per class, centred on the integer class id.
cbar = plt.colorbar(boundaries=np.arange(n_classes + 1) - 0.5)
cbar.set_ticks(np.arange(n_classes))
cbar.set_ticklabels(Ysamp.columns)
plt.tight_layout()

### On last layer output

In [None]:
# Sub-model that stops just before the classifier output: drop only the
# final layer so predict() returns the activations of the last hidden layer.
# (Slicing off two layers would also discard the last hidden layer of the
# architecture defined in the training section.)
# NOTE(review): assumes the saved model's final layer is the softmax Dense
# layer, as in the architecture defined in this notebook -- confirm for the
# .h5 file actually loaded.
sans_softmax = Sequential(model.layers[:-1])

# Run the same rows that were sampled for the raw-data embedding; Xsamp only
# keeps the UMAP columns, so the full feature rows are fetched from X_test.
# The underlying array is passed, matching how fit() is fed.
Ysamp_pred_lastlayer = sans_softmax.predict(X_test.loc[Xsamp.index].values)

In [None]:
# Embed the truncated network's outputs with a default-configured UMAP
# reducer; the first two output columns are plotted below.
hidden_reducer = umap.UMAP()
other_embedding = hidden_reducer.fit_transform(Ysamp_pred_lastlayer)

In [None]:
# Scatter plot of the UMAP embedding of the network's internal
# representation, coloured by class.
# The class count is taken from the label frame instead of being hard-coded,
# so the colorbar stays correct if the number of label columns changes.
n_classes = Ysamp.shape[1]
# Class id of each sampled row = position of the largest entry in its label row.
class_ids = np.argmax(Ysamp.values, axis=1)

plt.figure(figsize=(13, 10))
# NOTE(review): the raw-data plot also passes context='paper' to sns.set;
# confirm whether the two figures are meant to differ in styling.
sns.set(style='white')
plt.scatter(
    other_embedding[:, 0],
    other_embedding[:, 1],
    c=class_ids,
    cmap='Spectral_r',
    s=3,
    # Pin the colour range to the full set of class ids so colours and
    # colorbar labels still line up when a class is absent from the sample.
    vmin=0,
    vmax=n_classes - 1,
)
plt.gca().set_aspect('equal', 'datalim')
# One colorbar band per class, centred on the integer class id.
cbar = plt.colorbar(boundaries=np.arange(n_classes + 1) - 0.5)
cbar.set_ticks(np.arange(n_classes))
cbar.set_ticklabels(Ysamp.columns)
plt.tight_layout()