## Tutorial on how to use FireDatasetLoader class

This tutorial shows you how to load the Next Day Wildfire Spread dataset with the `FireDatasetLoader` class.

In [None]:
from fire_dataset_loader import FireDatasetLoader

### Install all the required packages

The packages required to run the class are listed in the requirements.txt file.
To install them in an Anaconda environment or a virtual environment, uncomment and run the command below.

In [None]:
# TODO: Uncomment if you are missing some packages
# !pip install -r ../requirements.txt

### Required Parameters

The FireDatasetLoader class requires the `data_pattern` and `index_pattern` template-string parameters. It also has optional parameters with default values that you can override, such as `data_size`, `sample_size`, and `batch_size`.

In [None]:
# TODO: point this at your own copy of the dataset.
# The "{}" placeholder is filled in with a split key (e.g. "train_00") to build
# the path of each TFRecord file.
data_pattern = (
    "/Users/juan/Documents/cpp/masters_project/archive/"
    "next_day_wildfire_spread_{}.tfrecord"
)

### Creating Index files

Index files must be provided when using multiple workers; otherwise the loader may return duplicate records.

You can create the index files with the code below. You have to provide a glob pattern that matches your TFRecord files and the path of the directory where the index files will be stored.

In [None]:
import glob
import os
import subprocess
import sys

# TODO: replace tfrecords_path and index_base_path with your own locations,
# then set CREATE_INDEX_FILES = True and run this cell once.
CREATE_INDEX_FILES = False

# Glob pattern matching every TFRecord file of the dataset.
tfrecords_path = "/Users/juan/Documents/cpp/masters_project/archive/next_day_wildfire_spread_*"
# Directory that receives one ".index" file per TFRecord file.
index_base_path = "/Users/juan/Documents/cpp/masters_project/archive/index/"

if CREATE_INDEX_FILES:
    # The index directory may not exist yet on a fresh checkout.
    os.makedirs(index_base_path, exist_ok=True)

    for file_path in glob.iglob(tfrecords_path):
        # Same base name as the TFRecord file, with the extension swapped.
        file_name = os.path.basename(file_path).replace(".tfrecord", ".index")
        index_path = os.path.join(index_base_path, file_name)
        # sys.executable runs the tool with the interpreter of this kernel
        # (where the tfrecord package is installed); check=True raises
        # CalledProcessError instead of reporting success for a failed run.
        subprocess.run(
            [sys.executable, "-m", "tfrecord.tools.tfrecord2idx", file_path, index_path],
            check=True,
        )
        print(f"Created index file {index_path}")

In [None]:
# TODO: point this at the directory holding your index files.
# The "{}" placeholder is filled in with a split key (e.g. "train_00") to build
# the path of each index file.
index_pattern = (
    "/Users/juan/Documents/cpp/masters_project/archive/index/"
    "next_day_wildfire_spread_{}.index"
)

### Creating splits dictionaries 

The splits parameter is a dictionary of (key, value) pairs, where the key is used to
construct the data and index path(s) and the value determines
the contribution of each split to the batch.


In [None]:
# How many files make up each split of the dataset.
num_training_files = 15
num_test_files = 2
num_validation_files = 2

# Each key ("train_00", "train_01", ...) names one file of the split; every
# file of a split gets the same weight, 1 / (number of files in that split).
train_splits = dict.fromkeys(
    (f"train_{idx:02d}" for idx in range(num_training_files)),
    1 / num_training_files,
)
test_splits = dict.fromkeys(
    (f"test_{idx:02d}" for idx in range(num_test_files)),
    1 / num_test_files,
)
eval_splits = dict.fromkeys(
    (f"eval_{idx:02d}" for idx in range(num_validation_files)),
    1 / num_validation_files,
)

# One dictionary per output line.
print(train_splits, test_splits, eval_splits, sep="\n")

### Creating lists of input and output feature names

In [None]:
# Names of the input variables, in the order they are stacked on the feature
# axis. The short descriptions follow the plot titles used in the
# visualization cell, which are listed in this same order.
input_features = [
    "elevation",     # elevation
    "th",            # wind direction
    "vs",            # wind velocity
    "tmmn",          # min temperature
    "tmmx",          # max temperature
    "sph",           # humidity
    "pr",            # precipitation
    "pdsi",          # drought
    "NDVI",          # vegetation
    "population",    # population density
    "erc",           # energy release component
    "PrevFireMask",  # previous fire mask
]

# Name of the target variable: the fire mask.
output_features = ["FireMask"]

### Using the FireDatasetLoader class

In [None]:
# Training loader: only the required arguments are passed, so every optional
# parameter of FireDatasetLoader keeps its default value.
training_data = FireDatasetLoader(
    data_pattern,
    index_pattern,
    train_splits,
    input_features,
    output_features,
)
training_data_loader = training_data.get_loader()

# Fetch a single batch to check what the loader produces.
training_features, training_labels = next(iter(training_data_loader))
print(training_features.shape)

In [None]:
# Testing loader: overrides some of the optional parameters.
# NOTE(review): center_crop=True presumably crops the central
# sample_size x sample_size window of each sample - confirm in FireDatasetLoader.
testing_data = FireDatasetLoader(
    data_pattern,
    index_pattern,
    test_splits,
    input_features,
    output_features,
    batch_size=50,
    sample_size=32,
    center_crop=True,
)
testing_data_loader = testing_data.get_loader()

# Fetch a single batch to check what the loader produces.
testing_features, testing_labels = next(iter(testing_data_loader))
print(testing_features.shape)

In [None]:
# Evaluation loader: same required arguments, with a custom batch size.
eval_data = FireDatasetLoader(
    data_pattern,
    index_pattern,
    eval_splits,
    input_features,
    output_features,
    batch_size=60,
)
eval_data_loader = eval_data.get_loader()

# Fetch a single batch to check what the loader produces.
eval_features, eval_labels = next(iter(eval_data_loader))
print(eval_features.shape)

### Data Visualization

In [None]:
import matplotlib.pyplot as plt
from matplotlib import colors

# Column titles in plotting order: one per input feature (same order as
# `input_features`), followed by the output fire mask.
TITLES = [
    'Elevation',
    'Wind\ndirection',
    'Wind\nvelocity',
    'Min\ntemp',
    'Max\ntemp',
    'Humidity',
    'Precip',
    'Drought',
    'Vegetation',
    'Population\ndensity',
    'Energy\nrelease\ncomponent',
    'Previous\nfire\nmask',
    'Fire\nmask'
]

# TODO: Replace with any other dataset loader
inputs = training_features
labels = training_labels

# Number of rows of data samples to plot. Capped at the batch size so that a
# batch with fewer than 5 samples does not raise an IndexError below.
n_rows = min(5, inputs.shape[0])
# Number of data variables (the features are indexed on the last axis)
n_features = inputs.shape[3]
# One column per input feature plus one for the output fire mask
n_cols = n_features + 1
# Variables for controlling the color map for the fire masks
CMAP = colors.ListedColormap(['black', 'silver', 'orangered'])
BOUNDS = [-1, -0.1, 0.001, 1]
NORM = colors.BoundaryNorm(BOUNDS, CMAP.N)

# squeeze=False keeps `axes` two-dimensional even for a single row.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 6.5), squeeze=False)

for i in range(n_rows):
    for j in range(n_cols):
        ax = axes[i, j]
        if i == 0:
            # Titles only on the first row
            ax.set_title(TITLES[j], fontsize=13)
        if j < n_features - 1:
            # Regular input features
            ax.imshow(inputs[i, :, :, j], cmap='viridis')
        elif j == n_features - 1:
            # Last input feature is the previous fire mask
            ax.imshow(inputs[i, :, :, -1], cmap=CMAP, norm=NORM)
        else:
            # Extra last column is the target fire mask
            ax.imshow(labels[i, :, :, 0], cmap=CMAP, norm=NORM)
        ax.axis('off')
fig.tight_layout()