# Tutorial
### Import packages

In [42]:
import tensorflow as tf
from tensorflow import keras
import glob

import maelstrom

Next, set up data loader. This will provide a tf.dataset object that can be used for training. The loader has many options that influence how data is loaded.

In [43]:
input_filenames = glob.glob("data/air_temperature/5GB/2020030*T*Z.nc")
loader = maelstrom.loader.FileLoader(filenames=input_filenames,
    patch_size=32,   # Break the grid into 32x32 squares to increase the number of samples
    # limit_leadtimes=[0, 12, 24],  # Only load these leadtimes
    # limit_predictors=["air_temperature_2m", "wind_speed_10m"], # Only load these predictors
    # x_range=0:12, # Only load the first 12 columns of the grid
    # y_range=0:10, # Only load the first 10 columns of the grid
    predict_diff=True, # Change the target to be the difference between the target and raw forecast
    cache_size=100,  # How many files should be stored in memory between epochs
    prefetch=1
)
print(loader)
input_shape = loader.predictor_shape
num_outputs = loader.num_targets

dataset = loader.get_dataset()

{
    "Predictor shape": "10, 32, 32, 14",
    "Target shape": "10, 32, 32, 1",
    "Num files": 9,
    "Samples per file": 1,
    "Patches per sample": 16,
    "Num patches": 144,
    "Patch size": 32,
    "Num leadtimes": 10,
    "Batch size": 1,
    "Num predictors": 14,
    "Num targets": 1,
    "Predictors": [
        "air_temperature_0.1_2m",
        "air_temperature_0.9_2m",
        "air_temperature_2m",
        "bias_yesterday",
        "cloud_area_fraction",
        "precipitation_amount",
        "x_wind_10m",
        "y_wind_10m",
        "altitude",
        "analysis_std",
        "bias_recent",
        "land_area_fraction",
        "model_altitude",
        "model_laf"
    ],
    "Patch size (MB)": 0.5859375,
    "Total size (GB)": 0.0823974609375
}


Set up the model for training. Either use a predefined model from `maelstrom.models` or create one using the keras interface.

In [44]:
loss = maelstrom.loss.mae
optimizer = keras.optimizers.Adam(learning_rate=1.0e-2)

model = maelstrom.models.BasicBenchmark(input_shape, num_outputs)
model.compile(optimizer=optimizer, loss=loss)

Train the model

In [45]:
history = model.fit(dataset, epochs=2)
print(history.history)

Epoch 1/2
    138/Unknown - 42s 296ms/step - loss: 9.6469

Test the model on independent data

In [None]:
input_filenames = glob.glob("data/air_temperature/5GB/2021030*T*Z.nc")
val_loader = maelstrom.loader.FileLoader(filenames=input_filenames)
val_dataset = val_loader.get_dataset()

results = model.evaluate(val_dataset)
print(results)

2.728421449661255
