In [133]:
#wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip
import os 
import pandas as pd
import numpy as np
# Location of the Jena climate CSV, relative to the notebook's working directory.
fname = os.path.join("..", "..", "Data", "jena_climate_2009_2016.csv")
with open(fname) as f:
    data = f.read()

lines = data.split("\n")
header = lines[0].split(",")
# Drop the header row and any blank rows. A file that ends with a newline
# leaves an empty final string, which would otherwise fail at values[1] below.
lines = [row for row in lines[1:] if row.strip()]

# temperature keeps the raw (un-normalized) copy of column 1 for use as targets;
# raw_data holds every column except the first field of each row.
temperature = np.zeros((len(lines),))
raw_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    # Skip the first field of each row; it is not parsed as a number.
    values = [float(x) for x in line.split(",")[1:]]
    temperature[i] = values[1]
    raw_data[i, :] = values

In [None]:
import matplotlib.pyplot as plt

# Plot the whole temperature series (one point per parsed CSV row).
plt.plot(temperature)

In [None]:
# Zoom in on the first 1440 rows of the temperature series.
# NOTE(review): 1440 presumably corresponds to a whole number of days of
# readings -- confirm against the dataset's recording interval.
plt.plot(range(1440), temperature[:1440])

In [None]:
# Split sizes in row order: the first half of the rows is for training, the
# next quarter for validation, and whatever is left over for testing.
num_train_samples, num_val_samples = (
    int(fraction * len(raw_data)) for fraction in (0.5, 0.25)
)
num_test_samples = len(raw_data) - (num_train_samples + num_val_samples)
(num_train_samples, num_val_samples, num_test_samples)

In [137]:
# Standardize every column using statistics from the training rows only, so
# nothing from the validation/test rows leaks into the scaling.
# raw_data is overwritten in place (same array object) at each step.
mean = np.mean(raw_data[:num_train_samples], axis=0)
raw_data[:] = raw_data - mean
# std is taken after centering; it is measured on the training rows only.
std = np.std(raw_data[:num_train_samples], axis=0)
raw_data[:] = raw_data / std

In [138]:
import keras
# Windowing hyperparameters. Each window holds `sequence_length` rows taken
# every `sampling_rate` rows. `delay` shifts the targets so that a window's
# target lies 24 sampled steps after the window's last row.
sampling_rate = 6
sequence_length = 120
delay = sampling_rate * (sequence_length + 24 - 1)
batch_size = 256


def _make_split(start_index, end_index=None):
    """Build a shuffled, batched window dataset over rows [start_index, end_index).

    Inputs come from the normalized ``raw_data`` and targets from the raw
    ``temperature`` series shifted by ``delay``. ``end_index=None`` is passed
    through to Keras unchanged (no explicit upper bound).
    """
    return keras.utils.timeseries_dataset_from_array(
        raw_data[:-delay],
        targets=temperature[delay:],
        sequence_length=sequence_length,
        sampling_rate=sampling_rate,
        batch_size=batch_size,
        shuffle=True,
        start_index=start_index,
        end_index=end_index,
    )


train_dataset = _make_split(0, num_train_samples)
val_dataset = _make_split(num_train_samples, num_train_samples + num_val_samples)
test_dataset = _make_split(num_train_samples + num_val_samples)

In [None]:
# Sanity check on a single training batch: show feature column 1 at the last
# timestep of every window, and the shape of the matching targets.
first_batch = next(iter(train_dataset), None)
if first_batch is not None:
    samples, targets = first_batch
    print(samples[:, -1, 1])
    print(targets.shape)

In [None]:
# Display the column names parsed from the first line of the CSV.
header

In [None]:
def evalaute_naive_method(dataset):
    """Return the MAE of a "repeat the last observed value" baseline.

    For every window, the prediction is feature column 1 at the window's final
    timestep, converted back to original units with the notebook-level
    ``std`` and ``mean`` arrays.

    Args:
        dataset: Iterable of ``(samples, targets)`` batches. ``samples`` is
            indexed as ``[sample, timestep, feature]`` and ``targets`` holds
            one value per sample.

    Returns:
        The mean absolute error over every sample yielded by ``dataset``.

    Raises:
        ValueError: If ``dataset`` yields no samples, since the mean is then
            undefined (previously this surfaced as a bare ZeroDivisionError).
    """
    total_abs_error = 0.0
    samples_seen = 0
    for samples, targets in dataset:
        # Undo the standardization of column 1 so predictions and targets
        # are compared in the same (raw) units.
        preds = samples[:, -1, 1] * std[1] + mean[1]
        total_abs_error += np.sum(np.abs(preds - targets))
        samples_seen += samples.shape[0]
    if samples_seen == 0:
        raise ValueError("dataset yielded no samples; MAE is undefined")
    return total_abs_error / samples_seen
        
# Report the naive baseline's MAE on the validation and test splits.
print(f"Validation MAE: {evalaute_naive_method(val_dataset):.2f}")
print(f"Test MAE: {evalaute_naive_method(test_dataset):.2f}")

In [None]:
from keras import layers

# Densely connected baseline model.
inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))
# Flatten each (timesteps, features) window into one vector first. Without
# this, Dense acts on the last axis only and the model emits one value per
# timestep, shape (batch, sequence_length, 1). That does not match the single
# target per window, and the loss would broadcast silently.
x = layers.Flatten()(inputs)
x = layers.Dense(16, activation="relu")(x)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs=inputs, outputs=outputs)
# Keep only the best checkpoint seen during training.
callbacks = [
    keras.callbacks.ModelCheckpoint("jena_dense.keras", save_best_only=True)
]
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset,
    callbacks=callbacks,
)
# Reload the saved checkpoint so later evaluation uses it rather than the
# final-epoch weights.
model = keras.models.load_model("jena_dense.keras")


In [None]:
# Score the current model on the test split. Index 1 of the evaluate() result
# is the "mae" metric that follows the loss.
print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")

In [None]:
import matplotlib.pyplot as plt
# Per-epoch training vs. validation MAE for the most recent fit.
loss, val_loss = (history.history[key] for key in ("mae", "val_mae"))
epochs = range(1, len(loss) + 1)
fig, ax = plt.subplots()
ax.plot(epochs, loss, "bo", label="Training MAE")
ax.plot(epochs, val_loss, "b", label="Validation MAE")
ax.set_title("Training and validation MAE")
ax.legend()
plt.show()

In [None]:
# Single-layer LSTM regressor, regularized with recurrent dropout inside the
# LSTM and a Dropout layer in front of the output unit.
inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))
x = layers.LSTM(units=32, recurrent_dropout=0.25)(inputs)
x = layers.Dropout(rate=0.5)(x)
outputs = layers.Dense(units=1)(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Keep only the best checkpoint on disk; a later cell reloads it by file name.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="jena_lstm.keras",
        save_best_only=True,
    )
]
model.compile(loss="mse", optimizer="rmsprop", metrics=["mae"])
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    callbacks=callbacks,
)


In [164]:
# Restore the checkpoint saved by the LSTM training cell and score it on the
# test split. Index 1 of the evaluate() result is the MAE that follows the loss.
model = keras.models.load_model("jena_lstm.keras")
print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")

[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - loss: 9.9943 - mae: 2.4848
Test MAE: 2.49


In [None]:
def plot_mae(history):
    """Plot per-epoch training and validation MAE from a fit history.

    Args:
        history: Object whose ``history`` attribute maps ``"mae"`` and
            ``"val_mae"`` to per-epoch value sequences (as returned by
            ``model.fit`` in this notebook).
    """
    train_mae = history.history["mae"]
    val_mae = history.history["val_mae"]
    epoch_numbers = range(1, 1 + len(train_mae))  # epochs are numbered from 1

    _, ax = plt.subplots()
    ax.plot(epoch_numbers, train_mae, "bo", label="Training MAE")
    ax.plot(epoch_numbers, val_mae, "b", label="Validation MAE")
    ax.set_title("Training and validation MAE")
    ax.legend()
    plt.show()

In [165]:
# Two stacked GRU layers with dropout regularization.
inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))
# The first GRU returns its full output sequence so the second GRU has a
# sequence to consume.
x = keras.layers.GRU(32, recurrent_dropout=0.5, return_sequences=True)(inputs)
x = keras.layers.GRU(32)(x)
x = keras.layers.Dropout(0.5)(x)
outputs = keras.layers.Dense(1)(x)
model = keras.Model(inputs, outputs)
# save_best_only=True matches the other models in this notebook. Without it
# the file is overwritten every epoch, so load_model below would restore the
# last epoch's weights rather than the best ones.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "jena_stacked_gru_drouput.keras",
        save_best_only=True,
    )
]
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=val_dataset,
    callbacks=callbacks,
)
model = keras.models.load_model("jena_stacked_gru_drouput.keras")
print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")

Epoch 1/50
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 74ms/step - loss: 40.4623 - mae: 4.9566 - val_loss: 32.6733 - val_mae: 4.5615
Epoch 2/50
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 73ms/step - loss: 29.2607 - mae: 4.2584 - val_loss: 28.3303 - val_mae: 4.2241
Epoch 3/50
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 71ms/step - loss: 27.9328 - mae: 4.1556 - val_loss: 25.3359 - val_mae: 4.0021
Epoch 4/50
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 74ms/step - loss: 27.1115 - mae: 4.0981 - val_loss: 29.1536 - val_mae: 4.3361
Epoch 5/50
[1m382/819[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m28s[0m 64ms/step - loss: 27.3278 - mae: 4.1114

KeyboardInterrupt: 