# Example of time-series forecasting
### Based on Section 6.3 of [Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python) by F. Chollet

### Load Jena climate data set as Pandas dataframe
Fetch the CSV using the `fetch_jena_climate.sh` script.

In [None]:
import pandas as pd
csv_file = './datasets/jena_climate/jena_climate_2009_2016.csv'

# Load the CSV and, in the same expression, replace the textual 'Date Time'
# column with parsed timestamps (day.month.year hour:minute:second).
data = pd.read_csv(csv_file).assign(
    **{'Date Time': lambda frame: pd.to_datetime(frame['Date Time'],
                                                 format='%d.%m.%Y %H:%M:%S')}
)
# Last expression of the cell: the notebook renders the frame
data

### Investigate the data

In [None]:
# Show pandas' summary statistics for the loaded frame (rendered as the cell output)
data.describe()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Plot only the first 250 rows of the data set
data_sample = data.head(250)

def plot_column(column):
    """Draw one column of `data_sample` against its 'Date Time' column.

    column: label of the `data_sample` column shown on the y-axis
            (also used as the y-axis label).
    """
    plt.figure(figsize=(15, 4))
    timestamps = data_sample['Date Time']
    values = data_sample[column]
    plt.plot(timestamps, values)
    plt.ylabel(column)
    plt.xlabel('Date')

plot_column('T (degC)')

### Transform to NumPy data, dropping the date time

In [None]:
import numpy as np

# 'Date Time' is removed by name, so every remaining column is a measurement.
# No further column slicing is applied: slicing off column 0 as well would
# discard a real feature and shift all column indices by one, which changes
# what the generator's target column (index 1) refers to.
# NOTE(review): with the standard Jena CSV layout column 1 should then be
# 'T (degC)' -- confirm with `data_without_timestamp.columns`.
data_without_timestamp = data.drop(['Date Time'], axis=1)
float_data = np.array(data_without_timestamp, dtype=np.float32)

# Row ranges: the first 20000 rows are used for training, the following rows
# up to (but excluding) 30000 for validation.
training_inds = range(20000)
validation_inds = range(training_inds[-1] + 1, 30000)


### Normalize with the training data

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, so that no statistics of the
# validation/test rows enter the normalization ...
scaler = StandardScaler().fit(float_data[training_inds])

# ... then apply that same transformation to every row.
float_data = scaler.transform(float_data)

### Generator for reading input batches from the multi-dimensional timeseries
- `step`: The sampling period, in rows, of the points used as model input (the original data has one row every 10 minutes, so use `step=6` for a one-hour time step)
- `lookback`: How far into the past each input reaches (in units of 10 min). Each input has `lookback // step` time-series points.
- `delay`: How far into the future the predicted target lies (in units of 10 min)
- `min_index` and `max_index`: Bounds of the row range to draw from, used for keeping training and validation data separate
- `shuffle`: Whether the rows are picked randomly or in chronological order
- `batch_size`: The number of samples in each yielded batch


In [None]:
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6, target_col=1):
    """Endlessly yield `(samples, targets)` batches cut from a time series.

    Each sample is anchored at a row `r` and consists of the rows
    `r - lookback, r - lookback + step, ...` (all before `r`); its target is
    `data[r + delay, target_col]`.

    data:       2-D NumPy array indexed as (row, feature).
    lookback:   number of rows each sample reaches into the past. Must be a
                multiple of `step`, otherwise filling the sample buffer raises
                a ValueError.
    delay:      number of rows between the anchor row and the target row.
    min_index:  first row that a sample may use.
    max_index:  exclusive upper bound for anchor rows; `None` means
                `len(data) - delay - 1`.
    shuffle:    if true, anchor rows are drawn at random (with replacement);
                otherwise consecutive rows are used, restarting from the first
                usable row when fewer than a full batch remains.
    batch_size: number of samples per batch.
    step:       distance, in rows, between the points of one sample.
    target_col: column of `data` used as prediction target.

    Yields tuples `(samples, targets)` of float64 arrays with shapes
    `(n, lookback // step, data.shape[-1])` and `(n,)`.
    """
    if max_index is None:
        max_index = len(data) - delay - 1
    first_row = min_index + lookback
    # Row offsets, relative to an anchor row, of the points forming one sample
    window_offsets = np.arange(-lookback, 0, step)
    i = first_row
    while True:
        if shuffle:
            batch_rows = np.random.randint(first_row, max_index, size=batch_size)
        else:
            # Not enough rows left for a full batch: start over
            if i + batch_size >= max_index:
                i = first_row
            batch_rows = np.arange(i, min(i + batch_size, max_index))
            i += len(batch_rows)
        samples = np.zeros((len(batch_rows), lookback // step, data.shape[-1]))
        targets = np.zeros((len(batch_rows),))
        # Gather all windows and targets of the batch in one indexing
        # operation; writing into the preallocated buffers keeps their dtype.
        samples[:] = data[batch_rows[:, None] + window_offsets]
        targets[:] = data[batch_rows + delay, target_col]
        yield samples, targets
        

### Define the parameters and generators

In [None]:
step = 6 # Keep one input point per hour (input data sampled every 10 minutes)
lookback = 3 * 24 * 6 # Use data from 3 previous days for prediction (input data sampled every 10 minutes)
delay = 24 * 6 # Trying to predict temperature one day to the future
batch_size = 128

from functools import partial

# Arguments shared by all three generators. `step` is passed explicitly so
# that the generators stay in sync with the value used for the model input
# shape (`lookback // step`) instead of relying on the generator's default.
partial_gen = partial(generator,
                      data = float_data,
                      lookback = lookback,
                      delay = delay,
                      batch_size = batch_size,
                      step = step)

train_gen = partial_gen(min_index = 0,
                        max_index = training_inds[-1],
                        shuffle = True)

val_gen = partial_gen(min_index = validation_inds[0], max_index = validation_inds[-1])

# Everything after the validation rows is used for testing
test_gen = partial_gen(min_index = validation_inds[-1], max_index = None)

# Number of batches (not samples) to draw in order to see each split once:
# every generator step yields `batch_size` samples.
val_steps = (validation_inds[-1] - validation_inds[0] - lookback) // batch_size
test_steps = (len(float_data) - validation_inds[-1] - lookback) // batch_size

### Feed-forward fully connected network

In [None]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

# Baseline: flatten each (time steps, features) sample into one vector and
# feed it through a single hidden dense layer; one output unit for the
# regression target.
model = Sequential([
    layers.Flatten(input_shape=(lookback // step, float_data.shape[-1])),
    layers.Dense(32, activation='relu'),
    layers.Dense(1),
])

# Mean absolute error as loss, RMSprop with its default settings
model.compile(loss='mae', optimizer=RMSprop())

# NOTE(review): `fit_generator` is reportedly deprecated in newer Keras
# releases in favour of `fit` -- confirm against the installed version.
history = model.fit_generator(
    train_gen,
    steps_per_epoch=500,
    epochs=5,
    validation_data=val_gen,
    validation_steps=val_steps,
)

### GRU (Gated recurrent unit)

In [None]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

# Recurrent model: one GRU layer with 32 units followed by a single output
# unit. `None` as first input dimension leaves the number of time steps
# unspecified.
model = Sequential([
    layers.GRU(32, input_shape=(None, float_data.shape[-1])),
    layers.Dense(1),
])

# Mean absolute error as loss, RMSprop with its default settings
model.compile(loss='mae', optimizer=RMSprop())

history = model.fit_generator(
    train_gen,
    steps_per_epoch=500,
    epochs=20,
    validation_data=val_gen,
    validation_steps=val_steps,
)
