#### Download data 

We will use [weather data](https://www.bgc-jena.mpg.de/wetter/) provided by the [Max Planck Institute for Biogeochemistry](https://www.bgc-jena.mpg.de).

In [None]:
#Download the data
!wget https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip --quiet

In [None]:
!ls -l

In [None]:
#Unzip the file
!unzip jena_climate_2009_2016.csv.zip

In [None]:
!ls -l

#### Data Exploration

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [None]:
#Read dataset
df = pd.read_csv('jena_climate_2009_2016.csv')

In [None]:
#Let's check the dataset contents
df.head(n=5)

In [None]:
#Columns
df.columns

In [None]:
df.tail()

In [None]:
#Check number of records
df.shape

What is the Frequency of this Time-Series data?

In [None]:
df.head(n=10)

In [None]:
#Check if null values
df.isnull().sum()

How should we deal with missing values in time-series data?

#### Data for Air Temperature

Exercise #1: Build a model which will predict the future 'Air Temperature' based on past data.

In [None]:
#Get data for Air Tempreture
temp_df = df['T (degC)']

In [None]:
temp_df

In [None]:
#Use the 'Date Time' column as the index so every reading is labelled with its timestamp.
#Note: this only relabels the rows - it does not sort them; the rows keep the order
#they have in the CSV file.
temp_df.index = df['Date Time']

In [None]:
#Check data
temp_df.head(n=50)

In [None]:
temp_df.tail(n=50)

In [None]:
#Visualize data
temp_df.plot(subplots=True, figsize=(10,6))
plt.show()

In [None]:
temp_df.shape

#### Data Preparation

Split data between Training and Test

In [None]:
#Number of training examples - set to 80%
num_training_examples = int(0.8 * temp_df.shape[0])
num_training_examples

In [None]:
#Prepare training and test data
train_data = temp_df.to_list()[:num_training_examples]
test_data = temp_df.to_list()[num_training_examples:]

In [None]:
#Check the data
print(train_data[:100])

In [None]:
#Find the mean standard deviation
mean = np.array(train_data).mean()
std = np.array(train_data).std()

In [None]:
mean, std

Normalize the data

In [None]:
#Normalize training and test data
norm_train = (np.array(train_data) - mean)/std
norm_test  = (np.array(test_data) - mean)/std

In [None]:
norm_train[:100]

In [None]:
def prepare_xy(dataset, window_size=20):
    """Slice a 1-D series into (window, next value) supervised pairs.

    For every start offset, `window_size` consecutive values form one input row
    and the value immediately after that window is its label.

    Returns:
        Tuple (X, y) of NumPy arrays built from the collected windows and labels.
    """
    #Every start offset that still leaves one value after the window for the label
    starts = range(len(dataset) - window_size)

    #Input features: the window beginning at each offset
    windows = [dataset[start:start + window_size] for start in starts]

    #Labels: the value right after each window
    targets = [dataset[start + window_size] for start in starts]

    return np.array(windows), np.array(targets)

In [None]:
#Prepare x,y for train and test
train_x, train_y = prepare_xy(norm_train, window_size=20)
test_x, test_y = prepare_xy(norm_test, window_size=20)

Build X (Input features) and y (Label) for Model training

In [None]:
train_x.shape

In [None]:
train_y.shape

In [None]:
#First example input features (x)
train_x[0]

In [None]:
#First example label (y)
train_y[0]

Visualize single example

In [None]:
def show_example(input_features, target, prediction=None):
    """Plot one window of history with its actual next value and, optionally, a prediction.

    Args:
        input_features: array whose first axis is the time axis of the window;
            it is flattened before plotting.
        target: actual value of the step that follows the window.
        prediction: optional predicted value for that step; nothing is drawn
            for it when it is None.
    """
    num_steps = input_features.shape[0]

    #Plot historical values at x = 0 .. num_steps-1
    plt.plot(list(range(num_steps)), input_features.flatten(), '.-', label='History')

    #The label is the step right after the window, i.e. x = num_steps
    #(plotting it at num_steps+1 would leave an empty step between history and target)
    plt.plot(num_steps, target, 'rx', markersize=10, label='Actual')

    #Plot prediction, if applicable. Compare against None explicitly: a plain
    #truth test would silently skip a prediction of exactly 0 (a perfectly valid
    #normalized value) and raises for arrays with more than one element.
    if prediction is not None:
        plt.plot(num_steps, prediction, 'go', markersize=10, label='Prediction')

    plt.legend()
    plt.show()

In [None]:
#Display examples
exp_num = np.random.randint(0, train_x.shape[0])
show_example(train_x[exp_num], train_y[exp_num])

Prediction using Average method

In [None]:
#Prediction is taken as average of all the points in sequence e.g 20
exp_num = np.random.randint(0, train_x.shape[0])
show_example(train_x[exp_num], train_y[exp_num], train_x[exp_num].mean())

#### Build Model

In [None]:
import tensorflow as tf

In [None]:
#Random seed for reproducibility
tf.random.set_seed(13)

In [None]:
#Build Model
tf.keras.backend.clear_session()
model = tf.keras.Sequential()

#Add LSTM layer
model.add(tf.keras.layers.LSTM(8, input_shape=(20, 1)))

In [None]:
model.output

In [None]:
#Add Output layer
model.add(tf.keras.layers.Dense(1))

In [None]:
model.output

In [None]:
#Compile the model
model.compile(optimizer='adam', loss='mae')

In [None]:
model.summary()

Train the model

- Convert batch to be 3 dimensional data : Batch size x Sequence length x 1

In [None]:
#NOTE(review): at this point train_x/test_x are still 2-D (samples, 20) as returned by
#prepare_xy, while the LSTM layer was declared with input_shape=(20, 1). The trailing
#feature axis is only added a few cells below, after which fit is called again -
#confirm whether this first call is meant to demonstrate the shape mismatch.
model.fit(train_x, train_y,
          validation_data=(test_x, test_y),
          epochs=10, 
          batch_size=256)

In [None]:
train_x.shape

In [None]:
model.input

In [None]:
#Add a trailing feature axis: (samples, window) -> (samples, window, 1).
#An explicit reshape (unlike expand_dims) yields the same shape when this cell is
#run a second time, instead of growing the arrays to 4 dimensions.
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], 1)
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], 1)

In [None]:
train_x.shape

In [None]:
train_y.shape

In [None]:
model.fit(train_x, train_y,
          validation_data=(test_x, test_y),
          epochs=10, 
          batch_size=256)

In [None]:
test_x.shape

In [None]:
#Model Prediction on first example
a = model.predict(test_x[0:1])
print('Normalized Prediction', a)

In [None]:
print('De-normalized Prediction', a * std + mean)

In [None]:
#Actual 
test_y[0]*std+mean

Visualize Model prediction

In [None]:
test_x[0].shape

In [None]:
model.input

In [None]:
np.expand_dims(test_x[1], axis=0).shape

In [None]:
#Pick a test example
exp_num = np.random.randint(0, test_x.shape[0])

#Make input example a batch of 1
prediction = model.predict(np.expand_dims(test_x[exp_num], axis=0))

#Visualize
show_example(test_x[exp_num], test_y[exp_num], prediction[0])

#### Multiple Time Series - Data Preparation

In [None]:
#Three series: air temperature, pressure and air density
features_to_include = ['T (degC)', 'p (mbar)', 'rho (g/m**3)']

#Index the rows by timestamp, then keep only the selected columns
multi_df = df.set_index('Date Time')[features_to_include]

In [None]:
multi_df.head(n=10)

In [None]:
num_training_examples = int(0.8 * multi_df.shape[0])

In [None]:
#Train and Test data
train_df = multi_df.iloc[:num_training_examples,:]
test_df = multi_df.iloc[num_training_examples:,:]

In [None]:
train_df.shape

Normalize data

In [None]:
#Find mean and standard deviation
mean_multi = train_df.mean()
std_multi = train_df.std()

In [None]:
mean_multi

In [None]:
std_multi

In [None]:
#Normalize Train and Test data
norm_train_df = (train_df - mean_multi)/std_multi
norm_test_df = (test_df - mean_multi)/std_multi

In [None]:
norm_train_df.sample(n=5)

Prepare X and Y

In [None]:
def prepare_xy_multi(dataset, num_time_series=3, window_size=(20, 15, 10), target_series=0):
    """Build per-series input windows and next-step labels from a multi-column frame.

    Column i of `dataset` is treated as time series i. All windows of one example
    end at the same time step; series j contributes its last `window_size[j]`
    values before that step. The label is the value of series `target_series`
    at the step right after the windows.

    Args:
        dataset: DataFrame-like object supporting len() and .iloc[:, i].to_list().
        num_time_series: number of leading columns to use as input series.
        window_size: sequence with one window length per series.
        target_series: index of the series whose next value is the label.

    Returns:
        Tuple (dataX, dataY): dataX is a list with one entry per series, each a
        list of windows (plain Python lists); dataY is the list of labels.

    Raises:
        ValueError: if fewer window sizes than time series are given.
    """
    #Fail early with a clear message instead of an IndexError deep inside the loop
    if len(window_size) < num_time_series:
        raise ValueError(
            'window_size must provide one entry per time series: got '
            f'{len(window_size)} entries for {num_time_series} series'
        )

    #Values of each series as a plain list (column i is time series i)
    act_data = [dataset.iloc[:, i].to_list() for i in range(num_time_series)]

    #One independent list of windows per series
    dataX = [[] for _ in range(num_time_series)]
    dataY = []

    #The longest window decides where the first example can end
    max_window_size = max(window_size)

    for i in range(len(dataset) - max_window_size):

        #Common end (exclusive) of all windows of this example
        window_end = i + max_window_size

        #Prepare input for each time series: its last window_size[j] values
        for j in range(num_time_series):
            dataX[j].append(act_data[j][window_end - window_size[j]:window_end])

        #Prepare Label: the target series value right after the windows
        dataY.append(act_data[target_series][window_end])

    return dataX, dataY

In [None]:
#Prepare Training and Test X, y
train_x_multi, train_y_multi = prepare_xy_multi(norm_train_df)
test_x_multi, test_y_multi = prepare_xy_multi(norm_test_df)

In [None]:
len(train_x_multi)

In [None]:
train_x_multi[0][0]

In [None]:
train_x_multi[1][0]

In [None]:
train_x_multi[2][0]

#### Build Model II

In [None]:
import tensorflow as tf

In [None]:
#Build 3 input layers - one for each time series, in the order of features_to_include.
#Each shape is (window length, 1 feature).
input_1 = tf.keras.layers.Input(shape=(20,1)) #Temperature
input_2 = tf.keras.layers.Input(shape=(15,1)) #Pressure
input_3 = tf.keras.layers.Input(shape=(10,1)) #Air density - third feature is 'rho (g/m**3)', not relative humidity

In [None]:
#Build 3 LSTM Layers - One for each time series
lstm_1 = tf.keras.layers.LSTM(8)(input_1)
lstm_2 = tf.keras.layers.LSTM(6)(input_2)
lstm_3 = tf.keras.layers.LSTM(5)(input_3)

In [None]:
lstm_1

In [None]:
lstm_2

In [None]:
lstm_3

In [None]:
#Concatenate LSTM layers output
cat = tf.keras.layers.concatenate([lstm_1, lstm_2, lstm_3])

In [None]:
cat

In [None]:
#Output Layer
op = tf.keras.layers.Dense(1)(cat)

In [None]:
op

In [None]:
#Build Non-Sequential Model
model_multi = tf.keras.Model([input_1, input_2, input_3], #3 Inputs
                             op) #Output

In [None]:
#compile model 
model_multi.compile(optimizer='adam', loss='mae')

In [None]:
model_multi.summary()

##### Model Training

We need to feed 3 inputs. Each input will be 3 dimensional

In [None]:
np.array(train_x_multi[0]).shape

In [None]:
#Build data for training: one array per series with a trailing feature axis,
#i.e. (samples, window) -> (samples, window, 1)
train_x_multi_1 = np.expand_dims(np.array(train_x_multi[0]), axis=-1)
train_x_multi_2 = np.expand_dims(np.array(train_x_multi[1]), axis=-1)
train_x_multi_3 = np.expand_dims(np.array(train_x_multi[2]), axis=-1)

In [None]:
train_x_multi_1.shape

In [None]:
train_x_multi_2.shape

In [None]:
train_x_multi_3.shape

In [None]:
#Build data for test: one array per series with a trailing feature axis,
#i.e. (samples, window) -> (samples, window, 1)
test_x_multi_1 = np.expand_dims(np.array(test_x_multi[0]), axis=-1)
test_x_multi_2 = np.expand_dims(np.array(test_x_multi[1]), axis=-1)
test_x_multi_3 = np.expand_dims(np.array(test_x_multi[2]), axis=-1)

In [None]:
test_x_multi_1.shape

In [None]:
#Model training
model_multi.fit([train_x_multi_1, train_x_multi_2, train_x_multi_3], np.array(train_y_multi), 
                validation_data=([test_x_multi_1, test_x_multi_2, test_x_multi_3], np.array(test_y_multi)), 
                epochs=5, 
                batch_size=256)

Visualize Model Prediction

In [None]:
#Pick a test example
exp_num = np.random.randint(0, len(test_y_multi))

In [None]:
exp_num

In [None]:
np.array(test_x_multi[0][exp_num]).shape

In [None]:
np.expand_dims(np.array(test_x_multi[0][exp_num]), axis=0).shape

In [None]:
#Pick a test example
exp_num = np.random.randint(0, len(test_y_multi))

#Prepare 3 batch inputs - each 3 dimensional: (1, window, 1), matching the
#(window, 1) shape declared on the model's Input layers. expand_dims(axis=0)
#alone would only produce 2-D (1, window) arrays.
in_1 = np.array(test_x_multi[0][exp_num]).reshape(1, -1, 1)
in_2 = np.array(test_x_multi[1][exp_num]).reshape(1, -1, 1)
in_3 = np.array(test_x_multi[2][exp_num]).reshape(1, -1, 1)

#Make prediction
prediction = model_multi.predict([in_1, in_2, in_3])

#Visualize the temperature window (first input) with actual and predicted next value
show_example(in_1[0], test_y_multi[exp_num], prediction[0])

**Using Single LSTM** with same time window for multiple time series

In [None]:
train_x_multi, train_y_multi = prepare_xy_multi(norm_train_df, window_size=(20,20,20))
test_x_multi, test_y_multi = prepare_xy_multi(norm_test_df, window_size=(20,20,20))

In [None]:
len(train_x_multi)

In [None]:
np.array(train_x_multi[2]).shape

In [None]:
#Stack the three equally long (samples, 20) series along a new last axis,
#giving one (samples, 20, 3) array for a single LSTM
train_x = np.stack([np.reshape(series, (-1, 20)) for series in train_x_multi[:3]], axis=2)

In [None]:
train_x.shape

In [None]:
#Stack the three equally long (samples, 20) series along a new last axis,
#giving one (samples, 20, 3) array for a single LSTM
test_x = np.stack([np.reshape(series, (-1, 20)) for series in test_x_multi[:3]], axis=2)

In [None]:
test_x.shape

In [None]:
tf.keras.backend.clear_session()
model1 = tf.keras.Sequential()

In [None]:
model1.add(tf.keras.layers.LSTM(8, input_shape=(20,3)))

In [None]:
model1.output

In [None]:
model1.add(tf.keras.layers.Dense(1))

In [None]:
model1.compile(optimizer='adam', loss='mae')

In [None]:
model1.summary()

In [None]:
#Train the single-LSTM model. The held-out windows prepared above are passed as
#validation data so this run reports val_loss like the other two models.
model1.fit(train_x, np.array(train_y_multi),
           validation_data=(test_x, np.array(test_y_multi)),
           epochs=5,
           batch_size=200)

In [None]:
#Pick a test example
exp_num = np.random.randint(0, test_x.shape[0])

#Prepare a single batch input: (window, features) -> (1, window, features)
in_1 = np.expand_dims(test_x[exp_num], axis=0)

#Make prediction
prediction = model1.predict(in_1)

#Visualize. show_example plots one series, so pass only column 0 of the window:
#that is the temperature series ('T (degC)' is the first selected feature and the
#label comes from target_series=0).
show_example(in_1[0][:, 0], test_y_multi[exp_num], prediction[0])
prediction