In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

#tf.debugging.set_log_device_placement(True)

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import datetime

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
os.environ.update(TF_FORCE_GPU_ALLOW_GROWTH='true')
# Silence TensorFlow log records below WARNING.
tf.get_logger().setLevel('WARNING')

In [None]:
# Notebook-wide matplotlib defaults: large figures, no background grid.
matplotlib.rcParams.update({
    'figure.figsize': (16, 12),
    'axes.grid': False,
})

# Rows [0, train_size) are used for training; later rows feed validation.
train_size = 300000

In [None]:
# Fetch the zipped Jena climate CSV through the Keras download cache and extract it.
zip_data = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    extract=True,
)
# Strip the trailing ".zip"; the remainder is expected to be the extracted CSV path.
csv_path = os.path.splitext(zip_data)[0]

In [None]:
# Load the extracted CSV into a DataFrame and show its first rows as a sanity check.
df = pd.read_csv(csv_path)
display(df.head())

In [None]:
# Print column dtypes, non-null counts and memory usage of the raw frame.
df.info()

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

In [None]:
# One subplot per column; the trailing semicolon keeps the array-of-Axes repr
# out of the cell output so only the figure is shown.
df.plot(subplots=True);

In [None]:
# Move the timestamp column into the index so it is excluded from the value
# array, preview the result, then keep only the underlying NumPy array.
indexed_df = df.set_index("Date Time")
display(indexed_df.head())
df_values = indexed_df.values
print("df_values created")

In [None]:
# Per-column statistics are taken from the training rows only, so nothing from
# the validation range influences the scaling.
train_rows = df_values[:train_size]
data_mean = train_rows.mean(axis=0)
print("df_mean created")
data_std = train_rows.std(axis=0)
print("df_std created")

# Standardise every row of the full array with the training statistics.
normalized_df = (df_values - data_mean) / data_std

In [None]:
def prepare_data(train_dataset, target_dataset, start_index, end_index, trainig_window_size,
                      predict_window_size, prediction_steps, single_step=False):
  """
    Cut a time series into (history window, label) pairs with a sliding window.

    Every position i in [start_index + trainig_window_size, end_index) is an
    anchor: the rows before it become the model input, the rows from it onward
    become the label.

    Parameters:

      train_dataset - NumPy array of input features, time on axis 0
      target_dataset - NumPy array with the values to predict, aligned with train_dataset
      start_index - first row that may be used as history
      end_index - exclusive upper bound for the anchor position. None means
                  len(train_dataset) - predict_window_size. An explicit value is
                  reduced to the last anchor whose label still lies completely
                  inside target_dataset, so the labels are never ragged.

      trainig_window_size - number of rows of history preceding each anchor
      predict_window_size - label length; with single_step, how many rows ahead
                            of the anchor the single label value is taken from
      prediction_steps - stride used when sampling rows from the history window
      single_step - True: each label is one value; False: each label is the
                    next predict_window_size values

    Returns:
      (data, labels) - two NumPy arrays with one entry per anchor
  """

  print("train_dataset shape ", train_dataset.shape)
  print("target_dataset shape ", target_dataset.shape)

  # The first anchor needs a full history window in front of it.
  first_anchor = start_index + trainig_window_size
  if end_index is None:
    end_index = len(train_dataset) - predict_window_size
  else:
    # A multi-step label needs rows [i, i + predict_window_size); a single-step
    # label needs row i + predict_window_size. Stop before either runs off the end.
    last_anchor_exclusive = len(target_dataset) - predict_window_size + (0 if single_step else 1)
    end_index = min(end_index, last_anchor_exclusive)

  data = []
  labels = []
  for i in range(first_anchor, end_index):
    # History rows i - window .. i - 1, sub-sampled every `prediction_steps` rows.
    indices = range(i - trainig_window_size, i, prediction_steps)
    data.append(train_dataset[indices])

    if single_step:
      labels.append(target_dataset[i + predict_window_size])
    else:
      labels.append(target_dataset[i:i + predict_window_size])

  # Convert once: these arrays can be several GB, so building them a second
  # time just to print their shape is wasteful.
  data = np.array(data)
  labels = np.array(labels)

  print("data shape {}\nlabels shape {}" .format(data.shape, labels.shape))
  return data, labels

def create_time_steps(length):
  """Return the x-axis positions -length, ..., -1 used to plot a history window."""
  return [-offset for offset in range(length, 0, -1)]

def multi_step_plot(history, true_future, prediction, step=None):
  """
    Plot one history window together with its true and predicted future values.

    Parameters:
      history - 2-D window of past rows; column 1 is the series that gets drawn
      true_future - ground-truth future values for that window
      prediction - predicted future values; None or an all-zero array skips the
                   prediction markers
      step - divisor that maps future sample numbers onto the history x-axis;
             defaults to the notebook-level `prediction_steps`
  """
  if step is None:
    # Backward-compatible fallback to the global used by the original notebook.
    step = prediction_steps

  plt.figure(figsize=(12, 6))
  num_in = create_time_steps(len(history))
  # Both future curves share the same x positions, so compute them once.
  future_axis = np.arange(len(true_future)) / step

  plt.plot(num_in, np.array(history[:, 1]), label='History')
  plt.plot(future_axis, np.array(true_future), 'bo',
           label='True Future')
  # `prediction is not None` guards the previously crashing None case.
  if prediction is not None and np.any(prediction):
    plt.plot(future_axis, np.array(prediction), 'ro',
             label='Predicted Future')
  plt.legend(loc='upper left')
  plt.show()

In [None]:
past_history = 720     # rows of history fed to the model per sample
future_target = 72     # rows predicted per sample
prediction_steps = 6   # stride used when sampling the history window

# The windows are built from the standardised array (`normalized_df`) rather
# than the raw `df_values`: the normalisation was computed above but never
# used, leaving the model to train on features with very different scales.
# NOTE(review): training labels reach up to `future_target` rows past
# `train_size`, so they overlap slightly with the first validation histories.
print("TRAINING DATASET")
x_train, y_train = prepare_data(train_dataset=normalized_df,
                                target_dataset=normalized_df[:, 1], # predicting temperature
                                start_index=0,                     # 0
                                end_index=train_size,              # 300000
                                trainig_window_size=past_history,  # 720
                                predict_window_size=future_target, # 72
                                prediction_steps=prediction_steps, # 6
                                single_step=False)
print("VALIDATION DATASET")
x_val, y_val = prepare_data(train_dataset=normalized_df,           # all data
                            target_dataset=normalized_df[:, 1],    # predicting temperature
                            start_index=train_size,                # 300000
                            end_index=None,                        # None (run to the end)
                            trainig_window_size=past_history,      # 720
                            predict_window_size=future_target,     # 72
                            prediction_steps=prediction_steps,     # 6
                            single_step=False)

In [None]:
# Create the distribution strategy used below for batch sizing and model
# construction, and report how many replicas it will keep in sync.
strategy = tf.distribute.MirroredStrategy()
print("mirrored_strategy.num_replicas_in_sync ", strategy.num_replicas_in_sync)

In [None]:
# Training length and shuffle-buffer size.
EPOCHS, BUFFER_SIZE = 500, 10000
# Global batch size: 128 samples for each replica kept in sync by the strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 128

In [None]:
# Training pipeline: shuffle within a BUFFER_SIZE window, then emit full batches only.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

# Validation pipeline: original order, full batches only.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [None]:
#!rm -rf logs/fit

In [None]:
checkpoint_path = "training_1_clip/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
print("logdir = ", log_dir)

# CSVLogger writes to logs/training.log; make sure the directory exists instead
# of relying on another callback to have created it first.
os.makedirs('logs', exist_ok=True)

with strategy.scope():

    def create_model():
        """Build the Conv1D + stacked-LSTM forecaster.

        Input shape is taken from `x_train` and the output width from `y_train`,
        so the network always matches the windows produced by `prepare_data`.
        """
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Conv1D(filters=150,
                                        kernel_size=11,
                                        strides=3,
                                        padding='same',
                                        input_shape=x_train.shape[-2:]))
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tf.keras.layers.LSTM(128,
                                       dropout=0.2,
                                       return_sequences=True))
        # NOTE(review): a non-default activation prevents Keras from using the
        # fused cuDNN LSTM kernel on GPU - confirm that 'relu' is intentional.
        model.add(tf.keras.layers.LSTM(32,
                                       dropout=0.2,
                                       activation='relu'))
        model.add(tf.keras.layers.Dense(150))
        # One output per predicted step; derived from the labels instead of a
        # hard-coded 72 so it follows `future_target`.
        model.add(tf.keras.layers.Dense(y_train.shape[-1]))
        return model

    # Save the weights whenever the monitored validation metric improves.
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                     save_best_only=True,
                                                     save_weights_only=True,
                                                     verbose=1)

    # Per-epoch scalars and weight histograms for TensorBoard.
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # Lower the learning rate when the monitored metric stops improving.
    reduce_callback = tf.keras.callbacks.ReduceLROnPlateau(verbose=1)

    # Plain-text record of the per-epoch metrics.
    csv_logger = tf.keras.callbacks.CSVLogger('logs/training.log')

    model = create_model()
    model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mae')
    model.summary()
    multi_step_history = model.fit(train_dataset, epochs=EPOCHS,
                                          validation_data=val_dataset,
                                          callbacks=[cp_callback, tensorboard_callback, csv_logger, reduce_callback])

In [None]:
# Plot the first sample of each of the first three validation batches
# against the model's forecast for it.
for batch_inputs, batch_targets in val_dataset.take(3):
  batch_predictions = model.predict(batch_inputs)
  multi_step_plot(batch_inputs[0], batch_targets[0], batch_predictions[0])