In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import datetime
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras

In [None]:

# Read the Jena climate CSV into the raw frame; pandas assigns a default integer index.
df1 = pd.read_csv('/kaggle/input/jena-climate-2009-2016/jena_climate_2009_2016.csv')
df1

In [None]:
# (rows, columns) of the raw frame.
df1.shape

**Replace -9999 with 0**

In [None]:
# Overwrite every -9999 entry in the 'wv (m/s)' and 'max. wv (m/s)' columns with 0,
# handling both columns in a single replace.
df1[['wv (m/s)', 'max. wv (m/s)']] = df1[['wv (m/s)', 'max. wv (m/s)']].replace(-9999.00, 0)

In [None]:
# Summary statistics, transposed so that each column of df1 becomes one row.
df1.describe().T

In [None]:
# Pairwise correlation of the numeric columns only. df1 still holds the string
# "Date Time" column here, and pandas >= 2.0 raises on non-numeric columns in
# DataFrame.corr() instead of silently dropping them, so select numeric dtypes
# explicitly (this behaves the same on older pandas versions).
corr = df1.select_dtypes(include="number").corr()

# Diverging palette centred on 0 with the colour scale fixed to [-1, 1].
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)
# Rotate the x tick labels so the column names do not overlap; the trailing
# semicolon suppresses the list-of-Text repr in the cell output.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);

**Find the row indices of the split points**

In [None]:
# Index labels of the rows stamped 23:50 on 31 Dec 2014 and on 31 Dec 2015.
print(df1.index[df1["Date Time"] == '31.12.2014 23:50:00'].values)
print(df1.index[df1["Date Time"] == '31.12.2015 23:50:00'].values)

In [None]:
# NOTE: this binds a second name to the same DataFrame object; it does not copy df1.
df=df1

**Removing the redundant features**

In [None]:
# Keep only the columns at these positions of the raw frame ("Date Time" must be
# among them because the next step extracts it; presumably it is position 0).
# Selecting from df1 rather than from df keeps this cell re-runnable: slicing the
# already reduced df a second time would index out of bounds. df is the same
# object as df1 at this point, so the first run is unchanged.
# .copy() makes df an independent frame so that adding columns to it later does
# not trigger pandas' SettingWithCopyWarning.
df = df1.iloc[:, [0, 1, 2, 6, 8, 9, 11, 12]].copy()
df

**Extract Date-Time column**

In [None]:
# Parse the "Date Time" strings (day.month.year hour:minute:second) into datetimes
# and remove that column from the feature frame.
# The strings are read from df1 (which keeps the column; df was selected from it
# with the same index) and the column is dropped with errors='ignore', so the cell
# can be re-executed. The previous df.pop('Date Time') raised KeyError on a second
# run because the column was already gone.
date_time = pd.to_datetime(df1['Date Time'], format='%d.%m.%Y %H:%M:%S')
df = df.drop(columns='Date Time', errors='ignore')
df

**Sin-Cos Extraction**

In [None]:
# Seconds since the Unix epoch for every reading.
# pd.Timestamp.timestamp treats the naive timestamps as UTC. The previously used
# datetime.datetime.timestamp interprets naive values in the machine's local time
# zone, so results depended on where the notebook ran and could be irregular
# around daylight-saving transitions.
timestamp_s = date_time.map(pd.Timestamp.timestamp)
day = 24*60*60            # seconds per day
year = (365.2425)*day     # seconds per year, using 365.2425 days

# Encode the position within the day and within the year as sin/cos pairs, so each
# is a periodic signal with a period of one day or one year respectively.
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

In [None]:
# Preview the feature frame, now including the four sin/cos time columns.
df.head()

**Split fraction to use 6 yrs of training data**

In [None]:
# Fraction of rows (counted from the start) that form the training portion.
split_fraction = 0.75075
train_split = int(split_fraction * len(df))

# Windowing and training settings used by the cells below.
step = 6            # sampling rate inside an input window (every 6th row is kept)
past = 720          # rows of history spanned by one input window
future = 72         # rows between the end of the history span and the label
batch_size = 256
epochs = 8

# Column-wise statistics computed on the training rows only, then applied to the
# whole frame so validation rows are scaled with training statistics.
data_mean = df.iloc[:train_split].mean()
data_std = df.iloc[:train_split].std()

df = (df - data_mean) / data_std



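**Normalising the Data** — the normalisation itself is applied in the previous cell; the next cell rebuilds the frame and splits it into training and validation sets.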

In [None]:
# Rebuild the frame from its raw values; this replaces the column names with the
# integer labels 0..n-1 that the following cells index by.
df = pd.DataFrame(df.values)

# The first train_split rows are the training set, everything after is validation.
train_data = df.iloc[:train_split]
val_data = df.iloc[train_split:]

In [None]:
# Preview the first training rows (columns are now labelled by integer position).
train_data.head()

In [None]:
# Number of samples in one input window after down-sampling by `step`.
sequence_length = past // step

# Label rows are offset by past + future rows relative to the input rows.
start = past + future
end = train_split + start

# Inputs: columns 0..10 of the training rows. Labels: column 1 of the offset rows.
x_train = train_data[list(range(11))].values
y_train = df.iloc[start:end][[1]]

In [None]:
# Report the array shapes before they are windowed into a dataset.
print(f'X_train shape == {x_train.shape}.')
print(f'y_train shape == {y_train.shape}.')

In [None]:
# Batched dataset of (input window, label) pairs built from the training arrays:
# each window holds sequence_length samples taken every `step` rows.
dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    data=x_train,
    targets=y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [None]:
# Validation labels begin past + future rows after the first validation row.
label_start = train_split + past + future

# Leave out the last past + future validation rows as inputs.
x_end = len(val_data) - past - future

# Inputs: columns 0..10 of the validation rows. Labels: column 1 from label_start on.
x_val = val_data.iloc[:x_end][list(range(11))].values
y_val = df.iloc[label_start:][[1]]

# Same windowing parameters as the training dataset.
dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    data=x_val,
    targets=y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)


# Pull the first training batch; its input shape is used to size the model input.
batch = next(iter(dataset_train.take(1)))
inputs, targets = batch

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard


In [None]:
# Build and compile a single-layer LSTM regressor.
learning_rate = 0.001
# On the right-hand side `inputs` is still the sample batch tensor unpacked from
# dataset_train; its 2nd and 3rd dimensions define the per-sample input shape.
# The same name is then rebound to the Keras symbolic input.
inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
lstm_out = keras.layers.LSTM(48)(inputs)  # LSTM layer with 48 units
outputs = keras.layers.Dense(1)(lstm_out)  # single output value per sample

model = keras.Model(inputs=inputs, outputs=outputs)
# Adam optimiser with the learning rate above, mean-squared-error loss.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")
model.summary()

**I am using ModelCheckpoint to save checkpoints, and EarlyStopping to stop training when the validation loss is no longer improving.**

In [None]:
%%time
path_checkpoint = "model_checkpoint.h5"
es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5)

modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

history = model.fit(
    dataset_train,
    epochs=epochs,
    validation_data=dataset_val,
    callbacks=[es_callback, modelckpt_callback],
)

**Training loss is lower than validation loss, which means the model is slightly overfitting**

In [None]:
def visualize_loss(history, title):
    """Plot training and validation loss against the epoch index.

    Args:
        history: object whose ``history`` attribute is a mapping containing the
            "loss" and "val_loss" sequences (as returned by ``model.fit``).
        title: text used as the figure title.
    """
    train_curve = history.history["loss"]
    val_curve = history.history["val_loss"]
    epoch_axis = range(len(train_curve))

    plt.figure()
    # Training curve in blue first, validation curve in red second.
    for curve, fmt, legend_label in (
        (train_curve, "b", "Training loss"),
        (val_curve, "r", "Validation loss"),
    ):
        plt.plot(epoch_axis, curve, fmt, label=legend_label)
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


# Plot the loss curves recorded by model.fit.
visualize_loss(history, "Training and Validation Loss")

In [None]:
# Standard deviation and mean of the feature at position 1 (the column used as
# the label), needed to map normalised values back to the original scale.
# data_std / data_mean are indexed by column name, so look the value up by
# position with .iloc; integer lookup through [] on a label-indexed Series is
# deprecated in recent pandas versions.
std = data_std.iloc[1]
avg = data_mean.iloc[1]
   
def show_plot(plot_data, delta, title):
    """Plot a history window together with one or two future points.

    Args:
        plot_data: sequence whose first item is the history array; every
            following item (true value, model prediction) is drawn as a single
            marker at x = ``delta``.
        delta: x position of the future markers; a falsy value places them at 0.
        title: text used as the plot title.
    """
    labels = ("History", "True Future", "Model Prediction")
    marker = (".-", "rx", "go")

    # The history occupies the negative x axis and ends at -1.
    history_len = plot_data[0].shape[0]
    time_steps = list(range(-history_len, 0))
    future = delta or 0

    plt.title(title)
    plt.plot(time_steps, plot_data[0].flatten(), marker[0], label=labels[0])
    for idx in range(1, len(plot_data)):
        plt.plot(future, plot_data[idx], marker[idx], markersize=10, label=labels[idx])
    plt.legend()
    plt.xlim([time_steps[0], (future + 5) * 2])
    plt.xlabel("Time-Step")
    plt.show()


# For the first 20 validation batches, plot the first sample of each batch:
# its history of feature 1, the true label and the model's prediction, all
# mapped back to the original scale with std / avg.
for x, y in dataset_val.take(20):
    p=x[0][:, 1].numpy()   # feature 1 over the input window of the first sample
    q=y[0].numpy()         # true label of the first sample
    p=p*std+avg
    q=q*std+avg
    # verbose=0: otherwise predict prints a progress bar on every iteration,
    # burying the plots in log output.
    pred=model.predict(x, verbose=0)[0]
    pred=(pred*std + avg)

    show_plot(
        [p, q, pred],
        12,  # x position of the future point; presumably future / step (72 / 6) - TODO confirm
        "Single Step Prediction",
    )