In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the training table, the test table and the submission template in one pass.
train_df, test_df, sample_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/tabular-playground-series-jul-2021/train.csv",
        "/kaggle/input/tabular-playground-series-jul-2021/test.csv",
        "/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv",
    )
)

In [None]:
# Parse the 'date_time' column of both tables into pandas datetimes.
for frame in (train_df, test_df):
    frame['date_time'] = pd.to_datetime(frame['date_time'])
print(train_df.head())

In [None]:
# Feature engineering
# train_df['year'] = train_df['date_time'].dt.year
# train_df['month'] = train_df['date_time'].dt.month
# train_df['hour'] = train_df['date_time'].dt.hour
# train_df['day'] = train_df['date_time'].dt.day

# test_df['year'] = test_df['date_time'].dt.year
# test_df['month'] = test_df['date_time'].dt.month
# test_df['hour'] = test_df['date_time'].dt.hour
# test_df['day'] = test_df['date_time'].dt.day
# print(train_df.head())

In [None]:
# Re-index both tables by their 'date_time' column; the windowing code further
# down works on these index-keyed copies rather than on the raw frames.
train, test = (
    frame.set_index("date_time").copy()
    for frame in (train_df, test_df)
)
print(train.shape)

In [None]:
# Columns whose name starts with 'target' are the labels; everything else is a feature.
target_cols = [name for name in train.columns if name.startswith('target')]
feat_cols = [name for name in train.columns if name not in target_cols]
print(target_cols)

# Prepend feature-only training rows to the test frame as leading context.
# iloc[-10:-1] selects nine rows and leaves out the very last training row.
# NOTE(review): confirm that skipping the final training row is intended.
context_rows = train.drop(columns=target_cols).iloc[-10:-1]
test = pd.concat([context_rows, test])
print(test.head())

In [None]:
# Chronological hold-out: keep the first 80% of rows for training and the last
# 20% for validation. train_test_split shuffles rows by default, which would
# scramble the time order that the sliding windows built later depend on (each
# window is a run of *consecutive* rows), so shuffling is switched off.
# random_state is dropped because it has no effect when shuffle=False.
train, val = train_test_split(train, test_size=0.2, shuffle=False)
print(test)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Separate MinMax scalers for features and labels. Both are fitted on the
# training split only and then re-applied to the validation and test data.
fea_scaler = MinMaxScaler()
lab_scaler = MinMaxScaler()

train_features = train.drop(columns=target_cols)
val_features = val.drop(columns=target_cols)

Xtrain_scaled = fea_scaler.fit_transform(train_features)
Ytrain_scaled = lab_scaler.fit_transform(train[target_cols])

Xval_scaled = fea_scaler.transform(val_features)
Yval_scaled = lab_scaler.transform(val[target_cols])

# `test` is expected to hold feature columns only at this point.
Xtest_scaled = fea_scaler.transform(test)
print(Xtrain_scaled)

In [None]:
# Number of consecutive rows that make up one input window.
length = 10
# Number of windows per batch; also baked into the model's fixed batch_input_shape.
batch_size = 1

# train_generator = TimeseriesGenerator(data=Xtrain_scaled,
#                                       targets=train[target_cols[:]],
#                                       length=length,
#                                       batch_size=batch_size)
# test_generator = TimeseriesGenerator(data=Xtest_scaled,
#                                      targets=test[target_cols[:]],
#                                      length=length,
#                                      batch_size=batch_size)

In [None]:
# Build sliding-window datasets of `length` consecutive rows.
#
# timeseries_dataset_from_array pairs targets[i] with the window that STARTS at
# row i. The test windows are consumed so that each prediction belongs to the
# LAST row of its window (the leading context rows exist for exactly that
# reason), so the train/validation targets are offset by `length - 1` to give
# every window the label of its final row as well.
train_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    Xtrain_scaled,
    targets=Ytrain_scaled[length - 1:],
    sequence_length=length,
    batch_size=batch_size,
    shuffle=True,
    start_index=0).prefetch(64)

# Validation is evaluation only, so the windows are kept in order.
val_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    Xval_scaled,
    targets=Yval_scaled[length - 1:],
    sequence_length=length,
    batch_size=batch_size,
    shuffle=False,
    start_index=0).prefetch(64)

# Inference data: there are no labels, so no dummy target columns are added to
# `test` (which also keeps the scaling cell re-runnable). The windows MUST stay
# in their original order because predictions are written to the submission
# file positionally; shuffling here would assign them to the wrong rows.
test_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    Xtest_scaled,
    targets=None,
    sequence_length=length,
    batch_size=batch_size,
    shuffle=False,
    start_index=0).prefetch(64)
print(train[target_cols[:]].shape)

In [None]:
# Width of the scaled training matrix, i.e. the number of inputs per time step.
n_features = np.shape(Xtrain_scaled)[1]
print(n_features)

In [None]:
def rmsle_custom(y_true, y_pred):
    """Return the square root of the Keras mean squared logarithmic error.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        The result of applying K.sqrt to the MeanSquaredLogarithmicError loss
        evaluated on (y_true, y_pred).
    """
    msle_value = tf.keras.losses.MeanSquaredLogarithmicError()(y_true, y_pred)
    return K.sqrt(msle_value)


# Stop training once the validation RMSLE metric has not improved for 4 epochs
# and roll the model back to the best weights seen.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_rmsle_custom',
    mode='min',
    patience=4,
    restore_best_weights=True,
)

# Halve the learning rate when the validation loss stalls for 3 epochs,
# never going below 1e-6.
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    mode="auto",
    factor=0.5,
    patience=3,
    cooldown=0,
    min_delta=1e-4,
    min_lr=1e-6,
    verbose=1,
)

# Seeded standard-normal initializer (not referenced elsewhere in this section).
weights_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=45)

def _build_lstm_autoencoder():
    """Assemble the (uncompiled) stacked-LSTM encoder/decoder network.

    Reads the module-level `batch_size`, `length` and `n_features` to fix the
    input shape, which stateful LSTM layers require.
    """
    return tf.keras.Sequential([
        # Encoder: two stacked LSTMs; the second one returns only its last output.
        tf.keras.layers.LSTM(100, return_sequences=True,
                             dropout=0.3,
                             activation="tanh",
                             stateful=True,
                             batch_input_shape=(batch_size, length, n_features),
                             kernel_initializer='LecunUniform'),
        tf.keras.layers.LSTM(50,
                             dropout=0.3,
                             stateful=True,
                             activation="tanh",
                             kernel_initializer='LecunUniform'),
        # Turn the encoded vector into a one-step sequence for the decoder.
        tf.keras.layers.RepeatVector(1),
        # Decoder: two stacked LSTMs followed by a per-step projection.
        tf.keras.layers.LSTM(50, return_sequences=True,
                             dropout=0.3,
                             stateful=True,
                             activation="sigmoid",
                             kernel_initializer='LecunUniform'),
        tf.keras.layers.LSTM(100, return_sequences=True,
                             dropout=0.3,
                             stateful=True,
                             activation="sigmoid",
                             kernel_initializer='LecunUniform'),
        tf.keras.layers.TimeDistributed(Dense(n_features)),
        # Regression head with 3 outputs (presumably one per target column).
        tf.keras.layers.Dense(20, kernel_initializer='LecunUniform',
                              activation=tf.keras.layers.PReLU()),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(3, kernel_initializer='LecunUniform',
                              activation=tf.keras.layers.PReLU())
    ])


def lstm_autoencoder(epochs=50, learning_rate=0.01):
    """Build, compile and train the LSTM encoder/decoder regressor.

    Trains on the module-level `train_dataset`, validates on `val_dataset`,
    and uses the `es` and `plateau` callbacks.

    Args:
        epochs: maximum number of training epochs (early stopping may end sooner).
        learning_rate: initial learning rate for the Adam optimizer.

    Returns:
        (history, model): the History object returned by `fit` and the trained model.
    """
    model = _build_lstm_autoencoder()

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras releases no longer accept.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.mean_squared_error,
                  metrics=[rmsle_custom])

    history = model.fit(train_dataset,
                        validation_data=val_dataset,
                        epochs=epochs,
                        callbacks=[es, plateau],
                        verbose=1)

    return history, model

lstm_2_history, lstm_model = lstm_autoencoder()

In [None]:
# Predict one row of targets per test window, undo the label scaling and write
# the submission file. Predictions are assigned to `sample_df` positionally, so
# `test_dataset` must yield its windows in their original order.
preds = lstm_model.predict(test_dataset)

# Collapse any extra axes down to (n_rows, n_targets) without hard-coding the
# row or column count.
preds = preds.reshape(-1, len(target_cols))
if len(preds) != len(sample_df):
    raise ValueError(
        f"Got {len(preds)} prediction rows but the submission template has "
        f"{len(sample_df)} rows"
    )

preds = lab_scaler.inverse_transform(preds)
sample_df[target_cols] = preds
sample_df.to_csv('sample_submission.csv', index=False)