#### <font color="red">Warning 1</font>: This is not submission material; I am just experimenting with some fancy architectures (LSTMs, skip connections, concatenation, etc.).
#### <font color="red">Warning 2</font>: This is my first notebook, as I have never really tried Kaggle before, so expect some weird newcomer stuff (even though my account is not that new)...

Some ideas from Remek's [notebook](https://www.kaggle.com/remekkinas/lstm-seq2seq-encoder-decoder)! Check it out too!

In [None]:
# Install TensorFlow pinned to exactly 2.5 (the notebook needs at least 2.5),
# then list the installed tensorflow packages to confirm what was picked up.
!pip install -q tensorflow==2.5 # Need Tensorflow >= 2.5
!pip list | grep tensorflow

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K

# NumPy display settings: no scientific notation for small numbers, up to 20
# items shown at each edge of a summarized array, and very wide lines so rows
# are not wrapped.
np.set_printoptions(
    linewidth=1000,
    edgeitems=20,
    suppress=True,
)

In [None]:
# Compare (major, minor) as integers. Parsing the version as a float is wrong
# for two-digit minors: "2.10" becomes 2.1, which would fail a ">= 2.5" check
# even though 2.10 is newer than 2.5.
_tf_major, _tf_minor = (int(part) for part in tf.__version__.split('.')[:2])
assert (_tf_major, _tf_minor) >= (2, 5), 'Need TF 2.5 version'

In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
# Note: the loop variables stay bound after the loop finishes, so `dirname`
# ends up holding the last directory visited by os.walk.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
# Locate the competition folder explicitly instead of relying on whichever
# directory the file-listing loop happened to visit last (that leaked loop
# variable is undefined when the walk yields nothing, and points at the wrong
# folder when several input datasets are attached).
data_dir = next(
    (root for root, _, files in os.walk('/kaggle/input') if "train.csv" in files),
    None,
)
if data_dir is None:
    raise FileNotFoundError("train.csv not found under /kaggle/input")

train = pd.read_csv(os.path.join(data_dir, "train.csv"))
test = pd.read_csv(os.path.join(data_dir, "test.csv"))
sub = pd.read_csv(os.path.join(data_dir, "sample_submission.csv"))

In [None]:
# Print the (rows, columns) shape of the training table, then preview its first rows.
print(train.shape)
train.head()

In [None]:
# Keep only the per-column mean and standard deviation rows of the summary statistics.
train.describe().loc[['mean', 'std'], :] # checking sparseness

In [None]:
# Split sizes: 80% of the training rows for fitting and 20% for validation
# (each count truncated to an integer); the test table is used whole.
total_rows = len(train)
num_train_samples = int(0.8 * total_rows)
num_val_samples = int(0.2 * total_rows)
num_test_samples = len(test)

for label, count in (
    ("num_train_samples:", num_train_samples),
    ("num_val_samples:", num_val_samples),
    ("num_test_samples:", num_test_samples),
):
    print(label, count)

In [None]:
sequence_length = 12 # window length in rows (let's use half a day; assumes hourly rows, TODO confirm)

In [None]:
# Column layout of `train`: the first column is skipped, the last three columns
# are the targets, and everything in between is used as a feature.
test_features = test.iloc[:, 1:].values
raw_data = train.iloc[:, 1:-3].values
targets = np.log1p(train.iloc[:, -3:].values)  # targets are modelled in log1p space

# Prepend the tail of the training features to the test features so the first
# test rows also get a full window. The slice stops one row before the end of
# train (NOTE(review): presumably because the last train row coincides with the
# first test row - confirm).
test_data = np.concatenate([raw_data[-sequence_length:-1, :], test_features])
all_data = np.concatenate([raw_data, test_features])

# Normalizing with test data also for more accurate mean/std
mean, std = all_data.mean(axis=0), all_data.std(axis=0)
mean_target, std_target = targets.mean(axis=0), targets.std(axis=0)

# Standardize features with the train+test statistics, and targets with their own.
raw_data = (raw_data - mean) / std
test_data = (test_data - mean) / std
targets = (targets - mean_target) / std_target

In [None]:
# Datasets
batch_size = 32

# timeseries_dataset_from_array pairs the window that STARTS at index i with
# targets[i]. Slicing the targets at `sequence_length - 1` therefore pairs each
# window with the target of its LAST row. The test windows are built so that
# each one ends on the row being predicted, so training must use the same
# alignment; the previous `targets[sequence_length:]` paired every window with
# the row AFTER it, an off-by-one mismatch between training and inference.
aligned_targets = targets[sequence_length - 1:]

# Training windows: rows [0, num_train_samples].
train_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    raw_data,
    targets=aligned_targets,
    sequence_length=sequence_length,
    batch_size=batch_size,
    shuffle=True,
    start_index=0,
    end_index=num_train_samples).prefetch(64)

# Validation windows: the rows that follow the training range.
val_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    raw_data,
    targets=aligned_targets,
    sequence_length=sequence_length,
    batch_size=batch_size,
    shuffle=True,
    start_index=num_train_samples,
    end_index=num_train_samples + num_val_samples).prefetch(64)

# Test windows: no targets and no shuffling, so predictions keep the row order.
test_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    test_data,
    targets=None,
    sequence_length=sequence_length,
    batch_size=batch_size)

In [None]:
def rmsle(y_true, y_pred):
    """Root mean squared logarithmic error.

    Evaluates Keras' ``MeanSquaredLogarithmicError`` on ``y_true`` and
    ``y_pred`` and returns the square root of that value.
    """
    squared_log_error = tf.keras.losses.MeanSquaredLogarithmicError()(y_true, y_pred)
    return K.sqrt(squared_log_error)

In [None]:
def _regression_head(features, name):
    """Build one single-output head on top of the shared pooled features.

    Args:
        features: Pooled 64-unit feature tensor shared by all heads.
        name: Name given to the final ``Dense(1)`` output layer.

    Returns:
        The output tensor of the head.
    """
    h = Dense(64)(features)
    h = Add()([h, features])          # skip connection around the first Dense
    h = Dropout(0.5)(h)
    h = Dense(32)(h)
    h = Concatenate()([h, features])  # re-inject the shared features
    h = Dense(16)(h)
    h = Dropout(0.3)(h)
    return Dense(1, name=name)(h)


def load_model():
    """Build a fresh, uncompiled three-output model (nothing is read from disk).

    Clears the Keras backend session first. The input shape is taken from the
    module-level ``sequence_length`` and the feature count of ``raw_data``.
    A shared trunk (causal Conv1D -> two LSTMs -> Dropout -> per-step Dense ->
    average pooling over time) feeds three structurally identical heads.

    Returns:
        A ``tf.keras.Model`` whose outputs are named "carb", "bezn" and
        "nitro", in that order.
    """
    tf.keras.backend.clear_session()

    inputs = tf.keras.Input(shape=(sequence_length, raw_data.shape[-1]))
    x = Conv1D(filters=32, kernel_size=5, strides=1, padding="causal", activation="relu")(inputs)
    x = LSTM(128, return_sequences=True)(x)
    x = LSTM(64, return_sequences=True)(x)
    x = Dropout(0.5)(x)
    x = TimeDistributed(Dense(64))(x)
    x = GlobalAveragePooling1D()(x)

    # The heads are built one after another in the original order, so layer
    # creation order (and the auto-generated layer names) stays the same.
    outputs = [_regression_head(x, head_name) for head_name in ("carb", "bezn", "nitro")]

    model = tf.keras.Model(inputs, outputs)
    return model

In [None]:
# Build a fresh network.
model = load_model()

# Checkpoint to "lstm.keras", overwriting it only when the monitored metric improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint("lstm.keras", save_best_only=True)
callbacks = [checkpoint]

# Adam with a fixed learning rate; the same rmsle loss is applied to each output.
# NOTE(review): the targets were log1p-transformed and standardized earlier, so
# they can be negative; confirm that a log-based loss is really intended here
# rather than a plain RMSE.
lr = 3e-3
optim = tf.keras.optimizers.Adam(learning_rate=lr)
model.compile(loss=rmsle, optimizer=optim)

In [None]:
# Train for 100 epochs, validating on the held-out windows after every epoch
# and running the checkpoint callback configured above.
fit_options = dict(
    epochs=100,
    validation_data=val_dataset,
    verbose=2,
    callbacks=callbacks,
)
history = model.fit(train_dataset, **fit_options)

In [None]:
# Reload the checkpoint written during training. The custom loss is passed by
# name so the saved, compiled model can be deserialized. Then score it on the
# validation windows.
custom_objects = {'rmsle': rmsle}
model = tf.keras.models.load_model("lstm.keras", custom_objects=custom_objects)
model.evaluate(val_dataset)

In [None]:
# PREDICT
# The model has three outputs, so the per-output prediction arrays are stacked
# side by side into a single array with one column per target.
head_predictions = model.predict(test_dataset)
preds = np.hstack(head_predictions)
preds.shape

In [None]:
# Undo the target preprocessing in reverse order. Training targets were
# log1p-transformed first and standardized second, so predictions must be
# de-standardized first and only then mapped back with expm1. The previous
# expm1(preds) * std + mean applied the two steps in the wrong order.
sub.iloc[:, 1:] = np.expm1(preds * std_target + mean_target)
# Overwrite the first submission row with the targets of the last training row.
# NOTE(review): presumably the first test row coincides with the last train row - confirm.
sub.iloc[0, 1:] = train.iloc[-1, -3:].values # First prediction we got for free...
sub.to_csv('sub.csv', index=False)

In [None]:
# Preview the first rows of the submission table.
sub.head()