In [None]:
import os
import sys
import time

import math
import numpy as np 
import pandas as pd
import seaborn as sns

from sklearn.metrics import mean_absolute_error as MAE, mean_squared_error as MSE
from sklearn.model_selection import train_test_split, KFold
from sklearn.feature_selection import RFE

import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display

In [None]:
# Apply the seaborn theme first so the explicit rcParams assignments below
# are made on top of it instead of being at risk of being reset by it.
sns.set(style="ticks", color_codes=True)

mpl.rcParams['figure.figsize'] = (20, 13)
mpl.rcParams['axes.grid'] = False

# Wider pandas display limits for the frames shown in this notebook.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 256)

import warnings

# The warning class has moved between pandas releases: try the public
# pandas.errors location first, then the legacy pandas.core.common one.
# If neither exists there is nothing to silence.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        SettingWithCopyWarning = None

if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# **Data Loading**

In [None]:
from dateutil.parser import parse


def date_parser(date):
    """Parse one 'Date Time' string from the CSV into a datetime.

    dateutil reads ambiguous dates month-first by default, so a value such as
    '01.02.2009 00:10:00' would become 2 January while '13.01.2009 ...' would
    become 13 January, silently swapping day and month for the first twelve
    days of every month. ``dayfirst=True`` makes the interpretation uniform.

    NOTE(review): assumes the CSV stores timestamps day-first
    (DD.MM.YYYY HH:MM:SS) -- confirm against the file.
    """
    return parse(date, dayfirst=True)

In [None]:
# Read the raw CSV, converting the 'Date Time' column with date_parser.
read_options = dict(parse_dates=['Date Time'], date_parser=date_parser)
df_raw = pd.read_csv(
    '../input/jena-climate-2009-2016/jena_climate_2009_2016.csv',
    **read_options,
)

In [None]:
# Preview the first ten rows of the raw frame.
df_raw.head(10)

In [None]:
# Replace the -9999.00 entries in both wind-velocity columns with 0.
wind_columns = ['wv (m/s)', 'max. wv (m/s)']
df_raw[wind_columns] = df_raw[wind_columns].replace(-9999.00, 0)

In [None]:
# Column groups: exogenous inputs and the endogenous target.
features_exog = [
    'p (mbar)',
    'VPmax (mbar)',
    'VPdef (mbar)',
    'sh (g/kg)',
    'rho (g/m**3)',
    'wv (m/s)',
]
feature_endog = ['T (degC)']

# Work on an independent copy restricted to the timestamp and chosen columns.
selected_columns = ['Date Time', *features_exog, *feature_endog]
df = df_raw[selected_columns].copy()
df.head()

## Add **seasonality** by **Sin-Cos Extraction**

In [None]:
# Promote the timestamp column to the index, then keep the timestamps as a
# plain Series (default integer index) for the feature generation that follows.
df = df.set_index('Date Time', drop=True)
date_time = pd.Series(df.index)
date_time.head()

In [None]:
from datetime import date, datetime

# Seconds since the Unix epoch, computed vectorised from the datetime Series.
# Mapping datetime.timestamp over naive timestamps interprets them in the
# host's local timezone; this form gives the same numbers on every machine
# (and matches the old result when the host runs in UTC).
timestamp_dt = (date_time - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)

# Define timestamp constants
SECOND, MINUTE, HOUR = 1, 60, 3_600
DAY = 24*HOUR
WEEK = 7*DAY
MONTH = DAY*30.4375 # (7*31 + 4*30 +28.25) / 12 = 30.4375
YEAR = DAY*365.25
dt_features = [MINUTE, HOUR, DAY, WEEK, MONTH, YEAR,]

# Generate new periodic features: one sin/cos pair per period length.
# NOTE(review): if rows are 10 minutes apart (144 per day), every timestamp is
# a whole multiple of MINUTE, so minute_sin/minute_cos are numerically
# constant -- consider dropping that pair.
dt_names = ['minute', 'hour', 'day', 'week', 'month', 'year', ]
for dt_n, dt_f in zip(dt_names, dt_features):
    print(f"Generating features for {dt_n} ...")
    t1 = time.time()
    # Phase angle shared by the sin and cos columns of this period.
    phase = timestamp_dt * (2 * np.pi / dt_f)
    df[f"{dt_n}_sin"] = np.sin(phase).values
    df[f"{dt_n}_cos"] = np.cos(phase).values
    t2 = time.time()
    print(f"\t\t ... in {round(t2-t1, 2)} seconds")

# for col in dt_names:
#     plt.plot(date_time.values, df[col+'_sin'].values, 'ro', 
#              date_time.values, df[col+'_cos'].values, 'bo')
#     plt.show()

In [None]:
# Total row count, followed by the row count of each calendar year.
print(len(df))
index_years = df.index.year
for year in range(2009, 2020):
    print(year, int((index_years == year).sum()))

In [None]:
# Model inputs are the periodic time features; the target column goes last.
features_dt = [f"{name}_sin" for name in dt_names] + \
              [f"{name}_cos" for name in dt_names]
columns = features_dt + feature_endog

# Year-based split. The test mask previously read
# `(year == 2015) & (year <= 2016)`, whose second clause could never matter;
# it now mirrors the train mask as an inclusive 2015-2016 range.
# NOTE(review): if a 2015-only test set was intended, use `index_years == 2015`.
index_years = df.index.year
train_mask = (index_years >= 2013) & (index_years <= 2014)
test_mask = (index_years >= 2015) & (index_years <= 2016)

# Single .loc selection instead of chained df[columns][mask] indexing.
train_df = df.loc[train_mask, columns]
test_df = df.loc[test_mask, columns]
display(train_df.tail(5))
display(test_df.head(5))

train_size, test_size = len(train_df), len(test_df)
print(train_size, test_size)

compose_df = pd.concat([train_df, test_df])

# Train targets in blue, test targets in red.
plt.plot(train_df.index, train_df['T (degC)'], 'bo',
          test_df.index, test_df['T (degC)'], 'ro')

In [None]:
# for col in list(compose_df.columns):
#     if col == 'Date Time':
#         continue
#     compose_df[[col]].plot()
#     plt.show()

In [None]:
# for col in features_exog+feature_endog:
#     sns.distplot(train_df[col])
#     sns.distplot(test_df[col])
#     plt.show()

## **Build dataset**

In [None]:
# 1 day: 24*6=144 samples
# 1 week: 7*24*6=1008 samples
SEQ_LEN = 256

# The test split is prefixed with the last SEQ_LEN-1 training rows.
context_rows = train_df.iloc[-SEQ_LEN + 1:]
datasets = {
    'train': train_df,
    'test': pd.concat([context_rows, test_df]),
}

In [None]:
# Summary statistics of the training split.
datasets['train'].describe()

In [None]:
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# The target scaler is fitted on the training split only and reused for test.
scaler = StandardScaler()
BATCH_SIZE = 64
data_generators = dict()

for ds_name, ds in datasets.items():
    data = ds.values
    # Every column but the last is an input feature; the last is the target.
    X, y = data[:, :-1].astype(np.float32), data[:, -1].astype(np.float32)
    if ds_name == 'train':
        y = scaler.fit_transform(y.reshape(-1, 1)).flatten()
    else:
        y = scaler.transform(y.reshape(-1, 1)).flatten()
    print(f"{ds_name}: {data.shape} --> {X.shape} + {y.shape}")

    # timeseries_dataset_from_array pairs targets[i] with the window that
    # STARTS at row i. Dropping the first SEQ_LEN-1 targets pairs each window
    # with the target of its LAST row instead, so the k-th test window lines
    # up with the k-th row of test_df (the test split was prefixed with
    # SEQ_LEN-1 training rows for exactly this purpose). The number of
    # windows is unchanged.
    data_generators[ds_name] = timeseries_dataset_from_array(
        X, y[SEQ_LEN - 1:], batch_size=BATCH_SIZE,
        sampling_rate=1,
        sequence_stride=1,
        sequence_length=SEQ_LEN,
    )

    # Peek at one batch to show the resulting shapes.
    for batch in data_generators[ds_name].take(1):
        inputs, targets = batch
        print("\t Input shape:", inputs.numpy().shape)
        print("\t Target shape:", targets.numpy().shape)

In [None]:
# Number of windows per split, obtained by walking every batch once.
LEN = {
    ds_name: sum(X_batch.shape[0] for X_batch, _ in generator)
    for ds_name, generator in data_generators.items()
}

print(LEN)

In [None]:
# Every column of train_df except one (the target) is an input feature.
N_FEATURES = train_df.shape[1] - 1
N_FEATURES

In [None]:
data_batch = dict()

for ds_name, generator in data_generators.items():
    n_samples = LEN[ds_name]

    # Use memory-mapping to reduce RAM usage
    X_all = np.memmap(f"{ds_name}_X.npy", dtype='float32', mode='w+', shape=(n_samples, SEQ_LEN, N_FEATURES))
    y_all = np.memmap(f"{ds_name}_y.npy", dtype='float32', mode='w+', shape=(n_samples, ))

    # Copy every batch using a running offset. The final batch is usually
    # shorter than BATCH_SIZE; skipping it (as a `len == BATCH_SIZE` guard
    # does) leaves all-zero rows at the tail of both arrays.
    offset = 0
    for X_batch, y_batch in generator:
        n_rows = X_batch.shape[0]
        X_all[offset:offset + n_rows, ...] = X_batch.numpy()
        y_all[offset:offset + n_rows] = y_batch.numpy()
        offset += n_rows
    assert offset == n_samples, f"{ds_name}: wrote {offset} rows, expected {n_samples}"

    # TSAI Input Shape: (N_samples, N_features, Max_seq_len)
    X_all = np.transpose(X_all, axes=(0,2,1))

    data_batch[ds_name] = [X_all, y_all]
    print(ds_name, X_all.shape, y_all.shape)

    del X_all, y_all

In [None]:
# Unpack the [X, y] pairs stored in data_batch for each split.
X_train, y_train = data_batch['train']
X_test, y_test = data_batch['test']

# **Modelling**

## **Loss Function**

In [None]:
# Huber Loss, aka Smoothed Mean Absolute Error
from tensorflow.keras.losses import Huber, Reduction

# Reduction.NONE: presumably keeps one loss value per sample rather than an
# aggregate; the training loop later flattens the result into a loss_df column.
loss_func = Huber(reduction=Reduction.NONE)

In [None]:
# One row per test timestamp; each model later adds its own loss column.
loss_df = pd.DataFrame({'Date': test_df.index})

## **TSAI**

In [None]:
# Install tsai, which the `from tsai.all import *` cell below depends on.
# NOTE(review): the version is not pinned and --ignore-installed makes pip
# reinstall packages that are already present -- consider pinning a version.
!pip install --ignore-installed tsai

In [None]:
from tsai.all import *

import torch

def torch2np(tensor: torch.Tensor) -> np.ndarray:
    """Convert a torch tensor to a NumPy array.

    The tensor is detached from the autograd graph and moved to host memory
    before conversion, so tensors on any device, with or without
    ``requires_grad``, are accepted.

    Args:
        tensor: Tensor to convert.

    Returns:
        NumPy array holding the tensor's values.
    """
    # .cpu() returns the tensor itself when it already lives on the CPU, so
    # no device check is needed.
    return tensor.detach().cpu().numpy()


# Preferred compute device: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## **MiniRocket**

In [None]:
# params = {'num_features': 10_000, 
#           'max_dilations_per_kernel': 64, 
#           'normalize_features': False, 
#           'scoring': make_scorer(MSE, greater_is_better=False),
#           'verbose': True, }

# for n_estimators in [1,3,5]:
#     if n_estimators == 1:
#         model = MiniRocketRegressor(**params)
#     else:
#         model = MiniRocketVotingRegressor(n_estimators=n_estimators, **params)

#     print(f"\n\n Training MiniRocket-{n_estimators} ...")
#     timer.start(False)
#     model.fit(X_train, y_train)
#     t = timer.stop()
#     print(f"\t ... in {t}")
    
#     predictions = model.predict(X_test)
#     predictions = scaler.inverse_transform(predictions.reshape((-1,1)))
    
#     plt.plot(train_df.index, train_df['T (degC)'], 'ro',
#              test_df.index, test_df['T (degC)'], 'yo', 
#              test_df.index, predictions, 'bo')
#     plt.show()
    
#     loss = loss_func(test_df['T (degC)'].values.reshape(-1,1),
#                      predictions).numpy()
#     loss_df[f'MiniRocket-{n_estimators}'] = loss.flatten()
    
#     del model
    
# loss_df.describe()

## **Deep Neural Networks**:
* **ResNet**
* **XceptionTime**
* **InceptionTime**
* **TSTransformer**

In [None]:
# Registry of architectures to benchmark: display name -> (model class, kwargs).
# The training loop passes each pair to create_model(model, dls=dloaders, **params).
DL_models = {
    "ResNet": (ResNetPlus, {'nf': 16, 'ks': [5, 3, 2], 'seq_len': SEQ_LEN}), 
    "XceptionTime": (XceptionTime, {'nf': 16, 'adaptive_size': 24, 'residual': True}), 
    "XceptionTimePlus": (XceptionTimePlus, {'nf': 24, 'adaptive_size': 28, 'residual': True}), 
    "InceptionTime": (InceptionTime, {'nf': 16, 'ks': SEQ_LEN//2}), 
    "InceptionTimePlus": (InceptionTimePlus, {'nf': 24, 'ks': SEQ_LEN//2, 'bottleneck': True, 'depth': 4, 'dilation': 1, 'stride': 1}), 
    "TSTransformer": (TST, {'max_seq_len': SEQ_LEN, 'd_model': 32, 'd_ff': 16, 'n_layers': 2, 'n_heads': 4, }), 
    "TSTransformerPlus": (TSTPlus, {'max_seq_len': SEQ_LEN, 'd_model': 32, 'd_ff': 16, 'n_layers': 2, 'n_heads': 4, }), 
}

In [None]:
# Combine the train and test arrays; `splits` is handed to TSDatasets below.
# NOTE(review): presumably `splits` holds the train/valid index sets, with the
# test arrays acting as the validation split -- confirm in the tsai docs.
X_dl, y_dl, splits = combine_split_data([X_train, X_test], [y_train, y_test])

# NOTE(review): presumably one tfms entry per (X, y): none for the inputs,
# TSRegression() for the targets -- confirm.
transformations = [None, [TSRegression()]]
# Batch-level standardization with by_sample and by_var both disabled.
batch_transformations = [TSStandardize(by_sample=False, by_var=False)]
dsets = TSDatasets(X_dl, y_dl, splits=splits, tfms=transformations, inplace=True)
# NOTE(review): bs=[64, 32] presumably maps to the train and valid loaders in that order.
dloaders = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[64, 32], batch_tfms=batch_transformations, num_workers=0)

In [None]:
# Visual sanity check of one batch from the data loaders.
dloaders.show_batch(sharey=True)

In [None]:
for model_name, (arch, params) in DL_models.items():

    print(f"\n\n Training {model_name} ...")
    timer.start(False)

    # Instantiate the architecture and wrap it in a learner.
    model = create_model(arch, dls=dloaders, **params)
    learner = Learner(dls=dloaders, model=model, metrics=[mae, rmse], opt_func=Adam)

    # Learning-rate search; fall back to 1e-3 when it raises.
    try:
        lr_lowest, lr_steepest = learner.lr_find(start_lr=1e-7, end_lr=1e0, num_it=169)
    except Exception as e:
        print('\t', e)
        lr_lowest = 1e-3
    print(f"\t ... with learning-rate = {lr_lowest}")

    # One-cycle training; errors are reported and the loop carries on.
    try:
        learner.fit_one_cycle(n_epoch=7, lr_max=lr_lowest)
    except Exception as e:
        print('\t', e)

    t = timer.stop()
    print(f"\t ... in {t}")

    # Predict on the validation loader and undo the target scaling.
    y_pred = torch2np(learner.get_preds(dl=dloaders.valid)[0].detach())
    predictions = scaler.inverse_transform(y_pred.reshape((-1,1)))

    # Flush any pending figure, then plot train (red), test (yellow) and
    # predicted (blue) temperatures.
    plt.show()
    plt.plot(train_df.index, train_df['T (degC)'], 'ro',
             test_df.index, test_df['T (degC)'], 'yo',
             test_df.index, predictions, 'bo')
    plt.show()

    # Store this model's loss against the test targets as a loss_df column.
    y_true = test_df['T (degC)'].values.reshape(-1,1)
    loss = loss_func(y_true, predictions).numpy()
    loss_df[model_name] = loss.flatten()

    del model, learner

In [None]:
# Summary statistics of the loss columns collected for each model.
loss_df.describe()