In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import torch
from sklearn.metrics import mean_squared_error as sk_mse

from fastai.learner import Learner
from fastai.metrics import rmse
from fastai.tabular.model import get_emb_sz
from fastai.tabular.all import *

from dies.data import tp_from_df, get_y_ranges, split_by_year, create_consistent_number_of_sampler_per_day, TimeseriesTransform
from dies.utils_pytorch import xavier_init_uniform
from dies.autoencoder import ConvolutionalAutoEncoder
from dies.data import *
from dies.embedding import EmbeddingModule

from dies.losses import CnnMSELoss

%load_ext autoreload
%autoreload 2
# Global plot styling: large "poster" scaling, white grid background, and a
# fixed legend position so figures across the notebook look consistent.
sns.set_context(context='poster')
sns.set_style(style='whitegrid')
mpl.rcParams.update({"legend.loc": 'upper right'})

In [None]:
# Load the raw table stored under the 'powerdata' key of the HDF5 file.
# The store is opened read-only: HDFStore defaults to append mode ('a'), which
# requires write access and silently creates an empty file when the path is
# wrong, turning a missing-file error into a confusing KeyError.
with pd.HDFStore('./data/GEFCOM_Z1.h5', mode='r') as store:
    df = store['powerdata']

In [None]:
# Impute missing values with the per-column mean, then drop any column that
# still contains NaNs (e.g. a column that was entirely empty).
# numeric_only=True restricts the mean to numeric columns: at this point the
# frame still holds the unparsed TimeUTC column, and pandas >= 2.0 raises a
# TypeError when asked to average non-numeric data.
df = df.fillna(df.mean(numeric_only=True))
df = df.dropna(axis=1)

In [None]:
# Parse timestamps as timezone-aware UTC, use them as the index, and express
# power generation as a fraction of the maximum power generation; the
# MaxPowerGeneration column is no longer needed afterwards.
# `infer_datetime_format` is intentionally not passed: it is deprecated and
# has no effect since pandas 2.0.
# NOTE: this cell consumes the TimeUTC / MaxPowerGeneration columns, so it can
# only be run once per freshly loaded frame.
df = (
    df.assign(
        TimeUTC=pd.to_datetime(df.TimeUTC, utc=True),
        PowerGeneration=(
            df.PowerGeneration.astype(float) / df.MaxPowerGeneration.astype(float)
        ),
    )
    .set_index('TimeUTC')
    .drop(columns='MaxPowerGeneration')
)

In [None]:
# Peek at the first rows to sanity-check the preprocessed frame.
df.head()

In [None]:
# Derive the two calendar features from the datetime index; they are used as
# categorical inputs further down.
df['DayOfYear'], df['Hour'] = df.index.dayofyear, df.index.hour

In [None]:
# (rows, columns) of the frame at this point, for comparison with the shape
# after the per-day consistency step.
df.shape

In [None]:
# Normalise the frame to 24 samples per day using the dies helper.
# NOTE(review): whether incomplete days are dropped or filled is decided
# inside dies.data — confirm there. The resulting shape is displayed below.
df = create_consistent_number_of_sampler_per_day(df, 24)
df.shape

In [None]:
# Continuous weather features. They are passed as both inputs and targets
# because the model below is an autoencoder that reconstructs its own input.
cols = [
    'WindDirectionMeridional100m',
    'WindDirectionMeridional10m',
    'WindDirectionZonal100m',
    'WindDirectionZonal10m',
    'WindSpeed100m',
    'WindSpeed10m',
]

# Calendar features handled as categorical variables.
cat_cols = ['DayOfYear', 'Hour']

tp = tp_from_df(
    df,
    y_columns=cols,
    x_columns=cols,
    cat_columns=cat_cols,
    standardize_X=True,
    do_split_by_n_weeks=True,
)

In [None]:
# Wrap the tabular object into fixed-length sequences of 24 time steps.
# With batch_first and sequence_last both set, the sequence axis comes last in
# each sample. Neither the train nor the valid flag is set here; the split is
# made later when the dataloaders are built.
train_all = TimeseriesTransform(
    tp,
    timeseries_length=24,
    batch_first=True,
    sequence_last=True,
    is_train=False,
    is_valid=False,
    drop_inconsistent_cats=False,
)

In [None]:
# Row count divided by the sequence length (24): the number of sequences the
# transform holds. A non-integer result would indicate incomplete sequences.
train_all.tp.items.shape[0] / 24

In [None]:
# Manual reference reshape of the continuous features, for comparison with
# what the transform yields: first (n_sequences, 24, n_features) ...
t1 = torch.Tensor(
    tp[tp.cont_names].values.reshape(-1, 24, len(tp.cont_names))
)
# ... then swap the last two axes to get (n_sequences, n_features, 24),
# i.e. the sequence axis last.
t2 = t1.transpose(1, 2)
t2

In [None]:
# Random 80/20 train/validation split for testing purposes; a sample in the
# validation set corresponds to a single day (one 24-step sequence).
# The splitter is seeded so the same days end up in the validation set on
# every run, keeping metrics and plots comparable across kernel restarts.
dls = train_all.to_dataloaders(bs=64, splits=RandomSplitter(valid_pct=0.2, seed=42))

In [None]:
# Pull a single batch from the dataloaders so its structure can be inspected
# in the following cells.
test_batch = dls.one_batch()

In [None]:
# Shape of the third element of the batch.
# NOTE(review): presumably the target tensor — confirm against the transform.
test_batch[2].shape

In [None]:
# Display a sample batch as a visual sanity check of the dataloaders.
dls.show_batch()

In [None]:
# Shapes of the first three batch elements, side by side.
# NOTE(review): presumably (categorical, continuous, target) — confirm.
test_batch[0].shape, test_batch[1].shape, test_batch[2].shape

In [None]:
from dies.embedding import Embedding

# Number of continuous input features (columns of tp.conts).
input_size = tp.conts.shape[1]

# Embedding table sizes, one per categorical variable.
# NOTE(review): presumably ordered as (DayOfYear, Hour) with one spare slot
# each (day-of-year 1..366, hour 0..23) — confirm against cat_cols.
sizes_cat = [367, 25]

# Layer widths of the autoencoder: input -> 10 -> 5.
ann_structure = [input_size, 10, 5]

embedding_module = EmbeddingModule(
    sizes_cat,
    embedding_dropout=0.1,
    embedding_dimensions=None,
)

# Build the convolutional autoencoder with the embedding module attached at
# the "start" position, then apply Xavier-uniform initialisation to it.
ann_model = ConvolutionalAutoEncoder(
    ann_structure,
    kernel_size=4,
    padding=True,
    embedding_module=embedding_module,
    embeding_position="start",
).apply(xavier_init_uniform)

In [None]:
# Bundle dataloaders, model and loss into a fastai Learner; CnnMSELoss is the
# project's MSE variant for the convolutional model's outputs.
learn = Learner(dls=dls, model=ann_model, loss_func=CnnMSELoss())

In [None]:
# Print the model's layer-by-layer summary as a check of the architecture.
learn.summary()

In [None]:
# Learning-rate range test to pick the peak LR for the one-cycle run below.
learn.lr_find()

In [None]:
# First training stage: 20 epochs with the one-cycle schedule, peak LR 0.02.
learn.fit_one_cycle(20, lr_max=0.02)

In [None]:
# Re-run the learning-rate range test on the partially trained model to choose
# the LR for the follow-up training stage.
learn.lr_find()

In [None]:
# Second training stage: 10 more epochs at a constant learning rate of 1e-3.
learn.fit(10, lr=1e-3)

In [None]:
# Collect predictions and targets; ds_idx=1 refers to the validation dataset.
preds, targets = learn.get_preds(ds_idx=1)

In [None]:
# Names of the continuous features, read from the validation dataset's
# tabular object; used to title the per-feature plots.
cont_names = learn.dls.valid_ds.tp.cont_names

In [None]:
# Plot reconstruction vs. target for the first validation sample, one figure
# per continuous feature. The indexing assumes preds/targets are laid out as
# (sample, feature, time step) — TODO confirm against the transform settings.
for idx, cont_name in enumerate(cont_names):
    fig, ax = plt.subplots()
    ax.plot(preds[0, idx, :], label="Pred")
    ax.plot(targets[0, idx, :], label="Target")  # label typo "Targget" fixed
    ax.set(title=cont_name, xlabel="Time step")
    ax.legend()
    plt.show()
    # Release the figure explicitly so looping over many features does not
    # accumulate open figures.
    plt.close(fig)