In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import glob
from tqdm import tqdm_notebook as tqdm

import torch
import cv2
from PIL import Image

import torchvision.transforms as transforms
import torchvision.models as pretrained_models
import os
#import pretrainedmodels
import torch.nn as nn
import torch.nn.functional as F
import math
import torchvision

import warnings
warnings.filterwarnings('ignore')


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from tqdm import trange
from sklearn.model_selection import train_test_split
import sklearn.metrics
from sklearn import preprocessing

import torch
import torch.nn as nn
import torch.nn.functional as F

import warnings
warnings.filterwarnings("ignore")

In [None]:
## utils

def reduce_mem_usage(df, verbose=True):
    """
    Downcast the numeric columns of a DataFrame to the narrowest dtype whose
    range strictly contains the column's minimum and maximum.

    :param df: DataFrame to shrink; its columns are reassigned in place
    :param verbose: when True, print the final memory footprint and the percentage saved
    :return: the same DataFrame object, with downcast columns
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    mem_before = df.memory_usage().sum() / 1024**2
    for name in df.columns:
        kind = df[name].dtypes
        if kind not in numerics:
            continue

        lo = df[name].min()
        hi = df[name].max()
        if str(kind)[:3] == 'int':
            # First integer type that strictly contains [lo, hi]; untouched if none does
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if lo > limits.min and hi < limits.max:
                    df[name] = df[name].astype(candidate)
                    break
        else:
            # Same search over float types, falling back to float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if lo > limits.min and hi < limits.max:
                    df[name] = df[name].astype(candidate)
                    break
            else:
                df[name] = df[name].astype(np.float64)

    mem_after = df.memory_usage().sum() / 1024**2
    if verbose:
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(mem_after, 100 * (mem_before - mem_after) / mem_before))
    return df

In [None]:

def find_start_end(data: np.ndarray):
    """
    Locates the span of real demand in every series: the index of the first
    and of the last strictly positive (non-NaN) value.

    :param data: Time series, shape [n_items, n_days]
    :return : start_idx, end_idx -- two int32 arrays of length n_items; an entry
    is -1 when the series has no positive value at all
    """

    n_items = data.shape[0]
    n_days = data.shape[1]

    start_idx = np.full(n_items, -1, dtype=np.int32)
    end_idx = np.full(n_items, -1, dtype=np.int32)

    # argmax is undefined on a zero-length axis, so answer trivially here
    if n_items == 0 or n_days == 0:
        return start_idx, end_idx

    # NaN > 0 evaluates to False, so missing values are excluded by the
    # comparison itself; errstate silences the warning older NumPy emits for it
    with np.errstate(invalid="ignore"):
        positive = np.asarray(data > 0, dtype=bool)

    has_demand = positive.any(axis=1)
    # argmax on a boolean row returns the position of the first True
    first = positive.argmax(axis=1)
    # same trick on the reversed row, mapped back to forward coordinates
    last = n_days - 1 - positive[:, ::-1].argmax(axis=1)

    start_idx[has_demand] = first[has_demand]
    end_idx[has_demand] = last[has_demand]

    return start_idx, end_idx


def read_x(df, start, end) -> pd.DataFrame:
    """
    Selects the columns of ``df`` between two labels (both inclusive).
    Either bound may be None, meaning "open on that side".

    :param df -> dataframe whose columns are days
    :param start -> first column label to keep, or None to start from the first column
    :param end -> last column label to keep, or None to run to the last column

    :returns -> df itself when both bounds are None, otherwise the column slice
    """
    # Compare against None explicitly: a label such as 0 is falsy but valid
    if start is None and end is None:
        return df
    # A None bound leaves that side of the label slice open, so this one
    # expression covers start-only, end-only and both-given
    return df.loc[:, start:end]


def prepare_data(df, start, end, valid_threshold):
    """
    Cuts the frame to the requested column interval, finds where each series
    really starts and ends, and drops the series that are too short.
    The demand values are returned untransformed (no log1p is applied).

    :param df: demand frame, one row per series and one column per day
    :param start: first column of the effective interval, None to start from the beginning
    :param end: last column of the effective interval, None to keep everything
    :param valid_threshold: minimal ratio of (series end - series start) to the number
    of columns in the interval; series below it are dropped
    :return: tuple(filtered frame, series starts, series ends)
    """
    window = read_x(df, start, end)
    starts, ends = find_start_end(window.values)

    # True for the series whose real length covers too little of the interval
    coverage = (ends - starts) / window.shape[1]
    too_short = coverage < valid_threshold

    print("Masked %d pages from %d" % (too_short.sum(), len(window)))

    keep = ~too_short
    return window[keep], starts[keep], ends[keep]

def encode_id_features(df):
    """
    One-hot encodes every id feature and standardises each resulting column
    (subtract the column mean, divide by the column standard deviation).

    NOTE: a second ``encode_id_features`` is defined further down in this same
    cell and rebinds the name, so this generic version is not the one called.

    :param df: id features DataFrame with an "id" column plus one column per feature
    :return: dictionary feature_name -> encoded DataFrame indexed by id,
    with one column per distinct value of that feature
    """
    indexed = df.set_index("id")

    encoded = {}
    for column in indexed.columns:
        dummies = pd.get_dummies(indexed[column], drop_first=False)
        encoded[str(column)] = (dummies - dummies.mean()) / dummies.std()
    return encoded
    

def encode_id_features(id_features):
    """
    One-hot encodes the five hierarchy columns (item, department, category,
    store, state), standardises every dummy column and joins the results into
    one wide frame indexed by id.

    :param id_features: DataFrame with columns id, item_id, dept_id, cat_id, store_id, state_id
    :return: DataFrame indexed by id; columns are the standardised dummies,
    prefixed with the name of the column they came from
    """
    df = id_features.set_index("id")

    hierarchy = ("item_id", "dept_id", "cat_id", "store_id", "state_id")

    encoded = []
    for name in hierarchy:
        dummies = pd.get_dummies(df[name], drop_first=False, prefix=name)
        encoded.append((dummies - dummies.mean()) / dummies.std())

    item_id, dept_id, cat_id, store_id, state_id = encoded

    # Left-join the blocks one after another on the shared id index
    final_df = item_id
    for block in encoded[1:]:
        final_df = pd.merge(final_df, block, how="left", left_index=True, right_index=True)

    print(f"item_id : {item_id.shape} , dept_id : {dept_id.shape} , cat_id : {cat_id.shape} , store_id : {store_id.shape}, state_id : {state_id.shape}")

    print(f"encoded_id_df : {final_df.shape}")

    return final_df

def normalize(values: np.ndarray):
    """
    Standardises an array: subtracts its mean and divides by its (population)
    standard deviation as computed by ``np.std``.

    :param values: numeric array
    :return: array of the same shape with zero mean and unit standard deviation
    """
    centered = values - values.mean()
    spread = np.std(values)
    return centered / spread

def lag_indexes(begin, end, months=(3, 6, 9, 12)):
    """
    Calculates indexes for backward month lags over the given date range
    :param begin: start of date range
    :param end: end of date range
    :param months: lag sizes in months; one Series is produced per entry
    (defaults to the original 3, 6, 9 and 12 months)
    :return : List of Series, one for each lag. For each Series, index is date in range(begin, end),value is an index
    of target (lagged) date in a same Series. If target date is out of (begin, end) range, index is -1
    """

    dr = pd.date_range(begin, end)

    def lag(offset):
        # get_indexer maps every lagged date to its position in dr and
        # returns -1 for dates that are not part of the range
        positions = dr.get_indexer(dr - offset)
        # int16 is kept from the original implementation; it holds positions
        # up to 32767, i.e. date ranges of roughly 89 years
        return pd.Series(data=positions.astype(np.int16), index=dr)

    return [lag(pd.DateOffset(months=m)) for m in months]

def single_autocorr(series, lag):
    """
    Pearson correlation between a series and a copy of itself shifted by ``lag``.
    :param series: one demand series
    :param lag: shift between the two compared slices, in days
    :return: correlation coefficient, or 0 when either slice has no variance
    """
    shifted = series[lag:]
    base = series[:-lag]

    shifted_dev = shifted - np.mean(shifted)
    base_dev = base - np.mean(base)

    divider = np.sqrt(np.sum(shifted_dev * shifted_dev)) * np.sqrt(np.sum(base_dev * base_dev))
    if divider == 0:
        return 0
    return np.sum(shifted_dev * base_dev) / divider

def batch_autocorr(data, lag, starts, ends, threshold, backoffset=0):
    """
    Calculate autocorrelation for batch (many time series at once)
    :param data: Time series, shape [n_pages, n_days]
    :param lag: Autocorrelation lag
    :param starts: Start index for each series
    :param ends: End index for each series
    :param threshold: Minimum support (ratio of time series length to lag) to calculate meaningful autocorrelation.
    :param backoffset: Offset from the series end, days.
    :return: autocorrelation, shape [n_series]. If series is too short (support less than threshold),
    autocorrelation value is NaN
    """
    n_series = data.shape[0]
    n_days = data.shape[1]
    max_end = n_days - backoffset

    # Start from NaN everywhere; only series with enough support are overwritten.
    # np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
    corr = np.full(n_series, np.nan, dtype=np.float64)

    for i in range(n_series):
        end = min(ends[i], max_end)
        real_len = end - starts[i]
        support = real_len / lag
        if support > threshold:
            series = data[i][starts[i]:end]
            c_exact = single_autocorr(series, lag)
            c_before = single_autocorr(series, lag - 1)
            c_after = single_autocorr(series, lag + 1)
            # Average value between exact lag and two nearest neighbours for smoothness
            corr[i] = 0.5 * c_exact + 0.25 * c_before + 0.25 * c_after
    return corr

def create_seles_features(sales_df):
    """
    Builds the per-series (static) features from the wide sales table.

    The input frame is not modified: the id columns are dropped on a copy, so
    the function can be called again on the same frame.

    :param sales_df: wide sales DataFrame with columns id, item_id, dept_id, cat_id,
    store_id, state_id followed by one column per day whose name ends in "_<day number>"
    :return: tuple(
        static features indexed by id (item popularity, yearly and quarterly
        autocorrelation, standardised one-hot id encodings),
        demand frame indexed by id with integer day numbers as columns,
        dict with the train/valid/test date ranges and each series' start/end index)
    """
    id_columns = ["item_id", "dept_id", "cat_id", "store_id", "state_id"]

    id_features = sales_df[["id"] + id_columns]

    # drop() without inplace returns a new frame and leaves the caller's data intact
    df = sales_df.drop(columns=id_columns).set_index("id")

    df, starts, ends = prepare_data(df, start=None, end=None, valid_threshold=0.0)

    # ISO dates: unlike day-first strings they parse the same way regardless of parser settings
    train_days_range = pd.date_range('2011-01-29', periods=1913)
    valid_days_range = pd.date_range('2016-04-25', periods=28)
    test_days_range = pd.date_range('2016-05-23', periods=28)

    # Day columns are named "<prefix>_<n>"; keep only the integer day number
    df.columns = [int(name.split("_")[-1]) for name in df.columns]

    date_start, date_end = train_days_range[0] , train_days_range[-1]
    features_end = valid_days_range[-1]

    print(f"date_satart : {date_start} , date_end : {date_end}, features_end : {features_end}")

    encoded_id_features = encode_id_features(id_features)

    # Median daily demand per series, standardised across series
    item_popularity = df.median(axis=1)
    item_popularity = (item_popularity - item_popularity.mean()) / item_popularity.std()

    # Yearly(annual) autocorrelation
    raw_year_autocorr = batch_autocorr(df.values, 365, starts, ends, 1.5, 0)
    year_unknown_pct = np.sum(np.isnan(raw_year_autocorr))/len(raw_year_autocorr)
    # Undefined autocorrelations become 0 before standardising
    year_autocorr = normalize(np.nan_to_num(raw_year_autocorr))

    # Quarterly autocorrelation
    raw_quarter_autocorr = batch_autocorr(df.values, int(round(365.25/4)), starts, ends, 2, 0)
    quarter_unknown_pct = np.sum(np.isnan(raw_quarter_autocorr)) / len(raw_quarter_autocorr)  # type: float
    # Undefined autocorrelations become 0 before standardising
    quarter_autocorr = normalize(np.nan_to_num(raw_quarter_autocorr))

    print("Percent of undefined autocorr = yearly:%.3f, quarterly:%.3f" % (year_unknown_pct, quarter_unknown_pct))

    final_df = pd.DataFrame({
        "item_popularity" : item_popularity,
        "year_autocorr" : year_autocorr,
        "quarter_autocorr": quarter_autocorr
    })

    final_df.index = df.index.values

    final_df = pd.merge(final_df,encoded_id_features, how="left", left_index=True, right_index=True)

    print("id_features : ", final_df.shape)

    extra_features = {
        "train_days_range": train_days_range,
        "valid_days_range": valid_days_range,
        "test_days_range": test_days_range,
        "starts" : starts,
        "ends" : ends
    }

    return final_df, df, extra_features

    
def create_date_features(df):
    """
    Builds the per-day (calendar) feature frame.

    The input frame is left untouched: the parsed dates are kept in a local
    Series instead of being written back into ``df["date"]``.

    :param df: calendar DataFrame with columns date, wday, month, event_name_1,
    event_type_1, event_name_2, event_type_2, snap_CA, snap_TX, snap_WI
    :return: DataFrame indexed by date holding the cyclic week-day encoding
    (wday_cos, wday_sin), the three SNAP flags and standardised one-hot encodings
    of month, day of month and the four event columns (NaN gets its own dummy)
    """
    dates = pd.to_datetime(df["date"])

    # Week day mapped onto a circle of period 7
    week_period = 7 / (2 * np.pi)
    dow_norm = df["wday"].values / week_period
    wday_cos = np.cos(dow_norm)
    wday_sin = np.sin(dow_norm)

    def one_hot_standardised(values, prefix, dummy_na=False):
        # One dummy column per distinct value, each standardised to zero mean / unit std
        dummies = pd.get_dummies(values, drop_first=False, dummy_na=dummy_na, prefix=prefix)
        return (dummies - dummies.mean()) / dummies.std()

    encoded_blocks = [
        one_hot_standardised(df["month"], "month"),
        # the "date" prefix is kept so column names stay date_1 ... date_31
        one_hot_standardised(dates.dt.day, "date"),
    ]
    for column in ("event_name_1", "event_type_1", "event_name_2", "event_type_2"):
        encoded_blocks.append(one_hot_standardised(df[column], column, dummy_na=True))

    final_df = pd.DataFrame({
        "date": dates,
        "wday_cos": wday_cos,
        "wday_sin": wday_sin,
        "snap_CA": df["snap_CA"].values,
        "snap_TX": df["snap_TX"].values,
        "snap_WI": df["snap_WI"].values
    })

    # Attach the encoded blocks on the shared row index, in a fixed order
    for block in encoded_blocks:
        final_df = pd.merge(final_df, block, how="left", left_index=True, right_index=True)

    return final_df.set_index("date")

### Id Features

In [None]:
# Load the wide sales table, shrink its numeric dtypes, then derive:
#   id_features_df      - static per-series features, indexed by id
#   demand_df           - demand values, indexed by id
#   extra_features_dict - date ranges and per-series start/end indexes
sales_train_validation = pd.read_csv("../input/m5-forecasting-accuracy/sales_train_validation.csv")
sales_train_validation = reduce_mem_usage(sales_train_validation)
id_features_df, demand_df, extra_features_dict = create_seles_features(sales_train_validation)

### Lag Indexes

In [None]:
def lag_indexes(begin, end, months=(1, 3, 6, 9, 12)):
    """
    Calculates indexes for backward month lags over the given date range
    :param begin: start of date range
    :param end: end of date range
    :param months: lag sizes in months; one Series is produced per entry
    (defaults to the original 1, 3, 6, 9 and 12 months)
    :return : List of Series, one for each lag. For each Series, index is date in range(begin, end),value is an index
    of target (lagged) date in a same Series. If target date is out of (begin, end) range, index is -1
    """

    dr = pd.date_range(begin, end)

    def lag(offset):
        # get_indexer maps every lagged date to its position in dr and
        # returns -1 for dates that are not part of the range
        positions = dr.get_indexer(dr - offset)
        # int16 is kept from the original implementation; it holds positions
        # up to 32767, i.e. date ranges of roughly 89 years
        return pd.Series(data=positions.astype(np.int16), index=dr)

    return [lag(pd.DateOffset(months=m)) for m in months]

In [None]:
# One lag Series per month offset over the full calendar span
lag_idx = lag_indexes('2011-01-29', '2016-05-22')

# Column order fixes the order of the lag features fed to the encoder
lag_names = ["month_1", "month_3", "month_6", "month_9", "month_12"]
lag_idx_df = pd.DataFrame({name: lag_idx[position] for position, name in enumerate(lag_names)})

# Plain array of shape [n_days, n_lags]; -1 marks "no lagged day available"
lag_index = lag_idx_df.values

lag_index.shape

In [None]:
# Total number of NaN cells in the static features and in the demand frame
# (first .sum() is per column, second collapses to a single count)
id_features_df.isna().sum().sum(), demand_df.isna().sum().sum()

### Calendar Features

In [None]:
%%time
# Load the calendar table, shrink its numeric dtypes and build the per-day
# feature frame (indexed by date)
calendar = pd.read_csv("../input/m5-forecasting-accuracy/calendar.csv")
calendar = reduce_mem_usage(calendar)
calendar_features_df = create_date_features(calendar)

In [None]:
# Total number of NaN cells in the calendar features
calendar_features_df.isna().sum().sum()

In [None]:
# (rows, columns) of the three feature frames
id_features_df.shape, demand_df.shape, calendar_features_df.shape

### Convert to PyTorch tensors

In [None]:
def convert_pytorch_tensors(df):
    """
    Splits a DataFrame into a tensor of its values and an array of its index labels.

    :param df: DataFrame whose values can be converted by ``torch.tensor``
    :return: tuple(tensor built from df.values, numpy array of the index labels)
    """
    return torch.tensor(df.values), df.index.values

# Tensor + index-label pairs for the three feature frames. These module-level
# tensors are read directly by TimeDataset.__getitem__.
id_tensor, id_idx = convert_pytorch_tensors(id_features_df)
demand_tensor, demand_idx = convert_pytorch_tensors(demand_df)
calender_tensor, calender_idx = convert_pytorch_tensors(calendar_features_df)

In [None]:
# Shapes of the static, demand and calendar tensors
id_tensor.shape, demand_tensor.shape,calender_tensor.shape

### Pytorch Data Loader 

In [None]:
# Long-format sample table for the PyTorch data loaders: one row per
# (series, day) with the series id, its row position in the tensors and the demand
train_df = (
    pd.DataFrame(demand_df.values, columns=np.arange(1, 1914))
    .assign(id=id_idx, id_index=np.arange(len(id_idx)))
    .melt(id_vars=["id", "id_index"], value_name="demand", var_name="day")
)

In [None]:
# (rows, columns) of the long-format sample table
train_df.shape

### Train & Valid & Test df's

In [None]:
# Each row is an anchor day for one series: TimeDataset reads the days before
# the anchor as encoder input and the days from the anchor onwards as targets.
#   train: anchors strictly between day 1500 and day 1885
#   valid: anchor day 1885
#   test : anchor day 1913
# NOTE(review): targets start at the anchor day and span predict_window days,
# so training anchors close to 1885 appear to share target days with the
# validation window -- confirm this overlap is intended.
train_days_df = train_df[(train_df.day > 1500) & (train_df.day < 1885)]
valid_days_df = train_df[train_df.day == 1885]
test_days_df = train_df[train_df.day == 1913]

In [None]:
# (rows, columns) of the train / valid / test sample tables
train_days_df.shape, valid_days_df.shape, test_days_df.shape

### For training, take some days in between

In [None]:
# Thin the training anchors to every 28th day starting at 1500.
# When the cells run in order, train_days_df already holds only days
# 1501..1884, so day 1500 itself and the values from 1885 up are not matched.
train_days = np.arange(1500,1913,28)
train_days_df = train_days_df[train_days_df.day.isin(train_days)]
train_days_df.shape

### DataLoader Class

In [None]:
class TimeDataset:
    """
    Map-style dataset yielding one encoder/decoder sample per (series, anchor day) row.

    Reads the module-level ``id_tensor``, ``calender_tensor``, ``demand_tensor``
    and ``lag_index`` built in the earlier cells.

    :param df: sample table with columns id, id_index (row of the series in the
    tensors) and day (1-based anchor day)
    :param hparams: dict providing "train_window" and "predict_window" (in days)
    """

    def __init__(self, df, hparams):

        self.id = df.id.values
        self.id_index = df.id_index.values
        self.day = df.day.values

        self.train_window = hparams["train_window"]
        self.predict_window = hparams["predict_window"]

    def __len__(self):
        return len(self.id)

    def __getitem__(self, item):
        """
        :param item: row position in the sample table
        :return: dict with
            "encoder_input"     [train_window, n_id + n_calendar + 1 + n_lags]
            "train_demand"      [train_window]
            "decoder_input"     [predict_window, n_id + n_calendar]
            "prediction_demand" demand from the anchor day onwards
        """
        id_index = self.id_index[item]
        day = self.day[item]-1 # because day index starts from 1

        window_start = day - self.train_window
        if window_start < 0:
            # a negative start would silently slice from the end of the tensors
            raise ValueError(
                "anchor day %d leaves fewer than train_window=%d days of history"
                % (day + 1, self.train_window)
            )

        id_features = id_tensor[id_index].unsqueeze(0)
        demand_row = demand_tensor[id_index]

        # encoder side: the train_window days before the anchor
        calendar_train = calender_tensor[window_start:day, :]
        id_train = torch.repeat_interleave(id_features, repeats=self.train_window, dim=0)
        demand_train = demand_row[window_start:day]

        # Lagged demand. lag_index holds -1 where no lagged day exists; used as
        # a position, -1 would pick the LAST day of the series. Mask those
        # entries, look up a safe position and zero the result.
        lag_positions = torch.as_tensor(lag_index[window_start:day], dtype=torch.long)
        missing_lag = lag_positions < 0
        lag_train = demand_row[lag_positions.clamp(min=0)].masked_fill(missing_lag, 0)

        # decoder side: predict_window days starting at the anchor
        calendar_predict = calender_tensor[day:day+self.predict_window, :]
        id_predict = torch.repeat_interleave(id_features, repeats=self.predict_window, dim=0)
        demand_predict = demand_row[day:day+self.predict_window]

        train_x = torch.cat([id_train, calendar_train, demand_train.unsqueeze(-1).double() ,lag_train.double()], dim=1)
        train_y = torch.cat([id_predict, calendar_predict], dim=1)

        return {
            "encoder_input":train_x,
            "train_demand":demand_train,
            "decoder_input":train_y,
            "prediction_demand" : demand_predict
        }

In [None]:
# Example: build a dataset over the training anchors and pull a single sample

hparams = dict(train_window=100, predict_window=28)

example = TimeDataset(train_days_df, hparams)
output = example[100]

In [None]:
# Shapes of the four tensors in one sample
output["encoder_input"].shape, output["train_demand"].shape, output["decoder_input"].shape, output["prediction_demand"].shape

### Encoder

In [None]:
class Encoder(nn.Module):
    """
    RNN encoder over the history window.

    :param hparams: dict providing "encoder_input_size", "encoder_hidden_size"
    and "encoder_num_layers"
    """

    def __init__(self, hparams):
        super().__init__()

        self.RNN = nn.RNN(
                    input_size=hparams["encoder_input_size"],
                    hidden_size=hparams["encoder_hidden_size"],
                    num_layers=hparams["encoder_num_layers"],
                    batch_first=True
                )

    def forward(self, X):
        """
        :param X: encoder input, [batch, time, encoder_input_size]
        :return: tuple(per-step outputs [batch, time, hidden],
        final hidden state of the top layer [batch, hidden])
        """
        rnn_out, rnn_state = self.RNN(X)

        # rnn_state is [num_layers, batch, hidden]. Taking the last layer equals
        # squeeze(0) for a single layer and still yields a [batch, hidden]
        # state when the encoder is stacked.
        encoder_state = rnn_state[-1]

        return rnn_out, encoder_state
    

### Decoder

In [None]:
class Decoder(nn.Module):
    """
    Autoregressive GRU decoder: at each step the previous prediction is
    concatenated with that day's features and fed to a GRU cell, whose output
    is projected to one demand value.

    :param hparams: dict providing "decoder_rnn_layers", "decoder_input_size"
    (feature width + 1 for the previous prediction), "decoder_hidden_size"
    and "decoder_fc_in_features"
    :raises NotImplementedError: if "decoder_rnn_layers" is greater than 1
    """

    def __init__(self, hparams):
        super().__init__()

        self.hparams = hparams

        if hparams["decoder_rnn_layers"] > 1:
            # Fail at construction instead of leaving self.cell undefined and
            # crashing with an AttributeError on the first forward pass
            raise NotImplementedError("Decoder supports a single recurrent layer only")

        self.cell = nn.GRUCell(input_size=hparams["decoder_input_size"],
                               hidden_size=hparams["decoder_hidden_size"]
                               )

        self.projected_output = nn.Linear(in_features=hparams["decoder_fc_in_features"], out_features=1)

    def forward(self, encoder_state, decoder_inputs, previous_y):
        """
        :param encoder_state: initial hidden state, [batch, hidden]
        :param decoder_inputs: features of the days to predict, [batch, time, input_depth]
        :param previous_y: last known demand value per series, [batch]
        :return: predictions, [batch, time]
        """
        # [batch_size, time, input_depth] -> [time, batch_size, input_depth]
        inputs_by_time = decoder_inputs.permute(1,0,2)

        step_predictions = []

        # [batch] -> [batch, 1] so it can be concatenated with the features
        prev_output = previous_y.unsqueeze(-1)
        prev_state = encoder_state

        for features in inputs_by_time:
            # append previous predicted values to input features
            next_input = torch.cat([prev_output, features], dim=1)

            # Run RNN cell; its output is also the next hidden state
            output = self.cell(next_input, prev_state)

            # Make prediction from RNN outputs
            projected_output = self.projected_output(output)
            step_predictions.append(projected_output)

            prev_output = projected_output
            prev_state = output

        # list of [batch, 1] -> [time, batch, 1] -> [time, batch] -> [batch, time]
        targets = torch.stack(step_predictions).squeeze(-1).transpose(1,0)

        return targets

### Loss Function

In [None]:
def criterion1(pred1, targets):
    """
    Root mean squared error over all elements.

    :param pred1: predicted values
    :param targets: true values, same shape as pred1
    :return: scalar tensor, sqrt(mean((pred1 - targets) ** 2))
    """
    return F.mse_loss(pred1, targets).sqrt()

### Train & Eval Loops

In [None]:
def train_model(encoder, decoder, train_loader, epoch, encoder_optimizer, decoder_optimizer, scheduler, history, DEVICE):
    """
    Runs one training epoch over ``train_loader``.

    :param encoder: encoder module
    :param decoder: decoder module
    :param train_loader: iterable of batches with keys encoder_input, train_demand,
    decoder_input and prediction_demand
    :param epoch: zero-based epoch number (progress bar and history index)
    :param encoder_optimizer: optimizer stepping the encoder parameters
    :param decoder_optimizer: optimizer stepping the decoder parameters
    :param scheduler: accepted for signature compatibility; not used here
    :param history: optional DataFrame receiving per-batch 'train_loss' and 'lr', or None
    :param DEVICE: device the batches are moved to
    """
    encoder.train()
    decoder.train()
    # Plain float: summing loss tensors would keep a tensor alive for every batch
    total_loss = 0.0

    t = tqdm(train_loader)
    for i, d in enumerate(t):

        encoder_input = d["encoder_input"].float().to(DEVICE)
        train_demand = d["train_demand"].float().to(DEVICE)
        decoder_input = d["decoder_input"].float().to(DEVICE)
        prediction_demand = d["prediction_demand"].float().to(DEVICE)

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        # The last observed demand seeds the decoder's first step
        previous_y = train_demand[:, -1]

        encoder_readout, encoder_state = encoder(encoder_input)
        output = decoder(encoder_state, decoder_input, previous_y)

        prediction_demand = prediction_demand.squeeze(-1)

        loss = criterion1(output, prediction_demand)
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss

        t.set_description(f'Epoch {epoch+1} : Loss: %.4f'%(total_loss/(i+1)))

        if history is not None:
            # Fractional epoch as the row label; the loader length replaces the
            # undefined `X`, and the encoder optimizer replaces the undefined
            # `optimizer` when reading the learning rate
            step = epoch + i / len(train_loader)
            history.loc[step, 'train_loss'] = batch_loss
            history.loc[step, 'lr'] = encoder_optimizer.state_dict()['param_groups'][0]['lr']

            
def eval_model(encoder, decoder, valid_loader, epoch, scheduler, history, DEVICE):
    """
    Evaluates the model on ``valid_loader`` without touching any optimizer.

    :param encoder: encoder module
    :param decoder: decoder module
    :param valid_loader: iterable of batches with keys encoder_input, train_demand,
    decoder_input and prediction_demand
    :param epoch: accepted for signature compatibility; not used here
    :param scheduler: accepted for signature compatibility; not used here
    :param history: accepted for signature compatibility; not used here
    :param DEVICE: device the batches are moved to
    :return: tuple(mean batch loss as a float, mean per-series RMSE as a float);
    both are NaN when the loader yields no batch
    """
    encoder.eval()
    decoder.eval()

    total_loss = 0.0
    n_batches = 0
    rmse_per_series = []

    with torch.no_grad():
        for d in tqdm(valid_loader):

            encoder_input = d["encoder_input"].float().to(DEVICE)
            train_demand = d["train_demand"].float().to(DEVICE)
            decoder_input = d["decoder_input"].float().to(DEVICE)
            prediction_demand = d["prediction_demand"].float().to(DEVICE)

            previous_y = train_demand[:, -1]

            encoder_readout, encoder_state = encoder(encoder_input)
            output = decoder(encoder_state, decoder_input, previous_y)

            prediction_demand = prediction_demand.squeeze(-1)

            total_loss += criterion1(output, prediction_demand).item()
            n_batches += 1

            # RMSE of every series over its prediction window, computed with
            # NumPy so it does not depend on mean_squared_error's `squared`
            # argument, which newer scikit-learn releases no longer accept
            errors = (output - prediction_demand).cpu().numpy()
            rmse_per_series.extend(np.sqrt(np.mean(errors ** 2, axis=1)).tolist())

    loss = total_loss / n_batches if n_batches else float("nan")
    rmse = float(np.mean(rmse_per_series)) if rmse_per_series else float("nan")

    print(f'Valid loss: %.4f RMSE : %.4f'%(loss, rmse))

    return loss, rmse

### Hparams

In [None]:
# Input widths are derived from the prepared tensors instead of being
# hard-coded, so they follow the data when the number of dummy columns changes.
#   encoder step = id features + calendar features + current demand + lagged demands
#   decoder step = id features + calendar features + previous prediction
n_static_calendar_features = id_tensor.shape[1] + calender_tensor.shape[1]
n_lag_features = lag_index.shape[1]

hparams = {
    "encoder_input_size" : n_static_calendar_features + 1 + n_lag_features,
    "encoder_hidden_size" : 128,
    "encoder_num_layers" : 1,
    "decoder_rnn_layers" : 1,
    "decoder_input_size" : n_static_calendar_features + 1,
    "decoder_hidden_size" : 128,
    "decoder_fc_in_features" : 128,
    "predict_window":28,
    "train_window":100
}

### Pytorch DataLoaders

In [None]:
# Both loaders share the same settings: batches of 128 in table order,
# four worker processes, and the final partial batch is kept
loader_options = dict(batch_size=128, shuffle=False, num_workers=4, drop_last=False)

train_dataset = TimeDataset(train_days_df, hparams)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, **loader_options)

valid_dataset = TimeDataset(valid_days_df, hparams)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, **loader_options)

In [None]:
history = pd.DataFrame()
history2 = pd.DataFrame()

# Use the GPU when there is one, otherwise run on the CPU instead of crashing
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TRAIN_BATCH_SIZE = 128
TEST_BATCH_SIZE = 128
EPOCHS = 1
start_e = 0

encoder = Encoder(hparams)
decoder = Decoder(hparams)

encoder.to(DEVICE)
decoder.to(DEVICE)


#optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)
encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=0.0001)
# Must optimise the DECODER's parameters; built from the encoder's parameters
# the decoder weights would never be updated
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.0001)

#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, mode='min', factor=0.7, verbose=True, min_lr=1e-5)

print("FOLD : ", )

for epoch in range(start_e, EPOCHS):
    train_model(encoder, decoder, train_loader, epoch, encoder_optimizer, decoder_optimizer, scheduler=None, history=None, DEVICE=DEVICE)
    loss, rmse = eval_model(encoder, decoder,valid_loader, epoch , scheduler=None, history=None, DEVICE=DEVICE)

    # Plain numbers keep the checkpoint names short and free of tensor reprs
    loss_value, rmse_value = float(loss), float(rmse)
    torch.save(encoder.state_dict(), f"encoder_{epoch}_loss_{loss_value:.4f}_rmse_{rmse_value:.4f}.pth")
    torch.save(decoder.state_dict(), f"decoder_{epoch}_loss_{loss_value:.4f}_rmse_{rmse_value:.4f}.pth")

<h3 style="color:blue;"> Please upvote if you like it. It motivates me. Thank you ☺️ .</h3>