In [1]:
import os
import gc
import time
import math
import datetime
from math import log, floor
from sklearn.neighbors import KDTree

import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.utils import shuffle
from tqdm.notebook import tqdm as tqdm

import seaborn as sns
from matplotlib import colors
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import pywt
#import PyWavelets
from statsmodels.robust import mad

import scipy
import statsmodels
from scipy import signal
import statsmodels.api as sm
#from fbprophet import Prophet
from scipy.signal import butter, deconvolve
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

import joblib
#from tqdm import tqdm_notebook as tqdm
from tqdm.notebook import tqdm as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
import sklearn

import warnings
warnings.filterwarnings("ignore")

In [2]:
torch.cuda.get_device_name()

'NVIDIA GeForce MX330'

In [3]:
torch.cuda.is_available()

True

### Load ICS Data

In [4]:
# Load ICS data from University of Michigan repository
ics_monthly = pd.read_csv('http://www.sca.isr.umich.edu/files/tbmics.csv')
ics_monthly = ics_monthly.dropna()
ics_monthly = ics_monthly[-200:]
ics_monthly.head()

Unnamed: 0,Month,YYYY,ICS_ALL
437,October,2006,93.6
438,November,2006,92.1
439,December,2006,91.7
440,January,2007,96.9
441,February,2007,91.3


In [5]:
# Construct date column from year, month and date
ics_monthly["date"] = np.nan
for i in range(len(ics_monthly)):
    ics_monthly.iloc[i, 3] = pd.to_datetime((str(ics_monthly.iloc[i,1]) + '-' + str(ics_monthly.iloc[i,0])))
ics_monthly = ics_monthly.dropna()
ics_monthly["date"] = pd.to_datetime(ics_monthly.date)

In [6]:
# Convert monthly ics data into daily csi data
ics_monthly.index = ics_monthly.date
dates = pd.date_range(ics_monthly.date.min(), ics_monthly.date.max(), freq='D')
ics_daily = ics_monthly.reindex(dates, method='ffill')
ics_daily["date"] = ics_daily.index
ics_daily.drop(["Month", "YYYY"], axis = 1, inplace=True)
del dates

In [7]:
ics_daily.head()

Unnamed: 0,ICS_ALL,date
2006-10-01,93.6,2006-10-01
2006-10-02,93.6,2006-10-02
2006-10-03,93.6,2006-10-03
2006-10-04,93.6,2006-10-04
2006-10-05,93.6,2006-10-05


### CPI data

In [8]:
cpi_monthly = pd.read_excel("../data/m5-forecasting-accuracy/macro-data/CPI-data.xlsx")

In [9]:
cpi_monthly.head()

Unnamed: 0,year-month,cpi
0,197001,15.9
1,197002,16.0
2,197003,16.1
3,197004,16.2
4,197005,16.3


In [10]:
# Construct date column from year and month
cpi_monthly["date"] = np.nan
for i in range(len(cpi_monthly)):
    cpi_monthly.iloc[i, 2] = pd.to_datetime(str(cpi_monthly.iloc[i,0])[:4] + '-' + str(cpi_monthly.iloc[i,0])[4:])
cpi_monthly["date"] = pd.to_datetime(cpi_monthly.date)

In [11]:
# Convert monthly cpi data into daily data
cpi_monthly.index = cpi_monthly.date
dates = pd.date_range(cpi_monthly.date.min(), cpi_monthly.date.max(), freq='D')
cpi_daily = cpi_monthly.reindex(dates, method='ffill')
cpi_daily["date"] = cpi_daily.index
cpi_daily.drop(["year-month"], axis = 1, inplace=True)
del dates

In [12]:
cpi_daily.head()

Unnamed: 0,cpi,date
1970-01-01,15.9,1970-01-01
1970-01-02,15.9,1970-01-02
1970-01-03,15.9,1970-01-03
1970-01-04,15.9,1970-01-04
1970-01-05,15.9,1970-01-05


### Unemployment data

In [13]:
unemp_yearly = pd.read_csv("../data/m5-forecasting-accuracy/macro-data/unemployment-data.csv")

In [14]:
unemp_yearly.head()

Unnamed: 0,year,unemp
0,1991,6.8
1,1992,7.5
2,1993,6.9
3,1994,6.12
4,1995,5.65


In [15]:
# Construct date column from year and month
unemp_yearly["date"] = np.nan
for i in range(len(unemp_yearly)):
    unemp_yearly.iloc[i, 2] = pd.to_datetime(str(unemp_yearly.iloc[i,0]) + '-' + '01-01')
unemp_yearly["date"] = pd.to_datetime(unemp_yearly.date)

In [16]:
# Convert yearly unemployment data into daily data
unemp_yearly.index = unemp_yearly.date
dates = pd.date_range(unemp_yearly.date.min(), unemp_yearly.date.max(), freq='D')
unemp_daily = unemp_yearly.reindex(dates, method='ffill')
unemp_daily["date"] = unemp_daily.index
unemp_daily.drop(["year"], axis = 1, inplace=True)
del dates

In [17]:
unemp_daily.head()

Unnamed: 0,unemp,date
1991-01-01,6.8,1991-01-01
1991-01-02,6.8,1991-01-02
1991-01-03,6.8,1991-01-03
1991-01-04,6.8,1991-01-04
1991-01-05,6.8,1991-01-05


In [18]:
macro_data = ics_daily.merge(cpi_daily,on='date',how="left").merge(unemp_daily,on='date',how="left")

In [19]:
macro_data.head()

Unnamed: 0,ICS_ALL,date,cpi,unemp
0,93.6,2006-10-01,85.1,4.62
1,93.6,2006-10-02,85.1,4.62
2,93.6,2006-10-03,85.1,4.62
3,93.6,2006-10-04,85.1,4.62
4,93.6,2006-10-05,85.1,4.62


In [20]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

### Data Preprocessing

#### Load Data

In [21]:
def read_data(PATH):
    print('Reading files...')
    calendar = pd.read_csv(f'{PATH}/calendar.csv')
    calendar = reduce_mem_usage(calendar)
    print('Calendar has {} rows and {} columns'.format(calendar.shape[0], calendar.shape[1]))
    sell_prices = pd.read_csv(f'{PATH}/sell_prices.csv')
    sell_prices = reduce_mem_usage(sell_prices)
    print('Sell prices has {} rows and {} columns'.format(sell_prices.shape[0], sell_prices.shape[1]))
    sales_train_validation = pd.read_csv(f'{PATH}/sales_train_validation.csv')
    print('Sales train validation has {} rows and {} columns'.format(sales_train_validation.shape[0], sales_train_validation.shape[1]))
    submission = pd.read_csv(f'{PATH}/sample_submission.csv')
    return calendar, sell_prices, sales_train_validation, submission

In [22]:
calendar, sell_prices, sales_train_validation, submission = read_data("../data/m5-forecasting-accuracy")

Reading files...
Mem. usage decreased to  0.12 Mb (41.9% reduction)
Calendar has 1969 rows and 14 columns
Mem. usage decreased to 130.48 Mb (37.5% reduction)
Sell prices has 6841121 rows and 4 columns
Sales train validation has 30490 rows and 1919 columns


In [23]:
calendar["date"] = pd.to_datetime(calendar.date)

In [24]:
# Merge sale data with csi data and drop columns that are not needed
calendar_macro = calendar.merge(macro_data, on=["date"], how="left")

In [25]:
calendar_macro.head()

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,ICS_ALL,cpi,unemp
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0,0,0,74.2,92.9,8.95
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0,0,0,74.2,92.9,8.95
2,2011-01-31,11101,Monday,3,1,2011,d_3,,,,,0,0,0,74.2,92.9,8.95
3,2011-02-01,11101,Tuesday,4,2,2011,d_4,,,,,1,1,0,77.5,93.4,8.95
4,2011-02-02,11101,Wednesday,5,2,2011,d_5,,,,,1,0,1,77.5,93.4,8.95


In [26]:
calendar_macro.shape

(1969, 17)

In [27]:
sales_train_validation.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [28]:
sales_train_validation.shape

(30490, 1919)

In [29]:
sell_prices.head()

Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
0,CA_1,HOBBIES_1_001,11325,9.578125
1,CA_1,HOBBIES_1_001,11326,9.578125
2,CA_1,HOBBIES_1_001,11327,8.257812
3,CA_1,HOBBIES_1_001,11328,8.257812
4,CA_1,HOBBIES_1_001,11329,8.257812


In [30]:
sales_train_validation_melt = pd.melt(sales_train_validation, id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], var_name='day', value_name='demand')

In [31]:
sales_train_validation_melt.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,day,demand
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0


In [32]:
sales_train_validation.shape, sales_train_validation_melt.shape

((30490, 1919), (58327370, 8))

In [33]:
new_data_CA = sales_train_validation_melt[sales_train_validation_melt.store_id=="CA_1"]
new_data_WI = sales_train_validation_melt[sales_train_validation_melt.store_id=="WI_1"]
new_data_TX = sales_train_validation_melt[sales_train_validation_melt.store_id=="TX_1"]
new_data = pd.concat([new_data_CA, new_data_WI, new_data_TX])
new_data = pd.merge(new_data, calendar_macro, left_on=["day"], right_on=["d"], how="left")
new_data = pd.merge(new_data, sell_prices, left_on=["store_id", "item_id", "wm_yr_wk"],right_on=["store_id", "item_id", "wm_yr_wk"], how="left")
new_data["day_int"] = new_data.day.apply(lambda x: int(x.split("_")[-1]))

In [34]:
new_data_CA.shape,new_data_WI.shape,new_data_TX.shape, new_data.shape, sales_train_validation_melt.shape

((5832737, 8), (5832737, 8), (5832737, 8), (17498211, 27), (58327370, 8))

In [35]:
new_data.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,day,demand,date,wm_yr_wk,...,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,ICS_ALL,cpi,unemp,sell_price,day_int
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,2011-01-29,11101,...,,,0,0,0,74.2,92.9,8.95,,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,2011-01-29,11101,...,,,0,0,0,74.2,92.9,8.95,,1
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,2011-01-29,11101,...,,,0,0,0,74.2,92.9,8.95,,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,2011-01-29,11101,...,,,0,0,0,74.2,92.9,8.95,,1
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,2011-01-29,11101,...,,,0,0,0,74.2,92.9,8.95,,1


### Modeling

In [36]:
cols = ["item_id","store_id", "day_int", "demand", "sell_price", "date"]
#cols = ["item_id","store_id", "day_int", "demand", "date"]
new_data = new_data[cols]
new_data.fillna(0, inplace=True)
print(new_data.shape)
new_data.head()

(17498211, 6)


Unnamed: 0,item_id,store_id,day_int,demand,sell_price,date
0,HOBBIES_1_001,CA_1,1,0,0.0,2011-01-29
1,HOBBIES_1_002,CA_1,1,0,0.0,2011-01-29
2,HOBBIES_1_003,CA_1,1,0,0.0,2011-01-29
3,HOBBIES_1_004,CA_1,1,0,0.0,2011-01-29
4,HOBBIES_1_005,CA_1,1,0,0.0,2011-01-29


In [37]:
new_data["item_store_id"]=new_data["item_id"]+"_"+new_data["store_id"]

In [38]:
new_data.head()

Unnamed: 0,item_id,store_id,day_int,demand,sell_price,date,item_store_id
0,HOBBIES_1_001,CA_1,1,0,0.0,2011-01-29,HOBBIES_1_001_CA_1
1,HOBBIES_1_002,CA_1,1,0,0.0,2011-01-29,HOBBIES_1_002_CA_1
2,HOBBIES_1_003,CA_1,1,0,0.0,2011-01-29,HOBBIES_1_003_CA_1
3,HOBBIES_1_004,CA_1,1,0,0.0,2011-01-29,HOBBIES_1_004_CA_1
4,HOBBIES_1_005,CA_1,1,0,0.0,2011-01-29,HOBBIES_1_005_CA_1


In [39]:
combined_data = pd.get_dummies(new_data, columns=['store_id'])

In [40]:
combined_data.head()

Unnamed: 0,item_id,day_int,demand,sell_price,date,item_store_id,store_id_CA_1,store_id_TX_1,store_id_WI_1
0,HOBBIES_1_001,1,0,0.0,2011-01-29,HOBBIES_1_001_CA_1,1,0,0
1,HOBBIES_1_002,1,0,0.0,2011-01-29,HOBBIES_1_002_CA_1,1,0,0
2,HOBBIES_1_003,1,0,0.0,2011-01-29,HOBBIES_1_003_CA_1,1,0,0
3,HOBBIES_1_004,1,0,0.0,2011-01-29,HOBBIES_1_004_CA_1,1,0,0
4,HOBBIES_1_005,1,0,0.0,2011-01-29,HOBBIES_1_005_CA_1,1,0,0


### Feature Engineering

In [41]:
def date_features(df):
    
    #df["date"] = pd.to_datetime(df["date"])
    df["day"] = df.date.dt.day
    df["month"] = df.date.dt.month
    df["week_day"] = df.date.dt.weekday

    df.drop(columns="date", inplace=True)

    return df

def sales_features(df):

    df.sell_price.fillna(0, inplace=True)

    return df

def demand_features(df):

    df["lag_t28"] = df["demand"].transform(lambda x: x.shift(28))
    df["rolling_mean_t7"] = df["demand"].transform(lambda x:x.shift(28).rolling(7).mean())
    df['rolling_mean_t30'] = df['demand'].transform(lambda x: x.shift(28).rolling(30).mean())
    df['rolling_mean_t60'] = df['demand'].transform(lambda x: x.shift(28).rolling(60).mean())
    df['rolling_mean_t90'] = df['demand'].transform(lambda x: x.shift(28).rolling(90).mean())
    df['rolling_mean_t180'] = df['demand'].transform(lambda x: x.shift(28).rolling(180).mean())
    df['rolling_std_t7'] = df['demand'].transform(lambda x: x.shift(28).rolling(7).std())
    df['rolling_std_t30'] = df['demand'].transform(lambda x: x.shift(28).rolling(30).std())

    df.fillna(0, inplace=True)

    return df

In [42]:
!mkdir "../something_all_spl"
# Saving each item with there item name.npy
for item_store in tqdm(combined_data.item_store_id.unique()):
    one_item_store = combined_data[combined_data.item_store_id == item_store][["demand", "sell_price", "date", "store_id_CA_1", "store_id_TX_1", "store_id_WI_1"]]
    #one_item = CA1[CA1.item_id == item][["demand", "date"]]
    item_store_df = date_features(one_item_store)
    #item_df = sales_features(item_df)
    item_store_df = demand_features(item_store_df)
    joblib.dump(item_store_df.values, f"../something_all_spl/{item_store}.npy")

  0%|          | 0/9147 [00:00<?, ?it/s]

In [44]:
# create dataframe for loading npy files and  train valid split

data_info = combined_data[["item_store_id", "day_int", "item_id"]]

# total number of days -> 1913
# for training we are taking data between 1200 < train <- 1913-28-28 = 1857

train_df = data_info[(1200 < data_info.day_int) &( data_info.day_int < 1857)]

# valid data is given last day -> 1885 we need to predict next 28days

valid_df = data_info[data_info.day_int == 1885]

In [45]:
train_df.head()

Unnamed: 0,item_store_id,day_int,item_id
3658800,HOBBIES_1_001_CA_1,1201,HOBBIES_1_001
3658801,HOBBIES_1_002_CA_1,1201,HOBBIES_1_002
3658802,HOBBIES_1_003_CA_1,1201,HOBBIES_1_003
3658803,HOBBIES_1_004_CA_1,1201,HOBBIES_1_004
3658804,HOBBIES_1_005_CA_1,1201,HOBBIES_1_005


In [46]:
label = preprocessing.LabelEncoder()
label.fit(train_df.item_id)
label.transform(["FOODS_3_827"])

array([1436])

In [47]:
train_df.head()

Unnamed: 0,item_store_id,day_int,item_id
3658800,HOBBIES_1_001_CA_1,1201,HOBBIES_1_001
3658801,HOBBIES_1_002_CA_1,1201,HOBBIES_1_002
3658802,HOBBIES_1_003_CA_1,1201,HOBBIES_1_003
3658803,HOBBIES_1_004_CA_1,1201,HOBBIES_1_004
3658804,HOBBIES_1_005_CA_1,1201,HOBBIES_1_005


In [59]:
class DataLoading:
    def __init__(self, df, train_window = 28, predicting_window=28):
        self.df = df.values
        self.train_window = train_window
        self.predicting_window = predicting_window

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, item):
        df_item = self.df[item]
        item_id = df_item[0]
        day_int = df_item[1]
        
        item_npy = joblib.load(f"../something_all_spl/{item_id}.npy")
        item_npy_demand = item_npy[:,0]
        features = item_npy[day_int-self.train_window:day_int]
    

        predicted_demand = item_npy_demand[day_int:day_int+self.predicting_window]

        item_label = label.transform([item_id[:-5]])
        item_onehot = [0] * 3049
        item_onehot[item_label[0]] = 1

        list_features = []
        for f in features:
            one_f = []
            one_f.extend(item_onehot)
            one_f.extend(f)
            list_features.append(one_f)

        return {
            "features" : torch.Tensor(list_features),
            "label" : torch.Tensor(predicted_demand)
        }

In [60]:
train_df.head()

Unnamed: 0,item_store_id,day_int,item_id
3658800,HOBBIES_1_001_CA_1,1201,HOBBIES_1_001
3658801,HOBBIES_1_002_CA_1,1201,HOBBIES_1_002
3658802,HOBBIES_1_003_CA_1,1201,HOBBIES_1_003
3658803,HOBBIES_1_004_CA_1,1201,HOBBIES_1_004
3658804,HOBBIES_1_005_CA_1,1201,HOBBIES_1_005


In [61]:
## for example one item
datac = DataLoading(train_df)
n = datac.__getitem__(100)
n["features"].shape, n["label"].shape

(torch.Size([28, 3065]), torch.Size([28]))

In [62]:
'HOBBIES_1_105_CA_1'[:-5]

'HOBBIES_1_105'

#### LSTM+NN Model

In [63]:
input_size = n["features"].shape[1]

In [64]:
class LSTM(nn.Module):
    def __init__(self, input_size=input_size, hidden_layer_size=2, output_size=28):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size)

        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))
        
    def forward(self, input_seq):

        lstm_out, self.hidden_cell = self.lstm(input_seq)

        lstm_out = lstm_out[:, -1]

        predictions = self.linear(lstm_out)

        return predictions

#### Train and Eval functions

In [65]:
# loss function
def criterion1(pred1, targets):
    l1 = nn.MSELoss()(pred1, targets)
    return l1

In [66]:
def train_model(model,train_loader, epoch, optimizer, scheduler=None, history=None):
    model.train()
    total_loss = 0
    t = tqdm(train_loader)
            
    for i, d in enumerate(t):
        item = d["features"].cuda().float()
        y_batch = d["label"].cuda().float()

        optimizer.zero_grad()

        out = model(item)
        loss = criterion1(out, y_batch)

        total_loss += loss
        
        t.set_description(f'Epoch {epoch+1} : , LR: %6f, Loss: %.4f'%(optimizer.state_dict()['param_groups'][0]['lr'],total_loss/(i+1)))

        if history is not None:
            history.loc[epoch + i / len(X), 'train_loss'] = loss.data.cpu().numpy()
            history.loc[epoch + i / len(X), 'lr'] = optimizer.state_dict()['param_groups'][0]['lr']

        loss.backward()
        optimizer.step()
        

def evaluate_model(model, val_loader, epoch, scheduler=None, history=None):
    model.eval()
    loss = 0
    pred_list = []
    real_list = []
    RMSE_list = []
    with torch.no_grad():
        for i,d in enumerate(tqdm(val_loader)):
            item = d["features"].cuda().float()
            y_batch = d["label"].cuda().float()

            o1 = model(item)
            l1 = criterion1(o1, y_batch)
            loss += l1
            
            o1 = o1.cpu().numpy()
            y_batch = y_batch.cpu().numpy()
            
            for pred, real in zip(o1, y_batch):
                rmse = np.sqrt(sklearn.metrics.mean_squared_error(real, pred))
                RMSE_list.append(rmse)
                pred_list.append(pred)
                real_list.append(real)

    loss /= len(val_loader)
    
    if scheduler is not None:
        scheduler.step(loss)

    print(f'\n Dev loss: %.4f RMSE : %.4f'%(loss, np.mean(RMSE_list)))
 

#### Run Function

In [67]:
torch.cuda.memory_allocated()

0

In [68]:
DEVICE = "cuda"
#TRAIN_BATCH_SIZE = 512
TRAIN_BATCH_SIZE = 1024
TEST_BATCH_SIZE = 128
EPOCHS = 1
start_e = 1


#model = LSTM().cuda()
model = LSTM()
model.to(DEVICE)

train_dataset = DataLoading(train_df)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size= TRAIN_BATCH_SIZE,
    shuffle=True,
    #num_workers=4,
    drop_last=True
)

valid_dataset = DataLoading(valid_df)


valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    batch_size= TEST_BATCH_SIZE,
    shuffle=False,
    #num_workers=4,
    drop_last=True
)

optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, mode='min', factor=0.7, verbose=True, min_lr=1e-5)

for epoch in range(start_e, EPOCHS+1):
    print(epoch)
    train_model(model, train_loader, epoch, optimizer, scheduler=scheduler, history=None)
    evaluate_model(model, valid_loader, epoch, scheduler=scheduler, history=None)

1


  0%|          | 0/5859 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]


 Dev loss: 8.5753 RMSE : 1.6016
