Sorce:
    https://github.com/fastai/fastai/blob/master/courses/dl1/lesson3-rossman.ipynb

# Neural network for Tabular and Categorical  Data Using PyTorch
The following paper is followed here
    
    Entity Embeddings of Categorical Variables
    https://arxiv.org/abs/1604.06737
    
The original paper implements the model using Keras but this note uses PyTorch.

## Install additional packages

In [None]:
#!pip install sklearn_pandas
#!conda install pyarrow -c conda-forge

## Call autoreload magic functions

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

## Import required Libraries

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from sklearn.preprocessing import LabelEncoder, StandardScaler, Imputer
from sklearn_pandas import DataFrameMapper

## Define Neural Network Architecture by Extending nn.Module from PyTorch

## Define TabularDataset Class by extending Dataset from PyTorch library

In [3]:
class TabularDataset(Dataset):
    def __init__(self, data, cat_cols=None, output_col=None):
        """
        Characterizes a Dataset for PyTorch

        Parameters
        ----------

        data: pandas data frame
          The data frame object for the input data. It must
          contain all the continuous, categorical and the
          output columns to be used.

        cat_cols: List of strings
          The names of the categorical columns in the data.
          These columns will be passed through the embedding
          layers in the model. These columns must be
          label encoded beforehand. 

        output_col: string
          The name of the output variable column in the data
          provided.
        """

        self.n = data.shape[0]

        if output_col:
            self.y = data[output_col].astype(np.float32).values.reshape(-1, 1)
        else:
            self.y =  np.zeros((self.n, 1))

        self.cat_cols = cat_cols if cat_cols else []
        self.cont_cols = [col for col in data.columns
                          if col not in self.cat_cols + [output_col]]

        if self.cont_cols:
            self.cont_X = data[self.cont_cols].astype(np.float32).values
        else:
            self.cont_X = np.zeros((self.n, 1))

        if self.cat_cols:
            self.cat_X = data[cat_cols].astype(np.int64).values
        else:
            self.cat_X =  np.zeros((self.n, 1))

    def __len__(self):
        """
        Denotes the total number of samples.
        """
        return self.n

    def __getitem__(self, idx):
        """
        Generates one sample of data.
        """
        return [self.y[idx], self.cont_X[idx], self.cat_X[idx]]

## Load Data
We have already preprocessed the data and stored on the PATH

In [4]:
PATH = '' #Referring current directory

In [5]:
joined = pd.read_feather(f'{PATH}joined')
joined_test = pd.read_feather(f'{PATH}joined_test')

In [6]:
joined.head().T.head(40)

Unnamed: 0,0,1,2,3,4
index,0,1,2,3,4
Store,1,2,3,4,5
DayOfWeek,5,5,5,5,5
Date,2015-07-31 00:00:00,2015-07-31 00:00:00,2015-07-31 00:00:00,2015-07-31 00:00:00,2015-07-31 00:00:00
Sales,5263,6064,8314,13995,4822
Customers,555,625,821,1498,559
Open,1,1,1,1,1
Promo,1,1,1,1,1
StateHoliday,False,False,False,False,False
SchoolHoliday,1,1,1,1,1


Now that we've loaded all our engineered features, we need to convert to input compatible with a neural network.

This includes converting categorical variables into contiguous integers or one-hot encodings, normalizing continuous features to standard normal, etc...


In [7]:
cat_vars = ['Store', 'DayOfWeek', 'Year', 'Month', 'Day', 'StateHoliday', 'CompetitionMonthsOpen',
    'Promo2Weeks', 'StoreType', 'Assortment', 'PromoInterval', 'CompetitionOpenSinceYear', 'Promo2SinceYear',
    'State', 'Week', 'Events', 'Promo_fw', 'Promo_bw', 'StateHoliday_fw', 'StateHoliday_bw',
    'SchoolHoliday_fw', 'SchoolHoliday_bw']

contin_vars = ['CompetitionDistance', 'Max_TemperatureC', 'Mean_TemperatureC', 'Min_TemperatureC',
   'Max_Humidity', 'Mean_Humidity', 'Min_Humidity', 'Max_Wind_SpeedKm_h', 
   'Mean_Wind_SpeedKm_h', 'CloudCover', 'trend', 'trend_DE',
   'AfterStateHoliday', 'BeforeStateHoliday', 'Promo', 'SchoolHoliday']

n = len(joined); n

1017209

In [8]:
dep = 'Sales'
joined = joined[cat_vars+contin_vars+[dep, 'Date']].copy()

In [9]:
joined.head()

Unnamed: 0,Store,DayOfWeek,Year,Month,Day,StateHoliday,CompetitionMonthsOpen,Promo2Weeks,StoreType,Assortment,...,Mean_Wind_SpeedKm_h,CloudCover,trend,trend_DE,AfterStateHoliday,BeforeStateHoliday,Promo,SchoolHoliday,Sales,Date
0,1,5,2015,7,31,False,24,0,c,a,...,11,1.0,85,83,57,0,1,1,5263,2015-07-31
1,2,5,2015,7,31,False,24,25,a,a,...,11,4.0,80,83,67,0,1,1,6064,2015-07-31
2,3,5,2015,7,31,False,24,25,a,a,...,5,2.0,86,83,57,0,1,1,8314,2015-07-31
3,4,5,2015,7,31,False,24,0,c,c,...,16,6.0,74,83,67,0,1,1,13995,2015-07-31
4,5,5,2015,7,31,False,3,0,a,a,...,11,4.0,82,83,57,0,1,1,4822,2015-07-31


In [10]:
joined_test[dep] = 0
joined_test = joined_test[cat_vars+contin_vars+[dep, 'Date', 'Id']].copy()

In [11]:
joined_test.head()

Unnamed: 0,Store,DayOfWeek,Year,Month,Day,StateHoliday,CompetitionMonthsOpen,Promo2Weeks,StoreType,Assortment,...,CloudCover,trend,trend_DE,AfterStateHoliday,BeforeStateHoliday,Promo,SchoolHoliday,Sales,Date,Id
0,1,4,2015,9,17,False,24,0,c,a,...,6.0,69,67,105,0,1,0,0,2015-09-17,1
1,3,4,2015,9,17,False,24,25,a,a,...,6.0,68,67,105,0,1,0,0,2015-09-17,2
2,7,4,2015,9,17,False,24,0,a,c,...,5.0,59,67,115,0,1,0,0,2015-09-17,3
3,8,4,2015,9,17,False,11,0,a,a,...,5.0,59,67,115,0,1,0,0,2015-09-17,4
4,9,4,2015,9,17,False,24,0,a,c,...,6.0,68,67,105,0,1,0,0,2015-09-17,5


In [12]:
for v in cat_vars: joined[v] = joined[v].astype('category').cat.as_ordered()

In [13]:
joined['Month'].head()

0    7
1    7
2    7
3    7
4    7
Name: Month, dtype: category
Categories (12, int64): [1 < 2 < 3 < 4 ... 9 < 10 < 11 < 12]

In [14]:
def apply_cats(df, trn):
    """Changes any columns of strings in df into categorical variables using trn as
    a template for the category codes.
    Parameters:
    -----------
    df: A pandas dataframe. Any columns of strings will be changed to
        categorical values. The category codes are determined by trn.
    trn: A pandas dataframe. When creating a category for df, it looks up the
        what the category's code were in trn and makes those the category codes
        for df.
    """
    for n,c in df.items():
        if (n in trn.columns) and (trn[n].dtype.name=='category'):
            df[n] = c.astype('category').cat.as_ordered()
            df[n].cat.set_categories(trn[n].cat.categories, ordered=True, inplace=True)

In [15]:
apply_cats(joined_test, joined)

In [16]:
for v in contin_vars:
    joined[v] = joined[v].fillna(0).astype('float32')
    joined_test[v] = joined_test[v].fillna(0).astype('float32')

We're going to run on a sample.

In [17]:
def get_cv_idxs(n, cv_idx=0, val_pct=0.2, seed=42):
    """ Get a list of index values for Validation set from a dataset
    
    Arguments:
        n : int, Total number of elements in the data set.
        cv_idx : int, starting index [idx_start = cv_idx*int(val_pct*n)] 
        val_pct : (int, float), validation set percentage 
        seed : seed value for RandomState
        
    Returns:
        list of indexes 
    """
    np.random.seed(seed)
    n_val = int(val_pct*n)
    idx_start = cv_idx*n_val
    idxs = np.random.permutation(n)
    return idxs[idx_start:idx_start+n_val]

In [18]:
idxs = get_cv_idxs(n, val_pct=150000/n)
joined_samp = joined.iloc[idxs].set_index("Date")
samp_size = len(joined_samp); samp_size

150000

In [None]:
#?nn.Embedding

In [19]:
samp_size = n
joined_samp = joined.set_index("Date")

To run on the full dataset, use this instead:

In [None]:
#samp_size = n
#joined_samp = joined.set_index("Date")

## We can now process our data.

In [20]:
joined_samp.head(2)

Unnamed: 0_level_0,Store,DayOfWeek,Year,Month,Day,StateHoliday,CompetitionMonthsOpen,Promo2Weeks,StoreType,Assortment,...,Max_Wind_SpeedKm_h,Mean_Wind_SpeedKm_h,CloudCover,trend,trend_DE,AfterStateHoliday,BeforeStateHoliday,Promo,SchoolHoliday,Sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-31,1,5,2015,7,31,False,24,0,c,a,...,24.0,11.0,1.0,85.0,83.0,57.0,0.0,1.0,1.0,5263
2015-07-31,2,5,2015,7,31,False,24,25,a,a,...,14.0,11.0,4.0,80.0,83.0,67.0,0.0,1.0,1.0,6064


In [21]:
def numericalize(df, col, name, max_n_cat):
    """ Changes the column col from a categorical type to it's integer codes.
    Parameters:
    -----------
    df: A pandas dataframe. df[name] will be filled with the integer codes from
        col.
    col: The column you wish to change into the categories.
    name: The column name you wish to insert into df. This column will hold the
        integer codes.
    max_n_cat: If col has more categories than max_n_cat it will not change the
        it to its integer codes. If max_n_cat is None, then col will always be
        converted.
    """
    if not is_numeric_dtype(col) and ( max_n_cat is None or len(col.cat.categories)>max_n_cat):
        df[name] = pd.Categorical(col).codes+1

In [22]:
def scale_vars(df, mapper):
    #warnings.filterwarnings('ignore', category=sklearn.exceptions.DataConversionWarning)
    if mapper is None:
        map_f = [([n],StandardScaler()) for n in df.columns if is_numeric_dtype(df[n])]
        mapper = DataFrameMapper(map_f).fit(df)
    df[mapper.transformed_names_] = mapper.transform(df)
    return mapper

In [23]:
def fix_missing(df, col, name, na_dict):
    """ Fill missing data in a column of df with the median, and add a {name}_na column
    which specifies if the data was missing.
    Parameters:
    -----------
    df: The data frame that will be changed.
    col: The column of data to fix by filling in missing data.
    name: The name of the new filled column in df.
    na_dict: A dictionary of values to create na's of and the value to insert. If
        name is not a key of na_dict the median will fill any missing data. Also
        if name is not a key of na_dict and there is no missing data in col, then
        no {name}_na column is not created.
    """
    if is_numeric_dtype(col):
        if pd.isnull(col).sum() or (name in na_dict):
            df[name+'_na'] = pd.isnull(col)
            filler = na_dict[name] if name in na_dict else col.median()
            df[name] = col.fillna(filler)
            na_dict[name] = filler
    return na_dict

In [24]:
def proc_df(df, y_fld=None, skip_flds=None, ignore_flds=None, do_scale=False, na_dict=None,
            preproc_fn=None, max_n_cat=None, subset=None, mapper=None):
    """ proc_df takes a data frame df and splits off the response variable, and
    changes the df into an entirely numeric dataframe. For each column of df 
    which is not in skip_flds nor in ignore_flds, na values are replaced by the
    median value of the column.
    Parameters:
    -----------
    df: The data frame you wish to process.
    y_fld: The name of the response variable
    skip_flds: A list of fields that dropped from df.
    ignore_flds: A list of fields that are ignored during processing.
    do_scale: Standardizes each column in df. Takes Boolean Values(True,False)
    na_dict: a dictionary of na columns to add. Na columns are also added if there
        are any missing values.
    preproc_fn: A function that gets applied to df.
    max_n_cat: The maximum number of categories to break into dummy values, instead
        of integer codes.
    subset: Takes a random subset of size subset from df.
    mapper: If do_scale is set as True, the mapper variable
        calculates the values used for scaling of variables during training time (mean and standard deviation).
    Returns:
    --------
    [x, y, nas, mapper(optional)]:
        x: x is the transformed version of df. x will not have the response variable
            and is entirely numeric.
        y: y is the response variable
        nas: returns a dictionary of which nas it created, and the associated median.
        mapper: A DataFrameMapper which stores the mean and standard deviation of the corresponding continuous
        variables which is then used for scaling of during test-time.
    """
    if not ignore_flds: ignore_flds=[]
    if not skip_flds: skip_flds=[]
    if subset: df = get_sample(df,subset)
    else: df = df.copy()
    ignored_flds = df.loc[:, ignore_flds]
    df.drop(ignore_flds, axis=1, inplace=True)
    if preproc_fn: preproc_fn(df)
    if y_fld is None: y = None
    else:
        if not is_numeric_dtype(df[y_fld]): df[y_fld] = pd.Categorical(df[y_fld]).codes
        y = df[y_fld].values
        skip_flds += [y_fld]
    df.drop(skip_flds, axis=1, inplace=True)

    if na_dict is None: na_dict = {}
    else: na_dict = na_dict.copy()
    na_dict_initial = na_dict.copy()
    for n,c in df.items(): na_dict = fix_missing(df, c, n, na_dict)
    if len(na_dict_initial.keys()) > 0:
        df.drop([a + '_na' for a in list(set(na_dict.keys()) - set(na_dict_initial.keys()))], axis=1, inplace=True)
    if do_scale: mapper = scale_vars(df, mapper)
    for n,c in df.items(): numericalize(df, c, n, max_n_cat)
    df = pd.get_dummies(df, dummy_na=True)
    df = pd.concat([ignored_flds, df], axis=1)
    res = [df, y, na_dict]
    if do_scale: res = res + [mapper]
    return res

In [25]:
df, y, nas, mapper = proc_df(joined_samp, 'Sales', do_scale=True)
yl = np.log(y+.000001)

In [26]:
df.head(2)

Unnamed: 0_level_0,Store,DayOfWeek,Year,Month,Day,StateHoliday,CompetitionMonthsOpen,Promo2Weeks,StoreType,Assortment,...,Min_Humidity,Max_Wind_SpeedKm_h,Mean_Wind_SpeedKm_h,CloudCover,trend,trend_DE,AfterStateHoliday,BeforeStateHoliday,Promo,SchoolHoliday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-31,1,5,3,7,31,1,25,1,3,1,...,-1.62656,0.148437,-0.147662,-1.859165,1.744361,1.743049,0.644376,1.090367,1.273237,2.144211
2015-07-31,2,5,3,7,31,1,25,2,1,1,...,-1.269897,-0.97008,-0.147662,-0.493858,1.303439,1.743049,0.965073,1.090367,1.273237,2.144211


In [27]:
joined_test = joined_test.set_index("Date")

In [28]:
df_test, _, nas, mapper = proc_df(joined_test, 'Sales', do_scale=True, skip_flds=['Id'],
                                  mapper=mapper, na_dict=nas)

In [29]:
df_test.head(2)

Unnamed: 0_level_0,Store,DayOfWeek,Year,Month,Day,StateHoliday,CompetitionMonthsOpen,Promo2Weeks,StoreType,Assortment,...,Min_Humidity,Max_Wind_SpeedKm_h,Mean_Wind_SpeedKm_h,CloudCover,trend,trend_DE,AfterStateHoliday,BeforeStateHoliday,Promo,SchoolHoliday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-09-17,1,4,3,9,17,1,25,1,3,1,...,1.940075,0.707695,0.361056,0.416347,0.333409,0.079557,2.183721,1.090367,1.273237,-0.466372
2015-09-17,3,4,3,9,17,1,25,2,1,1,...,0.411517,1.602508,2.395924,0.416347,0.245224,0.079557,2.183721,1.090367,1.273237,-0.466372




In time series data, cross-validation is not random. Instead, our holdout data is generally the most recent data, as it would be in real application. This issue is discussed in detail in this post (https://www.fast.ai/2017/11/13/validation-sets/).

One approach is to take the last 25% of rows (sorted by date) as our validation set.


In [30]:
train_ratio = 0.75
# train_ratio = 0.9
train_size = int(samp_size * train_ratio); train_size
val_idx = list(range(train_size, len(df)))

An even better option for picking a validation set is using the exact same length of time period as the test set uses - this is implemented here:

In [31]:
import datetime
val_idx = np.flatnonzero(
    (df.index<=datetime.datetime(2014,9,17)) & (df.index>=datetime.datetime(2014,8,1)))

In [32]:
val_idx

array([334555, 334556, 334557, ..., 379432, 379433, 379434], dtype=int64)

## Apply Data to Model

In [33]:
# from sklearn.preprocessing import LabelEncoder
# label_encoders = {}
# for cat_col in categorical_features:
#     label_encoders[cat_col] = LabelEncoder()
#     data[cat_col] = label_encoders[cat_col].fit_transform(data[cat_col])

In [34]:
df['output'] = y

In [35]:
df.head(2)

Unnamed: 0_level_0,Store,DayOfWeek,Year,Month,Day,StateHoliday,CompetitionMonthsOpen,Promo2Weeks,StoreType,Assortment,...,Max_Wind_SpeedKm_h,Mean_Wind_SpeedKm_h,CloudCover,trend,trend_DE,AfterStateHoliday,BeforeStateHoliday,Promo,SchoolHoliday,output
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-31,1,5,3,7,31,1,25,1,3,1,...,0.148437,-0.147662,-1.859165,1.744361,1.743049,0.644376,1.090367,1.273237,2.144211,5263
2015-07-31,2,5,3,7,31,1,25,2,1,1,...,-0.97008,-0.147662,-0.493858,1.303439,1.743049,0.965073,1.090367,1.273237,2.144211,6064


In [36]:
dataset = TabularDataset(data=df, cat_cols=cat_vars, output_col='output')

In [37]:
#?DataLoader

In [38]:
batchsize = 64
dataloader = DataLoader(dataset, batchsize, shuffle=False, drop_last=True, num_workers=0)

In [39]:
cat_dims = [int(df[col].nunique()+1) for col in cat_vars]
cat_dims

[1116, 8, 4, 13, 32, 3, 26, 3, 5, 4, 5, 24, 9, 13, 53, 23, 7, 7, 4, 4, 9, 9]

In [40]:
emb_dims = [(x, min(50, (x + 1) // 2)) for x in cat_dims]
emb_dims

[(1116, 50),
 (8, 4),
 (4, 2),
 (13, 7),
 (32, 16),
 (3, 2),
 (26, 13),
 (3, 2),
 (5, 3),
 (4, 2),
 (5, 3),
 (24, 12),
 (9, 5),
 (13, 7),
 (53, 27),
 (23, 12),
 (7, 4),
 (7, 4),
 (4, 2),
 (4, 2),
 (9, 5),
 (9, 5)]

In [41]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [42]:
def inv_y(a): return np.exp(a)

def exp_rmspe(y_pred, targ):
    targ = inv_y(targ)
    pct_var = (targ - inv_y(y_pred))/targ
    return math.sqrt((pct_var**2).mean())

max_log_y = np.max(yl)
y_range = (0, max_log_y*1.2)

In [43]:
def emb_init(x):
    x = x.weight.data
    sc = 2/(x.size(1)+1)
    x.uniform_(-sc,sc)


class TabularDataNN(nn.Module):
    """Model able to handle inputs consisting of both categorical and continuous variables.
    Args:
       emb_szs (list of int): List of embedding size
       n_cont (int): Number of continuous variables in inputs
       emb_drop (float): Dropout applied to the output of embedding
       out_sz (int): Size of model's output.
       szs (list of int): List of hidden variables sizes
       drops (list of float): List of dropout applied to hidden variables
       y_range (list of float): Min and max of `y`. y_range[0] = min, y_range[1] = max.
       use_bn (bool): If use BatchNorm, set ``True``
       is_reg (bool): If regression, set ``True``
       is_multi (bool): If multi-label classification, set ``True``
    """
    def __init__(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
                 y_range=None, use_bn=False, is_reg=True, is_multi=False):
        
        super().__init__()
        
        # Check constraints (optional)
        for i,(c,s) in enumerate(emb_szs): 
            assert c > 1, f"cardinality must be >=2, got emb_szs[{i}]: ({c},{s})"
        if is_reg==False and is_multi==False: 
            assert out_sz >= 2, "For classification with out_sz=1, use is_multi=True"
        
        # Embedding layers
        self.embs = nn.ModuleList([nn.Embedding(c, s) for c,s in emb_szs])
        for emb in self.embs: 
            emb_init(emb)
        n_emb = sum(e.embedding_dim for e in self.embs)
        self.n_emb, self.n_cont=n_emb, n_cont
        
        # Linear Layers
        szs = [n_emb+n_cont] + szs
        self.lins = nn.ModuleList([
            nn.Linear(szs[i], szs[i+1]) for i in range(len(szs)-1)])
        for o in self.lins: 
            nn.init.kaiming_normal_(o.weight.data)
        
        # Output Layer
        self.outp = nn.Linear(szs[-1], out_sz)
        nn.init.kaiming_normal_(self.outp.weight.data)
        
        # Batch Norm Layers
        self.bns = nn.ModuleList([
            nn.BatchNorm1d(sz) for sz in szs[1:]])
        self.bn = nn.BatchNorm1d(n_cont)

        # Dropout Layers
        self.emb_drop = nn.Dropout(emb_drop)
        self.drops = nn.ModuleList([nn.Dropout(drop) for drop in drops])
        
        # Meta Information
        self.use_bn,self.y_range = use_bn,y_range
        self.is_reg = is_reg
        self.is_multi = is_multi

    def forward(self, x_cat, x_cont):
        
        # Apply embedding to categorical variables of input
        if self.n_emb != 0:
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embs)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        
        # Apply batchnormal to continuous variables of input
        if self.n_cont != 0:
            x2 = self.bn(x_cont)
            x = torch.cat([x, x2], 1) if self.n_emb != 0 else x2
        
        # Apply linear_layer l, dropout_layer d, batchnormal_layer b
        for l,d,b in zip(self.lins, self.drops, self.bns):
            x = F.relu(l(x))
            if self.use_bn: x = b(x)
            x = d(x)
        
        # Apply output layer
        x = self.outp(x)
        
        # Generalise the model for classification
        if not self.is_reg:
            if self.is_multi: # For classification with out_sz=1, use is_multi=True
                x = torch.sigmoid(x)
            else:
                x = torch.log_softmax(x)
        elif self.y_range: # Regularize the output of regression. This part is tricky
            x = torch.sigmoid(x)
            x = x*(self.y_range[1] - self.y_range[0])
            x = x+self.y_range[0]
        return x

In [44]:
model = TabularDataNN(emb_dims, len(df.columns)-len(cat_vars)-1, 
                   0.04, 1, [1000,500], [0.001,0.01], y_range=y_range).to(device)

In [45]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [46]:
criterion = nn.MSELoss()

In [47]:
no_of_epochs = 3

In [51]:
model.train(); # Setting the model to training mood for clarity (by default the model is in training mood)

In [None]:
for epoch in range(no_of_epochs):
    print('-----------epoc: {}------------'.format(epoch))
    count = 0
    for y, cont_x, cat_x in dataloader:
        cat_x = cat_x.to(device)
        cont_x = cont_x.to(device)
        y  = y.to(device)
         # Forward Pass
        preds = model(cat_x, cont_x)
        loss = criterion(preds, y)
        
        # Backward Pass and Optimization
        optimizer.zero_grad()
        loss.backward()
        if count%500==0: print(loss)
        count += 1
        optimizer.step()

-----------epoc: 0------------
tensor(83643760., grad_fn=<MseLossBackward>)
tensor(78304200., grad_fn=<MseLossBackward>)
tensor(25114444., grad_fn=<MseLossBackward>)
tensor(61097768., grad_fn=<MseLossBackward>)
tensor(37817960., grad_fn=<MseLossBackward>)
tensor(35273332., grad_fn=<MseLossBackward>)
tensor(42384228., grad_fn=<MseLossBackward>)
tensor(91786528., grad_fn=<MseLossBackward>)
tensor(46281804., grad_fn=<MseLossBackward>)
tensor(66449484., grad_fn=<MseLossBackward>)
tensor(2004818.8750, grad_fn=<MseLossBackward>)
tensor(44300804., grad_fn=<MseLossBackward>)
tensor(424814.2812, grad_fn=<MseLossBackward>)
tensor(30533738., grad_fn=<MseLossBackward>)
tensor(38724452., grad_fn=<MseLossBackward>)
tensor(15180518., grad_fn=<MseLossBackward>)
tensor(182301.2969, grad_fn=<MseLossBackward>)
tensor(49817580., grad_fn=<MseLossBackward>)
tensor(70771600., grad_fn=<MseLossBackward>)
tensor(43198296., grad_fn=<MseLossBackward>)
tensor(1.1096e+08, grad_fn=<MseLossBackward>)
tensor(61536036.

## Evaluate the model on train dataset

In [None]:
model.eval();

In [None]:
loss = 0
for y, cont_x, cat_x in dataloader:
    cat_x = cat_x.to(device)
    cont_x = cont_x.to(device)
    y  = y.to(device)
     # Forward Pass
    preds = model(cat_x, cont_x)
    loss += criterion(preds, y)
#if epoch%500==0:
print(loss/len(df))

# Evaluate the model on val dataset

In [None]:
#df_test is already prepared according to the training format
df_eva = df.iloc[val_idx]

In [None]:
val_dataset = TabularDataset(data=df_eva, cat_cols=cat_vars, output_col='output')

batchsize = len(df_eva)
val_dataloader = DataLoader(val_dataset , batchsize, shuffle=False, drop_last=True, num_workers=0)

model.eval();

In [None]:
loss = 0
for y, cont_x, cat_x in val_dataloader:
    cat_x = cat_x.to(device)
    cont_x = cont_x.to(device)
    y  = y.to(device)
     # Forward Pass
    preds = model(cat_x, cont_x)
    loss += criterion(preds, y)
print(loss/len(df_eva))