<a href="https://colab.research.google.com/github/ds-personalization/final-project-qrdecomposition_final/blob/main/notebook/wide_n_deep_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
# Relative location of the downloads folder, one level above the notebook directory.
# NOTE(review): `data_path` is not referenced by any other visible cell — confirm
# whether the data-loading cell that used it was deleted.
data_path = os.path.join('../','downloads')

In [2]:
import os
###utilities
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings("ignore")

###numpy,scipy,pandas,sklearn stacks
from scipy import sparse
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline

###torch stacks
import torch
from torch import nn
from pytorch_widedeep.preprocessing import DensePreprocessor
from pytorch_widedeep.callbacks import (
    LRHistory,
    EarlyStopping,
    ModelCheckpoint,
)
from pytorch_widedeep.optim import RAdam
from pytorch_widedeep.initializers import XavierNormal, KaimingNormal
from pytorch_widedeep.models import Wide, DeepDense, WideDeep

In [45]:
class wide_deep():
    """Wide & Deep rating regressor assembled from ``pytorch_widedeep`` parts.

    The wide branch is fed the dense ``CountVectorizer`` encoding of
    ``wide_cols``; the deep branch is fed the output of a ``DensePreprocessor``
    configured with ``(column, embedding_size)`` pairs.

    Parameters
    ----------
    wide_cols : str
        Column handed to the ``ColumnTransformer`` for count vectorisation.
    deep_cols : list of str
        Columns passed to ``DensePreprocessor`` as ``embed_cols``.
    target_col : str
        Regression target; dropped from the feature frame in ``fit``.
    deep_embs : list of int
        Embedding sizes, paired positionally with ``deep_cols`` via ``zip``
        (surplus entries on either side are silently ignored).
    deep_hidden : list of int
        Forwarded to ``DeepDense`` as ``hidden_layers``.
    deep_dropout : list of float
        Forwarded to ``DeepDense`` as ``dropout``.
    deep_bachnorm : bool
        Forwarded to ``DeepDense`` as ``batchnorm``. The misspelt keyword is
        kept as-is so existing callers keep working.
    """

    def __init__(self,wide_cols='genres',
                    deep_cols=['userId', 'movieId'],
                    target_col = 'rating',
                    deep_embs=[64, 64],
                    deep_hidden=[64,32,16],
                    deep_dropout=[0.1, 0.1, .1],
                    deep_bachnorm=True):
        # Populated by fit() or load_pretrained().
        self.wide = None
        self.deep = None
        self.model = None

        # Copy list arguments: the defaults are shared module-level objects, so
        # storing them by reference would let one instance mutate another's
        # configuration (or the caller's own list).
        self.deep_hidden = list(deep_hidden) if deep_hidden is not None else None
        self.deep_dropout = list(deep_dropout) if deep_dropout is not None else None
        self.deep_bachnorm = deep_bachnorm

        self.embs = list(zip(deep_cols, deep_embs))
        self.wide_preprocessor = self._genre_preprocessor(wide_cols)
        # Go through the factory method so subclasses can swap the preprocessor.
        self.deep_preprocessor = self._deep_preprocessor(self.embs)
        self.target_col = target_col

    def fit(self, train, n_epochs=10, batch_size=128, val_split=.1, verbose = True,
            checkpoint_path="model_weights/wd_out"):
        """Fit both preprocessors on ``train`` and train a new ``WideDeep`` model.

        Parameters
        ----------
        train : pandas.DataFrame
            Must contain ``target_col`` plus the wide and deep feature columns.
        n_epochs, batch_size, val_split
            Forwarded unchanged to ``WideDeep.fit``.
        verbose
            Forwarded to ``WideDeep.compile``.
        checkpoint_path : str
            ``filepath`` given to the ``ModelCheckpoint`` callback. Defaults to
            the location that used to be hard-coded.

        Returns
        -------
        None
            The trained model is stored on ``self.model``.
        """
        X = train.drop(self.target_col, axis=1)
        y = train[self.target_col].values
        wide_feature = self.wide_preprocessor.fit_transform(X)
        deep_feature = self.deep_preprocessor.fit_transform(X)

        # NOTE(review): np.unique() counts the distinct *values* present in the
        # count matrix, not the number of vocabulary columns. Confirm this is
        # the size Wide expects for the installed pytorch_widedeep version
        # before changing it — saved models depend on the current value.
        self.wide = Wide(wide_dim=np.unique(wide_feature).shape[0], pred_dim=1)
        self.deep = DeepDense(hidden_layers=self.deep_hidden,
                              dropout=self.deep_dropout,
                              batchnorm=self.deep_bachnorm,
                              deep_column_idx=self.deep_preprocessor.deep_column_idx,
                              embed_input=self.deep_preprocessor.embeddings_input)
        self.model = WideDeep(wide=self.wide, deepdense=self.deep)

        # Each branch gets its own optimizer and step-decay schedule.
        wide_opt = torch.optim.Adam(self.model.wide.parameters(), lr=0.01)
        deep_opt = RAdam(self.model.deepdense.parameters())
        optimizers = {"wide": wide_opt, "deepdense": deep_opt}
        schedulers = {
            "wide": torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3),
            "deepdense": torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5),
        }
        initializers = {"wide": KaimingNormal, "deepdense": XavierNormal}
        callbacks = [
            LRHistory(n_epochs=n_epochs),
            EarlyStopping(patience=5),
            ModelCheckpoint(filepath=checkpoint_path),
        ]

        self.model.compile(method='regression',
                           optimizers=optimizers,
                           lr_schedulers=schedulers,
                           initializers=initializers,
                           callbacks=callbacks,
                           verbose=verbose)
        self.model.fit(X_wide=wide_feature,
                       X_deep=deep_feature,
                       target=y,
                       n_epochs=n_epochs,
                       batch_size=batch_size,
                       val_split=val_split)

    def load_pretrained(self, train, fp, device):
        """Load a saved model from ``fp`` and refit the preprocessors on ``train``.

        The preprocessors are not stored with the model, so they are fitted
        again here; ``train`` should be the frame the model was trained on so
        that the encodings line up with the saved weights.

        Parameters
        ----------
        train : pandas.DataFrame
            Frame containing the wide and deep feature columns.
        fp : str
            Path handed to ``torch.load``.
        device : str
            Passed to ``torch.device`` and used as ``map_location``.
        """
        X = train.copy()
        self.wide_preprocessor.fit_transform(X)
        self.deep_preprocessor.fit_transform(X)

        # SECURITY: torch.load unpickles arbitrary Python objects. Only load
        # files that come from a trusted source.
        self.model = torch.load(fp, map_location=torch.device(device))

        # Expose the branches of the model that was actually loaded. Previously
        # freshly initialised (untrained) Wide/DeepDense modules were built
        # here and left on self.wide / self.deep even though they were never
        # part of self.model.
        self.wide = getattr(self.model, "wide", None)
        self.deep = getattr(self.model, "deepdense", None)

    def predict(self, test):
        """Return ``self.model.predict`` for the rows of ``test``.

        ``fit`` or ``load_pretrained`` must have been called first so that the
        preprocessors are fitted and ``self.model`` is set.
        """
        X = test.copy()
        wide_feature = self.wide_preprocessor.transform(X)
        deep_feature = self.deep_preprocessor.transform(X)
        return self.model.predict(X_wide=wide_feature, X_deep=deep_feature)

    @staticmethod
    def _to_dense(X):
        """Convert a sparse matrix to a dense array via ``X.toarray()``."""
        return X.toarray()

    def _genre_preprocessor(self, genre_feat):
        """Build the wide-branch transformer: count-vectorise, then densify.

        ``genre_feat`` is the column selector given to ``ColumnTransformer``.
        """
        genre_transformer = Pipeline(steps=[
            ('tokenizer', CountVectorizer()),
            # A named function instead of an assigned lambda (PEP 8 E731);
            # unlike a local lambda it can be pickled by reference.
            ('dense', FunctionTransformer(self._to_dense, validate=False)),
        ])
        return ColumnTransformer(transformers=[('genre', genre_transformer, genre_feat),])

    def _deep_preprocessor(self,embs):
        """Build the deep-branch preprocessor from ``(column, size)`` pairs."""
        return DensePreprocessor(embed_cols=embs)

In [4]:
# TODO(review): the bare `x` that used to live in this cell referenced a name
# that no visible cell defines, so it raised NameError on a fresh kernel.
# `sample_train_df` and `sample_test_df`, which the cells below depend on, are
# also never defined in the visible notebook — load them here (presumably from
# `data_path`; confirm the file names) so the notebook survives Restart & Run All.

In [5]:
# Build the model wrapper with its default configuration
# (wide column 'genres', deep columns 'userId' / 'movieId', target 'rating').
wd = wide_deep()

In [6]:
# Train with the defaults of wide_deep.fit: 10 epochs, batch size 128, 10% validation split.
# NOTE(review): `sample_train_df` is not defined in any visible cell — confirm where it is loaded.
wd.fit(sample_train_df)

  0%|          | 0/22894 [00:00<?, ?it/s]

Training


epoch 1: 100%|██████████| 22894/22894 [04:00<00:00, 95.21it/s, loss=41.4]   
valid: 100%|██████████| 2544/2544 [00:16<00:00, 157.81it/s, loss=0.88] 
epoch 2: 100%|██████████| 22894/22894 [04:00<00:00, 95.18it/s, loss=1.2]  
valid: 100%|██████████| 2544/2544 [00:16<00:00, 156.65it/s, loss=0.694]
epoch 3: 100%|██████████| 22894/22894 [04:19<00:00, 88.38it/s, loss=0.676] 
valid: 100%|██████████| 2544/2544 [00:15<00:00, 161.71it/s, loss=0.669]
epoch 4: 100%|██████████| 22894/22894 [04:32<00:00, 83.90it/s, loss=0.63]  
valid: 100%|██████████| 2544/2544 [00:17<00:00, 145.83it/s, loss=0.671]
epoch 5: 100%|██████████| 22894/22894 [04:06<00:00, 92.92it/s, loss=0.604] 
valid: 100%|██████████| 2544/2544 [00:15<00:00, 159.64it/s, loss=0.668]
epoch 6: 100%|██████████| 22894/22894 [04:26<00:00, 85.84it/s, loss=0.604] 
valid: 100%|██████████| 2544/2544 [00:15<00:00, 161.17it/s, loss=0.646]
epoch 7: 100%|██████████| 22894/22894 [04:16<00:00, 89.35it/s, loss=0.592] 
valid: 100%|██████████| 2544/2544 [0

In [7]:
# Predictions on the training frame itself (saved further down next to the true ratings).
train_pred = wd.predict(sample_train_df)

predict: 100%|██████████| 25437/25437 [01:38<00:00, 257.35it/s]


In [8]:
# Predictions on the held-out frame.
# NOTE(review): `sample_test_df` is not defined in any visible cell — confirm where it is loaded.
test_pred = wd.predict(sample_test_df)

predict: 100%|██████████| 8473/8473 [00:39<00:00, 216.10it/s]


In [9]:
# Persist the whole fitted model object so wide_deep.load_pretrained can restore it.
trained_model_dir = '../trained_model'
# torch.save does not create missing parent directories; make sure the target exists.
os.makedirs(trained_model_dir, exist_ok=True)
torch.save(wd.model, os.path.join(trained_model_dir, "wide_deep_sample.t"))

In [44]:
# Save predictions and ground-truth ratings for the train and test samples so
# the results can be evaluated without re-running the model.
model_results_dir = '../model_results'
# open(..., 'wb') fails if the directory is missing, so create it up front.
os.makedirs(model_results_dir, exist_ok=True)

arrays_to_save = [
    ("wide_deep_sample_train_pred.npy", train_pred),
    ("wide_deep_sample_test_pred.npy", test_pred),
    ("wide_deep_sample_train_true.npy", sample_train_df.rating.values),
    ("wide_deep_sample_test_true.npy", sample_test_df.rating.values),
]
for filename, values in arrays_to_save:
    with open(os.path.join(model_results_dir, filename), 'wb') as f:
        np.save(f, values)