<a href="https://colab.research.google.com/github/ds-personalization/final-project-qrdecomposition_final/blob/main/notebook/wide_n_deep_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
from google.colab import drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
import os
data_path = os.path.join('/content/drive/', 'MyDrive','final-project-qrdecomposition_final','data')

In [9]:
!pip install pytorch_widedeep --quiet

In [10]:
import os
###utilities
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings("ignore")

###numpy,scipy,pandas,sklearn stacks
from scipy import sparse
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline

###torch stacks
import torch
from torch import nn
from pytorch_widedeep.preprocessing import DensePreprocessor
from pytorch_widedeep.callbacks import (
    LRHistory,
    EarlyStopping,
    ModelCheckpoint,
)
from pytorch_widedeep.optim import RAdam
from pytorch_widedeep.initializers import XavierNormal, KaimingNormal
from pytorch_widedeep.models import Wide, DeepDense, WideDeep

In [14]:
class wide_deep():
    def __init__(self,wide_cols='genres',
                    deep_cols=['userId', 'movieId'],
                    target_col = 'rating',
                    deep_embs=[64, 64],
                    deep_hidden=[64,32,16],
                    deep_dropout=[0.1, 0.1, .1],
                    deep_bachnorm=True):
        self.wide = None
        self.deep = None
        self.deep_hidden = deep_hidden
        self.deep_dropout = deep_dropout
        self.deep_bachnorm = deep_bachnorm
        self.model = None

        self.embs = [(col, dim) for col, dim in zip(deep_cols, deep_embs)]
        self.wide_preprocessor = self._genre_preprocessor(wide_cols)
        self.deep_preprocessor = DensePreprocessor(embed_cols=self.embs)
        self.target_col = target_col


    def fit(self, train, n_epochs=10, batch_size=128, val_split=.1, verbose = True):
        X, y = train.drop(self.target_col, axis = 1), train[self.target_col].values
        wide_feature = self.wide_preprocessor.fit_transform(X)
        deep_feature = self.deep_preprocessor.fit_transform(X)
        self.wide = Wide(wide_dim=np.unique(wide_feature).shape[0], pred_dim=1)
        self.deep = DeepDense(hidden_layers=self.deep_hidden, dropout=self.deep_dropout,
                      batchnorm=self.deep_bachnorm,
                      deep_column_idx=self.deep_preprocessor.deep_column_idx,
                      embed_input=self.deep_preprocessor.embeddings_input)
        self.model =  WideDeep(wide=self.wide, deepdense=self.deep)
        wide_opt = torch.optim.Adam(self.model.wide.parameters(), lr=0.01)
        deep_opt = RAdam(self.model.deepdense.parameters())
        wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
        deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)
        callbacks = [
                        LRHistory(n_epochs=n_epochs),
                        EarlyStopping(patience=5),
                        ModelCheckpoint(filepath="model_weights/wd_out"),
                    ]
        optimizers = {"wide": wide_opt, "deepdense": deep_opt}
        schedulers = {"wide": wide_sch, "deepdense": deep_sch}
        initializers = {"wide": KaimingNormal, "deepdense": XavierNormal}
        self.model.compile(method='regression',
                            optimizers=optimizers,
                        lr_schedulers=schedulers,
                        initializers=initializers,
                        callbacks=callbacks,
                        verbose=verbose)
        self.model.fit(X_wide=wide_feature, 
                  X_deep=deep_feature, 
                  target=y, 
                  n_epochs=n_epochs, 
                  batch_size=batch_size, 
                  val_split=val_split,)
        
    def predict(self, test):
        X, y = test.drop(self.target_col, axis = 1), test[self.target_col].values
        wide_feature = self.wide_preprocessor.transform(X)
        deep_feature = self.deep_preprocessor.transform(X)
        return self.model.predict(X_wide=wide_feature, X_deep=deep_feature)

    def _genre_preprocessor(self, genre_feat):
        dense_layer = lambda X: X.toarray()
        genre_transformer = Pipeline(steps=[
                ('tokenizer', CountVectorizer()),
                ('dense', FunctionTransformer(dense_layer, validate=False))   
        ])
        preproc = ColumnTransformer(transformers=[('genre', genre_transformer, genre_feat),])
        return preproc


    def _deep_preprocessor(self,embs):
        return DensePreprocessor(embed_cols=embs)

In [20]:
sample_train_df, sample_test_df = pd.read_csv(os.path.join(data_path, 'sample_train.csv')), \
                                    pd.read_csv(os.path.join(data_path, 'sample_test.csv'))
movies = pd.read_csv(os.path.join(data_path, 'movies.csv'))
sample_train_df, sample_test_df = sample_train_df.merge(movies), sample_test_df.merge(movies)

In [21]:
wd = wide_deep()

In [22]:
wd.fit(sample_train_df)

  0%|          | 0/23001 [00:00<?, ?it/s]

Training


epoch 1: 100%|██████████| 23001/23001 [07:17<00:00, 52.55it/s, loss=1.21]
valid: 100%|██████████| 2556/2556 [00:27<00:00, 92.99it/s, loss=0.68]
epoch 2: 100%|██████████| 23001/23001 [07:11<00:00, 53.27it/s, loss=0.675]
valid: 100%|██████████| 2556/2556 [00:26<00:00, 94.86it/s, loss=0.662]
epoch 3: 100%|██████████| 23001/23001 [07:06<00:00, 53.87it/s, loss=0.644]
valid: 100%|██████████| 2556/2556 [00:26<00:00, 95.94it/s, loss=0.66]
epoch 4: 100%|██████████| 23001/23001 [07:04<00:00, 54.23it/s, loss=0.616]
valid: 100%|██████████| 2556/2556 [00:26<00:00, 95.34it/s, loss=0.662]
epoch 5: 100%|██████████| 23001/23001 [07:04<00:00, 54.16it/s, loss=0.595]
valid: 100%|██████████| 2556/2556 [00:26<00:00, 95.22it/s, loss=0.661]
epoch 6: 100%|██████████| 23001/23001 [07:08<00:00, 53.72it/s, loss=0.593]
valid: 100%|██████████| 2556/2556 [00:26<00:00, 95.39it/s, loss=0.638]
epoch 7: 100%|██████████| 23001/23001 [07:07<00:00, 53.86it/s, loss=0.581]
valid: 100%|██████████| 2556/2556 [00:26<00:00, 95.1

In [35]:
train_pred = wd.predict(sample_train_df)

predict: 100%|██████████| 25557/25557 [03:02<00:00, 140.23it/s]


In [23]:
test_pred = wd.predict(sample_test_df)

predict: 100%|██████████| 8510/8510 [01:00<00:00, 139.84it/s]


In [36]:
torch.save(wd.model, os.path.join(data_path, "wide_deep_sample.t"))

In [37]:
with open(os.path.join(data_path, "wide_deep_sample_train_pred.npy"), 'wb') as f:
    np.save(f, train_pred)

In [38]:
with open(os.path.join(data_path, "wide_deep_sample_test_pred.npy"), 'wb') as f:
    np.save(f, test_pred)