In [4]:
%load_ext autoreload
%autoreload 2

from src.model.news_encoder import LNEConfig
from src.model.user_encoder import CPJAConfig, PEConfig,NSAConfig,PAUEConfig
from src.model.popularity_predictor import TANPPConfig, CRGConfig, CBPDConfig, RBPDConfig, REConfig
from src.model.pprec import PPRConfig, PAGConfig, PPRec

from src.data.split import EBNeRDSplit

import torch

In [2]:
import os

# Point the project at the local data folder through PPREC_DATA_FOLDER.
# `setdefault` keeps a value that is already present in the environment
# (e.g. exported in the shell before starting Jupyter), so the
# machine-specific path below only acts as a fallback and does not have to
# be edited on every other machine.
os.environ.setdefault(
    'PPREC_DATA_FOLDER',
    '/Users/sohamchatterjee/Documents/UvA/RecSYS/Project/ebnerd_data',
)
# Show the folder that is actually in effect.
print(os.environ.get("PPREC_DATA_FOLDER"))


/Users/sohamchatterjee/Documents/UvA/RecSYS/Project/ebnerd_data


Some data for testing


In [6]:
# Instantiate the EBNeRD split with its default arguments.
# NOTE(review): presumably this locates its data through PPREC_DATA_FOLDER,
# so that variable has to be set before this cell runs -- confirm.
split = EBNeRDSplit()

Configuration of the model

In [7]:
# This depends on how we're going to build the dataloader.
max_clicked_articles_in_history = 50

# Experiment configuration. Any combination of values should work; for
# instance, size_n of the user news encoder doesn't have to match size_n of
# the popularity news encoder. The values are filled in the way they
# implemented it -- apart from the news encoder, of course, which still uses
# the embeddings from the artifacts.

# News encoders: one for the user side, one for the popularity side.
user_news_encoder_config = LNEConfig(size_n=400, model="bert")
popularity_news_encoder_config = LNEConfig(size_n=400, model="bert")

# Popularity-aware user encoder and its sub-configurations.
popularity_embedding_config = PEConfig(size_p=100, max_ctr=200)
news_self_attention_config = NSAConfig(n_attention_heads=20, head_output_size=20)
content_popularity_joint_attention_config = CPJAConfig(weight_size=100)
user_encoder_config = PAUEConfig(
    popularity_embedding_config=popularity_embedding_config,
    news_self_attention_config=news_self_attention_config,
    content_popularity_joint_attention_config=content_popularity_joint_attention_config,
)

# Time-aware news popularity predictor and its sub-configurations.
recency_based_popularity_dense_config = RBPDConfig()
content_based_popularity_dense_config = CBPDConfig()
recency_embedding_config = REConfig(r_size=100, max_recency=1500)
content_recency_gate_config = CRGConfig()
popularity_predictor_config = TANPPConfig(
    recency_based_popularity_dense_config=recency_based_popularity_dense_config,
    content_based_popularity_dense_config=content_based_popularity_dense_config,
    recency_embedding_config=recency_embedding_config,
    content_recency_gate_config=content_recency_gate_config,
)

# Gate configuration for the aggregator.
aggregator_gate_config = PAGConfig()

# Full model configuration assembled from the parts above.
pprec_config = PPRConfig(
    user_news_encoder_config=user_news_encoder_config,
    popularity_news_encoder_config=popularity_news_encoder_config,
    user_encoder_config=user_encoder_config,
    popularity_predictor_config=popularity_predictor_config,
    aggregator_gate_config=aggregator_gate_config,
)

In [8]:
# Build the PPRec model on the CPU from the configuration object and the
# history-length limit defined earlier in the notebook.
cpu_device = torch.device("cpu")
model = PPRec(
    config=pprec_config,
    device=cpu_device,
    max_clicked=max_clicked_articles_in_history,
)

In [9]:
# Print the model's text representation to inspect its submodules and their sizes.
print(model)

PPRec(
  (user_news_encoder): LookupNewsEncoder(
    (fcout): Linear(in_features=768, out_features=400, bias=True)
  )
  (popularity_news_encoder): LookupNewsEncoder(
    (fcout): Linear(in_features=768, out_features=400, bias=True)
  )
  (popularity_predictor): TimeAwareNewsPopularityPredictor(
    (recency_embedding): RecencyEmbedding(
      (embedding): Embedding(1500, 100)
    )
    (recency_based_popularity_dense): RecencyBasedPopularityDense(
      (dense): Linear(in_features=100, out_features=1, bias=True)
    )
    (content_based_popularity_dense): ContentBasedPopularityDense(
      (dense): Linear(in_features=400, out_features=1, bias=True)
    )
    (content_recency_gate): ContentRecencyGate()
  )
  (user_encoder): PopularityAwareUserEncoder(
    (popularity_embedding): PopularityEmbedding(
      (embedding): Embedding(200, 100)
    )
    (news_self_attention): NewsSelfAttention()
    (content_popularity_joint_attention): ContentPopularityJointAttention()
  )
  (aggregator_ga