In [1]:
%load_ext autoreload
%autoreload 2

import os
os.chdir("../../")
print(os.getcwd())

C:\Users\Milosz\thesis-recsys


In [2]:
import pandas as pd
import numpy as np
import functools
import operator
import json
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import namedtuple
import pickle

import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from torch.nn.functional import pad
from torch.utils.tensorboard import SummaryWriter

from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler

from models import NCF, DeepFM
from features.store import FeatureStore
from dataset.deep import DeepFMDataset, collate_fn_eval
from scripts.eval import load_model

# Print tensors with two decimals and without scientific notation.
torch.set_printoptions(precision=2, sci_mode=False)
# Seed PyTorch's RNG. As the last expression of the cell, the returned
# Generator object is what the cell displays.
torch.manual_seed(0)

<torch._C.Generator at 0x1721e2777d0>

In [3]:
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [4]:
dir_art = "data/steam"

# Read the two pickled artefacts from dir_art.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(os.path.join(dir_art, 'data.pkl'), "rb") as f:
    data = pd.read_pickle(f)
with open(os.path.join(dir_art, 'matrix.pkl'), "rb") as f:
    matrix = pd.read_pickle(f)

# Attribute tables of the item and user datastores.
item_attr, user_attr = (
    data[key].dataframe.df
    for key in ('items_datastore', 'users_datastore')
)

# Schemes of the relations, items and users datastores (in that order).
scheme_relations, scheme_items, scheme_users = (
    data[key].scheme
    for key in ('relations_datastore', 'items_datastore', 'users_datastore')
)

# Train / validation matrices stored in matrix.pkl.
train_csr, valid_csr = matrix['train_csr'], matrix['valid_csr']

In [5]:
# Build the feature store from the three schemes.
# emb_dims assigns size 4 to both the "sparse" and "varlen" entries
# (presumably embedding widths per feature kind — confirm in FeatureStore).
feature_store = FeatureStore(
    scheme_relations,
    scheme_items,
    scheme_users,
    emb_dims=dict(sparse=4, varlen=4),
)

In [7]:
# Checkpoint file to restore.
model_path = "runs/DeepFM/2023-11-11_17-00-00/model.pth"

# Restore the model through the project's loader on the selected device.
model = load_model(
    model_path=model_path,
    model_kwargs=dict(feature_store=feature_store, hidden_dim=[128, 64]),
    device=device,
)

# Loss on raw logits, and an RMSprop optimizer over all model parameters.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4, momentum=0.9)

In [8]:
# Flat boolean vector with one entry per row of valid_csr:
# True where that row's sum is non-zero.
relevance_mask = np.ravel(np.asarray(valid_csr.sum(axis=1) != 0))

In [9]:
# Shape of the index array of True entries, i.e. how many rows are selected.
np.flatnonzero(relevance_mask).shape

(289163,)

In [10]:
# Number of candidate items paired with every user. Named once instead of
# being repeated as a literal; presumably the catalogue size — TODO confirm
# against valid_csr.shape[1].
N_ITEMS = 1231

# Row indices selected by relevance_mask. Their count is read from the mask
# itself rather than hard-coded, so user_idx and item_idx always end up with
# matching lengths even when the data changes.
relevant_users = np.flatnonzero(relevance_mask)

# Every selected user repeated N_ITEMS times: u0, u0, ..., u1, u1, ...
user_idx = np.repeat(relevant_users, N_ITEMS)
# Item ids 0..N_ITEMS-1 cycled once per selected user. int64 is requested
# explicitly because that is what torch.arange produced before.
item_idx = np.tile(np.arange(N_ITEMS, dtype=np.int64), relevant_users.size)

In [13]:
# Two-column array of (user, item) pairs, one pair per row.
eval_edge_index = np.column_stack((user_idx, item_idx))

In [14]:
# Dataset over every (user, item) pair, with neg_sampl set to 0.
eval_dataset = DeepFMDataset(
    feature_store,
    eval_edge_index,
    user_attr,
    item_attr,
    neg_sampl=0,
)

# Ordered batches of 1231 samples; the final partial batch is kept.
# 1231 is also the number of items paired with each user, so a batch
# presumably lines up with a single user — confirm against the dataset's indexing.
eval_loader = DataLoader(
    dataset=eval_dataset,
    batch_size=1231,
    shuffle=False,
    drop_last=False,
    collate_fn=collate_fn_eval,
)

  return torch.tensor([x.iloc[self.pad_index[0]:self.pad_index[1]].values])


In [33]:
@torch.no_grad()
def recommend_k_deep(model, dataloader, k=10):
    """Score every batch of ``dataloader`` with ``model`` and return probabilities.

    Args:
        model: A ``torch.nn.Module``; its outputs are passed through a sigmoid.
        dataloader: Iterable of batches; each batch must support ``.to(device)``.
        k: Currently unused. Kept so existing callers keep working.

    Returns:
        numpy.ndarray holding the sigmoid of the model outputs for all batches,
        concatenated along the first axis in iteration order. An empty array is
        returned when ``dataloader`` yields no batches.

    Notes:
        Reads the notebook-level ``device`` variable. The model is switched to
        eval mode and is not switched back afterwards.
    """
    model.eval()
    preds = []
    for batch in tqdm(dataloader):
        batch = batch.to(device)
        # Move each batch's scores to host memory immediately so the device
        # does not have to hold every prediction until the loop finishes.
        preds.append(model(batch).sigmoid().cpu())

    if not preds:
        # torch.cat raises on an empty list; return an empty result instead.
        return torch.empty(0).numpy()

    return torch.cat(preds, dim=0).numpy()

In [34]:
# Score every (user, item) pair in eval_loader.
# The recorded run of this cell was interrupted by hand (KeyboardInterrupt)
# after 82 of 289163 batches.
prob = recommend_k_deep(model=model, dataloader=eval_loader)

  0%|                                                                                                                                                                                                                             | 82/289163 [00:14<13:57:53,  5.75it/s]

KeyboardInterrupt



In [32]:
# View the scores as one row per user and one column per candidate item
# (1231 items are paired with every user when the edge index is built).
# Letting NumPy infer the row count keeps this valid for any number of users;
# a fixed row count of 3 only fits a three-user subset and raises on the
# full evaluation set, whose size is not divisible by 3.
prob.reshape((-1, 1231))

array([[8.85459721e-01, 1.16887107e-01, 1.44646898e-01, ...,
        9.99862514e-03, 1.66352867e-04, 1.00000000e+00],
       [8.46526504e-01, 1.15282051e-01, 1.03380837e-01, ...,
        2.31476966e-02, 2.48803030e-04, 8.98127019e-01],
       [8.48828077e-01, 9.29745659e-02, 1.08394414e-01, ...,
        1.93886645e-02, 2.97176302e-04, 1.00000000e+00]], dtype=float32)

In [None]:
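# NOTE: disabled draft of a batched top-10 recommendation loop, kept for reference.
# It refers to names that are not defined in the cells above (user_batches,
# item_tensor, evaluate, remove_past_interactions, recommended_batches), so it
# cannot be re-enabled as-is.
# for user_batch in tqdm(user_batches):
#     X = torch.cat([user_batch.repeat(1231, 1).t().reshape(-1, 1), item_tensor], dim=1)
#     prob = evaluate(model, X)
#     prob = prob.view(1000, -1)
#     prob = remove_past_interactions(prob, user_batch)
#     recommended_batches.append(prob.topk(10, 1)[1])
    
# recommendations = torch.cat(recommended_batches, 0)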

In [None]:
from reco_env import RecoEnv
from utils import import_data_for_env
import gym

In [None]:
# Create the environment identified by RecoEnv.id, forwarding whatever
# keyword arguments import_data_for_env() returns.
env = gym.make(
    RecoEnv.id,
    **import_data_for_env(),
)

In [None]:
# Count occurrences of each user_id value in `rec`.
# NOTE(review): `rec` is not defined in any cell visible here (presumably a
# DataFrame with a user_id column) — confirm where it comes from, otherwise
# this cell fails on a fresh kernel.
vc = rec.user_id.value_counts()

In [None]:
# Display the counts computed from rec.user_id.
vc

In [None]:
# Show only the entries whose count is at least 3.
vc[vc >= 3]

In [None]:
# Draw the counts on an explicit figure/axes and label them so the chart can
# be read on its own. The trailing semicolon suppresses the repr of the
# returned objects in the cell output.
fig, ax = plt.subplots()
ax.plot(vc)
ax.set(xlabel="user_id", ylabel="count", title="value_counts of user_id");