In [3]:
# Download the file with the given Google Drive id and save it locally as features.npy.
!wget 'https://drive.usercontent.google.com/download?id=1vh3GYfYIaxPokXppuQuzt4qUop4DtyqI&export=download&authuser=1&confirm=t' -O features.npy

--2024-05-28 19:23:22--  https://drive.usercontent.google.com/download?id=1vh3GYfYIaxPokXppuQuzt4qUop4DtyqI&export=download&authuser=1&confirm=t
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 64.233.189.132, 2404:6800:4008:c07::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|64.233.189.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 76333184 (73M) [application/octet-stream]
Saving to: ‘features.npy’


2024-05-28 19:23:24 (241 MB/s) - ‘features.npy’ saved [76333184/76333184]



In [4]:
# Download the file with the given Google Drive id and save it locally as interactions.csv.
!wget 'https://drive.usercontent.google.com/download?id=1-5e2bAdzfXmvpsiqL0X7_TyKr0b6KVza&export=download&authuser=1&confirm=t' -O interactions.csv

--2024-05-28 19:23:30--  https://drive.usercontent.google.com/download?id=1-5e2bAdzfXmvpsiqL0X7_TyKr0b6KVza&export=download&authuser=1&confirm=t
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 64.233.189.132, 2404:6800:4008:c07::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|64.233.189.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 251321 (245K) [application/octet-stream]
Saving to: ‘interactions.csv’


2024-05-28 19:23:30 (104 MB/s) - ‘interactions.csv’ saved [251321/251321]



In [74]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split

In [75]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [177]:
class BPRModel(nn.Module):
    """Score (design, item) pairs with a dot product of two learned vectors.

    Each design is represented by a fixed (frozen) pretrained feature vector
    that is projected to ``dim`` dimensions by a small MLP; each item has a
    trainable ``dim``-dimensional embedding. The score of a pair is the dot
    product of the projected design vector and the item embedding.

    Args:
        num_users: Accepted for interface compatibility; not used by the model.
        num_items: Number of distinct item ids (size of the item embedding
            table). May be a Python int or a 0-dim integer tensor.
        design_vectors: 2-D array-like of shape (num_designs, feature_dim)
            holding one pretrained feature vector per design id.
        dim: Dimensionality of the shared design/item latent space.
    """

    def __init__(self, num_users, num_items, design_vectors, dim=50):
        super(BPRModel, self).__init__()
        # Force float32: NumPy arrays are frequently float64, which would not
        # match the float32 weights of the nn.Linear layers below and would
        # raise a dtype mismatch in forward().
        pretrained = torch.tensor(design_vectors, dtype=torch.float32)
        self.design_embedding = nn.Embedding.from_pretrained(pretrained, freeze=True)
        self.linear = nn.Sequential(
            nn.Linear(pretrained.shape[1], 500),
            nn.ReLU(),
            nn.Linear(500, dim)
        )
        # int() lets callers pass a 0-dim tensor (e.g. ``ids.max() + 1``).
        self.item_embedding = nn.Embedding(int(num_items), dim)
        nn.init.normal_(self.item_embedding.weight, std=0.01)

    def forward(self, design_indices, item_indices):
        """Return one score per (design, item) pair.

        ``design_indices`` and ``item_indices`` are integer tensors that are
        either the same shape or broadcastable against each other (e.g. a
        single design scored against many items).
        """
        design_embedding = self.design_embedding(design_indices)
        design_embedding = self.linear(design_embedding)
        item_embedding = self.item_embedding(item_indices)
        # Dot product over the latent dimension (the last axis).
        return (design_embedding * item_embedding).sum(dim=-1)

In [178]:
# Load the observed (design, item) interaction pairs and move the id columns
# to the training device as integer tensors.
interaction = pd.read_csv('interactions.csv')
design_tensor = torch.tensor(interaction['design'].values).to(device)
item_tensor = torch.tensor(interaction['item'].values).to(device)

# Ids are used directly as embedding row indices, so the table sizes are
# max id + 1. Keep them as plain Python ints (not 0-dim tensors) because they
# are later used as size arguments (embedding size, randint bound, arange).
n_designs = int(interaction['design'].max()) + 1
n_items = int(interaction['item'].max()) + 1

# Pretrained per-design feature vectors, looked up by design id.
features = np.load('features.npy')
assert features.shape[0] >= n_designs, (
    "features.npy has fewer rows than the largest design id in interactions.csv"
)

In [179]:
# Count how often each item id occurs. The result is reindexed over the full
# id range 0..max_id so that position i always holds the count of item id i;
# a plain groupby/count only has rows for ids that occur, which misaligns the
# tensor whenever the id range has gaps.
num_item_ids = int(interaction["item"].max()) + 1
item_counts = (
    interaction["item"]
    .value_counts()
    .reindex(range(num_item_ids), fill_value=0)
)
# Ids that never occur get a frequency of 1 instead of 0 so that the
# 1/sqrt(frequency) weighting used by the loss stays finite.
item_frequency = torch.tensor(item_counts.to_numpy()).clamp(min=1).to(device)

In [180]:
def bpr_loss(positive_scores, negative_scores, item_frequency, item_indices, a=0.5):
    """Frequency-weighted Bayesian Personalized Ranking loss.

    Args:
        positive_scores: Scores of the observed (positive) pairs.
        negative_scores: Scores of the sampled negative pairs, same shape as
            ``positive_scores``.
        item_frequency: 1-D tensor of per-item occurrence counts, indexed by
            item id.
        item_indices: Item ids whose frequencies determine the per-sample
            weights (one per score).
        a: Multiplier for the mean-weight term added to the loss.

    Returns:
        Scalar tensor: mean of ``-log(sigmoid(pos - neg)) / sqrt(freq)`` plus
        ``a * mean(1 / sqrt(freq))``.
    """
    # Clamp so an item with a zero count cannot produce an infinite weight.
    frequencies = item_frequency[item_indices].float().clamp(min=1.0)
    weights = 1.0 / torch.sqrt(frequencies)
    # logsigmoid is the numerically stable form of log(sigmoid(x)); the naive
    # composition underflows to log(0) = -inf for large negative margins.
    ranking_loss = -nn.functional.logsigmoid(positive_scores - negative_scores)
    loss = (ranking_loss * weights).mean()
    # NOTE(review): this term depends only on the item frequencies, not on any
    # model output, so it shifts the loss value but contributes no gradient.
    reg_loss = a * weights.mean()
    return loss + reg_loss

In [181]:
# Instantiate the scorer with a 50-dimensional latent space and place it on
# the selected device.
model = BPRModel(n_designs, n_items, features, dim=50)
model = model.to(device)

# Adam over the model's parameters, with L2 weight decay.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-2)

In [182]:
# Mini-batch BPR training: for every observed (design, item) pair, draw one
# random negative item and push the positive score above the negative score.
num_epochs = 30
batch_size = 64
# n_items may be a Python int or a 0-dim tensor; normalise once for randint.
num_items_total = int(n_items)

for epoch in range(num_epochs):
    # Visit the interaction pairs in a fresh random order every epoch.
    indices = torch.randperm(len(design_tensor))
    design_tensor_shuffled = design_tensor[indices]
    item_tensor_shuffled = item_tensor[indices]
    total_loss = 0

    for i in range(0, len(design_tensor), batch_size):
        design_batch = design_tensor_shuffled[i:i+batch_size]
        item_batch = item_tensor_shuffled[i:i+batch_size]

        positive_scores = model(design_batch, item_batch)

        # Vectorised rejection sampling: draw one candidate per row, then
        # redraw only the candidates that collide with a positive item of
        # this batch. This yields the same distribution (uniform over items
        # not present in item_batch) as sampling one item at a time, without
        # a Python loop and device sync per sample.
        # NOTE(review): rejection is against the batch's positives, not against
        # each design's own interaction history; confirm that is intended.
        negative_indices = torch.randint(
            0, num_items_total, (len(design_batch),), device=device
        )
        collisions = torch.isin(negative_indices, item_batch)
        while collisions.any():
            negative_indices[collisions] = torch.randint(
                0, num_items_total, (int(collisions.sum()),), device=device
            )
            collisions = torch.isin(negative_indices, item_batch)
        negative_scores = model(design_batch, negative_indices)

        # Per-sample weights are taken from the frequencies of the negatives.
        loss = bpr_loss(positive_scores, negative_scores, item_frequency, negative_indices)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # total_loss is the sum of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 385.121522963047
Epoch 2, Loss: 344.015248298645
Epoch 3, Loss: 335.3722930550575
Epoch 4, Loss: 334.1620962023735
Epoch 5, Loss: 333.01745414733887
Epoch 6, Loss: 331.31636464595795
Epoch 7, Loss: 330.79290598630905
Epoch 8, Loss: 329.34319776296616
Epoch 9, Loss: 328.8799862265587
Epoch 10, Loss: 326.6251026391983
Epoch 11, Loss: 325.4956593513489
Epoch 12, Loss: 324.02797079086304
Epoch 13, Loss: 323.5810977220535
Epoch 14, Loss: 321.5159893631935
Epoch 15, Loss: 321.514345228672
Epoch 16, Loss: 320.0829292535782
Epoch 17, Loss: 319.64409440755844
Epoch 18, Loss: 319.1051405072212
Epoch 19, Loss: 318.92776131629944
Epoch 20, Loss: 317.99119931459427
Epoch 21, Loss: 318.0650141239166
Epoch 22, Loss: 317.34606117010117
Epoch 23, Loss: 316.42103737592697
Epoch 24, Loss: 315.88643115758896
Epoch 25, Loss: 316.14790320396423
Epoch 26, Loss: 315.4399518966675
Epoch 27, Loss: 315.1131461262703
Epoch 28, Loss: 314.65364187955856
Epoch 29, Loss: 314.1431814432144
Epoch 30, Los

In [192]:
def recommend_items(model, design_idx, num_items, top_n=30):
    """Return the ids of the highest-scoring items for one design.

    Args:
        model: Callable module taking ``(design_indices, item_indices)``
            tensors and returning one score per item.
        design_idx: Id of the design to recommend for.
        num_items: Number of candidate items; ids ``0 .. num_items - 1`` are
            scored. May be a Python int or a 0-dim integer tensor.
        top_n: Maximum number of items to return.

    Returns:
        A list of at most ``top_n`` item ids, best first. Always a list, even
        when a single item is requested; shorter than ``top_n`` when fewer
        than ``top_n`` items exist.
    """
    # Build the inputs on whatever device the model lives on, rather than
    # relying on a module-level global.
    try:
        target_device = next(model.parameters()).device
    except (StopIteration, AttributeError):
        target_device = torch.device("cpu")

    total_items = int(num_items)
    # topk raises if k exceeds the number of scores, so cap it.
    k = max(0, min(int(top_n), total_items))

    design_index = torch.tensor([design_idx], device=target_device)
    item_indices = torch.arange(total_items, device=target_device)

    with torch.no_grad():
        # The single design index broadcasts against all candidate items.
        scores = model(design_index, item_indices)

    _, top_indices = torch.topk(scores, k)
    # No squeeze: scores is already 1-D, and squeezing would collapse a
    # single-element result into a bare int.
    return top_indices.cpu().numpy().tolist()

# Rank all items for design 0 and show the best-scoring item ids.
recommendations = recommend_items(model, 0, n_items)
print("Top recommended item indices:", recommendations)

Top recommended item indices: [161, 5, 4, 3, 563, 96, 261, 109, 351, 89, 405, 76, 572, 240, 124, 53, 281, 284, 884, 49, 25, 162, 97, 349, 271, 94, 51, 134, 280, 37]
