In [1]:
!wget 'https://drive.usercontent.google.com/download?id=1vh3GYfYIaxPokXppuQuzt4qUop4DtyqI&export=download&authuser=1&confirm=t' -O features.npy

--2024-05-29 08:51:06--  https://drive.usercontent.google.com/download?id=1vh3GYfYIaxPokXppuQuzt4qUop4DtyqI&export=download&authuser=1&confirm=t
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.250.145.132, 2a00:1450:4013:c14::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.250.145.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 76333184 (73M) [application/octet-stream]
Saving to: ‘features.npy’


2024-05-29 08:51:10 (122 MB/s) - ‘features.npy’ saved [76333184/76333184]



In [2]:
!wget 'https://drive.usercontent.google.com/download?id=1-5e2bAdzfXmvpsiqL0X7_TyKr0b6KVza&export=download&authuser=1&confirm=t' -O interactions.csv

--2024-05-29 08:51:10--  https://drive.usercontent.google.com/download?id=1-5e2bAdzfXmvpsiqL0X7_TyKr0b6KVza&export=download&authuser=1&confirm=t
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.250.145.132, 2a00:1450:4013:c14::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.250.145.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 251321 (245K) [application/octet-stream]
Saving to: ‘interactions.csv’


2024-05-29 08:51:10 (96.2 MB/s) - ‘interactions.csv’ saved [251321/251321]



In [3]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split

In [4]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
class BPRModel(nn.Module):
    """Dot-product scorer between projected design features and item embeddings.

    Each design is represented by a fixed (frozen) feature vector that is
    projected to ``dim`` dimensions by a small MLP; each item has a learned
    ``dim``-dimensional embedding. The score of a (design, item) pair is the
    dot product of the two vectors.

    Args:
        num_users: Unused; kept so existing call sites keep working.
        num_items: Number of distinct items (size of the item embedding table).
        design_vectors: 2-D array-like of shape (num_designs, feature_dim)
            holding the pretrained design features.
        dim: Size of the shared latent space.
    """

    def __init__(self, num_users, num_items, design_vectors, dim=50):
        super().__init__()
        # Cast explicitly to float32: arrays loaded with NumPy may be float64,
        # which would make the frozen embedding float64 and then fail inside
        # the float32 nn.Linear layers with a dtype-mismatch error.
        pretrained = torch.tensor(design_vectors, dtype=torch.float32)
        self.design_embedding = nn.Embedding.from_pretrained(pretrained, freeze=True)
        self.linear = nn.Sequential(
            nn.Linear(pretrained.shape[1], 500),
            nn.ReLU(),
            nn.Linear(500, dim)
        )
        # int() lets callers pass either a Python int or a 0-dim tensor.
        self.item_embedding = nn.Embedding(int(num_items), dim)
        nn.init.normal_(self.item_embedding.weight, std=0.01)

    def forward(self, design_indices, item_indices):
        """Return one score per (design, item) pair.

        ``design_indices`` and ``item_indices`` are integer tensors whose
        shapes must be broadcast-compatible (e.g. a single design scored
        against many items). The feature axis is reduced, so the result has
        the broadcast shape of the two index tensors.
        """
        design_embedding = self.design_embedding(design_indices)
        design_embedding = self.linear(design_embedding)
        item_embedding = self.item_embedding(item_indices)
        # Reduce over the last (feature) axis; identical to dim=1 for 1-D index inputs.
        return (design_embedding * item_embedding).sum(dim=-1)

In [6]:
# Load the interaction table; the code below reads its 'design' and 'item' columns as integer ids.
interaction = pd.read_csv('interactions.csv')
design_tensor = torch.tensor(interaction['design'].values).to(device)
item_tensor = torch.tensor(interaction['item'].values).to(device)

# Table sizes as plain Python ints (ids are treated as 0-based, so size = max id + 1).
# Keeping these as ints rather than 0-dim device tensors avoids passing tensors
# where sizes are expected (nn.Embedding, torch.randint, torch.arange).
n_designs = int(interaction['design'].max()) + 1
n_items = int(interaction['item'].max()) + 1

# Pretrained design feature matrix; row i is used as the feature vector of design id i.
features = np.load('features.npy')
assert features.shape[0] >= n_designs, "features.npy has fewer rows than the largest design id"

In [7]:
# Dense per-item interaction counts: position k holds the count for item id k.
# The reindex fills ids in [0, max id] that never occur, so that indexing
# item_frequency by an item id is always in range and correctly aligned
# (a plain groupby/count only has rows for ids that actually appear).
item_counts = (
    interaction["item"]
    .value_counts()
    .reindex(range(int(interaction["item"].max()) + 1), fill_value=0)
)
# Clamp to >= 1 so that 1 / frequency stays finite for items with no interactions.
item_frequency = torch.tensor(item_counts.to_numpy()).clamp(min=1).to(device)

In [8]:
def bpr_loss(positive_scores, negative_scores, item_frequency, item_indices, a=0.5):
    """Frequency-weighted BPR loss.

    Args:
        positive_scores: Scores of the observed (design, item) pairs.
        negative_scores: Scores of the sampled negative pairs, same shape.
        item_frequency: 1-D tensor of per-item counts, indexed by item id.
        item_indices: Item ids used to look up the per-sample weights.
        a: Coefficient of the additive mean-weight term.

    Returns:
        Scalar tensor: mean of ``-log(sigmoid(pos - neg)) / frequency`` plus
        ``a * mean(1 / frequency)``.

    Note:
        A zero entry in ``item_frequency`` yields an infinite weight; callers
        should make sure every looked-up count is positive.
    """
    # Inverse-frequency weighting (an inverse-sqrt variant is a possible alternative).
    weights = 1.0 / item_frequency[item_indices].float()
    # logsigmoid is the numerically stable form of log(sigmoid(x)); the naive
    # composition underflows to log(0) = -inf for large negative margins.
    pairwise = -nn.functional.logsigmoid(positive_scores - negative_scores)
    loss = (pairwise * weights).mean()
    reg_loss = a * weights.mean()
    return loss + reg_loss

In [9]:
# Build the scorer from the loaded feature matrix and move it to the chosen device.
model = BPRModel(num_users=n_designs, num_items=n_items, design_vectors=features, dim=50)
model = model.to(device)

# Adam with a small L2 penalty (weight_decay) on the parameters.
optimizer = optim.Adam(params=model.parameters(), lr=0.01, weight_decay=0.0001)

In [10]:
NUM_EPOCHS = 30
BATCH_SIZE = 64

num_interactions = len(design_tensor)
num_items_total = int(n_items)  # accepts a Python int or a 0-dim tensor

for epoch in range(NUM_EPOCHS):
    # Reshuffle the interactions every epoch (permutation created on the data's device).
    indices = torch.randperm(num_interactions, device=device)
    design_tensor_shuffled = design_tensor[indices]
    item_tensor_shuffled = item_tensor[indices]
    total_loss = 0

    for i in range(0, num_interactions, BATCH_SIZE):
        design_batch = design_tensor_shuffled[i:i+BATCH_SIZE]
        item_batch = item_tensor_shuffled[i:i+BATCH_SIZE]

        positive_scores = model(design_batch, item_batch)

        # Negative sampling: draw uniformly from the items that do NOT occur as
        # a positive anywhere in this batch. This is the same distribution as
        # per-sample rejection sampling, but done in one vectorised step with
        # no Python loop and no per-draw device synchronisation.
        is_candidate = torch.ones(num_items_total, dtype=torch.bool, device=device)
        is_candidate[item_batch] = False
        candidates = is_candidate.nonzero(as_tuple=True)[0]
        if candidates.numel() == 0:
            # Rejection sampling would spin forever in this situation.
            raise RuntimeError("Every item appears in the batch; no negative item can be sampled.")
        picks = torch.randint(0, candidates.numel(), (len(design_batch),), device=device)
        negative_indices = candidates[picks]
        negative_scores = model(design_batch, negative_indices)

        # NOTE(review): the weights are looked up by the *negative* item ids,
        # not the positive ones — confirm this is the intended weighting.
        loss = bpr_loss(positive_scores, negative_scores, item_frequency, negative_indices)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # total_loss is the sum of the per-batch losses over the epoch (not an average).
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 299.13902109861374
Epoch 2, Loss: 273.67852783203125
Epoch 3, Loss: 263.9423939585686
Epoch 4, Loss: 262.5723223686218
Epoch 5, Loss: 259.96655428409576
Epoch 6, Loss: 256.92868530750275
Epoch 7, Loss: 258.66577285528183
Epoch 8, Loss: 255.45046645402908
Epoch 9, Loss: 255.08473962545395
Epoch 10, Loss: 250.4937175810337
Epoch 11, Loss: 248.75825661420822
Epoch 12, Loss: 247.15264761447906
Epoch 13, Loss: 245.16352504491806
Epoch 14, Loss: 244.1954089999199
Epoch 15, Loss: 242.49109503626823
Epoch 16, Loss: 241.09044387936592
Epoch 17, Loss: 240.71335616707802
Epoch 18, Loss: 239.89184266328812
Epoch 19, Loss: 239.14803498983383
Epoch 20, Loss: 239.67493495345116
Epoch 21, Loss: 240.46439629793167
Epoch 22, Loss: 238.84125924110413
Epoch 23, Loss: 238.2826208770275
Epoch 24, Loss: 239.9125131368637
Epoch 25, Loss: 238.0294712483883
Epoch 26, Loss: 239.26156905293465
Epoch 27, Loss: 239.51940208673477
Epoch 28, Loss: 238.02664676308632
Epoch 29, Loss: 237.68572235107422
E

In [13]:
def recommend_items(model, design_idx, num_items, top_n=30):
    """Return the ids of the highest-scoring items for one design.

    Args:
        model: Module called as ``model(design_indices, item_indices)`` that
            returns one score per item.
        design_idx: Integer id of the design to recommend for.
        num_items: Total number of items; ids ``0 .. num_items - 1`` are scored.
            May be a Python int or a 0-dim tensor.
        top_n: Maximum number of items to return.

    Returns:
        A list of item ids ordered from highest to lowest score. The list has
        ``min(top_n, num_items)`` entries and is always a list, including when
        ``top_n == 1``.
    """
    # Run on whatever device the model lives on instead of relying on a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_items = int(num_items)
    design_tensor = torch.tensor([design_idx], device=target_device)
    item_indices = torch.arange(num_items, device=target_device)

    with torch.no_grad():
        # The single design index broadcasts against all item indices.
        scores = model(design_tensor, item_indices).reshape(-1)

    # topk raises if k exceeds the number of scores, so cap it.
    k = max(0, min(int(top_n), scores.numel()))
    _, top_indices = torch.topk(scores, k)

    return top_indices.cpu().tolist()

# Example: top recommendations for design id 20, scored against every item.
EXAMPLE_DESIGN_IDX = 20
recommendations = recommend_items(model, EXAMPLE_DESIGN_IDX, n_items)
print("Top recommended item indices:", recommendations)

Top recommended item indices: [5, 76, 201, 2684, 109, 89, 533, 3, 293, 118, 135, 4, 1487, 0, 370, 1907, 96, 25, 253, 26, 94, 892, 779, 864, 3941, 3021, 281, 2109, 120, 3609]


In [15]:
# Persist only the weights (state dict) of the model, not the pickled module object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'model.pth')