In [1]:
# Imports
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from time import time

# Assuming these are custom modules you have in your project
from interactions import Interactions
from evaluation import evaluate_ranking
from utils import set_seed, shuffle, str2bool
from caser import Caser

In [2]:
class Recommender(object):
    def __init__(self, n_iter, batch_size, learning_rate, l2, neg_samples, model_args, use_cuda, precomputed_embeddings):
        self._num_items = None
        self._num_users = None
        self._net = None
        self.model_args = model_args
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._neg_samples = neg_samples
        self._device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
        self.precomputed_embeddings = torch.from_numpy(precomputed_embeddings).float().to(self._device)
        self._initialized = False

    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users
        self.test_sequence = interactions.test_sequences

        # Assume the model_args have the necessary parameters
        self._net = Caser(self._num_users, self._num_items, self.model_args).to(self._device)

        self._optimizer = optim.Adam(self._net.parameters(), weight_decay=self._l2, lr=self._learning_rate)
        self._initialized = True

    def fit(self, train, test, verbose=False):
        if not self._initialized:
            self._initialize(train)

        # Convert to sequences, targets, and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        L, T = train.sequences.L, train.sequences.T

        n_train = sequences_np.shape[0]

        print(f'Total training instances: {n_train}')

        for epoch_num in range(self._n_iter):
            t1 = time()

            # Set model to training mode
            self._net.train()

            users_np, sequences_np, targets_np = shuffle(users_np, sequences_np, targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            # Convert numpy arrays to PyTorch tensors and move them to the corresponding devices
            users = torch.from_numpy(users_np).long().to(self._device)
            sequences = torch.from_numpy(sequences_np).long().to(self._device)
            targets = torch.from_numpy(targets_np).long().to(self._device)
            negatives = torch.from_numpy(negatives_np).long().to(self._device)

            items_to_predict = torch.cat((targets, negatives), 1)
            items_prediction = self._net(sequences, users, items_to_predict)

            targets_prediction = items_prediction[:, :targets.size(1)]
            negatives_prediction = items_prediction[:, targets.size(1):]

            # Compute the binary cross-entropy loss
            positive_loss = -torch.mean(torch.log(torch.sigmoid(targets_prediction) + 1e-8))
            negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction) + 1e-8))
            loss = positive_loss + negative_loss

            loss.backward()
            self._optimizer.step()
            self._optimizer.zero_grad()

            t2 = time()

            print(f"Epoch {epoch_num + 1}/{self._n_iter} [{t2 - t1:.2f}s]\tloss={loss.item():.4f}")

    def _generate_negative_samples(self, users, interactions, n):
        """
        Sample negative items for each user.
        """
        users_ = users.squeeze()
        negative_samples = np.zeros((users_.shape[0], n), np.int64)
        if not hasattr(self, '_candidate'):
            self._candidate = {}
            for u, row in enumerate(interactions.tocsr()):
                self._candidate[u] = list(set(np.arange(interactions.num_items)) - set(row.indices))

        for i, u in enumerate(users_):
            for j in range(n):
                x = self._candidate[u]
                negative_samples[i, j] = x[np.random.randint(len(x))]

        return negative_samples

    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.
        """
        if self.test_sequence is None:
            raise ValueError('Missing test sequences, cannot make predictions')

        self._net.eval()

        with torch.no_grad():
            sequences_np = self.test_sequence.sequences[user_id, :]
            sequences_np = np.atleast_2d(sequences_np)

            if item_ids is None:
                item_ids = np.arange(self._num_items).reshape(-1)

            sequences = torch.from_numpy(sequences_np).long().to(self._device)
            item_ids = torch.from_numpy(item_ids).long().to(self._device)
            user_id = torch.from_numpy(np.array([[user_id]])).long().to(self._device)

            # Reshape inputs to match the expected shapes
            sequences = sequences.unsqueeze(0)  # Add batch dimension
            user_id = user_id.squeeze(1)  # Remove unnecessary dimension

            out = self._net(sequences, user_id, item_ids, for_pred=True)

        return out.cpu().numpy().flatten()

    def load_pretrained_model(self, path):
        if self._net is None:
            raise ValueError("Model is not initialized. Please call the _initialize method first.")
        
        pretrained_dict = torch.load(path, map_location=self._device)
        model_dict = self._net.state_dict()

        # Filter out unnecessary keys
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and v.shape == model_dict[k].shape}

        # Overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)

        # Load the new state dict into the model
        self._net.load_state_dict(model_dict)

    def save_model(self, path):
        torch.save(self._net.state_dict(), path)
        print(f"Model saved to {path}")

In [3]:
def train_model(model, train_data, test_data, config, pretrained_model_path=None, is_pretrain=True):
    print(f"{'Pretraining' if is_pretrain else 'Fine-tuning'} the model...")
    print(f"Number of users: {train_data.num_users}")
    print(f"Number of items: {train_data.num_items}")
    print(f"Number of interactions: {len(train_data.sequences.sequences)}")
    
    # Initialize the model before loading
    if not model._initialized:
        model._initialize(train_data)

    if not is_pretrain and pretrained_model_path:
        model.load_pretrained_model(pretrained_model_path)
    
    model.fit(train_data, test_data, verbose=True)
    
    if is_pretrain:
        model.save_model('edx_pretrained_model.pth')
    else:
        model.save_model('coursera_finetuned_model.pth')

    # Perform final evaluation
    print("Performing final evaluation...")
    precision, recall, mean_aps, mrr, ndcg = evaluate_ranking(model, test_data, train_data, k=[1, 5, 10])
    print(f"Final results:")
    print(f"Precision: @1={precision[0].mean():.4f}, @5={precision[1].mean():.4f}, @10={precision[2].mean():.4f}")
    print(f"Recall: @1={recall[0].mean():.4f}, @5={recall[1].mean():.4f}, @10={recall[2].mean():.4f}")
    print(f"MAP={mean_aps:.4f}, MRR={mrr:.4f}, NDCG={ndcg:.4f}")

In [4]:
# Data arguments
edx_train_root = 'datasets/edx/train.txt'
edx_test_root = 'datasets/edx/test.txt'
coursera_train_root = 'datasets/coursera/train.txt'
coursera_test_root = 'datasets/coursera/test.txt'
L = 5
T = 3

# Train arguments
n_iter = 20
seed = 1234
batch_size = 512
learning_rate = 1e-3
l2 = 1e-6
neg_samples = 3
use_cuda = True  # ให้แน่ใจว่าคุณมี GPU พร้อมใช้งาน ถ้าไม่มีให้ตั้งค่าเป็น False

# Model arguments
d = 512
nv = 4
nh = 16
drop = 0.5
ac_conv = 'relu'
ac_fc = 'relu'

# สร้าง config object เพื่อให้สามารถใช้งานได้เหมือนเดิม
class Config:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

config = Config(
    edx_train_root=edx_train_root,
    edx_test_root=edx_test_root,
    coursera_train_root=coursera_train_root,
    coursera_test_root=coursera_test_root,
    L=L,
    T=T,
    n_iter=n_iter,
    seed=seed,
    batch_size=batch_size,
    learning_rate=learning_rate,
    l2=l2,
    neg_samples=neg_samples,
    use_cuda=use_cuda
)

model_config = Config(
    d=d,
    nv=nv,
    nh=nh,
    drop=drop,
    ac_conv=ac_conv,
    ac_fc=ac_fc,
    L=L
)

# แสดงค่า config เพื่อตรวจสอบ
print("Configuration:")
for key, value in config.__dict__.items():
    print(f"{key}: {value}")

print("\nModel configuration:")
for key, value in model_config.__dict__.items():
    print(f"{key}: {value}")

Configuration:
edx_train_root: datasets/edx/train.txt
edx_test_root: datasets/edx/test.txt
coursera_train_root: datasets/coursera/train.txt
coursera_test_root: datasets/coursera/test.txt
L: 5
T: 3
n_iter: 20
seed: 1234
batch_size: 512
learning_rate: 0.001
l2: 1e-06
neg_samples: 3
use_cuda: True

Model configuration:
d: 512
nv: 4
nh: 16
drop: 0.5
ac_conv: relu
ac_fc: relu
L: 5


In [5]:
# Set seed for reproducibility
set_seed(seed, use_cuda)

# Load edX dataset
edx_train = Interactions(edx_train_root)
edx_train.to_sequence(L, T)
edx_test = Interactions(edx_test_root, user_map=edx_train.user_map, item_map=edx_train.item_map)

# Load Coursera dataset
coursera_train = Interactions(coursera_train_root)
coursera_train.to_sequence(L, T)
coursera_test = Interactions(coursera_test_root, user_map=coursera_train.user_map, item_map=coursera_train.item_map)

# Load precomputed embeddings for edX
edx_precomputed_embeddings = np.load("datasets/edx/precomputed_embeddings.npy")

print("Configuration:")
for key, value in config.__dict__.items():
    print(f"{key}: {value}")
print("\nModel configuration:")
for key, value in model_config.__dict__.items():
    print(f"{key}: {value}")

# Create and pretrain the model on edX data
edx_model = Recommender(n_iter=n_iter,
                        batch_size=batch_size,
                        learning_rate=learning_rate,
                        l2=l2,
                        neg_samples=neg_samples,
                        model_args=model_config,
                        use_cuda=use_cuda,
                        precomputed_embeddings=edx_precomputed_embeddings)

train_model(edx_model, edx_train, edx_test, config, is_pretrain=True)

# Load precomputed embeddings for Coursera
coursera_precomputed_embeddings = np.load("datasets/coursera/precomputed_embeddings.npy")

# Create a new model for Coursera, initialize with pretrained weights
coursera_model = Recommender(n_iter=n_iter,
                                batch_size=batch_size,
                                learning_rate=learning_rate,
                                l2=l2,
                                neg_samples=neg_samples,
                                model_args=model_config,
                                use_cuda=use_cuda,
                                precomputed_embeddings=coursera_precomputed_embeddings)

# Fine-tune on Coursera data
train_model(coursera_model, coursera_train, coursera_test, config, pretrained_model_path='edx_pretrained_model.pth', is_pretrain=False)

print("Pretraining and fine-tuning completed successfully.")

Total sequences after filtering: 4469
item_map {'0': 0, '1': 1, '6': 2, '2': 3, '8': 4, '13': 5, '5': 6, '3': 7, '11': 8, '15': 9, '10': 10, '9': 11, '4': 12, '7': 13, '14': 14, '12': 15}
Total sequences after filtering: 168
item_map {'224': 0, '183': 1, '225': 2, '127': 3, '58': 4, '221': 5, '169': 6, '120': 7, '121': 8, '15': 9, '135': 10, '255': 11, '258': 12, '100': 13, '11': 14, '0': 15, '162': 16, '256': 17, '118': 18, '266': 19, '267': 20, '265': 21, '7': 22, '39': 23, '52': 24, '27': 25, '57': 26, '274': 27, '304': 28, '214': 29, '16': 30, '97': 31, '43': 32, '95': 33, '294': 34, '308': 35, '136': 36, '20': 37, '170': 38, '3': 39, '145': 40, '59': 41, '142': 42, '4': 43, '107': 44, '1': 45, '299': 46, '326': 47, '186': 48, '47': 49, '250': 50, '87': 51, '5': 52, '166': 53, '175': 54, '105': 55, '330': 56, '204': 57, '309': 58, '101': 59, '296': 60, '227': 61, '226': 62, '228': 63, '88': 64, '8': 65, '273': 66, '178': 67, '132': 68, '192': 69, '229': 70, '319': 71, '248': 72, '2

  pretrained_dict = torch.load(path, map_location=self._device)


Total training instances: 168
Epoch 1/20 [0.03s]	loss=1.3861
Epoch 2/20 [0.00s]	loss=1.3394
Epoch 3/20 [0.00s]	loss=1.2615
Epoch 4/20 [0.00s]	loss=1.1680
Epoch 5/20 [0.01s]	loss=1.0411
Epoch 6/20 [0.00s]	loss=0.9775
Epoch 7/20 [0.01s]	loss=0.9432
Epoch 8/20 [0.00s]	loss=0.9226
Epoch 9/20 [0.01s]	loss=0.7992
Epoch 10/20 [0.00s]	loss=0.7961
Epoch 11/20 [0.00s]	loss=0.8621
Epoch 12/20 [0.01s]	loss=0.7270
Epoch 13/20 [0.00s]	loss=0.7398
Epoch 14/20 [0.01s]	loss=0.6914
Epoch 15/20 [0.00s]	loss=0.7220
Epoch 16/20 [0.01s]	loss=0.6643
Epoch 17/20 [0.00s]	loss=0.6123
Epoch 18/20 [0.00s]	loss=0.6024
Epoch 19/20 [0.02s]	loss=0.5647
Epoch 20/20 [0.00s]	loss=0.5821
Model saved to coursera_finetuned_model.pth
Performing final evaluation...
Processed 465 valid users out of 465 total users
Final results:
Precision: @1=0.0108, @5=0.0159, @10=0.0146
Recall: @1=0.0065, @5=0.0699, @10=0.1319
MAP=0.0403, MRR=0.0381, NDCG=0.1355
Pretraining and fine-tuning completed successfully.
