# SASRec SUM Seqemb and userllmemb

In [58]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader  # Добавленные импорты
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50        # Определение max_len
batch_size = 128    # Определение batch_size

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

display(train_data.head())

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        # Получение эмбеддинга пользователя
        embedding = self.user_embeddings.get(user_id, [0.0] * 1536)  # Обработка отсутствующих эмбеддингов
        user_emb = torch.tensor(embedding, dtype=torch.float)

        # Паддинг последовательности
        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов и DataLoader
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, user2embedding, max_len)
test_dataset = MovieLensDataset(test_sequences, test_user_ids, user2embedding, max_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

class LLM4SASRec(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50):
        super(LLM4SASRec, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)  # +1 для паддинга
        self.position_embedding = nn.Embedding(max_len, embedding_dim)

        # Многослойный перцептрон для сжатия эмбеддингов пользователей
        self.user_mlp = nn.Sequential(
            nn.Linear(user_embedding_dim, 512),  # Сжатие до промежуточной размерности
            nn.ReLU(),
            nn.Linear(512, embedding_dim)  # Сжатие до итоговой размерности embedding_dim
        )

        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(embedding_dim, num_items + 1)

    def forward(self, input_seq, seq_len, user_emb):
        # input_seq: (batch_size, max_len)
        position_ids = torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        item_emb = self.item_embedding(input_seq) + self.position_embedding(position_ids)

        # Сжатие эмбеддингов пользователей с помощью MLP
        user_emb_compressed = self.user_mlp(user_emb)  # (batch_size, embedding_dim)
        user_emb_compressed = user_emb_compressed.unsqueeze(1)  # (batch_size, 1, embedding_dim)

        # Добавление сжатых эмбеддингов пользователей к эмбеддингам элементов
        item_emb = item_emb + user_emb_compressed

        item_emb = self.layer_norm(item_emb)
        item_emb = self.dropout(item_emb)

        # Transformer expects input of shape (seq_len, batch_size, embedding_dim)
        item_emb = item_emb.transpose(0, 1)

        # Создание маски для паддинга
        mask = (input_seq == 0)  # (batch_size, max_len)

        # Передача через Transformer
        output = self.transformer(item_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len, embedding_dim)

        # Предсказание последнего элемента
        output = output[:, -1, :]  # (batch_size, embedding_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = LLM4SASRec(num_items=num_items, embedding_dim=embedding_dim, user_embedding_dim=1536, num_heads=num_heads,
                  num_layers=num_layers, dropout=dropout, max_len=max_len)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации (аналогично)
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,3186,4,978300019
1,1,1721,4,978300055
2,1,1022,5,978300055
3,1,1270,5,978300055
4,1,2340,3,978300103


Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.9572, Valid Loss: 7.3264
Epoch 2/10, Train Loss: 6.8373, Valid Loss: 6.4886
Epoch 3/10, Train Loss: 5.9530, Valid Loss: 5.6703
Epoch 4/10, Train Loss: 5.1423, Valid Loss: 4.8559
Epoch 5/10, Train Loss: 4.3579, Valid Loss: 4.1741
Epoch 6/10, Train Loss: 3.7020, Valid Loss: 3.6275
Epoch 7/10, Train Loss: 3.1220, Valid Loss: 3.1844
Epoch 8/10, Train Loss: 2.6448, Valid Loss: 2.7998
Epoch 9/10, Train Loss: 2.2539, Valid Loss: 2.5705
Epoch 10/10, Train Loss: 1.9358, Valid Loss: 2.3089
Precision@10: 0.0771
Recall@10: 0.7705
NDCG@10: 0.7288


# SASRec

In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50        # Максимальная длина последовательности
batch_size = 128    # Размер батча

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

# Для отображения первых строк DataFrame (только если вы работаете в Jupyter Notebook)
# display(train_data.head())

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, max_len=50):
        self.sequences = sequences
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]

        # Паддинг последовательности
        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long)

# Создание датасетов и DataLoader
train_dataset = MovieLensDataset(train_sequences, max_len)
valid_dataset = MovieLensDataset(valid_sequences, max_len)
test_dataset = MovieLensDataset(test_sequences, max_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

class SASRec(nn.Module):
    def __init__(self, num_items, embedding_dim=50, num_heads=2, num_layers=2, dropout=0.2, max_len=50):
        super(SASRec, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)  # +1 для паддинга
        self.position_embedding = nn.Embedding(max_len, embedding_dim)

        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(embedding_dim, num_items + 1)

    def forward(self, input_seq, seq_len):
        # input_seq: (batch_size, max_len)
        position_ids = torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        item_emb = self.item_embedding(input_seq) + self.position_embedding(position_ids)

        item_emb = self.layer_norm(item_emb)
        item_emb = self.dropout(item_emb)

        # Transformer ожидает ввод формы (seq_len, batch_size, embedding_dim)
        item_emb = item_emb.transpose(0, 1)

        # Создание маски для паддинга
        mask = (input_seq == 0)  # (batch_size, max_len)

        # Передача через Transformer
        output = self.transformer(item_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len, embedding_dim)

        # Предсказание последнего элемента
        output = output[:, -1, :]  # (batch_size, embedding_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = SASRec(num_items=num_items, embedding_dim=embedding_dim, num_heads=num_heads,
              num_layers=num_layers, dropout=dropout, max_len=max_len)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train_epoch(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)

            outputs = model(sequences, lengths)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Функция оценки
def evaluate_model(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate_model(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)
Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.9844, Valid Loss: 7.4009
Epoch 2/10, Train Loss: 6.9228, Valid Loss: 6.6205
Epoch 3/10, Train Loss: 6.0998, Valid Loss: 5.8244
Epoch 4/10, Train Loss: 5.3035, Valid Loss: 5.0009
Epoch 5/10, Train Loss: 4.5375, Valid Loss: 4.2878
Epoch 6/10, Train Loss: 3.8380, Valid Loss: 3.6935
Epoch 7/10, Train Loss: 3.2466, Valid Loss: 3.2040
Epoch 8/10, Train Loss: 2.7352, Valid Loss: 2.8522
Epoch 9/10, Train Loss: 2.3127, Valid Loss: 2.5517
Epoch 10/10, Train Loss: 1.9615, Valid Loss: 2.3115
Precision@10: 0.0771
Recall@10: 0.7709
NDCG@10: 0.7307


In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader  # Добавленные импорты
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50        # Определение max_len
batch_size = 128    # Определение batch_size

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

display(train_data.head())

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        # Получение эмбеддинга пользователя
        embedding = self.user_embeddings.get(user_id, [0.0] * 1536)  # Обработка отсутствующих эмбеддингов
        user_emb = torch.tensor(embedding, dtype=torch.float)

        # Паддинг последовательности
        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов и DataLoader
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, user2embedding, max_len)
test_dataset = MovieLensDataset(test_sequences, test_user_ids, user2embedding, max_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

class LLM4SASRec(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50):
        super(LLM4SASRec, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)  # +1 for padding
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        
        # Compress user embedding
        self.user_mlp = nn.Sequential(
            nn.Linear(user_embedding_dim, embedding_dim),
            nn.ReLU()
        )
        
        # Adjust embedding dimension for concatenation
        self.combined_embedding_dim = embedding_dim * 2  # Example: Concatenating user and item embeddings
        
        # Transformer expects this new embedding size
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.combined_embedding_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.layer_norm = nn.LayerNorm(self.combined_embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(self.combined_embedding_dim, num_items + 1)
    
    def forward(self, input_seq, seq_len, user_emb):
        # Embed items and positions
        item_emb = self.item_embedding(input_seq) + self.position_embedding(torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq))
        
        # Compress user embeddings
        user_emb_compressed = self.user_mlp(user_emb)  # (batch_size, embedding_dim)
        user_emb_expanded = user_emb_compressed.unsqueeze(1).repeat(1, input_seq.size(1), 1)  # (batch_size, max_len, embedding_dim)
        
        # Concatenate item and user embeddings
        combined_emb = torch.cat([item_emb, user_emb_expanded], dim=-1)  # (batch_size, max_len, combined_embedding_dim)
        
        combined_emb = self.layer_norm(combined_emb)
        combined_emb = self.dropout(combined_emb)
        
        # Transformer expects (seq_len, batch_size, embedding_dim)
        combined_emb = combined_emb.transpose(0, 1)
        
        # Create padding mask
        mask = (input_seq == 0)  # (batch_size, max_len)
        
        # Pass through Transformer
        output = self.transformer(combined_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len, combined_embedding_dim)
        
        # Use the last item in the sequence
        output = output[:, -1, :]  # (batch_size, combined_embedding_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits


# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = LLM4SASRec(num_items=num_items, embedding_dim=embedding_dim, user_embedding_dim=1536, num_heads=num_heads,
                  num_layers=num_layers, dropout=dropout, max_len=max_len)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации (аналогично)
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,3186,4,978300019
1,1,1721,4,978300055
2,1,1022,5,978300055
3,1,1270,5,978300055
4,1,2340,3,978300103


Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.8055, Valid Loss: 6.9700
Epoch 2/10, Train Loss: 6.3107, Valid Loss: 5.7959
Epoch 3/10, Train Loss: 5.1090, Valid Loss: 4.6177
Epoch 4/10, Train Loss: 3.9598, Valid Loss: 3.6678
Epoch 5/10, Train Loss: 2.9946, Valid Loss: 2.9693
Epoch 6/10, Train Loss: 2.2658, Valid Loss: 2.4313
Epoch 7/10, Train Loss: 1.7370, Valid Loss: 2.0812
Epoch 8/10, Train Loss: 1.3129, Valid Loss: 1.8749
Epoch 9/10, Train Loss: 1.0104, Valid Loss: 1.7153
Epoch 10/10, Train Loss: 0.7802, Valid Loss: 1.5260
Precision@10: 0.0846
Recall@10: 0.8455
NDCG@10: 0.8368


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossLayer(nn.Module):
    def __init__(self, input_dim):
        super(CrossLayer, self).__init__()
        self.input_dim = input_dim
        self.weight = nn.Parameter(torch.randn(input_dim, 1))
        self.bias = nn.Parameter(torch.randn(input_dim))

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = torch.matmul(x, self.weight) + self.bias  # (batch_size, 1)
        x_l = x_0 * x_l  # Broadcasting to (batch_size, input_dim)
        return x_l

class DCNv2(nn.Module):
    def __init__(self, input_dim, num_layers=2):
        super(DCNv2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = nn.ModuleList([CrossLayer(input_dim) for _ in range(num_layers)])
        self.output_layer = nn.Linear(input_dim * (num_layers + 1), input_dim)

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = x
        outputs = [x_0]
        for cross_layer in self.cross_layers:
            x_l = cross_layer(x_l)
            outputs.append(x_l)
        concatenated = torch.cat(outputs, dim=1)  # (batch_size, input_dim * (num_layers +1))
        output = self.output_layer(concatenated)   # (batch_size, input_dim)
        return output


In [55]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50
batch_size = 128

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

display(train_data.head())

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        # Получение эмбеддинга пользователя
        embedding = self.user_embeddings.get(user_id, [0.0] * 1536)  # Обработка отсутствующих эмбеддингов
        user_emb = torch.tensor(embedding, dtype=torch.float)

        # Паддинг последовательности
        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов и DataLoader
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, user2embedding, max_len)
test_dataset = MovieLensDataset(test_sequences, test_user_ids, user2embedding, max_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Реализация DCNv2
class CrossLayer(nn.Module):
    def __init__(self, input_dim):
        super(CrossLayer, self).__init__()
        self.input_dim = input_dim
        self.weight = nn.Parameter(torch.randn(input_dim, 1))
        self.bias = nn.Parameter(torch.randn(input_dim))

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = torch.matmul(x, self.weight) + self.bias  # (batch_size, 1)
        x_l = x_0 * x_l  # Broadcasting to (batch_size, input_dim)
        return x_l

class DCNv2(nn.Module):
    def __init__(self, input_dim, num_layers=2):
        super(DCNv2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = nn.ModuleList([CrossLayer(input_dim) for _ in range(num_layers)])
        self.output_layer = nn.Linear(input_dim * (num_layers + 1), input_dim)

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = x
        outputs = [x_0]
        for cross_layer in self.cross_layers:
            x_l = cross_layer(x_l)
            outputs.append(x_l)
        concatenated = torch.cat(outputs, dim=1)  # (batch_size, input_dim * (num_layers +1))
        output = self.output_layer(concatenated)   # (batch_size, input_dim)
        return output

# Модифицированный класс LLM4SASRec с DCNv2
class LLM4SASRecDCNv2(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50, cross_num_layers=2):
        super(LLM4SASRecDCNv2, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        
        # Замена MLP на DCNv2 для обработки эмбеддингов пользователей
        self.user_dcnv2 = DCNv2(user_embedding_dim, num_layers=cross_num_layers)
        self.user_compress = nn.Linear(user_embedding_dim, embedding_dim)  # Преобразование размера после DCNv2
        self.relu = nn.ReLU()
        
        # Размер объединенного эмбеддинга
        combined_dim = embedding_dim * 2
        
        # Слои трансформера с обновленной размерностью
        encoder_layer = nn.TransformerEncoderLayer(d_model=combined_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.layer_norm = nn.LayerNorm(combined_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(combined_dim, num_items + 1)
    
    def forward(self, input_seq, seq_len, user_emb):
        # Встраивание элементов и позиций
        item_emb = self.item_embedding(input_seq) + self.position_embedding(
            torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        )
        
        # Обработка эмбеддингов пользователей через DCNv2
        user_emb_processed = self.user_dcnv2(user_emb)  # (batch_size, user_embedding_dim)
        user_emb_compressed = self.user_compress(user_emb_processed)  # (batch_size, embedding_dim)
        user_emb_compressed = self.relu(user_emb_compressed)  # Нелинейность
        
        user_emb_expanded = user_emb_compressed.unsqueeze(1).repeat(1, input_seq.size(1), 1)  # (batch_size, max_len, embedding_dim)
        
        # Конкатенация эмбеддингов элементов и пользователей
        combined_emb = torch.cat([item_emb, user_emb_expanded], dim=-1)  # (batch_size, max_len, combined_dim)
        
        combined_emb = self.layer_norm(combined_emb)
        combined_emb = self.dropout(combined_emb)
        
        # Трансформер ожидает вход размерности (seq_len, batch_size, embedding_dim)
        combined_emb = combined_emb.transpose(0, 1)
        
        # Создание маски для паддинга
        mask = (input_seq == 0)  # (batch_size, max_len)
        
        # Пропуск через трансформер
        output = self.transformer(combined_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len, combined_dim)
        
        # Использование последнего элемента последовательности
        output = output[:, -1, :]  # (batch_size, combined_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2
cross_num_layers = 2  # Количество слоев в DCNv2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = LLM4SASRecDCNv2(
    num_items=num_items,
    embedding_dim=embedding_dim,
    user_embedding_dim=1536,
    num_heads=num_heads,
    num_layers=num_layers,
    dropout=dropout,
    max_len=max_len,
    cross_num_layers=cross_num_layers
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик (оставляем без изменений)
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации (аналогично)
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,3186,4,978300019
1,1,1721,4,978300055
2,1,1022,5,978300055
3,1,1270,5,978300055
4,1,2340,3,978300103


Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.7511, Valid Loss: 6.9217
Epoch 2/10, Train Loss: 6.2159, Valid Loss: 5.7334
Epoch 3/10, Train Loss: 4.9703, Valid Loss: 4.4953
Epoch 4/10, Train Loss: 3.8098, Valid Loss: 3.5088
Epoch 5/10, Train Loss: 2.8761, Valid Loss: 2.8212
Epoch 6/10, Train Loss: 2.1693, Valid Loss: 2.3564
Epoch 7/10, Train Loss: 1.6431, Valid Loss: 2.0391
Epoch 8/10, Train Loss: 1.2383, Valid Loss: 1.8013
Epoch 9/10, Train Loss: 0.9685, Valid Loss: 1.6643
Epoch 10/10, Train Loss: 0.7351, Valid Loss: 1.5650
Precision@10: 0.0846
Recall@10: 0.8455
NDCG@10: 0.8404


In [57]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50
batch_size = 128

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

display(train_data.head())

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        # Получение эмбеддинга пользователя
        embedding = self.user_embeddings.get(user_id, [0.0] * 1536)  # Обработка отсутствующих эмбеддингов
        user_emb = torch.tensor(embedding, dtype=torch.float)

        # Паддинг последовательности
        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов и DataLoader
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, user2embedding, max_len)
test_dataset = MovieLensDataset(test_sequences, test_user_ids, user2embedding, max_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Реализация класса LLM4SASRecWithAttention
class LLM4SASRecWithAttention(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50):
        super(LLM4SASRecWithAttention, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        
        # Сокращение размерности эмбеддингов пользователей
        self.user_mlp = nn.Sequential(
            nn.Linear(user_embedding_dim, embedding_dim),
            nn.ReLU()
        )
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(embedding_dim, num_items + 1)
    
    def forward(self, input_seq, seq_len, user_emb):
        # Встраивание элементов и позиций
        item_emb = self.item_embedding(input_seq) + self.position_embedding(
            torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        )
        
        # Сокращение эмбеддингов пользователей
        user_emb_compressed = self.user_mlp(user_emb)  # (batch_size, embedding_dim)
        
        # Интеграция эмбеддингов пользователей через внимание
        # Для простоты добавим эмбеддинг пользователя в начало последовательности
        user_emb_expanded = user_emb_compressed.unsqueeze(1)  # (batch_size, 1, embedding_dim)
        combined_emb = torch.cat([user_emb_expanded, item_emb], dim=1)  # (batch_size, max_len + 1, embedding_dim)
        
        combined_emb = self.layer_norm(combined_emb)
        combined_emb = self.dropout(combined_emb)
        
        # Трансформер ожидает вход размерности (seq_len, batch_size, embedding_dim)
        combined_emb = combined_emb.transpose(0, 1)
        
        # Создание маски для паддинга (игнорируем пользовательский токен)
        mask = torch.cat([
            torch.zeros((input_seq.size(0), 1), dtype=torch.bool, device=input_seq.device),
            (input_seq == 0)
        ], dim=1)  # (batch_size, max_len + 1)
        
        # Пропуск через трансформер
        output = self.transformer(combined_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len + 1, embedding_dim)
        
        # Использование последнего элемента последовательности (исключая пользовательский токен)
        output = output[:, -1, :]  # (batch_size, embedding_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

# Инициализация модели
model = LLM4SASRecWithAttention(
    num_items=num_items,
    embedding_dim=embedding_dim,
    user_embedding_dim=1536,
    num_heads=num_heads,
    num_layers=num_layers,
    dropout=dropout,
    max_len=max_len
)
model = model.to(device)

# Исправленная функция инициализации весов
def init_weights(m):
    if isinstance(m, (nn.Linear, nn.Embedding)):
        nn.init.xavier_uniform_(m.weight)
        if hasattr(m, 'bias') and m.bias is not None:
            nn.init.constant_(m.bias, 0)

# Применение функции инициализации весов
model.apply(init_weights)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)  # Добавлен weight_decay

# Функции метрик
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,3186,4,978300019
1,1,1721,4,978300055
2,1,1022,5,978300055
3,1,1270,5,978300055
4,1,2340,3,978300103


Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.9422, Valid Loss: 7.6329
Epoch 2/10, Train Loss: 7.1730, Valid Loss: 7.1856
Epoch 3/10, Train Loss: 6.4390, Valid Loss: 6.1652
Epoch 4/10, Train Loss: 5.5145, Valid Loss: 5.3553
Epoch 5/10, Train Loss: 4.6197, Valid Loss: 4.5000
Epoch 6/10, Train Loss: 3.7164, Valid Loss: 3.7588
Epoch 7/10, Train Loss: 2.9334, Valid Loss: 3.1174
Epoch 8/10, Train Loss: 2.2881, Valid Loss: 2.6364
Epoch 9/10, Train Loss: 1.7908, Valid Loss: 2.2751
Epoch 10/10, Train Loss: 1.3949, Valid Loss: 1.9985
Precision@10: 0.0846
Recall@10: 0.8455
NDCG@10: 0.8368


In [59]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random

# Параметры
max_len = 50
batch_size = 128
num_negatives = 100  # Количество негативных примеров

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

# display(train_data.head())  # Uncomment if using Jupyter Notebook

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings, num_items, max_len=50, num_negatives=100):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len
        self.num_negatives = num_negatives
        self.num_items = num_items
        
        # Создание словаря для быстрого поиска взаимодействий пользователя
        self.user_interactions = {}
        for user_id, seq in zip(self.user_ids, self.sequences):
            self.user_interactions[user_id] = set(seq)

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        # Получение эмбеддинга пользователя
        embedding = self.user_embeddings.get(user_id, [0.0] * 1536)  # Обработка отсутствующих эмбеддингов
        user_emb = torch.tensor(embedding, dtype=torch.float)

        # Паддинг последовательности
        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        # Положительный пример (последний элемент последовательности)
        pos_item = padded_seq[-1]

        # Отрицательные примеры
        neg_items = []
        user_seen = self.user_interactions[user_id]
        while len(neg_items) < self.num_negatives:
            neg_item = random.randint(1, self.num_items)
            if neg_item not in user_seen and neg_item not in neg_items:
                neg_items.append(neg_item)

        return {
            'seq': torch.tensor(padded_seq, dtype=torch.long),
            'seq_len': torch.tensor(seq_len, dtype=torch.long),
            'user_emb': user_emb,
            'pos_item': torch.tensor(pos_item, dtype=torch.long),
            'neg_items': torch.tensor(neg_items, dtype=torch.long)
        }

# Определение количества товаров
num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

# Создание датасетов и DataLoader
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, num_items, max_len, num_negatives)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, user2embedding, num_items, max_len, num_negatives)
test_dataset = MovieLensDataset(test_sequences, test_user_ids, user2embedding, num_items, max_len, num_negatives)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Реализация класса LLM4SASRecWithAttention
class LLM4SASRecWithAttention(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50):
        super(LLM4SASRecWithAttention, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(max_len + 1, embedding_dim)  # +1 для пользовательского токена

        # Сокращение размерности эмбеддингов пользователей
        self.user_mlp = nn.Sequential(
            nn.Linear(user_embedding_dim, embedding_dim),
            nn.ReLU()
        )

        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(embedding_dim, num_items + 1)

    def forward(self, input_seq, seq_len, user_emb):
        batch_size, seq_length = input_seq.size()

        # Встраивание элементов и позиций
        item_emb = self.item_embedding(input_seq) + self.position_embedding(
            torch.arange(0, seq_length, device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        )

        # Сокращение эмбеддингов пользователей
        user_emb_compressed = self.user_mlp(user_emb)  # (batch_size, embedding_dim)

        # Интеграция эмбеддингов пользователей через внимание
        # Добавляем эмбеддинг пользователя в начало последовательности
        user_emb_expanded = user_emb_compressed.unsqueeze(1)  # (batch_size, 1, embedding_dim)
        combined_emb = torch.cat([user_emb_expanded, item_emb], dim=1)  # (batch_size, max_len + 1, embedding_dim)

        combined_emb = self.layer_norm(combined_emb)
        combined_emb = self.dropout(combined_emb)

        # Трансформер ожидает вход размерности (seq_len, batch_size, embedding_dim)
        combined_emb = combined_emb.transpose(0, 1)

        # Создание маски для паддинга (игнорируем пользовательский токен)
        mask = torch.cat([
            torch.zeros((batch_size, 1), dtype=torch.bool, device=input_seq.device),
            (input_seq == 0)
        ], dim=1)  # (batch_size, max_len + 1)

        # Пропуск через трансформер
        output = self.transformer(combined_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len + 1, embedding_dim)

        # Использование последнего элемента последовательности (исключая пользовательский токен)
        output = output[:, -1, :]  # (batch_size, embedding_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Инициализация модели
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2

model = LLM4SASRecWithAttention(
    num_items=num_items,
    embedding_dim=embedding_dim,
    user_embedding_dim=1536,
    num_heads=num_heads,
    num_layers=num_layers,
    dropout=dropout,
    max_len=max_len
)
model = model.to(device)

# Инициализация весов (оптимизированная версия)
def init_weights(m):
    if isinstance(m, (nn.Linear, nn.Embedding)):
        nn.init.xavier_uniform_(m.weight)
        if hasattr(m, 'bias') and m.bias is not None:
            nn.init.constant_(m.bias, 0)

model.apply(init_weights)

# Критерий и оптимизатор
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Функции метрик
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences = batch['seq'].to(device)
        seq_lens = batch['seq_len'].to(device)
        user_emb = batch['user_emb'].to(device)
        pos_items = batch['pos_item'].to(device)
        neg_items = batch['neg_items'].to(device)  # (batch_size, num_negatives)

        optimizer.zero_grad()

        # Предсказание для всех товаров
        logits = model(sequences, seq_lens, user_emb)  # (batch_size, num_items + 1)

        # Собираем логиты для положительных и негативных товаров
        pos_logits = logits.gather(1, pos_items.view(-1, 1))  # (batch_size, 1)
        neg_logits = logits.gather(1, neg_items)  # (batch_size, num_negatives)

        # Создание меток
        pos_labels = torch.ones(pos_logits.size(0), 1).to(device)  # (batch_size, 1)
        neg_labels = torch.zeros(neg_logits.size(0), neg_logits.size(1)).to(device)  # (batch_size, num_negatives)

        # Объединение логитов и меток
        combined_logits = torch.cat([pos_logits, neg_logits], dim=1)  # (batch_size, 1 + num_negatives)
        combined_labels = torch.cat([pos_labels, neg_labels], dim=1)  # (batch_size, 1 + num_negatives)

        # Вычисление потерь
        loss = criterion(combined_logits, combined_labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences = batch['seq'].to(device)
            seq_lens = batch['seq_len'].to(device)
            user_emb = batch['user_emb'].to(device)
            pos_items = batch['pos_item'].to(device)
            neg_items = batch['neg_items'].to(device)

            # Предсказание для всех товаров
            logits = model(sequences, seq_lens, user_emb)

            # Собираем логиты для положительных и негативных товаров
            pos_logits = logits.gather(1, pos_items.view(-1, 1))  # (batch_size, 1)
            neg_logits = logits.gather(1, neg_items)  # (batch_size, num_negatives)

            # Создание меток
            pos_labels = torch.ones(pos_logits.size(0), 1).to(device)  # (batch_size, 1)
            neg_labels = torch.zeros(neg_logits.size(0), neg_logits.size(1)).to(device)  # (batch_size, num_negatives)

            # Объединение логитов и меток
            combined_logits = torch.cat([pos_logits, neg_logits], dim=1)  # (batch_size, 1 + num_negatives)
            combined_labels = torch.cat([pos_labels, neg_labels], dim=1)  # (batch_size, 1 + num_negatives)

            # Вычисление потерь
            loss = criterion(combined_logits, combined_labels)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences = batch['seq'].to(device)
            seq_lens = batch['seq_len'].to(device)
            user_emb = batch['user_emb'].to(device)
            pos_items = batch['pos_item'].to(device)
            # neg_items = batch['neg_items'].to(device)  # Негативные примеры не нужны для метрик

            # Получение предсказаний
            logits = model(sequences, seq_lens, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(logits, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [pos_items[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)
Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 0.3668, Valid Loss: 0.1515
Epoch 2/10, Train Loss: 0.1021, Valid Loss: 0.0736
Epoch 3/10, Train Loss: 0.0640, Valid Loss: 0.0586
Epoch 4/10, Train Loss: 0.0543, Valid Loss: 0.0536
Epoch 5/10, Train Loss: 0.0504, Valid Loss: 0.0515
Epoch 6/10, Train Loss: 0.0485, Valid Loss: 0.0503
Epoch 7/10, Train Loss: 0.0473, Valid Loss: 0.0497
Epoch 8/10, Train Loss: 0.0465, Valid Loss: 0.0489
Epoch 9/10, Train Loss: 0.0453, Valid Loss: 0.0472
Epoch 10/10, Train Loss: 0.0431, Valid Loss: 0.0448
Precision@10: 0.0157
Recall@10: 0.1573
NDCG@10: 0.0712


In [61]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50
batch_size = 128

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

display(train_data.head())

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings=None, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        if self.user_embeddings is not None:
            embedding = self.user_embeddings.get(user_id, [0.0] * 1536)
        else:
            embedding = [0.0] * 1536

        user_emb = torch.tensor(embedding, dtype=torch.float)

        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, None, max_len)  # Эмбеддинги не используются
test_dataset = MovieLensDataset(test_sequences, test_user_ids, None, max_len)     # Эмбеддинги не используются

# Создание DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Реализация DCNv2
class CrossLayer(nn.Module):
    def __init__(self, input_dim):
        super(CrossLayer, self).__init__()
        self.input_dim = input_dim
        self.weight = nn.Parameter(torch.randn(input_dim, 1))
        self.bias = nn.Parameter(torch.randn(input_dim))

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = torch.matmul(x, self.weight) + self.bias  # (batch_size, 1)
        x_l = x_0 * x_l  # Broadcasting to (batch_size, input_dim)
        return x_l

class DCNv2(nn.Module):
    def __init__(self, input_dim, num_layers=2):
        super(DCNv2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = nn.ModuleList([CrossLayer(input_dim) for _ in range(num_layers)])
        self.output_layer = nn.Linear(input_dim * (num_layers + 1), input_dim)

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = x
        outputs = [x_0]
        for cross_layer in self.cross_layers:
            x_l = cross_layer(x_l)
            outputs.append(x_l)
        concatenated = torch.cat(outputs, dim=1)  # (batch_size, input_dim * (num_layers +1))
        output = self.output_layer(concatenated)   # (batch_size, input_dim)
        return output

# Модифицированный класс LLM4SASRec с DCNv2
class LLM4SASRecDCNv2(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50, cross_num_layers=2):
        super(LLM4SASRecDCNv2, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        
        # Замена MLP на DCNv2 для обработки эмбеддингов пользователей
        self.user_dcnv2 = DCNv2(user_embedding_dim, num_layers=cross_num_layers)
        self.user_compress = nn.Linear(user_embedding_dim, embedding_dim)  # Преобразование размера после DCNv2
        self.relu = nn.ReLU()
        
        # Размер объединенного эмбеддинга
        combined_dim = embedding_dim * 2
        
        # Слои трансформера с обновленной размерностью
        encoder_layer = nn.TransformerEncoderLayer(d_model=combined_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.layer_norm = nn.LayerNorm(combined_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(combined_dim, num_items + 1)
    
    def forward(self, input_seq, seq_len, user_emb):
        # Встраивание элементов и позиций
        item_emb = self.item_embedding(input_seq) + self.position_embedding(
            torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        )
        
        # Обработка эмбеддингов пользователей через DCNv2
        user_emb_processed = self.user_dcnv2(user_emb)  # (batch_size, user_embedding_dim)
        user_emb_compressed = self.user_compress(user_emb_processed)  # (batch_size, embedding_dim)
        user_emb_compressed = self.relu(user_emb_compressed)  # Нелинейность
        
        user_emb_expanded = user_emb_compressed.unsqueeze(1).repeat(1, input_seq.size(1), 1)  # (batch_size, max_len, embedding_dim)
        
        # Конкатенация эмбеддингов элементов и пользователей
        combined_emb = torch.cat([item_emb, user_emb_expanded], dim=-1)  # (batch_size, max_len, combined_dim)
        
        combined_emb = self.layer_norm(combined_emb)
        combined_emb = self.dropout(combined_emb)
        
        # Трансформер ожидает вход размерности (seq_len, batch_size, embedding_dim)
        combined_emb = combined_emb.transpose(0, 1)
        
        # Создание маски для паддинга
        mask = (input_seq == 0)  # (batch_size, max_len)
        
        # Пропуск через трансформер
        output = self.transformer(combined_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len, combined_dim)
        
        # Использование последнего элемента последовательности
        output = output[:, -1, :]  # (batch_size, combined_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2
cross_num_layers = 2  # Количество слоев в DCNv2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = LLM4SASRecDCNv2(
    num_items=num_items,
    embedding_dim=embedding_dim,
    user_embedding_dim=1536,
    num_heads=num_heads,
    num_layers=num_layers,
    dropout=dropout,
    max_len=max_len,
    cross_num_layers=cross_num_layers
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик (оставляем без изменений)
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации (аналогично)
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,3186,4,978300019
1,1,1721,4,978300055
2,1,1022,5,978300055
3,1,1270,5,978300055
4,1,2340,3,978300103


Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.7982, Valid Loss: 6.9893
Epoch 2/10, Train Loss: 6.3304, Valid Loss: 5.7871
Epoch 3/10, Train Loss: 5.1220, Valid Loss: 4.6419
Epoch 4/10, Train Loss: 3.9420, Valid Loss: 3.6762
Epoch 5/10, Train Loss: 3.0102, Valid Loss: 2.9548
Epoch 6/10, Train Loss: 2.2916, Valid Loss: 2.4405
Epoch 7/10, Train Loss: 1.7354, Valid Loss: 2.0537
Epoch 8/10, Train Loss: 1.3116, Valid Loss: 1.8192
Epoch 9/10, Train Loss: 1.0076, Valid Loss: 1.6616
Epoch 10/10, Train Loss: 0.7750, Valid Loss: 1.5123
Precision@10: 0.0846
Recall@10: 0.8455
NDCG@10: 0.8395


In [62]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Параметры
max_len = 50
batch_size = 128

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

display(train_data.head())

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings=None, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        if self.user_embeddings is not None:
            embedding = self.user_embeddings.get(user_id, [0.0] * 1536)
        else:
            embedding = [0.0] * 1536

        user_emb = torch.tensor(embedding, dtype=torch.float)

        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, None, max_len)  # Эмбеддинги не используются
test_dataset = MovieLensDataset(test_sequences, test_user_ids, None, max_len)     # Эмбеддинги не используются

# Создание DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Реализация DCNv2
class CrossLayer(nn.Module):
    def __init__(self, input_dim):
        super(CrossLayer, self).__init__()
        self.input_dim = input_dim
        self.weight = nn.Parameter(torch.randn(input_dim, 1))
        self.bias = nn.Parameter(torch.randn(input_dim))

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = torch.matmul(x, self.weight) + self.bias  # (batch_size, 1)
        x_l = x_0 * x_l  # Broadcasting to (batch_size, input_dim)
        return x_l

class DCNv2(nn.Module):
    def __init__(self, input_dim, num_layers=2):
        super(DCNv2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = nn.ModuleList([CrossLayer(input_dim) for _ in range(num_layers)])
        self.output_layer = nn.Linear(input_dim * (num_layers + 1), input_dim)

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = x
        outputs = [x_0]
        for cross_layer in self.cross_layers:
            x_l = cross_layer(x_l)
            outputs.append(x_l)
        concatenated = torch.cat(outputs, dim=1)  # (batch_size, input_dim * (num_layers +1))
        output = self.output_layer(concatenated)   # (batch_size, input_dim)
        return output

# Отдельный модуль внимания для пользовательских эмбеддингов
class UserAttention(nn.Module):
    def __init__(self, user_embedding_dim, attention_dim):
        super(UserAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=user_embedding_dim, num_heads=1, batch_first=True)
        self.linear = nn.Linear(user_embedding_dim, attention_dim)

    def forward(self, user_emb):
        # user_emb: (batch_size, user_embedding_dim)
        # Добавляем временную размерность
        user_emb = user_emb.unsqueeze(1)  # (batch_size, 1, user_embedding_dim)
        attn_output, _ = self.attention(user_emb, user_emb, user_emb)  # (batch_size, 1, user_embedding_dim)
        attn_output = attn_output.squeeze(1)  # (batch_size, user_embedding_dim)
        attn_output = self.linear(attn_output)  # (batch_size, attention_dim)
        return attn_output

# Модифицированный класс LLM4SASRec с DCNv2 и отдельным вниманием для пользователей
class LLM4SASRecDCNv2(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50, cross_num_layers=2, attention_dim=50):
        super(LLM4SASRecDCNv2, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        
        # Модуль DCNv2 для обработки пользовательских эмбеддингов
        self.user_dcnv2 = DCNv2(user_embedding_dim, num_layers=cross_num_layers)
        self.user_compress = nn.Linear(user_embedding_dim, embedding_dim)  # Преобразование размера после DCNv2
        self.relu = nn.ReLU()
        
        # Отдельный модуль внимания для пользовательских эмбеддингов
        self.user_attention = UserAttention(user_embedding_dim=embedding_dim, attention_dim=attention_dim)
        
        # Размер объединенного эмбеддинга после интеграции внимания
        combined_dim = embedding_dim + attention_dim
        
        # Слои трансформера с обновленной размерностью
        encoder_layer = nn.TransformerEncoderLayer(d_model=combined_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout,
                                                   activation='relu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.layer_norm = nn.LayerNorm(combined_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(combined_dim, num_items + 1)
    
    def forward(self, input_seq, seq_len, user_emb):
        # Встраивание элементов и позиций
        item_emb = self.item_embedding(input_seq) + self.position_embedding(
            torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        )
        
        # Обработка эмбеддингов пользователей через DCNv2
        user_emb_processed = self.user_dcnv2(user_emb)  # (batch_size, user_embedding_dim)
        user_emb_compressed = self.user_compress(user_emb_processed)  # (batch_size, embedding_dim)
        user_emb_compressed = self.relu(user_emb_compressed)  # Нелинейность
        
        # Применение отдельного внимания к пользовательским эмбеддингам
        user_attn = self.user_attention(user_emb_compressed)  # (batch_size, attention_dim)
        
        # Конкатенация эмбеддингов элементов и пользовательского внимания
        combined_emb = torch.cat([item_emb, user_attn.unsqueeze(1).repeat(1, input_seq.size(1), 1)], dim=-1)  # (batch_size, max_len, combined_dim)
        
        combined_emb = self.layer_norm(combined_emb)
        combined_emb = self.dropout(combined_emb)
        
        # Трансформер ожидает вход размерности (seq_len, batch_size, embedding_dim)
        combined_emb = combined_emb.transpose(0, 1)
        
        # Создание маски для паддинга
        mask = (input_seq == 0)  # (batch_size, max_len)
        
        # Пропуск через трансформер
        output = self.transformer(combined_emb, src_key_padding_mask=mask)
        output = output.transpose(0, 1)  # (batch_size, max_len, combined_dim)
        
        # Использование последнего элемента последовательности
        output = output[:, -1, :]  # (batch_size, combined_dim)
        logits = self.fc(output)    # (batch_size, num_items + 1)
        return logits

# Параметры модели
embedding_dim = 50
attention_dim = 50  # Должно соответствовать embedding_dim для простоты
num_heads = 2
num_layers = 2
dropout = 0.2
cross_num_layers = 2  # Количество слоев в DCNv2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = LLM4SASRecDCNv2(
    num_items=num_items,
    embedding_dim=embedding_dim,
    user_embedding_dim=1536,
    num_heads=num_heads,
    num_layers=num_layers,
    dropout=dropout,
    max_len=max_len,
    cross_num_layers=cross_num_layers,
    attention_dim=attention_dim
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик (оставляем без изменений)
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации (аналогично)
def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Цикл обучения с валидацией
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    valid_loss = validate(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Параметр K для top-K рекомендаций
k = 10

# Оценка модели
precision, recall, ndcg = evaluate(model, test_loader, device, k=k)
print(f'Precision@{k}: {precision:.4f}')
print(f'Recall@{k}: {recall:.4f}')
print(f'NDCG@{k}: {ndcg:.4f}')


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,3186,4,978300019
1,1,1721,4,978300055
2,1,1022,5,978300055
3,1,1270,5,978300055
4,1,2340,3,978300103


Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040




Epoch 1/10, Train Loss: 7.7958, Valid Loss: 6.9727
Epoch 2/10, Train Loss: 6.3182, Valid Loss: 5.8482
Epoch 3/10, Train Loss: 5.1111, Valid Loss: 4.6406
Epoch 4/10, Train Loss: 3.9817, Valid Loss: 3.6352
Epoch 5/10, Train Loss: 3.0220, Valid Loss: 2.8984
Epoch 6/10, Train Loss: 2.2765, Valid Loss: 2.4139
Epoch 7/10, Train Loss: 1.7272, Valid Loss: 2.0767
Epoch 8/10, Train Loss: 1.3205, Valid Loss: 1.8131
Epoch 9/10, Train Loss: 1.0260, Valid Loss: 1.6570
Epoch 10/10, Train Loss: 0.7747, Valid Loss: 1.5599
Precision@10: 0.0845
Recall@10: 0.8454
NDCG@10: 0.8370


In [66]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math

# Параметры
max_len = 50
batch_size = 128

# Файлы данных
train_file = '../data/source/1_ml-1m_original.part1.inter'
valid_file = '../data/source/1_ml-1m_original.part2.inter'
test_file = '../data/source/1_ml-1m_original.part3.inter'

# Загрузка данных
train_data = pd.read_csv(train_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
valid_data = pd.read_csv(valid_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)
test_data = pd.read_csv(test_file, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], skiprows=1)

print(f'Train size: {train_data.shape}')
print(f'Valid size: {valid_data.shape}')
print(f'Test size: {test_data.shape}')

# Загрузка эмбеддингов пользователей
with open('../data/emb/embeddings.json', 'r') as f:
    user_embeddings = json.load(f)

# Преобразование эмбеддингов пользователей в словарь для быстрого доступа
user2embedding = {int(user['id']): user['embedding'] for user in user_embeddings}

# Подготовка последовательностей для обучения, валидации и теста
def prepare_sequences(data):
    user_group = data.groupby('user_id')['item_id'].apply(list)
    sequences = []
    user_ids = []
    for user_id, seq in user_group.items():
        if len(seq) >= 2:  # Только пользователи с достаточной историей
            sequences.append(seq)
            user_ids.append(user_id)
    return sequences, user_ids

train_sequences, train_user_ids = prepare_sequences(train_data)
valid_sequences, valid_user_ids = prepare_sequences(valid_data)
test_sequences, test_user_ids = prepare_sequences(test_data)

print(f'Количество пользователей в обучающем наборе: {len(train_sequences)}')
print(f'Количество пользователей в валидационном наборе: {len(valid_sequences)}')
print(f'Количество пользователей в тестовом наборе: {len(test_sequences)}')

class MovieLensDataset(Dataset):
    def __init__(self, sequences, user_ids, user_embeddings=None, max_len=50):
        self.sequences = sequences
        self.user_ids = user_ids
        self.user_embeddings = user_embeddings
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        user_id = self.user_ids[idx]

        if self.user_embeddings is not None:
            embedding = self.user_embeddings.get(user_id, [0.0] * 1536)
        else:
            embedding = [0.0] * 1536

        user_emb = torch.tensor(embedding, dtype=torch.float)

        if len(seq) < self.max_len:
            padded_seq = [0] * (self.max_len - len(seq)) + seq
            seq_len = len(seq)
        else:
            padded_seq = seq[-self.max_len:]
            seq_len = self.max_len

        return torch.tensor(padded_seq, dtype=torch.long), torch.tensor(seq_len, dtype=torch.long), user_emb

# Создание датасетов
train_dataset = MovieLensDataset(train_sequences, train_user_ids, user2embedding, max_len)
valid_dataset = MovieLensDataset(valid_sequences, valid_user_ids, None, max_len)  # Эмбеддинги не используются
test_dataset = MovieLensDataset(test_sequences, test_user_ids, None, max_len)     # Эмбеддинги не используются

# Создание DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Реализация DCNv2
class CrossLayer(nn.Module):
    def __init__(self, input_dim):
        super(CrossLayer, self).__init__()
        self.input_dim = input_dim
        self.weight = nn.Parameter(torch.randn(input_dim, 1))
        self.bias = nn.Parameter(torch.randn(input_dim))

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = torch.matmul(x, self.weight) + self.bias  # (batch_size, 1)
        x_l = x_0 * x_l  # Broadcasting to (batch_size, input_dim)
        return x_l

class DCNv2(nn.Module):
    def __init__(self, input_dim, num_layers=2):
        super(DCNv2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = nn.ModuleList([CrossLayer(input_dim) for _ in range(num_layers)])
        self.output_layer = nn.Linear(input_dim * (num_layers + 1), input_dim)

    def forward(self, x):
        # x: (batch_size, input_dim)
        x_0 = x
        x_l = x
        outputs = [x_0]
        for cross_layer in self.cross_layers:
            x_l = cross_layer(x_l)
            outputs.append(x_l)
        concatenated = torch.cat(outputs, dim=1)  # (batch_size, input_dim * (num_layers +1))
        output = self.output_layer(concatenated)   # (batch_size, input_dim)
        return output

# Кастомный слой трансформера
class CustomTransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu'):
        super(CustomTransformerEncoderLayer, self).__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout

        # Стандартное многоголовое внимание
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        # Отдельная голова внимания для пользовательских эмбеддингов
        self.user_attn_q = nn.Linear(d_model, d_model // nhead, bias=False)
        self.user_attn_k = nn.Linear(d_model, d_model // nhead, bias=False)
        self.user_attn_v = nn.Linear(d_model, d_model // nhead, bias=False)
        self.user_attn_out = nn.Linear(d_model // nhead, d_model, bias=False)
        self.user_dropout = nn.Dropout(dropout)

        # Feedforward сеть
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout_fc = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        # Нормализация и Dropout
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        # Активация
        self.activation = nn.ReLU() if activation == 'relu' else nn.GELU()

    def forward(self, src, user_emb, src_mask=None, src_key_padding_mask=None):
        """
        src: (seq_len, batch_size, d_model)
        user_emb: (batch_size, d_model)
        src_mask: (seq_len, seq_len)
        src_key_padding_mask: (batch_size, seq_len)
        """
        # Стандартное внимание
        src2, _ = self.self_attn(src, src, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)
        src = src + self.dropout1(src2)
        src = self.norm1(src)

        # Отдельная голова внимания для пользователей
        Q_user = self.user_attn_q(user_emb)  # (batch_size, d_model//nhead)
        K_user = self.user_attn_k(src.transpose(0,1))  # (batch_size, seq_len, d_model//nhead)
        V_user = self.user_attn_v(src.transpose(0,1))  # (batch_size, seq_len, d_model//nhead)

        Q_user = Q_user.unsqueeze(1)  # (batch_size, 1, d_model//nhead)

        # Корректное вычисление весов внимания
        attn_weights_user = torch.bmm(Q_user, K_user.transpose(1, 2)) / math.sqrt(self.d_model // self.nhead)  # (batch_size,1,seq_len)
        attn_weights_user = F.softmax(attn_weights_user, dim=-1)  # (batch_size,1,seq_len)
        attn_weights_user = self.user_dropout(attn_weights_user)

        # Вычисляем выход внимания
        attn_output_user = torch.bmm(attn_weights_user, V_user)  # (batch_size,1,d_model//nhead)
        attn_output_user = attn_output_user.squeeze(1)  # (batch_size, d_model//nhead)

        # Проекция обратно к d_model
        attn_output_user = self.user_attn_out(attn_output_user)  # (batch_size, d_model)
        attn_output_user = attn_output_user.unsqueeze(0)        # (1, batch_size, d_model)

        # Добавление пользовательского внимания к src
        src = src + self.dropout2(attn_output_user)
        src = self.norm2(src)

        # Feedforward сеть
        src2 = self.linear2(self.dropout_fc(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)

        return src

class CustomTransformerEncoder(nn.Module):
    def __init__(self, encoder_layer, num_layers):
        super(CustomTransformerEncoder, self).__init__()
        self.layers = nn.ModuleList([encoder_layer for _ in range(num_layers)])
        self.num_layers = num_layers

    def forward(self, src, user_emb, mask=None, src_key_padding_mask=None):
        for layer in self.layers:
            src = layer(src, user_emb, mask, src_key_padding_mask)
        return src

# Модифицированный класс LLM4SASRec с DCNv2 и кастомным трансформером
class LLM4SASRecDCNv2(nn.Module):
    def __init__(self, num_items, embedding_dim=50, user_embedding_dim=1536, num_heads=2, num_layers=2, dropout=0.2, max_len=50, cross_num_layers=2):
        super(LLM4SASRecDCNv2, self).__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=0)
        self.position_embedding = nn.Embedding(max_len, embedding_dim)
        
        # Модуль DCNv2 для обработки пользовательских эмбеддингов
        self.user_dcnv2 = DCNv2(user_embedding_dim, num_layers=cross_num_layers)
        self.user_compress = nn.Linear(user_embedding_dim, embedding_dim)  # Преобразование размера после DCNv2
        self.relu = nn.ReLU()

        # Кастомный трансформер с отдельной головой внимания для пользователей
        encoder_layer = CustomTransformerEncoderLayer(
            d_model=embedding_dim,
            nhead=num_heads,
            dim_feedforward=2048,
            dropout=dropout,
            activation='relu'
        )
        self.transformer = CustomTransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(embedding_dim, num_items +1)
    
    def forward(self, input_seq, seq_len, user_emb):
        """
        input_seq: (batch_size, max_len)
        seq_len: (batch_size)
        user_emb: (batch_size, user_embedding_dim)
        """
        # Встраивание элементов и позиций
        item_emb = self.item_embedding(input_seq) + self.position_embedding(
            torch.arange(0, input_seq.size(1), device=input_seq.device).unsqueeze(0).expand_as(input_seq)
        )  # (batch_size, max_len, embedding_dim)

        # Обработка пользовательских эмбеддингов через DCNv2
        user_emb_processed = self.user_dcnv2(user_emb)  # (batch_size, user_embedding_dim)
        user_emb_compressed = self.user_compress(user_emb_processed)  # (batch_size, embedding_dim)
        user_emb_compressed = self.relu(user_emb_compressed)  # (batch_size, embedding_dim)

        # Подготовка входа для трансформера
        # Трансформер ожидает (seq_len, batch_size, d_model)
        transformer_input = item_emb.transpose(0,1)  # (max_len, batch_size, embedding_dim)

        # Маска для паддинга
        src_key_padding_mask = (input_seq == 0)  # (batch_size, max_len)

        # Пропуск через трансформер
        transformer_output = self.transformer(transformer_input, user_emb_compressed, 
                                             mask=None, 
                                             src_key_padding_mask=src_key_padding_mask)

        # Использование последнего токена
        output = transformer_output[-1, :, :]  # (batch_size, embedding_dim)

        # Финальный слой классификации
        logits = self.fc(output)  # (batch_size, num_items +1)

        return logits


Train size: (697378, 4)
Valid size: (99582, 4)
Test size: (203165, 4)
Количество пользователей в обучающем наборе: 6040
Количество пользователей в валидационном наборе: 5954
Количество пользователей в тестовом наборе: 6040


In [67]:
# Параметры модели
embedding_dim = 50
num_heads = 2
num_layers = 2
dropout = 0.2
cross_num_layers = 2  # Количество слоёв в DCNv2

num_items = max(train_data['item_id'].max(), valid_data['item_id'].max(), test_data['item_id'].max())

model = LLM4SASRecDCNv2(
    num_items=num_items,
    embedding_dim=embedding_dim,
    user_embedding_dim=1536,
    num_heads=num_heads,
    num_layers=num_layers,
    dropout=dropout,
    max_len=max_len,
    cross_num_layers=cross_num_layers
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Критерий и оптимизатор
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Игнорируем паддинг
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Функции метрик
def precision_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / k

def recall_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    hits = len(set(recommended) & set(relevant))
    return hits / len(relevant) if relevant else 0

def ndcg_at_k(recommended, relevant, k):
    recommended = recommended[:k]
    dcg = 0.0
    for i, item in enumerate(recommended):
        if item in relevant:
            dcg += 1 / np.log2(i + 2)
    idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0

# Функция обучения
def train_epoch(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in loader:
        sequences, lengths, user_emb = batch
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        user_emb = user_emb.to(device)

        optimizer.zero_grad()
        outputs = model(sequences, lengths, user_emb)
        targets = sequences[:, -1]  # Последний элемент последовательности
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

# Функция валидации
def validate_epoch(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            lengths = lengths.to(device)
            user_emb = user_emb.to(device)

            outputs = model(sequences, lengths, user_emb)
            targets = sequences[:, -1]
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(loader)

# Функция оценки
def evaluate(model, loader, device, k=10):
    model.eval()
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    with torch.no_grad():
        for batch in loader:
            sequences, lengths, user_emb = batch
            sequences = sequences.to(device)
            user_emb = user_emb.to(device)
            targets = sequences[:, -1]  # Последний элемент, который нужно предсказать

            # Получение предсказаний
            outputs = model(sequences, lengths, user_emb)  # (batch_size, num_items + 1)
            _, top_k_items = torch.topk(outputs, k, dim=1)  # Получаем top-K предсказаний для каждого пользователя

            # Цикл по батчу для расчета метрик
            for i in range(sequences.size(0)):
                recommended_items = top_k_items[i].cpu().numpy()
                relevant_items = [targets[i].item()]

                precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
                recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
                ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))

    # Среднее значение метрик по всем пользователям
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_ndcg = np.mean(ndcg_scores)

    return mean_precision, mean_recall, mean_ndcg

# Цикл обучения с валидацией
num_epochs = 10
best_valid_loss = float('inf')

for epoch in range(1, num_epochs + 1):
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    valid_loss = validate_epoch(model, valid_loader, criterion, device)
    print(f'Epoch {epoch}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}')
    
    # Сохранение модели с наилучшей валидационной потерей
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'best_model.pth')
        print(f'Model saved at epoch {epoch}')


Epoch 1/10, Train Loss: 7.8687, Valid Loss: 7.2749
Model saved at epoch 1
Epoch 2/10, Train Loss: 6.6132, Valid Loss: 6.3840
Model saved at epoch 2
Epoch 3/10, Train Loss: 5.5554, Valid Loss: 5.4623
Model saved at epoch 3
Epoch 4/10, Train Loss: 4.5574, Valid Loss: 4.6141
Model saved at epoch 4
Epoch 5/10, Train Loss: 3.6621, Valid Loss: 3.8708
Model saved at epoch 5
Epoch 6/10, Train Loss: 2.9225, Valid Loss: 3.3164
Model saved at epoch 6
Epoch 7/10, Train Loss: 2.3426, Valid Loss: 2.8236
Model saved at epoch 7
Epoch 8/10, Train Loss: 1.8566, Valid Loss: 2.4935
Model saved at epoch 8
Epoch 9/10, Train Loss: 1.4814, Valid Loss: 2.2147
Model saved at epoch 9
Epoch 10/10, Train Loss: 1.1876, Valid Loss: 2.0135
Model saved at epoch 10


In [68]:
# Загрузка лучшей модели
model.load_state_dict(torch.load('best_model.pth'))

# Оценка модели на тестовом наборе
precision, recall, ndcg = evaluate(model, test_loader, device, k=10)
print(f'Precision@10: {precision:.4f}')
print(f'Recall@10: {recall:.4f}')
print(f'NDCG@10: {ndcg:.4f}')


  model.load_state_dict(torch.load('best_model.pth'))


Precision@10: 0.0834
Recall@10: 0.8339
NDCG@10: 0.7941
