In [1]:
CONFIG = {
    'data_folder' : 'C:/Users/Николай/PycharmProjects/VKRecSys/data/',
    'models_folder' : 'C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/',
    
    
    'train_path' : 'train_interactions.parquet',
    'items_meta_path' : 'items_meta.parquet',
    'users_meta_path' : 'users_meta.parquet',
    'model_path' : '6.4.pth',
    
    'user_emb_size' : 256, # 183404
    'item_emb_size' : 512, # 337727
    'source_emb_size' : 256, # 19613
    'age_emb_size' : 256, # 43
    'duration_emb_size' : 256, # ~175   
    'gender_emb_size' : 256, # 3
    'torch_precision' : 40, # number of decimal places for printing numbers
        
    'DEVICE' : 'cuda',
    'SEED' : 42,
    'BATCH_SIZE' : 16384,
    'LR' : 0.001,
    'EPOCHS' : 3,
    'output_dim' : 3
    
}

In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
import pandas as pd
from tqdm import tqdm

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.manual_seed(42)  
torch.cuda.manual_seed_all(42)  
np.random.seed(42)  

In [4]:
# Loading data
train = pd.read_parquet(f"{CONFIG['data_folder']}{CONFIG['train_path']}", engine='pyarrow')
train['target'] = train['like'] + train['dislike'].replace({1: -1})
train.drop(columns=['like', 'dislike'], inplace=True)
train['target'] = train['target'].astype('int8')
train['target'] = train['target'].replace({-1:0, 0:1, 1:2})

items_meta = pd.read_parquet(f"{CONFIG['data_folder']}{CONFIG['items_meta_path']}", engine='pyarrow')
items_meta['duration'] = items_meta['duration'] - 5
items_meta['item_id'] = items_meta['item_id'].astype('category')
items_meta['source_id'] = items_meta['source_id'].astype('category')
items_meta['duration'] = items_meta['duration'].astype('category')
items_meta.set_index('item_id', inplace=True)

users_meta = pd.read_parquet(f"{CONFIG['data_folder']}{CONFIG['users_meta_path']}", engine='pyarrow')
users_meta['age'] = users_meta['age'] - 18
users_meta['gender'] = users_meta['gender'].replace({1:0, 2:1})
users_meta['user_id'] = users_meta['user_id'].astype('category')
users_meta['gender'] = users_meta['gender'].astype('category')
users_meta['age'] = users_meta['age'].astype('category')
users_meta.set_index('user_id', inplace=True)

In [5]:
folds = pd.read_csv(f"{CONFIG['data_folder']}fold.csv")

In [6]:
class AFMModel(nn.Module):
    def __init__(self, 
                 num_users=users_meta.index.nunique(), 
                 num_items=items_meta.index.nunique(), 
                 num_sources=items_meta['source_id'].nunique(),
                 num_ages=users_meta['age'].nunique(),
                 num_durations=items_meta['duration'].nunique(),
                 num_genders=users_meta['gender'].nunique(), 
                 output_dim=CONFIG['output_dim']): 
        super(AFMModel, self).__init__()
        
        # Embedding layers for each feature
        self.user_embedding = nn.Embedding(num_users, CONFIG['user_emb_size'])
        self.item_embedding = nn.Embedding(num_items, CONFIG['item_emb_size'])
        self.source_embedding = nn.Embedding(num_sources, CONFIG['source_emb_size'])
        self.age_embedding = nn.Embedding(num_ages, CONFIG['age_emb_size'])
        self.duration_embedding = nn.Embedding(num_durations, CONFIG['duration_emb_size'])
        self.gender_embedding = nn.Embedding(num_genders, CONFIG['gender_emb_size'])
        
        # Transformations for embeddings
        self.user_emb_transform = nn.Linear(CONFIG['user_emb_size'], 32)
        self.item_emb_transform = nn.Linear(CONFIG['item_emb_size'], 32)
        self.source_emb_transform = nn.Linear(CONFIG['source_emb_size'], 32)
        self.age_emb_transform = nn.Linear(CONFIG['age_emb_size'], 32)
        self.duration_emb_transform = nn.Linear(CONFIG['duration_emb_size'], 32)
        self.gender_emb_transform = nn.Linear(CONFIG['gender_emb_size'], 32)
        
        # Attention layer components
        self.attention_network = nn.Sequential(
            nn.Linear(64, 32),  # 64 comes from the concatenation of two embedding sizes
            nn.ReLU(),
            nn.Linear(32, 1)
        )

        self.output_layer = nn.Linear(32, output_dim)  # Output layer
    
    def pairwise_interaction(self, x1, x2):
        return x1 * x2  # Element-wise multiplication for feature interaction
    
    def compute_attention(self, emb1, emb2):
        # Concatenate embeddings of the pair of features and pass through attention network
        combined_emb = torch.cat([emb1, emb2], dim=-1)  # Concatenate embeddings (dim=-1 is feature dimension)
        attention_score = self.attention_network(combined_emb)
        attention_score = torch.sigmoid(attention_score)  # Use sigmoid for attention score
        return attention_score
    
    def forward(self, user_ids, item_ids, source_ids, age_ids, duration_ids, gender_ids, embeddings):
        # Obtain embeddings for all features
        user_emb = self.user_embedding(user_ids)
        item_emb = self.item_embedding(item_ids)
        source_emb = self.source_embedding(source_ids)
        age_emb = self.age_embedding(age_ids)
        duration_emb = self.duration_embedding(duration_ids)
        gender_emb = self.gender_embedding(gender_ids)
        
        # Apply linear transformations to embeddings
        user_emb = self.user_emb_transform(user_emb)
        item_emb = self.item_emb_transform(item_emb)
        source_emb = self.source_emb_transform(source_emb)
        age_emb = self.age_emb_transform(age_emb)
        duration_emb = self.duration_emb_transform(duration_emb)
        gender_emb = self.gender_emb_transform(gender_emb)
        
        # Create dictionary of all embeddings
        embeddings_dict = {
            'user': user_emb,
            'item': item_emb,
            'source': source_emb,
            'age': age_emb,
            'duration': duration_emb,
            'gender': gender_emb,
            'embeddings': embeddings
        }
        
        interactions = []
        attention_scores = []
        
        # Compute pairwise interactions and their attention scores
        for key1, emb1 in embeddings_dict.items():
            for key2, emb2 in embeddings_dict.items():
                if key1 < key2:  # Avoid double counting pairs (e.g., user-item and item-user)
                    interaction = self.pairwise_interaction(emb1, emb2)
                    attention_score = self.compute_attention(emb1, emb2)
                    interactions.append(interaction * attention_score)  # Weight the interaction by attention score
                    attention_scores.append(attention_score)  # Store the attention scores
        
        # Combine all interactions into a single tensor
        interactions = torch.stack(interactions, dim=1)  # Shape: [batch_size, num_interactions]
        attention_scores = torch.stack(attention_scores, dim=1)  # Shape: [batch_size, num_interactions]
        
        # Sum over all interactions with their respective attention scores
        weighted_interactions = torch.sum(interactions * attention_scores, dim=1)
        
        # Pass through output layer to get the final prediction
        output = self.output_layer(weighted_interactions)
        
        return output

In [7]:
import gc  # Для сборщика мусора

for fold in range(4):
    print(f"Обучение модели для fold {fold}...")
    
    # Разделение данных на train и validation
    train_data = train[folds['fold'] != fold]
    val_data = train[folds['fold'] == fold]
    
    # Model creation
    model = AFMModel().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=CONFIG['LR'])
    
    # Обучение модели
    num_samples = len(train_data)
    num_batches = (num_samples + CONFIG['BATCH_SIZE'] - 1) // CONFIG['BATCH_SIZE']
    
    for epoch in range(CONFIG['EPOCHS']):
        running_loss = 0.0
        with tqdm(range(num_batches), desc=f"Epoch {epoch+1}/{CONFIG['EPOCHS']}", unit="batch") as t:
            for batch_idx in t:
                start_idx = batch_idx * CONFIG['BATCH_SIZE']
                end_idx = min(start_idx + CONFIG['BATCH_SIZE'], num_samples)
                batch_main = train.iloc[start_idx:end_idx]
                batch_user_ids = batch_main['user_id'].values
                batch_item_ids = batch_main['item_id'].values
                
                batch_users_meta = users_meta.loc[batch_user_ids]
                batch_items_meta = items_meta.loc[batch_item_ids]
                
                batch_user_values = torch.tensor(batch_user_ids, dtype=torch.long, device=device)
                batch_item_values = torch.tensor(batch_item_ids, dtype=torch.long, device=device)
                
                batch_gender_values = torch.tensor(batch_users_meta['gender'].values, dtype=torch.long, device=device)
                batch_age_values = torch.tensor(batch_users_meta['age'].values, dtype=torch.long, device=device)
                batch_source_values = torch.tensor(batch_items_meta['source_id'].values, dtype=torch.long, device=device)
                batch_duration_values = torch.tensor(batch_items_meta['duration'].values, dtype=torch.long, device=device)
    
                embeddings = torch.tensor(np.stack(batch_items_meta['embeddings'].values), device=device, dtype=torch.float32)
    
                targets = torch.tensor(batch_main['target'].values, dtype=torch.long, device=device)
    
                optimizer.zero_grad()
                outputs = model(batch_user_values, batch_item_values, batch_source_values, batch_age_values, batch_duration_values, batch_gender_values, embeddings)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item()
                t.set_postfix(mean_loss=f"{running_loss / (batch_idx + 1):.6f}")
    
        print(f"Epoch [{epoch+1}/{CONFIG['EPOCHS']}], Loss: {running_loss / num_batches:.4f}")
    
    # Сохранение модели для текущего fold
    fold_model_path = f"{CONFIG['models_folder']}{CONFIG['model_path']}_fold_{fold}"
    torch.save({"model_state_dict": model.state_dict()}, fold_model_path)
    print(f"Модель для fold {fold} сохранена в {fold_model_path}")

    # Очистка VRAM
    del model, optimizer, criterion  # Удаляем объекты модели и оптимизатора
    torch.cuda.empty_cache()  # Очищаем видеопамять
    gc.collect()  # Сбор мусора в системе
    print(f"VRAM очищена после fold {fold}.")

Обучение модели для fold 0...


Epoch 1/3: 100%|██████████| 6664/6664 [25:24<00:00,  4.37batch/s, mean_loss=0.136424]


Epoch [1/3], Loss: 0.1364


Epoch 2/3: 100%|██████████| 6664/6664 [25:35<00:00,  4.34batch/s, mean_loss=0.124044]


Epoch [2/3], Loss: 0.1240


Epoch 3/3: 100%|██████████| 6664/6664 [25:48<00:00,  4.30batch/s, mean_loss=0.122094]


Epoch [3/3], Loss: 0.1221
Модель для fold 0 сохранена в C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/6.4.pth_fold_0
VRAM очищена после fold 0.
Обучение модели для fold 1...


Epoch 1/3: 100%|██████████| 6667/6667 [25:19<00:00,  4.39batch/s, mean_loss=0.136680]


Epoch [1/3], Loss: 0.1367


Epoch 2/3: 100%|██████████| 6667/6667 [25:25<00:00,  4.37batch/s, mean_loss=0.124328]


Epoch [2/3], Loss: 0.1243


Epoch 3/3: 100%|██████████| 6667/6667 [25:23<00:00,  4.38batch/s, mean_loss=0.122358]


Epoch [3/3], Loss: 0.1224
Модель для fold 1 сохранена в C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/6.4.pth_fold_1
VRAM очищена после fold 1.
Обучение модели для fold 2...


Epoch 1/3: 100%|██████████| 6670/6670 [25:24<00:00,  4.37batch/s, mean_loss=0.135906]


Epoch [1/3], Loss: 0.1359


Epoch 2/3: 100%|██████████| 6670/6670 [25:24<00:00,  4.38batch/s, mean_loss=0.123825]


Epoch [2/3], Loss: 0.1238


Epoch 3/3: 100%|██████████| 6670/6670 [25:23<00:00,  4.38batch/s, mean_loss=0.121946]


Epoch [3/3], Loss: 0.1219
Модель для fold 2 сохранена в C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/6.4.pth_fold_2
VRAM очищена после fold 2.
Обучение модели для fold 3...


Epoch 1/3: 100%|██████████| 6673/6673 [25:26<00:00,  4.37batch/s, mean_loss=0.136283]


Epoch [1/3], Loss: 0.1363


Epoch 2/3: 100%|██████████| 6673/6673 [25:26<00:00,  4.37batch/s, mean_loss=0.123982]


Epoch [2/3], Loss: 0.1240


Epoch 3/3: 100%|██████████| 6673/6673 [25:29<00:00,  4.36batch/s, mean_loss=0.122183]


Epoch [3/3], Loss: 0.1222
Модель для fold 3 сохранена в C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/6.4.pth_fold_3
VRAM очищена после fold 3.
