In [1]:
CONFIG = {
    'data_folder' : 'C:/Users/Николай/PycharmProjects/VKRecSys/data/',
    'models_folder' : 'C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/',
    
    'train_path' : 'train_interactions.parquet',
    'items_meta_path' : 'items_meta.parquet',
    'users_meta_path' : 'users_meta.parquet',
    'model_path' : '6.3.pth',
    
    'user_emb_size' : 256, # 183404
    'item_emb_size' : 256, # 337727
    'source_emb_size' : 256, # 19613
    'age_emb_size' : 256, # 43
    'duration_emb_size' : 256, # ~175   
    'gender_emb_size' : 256, # 3
    'torch_precision' : 40, # number of decimal places for printing numbers
    
    'DEVICE' : 'cuda',
    'SEED' : 42,
    'BATCH_SIZE' : 16384,
    'LR' : 0.001,
    'EPOCHS' : 3,
    'output_dim' : 3
    
}

In [2]:
# Import libs
import numpy as np
from tqdm import tqdm
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam

In [3]:
# Device and seed
device = torch.device(CONFIG['DEVICE'] if torch.cuda.is_available() else "cpu")
torch.set_printoptions(precision=CONFIG['torch_precision']) 

torch.manual_seed(CONFIG['SEED'])  
torch.cuda.manual_seed_all(CONFIG['SEED'])  
np.random.seed(CONFIG['SEED'])  

In [4]:
# Loading data
train = pd.read_parquet(f"{CONFIG['data_folder']}{CONFIG['train_path']}", engine='pyarrow')
train['target'] = train['like'] + train['dislike'].replace({1: -1})
train.drop(columns=['like', 'dislike'], inplace=True)
train['target'] = train['target'].astype('int8')
train['target'] = train['target'].replace({-1:0, 0:1, 1:2})

items_meta = pd.read_parquet(f"{CONFIG['data_folder']}{CONFIG['items_meta_path']}", engine='pyarrow')
items_meta['duration'] = items_meta['duration'] - 5
items_meta['item_id'] = items_meta['item_id'].astype('category')
items_meta['source_id'] = items_meta['source_id'].astype('category')
items_meta['duration'] = items_meta['duration'].astype('category')
items_meta.set_index('item_id', inplace=True)

users_meta = pd.read_parquet(f"{CONFIG['data_folder']}{CONFIG['users_meta_path']}", engine='pyarrow')
users_meta['age'] = users_meta['age'] - 18
users_meta['gender'] = users_meta['gender'].replace({1:0, 2:1})
users_meta['user_id'] = users_meta['user_id'].astype('category')
users_meta['gender'] = users_meta['gender'].astype('category')
users_meta['age'] = users_meta['age'].astype('category')
users_meta.set_index('user_id', inplace=True)

In [5]:
# Input dimension
input_dim = (CONFIG['user_emb_size'] + 
             CONFIG['item_emb_size'] + 
             CONFIG['source_emb_size'] + 
             CONFIG['age_emb_size'] +
             CONFIG['duration_emb_size'] + 
             CONFIG['gender_emb_size'] + 
             32)

In [6]:
class AFMModel(nn.Module):
    def __init__(self, 
                 input_dim, 
                 num_users=users_meta.index.nunique(), 
                 num_items=items_meta.index.nunique(), 
                 num_sources=items_meta['source_id'].nunique(),
                 num_ages=users_meta['age'].nunique(),
                 num_durations=items_meta['duration'].nunique(),
                 num_genders=users_meta['gender'].nunique(), 
                 output_dim=CONFIG['output_dim']): 
        super(AFMModel, self).__init__()
        
        # Embedding layers for each feature
        self.user_embedding = nn.Embedding(num_users, CONFIG['user_emb_size'])
        self.item_embedding = nn.Embedding(num_items, CONFIG['item_emb_size'])
        self.source_embedding = nn.Embedding(num_sources, CONFIG['source_emb_size'])
        self.age_embedding = nn.Embedding(num_ages, CONFIG['age_emb_size'])
        self.duration_embedding = nn.Embedding(num_durations, CONFIG['duration_emb_size'])
        self.gender_embedding = nn.Embedding(num_genders, CONFIG['gender_emb_size'])
        
        # Transformations for embeddings
        self.user_emb_transform = nn.Linear(CONFIG['user_emb_size'], 32)
        self.item_emb_transform = nn.Linear(CONFIG['item_emb_size'], 32)
        self.source_emb_transform = nn.Linear(CONFIG['source_emb_size'], 32)
        self.age_emb_transform = nn.Linear(CONFIG['age_emb_size'], 32)
        self.duration_emb_transform = nn.Linear(CONFIG['duration_emb_size'], 32)
        self.gender_emb_transform = nn.Linear(CONFIG['gender_emb_size'], 32)
        
        # Attention layer components (updated for triple interaction)
        self.attention_network_pair = nn.Sequential(
            nn.Linear(64, 32),  # 64 comes from the concatenation of two embedding sizes
            nn.GELU(),
            nn.Linear(32, 1)
        )

        self.attention_network_triple = nn.Sequential(
            nn.Linear(96, 32),  # 96 comes from the concatenation of three embedding sizes
            nn.GELU(),
            nn.Linear(32, 1)
        )

        self.output_layer = nn.Linear(32, output_dim)  # Output layer
    
    def pairwise_interaction(self, x1, x2):
        return x1 * x2  # Element-wise multiplication for feature interaction
    
    def triplewise_interaction(self, x1, x2, x3):
        return x1 * x2 * x3  # Element-wise multiplication for triple interaction
    
    def compute_attention(self, emb1, emb2):
        # Concatenate embeddings of the pair of features and pass through attention network
        combined_emb = torch.cat([emb1, emb2], dim=-1)  # Concatenate embeddings (dim=-1 is feature dimension)
        attention_score = self.attention_network_pair(combined_emb)
        attention_score = torch.sigmoid(attention_score)  # Use sigmoid for attention score
        return attention_score
    
    def compute_triple_attention(self, emb1, emb2, emb3):
        # Concatenate embeddings of the triple and pass through attention network
        combined_emb = torch.cat([emb1, emb2, emb3], dim=-1)  # Concatenate embeddings (dim=-1 is feature dimension)
        attention_score = self.attention_network_triple(combined_emb)
        attention_score = torch.sigmoid(attention_score)  # Use sigmoid for attention score
        return attention_score
    
    def forward(self, user_ids, item_ids, source_ids, age_ids, duration_ids, gender_ids, embeddings):
        # Obtain embeddings for all features
        user_emb = self.user_embedding(user_ids)
        item_emb = self.item_embedding(item_ids)
        source_emb = self.source_embedding(source_ids)
        age_emb = self.age_embedding(age_ids)
        duration_emb = self.duration_embedding(duration_ids)
        gender_emb = self.gender_embedding(gender_ids)
        
        # Apply linear transformations to embeddings
        user_emb = self.user_emb_transform(user_emb)
        item_emb = self.item_emb_transform(item_emb)
        source_emb = self.source_emb_transform(source_emb)
        age_emb = self.age_emb_transform(age_emb)
        duration_emb = self.duration_emb_transform(duration_emb)
        gender_emb = self.gender_emb_transform(gender_emb)
        
        # Create dictionary of all embeddings
        embeddings_dict = {
            'user': user_emb,
            'item': item_emb,
            'source': source_emb,
            'age': age_emb,
            'duration': duration_emb,
            'gender': gender_emb,
            'embeddings': embeddings
        }
        
        interactions = []
        attention_scores = []
        
        # Compute pairwise interactions and their attention scores
        for key1, emb1 in embeddings_dict.items():
            for key2, emb2 in embeddings_dict.items():
                if key1 < key2:  # Avoid double counting pairs (e.g., user-item and item-user)
                    interaction = self.pairwise_interaction(emb1, emb2)
                    attention_score = self.compute_attention(emb1, emb2)
                    interactions.append(interaction * attention_score)  # Weight the interaction by attention score
                    attention_scores.append(attention_score)  # Store the attention scores
        
        # Compute triplewise interactions and their attention scores
        for key1, emb1 in embeddings_dict.items():
            for key2, emb2 in embeddings_dict.items():
                for key3, emb3 in embeddings_dict.items():
                    if key1 < key2 < key3:  # Avoid double counting triples (e.g., user-item-source and source-item-user)
                        interaction = self.triplewise_interaction(emb1, emb2, emb3)
                        attention_score = self.compute_triple_attention(emb1, emb2, emb3)
                        interactions.append(interaction * attention_score)  # Weight the interaction by attention score
                        attention_scores.append(attention_score)  # Store the attention scores
        
        # Combine all interactions into a single tensor
        interactions = torch.stack(interactions, dim=1)  # Shape: [batch_size, num_interactions]
        attention_scores = torch.stack(attention_scores, dim=1)  # Shape: [batch_size, num_interactions]
        
        # Sum over all interactions with their respective attention scores
        weighted_interactions = torch.sum(interactions * attention_scores, dim=1)
        
        # Pass through output layer to get the final prediction
        output = self.output_layer(weighted_interactions)
        
        return output


In [7]:
# Model creation
model = AFMModel(input_dim).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=CONFIG['LR'])

In [8]:
# Train
num_samples = len(train)
num_batches = (num_samples + CONFIG['BATCH_SIZE'] - 1) // CONFIG['BATCH_SIZE']

for epoch in range(CONFIG['EPOCHS']):
    running_loss = 0.0
    with tqdm(range(num_batches), desc=f"Epoch {epoch+1}/{CONFIG['EPOCHS']}", unit="batch") as t:
        for batch_idx in t:
            start_idx = batch_idx * CONFIG['BATCH_SIZE']
            end_idx = min(start_idx + CONFIG['BATCH_SIZE'], num_samples)
            batch_main = train.iloc[start_idx:end_idx]
            batch_user_ids = batch_main['user_id'].values
            batch_item_ids = batch_main['item_id'].values
            
            batch_users_meta = users_meta.loc[batch_user_ids]
            batch_items_meta = items_meta.loc[batch_item_ids]
            
            batch_user_values = torch.tensor(batch_user_ids, dtype=torch.long, device=device)
            batch_item_values = torch.tensor(batch_item_ids, dtype=torch.long, device=device)
            
            batch_gender_values = torch.tensor(batch_users_meta['gender'].values, dtype=torch.long, device=device)
            batch_age_values = torch.tensor(batch_users_meta['age'].values, dtype=torch.long, device=device)
            batch_source_values = torch.tensor(batch_items_meta['source_id'].values, dtype=torch.long, device=device)
            batch_duration_values = torch.tensor(batch_items_meta['duration'].values, dtype=torch.long, device=device)

            embeddings = torch.tensor(np.stack(batch_items_meta['embeddings'].values), device=device, dtype=torch.float32)

            targets = torch.tensor(batch_main['target'].values, dtype=torch.long, device=device)

            optimizer.zero_grad()
            outputs = model(batch_user_values, batch_item_values, batch_source_values, batch_age_values, batch_duration_values, batch_gender_values, embeddings)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            t.set_postfix(mean_loss=f"{running_loss / (batch_idx + 1):.6f}")

    print(f"Epoch [{epoch+1}/{CONFIG['EPOCHS']}], Loss: {running_loss / num_batches:.4f}")

Epoch 1/3: 100%|██████████| 8891/8891 [31:43<00:00,  4.67batch/s, mean_loss=0.134236]


Epoch [1/3], Loss: 0.1342


Epoch 2/3: 100%|██████████| 8891/8891 [33:19<00:00,  4.45batch/s, mean_loss=0.124609]


Epoch [2/3], Loss: 0.1246


Epoch 3/3: 100%|██████████| 8891/8891 [32:12<00:00,  4.60batch/s, mean_loss=0.123053]

Epoch [3/3], Loss: 0.1231





In [9]:
# Save model_state
torch.save({"model_state_dict": model.state_dict()}, f"{CONFIG['models_folder']}{CONFIG['model_path']}")
print(f"Модель сохранена в f'{CONFIG['models_folder']}{CONFIG['model_path']}'")

Модель сохранена в f'C:/Users/Николай/PycharmProjects/VKRecSys/B.Processing/Модели/6.3.pth'
