In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class GMF(nn.Module):
    """Generalized Matrix Factorization (GMF) scorer for user-item pairs.

    Looks up one embedding per user and one per item, multiplies them
    element-wise, projects the product to a single value with a linear layer
    and applies a sigmoid, so every prediction lies in (0, 1).

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        embedding_dim: Length of each user/item embedding vector.
    """

    def __init__(self, n_users, n_items, embedding_dim=8):
        super(GMF, self).__init__()

        self.user_embedding = nn.Embedding(num_embeddings=n_users, embedding_dim=embedding_dim)
        self.item_embedding = nn.Embedding(num_embeddings=n_items, embedding_dim=embedding_dim)

        # Learned weighting of the element-wise product, reduced to one value per pair.
        self.output_layer = nn.Linear(in_features=embedding_dim, out_features=1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, user_input, item_input):
        """Score each (user, item) pair.

        Args:
            user_input: Integer tensor of user indices.
            item_input: Integer tensor of item indices, same shape as
                ``user_input``.

        Returns:
            Tensor of predictions in (0, 1) with the same shape as the index
            tensors (a batch of one yields shape ``(1,)``, not a 0-d scalar).
        """
        user_embedded = self.user_embedding(user_input)
        item_embedded = self.item_embedding(item_input)

        element_product = torch.mul(user_embedded, item_embedded)

        output = self.output_layer(element_product)

        prediction = self.sigmoid(output)

        # Drop only the trailing size-1 feature dim produced by the linear layer.
        # A bare squeeze() would also collapse a batch dimension of size 1 and
        # leave the prediction with a different shape than its label tensor.
        return prediction.squeeze(-1)

def train_gmf(model, train_loader, epochs=10, learning_rate=0.001):
    """Train ``model`` in place with plain SGD on a mean-squared-error loss.

    Prints the average batch loss after every epoch.

    Args:
        model: Module invoked as ``model(user_ids, item_ids)``. It is moved to
            the module-level ``device`` before training starts.
        train_loader: Iterable yielding ``(user_ids, item_ids, labels)`` tensor
            batches; it is iterated once per epoch.
        epochs: Number of passes over ``train_loader``.
        learning_rate: SGD step size.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        n_batches = 0

        for user_ids, item_ids, labels in train_loader:
            # Batches may live on the CPU even when the model is on the GPU;
            # .to() is a no-op for tensors that are already on `device`.
            user_ids = user_ids.to(device)
            item_ids = item_ids.to(device)
            labels = labels.to(device)

            predictions = model(user_ids, item_ids)
            # MSELoss broadcasts mismatched shapes (e.g. 0-d vs (1,), or
            # (B, 1) vs (B,)) instead of failing, which silently computes the
            # wrong loss. Force predictions into the label shape; this raises
            # if the element counts differ.
            loss = criterion(predictions.reshape(labels.shape), labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            # Previously this surfaced as an opaque ZeroDivisionError.
            raise ValueError('train_loader yielded no batches; nothing to train on')

        avg_loss = total_loss / n_batches
        print(f'Epoch {epoch+1}/{epochs} - Average Loss: {avg_loss:.4f}')

In [2]:
import json
import pandas as pd
from collections import defaultdict

class SessionDataProcessor:
    """Turn JSON-lines session events into (user, track, score) training rows.

    Original user and track identifiers are remapped to consecutive integer
    ids (starting at 0, in order of first appearance) so they can be used as
    embedding indices. The mappings accumulate across calls on one instance.
    """

    # Contribution of each event type to the raw interaction score.
    # Event types that are not listed contribute nothing.
    _EVENT_WEIGHTS = {'skip': -0.5, 'play': 0.5, 'like': 1.0}

    def __init__(self):
        self.user_mapping = {}   # original user id -> dense integer id
        self.track_mapping = {}  # original track id -> dense integer id
        self.next_user_id = 0
        self.next_track_id = 0

    def _map_user_id(self, original_user_id):
        """Return the dense id for ``original_user_id``, assigning one if new."""
        if original_user_id not in self.user_mapping:
            self.user_mapping[original_user_id] = self.next_user_id
            self.next_user_id += 1
        return self.user_mapping[original_user_id]

    def _map_track_id(self, original_track_id):
        """Return the dense id for ``original_track_id``, assigning one if new."""
        if original_track_id not in self.track_mapping:
            self.track_mapping[original_track_id] = self.next_track_id
            self.next_track_id += 1
        return self.track_mapping[original_track_id]

    def process_sessions(self, sessions_file):
        """Aggregate session events into one scored row per (user, track) pair.

        Args:
            sessions_file: Iterable of text lines (e.g. an open file). Each
                non-blank line must be a JSON object with the keys
                ``'user_id'``, ``'track_id'`` and ``'event_type'``.

        Returns:
            ``pandas.DataFrame`` with columns ``user_id``, ``track_id`` (dense
            integer ids) and ``score``, one row per distinct pair.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks one of the required keys.
        """
        interactions = defaultdict(list)

        for line in sessions_file:
            # Blank lines (commonly a trailing one) are not records; skipping
            # them avoids a JSONDecodeError from json.loads.
            if not line.strip():
                continue

            session = json.loads(line)
            user_id = self._map_user_id(session['user_id'])
            track_id = self._map_track_id(session['track_id'])

            interactions[(user_id, track_id)].append(session['event_type'])

        training_data = [
            (user_id, track_id, self._calculate_interaction_score(events))
            for (user_id, track_id), events in interactions.items()
        ]

        return pd.DataFrame(training_data, columns=['user_id', 'track_id', 'score'])

    def _calculate_interaction_score(self, events):
        """Squash the summed event weights into (0, 1) with a logistic function.

        Each ``'skip'`` adds -0.5, each ``'play'`` +0.5 and each ``'like'``
        +1.0; any other event adds nothing. An empty or all-unknown event
        list therefore scores 0.5.
        """
        weights = self._EVENT_WEIGHTS
        # The isinstance guard keeps unhashable event values (e.g. a JSON list)
        # from raising in the dict lookup; they simply contribute nothing.
        score = sum(weights[e] for e in events if isinstance(e, str) and e in weights)
        return 1 / (1 + np.exp(-score))

    @property
    def n_users(self):
        """Number of distinct users seen so far."""
        return len(self.user_mapping)

    @property
    def n_tracks(self):
        """Number of distinct tracks seen so far."""
        return len(self.track_mapping)

# Build the id mappings and the scored interaction table from the raw session log.
processor = SessionDataProcessor()
# JSON text is UTF-8; pin the encoding so decoding does not depend on the
# platform's locale default.
with open('data/sessions.jsonl', 'r', encoding='utf-8') as f:
    training_df = processor.process_sessions(f)

In [3]:
# One tensor per DataFrame column, each placed on `device`: integer ids for the
# embedding lookups and float scores as regression targets.
dataset = torch.utils.data.TensorDataset(*(
    tensor_type(training_df[column].values).to(device)
    for column, tensor_type in (
        ('user_id', torch.LongTensor),
        ('track_id', torch.LongTensor),
        ('score', torch.FloatTensor),
    )
))

In [4]:
# Size the two embedding tables from the number of distinct users and tracks
# recorded by the processor.
model = GMF(processor.n_users, processor.n_tracks)

In [5]:
# Mini-batches of 64 interactions, reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=64)

In [None]:
# Train for 50 epochs with an SGD learning rate of 0.005.
train_gmf(model, train_loader, epochs=50, learning_rate=0.005)

Epoch 1/50 - Average Loss: 0.0170
Epoch 2/50 - Average Loss: 0.0170
Epoch 3/50 - Average Loss: 0.0170
Epoch 4/50 - Average Loss: 0.0170
Epoch 5/50 - Average Loss: 0.0170
Epoch 6/50 - Average Loss: 0.0170
Epoch 7/50 - Average Loss: 0.0170
Epoch 8/50 - Average Loss: 0.0170


In [7]:
# Sanity check: report whether PyTorch can see a usable CUDA device.
print(torch.cuda.is_available())

True
