In [1]:
from typing import Tuple, Dict, Any
from preprocessing_utils import *
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
from tqdm import tqdm
from torch.optim import Adam
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import random
import wandb
import torch.nn as nn
import torch.nn.functional as F



device = torch.device("cuda:0" if torch.cuda.is_available() else "mps")

#user_item_path = '/Volumes/DeepLearner/Search & Recommendation System/Data/australian_users_items_clean.json'
device


device(type='cuda', index=0)

In [2]:
user_item_path = '/notebooks/australian_users_items_clean.json'

In [3]:
def create_mappings(df, column):
    unique_ids = sorted(df[column].unique())  # Sorting added here
    print(f"Number of unique {column}: {len(unique_ids)}")
    id_to_idx = {id_: idx for idx, id_ in enumerate(unique_ids)}
    return id_to_idx

class SimplifiedSteamDataset(Dataset):
    def __init__(self, user_item_df, user_mapping, item_mapping):
        self.df = user_item_df.copy()
        
        self.df['user_idx'] = self.df['user_id'].map(user_mapping)
        self.df['item_idx'] = self.df['item_id'].map(item_mapping)
        
        print(f"Missing user_idx: {self.df['user_idx'].isna().sum()}")
        print(f"Missing item_idx: {self.df['item_idx'].isna().sum()}")
        
        # Scaling playtime
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.y = self.scaler.fit_transform(self.df['playtime_forever'].values.reshape(-1, 1)).flatten()
        
        self.user_idxs = self.df['user_idx'].values
        self.item_idxs = self.df['item_idx'].values
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        return self.y[idx], self.user_idxs[idx], self.item_idxs[idx]

In [4]:
def prepare_simple_data_loaders(user_item_path, batch_size=32):
    # Load data
    user_item_df = load_json_to_df(user_item_path)
    user_item_df.sort_values(by=['user_id', 'playtime_forever'], ascending=[True, False], inplace=True)
    
    # Drop duplicates based on 'user_id' and 'item_id'
    #user_item_df.drop_duplicates(subset=['user_id', 'item_id'], inplace=True)
    def test_mapping(mapping, unique_ids):
        # Test 1: Ensure every unique ID has a mapping
        for unique_id in unique_ids:
            assert unique_id in mapping, f"{unique_id} not found in mapping"

        # Test 2: Ensure reverse mapping is consistent
        reverse_mapping = {v: k for k, v in mapping.items()}
        for unique_id in unique_ids:
            idx = mapping[unique_id]
            assert reverse_mapping[idx] == unique_id, f"Inconsistent mapping for {unique_id}"
    
    # Create mappings
    user_mapping = create_mappings(user_item_df, 'user_id')
    item_mapping = create_mappings(user_item_df, 'item_id')

    test_mapping(user_mapping, user_item_df['user_id'].unique())
    test_mapping(item_mapping, user_item_df['item_id'].unique())

    # Create the final dataset using the new mappings
    full_dataset = SimplifiedSteamDataset(user_item_df, user_mapping, item_mapping)

    # Split into train, validation, and test sets
    train_size = int(0.7 * len(full_dataset))
    val_size = int(0.15 * len(full_dataset))
    test_size = len(full_dataset) - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, val_size, test_size], 
    generator=torch.Generator().manual_seed(42)  # set the seed for reproducibility
    )

# Create a DataFrame from val_dataset
    val_data = [val_dataset[i] for i in range(len(val_dataset))]
    val_df = pd.DataFrame(val_data, columns=['playtime_scaled', 'user_idx', 'item_idx'])
    # Create DataLoader for each set
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Extract the indices from val_dataset
    val_indices = val_dataset.indices

    # Create a validation DataFrame using these indices
    val_df = full_dataset.df.iloc[val_indices].reset_index(drop=True)

    return train_loader, val_loader, test_loader, user_mapping, item_mapping, full_dataset.df, val_df


In [5]:
train_loader, val_loader, test_loader, user_mapping, item_mapping, user_item_df, val_df = prepare_simple_data_loaders(user_item_path)

Number of unique user_id: 70834
Number of unique item_id: 10978
Missing user_idx: 0
Missing item_idx: 0


In [6]:
def get_actual_top_k_games(user_id, user_item_df, user_mapping, k=5):
    
    user_data = user_item_df[user_item_df['user_idx'] == user_id]
    top_k_actual = user_data.sort_values(by='playtime_forever', ascending=False).head(k)
    return top_k_actual['item_name'].tolist()

def get_predicted_top_k_games(model, user_id, user_mapping, item_mapping, user_item_df, k=5):
    model.eval()
    
    user_idx = torch.LongTensor([user_mapping[user_id]] * len(item_mapping)).to(device)    
    # Convert item_mapping values to a tensor and move to the same device as the model
    all_item_idxs = torch.LongTensor(list(item_mapping.values())).to(device)

    # Make predictions
    with torch.no_grad():
        predictions = model(user_idx, all_item_idxs).cpu().numpy().flatten()

    # Extract top-k items
    top_k_indices = predictions.argsort()[-k:][::-1]
    top_k_item_idxs = [list(item_mapping.values())[i] for i in top_k_indices]

    top_k_item_names = [user_item_df.loc[user_item_df['item_idx'] == idx, 'item_name'].iloc[0] for idx in top_k_item_idxs]

    return top_k_item_names

def precision_at_k(y_true, y_pred, k, threshold=0.5):
    # Sort by predicted score and take top k
    top_k_indices = np.argsort(y_pred)[::-1][:k]
    top_k_true = y_true[top_k_indices]
    
    # Count number of true positives in top k (playtime above threshold)
    true_positives = np.sum(top_k_true > threshold)
    
    return true_positives / k

def predict_for_random_user(model, user_mapping, item_mapping, user_item_df, k=5):
    # Randomly select a user ID
    random_user_id = random.choice(list(user_mapping.keys()))
    print(random_user_id)
    print(f"Making predictions for random user {random_user_id}")
    # Check if the random_user_id exists in the DataFrame
    print(random_user_id in user_item_df['user_id'].values)

    
    # Call the existing function to make predictions for this user
    top_k_games = get_predicted_top_k_games(model, random_user_id, user_mapping, item_mapping, user_item_df, k)
    
    return random_user_id, top_k_games

def recall_at_k(y_true, y_pred, k, threshold=0.5):
    # Sort by predicted score and take top k
    top_k_indices = np.argsort(y_pred)[::-1][:k]
    top_k_true = y_true[top_k_indices]
    
    # Count number of true positives in top k (playtime above threshold)
    true_positives = np.sum(top_k_true > threshold)
    
    # Count the total number of actual positives (relevant items)
    total_actual_positives = np.sum(y_true > threshold)
    
    if total_actual_positives == 0:
        return 0
    
    return true_positives / total_actual_positives


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NCF(nn.Module):
    def __init__(self, num_users, num_items, embed_dim, layers=[16, 8], dropout=False):
        super(NCF, self).__init__()
        
        self.num_users = num_users
        self.num_items = num_items
        self.embed_dim = embed_dim
        
        # GMF embeddings
        self.user_gmf_embedding = nn.Embedding(num_users, embed_dim)
        self.item_gmf_embedding = nn.Embedding(num_items, embed_dim)
        
        # MLP embeddings
        self.user_mlp_embedding = nn.Embedding(num_users, layers[0] // 2)
        self.item_mlp_embedding = nn.Embedding(num_items, layers[0] // 2)
        
        # MLP layers
        mlp_modules = []
        for i, (in_size, out_size) in enumerate(zip(layers[:-1], layers[1:])):
            mlp_modules.append(nn.Linear(in_size, out_size))
            mlp_modules.append(nn.ReLU())
            if dropout:
                mlp_modules.append(nn.Dropout(p=0.2))
        self.mlp_layers = nn.Sequential(*mlp_modules)
        
        # Final prediction layer
        self.prediction_layer = nn.Linear(layers[-1] + embed_dim, 1)
        
    def forward(self, user_indices, item_indices):
        user_gmf_embed = self.user_gmf_embedding(user_indices)
        item_gmf_embed = self.item_gmf_embedding(item_indices)
        
        user_mlp_embed = self.user_mlp_embedding(user_indices)
        item_mlp_embed = self.item_mlp_embedding(item_indices)
        
        # GMF part
        gmf_product = torch.mul(user_gmf_embed, item_gmf_embed)
        
        # MLP part
        mlp_input = torch.cat([user_mlp_embed, item_mlp_embed], dim=1)
        mlp_output = self.mlp_layers(mlp_input)
        
        # Final layer
        final_input = torch.cat([gmf_product, mlp_output], dim=1)
        prediction = self.prediction_layer(final_input)
        
        return prediction.squeeze(-1)


In [8]:
# Initialize wandb
wandb.init(project="Steam Rec Engine_vNew_200dim", config={"learning_rate": 0.001})

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [9]:
import wandb
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from math import sqrt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


wandb.config.update({
    "learning_rate": 0.001,
    "batch_size": 32,
    "embed_dim": 200,
    "epochs": 20,
})

#wandb.config.update({"Model Architecture": str(model)})

# Initialize NCF model, optimizer, and loss function
# Assuming you have defined NCF class as discussed
model = NCF(num_users=len(user_mapping), num_items=len(item_mapping), embed_dim=200)

wandb.config.update({"Model Architecture": str(model)})

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5, verbose=True)


#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)


# Move model to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Number of epochs and interval to save model
n_epochs = 20
save_interval = 5  # Save every 2 epochs

for epoch in range(1, n_epochs + 1):
    # Training loop
    model.train()
    train_loss = 0.0
    for batch_idx, batch_data in enumerate(tqdm(train_loader, desc=f"Training epoch {epoch}")):
        y_batch, user_ids, item_ids = batch_data
        y_batch = y_batch.float()
        user_ids, item_ids = user_ids.long(), item_ids.long()
        y_batch = y_batch.to(device)
        user_ids, item_ids = user_ids.to(device), item_ids.to(device)
        optimizer.zero_grad()
        outputs = model(user_ids, item_ids)
        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()
        
        wandb.log({"Batch Train Loss": loss.item()}, step=batch_idx)
        #scheduler.step()

        #wandb.log({"Batch Train Loss": loss.item()})
        train_loss += loss.item()
    
    for name, param in model.named_parameters():
        if param.requires_grad:
            wandb.log({f"Gradients/{name}": wandb.Histogram(param.grad.cpu().numpy())})
    #scheduler.step()
    # Inside your training loop
    wandb.log({"Learning Rate": scheduler.optimizer.param_groups[0]['lr']})
    train_loss /= len(train_loader)
    wandb.log({"Train Loss": train_loss})
    print(f"Epoch {epoch} - Training loss: {train_loss}")

    # Validation loop
    model.eval()
    y_true_val = []
    y_pred_val = []
    val_loss = 0.0
    with torch.no_grad():
        for batch_data in tqdm(val_loader, desc=f"Validating epoch {epoch}"):
            y_batch, user_ids, item_ids = batch_data
            
            y_batch = y_batch.float()

            user_ids, item_ids = user_ids.long(), item_ids.long()

            y_batch = y_batch.to(device)

            user_ids, item_ids = user_ids.to(device), item_ids.to(device)

            outputs = model(user_ids, item_ids)

            loss = criterion(outputs.squeeze(), y_batch)

            batch_rmse = sqrt(mean_squared_error(y_batch.cpu().numpy(), outputs.cpu().numpy()))
            wandb.log({"Batch Validation RMSE": batch_rmse})


            val_loss += loss.item()
            y_true_val.extend(y_batch.cpu().numpy())
            y_pred_val.extend(outputs.cpu().numpy())

    val_loss /= len(val_loader)
    scheduler.step(val_loss)
    val_rmse = sqrt(mean_squared_error(y_true_val, y_pred_val))
    val_precision_at_k = precision_at_k(np.array(y_true_val), np.array(y_pred_val), k=5)
    wandb.log({"Validation Loss": val_loss, "Validation RMSE": val_rmse, "Validation Precision@5": val_precision_at_k})
    print(f"Epoch {epoch} - Validation loss: {val_loss}, RMSE: {val_rmse}, Precision@5: {val_precision_at_k}")

    # Log example users
    table_rows = []
    for _ in range(5):
        random_user_id, random_top_k_games = predict_for_random_user(model, user_mapping, item_mapping, user_item_df, k=10)
        actual_games = get_actual_top_k_games(user_mapping[random_user_id], user_item_df, user_mapping, k=10)
        print(f"Actual games: {actual_games}")
        print(f"Predicted games: {random_top_k_games}")
        # Create a row for this user
        row = [random_user_id, ", ".join(actual_games), ", ".join(random_top_k_games)]
        
        # Add this row to the table
        table_rows.append(row)
        
    table = wandb.Table(data=table_rows, columns=["User ID", "Actual Top-5 Games", "Predicted Top-5 Games"])

    wandb.log({f"Example Users Epoch {epoch}": table})
    
    # Save the model at intervals
    if epoch % save_interval == 0:
        torch.save(model.state_dict(), f"model_epoch_{epoch}.pt")
        wandb.save(f"model_epoch_{epoch}.pt")

Training epoch 1: 100%|██████████| 112511/112511 [16:00<00:00, 117.16it/s]


Epoch 1 - Training loss: 0.0004021331169123971


Validating epoch 1: 100%|██████████| 24110/24110 [00:31<00:00, 754.28it/s]


Epoch 1 - Validation loss: 0.00013815338514148288, RMSE: 0.011753929748983908, Precision@5: 0.0
babypanay
Making predictions for random user babypanay
True
Actual games: ['Age of Mythology: Extended Edition', "Garry's Mod", 'The Elder Scrolls V: Skyrim', 'Age of Empires II: HD Edition', 'STAR WARS™ Empire at War: Gold Pack', 'Far Cry® 3', "Sid Meier's Civilization V", 'Grand Theft Auto IV', 'Need for Speed: Hot Pursuit', 'Terraria']
Predicted games: ['Samantha Swift and the Golden Touch', 'Mimic Arena', 'Metro Last Light JP', '4 Elements', 'Nurse Love Addiction', 'Prominence', 'Run Rabbit Run', 'Unnamed Fiasco', 'Planetbase', 'MotoGP™13']
76561198050010300
Making predictions for random user 76561198050010300
True
Actual games: ['Total War: SHOGUN 2', 'The Sims(TM) 3', 'Farming Simulator 2011', 'Empire: Total War', 'Napoleon: Total War', 'Microsoft Flight', 'Theatre of War 2: Africa 1943', 'Demolition Company Gold', 'Medieval II: Total War', "Garry's Mod"]
Predicted games: ['Valiant', "

Training epoch 2: 100%|██████████| 112511/112511 [15:44<00:00, 119.07it/s]


Epoch 2 - Training loss: 0.0002011486049411301


Validating epoch 2: 100%|██████████| 24110/24110 [00:31<00:00, 760.28it/s]


Epoch 2 - Validation loss: 0.00034609305084727204, RMSE: 0.018603737678354024, Precision@5: 0.0
76561198060385675
Making predictions for random user 76561198060385675
True
Actual games: ['FINAL FANTASY XIV: A Realm Reborn', 'Fallout 4', 'Borderlands 2', 'The Elder Scrolls V: Skyrim', 'Fallout: New Vegas', 'DARK SOULS™ II', 'Metro: Last Light', 'Spiral Knights', 'Saints Row: The Third', 'Starbound']
Predicted games: ['Witch & Hero（魔女と勇者）', 'Shift Happens', 'Chronicon', 'Time Mysteries 2: The Ancient Spectres', 'Octamari Rescue', 'Shoot 1UP', 'Zone 22', 'Agarest Zero', 'Gemsweeper', 'Crystal Picnic']
jjosh678
Making predictions for random user jjosh678
True
Actual games: ["Garry's Mod", 'Counter-Strike: Global Offensive', 'Arma 3', 'The Crew', 'Arma 2: Operation Arrowhead', 'Euro Truck Simulator 2', 'Mount & Blade: Warband', 'Wargame: AirLand Battle', 'Block N Load', 'ARK: Survival Evolved']
Predicted games: ['Reverse x Reverse', 'TRON: Evolution', 'World Of Leaders', 'SEEP Universe', 'N

Training epoch 3: 100%|██████████| 112511/112511 [15:45<00:00, 118.98it/s]


Epoch 3 - Training loss: 0.0001850745937898124


Validating epoch 3: 100%|██████████| 24110/24110 [00:31<00:00, 757.44it/s]


Epoch 3 - Validation loss: 0.0001554075166475002, RMSE: 0.012466277164742195, Precision@5: 0.0
76561198053373904
Making predictions for random user 76561198053373904
True
Actual games: ["Sid Meier's Civilization V", 'Left 4 Dead 2', "Garry's Mod", 'Borderlands 2', 'The Lord of the Rings Online™', 'Counter-Strike: Global Offensive', 'Surgeon Simulator', 'Chivalry: Medieval Warfare', 'Unturned', 'Empire: Total War']
Predicted games: ['Black Rainbow', 'Bin Weevils Arty Arcade', 'Hush', 'Selknam Defense', 'Sunken', 'RHEM IV: The Golden Fragments Special Edition', 'Shake Your Money Simulator 2016', 'OCCHIO', 'An Assassin in Orlandes', 'Football Manager 2017']
76561198077032945
Making predictions for random user 76561198077032945
True
Actual games: ["Garry's Mod", 'Chivalry: Medieval Warfare', 'Borderlands 2', 'Terraria', 'Left 4 Dead 2', 'Counter-Strike: Global Offensive', 'Spiral Knights', 'Kerbal Space Program', 'Unturned', 'Blacklight: Retribution']
Predicted games: ['Nameless ~The one t

Training epoch 4: 100%|██████████| 112511/112511 [15:45<00:00, 119.06it/s]


Epoch 4 - Training loss: 0.0001745561883366965


Validating epoch 4: 100%|██████████| 24110/24110 [00:31<00:00, 770.01it/s]


Epoch 00004: reducing learning rate of group 0 to 5.0000e-04.
Epoch 4 - Validation loss: 0.0001971923449972859, RMSE: 0.014042561791940338, Precision@5: 0.0
76561198088983081
Making predictions for random user 76561198088983081
True
Actual games: ['Counter-Strike: Global Offensive', 'Unturned', 'Killing Floor', 'Killing Floor 2', "Tom Clancy's Ghost Recon Phantoms - NA", "Garry's Mod", 'Magicite', 'Terraria', 'Left 4 Dead 2', 'Robocraft']
Predicted games: ['Tom vs. The Armies of Hell', 'EeOneGuy Adventure', 'Awakening of Solutio', 'Hazen: The Dark Whispers', 'Legena: Union Tides', 'Treasure Planet Battle at Procyon', 'Voidspire Tactics', 'Way to Go!', 'MAGIX Photostory 2014 Deluxe', 'Unwell Mel']
76561198072285282
Making predictions for random user 76561198072285282
True
Actual games: ["Garry's Mod", 'South Park™: The Stick of Truth™', 'SMITE', 'Dead Island: Epidemic', 'Loadout', 'Bloons TD Battles']
Predicted games: ['Galactic Inheritors', 'Luxor 2 HD', 'Troubles Land', 'Lost in Parad

Training epoch 5: 100%|██████████| 112511/112511 [15:46<00:00, 118.87it/s]


Epoch 5 - Training loss: 9.073115359778397e-05


Validating epoch 5: 100%|██████████| 24110/24110 [00:32<00:00, 747.90it/s]


Epoch 5 - Validation loss: 0.0001379189896516968, RMSE: 0.011743985883899343, Precision@5: 0.0
Big_Kill
Making predictions for random user Big_Kill
True
Actual games: ['Left 4 Dead 2', 'Counter-Strike: Source', 'Super Monday Night Combat', "Garry's Mod", 'Serious Sam 2', 'Castle Crashers', 'NARUTO SHIPPUDEN: Ultimate Ninja STORM Revolution', 'Poker Night 2', 'Counter-Strike: Global Offensive', 'BattleBlock Theater']
Predicted games: ['Blue Bird', 'Enemy Empire', 'The Settlers: Heritage of Kings', 'Complete Figure Drawing Course HD: 017 - The Journey to 3/4 view of Human Skull  - Part 5', 'Gunspell: Steam Edition', 'Westward 2', 'The BoX', 'Blender: 4.08 Assets - Removing NGons', 'Abduction Action! Plus', 'The Empty Inn']
76561198050100696
Making predictions for random user 76561198050100696
True
Actual games: ['Football Manager 2012', "Sid Meier's Civilization V", "Garry's Mod", 'The Elder Scrolls V: Skyrim', 'Dota 2 Test']
Predicted games: ['Tank Battle: North Africa', "Waste Walkers 

Training epoch 6: 100%|██████████| 112511/112511 [15:47<00:00, 118.80it/s]


Epoch 6 - Training loss: 9.076121009933835e-05


Validating epoch 6: 100%|██████████| 24110/24110 [00:31<00:00, 754.39it/s]


Epoch 6 - Validation loss: 9.764494479933525e-05, RMSE: 0.009881657947132728, Precision@5: 0.0
76561198040495105
Making predictions for random user 76561198040495105
True
Actual games: ['Age of Empires II: HD Edition', 'Age of Empires II: HD Edition', 'DARK SOULS III', 'DARK SOULS III', 'Left 4 Dead 2', 'Left 4 Dead 2', 'DARK SOULS™ II', 'DARK SOULS™ II', "Sid Meier's Civilization V", "Sid Meier's Civilization V"]
Predicted games: ['Shiny Gauntlet', 'Treasure Planet Battle at Procyon', 'The Dwarves', 'Botology', 'Luxor Amun Rising HD', 'Nancy Drew: Secrets can Kill', 'Azada', 'Tommy Tronic', 'Aircraft War X', 'MotoGP™13']
76561198044743278
Making predictions for random user 76561198044743278
True
Actual games: ["Sid Meier's Civilization V", "Garry's Mod", 'Kerbal Space Program', 'Unturned', 'Prison Architect', 'Total War: ATTILA', 'FTL: Faster Than Light', 'Robocraft', 'Hitman: Absolution', 'Terraria']
Predicted games: ['Blinding Dark', 'Aerofly FS 1 Flight Simulator', 'Lethis - Path o

Training epoch 7: 100%|██████████| 112511/112511 [15:46<00:00, 118.85it/s]


Epoch 7 - Training loss: 8.994897128381247e-05


Validating epoch 7: 100%|██████████| 24110/24110 [00:31<00:00, 757.28it/s]


Epoch 7 - Validation loss: 8.259544864806575e-05, RMSE: 0.009088313089444033, Precision@5: 0.0
EpicDemoManHasKitteh
Making predictions for random user EpicDemoManHasKitteh
True
Actual games: ['Counter-Strike: Global Offensive', "Garry's Mod", "Sid Meier's Civilization V", 'Awesomenauts', 'Counter-Strike: Source', 'The Elder Scrolls V: Skyrim', 'Terraria', 'Prison Architect', 'Saints Row: The Third', 'Loadout']
Predicted games: ["Nobunaga's Ambition: Souzou", 'The Visitor', 'Damage Inc', 'Sound Forge Audio Studio 10 - Steam Powered', 'CHARIOT WARS', 'Krita Gemini', 'Fleet Defender: The F-14 Tomcat Simulation', 'Chicku', 'Arkshot', 'The Path of Greatest Resistance']
76561198065940908
Making predictions for random user 76561198065940908
True
Actual games: ['Borderlands 2', "Garry's Mod", 'Half-Life 2', 'Half-Life 2: Episode Two', 'Counter-Strike: Source', 'Half-Life 2: Episode One', 'Portal', 'Arma 2: Operation Arrowhead', 'Half-Life 2: Lost Coast', 'Arma 2']
Predicted games: ['Force of E

Training epoch 8: 100%|██████████| 112511/112511 [15:45<00:00, 119.01it/s]


Epoch 8 - Training loss: 8.920104861510302e-05


Validating epoch 8: 100%|██████████| 24110/24110 [00:31<00:00, 762.58it/s]


Epoch 8 - Validation loss: 0.00011966744197619641, RMSE: 0.010939393853452831, Precision@5: 0.0
popureye
Making predictions for random user popureye
True
Actual games: ["Tom Clancy's Ghost Recon Phantoms - NA", 'Marvel Heroes 2016', 'Loadout', 'World of Guns: Gun Disassembly', 'Dota 2 Test']
Predicted games: ['Mystery Castle', 'Pulse', 'Complete Figure Drawing Course HD: 009 - The Formulas for the Side View of the Human Head - Phase 1', 'VR Gallery', 'Garage Drummer VR', 'Niffelheim', 'Fall of Civilization', 'The Tribe', 'Heroes Rise: HeroFall', 'And So It Was']
76561198084189953
Making predictions for random user 76561198084189953
True
Actual games: ["Garry's Mod", 'Counter-Strike: Global Offensive', 'Call of Duty: World at War', 'Unturned', 'Mortal Kombat Komplete Edition', 'Goat Simulator', 'Call of Duty: Advanced Warfare - Multiplayer', 'Left 4 Dead 2', 'Yet Another Zombie Defense', 'Call of Duty: Advanced Warfare']
Predicted games: ['Alienware Steam Machine', '101 Ways to Die', 'S

Training epoch 9: 100%|██████████| 112511/112511 [15:46<00:00, 118.84it/s]


Epoch 9 - Training loss: 8.853971514948447e-05


Validating epoch 9: 100%|██████████| 24110/24110 [00:31<00:00, 754.41it/s]


Epoch 9 - Validation loss: 7.562789996798269e-05, RMSE: 0.008696533031163231, Precision@5: 0.0
76561198085261848
Making predictions for random user 76561198085261848
True
Actual games: ['Dream Of Mirror Online', 'Trove', 'Cry of Fear', 'Spiral Knights', 'Robocraft', 'Super Monday Night Combat', 'War Inc. Battlezone', 'Sins of a Dark Age', 'Warface', 'The Way of Life Free Edition']
Predicted games: ['The Treasures of Montezuma 5', 'Rebirth of Island', 'Gnomes Garden 2', 'Yooka-Laylee - Toybox', 'Dinocide', 'Ship Simulator: Maritime Search and Rescue', 'Impossible Geometry', 'Chicken Invaders 2', 'The Bloc', 'Gravity Cat']
thanksforeadingthis
Making predictions for random user thanksforeadingthis
True
Actual games: ['Counter-Strike: Global Offensive', 'Dirty Bomb', 'Warframe', 'Trove']
Predicted games: ['The Prism', 'The Secret Order 3: Ancient Times', 'Highborn', 'Ruckus Ridge VR Party', 'Deep Black : Reloaded', 'Agatha Christie: Evil under the Sun', 'Raise Your Own Clone', "Drakensang 

Training epoch 10: 100%|██████████| 112511/112511 [15:47<00:00, 118.77it/s]


Epoch 10 - Training loss: 8.787315968884582e-05


Validating epoch 10: 100%|██████████| 24110/24110 [00:32<00:00, 746.05it/s]


Epoch 10 - Validation loss: 9.804258985954702e-05, RMSE: 0.00990176193690777, Precision@5: 0.0
76561198090441464
Making predictions for random user 76561198090441464
True
Actual games: ['Total War: SHOGUN 2', 'Total War: ROME II - Emperor Edition', 'Automation - The Car Company Tycoon Game', 'Magic Duels']
Predicted games: ['A-Train 9 V4.0 : Japan Rail Simulator', 'Dual Core', 'A Mass of Dead', 'Wave of Darkness', 'Blender: 7.3 3D Paint - Multi Layered', 'Nancy Drew: The Creature of Kapu Cave', 'Concrete Jungle', 'The Last Hope', 'MAGIX Movie Edit Pro 2014 Plus', "Little King's Story"]
lachlanm
Making predictions for random user lachlanm
True
Actual games: ['Left 4 Dead', "Garry's Mod", 'Left 4 Dead 2', 'Grand Theft Auto IV', 'Left 4 Dead 2 Beta']
Predicted games: ['Reset 1-1', 'The Battle for Sector 219', 'PRINCIPIA: Master of Science', 'Footbrawl Playground', '1943 Megami Strike', 'Monsters and Monocles', 'Star Raiders ', 'The Great Whale Road', 'Wild Frontera', 'Sun Dogs']
Infected_

Training epoch 11: 100%|██████████| 112511/112511 [15:46<00:00, 118.91it/s]


Epoch 11 - Training loss: 8.737996662497778e-05


Validating epoch 11: 100%|██████████| 24110/24110 [00:31<00:00, 753.47it/s]


Epoch 11 - Validation loss: 8.769636696455389e-05, RMSE: 0.009364746926841338, Precision@5: 0.0
doyouwantorp
Making predictions for random user doyouwantorp
True
Actual games: ['Counter-Strike: Global Offensive', "Garry's Mod", 'DayZ', 'Call of Duty: Black Ops II - Multiplayer', "Assassin's Creed® III", 'Saints Row: The Third', 'Dead by Daylight', 'Left 4 Dead 2', 'Robocraft', 'Grand Theft Auto V']
Predicted games: ['Cursed West', 'Zoo Rampage', 'Steel Rain', 'Scooby-Doo! & Looney Tunes Cartoon Universe: Adventure', 'Evil Hazard', 'A Stroke of Fate: Operation Bunker', 'Blender: 1.3 UI Basics - Layout Customization', 'Wing IDE 5', 'Astro Emporia', 'Doodle WHAT?!']
MtTheuz
Making predictions for random user MtTheuz
True
Actual games: ['Call of Duty: Modern Warfare 3 - Multiplayer', 'Unturned', 'Insurgency', 'PlanetSide 2', 'Portal', "Garry's Mod", 'Call of Duty: Modern Warfare 3', 'Firefall', 'HAWKEN', 'Robocraft']
Predicted games: ['G8 Dynamic Gate (VST/AU)', 'Strangers', 'Deputy Dangle

Training epoch 12: 100%|██████████| 112511/112511 [15:47<00:00, 118.79it/s]


Epoch 12 - Training loss: 8.649681486932487e-05


Validating epoch 12: 100%|██████████| 24110/24110 [00:31<00:00, 754.26it/s]


Epoch 00012: reducing learning rate of group 0 to 2.5000e-04.
Epoch 12 - Validation loss: 7.773861209485061e-05, RMSE: 0.008817056935150813, Precision@5: 0.0
pedro2579
Making predictions for random user pedro2579
True
Actual games: ['Terraria', 'Neverwinter', 'The Binding of Isaac', 'Warframe', 'Dungeon Defenders', 'Starbound', 'Trove', "Don't Starve", 'Left 4 Dead 2', "Garry's Mod"]
Predicted games: ['Gnumz: Masters of Defense', 'Cloudlands : VR Minigolf', 'Amnesia: Final Revelations', 'Hexus', 'SiN Episodes: Emergence', 'RESCUE 2', 'Super Ubie Island REMIX', 'Mad Bullets', 'Perraw - FPS Clone War', "Candice DeBébé's Incredibly Trick Lifestyle"]
NayeonIsFab
Making predictions for random user NayeonIsFab
True
Actual games: ['Counter-Strike: Global Offensive', 'MapleStory', "Don't Starve Together", 'Left 4 Dead 2', 'Clicker Heroes', 'TERA', 'Unturned', 'Blacklight: Retribution', 'Aura Kingdom', 'Brawlhalla']
Predicted games: ['Protonwar', 'Fruit Golf', "YOU DON'T KNOW JACK", 'Mensa Acad

Training epoch 13: 100%|██████████| 112511/112511 [15:46<00:00, 118.92it/s]


Epoch 13 - Training loss: 6.79463077232395e-05


Validating epoch 13: 100%|██████████| 24110/24110 [00:32<00:00, 745.11it/s]


Epoch 13 - Validation loss: 7.052780385626584e-05, RMSE: 0.008398185770564824, Precision@5: 0.0
JarryBush
Making predictions for random user JarryBush
True
Actual games: ["Garry's Mod", 'Golf With Your Friends', 'Ultimate Chicken Horse', 'Counter-Strike: Global Offensive', 'Call of Duty: Black Ops - Multiplayer', 'Gotham City Impostors: Free To Play', 'Middle-earth: Shadow of Mordor', 'Left 4 Dead 2', 'Warface', 'PlanetSide 2']
Predicted games: ['DreamDesk VR Beta', 'Kinacoustic', 'Cheaters Blackjack 21', 'Tavernier', 'Bleeding Blocks', 'Pixel Dungeon', 'Sign Motion', 'Viking Armor / Weapons: 12 - Sword UVWs', 'Rocket Shooter', 'Winter Novel']
The_Eternal_Bane
Making predictions for random user The_Eternal_Bane
True
Actual games: ['Europa Universalis IV', "Sid Meier's Civilization V", 'Counter-Strike: Global Offensive', 'Fallout 4', 'Wasteland 2', 'Galactic Civilizations III', 'Stellaris', 'Mass Effect', 'Victoria II', 'PAYDAY 2']
Predicted games: ['MAGIX Photostory 2014 Deluxe', 'Disc

Training epoch 14: 100%|██████████| 112511/112511 [15:47<00:00, 118.77it/s]


Epoch 14 - Training loss: 6.779759312034184e-05


Validating epoch 14: 100%|██████████| 24110/24110 [00:32<00:00, 748.82it/s]


Epoch 14 - Validation loss: 7.01600688332512e-05, RMSE: 0.008376263490551086, Precision@5: 0.0
magoo38
Making predictions for random user magoo38
True
Actual games: ['Counter-Strike: Global Offensive', 'Call of Duty: Black Ops III', 'Grand Theft Auto V', 'Dying Light', 'Arma 3', 'Far Cry® 3', 'Call of Duty: Black Ops II - Multiplayer', 'Dirty Bomb', 'South Park™: The Stick of Truth™', 'Far Cry® 3 Blood Dragon']
Predicted games: ['DreamDesk VR Beta', 'Throbax TD', 'Mystery Of Rivenhallows', 'Planet Busters', "Little King's Story", 'Web Designer 10', 'Civil War: 1863', 'Complete Figure Drawing Course HD: 019 - Conclusion of the invention of the Human Skull', 'Heckabomb', 'Forsaken Fortress Strategy']
76561198068116178
Making predictions for random user 76561198068116178
True
Actual games: ['Dota 2 Test']
Predicted games: ['Chronicle Keepers: The Dreaming Garden', 'Battle Girls', 'Black Box', 'UnderWater Adventure', 'The Asskickers', 'Ride the Bullet', 'DEMIAN', 'DreamDesk VR Beta', 'Obsc

Training epoch 15: 100%|██████████| 112511/112511 [15:46<00:00, 118.89it/s]


Epoch 15 - Training loss: 6.748712878261933e-05


Validating epoch 15: 100%|██████████| 24110/24110 [00:31<00:00, 754.97it/s]


Epoch 15 - Validation loss: 7.406794823079943e-05, RMSE: 0.008606377635309028, Precision@5: 0.0
76561198015204133
Making predictions for random user 76561198015204133
True
Actual games: ["Tom Clancy's Ghost Recon Phantoms - NA", 'Counter-Strike: Global Offensive', 'Left 4 Dead 2', 'Heroes & Generals', 'Unturned', 'Dota 2 Test', 'Star Conflict', 'War Thunder', 'Soldier Front 2', 'Left 4 Dead 2 Beta']
Predicted games: ['CF-7-04 Color Maps - Veins and Vessels', 'Existentia', 'DreamDesk VR Beta', 'Color Guardians', 'Viking Armor / Weapons: 07 - Modeling the Helmet', 'Logic Missile', 'Enigmatis 3: The Shadow of Karkhala', 'MegaSphere', 'Mafia II - JAPAN', 'Horrid Henry']
Hunter_King
Making predictions for random user Hunter_King
True
Actual games: ['Counter-Strike: Global Offensive', 'Arma 2: Operation Arrowhead', "Garry's Mod", 'Arma 2', 'Arma 2: Operation Arrowhead Beta (Obsolete)', 'DETOUR', 'One Way Heroics', 'Lovely Planet', 'Polarity', 'Dota 2 Test']
Predicted games: ['Plush', 'Emerge

Training epoch 16: 100%|██████████| 112511/112511 [15:45<00:00, 118.97it/s]


Epoch 16 - Training loss: 6.697981898334734e-05


Validating epoch 16: 100%|██████████| 24110/24110 [00:32<00:00, 744.07it/s]


Epoch 16 - Validation loss: 6.968202279669933e-05, RMSE: 0.008347678152879203, Precision@5: 0.0
76561198059098764
Making predictions for random user 76561198059098764
True
Actual games: ['Robocraft', 'Prison Architect', "Garry's Mod", 'Age of Empires Online', 'Unturned', 'PAYDAY 2', 'Left 4 Dead 2', 'Total War: ROME II - Emperor Edition', 'Warhammer 40,000: Dawn of War – Soulstorm', 'PlanetSide 2']
Predicted games: ['Molly', 'Cubicolor', 'Rugby Challenge 2', 'Bravada', 'DreamDesk VR Beta', 'Occurrence at JCR Outpost', 'Miko Gakkou: Second Year', "Dark Parables: The Little Mermaid and the Purple Tide Collector's Edition", 'Champions Of Chaos 2', 'TransOcean 2: Rivals']
76561198089058865
Making predictions for random user 76561198089058865
True
Actual games: ['Dota 2 Test', 'Afterfall InSanity - Dirty Arena Edition']
Predicted games: ['DreamDesk VR Beta', 'Web Designer 10', 'Mystik Belle', 'The Ables: Freepoint High', 'Zen Fish SIM', 'Supreme: Pizza Empire', 'Isbarah', 'City Quest', 'Chr

Training epoch 17:  72%|███████▏  | 81029/112511 [11:22<04:24, 119.12it/s]

In [None]:
user_mapping

   val_loss /= len(val_loader)
    val_rmse = sqrt(mean_squared_error(y_true_val, y_pred_val))
    val_precision_at_k = precision_at_k(np.array(y_true_val), np.array(y_pred_val), k=5)
    
    print(f"Epoch {epoch} - Validation loss: {val_loss}, RMSE: {val_rmse}, Precision@5: {val_precision_at_k}")

    # Display top-k games for a sample user (let's say the first user in our dataset)
    #sample_user_id = list(user_mapping.keys())[0]

    random_user_id, random_top_k_games = predict_for_random_user(model, user_mapping, item_mapping, user_item_df, k=5)
    print(f"Actual top 5 games for user {random_user_id}: {get_actual_top_k_games(user_mapping[random_user_id], user_item_df, user_mapping, k=5)}")
    print(f"Predicted top 5 games for user {random_user_id}: {random_top_k_games}")