# 0. Username mapping

Requirements

In [50]:
# Install the packages listed in ../requirements.txt (-q keeps pip's output quiet).
%pip install -r ../requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


Imports

In [51]:
import os
import time

import pandas as pd
import numpy as np

import json
from tqdm import tqdm
import logging

import random

Logging configuration

In [52]:
# Configure the root logger: INFO level, "timestamp - level - message" records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Root logger, used by the cells below to report progress.
logger = logging.getLogger()

Constants

In [53]:
# Directories (all relative to the notebook's parent folder)
DATA_DIR = os.path.join("..", "data")
# Working and raw data live directly under the data folder.
TEMP_DIR, RAW_DIR = (os.path.join(DATA_DIR, sub) for sub in ("temp", "raw"))
# Cleaned data is split per target database under data/clean.
MONGO_DIR, NEO4J_DIR = (
    os.path.join(DATA_DIR, "clean", db) for db in ("mongo", "neo4j")
)

In [54]:
# Frequency of data backup.
# NOTE(review): not used in the visible cells and the unit is not stated here;
# presumably a count of processed items between backups - TODO confirm.
BACKUP_PERIOD = 3_000

In [55]:
# Input files: the username list (temp folder) and the raw user / board game dumps.
USERNAMES_FILE = os.path.join(TEMP_DIR, "usernames.json")
USERS_FILE, BOARD_GAME_FILE = (
    os.path.join(RAW_DIR, name)
    for name in ("users.json", "boardgames&reviews.json")
)

# Output file: the username mapping written at the end of this notebook.
MAP_FILE = os.path.join(TEMP_DIR, "user_map.json")

Utility functions

In [56]:
def save_to_json(data, filename):
    """
    Save data to a JSON file.

    The data is serialized in memory before the file is opened, so a
    serialization error leaves an already existing file untouched instead of
    truncating it. Missing parent directories of filename are created.

    Parameters:
    data (any): The data to be saved to the JSON file. This can be any data type that is serializable to JSON.
    filename (str): The path of the file where the data will be saved (UTF-8, 4-space indentation, non-ASCII characters kept unescaped).

    Returns:
    None

    Raises:
    TypeError: If data contains an object that is not JSON serializable.
    ValueError: If data contains a circular reference.
    OSError: If the directory or the file cannot be created or written.
    """
    # Serialize first: json.dump() would stream into the already-truncated
    # file, so a failure half-way would leave a corrupt file behind.
    payload = json.dumps(data, ensure_ascii=False, indent=4)

    parent = os.path.dirname(filename)
    if parent:  # a bare file name has no directory component to create
        os.makedirs(parent, exist_ok=True)

    with open(filename, "w", encoding="utf-8") as f:
        f.write(payload)

## Functions

In [57]:
def create_username_mapping(downloaded_usernames, nondownloaded_usernames, seed=None):
    """
    Build a mapping that sends every username to a downloaded username.

    Each non-downloaded username is assigned a randomly chosen downloaded
    username (the same target may be reused for several usernames), and each
    downloaded username maps to itself. If a username appears in both inputs,
    the identity entry wins.

    Parameters:
    downloaded_usernames (iterable): Usernames that can be used as targets. Any iterable is accepted, including one-shot iterators.
    nondownloaded_usernames (iterable): Usernames that must be replaced by a downloaded one.
    seed (int | float | str | bytes, optional): When given, the assignment is reproducible: a private random generator seeded with this value is used and both inputs are processed in sorted order (so the usernames must be mutually orderable). When None, the module-level random generator and the iteration order of the inputs are used.

    Returns:
    dict: Mapping from every input username to a downloaded username.

    Raises:
    IndexError: If downloaded_usernames is empty while nondownloaded_usernames is not.
    """
    # Materialize once: the pool is needed both for the random draws and for
    # the identity entries, and a one-shot iterator could not be read twice.
    pool = list(downloaded_usernames)

    if seed is None:
        choose = random.choice
        targets = nondownloaded_usernames
    else:
        # Set iteration order is not stable across runs, so sort to make the
        # seeded draws land on the same usernames every time.
        choose = random.Random(seed).choice
        pool.sort()
        targets = sorted(nondownloaded_usernames)

    mapping = {username: choose(pool) for username in targets}
    # Identity entries are written last so they override any random assignment.
    mapping.update((username, username) for username in pool)
    return mapping

In [58]:
def replace_username(username, mapping):
    """
    Look up the replacement for a single username.

    Returns the value stored in mapping for username, or username itself
    when the mapping has no entry for it.
    """
    if username in mapping:
        return mapping[username]
    return username

## Execution

In [59]:
# Usernames from the collected username list.
with open(USERNAMES_FILE, encoding="utf-8") as f:
    all_users = {*json.load(f)}
logger.info(f"Utenti in lista: {len(all_users)}")

# Usernames of the users present in the raw users file.
with open(USERS_FILE, encoding="utf-8") as f:
    downloaded_users = {entry["name"] for entry in json.load(f)}
logger.info(f"Utenti scaricati: {len(downloaded_users)}")

# Usernames of everyone who wrote at least one review of any board game.
with open(BOARD_GAME_FILE, encoding="utf-8") as f:
    boardgames = json.load(f)
username_reviews = {
    rev["user"]
    for boardgame in boardgames
    for rev in boardgame["reviews"]
}
logger.info(f"Utenti con recensioni: {len(username_reviews)}")

# Every username known from either the list or the reviews.
all_users = all_users | username_reviews
logger.info(f"Utenti totali: {len(all_users)}")

2025-01-20 01:11:30,130 - INFO - Utenti in lista: 33394
2025-01-20 01:11:36,400 - INFO - Utenti scaricati: 5000
2025-01-20 01:11:42,474 - INFO - Utenti con recensioni: 102985
2025-01-20 01:11:42,499 - INFO - Utenti totali: 121273


In [60]:
# Usernames without downloaded data are those known overall minus the downloaded ones.
username_mapping = create_username_mapping(
    downloaded_users,
    all_users.difference(downloaded_users),
)
# Log only the first five entries as a sanity check.
logger.info(f"Mappatura creata: {list(username_mapping.items())[:5]}")

2025-01-20 01:12:05,184 - INFO - Mappatura creata: [('Betbet', 'Xangxa'), ('derpderps', 'Elkar'), ('Gulgothica', 'Kortnek'), ('nibbons', 'superman829'), ('january31', 'thecrisper')]


In [61]:
# Persist the mapping to the temp folder.
save_to_json(data=username_mapping, filename=MAP_FILE)
logger.info(f"Mappatura salvata in {MAP_FILE}")

2025-01-20 01:12:08,057 - INFO - Mappatura salvata in ..\data\temp\user_map.json
