In [1]:
import torch
import torch.nn as nn
import zstandard
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import umap
from pathlib import Path
from train_vector import SimpleMF

# Config
MODEL_PATH = Path("./osu_mf_model.pth")
MAP_MAPPINGS_PATH = Path("./training_data/mappings_maps.json.zst")
EMBEDDING_DIM = 32
DEVICE = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"


# Load Mappings
print("Loading Mappings...")
with open(MAP_MAPPINGS_PATH, "rb") as f:
    dctx = zstandard.ZstdDecompressor()
    map_data = json.loads(dctx.decompress(f.read()).decode("utf-8"))

# Create lookup: Index -> Name
idx_to_map = {
    v['idx']: f"{v['artist']} - {v['title']} [{v['version']}]" 
    for k, v in map_data.items()
}
num_items = len(idx_to_map)
# Dummy num_users since we only care about map weights here
num_users = 9999

# Load Model
print(f"Loading Model from {MODEL_PATH}...")
model = SimpleMF(num_users, num_items, EMBEDDING_DIM).to(DEVICE)
state_dict = torch.load(MODEL_PATH, map_location=DEVICE)

# Handle size mismatch if training data grew/shrank (optional safety)
current_items = model.item_embedding.weight.shape[0]
loaded_items = state_dict['item_embedding.weight'].shape[0]
if current_items != loaded_items:
    model = SimpleMF(num_users, loaded_items, EMBEDDING_DIM).to(DEVICE)

model.load_state_dict(state_dict)
model.eval()
print("Model Loaded.")

  from .autonotebook import tqdm as notebook_tqdm


Device: mps
Loading Mappings...
Loading Model from osu_mf_model.pth...
Model Loaded.


In [2]:
from torchdr import UMAP

# 1. Get Weights (Keep on Device)
# Normalize first because TorchDR defaults to Euclidean, 
# and Euclidean on normalized vectors is equivalent to Cosine distance.
item_weights = torch.nn.functional.normalize(model.item_embedding.weight, p=2, dim=1)

# 2. Fit UMAP on GPU/MPS
print(f"Running TorchDR UMAP on {DEVICE}...")
reducer = UMAP(n_components=2, n_neighbors=15, min_dist=0.1)
embedding_2d_tensor = reducer.fit_transform(item_weights)

# 3. Move to CPU only for plotting
embedding_2d = embedding_2d_tensor.detach().cpu().numpy()

# 4. Plot
plt.figure(figsize=(12, 8), dpi=100)
plt.scatter(embedding_2d[:, 0], embedding_2d[:, 1], s=1, alpha=0.3, c='cyan')
plt.title(f"Map Style Clusters (TorchDR on {DEVICE})")
plt.axis('off')
plt.show()

  \text{artanh}(x) = 0.5 \cdot (\log(1 + x) - \log(1 - x))


Running TorchDR UMAP on mps...


NotImplementedError: The operator 'aten::unique_dim' is not currently implemented for the MPS device. If you want this op to be considered for addition please comment on https://github.com/pytorch/pytorch/issues/141287 and mention use-case, that resulted in missing op as well as commit hash e2d141dbde55c2a4370fac5165b0561b6af4798b. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.

In [None]:
# Extract Biases
# High Bias = Easier than average (Farm)
# Low Bias = Harder than average (Underrated/Tech)
biases = model.item_bias.weight.detach().cpu().flatten()
values, indices = torch.topk(biases, k=len(biases))

print("--- Top 10 'Farm' Maps (Highest Bias) ---")
for i in range(10):
    idx = indices[i].item()
    print(f"{values[i]:.4f} | {idx_to_map.get(idx, 'Unknown')}")

print("\n--- Top 10 'Hard/Underrated' Maps (Lowest Bias) ---")
for i in range(1, 11):
    idx = indices[-i].item()
    val = values[-i].item()
    print(f"{val:.4f} | {idx_to_map.get(idx, 'Unknown')}")

In [None]:
def find_similar_maps(query_map_idx, top_k=5):
    # 1. Get Query Vector
    query_vec = model.item_embedding.weight[query_map_idx].unsqueeze(0)
    
    # 2. Cosine Similarity against ALL maps
    # Sim = (A . B) / (|A| * |B|)
    all_vecs = model.item_embedding.weight
    
    # Normalize vectors to use simple dot product
    query_norm = torch.nn.functional.normalize(query_vec, p=2, dim=1)
    all_norm = torch.nn.functional.normalize(all_vecs, p=2, dim=1)
    
    cosine_sim = torch.mm(query_norm, all_norm.T).squeeze()
    
    # 3. Sort
    scores, sorted_indices = torch.topk(cosine_sim, k=top_k+1)
    
    print(f"Maps similar to: {idx_to_map.get(query_map_idx, query_map_idx)}")
    for i in range(1, top_k+1): # Skip index 0 (itself)
        idx = sorted_indices[i].item()
        print(f"{scores[i]:.4f} | {idx_to_map.get(idx, 'Unknown')}")

# Example Usage: Replace 100 with a valid map index from your data
find_similar_maps(100)