In [9]:
import pandas as pd
import joblib
from pathlib import Path
from manga_recs.data_engineering.load.s3 import s3_load

# Paths
MODEL_PATH = s3_load("cosine_sim.pkl", bucket="manga-recs", status="models")
METADATA_PATH = s3_load("cleaned_manga_metadata.parquet", bucket="manga-recs", status="cleaned")

# Load similarity matrix + metadata
SIM_MATRIX = joblib.load(MODEL_PATH)
METADATA = pd.read_parquet(METADATA_PATH)

def get_top_n_recommendations(manga_id, top_n=5):

    # Get row index for the given manga_id
    similarities = SIM_MATRIX.loc[manga_id]  # Ensure manga_id is valid

    # Get top N most similar
    top_similarities = similarities.sort_values(ascending=False).head(top_n)

    # Get the corresponding metadata for these manga IDs
    recs = METADATA[METADATA['id'].isin(top_similarities.index)][['id', 'title', 'description', 'tags']]

    # Merge similarity scores into metadata
    recs = recs.set_index('id')  # set 'id' as index to match top_similarities
    recs = recs.join(top_similarities.rename("similarity"))  # add similarity column
    recs = recs.reset_index()  # optional: reset index for clean DataFrame
    recs['similarity'] = recs['similarity'].round(2)  # round similarity for better readability

    return recs

# test
if __name__ == "__main__":
    recommendations = get_top_n_recommendations(manga_id=30002, top_n=5)
    print(recommendations)


Downloaded cosine_sim.pkl from s3://manga-recs/models/2026-02-18/cosine_sim.pkl
Downloaded cleaned_manga_metadata.parquet from s3://manga-recs/cleaned/2026-02-15/cleaned_manga_metadata.parquet
      id            title                                        description  \
0  30013        one piece  As a child, Monkey D. Luffy was inspired to be...   
1  30642     vinland saga  As a child, Thorfinn sat at the feet of the gr...   
2  30656         vagabond  At seventeen years of age, Miyamoto Musashi--s...   
3  46765          kingdom  Xin is a war orphan in a poor village in the k...   
4  53390  attack on titan  In this post-apocalyptic sci-fi story, humanit...   

                                                tags  similarity  
0  [pirates, travel, found family, shounen, ensem...        0.55  
1  [rehabilitation, historical, coming of age, vi...        0.62  
2  [samurai, swordplay, seinen, historical, philo...        0.59  
3  [military, historical, war, ancient china, pol...      