## Imports & Paths

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
FEAT_CSV = Path('../models/anime_features.csv')

In [3]:
assert FEAT_CSV.exists(), f"{FEAT_CSV} not found — run feature‑builder first!"
print("Feature file found:", FEAT_CSV)

Feature file found: ..\models\anime_features.csv


## Load Feature Table

In [4]:
feat_df = pd.read_csv(FEAT_CSV)

In [5]:
feature_cols = feat_df.columns.difference(['anime_id','title'])
feat_matrix = feat_df[feature_cols].values

In [6]:
print("Loaded feature matrix:", feat_matrix.shape)
feat_df.head(3)

Loaded feature matrix: (13231, 93)


Unnamed: 0,anime_id,title,genre_Action,genre_Adult Cast,genre_Adventure,genre_Anthropomorphic,genre_Avant Garde,genre_Award Winning,genre_Boys Love,genre_Cgdct,...,hist_score-10,hist_score-9,hist_score-8,hist_score-7,hist_score-6,hist_score-5,hist_score-4,hist_score-3,hist_score-2,hist_score-1
0,1,Frieren: Beyond Journey's End,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,2,Fullmetal Alchemist: Brotherhood,1,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,3,Steins;Gate,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [7]:
title_to_idx = {
    t.lower(): i
    for i, t in enumerate(feat_df["title"])
    if isinstance(t, str) and t.strip()          # skip blanks / NaNs
}


In [8]:
def titles_to_indices(titles):
    idxs = [title_to_idx.get(t.strip().lower()) for t in titles]
    return [i for i in idxs if i is not None]

## Core recommender

In [9]:
def recommend_from_titles(
    liked_titles: list[str],
    k: int = 10,
    genre_filter: list[str] | None = None
):
    idxs = titles_to_indices(liked_titles)
    if not idxs:
        raise ValueError("None of the provided titles found in the catalogue.")
    
    # Build user profile vector (sum because rows are L2‑normalised)
    user_vec = feat_matrix[idxs].sum(axis=0, keepdims=True)

    # Cosine similarity = dot product
    scores = (user_vec @ feat_matrix.T).ravel()
    scores[idxs] = -1.0    # exclude watched
    
    # Optional genre gating
    if genre_filter:
        gcols = [f"genre_{g.strip().title()}" for g in genre_filter]
        gcols = [c for c in gcols if c in feature_cols]
        if gcols:
            mask = feat_df[gcols].sum(axis=1) > 0
            scores = np.where(mask, scores, -1.0)
    
    best = scores.argsort()[-k:][::-1]
    return (
        feat_df.loc[best, ["title", "anime_id"]]
        .assign(similarity=scores[best].round(4))
        .reset_index(drop=True)
    )

In [10]:
liked = ["One Piece"]

print("Top 5 overall recommendations:")
display(recommend_from_titles(liked, k=5))

Top 5 overall recommendations:


Unnamed: 0,title,anime_id,similarity
0,Hunter x Hunter,8,5
1,Fullmetal Alchemist: Brotherhood,2,5
2,One Piece Fan Letter,5,5
3,Dragon Ball Z: Fusion Reborn,1525,4
4,Naruto Shippuden the Movie 5: Blood Prison,2093,4


In [11]:
print("Top 5 *Action* recommendations:")
display(recommend_from_titles(liked, k=5, genre_filter=["Action"]))

Top 5 *Action* recommendations:


Unnamed: 0,title,anime_id,similarity
0,One Piece Fan Letter,5,5.0
1,Hunter x Hunter,8,5.0
2,Fullmetal Alchemist: Brotherhood,2,5.0
3,One Piece: Open Upon the Great Sea! A Father's...,3385,4.0
4,Naruto Shippuden the Movie 4: The Lost Tower,2223,4.0
