In [1]:
import pandas as pd
import faiss
import pickle
from sentence_transformers import SentenceTransformer
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# All paths hang off the project root, taken to be the parent of the current
# working directory (adjust BASE_DIR if the notebook is launched from elsewhere).
BASE_DIR = os.path.abspath(os.pardir)
CSV_PATH = os.path.join(BASE_DIR, 'data', 'processed_data', 'anime', 'anime.csv')
ARTIFACT_DIR = os.path.join(BASE_DIR, 'artifacts', 'anime')
# Create the artifact folder up front; exist_ok keeps re-running this cell harmless.
os.makedirs(name=ARTIFACT_DIR, exist_ok=True)

In [3]:
# Load the processed anime catalogue from CSV_PATH into a DataFrame.
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

In [8]:
# Build one free-text field per row: title, genres and synopsis joined by single
# spaces, with missing values replaced by empty strings so the concatenation never yields NaN.
title_text, genre_text, synopsis_text = (
    df[column].fillna('') for column in ('title', 'genres', 'synopsis')
)
df['tags'] = title_text + ' ' + genre_text + ' ' + synopsis_text

In [9]:
# Encode every row's `tags` text with the 'all-MiniLM-L6-v2' sentence-transformer.
# The captured run below took roughly ten minutes for 794 batches.
model = SentenceTransformer('all-MiniLM-L6-v2')
tag_texts = df['tags'].tolist()
embeddings = model.encode(
    tag_texts,
    convert_to_numpy=True,
    show_progress_bar=True,
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Batches: 100%|███████████████████████████████████████████████████████████████████████| 794/794 [10:05<00:00,  1.31it/s]


In [10]:
pip install hf_xet

Collecting hf_xet
  Downloading hf_xet-1.1.3-cp37-abi3-win_amd64.whl.metadata (883 bytes)
Downloading hf_xet-1.1.3-cp37-abi3-win_amd64.whl (2.3 MB)
   ---------------------------------------- 0.0/2.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.3 MB ? eta -:--:--
   --------- ------------------------------ 0.5/2.3 MB 4.2 MB/s eta 0:00:01
   --------- ------------------------------ 0.5/2.3 MB 4.2 MB/s eta 0:00:01
   ------------------ --------------------- 1.0/2.3 MB 2.1 MB/s eta 0:00:01
   --------------------------- ------------ 1.6/2.3 MB 2.0 MB/s eta 0:00:01
   ---------------------------------------- 2.3/2.3 MB 2.2 MB/s eta 0:00:00
Installing collected packages: hf_xet
Successfully installed hf_xet-1.1.3
Note: you may need to restart the kernel to use updated packages.


The value specified in an AutoRun registry key could not be parsed.

[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
# Build an exact (brute-force) L2 FAISS index sized to the embedding width,
# then add every row vector; row i of `embeddings` becomes FAISS id i.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

In [12]:
# Save files
# Context managers guarantee each pickle is flushed and its handle closed before
# anything else tries to read the artifact (bare open(...) left the handles dangling).
with open(os.path.join(ARTIFACT_DIR, 'data.pkl'), 'wb') as data_file:
    pickle.dump(df, data_file)
# NOTE(review): pickling the model ties the artifact to the installed library versions.
# It is kept as a pickle here because recommend() reads it back with pickle.load.
with open(os.path.join(ARTIFACT_DIR, 'sbert_model.pkl'), 'wb') as model_file:
    pickle.dump(model, model_file)
faiss.write_index(index, os.path.join(ARTIFACT_DIR, 'faiss_index.index'))

In [4]:
from rapidfuzz import process
import numpy as np
import faiss
import pickle
import os

def _load_artifacts(artifact_dir):
    """Load the pickled catalogue, the pickled encoder and the FAISS index from ``artifact_dir``."""
    # NOTE: pickle.load can execute arbitrary code; only load artifacts this notebook wrote.
    with open(os.path.join(artifact_dir, 'data.pkl'), 'rb') as data_file:
        df = pickle.load(data_file)
    with open(os.path.join(artifact_dir, 'sbert_model.pkl'), 'rb') as model_file:
        model = pickle.load(model_file)
    index = faiss.read_index(os.path.join(artifact_dir, 'faiss_index.index'))
    return df, model, index


def recommend(title_query, top_k=10):
    """Return up to ``top_k`` catalogue rows most similar to the given title.

    The title is first matched case-insensitively against ``df['title']``; if
    nothing matches exactly, the closest title according to RapidFuzz is used
    and a notice is printed. The matched row's ``tags`` text is encoded and its
    nearest neighbours are looked up in the FAISS index.

    Parameters
    ----------
    title_query : str
        Title to look up.
    top_k : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The ``title``, ``genres`` and ``synopsis`` columns of the neighbours,
        in FAISS ranking order, excluding the matched row itself. Empty when
        the catalogue offers no title to match against.
    """
    result_columns = ['title', 'genres', 'synopsis']
    artifact_dir = os.path.join(BASE_DIR, 'artifacts', 'anime')
    df, model, index = _load_artifacts(artifact_dir)

    # Step 1: Exact match (case-insensitive). Work with row *positions* throughout,
    # because FAISS ids are positions and the results are selected with iloc.
    is_exact = df['title'].str.lower().eq(title_query.lower())
    exact_positions = np.flatnonzero(is_exact.to_numpy(dtype=bool, na_value=False))
    if exact_positions.size:
        position = int(exact_positions[0])
    else:
        # Step 2: Fallback to closest title using RapidFuzz
        all_titles = df['title'].fillna('').tolist()
        closest = process.extractOne(title_query, all_titles)
        if closest is None:
            # Nothing to match against (e.g. empty catalogue).
            return df.iloc[0:0][result_columns]
        # For a list of choices the third element is the position in that list.
        best_match, score, position = closest
        print(f"⚠️ Title not found. Using closest match: '{best_match}' (Score: {score:.2f})")

    query_vec = model.encode([df['tags'].iloc[position]], convert_to_numpy=True)
    # Ask for one extra neighbour because the matched row is normally among the hits.
    _, neighbour_ids = index.search(query_vec, top_k + 1)

    # Drop the matched row explicitly (it is not guaranteed to rank first) and skip
    # the -1 ids FAISS uses as padding when the index holds fewer than k vectors.
    keep = [int(i) for i in neighbour_ids[0] if i != -1 and i != position][:top_k]
    return df.iloc[keep][result_columns]

In [12]:
recommend("to love ru")

⚠️ Title not found. Using closest match: 'to love-ru' (Score: 90.00)


Unnamed: 0,title,genres,synopsis
5647,motto to love-ru,"Comedy, Sci-Fi, Ecchi",Rito Yuuki never gets a break—he's always find...
9881,to love-ru darkness 2nd,"Comedy, Romance, Sci-Fi, Ecchi","The dispassionate, transforming assassin Golde..."
7000,to love-ru darkness,"Comedy, Romance, Sci-Fi, Ecchi",As close encounters of the twisted kind betwee...
4256,to love-ru ova,"Comedy, Sci-Fi, Ecchi",Episode 01: Rito becomes a Woman\nLala invents...
4494,sasameki koto,"Comedy, Girls Love, Romance",Murasame Sumika is popular in the high school ...
2692,utsukushiki sei no dendoushi reirei,"Girls Love, Supernatural, Hentai",Sweat-soaked bodies writhe in passion as a nur...
3917,maria†holic,"Comedy, Girls Love","In search of true love, Kanako Miyamae transfe..."
1756,urusei yatsura movie 6: itsudatte my darling,"Action, Adventure, Comedy, Drama, Romance, Sci-Fi","Lupica, another one of the legion of space pri..."
11344,love live! sunshine!!,Slice of Life,"Chika Takami, a self-proclaimed normal girl, h..."
599,iketeru futari,"Comedy, Romance, Ecchi",16-year old Keisuke Saji loves to fantasize ab...
