In [1]:
import pandas as pd
import numpy as np

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the sample title catalogue; the path is resolved relative to the
# directory the notebook kernel was started in.
df = pd.read_csv(filepath_or_buffer="../data/sample_titles.csv")
# Preview the first five rows (the default size of head()).
df.head(5)


Unnamed: 0,title,genres,popularity,overview
0,The Matrix,"['Action','Sci-Fi']",9.8,A computer hacker learns about the true nature...
1,Inception,"['Action','Sci-Fi','Thriller']",9.7,A skilled thief leads a team into people's dre...
2,Blade Runner,"['Sci-Fi','Drama']",8.9,A blade runner must pursue and terminate repli...
3,Arrival,"['Sci-Fi','Drama']",8.6,A linguist works with the military to communic...
4,Her,"['Romance','Sci-Fi','Drama']",8.4,A lonely writer develops an unlikely relations...


In [5]:
# Name of the sentence-embedding checkpoint handed to SentenceTransformer.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [6]:
# Missing overviews are replaced by empty strings so that every item passed
# to encode() is a str.
overview_texts = df["overview"].fillna("").tolist()

# One embedding vector per overview, in the same order as the rows of df.
embeddings = model.encode(overview_texts, show_progress_bar=True)


Batches: 100%|██████████| 1/1 [00:00<00:00, 15.72it/s]


In [7]:
# Store one vector per row: iterating over `embeddings` yields its items
# along the first axis, so each cell of the column holds a single vector.
df["embedding"] = [vector for vector in embeddings]


In [10]:
def recommend_similar_titles(df, title, top_n=5):
    """Return the ``top_n`` titles whose embeddings are closest to ``title``.

    Closeness is the cosine similarity between the query row's vector in the
    ``embedding`` column and the vector of every other row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``title``, ``genres`` and ``embedding`` columns; each
        ``embedding`` cell holds a 1-D NumPy vector and all vectors share the
        same length. The frame is NOT modified by this function.
    title : str
        Exact title to look up. When several rows carry the same title the
        first one (in row order) is used as the query.
    top_n : int, default 5
        Maximum number of recommendations. Values below 1 yield an empty
        frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``similarity`` and ``genres``, most similar first,
        keeping the original index labels. The query row is never included.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested title.
    """
    # Locate the query by position rather than by index label, so a
    # non-unique or non-default index cannot make the lookup return
    # several rows at once.
    is_query_title = df["title"].eq(title).fillna(False).to_numpy(dtype=bool)
    matches = np.flatnonzero(is_query_title)
    if matches.size == 0:
        # Same exception type the bare `[0]` lookup used to raise, but with a
        # message that names the missing title.
        raise IndexError(f"title {title!r} not found in df['title']")
    query_pos = int(matches[0])

    all_embeddings = np.vstack(df["embedding"].tolist())
    query_embedding = all_embeddings[query_pos].reshape(1, -1)

    similarities = cosine_similarity(query_embedding, all_embeddings)[0]

    # assign() builds a new frame, so the caller's DataFrame does not gain
    # (or have overwritten) a "similarity" column as a side effect.
    scored = df.assign(similarity=similarities)

    # Drop the query row explicitly. Skipping "rank 0" after sorting is wrong
    # whenever another row ties with the query (e.g. identical overviews),
    # because the query is then not guaranteed to be sorted first.
    is_candidate = np.arange(len(scored)) != query_pos

    return (
        scored[is_candidate]
        # A stable sort keeps tied rows in their original order.
        .sort_values("similarity", ascending=False, kind="stable")
        .head(max(top_n, 0))[["title", "similarity", "genres"]]
    )


In [11]:
# Example query: the five titles closest to "The Matrix".
recommend_similar_titles(df, title="The Matrix", top_n=5)


Unnamed: 0,title,similarity,genres
4,Her,0.375224,"['Romance','Sci-Fi','Drama']"
1,Inception,0.355356,"['Action','Sci-Fi','Thriller']"
2,Blade Runner,0.315094,"['Sci-Fi','Drama']"
3,Arrival,0.206739,"['Sci-Fi','Drama']"
5,The Godfather,0.181039,"['Crime','Drama']"
