In [1]:
import chromadb
from chromadb.utils import embedding_functions

In [None]:
# Sample data
# Each row below is (id, title, genre, description); the comprehension expands
# every row into a dict with exactly those four keys, in that order.
movies = [
  {"id": movie_id, "title": title, "genre": genre, "description": description}
  for movie_id, title, genre, description in (
    ("m1", "Inception", "sci-fi",
     "A thief enters dreams to plant ideas in a corporate target's mind."),
    ("m2", "Interstellar", "sci-fi",
     "Astronauts travel through a wormhole near Saturn to find a new home for humanity."),
    ("m3", "The Matrix", "sci-fi",
     "A hacker discovers reality is a simulation and joins a rebellion against machines."),
    ("m4", "The Dark Knight", "action",
     "Batman faces the Joker, a criminal mastermind who wants to plunge Gotham into anarchy."),
    ("m5", "John Wick", "action",
     "A retired hitman seeks vengeance after criminals kill his dog and steal his car."),
    ("m6", "Mad Max: Fury Road", "action",
     "In a post-apocalyptic wasteland, a woman rebels against a tyrannical ruler."),
    ("m7", "The Notebook", "romance",
     "A poor young man and a rich young woman fall in love during the 1940s."),
    ("m8", "Pride and Prejudice", "romance",
     "Elizabeth Bennet navigates issues of manners, marriage, and love in Georgian England."),
    ("m9", "Eternal Sunshine", "romance",
     "A couple undergoes a procedure to erase memories of each other after a painful breakup."),
    ("m10", "Parasite", "thriller",
     "A poor family schemes their way into working for a wealthy household with dark consequences."),
    ("m11", "Gone Girl", "thriller",
     "A man becomes the prime suspect when his wife mysteriously disappears on their anniversary."),
    ("m12", "Get Out", "thriller",
     "A Black man uncovers disturbing secrets when visiting his white girlfriend's family estate."),
  )
]

In [None]:
# Setup client
# The client is pointed at a local directory, so the collection is reused
# across runs instead of being rebuilt every time this cell executes.
client = chromadb.PersistentClient(path="./movies_db")

# Embedding function handed to the collection; it wraps the
# "all-MiniLM-L6-v2" sentence-transformers model.
ef = embedding_functions.SentenceTransformerEmbeddingFunction(
  model_name="all-MiniLM-L6-v2"
)

# Cosine space is requested because the recommendation helpers report
# similarity as `1 - distance`.
collection = client.get_or_create_collection(
  name="movies",
  embedding_function=ef,  # keyword is singular; `embedding_functions=` raises TypeError
  metadata={"hnsw:space": "cosine"}
)

# Ingesting data
# Only populate an empty collection, so re-running this cell does not try to
# add the same ids a second time.
if collection.count() == 0:
  collection.add(
    ids=[m['id'] for m in movies],
    documents=[m['description'] for m in movies],
    # keyword is plural (`metadatas`): one metadata dict per document
    metadatas=[{"title": m["title"], "genre": m["genre"]} for m in movies]
  )
  print(f"Integrated {len(movies)} movies.")

In [None]:
# Recommendation Function
def recommend_by_title(liked_title: str, n: int=3, genre_filter: str=None) -> list[dict]:
  """Recommend movies whose descriptions are closest to a movie the user liked.

  Args:
    liked_title: Exact title of an entry in the module-level `movies` list.
    n: Maximum number of recommendations to return.
    genre_filter: If given (and non-empty), restrict the query to entries whose
      `genre` metadata equals this value.

  Returns:
    At most `n` dicts with the keys `title`, `genre`, `similarity`
    (`1 - distance`, rounded to 3 decimals) and `description`, in the order
    the collection query returned them. The liked movie itself is excluded.

  Raises:
    ValueError: If no entry in `movies` has the title `liked_title`.
  """
  # Find the corresponding movie
  source = next((m for m in movies if m['title'] == liked_title), None)
  if source is None:
    raise ValueError(f"Movie '{liked_title}' not found.")

  # Additional filter on genre
  where = {"genre": genre_filter} if genre_filter else None

  # Query the database. One extra hit is requested because the liked movie is
  # expected to match its own description and is dropped below.
  results = collection.query(
    query_texts=[source['description']],
    n_results=n + 1,
    where=where
  )

  # Clean results. A single query text was sent, so only the first (index 0)
  # result list of each field is read.
  recommendations = []
  for meta, doc, distance in zip(
    results['metadatas'][0],
    results['documents'][0],
    results['distances'][0]
  ):
    if meta['title'] == liked_title:
      continue

    recommendations.append({
      "title": meta['title'],
      "genre": meta['genre'],
      "similarity": round(1 - distance, 3),
      "description": doc
    })

  # If the liked movie was not among the hits (e.g. excluded by the genre
  # filter), the extra result is still present and must be trimmed.
  return recommendations[:n]

In [None]:
def recommend_by_query(query: str, n: int=3) -> list[dict]:
  """Recommend movies from a free-text description of what the user wants.

  Args:
    query: Text passed to the collection as the single query text.
    n: Number of results requested from the collection.

  Returns:
    One dict per hit with the keys `title`, `genre`, `similarity`
    (`1 - distance`, rounded to 3 decimals) and `description`, in the order
    the collection query returned them.
  """
  results = collection.query(
    query_texts=[query],
    n_results=n
  )

  # A single query text was sent, so only the first (index 0) result list of
  # each field is read.
  return [
    {
      "title": meta['title'],
      "genre": meta['genre'],
      "similarity": round(1 - dist, 3),
      "description": doc
    } for meta, doc, dist in zip(
      results['metadatas'][0],
      results['documents'][0],
      results['distances'][0]
    )
  ]