In [9]:
# Installation

!pip install --quiet datasets pandas nomic sentence-transformers einops pymongo

In [10]:
# Imports

import os
from dotenv import load_dotenv

from sentence_transformers import SentenceTransformer
import pandas as pd
import pymongo

In [11]:
# Load secrets

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

mongo_connection_string = os.getenv('MONGO_CONNECTION_STRING')
# NOTE(review): not used in the cells visible here; presumably consumed further down.
openai_api_key = os.getenv('OPENAI_API_KEY')

# Fail fast: os.getenv returns None for a missing variable, and
# pymongo.MongoClient(None) falls back to localhost instead of raising, so the
# mistake would only surface later as a confusing vector-search failure.
if not mongo_connection_string:
    raise RuntimeError(
        "MONGO_CONNECTION_STRING is not set; add it to your .env file or environment."
    )

In [12]:
# Setup Embedder

# Load the embedding model (https://huggingface.co/nomic-ai/nomic-embed-text-v1)
# NOTE(review): trust_remote_code=True permits code shipped with the model repository
# to run locally — confirm the source is trusted (and consider pinning a revision).
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

def get_embedding(text):
    """Encode ``text`` with the module-level ``model`` and return it as a list.

    The value returned is whatever ``model.encode(text).tolist()`` produces.
    """
    return model.encode(text).tolist()



<All keys matched successfully>


# Setup for performing vector search

In [25]:
# Create a client for the cluster identified by the connection string
mongo_client = pymongo.MongoClient(mongo_connection_string)

# Grab handles to the database and collection queried by the vector search
# (nothing is written here; these lines only select existing namespaces)
db = mongo_client.get_database("netflix_titles")
collection = db.get_collection("embedded_titles")


def vector_search(query, limit=5, num_candidates=100):
    """Run a ``$vectorSearch`` aggregation for ``query`` and return the matches.

    Args:
        query: Text to embed with ``get_embedding`` and use as the query vector.
        limit: Maximum number of documents to return (default 5).
        num_candidates: Number of nearest-neighbour candidates the search
            considers (default 100). It is raised to ``limit`` when smaller,
            because ``$vectorSearch`` rejects ``numCandidates`` below ``limit``.

    Returns:
        The result of ``collection.aggregate(pipeline)``. Each yielded document
        is projected down to ``title``, ``description`` and ``score`` (the
        ``vectorSearchScore`` metadata); ``_id`` is excluded.
    """
    query_embedding = get_embedding(query)

    # Previously hard-coded to 100, which made any limit above 100 fail.
    effective_candidates = max(num_candidates, limit)

    pipeline = [
        {
            "$vectorSearch": {
                "index": "netflix_titles_description_vector_index",
                "queryVector": query_embedding,
                "path": "description_embedding",
                "numCandidates": effective_candidates,
                "limit": limit
            }
        },
        {
            "$project": {
                "_id": 0,
                "title": 1,
                "description": 1,
                "score": {
                    "$meta": "vectorSearchScore"
                }
            }
        }
    ]

    # Execute the search
    return collection.aggregate(pipeline)

# Performing a sample query

In [28]:
# Free-text query to embed and match against the stored description embeddings
sample_query = "dark comedy protagonist with heart of gold"
results = vector_search(sample_query)

# Print each returned document on its own line
for result in results:
    print(result)


{'title': 'BoJack Horseman', 'description': "Meet the most beloved sitcom horse of the '90s, 20 years later. He’s a curmudgeon with a heart of...not quite gold...but something like gold. Copper?", 'score': 0.7800670266151428}
{'title': 'Gbomo Gbomo Express', 'description': 'The night spins sinisterly out of control when incompetent kidnappers hold a club-hopping record exec and a socialite for ransom in this dark comedy.', 'score': 0.772736132144928}
{'title': 'For Love or Money', 'description': 'When a goofy but likable millionaire discovers his fiancée’s plan to steal his wealth, he devises an unromantic scheme to make her life a living hell.', 'score': 0.770065188407898}
{'title': 'Man in Love', 'description': "When he meets a debt-ridden woman who's caring for her ailing father, a debt collector with a heart of gold sets out to win her love.", 'score': 0.7673096656799316}
{'title': 'The House of Flowers', 'description': "In this dark comedy, a wealthy matriarch tries to maintain he