In [30]:
import warnings
warnings.filterwarnings('ignore')

import os
from dotenv import load_dotenv, find_dotenv
from datasets import load_dataset
import pandas as pd
from typing import List, Optional
from pydantic import BaseModel
from datetime import datetime
from pymongo.mongo_client import MongoClient
import openai
import time
from IPython.display import display, HTML

In [31]:
# Environment configuration: load variables from the .env file located by
# find_dotenv() into the process environment, then read the credentials from
# there so that nothing secret is hard-coded in the notebook.
_ = load_dotenv(find_dotenv())
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MONGO_URI = os.getenv("MONGO_URI")
openai.api_key = OPENAI_API_KEY

In [32]:
# Load the data: open the "train" split of the movies dataset in streaming
# mode, keep 200 examples via take(), and materialise them as a DataFrame.
dataset = load_dataset(
    "Pablinho/movies-dataset",
    split="train",
    streaming=True,
).take(200)
dataset_df = pd.DataFrame(dataset)
dataset_df

Unnamed: 0,Release_Date,Title,Overview,Popularity,Vote_Count,Vote_Average,Original_Language,Genre,Poster_Url
0,2021-12-15,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,5083.954,8940,8.3,en,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...
1,2022-03-01,The Batman,"In his second year of fighting crime, Batman u...",3827.658,1151,8.1,en,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...
2,2022-02-25,No Exit,Stranded at a rest stop in the mountains durin...,2618.087,122,6.3,en,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...
3,2021-11-24,Encanto,"The tale of an extraordinary family, the Madri...",2402.201,5076,7.7,en,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...
4,2021-12-22,The King's Man,As a collection of history's worst tyrants and...,1895.511,1793,7.0,en,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...
...,...,...,...,...,...,...,...,...,...
195,2016-02-09,Deadpool,Deadpool tells the origin story of former Spec...,189.206,26390,7.6,en,"Action, Adventure, Comedy",https://image.tmdb.org/t/p/original/3E53WEZJqP...
196,2021-10-22,The Harder They Fall,"Gunning for revenge, outlaw Nat Love saddles u...",188.865,611,6.7,en,Western,https://image.tmdb.org/t/p/original/su9WzL7lwU...
197,2001-11-01,"Monsters, Inc.","James Sullivan and Mike Wazowski are monsters,...",188.759,15255,7.8,en,"Animation, Comedy, Family",https://image.tmdb.org/t/p/original/sgheSKxZkt...
198,2021-02-09,Ainbo: Spirit of the Amazon,An epic journey of a young hero and her Spirit...,188.484,285,7.1,en,"Adventure, Animation, Family, Fantasy",https://image.tmdb.org/t/p/original/l8HyObVj8f...


In [33]:
# Document Modelling
class Movie(BaseModel):
    """Pydantic schema for one movie document.

    Records produced by ``process_and_embed_record`` are validated against
    this model and dumped to plain dicts before being inserted into MongoDB.
    Field names mirror the dataset's column names.
    """
    Release_Date: Optional[datetime]  # nullable release date
    Title: str
    Overview: str
    Popularity: float
    Vote_Count: int
    Vote_Average: float
    Original_Language: str
    # List of genre names; process_and_embed_record splits the dataset's
    # comma-separated string into this list.
    Genre: List[str]
    Poster_Url: str
    # Embedding of "<Title> <Overview>" as returned by get_embedding; stays
    # None when no embedding could be generated.
    text_embeddings: Optional[List[float]] = None

In [34]:
# Embedding Generation
def get_embedding(text, model="text-embedding-3-small", dimensions=1536):
    """Return the OpenAI embedding vector for ``text``, or None on failure.

    Args:
        text: The string to embed. Anything that is not a non-blank string
            (None, numbers, "", whitespace-only) is rejected without calling
            the API.
        model: Name of the OpenAI embedding model to use.
        dimensions: Size of the returned vector. It must match the dimension
            configured on the vector index that will search these embeddings.

    Returns:
        The embedding as returned by the API (``data[0].embedding``), or None
        when the input is unusable or the API call raises.
    """
    # Guard first: a blank string carries no content worth a network call.
    if not isinstance(text, str) or not text.strip():
        return None
    try:
        response = openai.embeddings.create(
            input=text,
            model=model,
            dimensions=dimensions,
        )
        return response.data[0].embedding
    except Exception as e:
        # Deliberate best-effort: one failed embedding must not abort a whole
        # batch, so report the problem and let the caller see None.
        print(f"Error in get_embedding: {e}")
        return None

def process_and_embed_record(record):
    """Normalise one dataset row in place and attach its text embedding.

    Steps:
      1. Replace null scalars (NaN/NaT/None as detected by ``pd.isnull``)
         with None.
      2. Turn the comma-separated ``Genre`` string into a list of names. A
         value that is already a list is kept, so running this function twice
         on the same record is harmless. A missing or empty value becomes [].
      3. Embed "<Title> <Overview>" with ``get_embedding`` and store the
         result under ``text_embeddings`` (None when embedding fails).

    Args:
        record: A dict for one movie (one row of the dataset). It is mutated.

    Returns:
        The same ``record`` dict, updated.
    """
    for key, value in record.items():
        # pd.isnull returns an array for list-like values, which cannot be
        # used as an `if` condition, so only scalars are tested.
        if pd.api.types.is_scalar(value) and pd.isnull(value):
            record[key] = None

    genre = record.get('Genre')
    if isinstance(genre, str):
        record['Genre'] = genre.split(', ')
    elif isinstance(genre, (list, tuple)):
        record['Genre'] = list(genre)
    else:
        record['Genre'] = []

    # Skip null fields instead of formatting them: an f-string would embed the
    # literal text "None" for a missing title or overview.
    text_parts = [
        str(record[field])
        for field in ('Title', 'Overview')
        if record.get(field) is not None
    ]
    text_to_embed = " ".join(text_parts)
    record['text_embeddings'] = get_embedding(text_to_embed)
    return record

# Run the per-record pipeline over every row of the DataFrame; the result is
# a list of cleaned dicts, each carrying a 'text_embeddings' entry.
print("Processing records and generating embeddings...")
records = list(map(process_and_embed_record, dataset_df.to_dict(orient='records')))
print("Embedding generation completed")

Processing records and generating embeddings...
Embedding generation completed


In [35]:
# Database Creation and Connection
def get_mongo_client(mongo_uri):
    """Create a MongoClient for ``mongo_uri`` and verify the server is reachable.

    Args:
        mongo_uri: MongoDB connection string (read from MONGO_URI in this
            notebook).

    Returns:
        A connected ``MongoClient``.

    Raises:
        ValueError: If ``mongo_uri`` is None or empty. Without this check the
            driver would fall back to its default host instead of failing.
        pymongo.errors.PyMongoError: If the server cannot be reached (raised
            by the ping below).
    """
    if not mongo_uri:
        raise ValueError(
            "mongo_uri is empty; set MONGO_URI in the environment or .env file"
        )
    client = MongoClient(mongo_uri, appname="pmr.movies")
    # Constructing MongoClient does not contact the server, so issue a cheap
    # ping to make the success message below truthful.
    client.admin.command("ping")
    print("Connection to MongoDB successful")
    return client

# Connect, then resolve the database and collection handles used by every
# later cell.
mongo_client = get_mongo_client(MONGO_URI)
database_name, collection_name = "movies_dataset", "movies"
db = mongo_client[database_name]
collection = db[collection_name]

# Remove every existing document so that re-running the notebook does not
# accumulate duplicates. This is destructive for the target collection.
collection.delete_many({})

Connection to MongoDB successful


DeleteResult({'n': 100, 'electionId': ObjectId('7fffffff0000000000000347'), 'opTime': {'ts': Timestamp(1722530566, 135), 't': 839}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1722530566, 144), 'signature': {'hash': b'@:\x04*\xfd\xc9/\xf9\x98\xa9 \x9f\xa5\xff\x895\x0c]\x1aY', 'keyId': 7358892219303985247}}, 'operationTime': Timestamp(1722530566, 135)}, acknowledged=True)

In [36]:
# Data Ingestion: validate each processed record against the Movie schema,
# convert it back to a plain dict, and bulk-insert the whole batch.
movies = list(Movie(**fields).model_dump() for fields in records)
collection.insert_many(movies)
print("Data ingestion completed")

Data ingestion completed


In [37]:
# Vector Search Function
def _print_vector_search_timing(db, collection, pipeline):
    """Best-effort: print the server-side time reported for the $vectorSearch stage.

    Issues a separate ``explain`` command (verbosity ``executionStats``) for
    the same pipeline. Any failure here is reported and swallowed so that it
    can never cost the caller results it has already fetched.
    """
    try:
        explain_query_execution = db.command(
            'explain', {
                'aggregate': collection.name,
                'pipeline': pipeline,
                'cursor': {}
            },
            verbosity='executionStats')
        vector_search_explain = explain_query_execution['stages'][0]['$vectorSearch']
        millis_elapsed = vector_search_explain['explain']['collectStats']['millisElapsed']
        print(f"Total time for the execution to complete on the database server: {millis_elapsed} milliseconds")
    except Exception as e:
        print(f"Could not retrieve execution statistics: {e}")


def vector_search(user_query, db, collection, vector_index="vector_index_text", max_retries=3,
                  num_candidates=150, limit=20, retry_delay=2):
    """Run a $vectorSearch for ``user_query`` against the ``text_embeddings`` field.

    Args:
        user_query: Natural-language query; embedded with ``get_embedding``.
        db: Database handle, used only for the ``explain`` timing command.
        collection: Collection to search.
        vector_index: Name of the vector search index to use.
        max_retries: Maximum number of attempts when the search returns no
            documents or raises.
        num_candidates: Value passed as ``numCandidates`` to $vectorSearch.
        limit: Value passed as ``limit`` to $vectorSearch.
        retry_delay: Seconds to wait between attempts.

    Returns:
        A list of matching documents on success. On failure a human-readable
        message string is returned instead, so callers distinguish the two
        cases with ``isinstance(result, str)``.
    """
    query_embedding = get_embedding(user_query)
    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    pipeline = [
        {
            "$vectorSearch": {
                "index": vector_index,
                "queryVector": query_embedding,
                "path": "text_embeddings",
                "numCandidates": num_candidates,
                "limit": limit
            }
        }
    ]

    for attempt in range(1, max_retries + 1):
        attempts_remain = attempt < max_retries
        try:
            results = list(collection.aggregate(pipeline))
        except Exception as e:
            print(f"Error on attempt {attempt}: {str(e)}")
        else:
            if results:
                # Timing is informational only and handled outside the retry
                # logic: a failure to read it must not discard these results.
                _print_vector_search_timing(db, collection, pipeline)
                return results
            # NOTE(review): empty results are retried, presumably because the
            # search index may not have caught up with fresh inserts - confirm.
            retry_note = " Retrying..." if attempts_remain else ""
            print(f"No results found on attempt {attempt}.{retry_note}")
        # Only wait when another attempt will actually follow.
        if attempts_remain:
            time.sleep(retry_delay)

    return "Failed to retrieve results after multiple attempts."

In [38]:
# Handling User Query
class SearchResultItem(BaseModel):
    """Subset of a movie document used to present a search hit.

    handle_user_query builds one instance per document returned by
    vector_search; these fields are what gets tabulated, shown to the user,
    and sent to the chat model as context.
    """
    Title: str
    Overview: str
    Genre: List[str]  # list of genre names
    Vote_Average: float
    Popularity: float

def handle_user_query(query, db, collection):
    """Answer ``query`` with a chat model grounded on vector-search results.

    The query is searched with ``vector_search``, the hits are reduced to the
    ``SearchResultItem`` fields, and the resulting table is sent to the chat
    model as context. The question, the model's answer and the table of hits
    are printed/displayed as a side effect.

    Args:
        query: The user's question.
        db: Database handle, forwarded to ``vector_search``.
        collection: Collection to search, forwarded to ``vector_search``.

    Returns:
        On success, the model's answer as a string.
        When ``vector_search`` reports a failure, a 2-tuple
        ``(error_message, "No source information available.")``.
        NOTE(review): the two paths return different shapes; this is kept
        unchanged so existing callers are not affected.
    """
    # Fixed pause before searching.
    # NOTE(review): presumably gives freshly ingested data time to become
    # searchable - confirm whether it is still needed.
    time.sleep(2)
    get_knowledge = vector_search(query, db, collection)

    if isinstance(get_knowledge, str):  # Error message
        return get_knowledge, "No source information available."

    search_results_df = pd.DataFrame(
        [SearchResultItem(**result).model_dump() for result in get_knowledge]
    )

    # str(DataFrame) abbreviates long cells and wide/long frames with "..."
    # according to pandas display options, which would hand the model
    # truncated overviews. to_string() renders the table in full.
    context_table = search_results_df.to_string()

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a movie recommendation system."},
            {"role": "user", "content": f"Answer this user query: {query} with the following context:\n{context_table}"}
        ]
    )

    system_response = completion.choices[0].message.content

    print(f"- User Question:\n{query}\n")
    print(f"- System Response:\n{system_response}\n")

    display(HTML(search_results_df.to_html()))

    return system_response

In [39]:
# Query Example
# The triple-quoted literal is passed as-is, including its leading and
# trailing newline. handle_user_query prints the question and answer and
# displays the retrieved movies; `response` holds its return value (the
# answer string on success, a tuple when the search fails).
query = """
What is a good animated film that adults would enjoy too? What makes your suggestion a good fit?
"""
response = handle_user_query(query, db, collection)

Total time for the execution to complete on the database server: 0.073451 milliseconds
- User Question:

What is a good animated film that adults would enjoy too? What makes your suggestion a good fit?


- System Response:
I recommend the movie "Raya and the Last Dragon" for adults who enjoy animated films. This film combines elements of fantasy, action, and adventure, making it entertaining for a wider audience. With a high vote average of 8.0 and a solid popularity score, "Raya and the Last Dragon" has been well-received by viewers. The movie's captivating storytelling, rich animation, and themes of unity and courage make it a good fit for adults looking for an engaging and visually stunning animated film.



Unnamed: 0,Title,Overview,Genre,Vote_Average,Popularity
0,The Croods: A New Age,"Searching for a safer habitat, the prehistoric Crood family discovers an idyllic, walled-in paradise that meets all of its needs. Unfortunately, they must also learn to live with the Bettermans -- a family that's a couple of steps above the Croods on the evolutionary ladder. As tensions between the new neighbors start to rise, a new threat soon propels both clans on an epic adventure that forces them to embrace their differences, draw strength from one another, and survive together.","[Animation, Family, Adventure, Fantasy, Comedy]",7.6,284.798
1,"Monsters, Inc.","James Sullivan and Mike Wazowski are monsters, they earn their living scaring children and are the best in the business... even though they're more afraid of the children than they are of them. When a child accidentally enters their world, James and Mike suddenly find that kids are not to be afraid of and they uncover a conspiracy that could threaten all children across the world.","[Animation, Comedy, Family]",7.8,188.759
2,The House,"Across different eras, a poor family, an anxious developer and a fed-up landlady become tied to the same mysterious house in this animated dark comedy.","[Animation, Drama, Comedy, Horror]",7.1,551.65
3,Erax,Monstrous creatures leap from a magical storybook and unleash mayhem and mischief for Auntie Opal and her tween niece Nina in this spooky short film.,"[Mystery, Family]",5.7,317.102
4,Riverdance: The Animated Adventure,A young Irish boy named Keegan and Spanish girl named Moya journey into a magical world of the Megaloceros Giganteus who teach them to appreciate Riverdance as a celebration of life. Based on the stage show phenomenon of the same name and featuring Bill Whelan’s multi-platinum Grammy Award-winning music.,"[Animation, Fantasy, Music, Adventure, Comedy, Family]",5.2,250.831
5,Encanto,"The tale of an extraordinary family, the Madrigals, who live hidden in the mountains of Colombia, in a magical house, in a vibrant town, in a wondrous, charmed place called an Encanto. The magic of the Encanto has blessed every child in the family with a unique gift from super strength to the power to heal—every child except one, Mirabel. But when she discovers that the magic surrounding the Encanto is in danger, Mirabel decides that she, the only ordinary Madrigal, might just be her exceptional family's last hope.","[Animation, Comedy, Family, Fantasy]",7.7,2402.201
6,Ainbo: Spirit of the Amazon,"An epic journey of a young hero and her Spirit Guides, 'Dillo' a cute and humorous armadillo and ""Vaca"" a goofy oversized tapir, who embark on a quest to save their home in the spectacular Amazon Rainforest.","[Adventure, Animation, Family, Fantasy]",7.1,188.484
7,Space Jam: A New Legacy,"When LeBron and his young son Dom are trapped in a digital space by a rogue A.I., LeBron must get them home safe by leading Bugs, Lola Bunny and the whole gang of notoriously undisciplined Looney Tunes to victory over the A.I.'s digitized champions on the court. It's Tunes versus Goons in the highest-stakes challenge of his life.","[Family, Animation, Comedy, Science Fiction]",7.1,271.573
8,Black Water: Abyss,"An adventure-loving couple convince their friends to explore a remote, uncharted cave system in the forests of Northern Australia. With a tropical storm approaching, they abseil into the mouth of the cave, but when the caves start to flood, tensions rise as oxygen levels fall and the friends find themselves trapped. Unknown to them, the storm has also brought in a pack of dangerous and hungry crocodiles.","[Horror, Thriller, Adventure, Mystery]",5.1,191.722
9,The Addams Family 2,The Addams get tangled up in more wacky adventures and find themselves involved in hilarious run-ins with all sorts of unsuspecting characters.,"[Animation, Adventure, Comedy, Family]",7.2,244.18
