In [64]:
!pip install -U -q openai tenacity

In [65]:
import os
import json
import ast
import pandas as pd
import warnings
import openai  # Import the OpenAI module
from tenacity import retry, wait_random_exponential, stop_after_attempt
warnings.filterwarnings('ignore')

In [66]:
# Safely read the API key from a file
try:
    with open("/content/OPENAI_API_Key.txt", "r") as file:
        os.environ["OPENAI_API_KEY"] = file.read().strip()
    print("API key loaded successfully.")
except FileNotFoundError:
    print("Error: The API key file was not found.")
    raise  # Re-raise the exception if the file is critical
except Exception as e:
    print(f"An error occurred: {e}")
    raise  # Re-raise any other unexpected exceptions

API key loaded successfully.


In [67]:
df = pd.read_csv('/content/IMDB-Movie-Data.csv')
df

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,Secret in Their Eyes,"Crime,Drama,Mystery","A tight-knit team of rising investigators, alo...",Billy Ray,"Chiwetel Ejiofor, Nicole Kidman, Julia Roberts...",2015,111,6.2,27585,,45.0
996,997,Hostel: Part II,Horror,Three American college students studying abroa...,Eli Roth,"Lauren German, Heather Matarazzo, Bijou Philli...",2007,94,5.5,73152,17.54,46.0
997,998,Step Up 2: The Streets,"Drama,Music,Romance",Romantic sparks occur between two dance studen...,Jon M. Chu,"Robert Hoffman, Briana Evigan, Cassie Ventura,...",2008,98,6.2,70699,58.01,50.0
998,999,Search Party,"Adventure,Comedy",A pair of friends embark on a mission to reuni...,Scot Armstrong,"Adam Pally, T.J. Miller, Thomas Middleditch,Sh...",2014,93,5.6,4881,,22.0


In [68]:
def initialize_movie_conversation():
    '''
    Returns a list [{"role": "system", "content": system_message}]
    '''

    delimiter = "####"
    example_movie_info = {
        'Rank': 1,
        'Title': 'Guardians of the Galaxy',
        'Genre': 'Action, Adventure, Sci-Fi',
        'Description': 'A group of intergalactic criminals are forced to work together to stop a fanatical warrior from taking control of the universe.',
        'Director': 'James Gunn',
        'Actors': 'Chris Pratt, Vin Diesel, Bradley Cooper, Zoe Saldana',
        'Year': 2014,
        'Runtime (Minutes)': 121,
        'Rating': 8.1,
        'Votes': 757074,
        'Revenue (Millions)': 333.13,
        'Metascore': 76.0
    }

    system_message = f"""
    You are a smart movie recommendation assistant. Your goal is to help the user find the perfect movie based on their preferences.
    You will ask relevant questions about the following attributes and use their responses to recommend the most suitable movies:

    {delimiter}Movie Attributes to consider:
    - 'Genre': Example genres include Action, Adventure, Sci-Fi, Drama, etc.
    - 'Director': Ask the user if they prefer movies from specific directors.
    - 'Actors': Ask about favorite actors they want to see in the movie.
    - 'Year': Ask for a range of years they are interested in.
    - 'Runtime': Ask if they have a preference for movie length (in minutes).
    - 'Rating': Ask if they are looking for high-rated movies (above 7.0 or 8.0).
    - 'Revenue' and 'Metascore' can be additional optional factors for recommendation.

    {delimiter}Sample Conversation:
    User: "I'm in the mood for a Sci-Fi movie."
    Assistant: "Great! Do you have a preference for the year of release?"
    User: "I'd prefer something from the last decade."
    Assistant: "Got it. Do you have any favorite actors you'd like to see?"
    User: "I love Chris Pratt and Zoe Saldana!"
    Assistant: "Perfect! Based on your preferences, I recommend 'Guardians of the Galaxy' directed by James Gunn. It's an exciting Sci-Fi adventure with Chris Pratt and Zoe Saldana, released in 2014."

    Ask questions in a conversational manner to ensure you understand the user's preferences and recommend the most suitable movies based on their responses.
    """
    conversation = [{"role": "system", "content": system_message}]
    return conversation


In [69]:
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_chat_model_completions(messages):
    MODEL = 'gpt-3.5-turbo'
    response  = openai.chat.completions.create(
        model=MODEL,
        messages=messages,
        temperature=0,  # Set temperature for deterministic responses
        max_tokens=2500
    )
    output = response.choices[0].message.content
    return output


In [70]:
def debug_movie_recommendation():
    # Step 1: Initialize the movie recommendation conversation
    conversation = initialize_movie_conversation()

    # Step 2: Start interaction with the user and assistant
    assistant_reply = get_chat_model_completions(conversation)
    print("Assistant:", assistant_reply)

    # Simulate user input (step-by-step)
    user_input_1 = {"role": "user", "content": "I'm in the mood for a Sci-Fi movie."}
    conversation.append(user_input_1)
    assistant_reply = get_chat_model_completions(conversation)
    print("Assistant:", assistant_reply)

    user_input_2 = {"role": "user", "content": "I'd prefer something from the last decade."}
    conversation.append(user_input_2)
    assistant_reply = get_chat_model_completions(conversation)
    print("Assistant:", assistant_reply)

    user_input_3 = {"role": "user", "content": "I love Chris Pratt and Zoe Saldana!"}
    conversation.append(user_input_3)
    assistant_reply = get_chat_model_completions(conversation)
    print("Assistant:", assistant_reply)

    return assistant_reply


In [71]:
debug_movie_recommendation()

Assistant: Hello! I'm here to help you find the perfect movie. 
What genre are you in the mood for today?
Assistant: Great choice! Do you have a preference for the year of release?
Assistant: Great choice! Do you have any favorite actors you'd like to see in the movie?
Assistant: Great choices! Based on your preferences for a Sci-Fi movie from the last decade starring Chris Pratt and Zoe Saldana, I recommend "Guardians of the Galaxy" directed by James Gunn. It's an exciting Sci-Fi adventure released in 2014 that features both Chris Pratt and Zoe Saldana in leading roles. I think you'll enjoy this action-packed space adventure!


'Great choices! Based on your preferences for a Sci-Fi movie from the last decade starring Chris Pratt and Zoe Saldana, I recommend "Guardians of the Galaxy" directed by James Gunn. It\'s an exciting Sci-Fi adventure released in 2014 that features both Chris Pratt and Zoe Saldana in leading roles. I think you\'ll enjoy this action-packed space adventure!'

In [72]:
# Define a function called moderation_check that takes user_input as a parameter.

def moderation_check(user_input):
    # Call the OpenAI API to perform moderation on the user's input.
    response = openai.moderations.create(input=user_input)

    # Extract the moderation result from the API response.
    moderation_output = response.results[0].flagged
    # Check if the input was flagged by the moderation system.
    if response.results[0].flagged == True:
        # If flagged, return "Flagged"
        return "Flagged"
    else:
        # If not flagged, return "Not Flagged"
        return "Not Flagged"

In [73]:
def dictionary_present(response):
    delimiter = "####"
    movie_info = {
        'Rank': 1,
        'Title': 'Guardians of the Galaxy',
        'Genre': 'Action, Adventure, Sci-Fi',
        'Director': 'James Gunn',
        'Actors': 'Chris Pratt, Vin Diesel, Bradley Cooper, Zoe Saldana',
        'Year': 2014,
        'Runtime (Minutes)': 121,
        'Rating': 8.1,
        'Votes': 757074,
        'Revenue (Millions)': 333.13,
        'Metascore': 76.0
    }

    prompt = f"""
    You are a Python expert. Your task is to identify and extract a Python dictionary from the input.
    The dictionary will contain the following keys: 'Title', 'Genre', 'Director', 'Actors', 'Year', 'Runtime', 'Rating', 'Votes', 'Revenue', and 'Metascore'.
    Ensure the values extracted are formatted correctly, especially for 'Revenue', 'Runtime', and 'Rating' where appropriate.
    If a key or value is missing, return the available ones. If none are present, return an empty dictionary.

    The dictionary format should strictly match this template: {movie_info}.

    The input may vary in formatting or contain additional text.
    Your job is to:
    1. Find the Python dictionary.
    2. Ensure proper formatting of the values.
    3. Return the dictionary with available keys and values.

    If the input doesn't have a valid dictionary, return an empty dictionary.

    Here are some example input-output pairs for better understanding:
    {delimiter}
    Input: - Title: Inception - Genre: Sci-Fi, Thriller - Director: Christopher Nolan - Year: 2010 - Runtime: 148 minutes - Rating: 8.8 - Revenue: 825.5 Million USD
    Output: {{'Title': 'Inception', 'Genre': 'Sci-Fi, Thriller', 'Director': 'Christopher Nolan', 'Actors': '', 'Year': '2010', 'Runtime (Minutes)': '148', 'Rating': '8.8', 'Votes': '', 'Revenue (Millions)': '825.5', 'Metascore': ''}}

    Input: {{'Title': 'The Dark Knight', 'Genre': 'Action, Crime, Drama', 'Director': 'Christopher Nolan', 'Actors': 'Christian Bale, Heath Ledger', 'Year': '2008', 'Runtime (Minutes)': '152', 'Rating': '9.0', 'Revenue (Millions)': '1000.0', 'Metascore': '84'}}
    Output: {{'Title': 'The Dark Knight', 'Genre': 'Action, Crime, Drama', 'Director': 'Christopher Nolan', 'Actors': 'Christian Bale, Heath Ledger', 'Year': '2008', 'Runtime (Minutes)': '152', 'Rating': '9.0', 'Votes': '', 'Revenue (Millions)': '1000.0', 'Metascore': '84'}}

    Input: Here is your movie profile: 'Title': 'Interstellar', 'Genre': 'Sci-Fi, Adventure', 'Director': 'Christopher Nolan', 'Actors': 'Matthew McConaughey', 'Year': '2014', 'Runtime': '169', 'Rating': '8.6'
    Output: {{'Title': 'Interstellar', 'Genre': 'Sci-Fi, Adventure', 'Director': 'Christopher Nolan', 'Actors': 'Matthew McConaughey', 'Year': '2014', 'Runtime (Minutes)': '169', 'Rating': '8.6', 'Votes': '', 'Revenue (Millions)': '', 'Metascore': ''}}

    Input: I haven't decided which movie yet, but I want something action-packed.
    Output: {{}}
    {delimiter}

    Now, based on the above examples, here is the input: {response}
    """
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        seed=1234
    )

    confirmation = response.choices[0].message.content.strip()
    return confirmation


In [74]:
def product_map_layer(Title, Genre, Director, Actors, Year, Runtime, Rating, Revenue):

    delimiter = "#####"
    movie_spec = {
        "Title": "(Movie title)",
        "Genre": "(Genres of the movie)",
        "Director": "(Director of the movie)",
        "Actors": "(Main actors in the movie)",
        "Year": "(Release year of the movie)",
        "Runtime": "(Total runtime in minutes)",
        "Rating": "(IMDb rating of the movie)",
        "Revenue": "(Revenue in millions)"
    }

    prompt = f"""
    You are a movie expert whose job is to extract key features of a movie from the provided information.
    Extract all the features according to the following rules:
    {delimiter}
    - **Title**: Extract the exact value from {Title}.
    - **Genre**: Extract the genres from {Genre}. If multiple genres are listed, separate them with commas.
    - **Director**: Extract the exact value from {Director}.
    - **Actors**: Extract the main actors from {Actors}, separating them with commas.
    - **Year**: Extract the exact value from {Year}.
    - **Runtime**: Extract the runtime in minutes from {Runtime}. Only the numeric part is required.
    - **Rating**: Extract the IMDb rating from {Rating}.
    - **Revenue**: Extract the revenue in millions from {Revenue}. If the revenue is given in any other format (e.g., thousands or billions), convert it to millions.
    {delimiter}

    Examples for reference:
    {delimiter}
    Input1: "Guardians of the Galaxy, Action|Adventure|Sci-Fi, James Gunn, Chris Pratt|Vin Diesel|Bradley Cooper, 2014, 121 minutes, 8.1, 333.13 Million USD"
    Output1: {{'Title': 'Guardians of the Galaxy', 'Genre': 'Action, Adventure, Sci-Fi', 'Director': 'James Gunn', 'Actors': 'Chris Pratt, Vin Diesel, Bradley Cooper', 'Year': '2014', 'Runtime': '121', 'Rating': '8.1', 'Revenue': '333.13'}}

    Input2: "Inception, Sci-Fi|Thriller, Christopher Nolan, Leonardo DiCaprio|Joseph Gordon-Levitt, 2010, 148 minutes, 8.8, 825.5 Million USD"
    Output2: {{'Title': 'Inception', 'Genre': 'Sci-Fi, Thriller', 'Director': 'Christopher Nolan', 'Actors': 'Leonardo DiCaprio, Joseph Gordon-Levitt', 'Year': '2010', 'Runtime': '148', 'Rating': '8.8', 'Revenue': '825.5'}}

    Input3: "The Dark Knight, Action|Crime|Drama, Christopher Nolan, Christian Bale|Heath Ledger, 2008, 152 minutes, 9.0, 1000.0 Million USD"
    Output3: {{'Title': 'The Dark Knight', 'Genre': 'Action, Crime, Drama', 'Director': 'Christopher Nolan', 'Actors': 'Christian Bale, Heath Ledger', 'Year': '2008', 'Runtime': '152', 'Rating': '9.0', 'Revenue': '1000.0'}}
    {delimiter}

    Now, based on these instructions and examples, output the dictionary {movie_spec} without any additional text for the following movie:
    {Title}, {Genre}, {Director}, {Actors}, {Year}, {Runtime}, {Rating}, {Revenue}.
    """
    response = openai.chat.completions.create(
                                        model="gpt-3.5-turbo",
                                        messages=[{"role": "user", "content": prompt}],
                                        seed=1234
    )

    confirmation = response.choices[0].message.content.strip()
    return confirmation


In [75]:
import ast
import re

def extract_dictionary_from_string(string):
    regex_pattern = r"\{[^{}]+\}"

    # Find all matches that resemble a dictionary structure
    dictionary_matches = re.findall(regex_pattern, string)

    # Check if any dictionary-like structures were found
    if dictionary_matches:
        dictionary_string = dictionary_matches[0]
        dictionary_string = dictionary_string.lower()

        try:
            # Convert the dictionary string to an actual dictionary using ast.literal_eval()
            dictionary = ast.literal_eval(dictionary_string)
            return dictionary
        except (ValueError, SyntaxError) as e:
            print(f"Error converting string to dictionary: {e}")
            return None
    else:
        print("No dictionary-like structure found in the string.")
        return None


In [76]:
def compare_movie_with_user(user_req_string):
    # Load the movie dataset (adapt the file path as needed)
    movie_df = pd.read_csv('/content/IMDB-Movie-Data.csv', on_bad_lines='skip')

    # Create a new column "movie_features" that contains the dictionary of the movie attributes
    movie_df['movie_features'] = movie_df.apply(lambda x: product_map_layer(
        x['Title'], x['Genre'], x['Director'], x['Actors'], x['Year'],
        x['Runtime (Minutes)'], x['Rating'], x['Revenue (Millions)']), axis=1)

    # Extract user requirements into a dictionary
    user_requirements = extract_dictionary_from_string(user_req_string)

    # This line retrieves the value associated with the key 'budget' (revenue) from the user_requirements dictionary.
    # If the key is not found, the default value '0' is used.
    # The value is then processed to remove commas and split it into a list of strings.
    # Finally, the resulting value is converted to a float (for revenue comparison).
    budget = float(user_requirements.get('Revenue', '0').replace(',', '').split()[0])

    # Filter the movies to include only rows where the 'Revenue (Millions)' is less than or equal to the budget
    filtered_movies = movie_df.copy()
    filtered_movies = filtered_movies[filtered_movies['Revenue (Millions)'] <= budget].copy()

    # Remove the 'Revenue' key from user requirements before comparing with movie attributes
    del user_requirements['Revenue']

    # Extract movie attributes into a dictionary so they can be compared with user requirements
    filtered_movies['movie_features'] = filtered_movies['movie_features'].apply(lambda x: extract_dictionary_from_string(x))

    # Comparison of user requirements with movie attributes and returning the matching rows from df
    mask = filtered_movies['movie_features'].apply(lambda x: all(x.get(key, '') == user_requirements[key] for key in user_requirements))
    result_df = filtered_movies[mask]

    # Return the filtered results in JSON format
    return result_df.to_json(orient='records')


In [77]:
def intent_confirmation_layer(response_assistant):
    prompt = f"""
    You are a senior evaluator who has an eye for detail.
    You are provided with an input. You need to evaluate if the input has the following keys: 'Genre', 'Rating', 'Director', 'Budget', 'Duration'.
    Next, you need to evaluate if the keys have the values filled correctly.
    - The value for the key 'Budget' needs to contain a number with currency (e.g., '₹1000', '$200').
    - The value of key 'Genre' should be one of the following: 'Action', 'Adventure', 'Comedy', 'Drama', 'Fantasy', 'Horror', 'Romance', 'Sci-Fi', 'Thriller'.
    - The value of key 'Rating' should be a number between 0 and 10, inclusive.
    - The value of key 'Director' should be a non-empty string.
    - The value of key 'Duration' should be a positive integer representing minutes.

    Output a string 'Yes' if the input contains the dictionary with the values correctly filled for all keys.
    Otherwise, output the string 'No'.

    Here is the input: {response_assistant}
    Only output a one-word string - Yes/No.
    """
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        seed=1234
    )

    confirmation = response.choices[0].message.content.strip()
    return confirmation


In [78]:
def initialize_conv_reco(products):
    # Create a clear system message with the movie catalog details
    system_message = f"""
    You are an intelligent movie expert whose goal is to help users find the best movies based on their preferences.
    You will answer user queries based on the available movies in the catalogue: {products}.

    Keep the user’s preferences in mind when responding to their questions.

    Start the conversation by presenting a brief summary of each movie in the catalogue, sorted by decreasing order of revenue.
    Format the summary as follows:

    1. <Movie Title>: <Genre>, <Director>, <Rating>, <Revenue in millions>
    2. <Movie Title>: <Genre>, <Director>, <Rating>, <Revenue in millions>

    Make sure to list the movies in decreasing order of their revenue.
    """

    # Initialize conversation with the system message
    conversation = [{"role": "system", "content": system_message}]

    return conversation


In [79]:
def dialogue_mgmt_system():
    # Initialize the conversation with system instructions for movie recommendations
    conversation = initialize_conv_reco([])  # Start with an empty product list
    introduction = get_chat_model_completions(conversation)
    print(introduction + '\n')

    recommended_movies = None
    user_input = ''

    while user_input.lower() != "exit":
        user_input = input("Your input: ")

        # Moderation check for user input
        if moderation_check(user_input) == 'Flagged':
            # No output for flagged input; exit the conversation silently
            break

        # If no movie recommendation exists, collect user preferences
        if recommended_movies is None:
            conversation.append({"role": "user", "content": user_input})

            response_assistant = get_chat_model_completions(conversation)

            # Moderation check for assistant response
            if moderation_check(response_assistant) == 'Flagged':
                # No output for flagged assistant response; exit the conversation silently
                break

            # Confirm if the assistant understood the intent correctly
            confirmation = intent_confirmation_layer(response_assistant)

            # Moderation check for confirmation response
            if moderation_check(confirmation) == 'Flagged':
                # No output for flagged confirmation response; exit the conversation silently
                break

            # If the confirmation says "No," continue dialogue but ensure no flagged content is printed
            if "No" in confirmation:
                # Add the assistant's response to the conversation
                conversation.append({"role": "assistant", "content": response_assistant})
                print("\n" + response_assistant + "\n")
                print('\n' + confirmation + '\n')
            else:
                # Handle the case where confirmation is positive, extract preferences, and move on
                print("\n" + response_assistant + "\n")
                print('\n' + confirmation + '\n')

                # Extract user preferences as a dictionary
                user_preferences = dictionary_present(response_assistant)

                # Moderation check for the extracted dictionary
                if moderation_check(user_preferences) == 'Flagged':
                    # No output for flagged user preferences; exit the conversation silently
                    break

                print('\n' + str(user_preferences) + '\n')
                print("Thank you for providing all the information. Please wait while I fetch movie recommendations.\n")

                # Compare user preferences with available movie options
                recommended_movies = compare_movies_with_user(user_preferences)

                # Initialize conversation for recommendation phase
                conversation_reco = initialize_conv_reco(recommended_movies)
                recommendation = get_chat_model_completions(conversation_reco)

                # Moderation check for recommendation
                if moderation_check(recommendation) == 'Flagged':
                    # No output for flagged recommendation; exit the conversation silently
                    break

                print('\n' + recommendation + '\n')

        else:
            # Continue conversation with the recommendation phase
            conversation_reco.append({"role": "user", "content": user_input})

            response_asst_reco = get_chat_model_completions(conversation_reco)

            # Moderation check for the recommendation conversation
            if moderation_check(response_asst_reco) == 'Flagged':
                # No output for flagged recommendation response; exit the conversation silently
                break

            print('\n' + response_asst_reco + '\n')
            conversation.append({"role": "assistant", "content": response_asst_reco})


In [80]:
dialogue_mgmt_system()

Here are the movies available in the catalogue:

1. Avatar: Science Fiction, James Cameron, 7.8, $2784.3 million
2. Avengers: Endgame: Superhero, Anthony Russo, Joe Russo, 8.4, $2796.2 million
3. Titanic: Romance, James Cameron, 7.8, $2187.5 million
4. Star Wars: The Force Awakens: Science Fiction, J.J. Abrams, 7.8, $2068.2 million
5. Avengers: Infinity War: Superhero, Anthony Russo, Joe Russo, 8.4, $2048.4 million

Your input: James Mangold

Here are the movies in the catalogue directed by James Mangold:

1. Logan: Action, James Mangold, 8.1, $619 million
2. Ford v Ferrari: Biography, James Mangold, 8.1, $225 million


No

Your input: Joe Russo

I'm sorry, but there are no movies in the catalogue directed by Joe Russo. Would you like to know about any other directors or movies?


No

Your input: X-Men

Here are the X-Men movies in the catalogue:

1. Logan: Action, James Mangold, 8.1, $619 million
2. X-Men: Days of Future Past: Action, Bryan Singer, 8.0, $747 million
3. X-Men: Apocalyp

KeyboardInterrupt: Interrupted by user

In [None]:
df = pd.read_csv('/content/IMDB-Movie-Data.csv', on_bad_lines='skip')
df.head()