In [22]:
import pandas as pd

### Look at the Movies Database

In [23]:
df = pd.read_csv('data/movie_ratings.csv')
df=df.dropna()
df.shape

(2610, 14)

In [3]:
df.tail()

Unnamed: 0,tmdb_id,original_title,popularity,vote_average,vote_count,imdb_id,title,genres,runtime,overview,movie_id,rating,year,movie_genres
2609,37799,The Social Network,16.972995,7.1,3492,tt1285016,The Social Network,"[{'id': 18, 'name': 'Drama'}]",120,"On a fall night in 2003, Harvard undergrad and...",the+social+network+2010,4.0,2010,Drama
2610,16234,Batman Beyond: Return of the Joker,7.872438,7.5,152,tt0233298,Batman Beyond: Return of the Joker,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...",74,"The Joker is back with a vengeance, and Gotham...",batman+beyond+return+of+the+joker+2000,3.0,2000,Animation_Family
2611,1581,The Holiday,14.043416,6.7,1259,tt0457939,The Holiday,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",136,"Two women, one (Cameron Diaz) from America and...",the+holiday+2006,5.0,2006,Comedy_Romance
2612,10344,The Fly II,9.207436,5.2,159,tt0097368,The Fly II,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",105,"Martin Brundle, born of the human/fly, is adop...",the+fly+ii+1989,4.0,1989,Horror_Science Fiction_Thriller
2613,6963,The Weather Man,8.345866,6.0,292,tt0384680,The Weather Man,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",101,"A Chicago weather man, separated from his wife...",the+weather+man+2005,4.0,2005,Comedy_Drama


In [24]:
def count_genres(df):
    all_genres = []

    for index, row in df.iterrows():
        all_genres.extend(row['movie_genres'].split('_'))
            
    genre_counts = pd.Series(all_genres).value_counts()
    unique_genres = list(set(all_genres))

    return genre_counts, unique_genres

In [25]:
genre_counts, unique_genres = count_genres(df)
genre_counts

Drama              1307
Comedy             1013
Thriller            647
Action              585
Romance             532
Adventure           465
Crime               427
Science Fiction     324
Family              305
Fantasy             286
Mystery             216
Horror              200
Animation           143
Music               110
History             106
War                  83
Documentary          50
Western              46
Foreign              17
TV Movie              1
Name: count, dtype: int64

In [26]:
print(unique_genres)

['Horror', 'Animation', 'Drama', 'Adventure', 'Comedy', 'Music', 'War', 'Mystery', 'Family', 'Science Fiction', 'Western', 'History', 'Romance', 'Action', 'Thriller', 'Crime', 'Foreign', 'TV Movie', 'Fantasy', 'Documentary']


In [27]:
df.describe()

Unnamed: 0,tmdb_id,popularity,vote_average,vote_count,runtime,rating,year
count,2610.0,2610.0,2610.0,2610.0,2610.0,2610.0,2610.0
mean,13976.872797,9.595678,6.574023,782.794253,110.775862,3.486532,1992.096935
std,21744.483732,8.087959,0.895938,1322.397057,23.678065,0.935763,15.182049
min,5.0,0.000603,0.0,0.0,0.0,1.0,1921.0
25%,1835.0,5.931711,6.0,91.0,96.0,3.0,1988.0
50%,9612.5,8.768547,6.7,290.0,107.0,3.666667,1996.0
75%,13596.5,12.098665,7.2,858.75,121.0,4.0,2001.0
max,266856.0,185.070892,8.7,14075.0,392.0,5.0,2014.0


### Make movie selector function

In [28]:
def selector(df, selected_genres, min_year, max_year):

    important_columns = ['title', 'movie_genres', 'year', 'rating', 'runtime', 'overview', 'vote_average', 'vote_count']
    # Filter by selected genres
    genres_filter = df['movie_genres'].apply(lambda x: any(item for item in selected_genres if item in x))
    df = df[genres_filter]
    
    # Filter by year
    year_filter = (df['year'] >= min_year) & (df['year'] <= max_year)
    df = df[year_filter]

    min_rating =3
    df = df[df['rating'] > min_rating]

    df = df[important_columns]
    
    return df

In [29]:
selected_genres = ['Comedy', 'Romance']
min_year = 1995
max_year = 2000

subset = selector(df, selected_genres, min_year, max_year)
print(subset.shape)

(237, 8)


In [30]:
subset.head()

Unnamed: 0,title,movie_genres,year,rating,runtime,overview,vote_average,vote_count
2,As Good as It Gets,Comedy_Romance,1997,3.538462,139,"New York City. Melvin Udall, a cranky, bigoted...",7.2,946
13,Men in Black,Action_Adventure_Comedy_Science Fiction,1997,3.692308,98,Men in Black follows the exploits of agents Ka...,6.9,4521
17,Life Is Beautiful,Comedy_Drama,1997,4.0,116,A touching story of an Italian book seller of ...,8.3,3643
29,The Big Lebowski,Comedy_Crime,1998,4.0,117,"Jeffrey ""The Dude"" Lebowski, a Los Angeles sla...",7.8,3001
30,The First Wives Club,Comedy,1996,3.625,102,After years of helping their hubbies climb the...,6.5,172


### Create custom Langchain tool for movie selection

![movie-bot](images/movie-bot.png)

In [31]:
### Langchain imports
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain.chains import LLMChain
from langchain_community.tools import tool
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_openai.chat_models import ChatOpenAI
from langchain.memory import ChatMessageHistory
from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
from langchain_core.runnables.history import RunnableWithMessageHistory

In [32]:
class MovieSelectionTool():

  @tool("Movie_Selector")
  def movie_selector(data):
    """Call this tool to get a list of movies that match the user's criteria.
       The input to this tool is a movie genres, min and max year of the movie.
       All genres should be pipe sepearated and years should be / separated. 
       Example if the user is look for comedy and drama from 1990 to 2000, then input should be `Comedy|Drama/1990/2000`
       Example if the user is looking for Romance, Family and Animation from 1995 to 2005 then input should be: Romance|Family|Animation/1995/2005
       The output from this tool will be all the movies that match that criteria
    """
    try:
      genres_piped, min_year, max_year = data.split('/')
      min_year = int(min_year)
      max_year = int(max_year)
      genres = genres_piped.split('|')

      print(f"Tool Run with inputs: Genres: {genres}  Min Year: {min_year}  Max Year: {max_year}")

      ## Step 1: Load CSV
      important_columns = ['title', 'movie_genres', 'year', 'rating', 'overview']
      df = pd.read_csv('data/movie_ratings.csv')
      df = df.dropna()
      df = df[important_columns]

      ## Step 2: Filter based on criteria
      #### Filter by selected genres
      genres_filter = df['movie_genres'].apply(lambda x: any(item for item in selected_genres if item in x))
      df = df[genres_filter]
      #### Filter by year
      year_filter = (df['year'] >= min_year) & (df['year'] <= max_year)
      df = df[year_filter]
      ### Filter by rating
      min_rating =3
      df = df[df['rating'] >= min_rating]


      ##Step 3: Sort DF by number of matched genres
      df['matched_genres'] = df['movie_genres'].apply(lambda x: len([item for item in genres if item in x]))
      df.sort_values(by='matched_genres', ascending=False, inplace=True)
      df = df.head(100)

      ## Step 4: Create data for loading in the model
      text_to_llm = ''
      for index, row in df.iterrows():
        text_to_llm += f"--- Movie Title {row['title']} --- "
        text_to_llm += f"--- Movie Plot {row['overview']} --- "
        text_to_llm += f"--- Movie Genres {row['movie_genres']} ---"
        text_to_llm += f"--- Movie Release Year {row['year']} ---"
        text_to_llm += f"--- Movie Rating {row['rating']} ---"
        text_to_llm += '\n'
      
      return text_to_llm
    except Exception as e:
      print(e)
      return "Error with the input format for the tool."
    

In [33]:
movie_tool = MovieSelectionTool.movie_selector
print(movie_tool.name, movie_tool.description)

Movie_Selector Call this tool to get a list of movies that match the user's criteria.
       The input to this tool is a movie genres, min and max year of the movie.
       All genres should be pipe sepearated and years should be / separated. 
       Example if the user is look for comedy and drama from 1990 to 2000, then input should be `Comedy|Drama/1990/2000`
       Example if the user is looking for Romance, Family and Animation from 1995 to 2005 then input should be: Romance|Family|Animation/1995/2005
       The output from this tool will be all the movies that match that criteria


In [34]:
data = 'Romance|Family|Animation/1995/2005'
text_to_llm = movie_tool.run(data)
print(text_to_llm)

Tool Run with inputs: Genres: ['Romance', 'Family', 'Animation']  Min Year: 1995  Max Year: 2005
--- Movie Title A Goofy Movie --- --- Movie Plot Though Goofy always means well, his amiable cluelessness and klutzy pratfalls regularly embarrass his awkward adolescent son, Max. When Max's lighthearted prank on his high-school principal finally gets his longtime crush, Roxanne, to notice him, he asks her on a date. Max's trouble at school convinces Goofy that he and the boy need to bond over a cross-country fishing trip like the one he took with his dad when he was Max's age, which throws a kink in his son's plans to impress Roxanne. --- --- Movie Genres Romance_Animation_Family_Comedy_Adventure ------ Movie Release Year 1995 ------ Movie Rating 4.0 ---
--- Movie Title Hercules --- --- Movie Plot Bestowed with superhuman strength, a young mortal named Hercules sets out to prove himself a hero in the eyes of his father, the great god Zeus. Along with his friends Pegasus, a flying horse, an

In [35]:
import tiktoken
def count_token(text):
    # Initialize the tokenizer
    encoding = tiktoken.get_encoding("cl100k_base")
    # Tokenize the text
    tokens = encoding.encode(text,allowed_special={'<|endoftext|>', '<|endofprompt|>'})
    # Count the number of tokens
    number_of_tokens = len(tokens)
    # Print the number of tokens
    print("Number of tokens:", number_of_tokens)

In [36]:
count_token(text_to_llm)

Number of tokens: 10222


In [37]:
class Chatbot:
    def __init__(self, model='gpt-4o', temperature=0.3):
        self.llm_rec = ChatOpenAI(model=model, temperature=temperature)
        self.system_prompt = ''' You are a friendly assistant who will guide customers to select movies of their choice from a database I have.
        You will follow a multi-stage approach to finding the best movies for a customer

        Introduction: Introduce yourself as a movie recommendation bot who will help customers find the best movies from a wide selection of classical movies from 1980 to 2010.

        Stage 1: Understand the customer's interests
        Ask only one question at a time. Always end your responses with a question
        1. Ask 3-4 questions to understand the customer's interests and what kind of movies they like
        2. Infer the genre of the movie based on their preferences or explicitely ask it. I have movies from the following genres:
        'Science Fiction', 'Fantasy', 'War', 'Adventure', 'Romance', 'Documentary', 'Family', 'Animation', 'Comedy', 'TV Movie', 'Thriller', 'Drama', 'Crime', 'Mystery', 'Music', 'Horror', 'Action', 'History'
        3. I have movies from 1985-2005. Understand which decade, range of years they are interested in

        Stage 2: Use the tool you have to get a database of movies you have
        The input to the tool is the genres and min and max years
        The tool takes the following inputs:
        Genres as a list. Genres should be from the above selections
        Min and max years as int
        All genres should be pipe sepearated and years should be / separated. 
        Example if the user is look for comedy and drama from 1990 to 2000, then input should be `Comedy|Drama/1990/2000`
        Example if the user is looking for Romance, Family and Animation from 1995 to 2005 then input should be: Romance|Family|Animation/1995/2005
        Example if the user is looking for Comedy and Drama from 1990 to 2000 then input should be: Comedy|Drama/1990/2000

        Stage 3: The tool will give you all the movies that match the criteria you shared and are well rated
        From this set, you now need to present top 3 choices to the customer and explain your rationale
        Try and match the movies to the type they like

        Stage 4: Adjust your choices if the user doesn't like your recommendation
        Ask them about the kind of plot they would like to see and then look for movies with those plots
        Present the top 3 choices again

        General rules:
        1. Ask one question at a time. Wait for the user's response before asking the next question
        2. End your responses with questions so you can continue the conversation
        '''
        self.memory = ChatMessageHistory(session_id="test-session")
        self.tools = [movie_tool]
    
    def run(self, input):
        ## Load Current Profile

        prompt_rec = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
            content=self.system_prompt
        ),
          MessagesPlaceholder(variable_name="chat_history"),   
          MessagesPlaceholder(variable_name="agent_scratchpad"),

         ("user", "{input}"),            
        ]
    )
        # Construct the OpenAI Functions agent
        recommendation_agent = create_openai_tools_agent(self.llm_rec, self.tools, prompt_rec)
        agent_executor = AgentExecutor(agent=recommendation_agent, tools=self.tools)
        agent_with_chat_history = RunnableWithMessageHistory(
            agent_executor,
            # This is needed because in most real world scenarios, a session id is needed
            # It isn't really used here because we are using a simple in memory ChatMessageHistory
            lambda session_id: self.memory,
            input_messages_key="input",
            history_messages_key="chat_history",
        )
        result = agent_with_chat_history.invoke({'input': input,}, config={"configurable": {"session_id": "1234"}})
        return result['output']

In [38]:
rec_agent = Chatbot(model='gpt-4o', temperature=0.3)
print(rec_agent.run("Hello!"))

Hi there! I'm your movie recommendation bot, here to help you find the best classical movies from 1985 to 2005. Let's start by understanding your interests. What kind of movies do you usually enjoy watching?


In [39]:
print(rec_agent.run("I am looking for action movies that also have a touch of comedy without too much violence"))

Got it! Action movies with a touch of comedy and not too much violence. 

Do you have any favorite actors or directors whose movies you enjoy?


In [40]:
rec_agent.run("How about movies from Will Smith or Arnold S.?")

'Great choices! Will Smith and Arnold Schwarzenegger have been in some fantastic action-comedy movies.\n\nIs there a specific time period within 1985 to 2005 that you are particularly interested in?'

In [41]:
print(rec_agent.run("How about movies from late 1990s to early 2000s"))

Tool Run with inputs: Genres: ['Action', 'Comedy']  Min Year: 1995  Max Year: 2005
Sure! Here are three top recommendations for action-comedy movies from the late 1990s to early 2000s that feature either Will Smith or Arnold Schwarzenegger:

### 1. **Men in Black (1997)**
- **Plot:** Men in Black follows the exploits of agents Kay and Jay, members of a top-secret organization established to monitor and police alien activity on Earth. The two Men in Black find themselves in the middle of a deadly plot by an intergalactic terrorist who has arrived on Earth to assassinate two ambassadors from opposing galaxies. In order to prevent worlds from colliding, the MiB must track down the terrorist and prevent the destruction of Earth.
- **Why You'll Like It:** This movie features Will Smith in a fun and action-packed role, combining humor with sci-fi elements.

### 2. **Rush Hour (1998)**
- **Plot:** When Hong Kong Inspector Lee is summoned to Los Angeles to investigate a kidnapping, the FBI doe

In [42]:
rec_agent.run("Do you have any sci-fi movies starring will smith?")

Tool Run with inputs: Genres: ['Science Fiction']  Min Year: 1995  Max Year: 2005


"Sure! Here are three top sci-fi movies starring Will Smith from the late 1990s to early 2000s:\n\n### 1. **Men in Black (1997)**\n- **Plot:** Men in Black follows the exploits of agents Kay and Jay, members of a top-secret organization established to monitor and police alien activity on Earth. The two Men in Black find themselves in the middle of a deadly plot by an intergalactic terrorist who has arrived on Earth to assassinate two ambassadors from opposing galaxies. In order to prevent worlds from colliding, the MiB must track down the terrorist and prevent the destruction of Earth.\n- **Why You'll Like It:** This movie features Will Smith in a fun and action-packed role, combining humor with sci-fi elements.\n\n### 2. **Men in Black II (2002)**\n- **Plot:** Kay and Jay reunite to provide our best, last and only line of defense against a sinister seductress who levels the toughest challenge yet to the MIB's untarnished mission statement – protecting Earth from the scum of the univer