In [1]:
# Add prereq code to set open API key
import getpass 

# Define OpenAI API key 
api_key = getpass.getpass("Enter your API Key: ").strip()

import warnings

warnings.filterwarnings("ignore")

import os
os.environ["OPENAI_API_KEY"] = api_key
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [2]:
# from langchain.llms import OpenAI # this code has been deprecated since recording.
from langchain.chat_models import ChatOpenAI # this is the replacement 
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, NonNegativeInt
from typing import List
from random import sample 
from langchain.document_loaders import CSVLoader

First, let's create a loader and load reviews from tv-reviews.csv into memory

In [3]:
# TODO: load reviews from tv-reviews.csv
loader = CSVLoader(file_path="data/tv-reviews.csv", source_column="Review Text")
reviews = loader.load()

Then, let's initialize our LLM

In [None]:
# TODO: initialize OpenAI object with your API key
llm = ChatOpenAI(
    openai_api_key=api_key,
    openai_api_base=os.environ["OPENAI_API_BASE"],
    model="gpt-3.5-turbo",
    temperature=0, 
    max_tokens=1000
)

Now, let's setup our parser and a template  - 

**Note**  that since recording, the code to initialize the model has been updated to 

`llm = ChatOpenAI()`

In [5]:
class ReviewSentiment(BaseModel):
    positives: List[NonNegativeInt] = Field(description="index of a positive TV review, starting from 0")
    negatives: List[NonNegativeInt] = Field(description="index of a negative TV review, starting from 0")
        
parser = PydanticOutputParser(pydantic_object=ReviewSentiment)
# TODO: setup a template with partial and input variables
prompt = PromptTemplate(
    template="""
    Context:
    {context}
    ---
    Question:
    {question}
    Output format: {format_instructions}
    """,
    input_variables=["context", "question"],
    partial_variables={"format_instructions": parser.get_format_instructions},
)

question = """
    Classify TV reviews provided in the context into positive and negative. 
    Only use the reviews provided in this context, do not make up new reviews or use any existing information you know about these TVs. 
    If there are no positive or negative reviews, output an empty JSON array. 
"""

Pick 3 sample reviews to classify - LLMs have a limited context window they can work with. In later exercises, we'll see how to deal with that differently

In [6]:
# TODO: pick 3 random reviews and save them into reviews_to_classify variable
reviews_to_classify = sample(reviews, 4)
context = '\n'.join(review.page_content for review in reviews_to_classify)

query = prompt.format(context = context, question = question)
print(query)


    Context:
    TV Name: Imagix Pro
Review Title: Exceptional Customer Service
Review Rating: 10
Review Text: I had a minor issue with my Imagix Pro, but the customer service team was exceptional. They were prompt in addressing my concern and guided me through the troubleshooting process. They even offered a quick replacement when the issue persisted. Kudos to their excellent support!
TV Name: VisionMax Ultra
Review Title: Unresponsive Remote Control
Review Rating: 4
Review Text: One major drawback of the VisionMax Ultra is the unresponsive remote control. It often takes multiple presses for the TV to register the command. It's frustrating, especially when navigating through menus or adjusting settings. The remote control definitely needs improvement.
TV Name: Imagix Pro
Review Title: Unmatched Clarity
Review Rating: 10
Review Text: I cannot express enough how impressed I am with the clarity of the Imagix Pro. Every detail is so sharp and lifelike, it's like I can reach out and touch

# generate textual prompt from the prompt template
question = """
    Review TVs provided in the context. 
    Only use the reviews provided in this context, do not make up new reviews or use any existing information you know about these TVs. 
    If there are no positive or negative reviews, output an empty JSON array. 
"""
query = prompt.format(context = context, question = question)

Finally, let's send our query to LLM and use the parser we setup to parse an output into a Python object

**NOTE**: Since recording the code to feed the query to the llm has been updated to

`llm.predict(query)`

In [7]:
# TODO: query LLM, then parse output into the result variable
output = llm.predict(query)
print(output)
result = parser.parse(output)
print(result)

print("Positives:\n" + "\n".join([reviews_to_classify[i].page_content for i in result.positives]))
print("Negatives:\n" + "\n".join([reviews_to_classify[i].page_content for i in result.negatives]))

{
    "positives": [0, 2],
    "negatives": [1, 3]
}
positives=[0, 2] negatives=[1, 3]
Positives:
TV Name: Imagix Pro
Review Title: Exceptional Customer Service
Review Rating: 10
Review Text: I had a minor issue with my Imagix Pro, but the customer service team was exceptional. They were prompt in addressing my concern and guided me through the troubleshooting process. They even offered a quick replacement when the issue persisted. Kudos to their excellent support!
TV Name: Imagix Pro
Review Title: Unmatched Clarity
Review Rating: 10
Review Text: I cannot express enough how impressed I am with the clarity of the Imagix Pro. Every detail is so sharp and lifelike, it's like I can reach out and touch the images on the screen. The colors are vibrant and realistic, making everything look stunning. It truly enhances my movie-watching experience!
Negatives:
TV Name: VisionMax Ultra
Review Title: Unresponsive Remote Control
Review Rating: 4
Review Text: One major drawback of the VisionMax Ultr