In [None]:
%pip install langchain
%pip install openai
%pip install tiktoken

In [3]:
%pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_KEY')
os.chdir('/Users/tcoan/git_repos/ncrm-spring-school/')

In [5]:
from langchain.llms import OpenAI
from langchain import PromptTemplate

In [6]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [12]:
llm = OpenAI(temperature=0)

In [12]:
text = """
Here is an example of a movie review:

The big Lebowski is the best movie ever! I've just stopped laughing.

Is this a positive, negative, or neutral review?" If you don't know, say 'unclear'.
"""

res = llm(text)
print(res)


Positive


In [19]:
sentiment_template = """
Here is an example of a movie review:

{review}

Is this a positive, negative, or neutral review?" If you don't know, say 'unclear'.
"""

prompt = PromptTemplate(
    input_variables=["review"],
    template=sentiment_template,
)

In [2]:
movie_review = 'I know that most people love the movie Titanic. I thought it was pretty stupid. Sappy!'
print(prompt.format(review=movie_review))

NameError: name 'prompt' is not defined

In [22]:
print(llm(prompt.format(review=movie_review)))


Negative


In [23]:
sentiment_template = """
Here is an example of a movie review:

{review}

Is this a positive, negative, or neutral review?" If you don't know, say 'unclear'. \
Return the result as a Python dictionary with the key 'sentiment'.
"""

prompt = PromptTemplate(
    input_variables=["review"],
    template=sentiment_template,
)

In [24]:
print(llm(prompt.format(review=movie_review)))


{'sentiment': 'negative'}


In [7]:
sentiment_template = """
Here is an example of a movie review:

{review}

Is this a positive, negative, or neutral review?" If you don't know, say 'unclear'. \
Return the result as a Python dictionary with the key 'sentiment' and return a short \
description of why you gave this answer to the same dictionary using the key "reason". 
"""

prompt = PromptTemplate(
    input_variables=["review"],
    template=sentiment_template,
)

In [10]:
llm = OpenAI(temperature=0)

In [11]:
print(llm(prompt.format(review=movie_review)))


Answer: {'sentiment': 'negative', 'reason': 'The reviewer expresses dislike for the movie.'}


In [35]:
reviews = pd.read_csv('data/movie_reviews.csv').to_dict('records')

In [36]:
reviews_unlabeled = reviews[1500:]
print(len(reviews_unlabeled))

500


In [20]:
results = []
for row in reviews_unlabeled:
    results.append(llm(prompt.format(review=row['text'])))

In [108]:
positive = []
for result in results:
    if 'positive' in result[0:25]:
        positive.append(1)
    else:
        positive.append(0)


In [111]:
y = np.array([row['positive'] for row in reviews_unlabeled])
y_pred = np.array(positive)

In [112]:
accuracy_score(y, y_pred)

0.868

In [7]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [8]:
# chat mode instance
chat = ChatOpenAI(temperature=0)

In [23]:
movie_review = 'I know that most people love the movie Titanic. I thought it was pretty stupid. Sappy!'
messages = [
    SystemMessage(content="You are a helpful assistant that can classify the sentiment of movie review texts. The labels you can use are positive, negative and neutral."),
    HumanMessage(content=f"Provide a one word label for the following review: {movie_review}\n\nAlso provide a short justification for your label. Seperate the label and justification using a | character."),
]

response = chat(messages)

In [24]:
response

AIMessage(content='Negative | The reviewer expresses a negative opinion about the movie Titanic, calling it "stupid" and "sappy".', additional_kwargs={})

In [26]:
response.content

'Negative | The reviewer expresses a negative opinion about the movie Titanic, calling it "stupid" and "sappy".'

In [33]:
def send_chat_request(movie_review):
    messages = [
        SystemMessage(content="You are a helpful assistant that can classify the sentiment of movie review texts. The labels you can use are positive, negative and neutral."),
        HumanMessage(content=f"Provide a one word label with no punctuation for the following review: {movie_review}"),
    ]
    return chat(messages)

In [37]:
results_chat = []
for i,row in enumerate(reviews_unlabeled):
    response = send_chat_request(row['text'])
    results_chat.append(response.content)
    print(f'Finished iteration {i}')

Finished iteration 0
Finished iteration 1
Finished iteration 2
Finished iteration 3
Finished iteration 4
Finished iteration 5
Finished iteration 6
Finished iteration 7
Finished iteration 8
Finished iteration 9
Finished iteration 10
Finished iteration 11
Finished iteration 12
Finished iteration 13
Finished iteration 14
Finished iteration 15
Finished iteration 16
Finished iteration 17
Finished iteration 18
Finished iteration 19
Finished iteration 20
Finished iteration 21
Finished iteration 22
Finished iteration 23
Finished iteration 24
Finished iteration 25
Finished iteration 26
Finished iteration 27
Finished iteration 28
Finished iteration 29
Finished iteration 30
Finished iteration 31
Finished iteration 32
Finished iteration 33
Finished iteration 34
Finished iteration 35
Finished iteration 36
Finished iteration 37
Finished iteration 38
Finished iteration 39
Finished iteration 40
Finished iteration 41
Finished iteration 42
Finished iteration 43
Finished iteration 44
Finished iteration 4

In [41]:
positive = []
for row in results_chat:
    if 'Positive' in row:
        positive.append(1)
    else:
        positive.append(0)

In [39]:
results_chat[0]

'Positive'

In [40]:
pd.DataFrame(results_chat).value_counts()

Negative.    193
Positive     114
Negative      73
Positive.     73
Neutral.      24
Neutral       23
dtype: int64

In [42]:
y = np.array([row['positive'] for row in reviews_unlabeled])
y_pred = np.array(positive)

In [43]:
accuracy_score(y, y_pred)

0.862

In [54]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate

chat = ChatOpenAI(temperature=0)
from langchain.chains.summarize import load_summarize_chain
text_splitter = CharacterTextSplitter()
from langchain.docstore.document import Document

In [52]:
texts = text_splitter.split_text(reviews[0]['text'])

In [55]:
docs = [Document(page_content=t) for t in texts]

In [58]:
docs

[Document(page_content='plot : two teen couples go to a church party , drink and then drive . \nthey get into an accident . \none of the guys dies , but his girlfriend continues to see him in her life , and has nightmares . \nwhat\'s the deal ? \nwatch the movie and " sorta " find out . . . \ncritique : a mind-fuck movie for the teen generation that touches on a very cool idea , but presents it in a very bad package . \nwhich is what makes this review an even harder one to write , since i generally applaud films which attempt to break the mold , mess with your head and such ( lost highway & memento ) , but there are good and bad ways of making all types of films , and these folks just didn\'t snag this one correctly . \nthey seem to have taken this pretty neat concept , but executed it terribly . \nso what are the problems with the movie ? \nwell , its main problem is that it\'s simply too jumbled . \nit starts off " normal " but then downshifts into this " fantasy " world in which you

In [59]:
chain = load_summarize_chain(chat, chain_type="map_reduce")
chain.run(docs)

'The movie is about two teen couples who drink and drive, resulting in an accident where one of the guys dies. The film fails to execute its cool idea properly, resulting in a confusing and jumbled plot. The actors are good, but the film lacks entertainment value and feels redundant. The reviewer suggests skipping it.'

In [57]:
docs

Collecting tiktoken
  Downloading tiktoken-0.3.3-cp39-cp39-macosx_10_9_x86_64.whl (736 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m736.3/736.3 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.3.3
