In [35]:
import os
import pandas as pd
import numpy as np
import openai
import tiktoken

# --- OpenAI / tokenizer configuration ---
API_KEY_ENV = "OPENAI_API_KEY"  # name of the environment variable holding the key
MODEL = 'gpt-3.5-turbo'

# .get() yields None when the variable is unset, exactly like os.getenv.
openai.api_key = os.environ.get(API_KEY_ENV)
# Tokenizer that tiktoken associates with MODEL; used below to count plot tokens.
encoding = tiktoken.encoding_for_model(MODEL)

# --- Load the movie-plot dataset ---
file = os.path.join("data", "wiki_movie_plots_deduped.csv")

# Release Year is parsed as an integer; every other retained column uses
# pandas' nullable 'string' dtype.
types = {
    'Release Year': 'int',
    **dict.fromkeys(
        ['Title', 'Origin/Ethnicity', 'Director', 'Cast', 'Genre', 'Plot'],
        'string',
    ),
}

# The 'Wiki Page' column is discarded right after loading.
df = pd.read_csv(file, dtype=types).drop(columns='Wiki Page')
df.shape

(34886, 7)

In [36]:
# Plot length in characters: len() of each plot string.
df['plot_characters'] = df['Plot'].map(len)

# Plot length in tokens, using the tiktoken encoding chosen for MODEL.
# disallowed_special=() makes tiktoken encode text that merely looks like a
# special token (e.g. "<|endoftext|>") as ordinary text instead of raising
# ValueError, so one unusual plot cannot abort the whole column.
df['plot_token_count'] = df['Plot'].map(
    lambda plot: len(encoding.encode(plot, disallowed_special=()))
)
df.head()

Unnamed: 0,Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Plot,plot_characters,plot_token_count
0,1901,Kansas Saloon Smashers,American,Unknown,,unknown,"A bartender is working at a saloon, serving dr...",500,105
1,1901,Love by the Light of the Moon,American,Unknown,,unknown,"The moon, painted with a smiling face hangs ov...",437,101
2,1901,The Martyred Presidents,American,Unknown,,unknown,"The film, just over a minute long, is composed...",436,100
3,1901,"Terrible Teddy, the Grizzly King",American,Unknown,,unknown,Lasting just 61 seconds and consisting of two ...,890,193
4,1902,Jack and the Beanstalk,American,"George S. Fleming, Edwin S. Porter",,unknown,The earliest known adaptation of the classic f...,732,165


In [46]:
# Token-count window for plots kept for summarisation:
# strictly more than MIN_PLOT_TOKENS and at most MAX_PLOT_TOKENS.
# NOTE(review): the upper bound looks chosen to fit the model's context window,
# but the instruction text and the generated summary also consume tokens --
# confirm that MAX_PLOT_TOKENS leaves enough headroom for both.
MIN_PLOT_TOKENS = 100
MAX_PLOT_TOKENS = 4000

# '@name' inside query() refers to the Python variables defined above.
df = df.query(
    'plot_token_count > @MIN_PLOT_TOKENS and plot_token_count <= @MAX_PLOT_TOKENS'
)
df.head()

Unnamed: 0,Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Plot,plot_characters,plot_token_count
23223,1987,Sworn Brothers,Hong Kong,David Lai,"Andy Lau, Cheung Kwok Keung",crime,"When Lam Ting-yat was little, his father died ...",16636,3675
1897,1935,Grand Exit,American,Erle C. Kenton,"Edmund Lowe, Ann Sothern",mystery,The front page of a brochure depicts a globe w...,15709,3639
3009,1941,Broadway Limited,American,Gordon Douglas,"Victor McLaglen, Patsy Kelly, ZaSu Pitts",comedy,"Following the screening of her latest film ""Th...",16517,3595
3592,1943,Isle of Forgotten Sins,American,Edgar G. Ulmer,"Gale Sondergaard, John Carradine",adventure,Somewhere on one of the English-speaking South...,15046,3546
33726,2013,Detective Conan: Private Eye in the Distant Sea,Japanese,Kobun Shizuno,Minami Takayama,unknown,"At Maizuru Bay, on the dawn of April 20, a coa...",15763,3534


In [51]:
# Fixed seed so the same plot is drawn on every Restart & Run All;
# change the value to inspect a different movie.
SAMPLE_SEED = 42

# Draw one row at random and keep it as a Series.
sample = df.sample(n=1, random_state=SAMPLE_SEED).iloc[0]

# Plot text of the sampled movie; displayed as the cell output.
plot1 = sample['Plot']
plot1

'The Sick-Little-Well-Girl (played by Mildred Davis) has been wrapped in cotton wool all her life. At the sign of the slightest sniffle or cough, she is packed off to bed and each time, the stuffy (and expensive) Dr Ludwig von Saulsbourg (Eric Mayne) is called to attend to her.\r\nIn another town lives Doctor Jackson (Harold Lloyd), a friendly and altruistic doctor who is liked by everyone in town. He utilises common sense when curing the citizens of any ills.\r\nSoon, Doctor Jack discovers that von Saulsbourg has been playing on The Sick-Little-Well-Girl\'s non-illness, charging the girl\'s father exorbitant amounts of money to "treat" her. With Jack\'s intervention, von Saulsbourg is sent packing.'

In [52]:
# Instruction for the model, with the sampled plot substituted at the end.
prompt = "summarize the following movie plot in four sentences or less: {}".format(plot1)

# Single-turn chat request containing only the user message.
result = openai.ChatCompletion.create(
    model=MODEL,
    messages=[{'role': 'user', 'content': prompt}],
)

# Print the model's summary followed by the original plot for comparison.
summary = result.choices[0].message.content
report = "LLM Summary\n============\n{}\n\nWikipedia plot\n============\n{}"
print(report.format(summary, plot1))

LLM Summary
A sheltered girl with a supposed illness is constantly attended to by an expensive doctor who takes advantage of her condition. A kind and sensible doctor from another town exposes the fraud and helps the girl's father get rid of the dishonest doctor.

Wikipedia plot
The Sick-Little-Well-Girl (played by Mildred Davis) has been wrapped in cotton wool all her life. At the sign of the slightest sniffle or cough, she is packed off to bed and each time, the stuffy (and expensive) Dr Ludwig von Saulsbourg (Eric Mayne) is called to attend to her.
In another town lives Doctor Jackson (Harold Lloyd), a friendly and altruistic doctor who is liked by everyone in town. He utilises common sense when curing the citizens of any ills.
Soon, Doctor Jack discovers that von Saulsbourg has been playing on The Sick-Little-Well-Girl's non-illness, charging the girl's father exorbitant amounts of money to "treat" her. With Jack's intervention, von Saulsbourg is sent packing.
