In [6]:
# Upgrade pip itself, then install/upgrade the notebook's dependencies listed in requirements.txt.
# %pip (rather than !pip) targets the environment of the running kernel.
# If anything was upgraded, restart the kernel before continuing (see the note pip prints below).
%pip install --quiet --upgrade pip
%pip install --quiet -U -r requirements.txt

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [7]:
# Load the autoreload extension and use mode 2 so imported modules are
# re-imported automatically before each cell runs (edits to .py files are
# picked up without restarting the kernel).
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


--- 

# Prepare Collection

In [10]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from movies import MovieCollection
from features import FeatureExtractor
from helpers import display_vocabulary, display_md
from tqdm.notebook import tqdm

# Create widgets for form inputs.
# Their `.value` attributes are read by create_collection() each time the
# "Create Collection" button is clicked.

# Forwarded as `stemming=` to FeatureExtractor; enabled by default.
stemming_checkbox = widgets.Checkbox(
    value=True,
    description='Use Stemming',
)

# Forwarded as `stopwords=` to FeatureExtractor; disabled by default.
stopwords_checkbox = widgets.Checkbox(
    value=False,
    description='Remove Stopwords',
)

# Forwarded to MovieCollection(...): 1000 to 50000 in steps of 1000, default 3000.
number_of_records = widgets.IntSlider(
    value=3000,
    min=1000,
    max=50000,
    step=1000,
    description='Number of Records:',
    # The default description column is too narrow for this label and cuts it
    # off; 'initial' lets the label take its natural width.
    style={'description_width': 'initial'},
)

def create_collection():
    """Build the movie collection and all derived feature tables from the form settings.

    Reads the current values of ``number_of_records``, ``stemming_checkbox`` and
    ``stopwords_checkbox``, then publishes the results as module-level globals
    so that later cells can use them:

    - ``collection``      -- ``MovieCollection`` built with the requested record count
    - ``pipeline``        -- ``FeatureExtractor`` configured from the two checkboxes
    - ``features_bag``    -- ``{imdb_id: pipeline.bag_of_words(movie text)}``
    - ``features_set``    -- ``{imdb_id: set of the keys of that movie's bag}``
    - ``vocabulary_df``   -- ``pipeline.df(...)`` over all token sets
    - ``vocabulary_idf``  -- ``pipeline.idf(...)`` over all token sets
    - ``features_tfidf``  -- ``{imdb_id: pipeline.tfidf(bag, vocabulary_idf)}``

    Progress is reported with markdown messages and a tqdm bar; a 20-row
    vocabulary sample is displayed at the end. Returns ``None``.
    """
    global collection, pipeline, features_set, vocabulary_df, vocabulary_idf, features_bag, features_tfidf

    # Replace whatever a previous run left in the output area.
    clear_output(wait=True)
    display_md("### Creating Movies Collection")
    collection = MovieCollection(number_of_records.value)
    pipeline = FeatureExtractor(
        stemming=stemming_checkbox.value,
        stopwords=stopwords_checkbox.value,
    )

    display_md("- extracting 'bag of words' features")
    bags = {}
    for movie in tqdm(collection.movies):
        movie_id = movie.imdb_id
        bags[movie_id] = pipeline.bag_of_words(movie.to_text())
    features_bag = bags

    display_md("- extracting 'set of words' features")
    token_sets_by_movie = {}
    for movie_id, bag in features_bag.items():
        token_sets_by_movie[movie_id] = set(bag.keys())
    features_set = token_sets_by_movie

    display_md("- building the vocabulary")
    # A dict view can be iterated repeatedly, so both statistics share it.
    token_sets = features_set.values()
    vocabulary_df = pipeline.df(token_sets)
    vocabulary_idf = pipeline.idf(token_sets)

    display_md("- extracting 'tfidf' features")
    weighted = {}
    for movie_id, bag in features_bag.items():
        weighted[movie_id] = pipeline.tfidf(bag, vocabulary_idf)
    features_tfidf = weighted

    display_vocabulary(vocabulary_df, vocabulary_idf, n_samples=20)

def _on_create_clicked(_button):
    """Button callback: ignore the clicked widget and (re)build the collection."""
    create_collection()


# Primary-styled button that triggers the whole collection build.
index_button = widgets.Button(description="Create Collection", button_style='primary')
index_button.on_click(_on_create_clicked)

# Create form container: the three inputs stacked above the button
form = widgets.VBox([
    widgets.VBox([number_of_records, stemming_checkbox, stopwords_checkbox]),
    index_button,
])

# Display the form
display(form)

### Creating Movies Collection

- reading dataset from local cache (./data/movie_dataset.jsonl)

- extracting 'bag of words' features

  0%|          | 0/3000 [00:00<?, ?it/s]

- extracting 'set of words' features

- building the vocabulary

- extracting 'tfidf' features

|token|df|idf|
|-|-|-|
|the|2778|0.07652113790774304|
|alic|32|4.509860006183766|
|reverend|16|5.173154223594031|
|mcgovern|10|5.608472294851876|
|acquaint|7|5.926926025970411|
|legal|5|6.214608098422191|
|lifelong|3|6.620073206530356|
|jen|3|6.620073206530356|
|bakeri|2|6.907755278982137|
|delaney|2|6.907755278982137|
|poncelet|1|7.313220387090301|
|turtl|1|7.313220387090301|
|jianxin|1|7.313220387090301|
|velociraptor|1|7.313220387090301|
|kurtzmann|1|7.313220387090301|
|brea|1|7.313220387090301|
|courtney|1|7.313220387090301|
|nockmaar|1|7.313220387090301|
|scalper|1|7.313220387090301|
|mima|1|7.313220387090301|
|footman|1|7.313220387090301|


---

# Building the index

In [11]:
from boolean import BooleanRetriever

# Build the boolean retriever from the per-movie token sets.
# `features_set` is a global assigned inside create_collection(), so the
# "Create Collection" button must have been clicked before running this cell.
index = BooleanRetriever(features_set)
# Show the number of indexed documents as the cell output.
# NOTE(review): the recorded output is 2999 although the progress bar reported
# 3000 movies — possibly two movies share an imdb_id and collapse into one
# dict key; verify.
index.n_docs

2999

**Test Query**

In [12]:
# Free-text query; it is tokenised with the same `pipeline` that
# create_collection() configured, so it is normalised like the indexed text.
query = "star wars"

# use the following for more precision
# (presumably query_and keeps only documents containing every query term — confirm in boolean.py)
result = index.query_and(pipeline.set_of_words(query))

# Render the matching movies (the recorded output is a markdown table).
collection.display_result(result)

|id|title|overview|cast
|-|-|-|-|
|tt0113107|**Frankie Starlight** (1995)|**Sometimes the brightest star is the one that shines within.**<br/>[Drama, Romance, War]<br/><br/>The quirky story of a young boy's adventures growing up with his stunningly beautiful mother and the two very different men who love her.|Matt Dillon as Terry Klout, Gabriel Byrne as Jack Kelly
|tt0114663|**Three Wishes** (1995)|**If you really believe, magic will find you**<br/>[Drama, Fantasy]<br/><br/>While Jane Holman (Mastrantonio) is driving with her two sons, she accidentally runs into a drifter, Jack McCloud (Swayze), who breaks his leg. Being responsible,  Jane invites Jack, and his dog, to stay at her home until his leg has healed.  Jack struggles to adapt their lifestyle, and finds himself loved by the family. He starts teaching baseball to Tom, who misses his father, who was lost in the Korean war. Jack and Tom develop a strong bond of friendship. Meanwhile, Gunny believes that there is more to Jack and Betty Jane than meets the eye...  We learn that Jack, is Jack McCloud, a Star White Socks baseball player in 1941, who dropped out of the league, after his first season, and; "was never heard from again" ...  A wonderful story.  We witness magic between a boy's imagination, and Jack's dog, and are never sure if we are witnessing imagination or magic by the dog.  A story of friendship, family, and learning that life isn't always as cut and dried as we often believe.|Patrick Swayze as Jack McCloud, Jay O. Sanders as Coach Schramka, John Diehl as Leland's Dad, Diane Venora as Joyce, Colleen Camp as Neighbor's Wife
|tt0076759|**Star Wars** (1977)|**A long time ago in a galaxy far, far away...**<br/>[Adventure, Action, Science Fiction]<br/><br/>Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.|Harrison Ford as Han Solo, Carrie Fisher as Princess Leia Organa, Kenny Baker as Artoo-Detoo (R2-D2), James Earl Jones as Voice of Darth Vader (voice), William Hootkins as Red Six (Porkins)
|tt0082971|**Raiders of the Lost Ark** (1981)|**Indiana Jones - the new hero from the creators of JAWS and STAR WARS.**<br/>[Adventure, Action]<br/><br/>When Dr. Indiana Jones – the tweed-suited professor who just happens to be a celebrated archaeologist – is hired by the government to locate the legendary Ark of the Covenant, he finds himself up against the entire Nazi regime.|Harrison Ford as Indy, William Hootkins as Major Eaton, Alfred Molina as Satipo, Frank Welker as Special Vocal Effects (voice) (uncredited)
|tt0119859|**Paradise Road** (1997)|[War, Drama, History]<br/><br/>Paradise Road is a 1997 film which tells the story of a group of English, American, Dutch and Australian women who are imprisoned in Sumatra during World War II. It was directed by Bruce Beresford and stars Glenn Close as beatific Adrienne Pargiter, Frances McDormand as the brash Dr. Verstak, Pauline Collins as missionary Margaret Drummond (based on missionary Margaret Dryburgh), Julianna Margulies as American socialite Topsy Merritt, Jennifer Ehle as British doyenne and model Rosemary Leighton Jones, Cate Blanchett as Australian nurse Susan McCarthy and Elizabeth Spriggs as dowager Imogene Roberts. Basing his picture on real events, Bruce Beresford tells the story of a vocal orchestra created by the women in a Japanese P.O.W. camp, a classic survivors' tale extolling women's ability to survive hardship and atrocity through perseverance, solidarity and creativity.|Glenn Close as Adrienne Pargiter, Frances McDormand as Dr. Verstak
|tt0120915|**Star Wars: Episode I - The Phantom Menace** (1999)|**Every generation has a legend. Every journey has a first step. Every saga has a beginning.**<br/>[Adventure, Action, Science Fiction]<br/><br/>Anakin Skywalker, a young slave strong with the Force, is discovered on Tatooine. Meanwhile, the evil Sith have returned, enacting their plot for revenge against the Jedi.|Liam Neeson as Qui-Gon Jinn, Ewan McGregor as Obi Wan Kenobi, Kenny Baker as R2-D2, Frank Oz as Yoda (voice), Terence Stamp as Chacellor Valorum, Samuel L. Jackson as Mace Windu
|tt0143924|**Promise Her Anything** (1999)|[Comedy, Drama, Fantasy]<br/><br/>A quaint small town that hasn't paid taxes since World War II draws the attention of an ambitious tax inspector in this comedy starring Billy Zane and Patrick Bergin.|Billy Zane as George Putter
|tt0054331|**Spartacus** (1960)|**More titanic than any story ever told!**<br/>[Action, Drama, History]<br/><br/>Spartacus is a 1960 American historical drama film directed by Stanley Kubrick and based on the novel of the same name by Howard Fast about the historical life of Spartacus and the Third Servile War. The film stars Kirk Douglas as the rebellious slave Spartacus who leads a violent revolt against the decadent Roman empire. The film was awarded four Oscars and stands today as one of the greatest classics of the Sword and Sandal genre.|George Kennedy as Rebel Soldier (uncredited)
|tt0048380|**Mister Roberts** (1955)|**All The Uproarious Fun Of the Smash Broadway Play!**<br/>[Comedy, Drama, Family, War]<br/><br/>A hilarious and heartfelt military comedy-drama co-directed by John Ford and Mervyn LeRoy, Mister Roberts stars Henry Fonda as an officer who's yearning for battle but is stuck in the backwaters of World War II on a noncommissioned Navy ship run by the bullying Capt. Morton (James Cagney). Jack Lemmon enjoys a star-making turn as the freewheeling Ensign Pulver, and William Powell stars as the ship's doctor in his last screen role.  Based on the 1946 novel with the same name, by Thomas Heggen, and the 1948 Broadway play, written by Thomas Heggen and Joshua Logan. Henry Fonda also starred in the original Broadway production. Warner Bros. didn't want Fonda to star in the film, as they thought he was too old, and had been a stage player for so long (8 years), that he no longer was box office material. However, John Ford insisted on Fonda and the company eventually agreed.|Henry Fonda as Mister Roberts, Jack Lemmon as Ens. Frank Thurlowe Pulver, Ward Bond as Chief Petty Officer Dowdy


In [13]:
# Build a relevance-assessment prompt from the query and the retrieved movies.
# `query` and `result` come from the test-query cell above; per the recorded
# output, prompt_context() renders each movie as an
# ID / Title / Summary / Genres / Cast text record.
prompt=f"""
You are assessing movies. Given the query below and the results, assess which movies are relevant for the query

Query: {query}

Results:
{collection.prompt_context(result)}
"""

# Print the assembled prompt for inspection.
print(prompt)


You are assessing movies. Given the query below and the results, assess which movies are relevant for the query

Query: star wars

Results:
ID: tt0113107
Title: Frankie Starlight (1995)
Summary: Sometimes the brightest star is the one that shines within.. The quirky story of a young boy's adventures growing up with his stunningly beautiful mother and the two very different men who love her.
Genres: [Drama, Romance, War]
Cast: Matt Dillon as Terry Klout, Gabriel Byrne as Jack Kelly

ID: tt0114663
Title: Three Wishes (1995)
Summary: If you really believe, magic will find you. While Jane Holman (Mastrantonio) is driving with her two sons, she accidentally runs into a drifter, Jack McCloud (Swayze), who breaks his leg. Being responsible,  Jane invites Jack, and his dog, to stay at her home until his leg has healed.  Jack struggles to adapt their lifestyle, and finds himself loved by the family. He starts teaching baseball to Tom, who misses his father, who was lost in the Korean war. Jack

In [18]:
# Preview the prompt context for just the two Star Wars titles
# (IDs taken from the result table of the test query above).
print(collection.prompt_context(["tt0076759", "tt0120915"]))

ID: tt0076759
Title: Star Wars (1977)
Summary: A long time ago in a galaxy far, far away.... Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.
Genres: [Adventure, Action, Science Fiction]
Cast: Harrison Ford as Han Solo, Carrie Fisher as Princess Leia Organa, Kenny Baker as Artoo-Detoo (R2-D2), James Earl Jones as Voice of Darth Vader (voice), William Hootkins as Red Six (Porkins)

ID: tt0120915
Title: Star Wars: Episode I - The Phantom Menace (1999)
Summary: Every generation has a legend. Every journey has a first step. Every saga has a beginning.. Anakin Skywalker, a young slave strong with the Force, is discovered on Tatooine. Meanwhile, the evil Sith have returned, enacting their plot for revenge against the Jedi.
Genres: [