In [None]:
# Install the notebook's dependencies from requirements.txt (%pip targets the running kernel's environment).
%pip install -r requirements.txt

First, run the plain question through the LLM.
 - Some movie plots may be missing from, or poorly represented in, the general-purpose model.

In [None]:
import os
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import GPT4All
import gpt4all as g4a

# Load the local model.
# To download the model and store it locally first, run:
#   model = g4a.GPT4All("llama-2-7b-chat.ggmlv3.q4_0.bin")
model_path = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_0.bin")
llm = GPT4All(model=model_path)

# Prompt that asks the model to include movie metadata in its answer.
template = (
    "Question: {question}\n"
    "\n"
    "Answer including relevant infos for movie, e.g. genre, director, etc.:"
)

prompt = PromptTemplate(input_variables=["question"], template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [2]:
# Baseline: send the question straight to the LLM chain, without any retrieved context.
question = 'Tell me about the movie "A Touch of Zen".'
llm_chain.run(question)

'\n\n"A Touch of Zen" is a 2001 Taiwanese film directed by Hou Hsiao-hsien and starring Shu Qi and Honglei Sun. It is a drama/thriller that explores themes of identity, memory, and the blurring of reality and fantasy. The movie follows a young woman named Aiyuan as she navigates her way through a complex web of relationships and events, all while trying to uncover the truth about her past.\n\nThe film is notable for its slow pacing and long takes, which create a sense of hypnotic immersion in the story. The cinematography is also striking, with a focus on muted colors and natural lighting that adds to the dreamlike atmosphere.\n\n"A Touch of Zen" was well-received by critics and audiences alike, and it has since become a cult classic. It won several awards at film festivals around the world, including the Golden Lion award at the Venice Film Festival in 2001.'

The LLM response may contain "hallucinations".
E.g. the movie was released in 1971, not 2001.

With Retrieval Augmented Generation the LLM answer can be improved.

<img src="images/RAG.png" alt="Retrieval Augmented Generation" width="30%" height="auto" class="blog-image">

Add domain-specific context from a [dataset](https://www.kaggle.com/datasets/jrobischon/wikipedia-movie-plots/) containing approx. 35k movie plots.

<img src="images/data-card.png" alt="Retrieval Augmented Generation" width="40%" height="auto" class="blog-image">

- save each movie plot as a separate document

In [3]:
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import DirectoryLoader

def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Documents are numbered from 1 and separated by a rule of 100 dashes.
    Each item in ``docs`` must expose its text as ``page_content``.
    """
    separator = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(separator.join(sections))

Create Vectorstore once
 - movie data is stored as embedding vectors for **semantic** search
 - Vectorstore index is persisted in folder faiss_index

In [None]:
# Load the movie-plot dataset and preview the first three plots, one blank line after each.
plots = pd.read_csv('wiki_movie_plots_deduped.csv')
for plot_text in plots['Plot'].iloc[:3]:
    print(plot_text, end="\n\n")

In [None]:
# Store each movie plot as a separate text file in the 'documents' folder.
os.makedirs('documents', exist_ok=True)

for i, row in plots.iterrows():
    # Plot text may contain non-ASCII characters, so write UTF-8 explicitly
    # rather than relying on the platform's default encoding.
    with open(f'documents/plot_{i}.txt', 'w', encoding='utf-8') as f:
        # One metadata field per line, followed by the plot itself.
        txt = f"""
Title: {str(row['Title'])}
Release Year: {str(row['Release Year'])}
Genre: {str(row['Genre'])}
Ethnicity: {str(row['Origin/Ethnicity'])}
Director: {str(row['Director'])}
Cast: {str(row['Cast'])}
Plot: {str(row['Plot'])}
        """
        f.write(txt)

In [None]:
def create_index(directory, index_path="faiss_index", embedding_model=None):
    """Build a FAISS vector store from the files in ``directory`` and persist it.

    Args:
        directory: Folder whose files are loaded as documents via ``DirectoryLoader``.
        index_path: Folder the index is saved to; defaults to ``"faiss_index"``.
        embedding_model: Embedding model used to vectorise the documents. When
            omitted, the notebook-level ``embeddings`` object is used.

    Raises:
        ValueError: If no documents could be loaded from ``directory``.
    """
    loader = DirectoryLoader(directory)
    docs = loader.load()
    if not docs:
        # An empty corpus cannot be indexed; fail with a clear message up front.
        raise ValueError(f"No documents found in {directory!r}; nothing to index.")

    print("creating index")

    if embedding_model is None:
        # Fall back to the notebook global, which must be defined before this call.
        embedding_model = embeddings
    db = FAISS.from_documents(docs, embedding_model)
    db.save_local(index_path)

In [4]:
embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")  # downloads to ~/.cache

# Build the vector store only when no persisted index folder exists yet, so re-running
# the notebook reuses the saved index instead of re-embedding every document.
# Delete the 'faiss_index' folder to force a rebuild.
if not os.path.isdir("faiss_index"):
    create_index('documents')  # initial vector store creation
db = FAISS.load_local("faiss_index", embeddings)

Extract movie information
 - that matches the question
 - and use it as context (automated prompt engineering)

In [11]:
# compress if model has small token window size (2k)
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.vectorstores import Chroma

# Wrap the FAISS retriever so each retrieved document is passed through the LLM-based extractor.
compressor = LLMChainExtractor.from_llm(llm)
base_retriever = db.as_retriever()
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)

# Extend the user question with instructions about which fields to keep.
retrieval_query = question + " Summarize the plot. In addition, state Title, Release Year, and Director."
compressed_docs = compression_retriever.get_relevant_documents(retrieval_query)
pretty_print_docs(compressed_docs)

# Index the compressed documents in a Chroma store for the QA step.
compressed_context = Chroma.from_documents(compressed_docs, embeddings)

LLaMA ERROR: The prompt is 2269 tokens and the context window is 2048!


Document 1:

* Title: A Touch of Zen
* Release Year: 1971
* Director: King Hu
* Cast: nan (no actors are mentioned)
* Plot summary: The story is largely seen through the eyes of Gu, a well-meaning but unambitious scholar and painter who becomes involved in a plot to bring down corrupt officials.
----------------------------------------------------------------------------------------------------
Document 2:

Title: Zenobia Release Year: 1939 Genre: comedy Ethnicity: American Director: Gordon Douglas Cast: Oliver Hardy, Harry Langdon, Billie Burke Plot: Dr. Henry Tibbett is called on by a travelling circus trainer to cure his sick elephant.
NO OUTPUT
----------------------------------------------------------------------------------------------------
Document 3:

ERROR: The prompt size exceeds the context window size and cannot be processed.
----------------------------------------------------------------------------------------------------
Document 4:

Title: Zegen Release Year: 1987 Dir

In [12]:
from langchain.chains import RetrievalQA

# Answer the original question using only the compressed documents as retrieval context.
context_retriever = compressed_context.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm, retriever=context_retriever)
qa_chain.run({"query": question})

' The movie "A Touch of Zen" was released in 1971 and directed by King Hu. It stars no actors, but follows the story of Gu, a well-meaning but unambitious scholar and painter who becomes involved in a plot to bring down corrupt officials.'

<!-- import gpt4all
model = gpt4all.GPT4All(model_name="ggml-mpt-7b-chat.bin") -->