#### RAG for screenplays

##### Loading Metropolis screenplay

In [31]:
from langchain_community.document_loaders import PyPDFLoader

# Build a loader for the screenplay PDF; the path is relative, so it is
# resolved against the directory the notebook kernel runs in.
pdf_loader = PyPDFLoader('./metropolis.pdf')

In [32]:
# Read the PDF into `docs`; later cells index into it and read
# `.page_content` on the individual items.
docs = pdf_loader.load()

In [40]:
# Inspect the raw text of the item at index 1 (i.e. the second entry of `docs`).
print(docs[1].page_content)

INT.  HIGH RISE APARTMENT BUILDING - LOBBY - CONTINUOUS
The doorman at his post flipping through a magazine, rap
music pounding from his radio.
Glass suddenly shatters as a gloved hand punches through
the window, reaching in, unlocking the front door.
Two men in dark suits blur inside.  The doorman rushes to
stop them, only --
DEX, the taller of the two men, slams the barrel of his
gun into the doorman's throat.
                         DEX
          Life can be an illusion.
                  (low and deadly)
          Sometimes we see what isn't there,
          and sometimes we don't see what is
          there.
The doorman, wide eyed with terror, nods his consent.
Dex removes the gun.
Doorman plants himself in the chair, eyes glued to his
magazine, making it clear he doesn't 'see' the men as they
stroll into --
INT.  ELEVATOR - CONTINUOUS
Old fashioned steel box struggling its way up the shaft.
Fluorescent yellow light stabbing through the grating.
INT.  HALLWAY - MOMENT LATER
Dex s

In [44]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pprint import pprint

# Take the text of docs[1], chunk it with a splitter left at its default
# settings, and pretty-print the resulting list of chunks.
first_page = docs[1].page_content
splitter = RecursiveCharacterTextSplitter()
splitted_first_page = splitter.split_text(text=first_page)
pprint(splitted_first_page)

['INT.  HIGH RISE APARTMENT BUILDING - LOBBY - CONTINUOUS\n'
 'The doorman at his post flipping through a magazine, rap\n'
 'music pounding from his radio.\n'
 'Glass suddenly shatters as a gloved hand punches through\n'
 'the window, reaching in, unlocking the front door.\n'
 'Two men in dark suits blur inside.  The doorman rushes to\n'
 'stop them, only --\n'
 'DEX, the taller of the two men, slams the barrel of his\n'
 "gun into the doorman's throat.\n"
 '                         DEX\n'
 '          Life can be an illusion.\n'
 '                  (low and deadly)\n'
 "          Sometimes we see what isn't there,\n"
 "          and sometimes we don't see what is\n"
 '          there.\n'
 'The doorman, wide eyed with terror, nods his consent.\n'
 'Dex removes the gun.\n'
 'Doorman plants himself in the chair, eyes glued to his\n'
 "magazine, making it clear he doesn't 'see' the men as they\n"
 'stroll into --\n'
 'INT.  ELEVATOR - CONTINUOUS\n'
 'Old fashioned steel box struggling its 

The splitting can be improved, especially because the format of a screenplay is highly uniform (for example, every scene starts with a heading such as `INT.`).

In [52]:
from langchain_ollama.embeddings import OllamaEmbeddings

# Embedding client configured for the 'nomic-embed-text' model.
# NOTE(review): presumably needs a reachable Ollama server with this model
# already pulled — confirm before running on a fresh machine.
embedding_model = OllamaEmbeddings(model='nomic-embed-text')

In [53]:
# Embed the chunks produced by the splitter cell; the displayed result is a
# nested list of floats (a list of embedding vectors).
embedding_response = embedding_model.embed_documents(splitted_first_page)
# NOTE(review): echoing the bare name prints every component of every vector;
# consider displaying only a short slice to keep the notebook readable.
embedding_response

[[-0.026607439,
  -0.0016676615,
  -0.17468132,
  0.012560263,
  0.088503316,
  -0.0041034296,
  0.017680721,
  0.03064929,
  -0.0155091295,
  -0.046692323,
  -0.058123566,
  0.06658112,
  -0.028055374,
  0.024826553,
  0.0020858992,
  -0.07399596,
  0.0734647,
  -0.023941264,
  -0.0317327,
  -0.0061989664,
  -0.09028967,
  -0.009096658,
  -0.031767085,
  -0.11263814,
  0.07341217,
  0.01906179,
  0.0035234706,
  0.015339673,
  -0.02328498,
  0.014196362,
  0.050307702,
  0.021107687,
  -0.08826706,
  -0.03406944,
  -0.077429265,
  -0.09994991,
  0.03052968,
  0.049416564,
  0.03139317,
  -0.026082752,
  0.037973445,
  0.023722235,
  -0.012827503,
  -0.006686677,
  0.030962748,
  0.056398977,
  0.024701115,
  -0.01018388,
  0.023874007,
  -0.024413906,
  0.0138399275,
  -0.05574178,
  0.05540913,
  -0.044682216,
  0.07809865,
  -0.028111156,
  0.008878675,
  -0.036548458,
  0.04474266,
  -0.01214725,
  0.09984432,
  0.025691452,
  -0.0006058816,
  0.04159894,
  0.040987525,
  -0.044191

In [54]:
# Embed a single query string with the same model used for the chunks.
# NOTE(review): `embedded_query` is not referenced by any other visible cell.
embedded_query = embedding_model.embed_query("what is the scene about?")

In [55]:
from langchain_chroma import Chroma

# Create the Chroma vector store, handing it `embedding_model` as its
# embedding function.
# NOTE(review): no collection name or persistence directory is passed, so the
# library defaults apply — confirm that is intended.
store = Chroma(embedding_function=embedding_model)

In [None]:
# Index every loaded page under a deterministic, position-based ID.
# Without explicit IDs the store assigns fresh ones on each call, so re-running
# this cell would insert another copy of every page and later similarity
# searches would return duplicated results. Stable IDs make the cell safe to
# re-run: the same pages are written to the same IDs.
returned_ids = store.add_documents(
    docs,
    ids=[f'metropolis-page-{index}' for index in range(len(docs))],
)
returned_ids

In [65]:
# Fetch the stored pages closest to the question and print each page's text,
# followed by a delimiter line so consecutive results are easy to tell apart.
search_result = store.similarity_search('what is the scene about')
for res in search_result:
    pprint(res.page_content)
    print('-- doc end --')

('He sees Christoph heading toward him.\n'
 'Tanner trying to scramble out of the way.\n'
 "Christoph's eyes are scanning the faces around him, and it\n"
 "appears he doesn't see Tanner down on the floor.\n"
 'The song ends, people stop dancing.\n'
 'Tanner finds a table and holes up under it --\n'
 'INT.  UNDER THE TABLE\n'
 'Tanner allows himself a deep breath.\n'
 'A sudden EXPLOSION of sound.\n'
 'Tanner spins, laser pointed.\n'
 "But it's just the next song starting.  Tanner exhales.\n"
 'The table is no longer there.\n'
 'Christoph has ripped it off the ground and flung it out\n'
 'onto the dance floor.\n'
 'Tanner swings the laser around.\n'
 '                         CHRISTOPH\n'
 '                  (shakes his head)\n'
 "          Halden's little killer, who --\n"
 'Tanner FIRES!\n'
 "The shot slams through Christoph's face but does\n"
 'absolutely no damage.\n'
 '                         CHRISTOPH\n'
 "          Can't kill a God.\n"
 'Tanner fires again.  And again.\n'
 "The 

In [66]:
# Same retrieval as the scene-level query, but with a question about the whole
# movie; each hit's text is printed and followed by a delimiter line.
search_results_2 = store.similarity_search('what is the movie about')
for res in search_results_2:
    pprint(res.page_content)
    print('-- doc end --')

('remain linked to their flesh and\n'
 '          blood body.  Such an encumbrance.\n'
 '                  (beat)\n'
 '          Even if their bodies are kept on\n'
 "          machines, nourished by IV's, the\n"
 '          laws of nature eventually will be\n'
 '          respected.  The body grows old and\n'
 "          dies.  Then they're gone from both\n"
 '          worlds.\n'
 'Somewhere deep inside, she is beginning to understand.\n'
 'But it brings her no comfort.\n'
 'Christoph heads for the elevator, motioning for her to\n'
 "follow.  But she doesn't.\n"
 '                         DIGITAL GENA\n'
 '          She said I have a son...\n'
 '                         CHRISTOPH\n'
 '          No, she said she had a son.\n'
 '                  (quiet)\n'
 '          In a dream she once called life.\n'
 'She stares at him.\n'
 '                         DIGITAL GENA\n'
 "          How come I can't remember?\n"
 '                         CHRISTOPH\n'
 '          Because I took it out o