In [1]:
import requests

import csv


import kagglehub

# Download latest version
path = kagglehub.dataset_download("cryptexcode/mpst-movie-plot-synopses-with-tags")

print("Path to dataset files:", path)

Path to dataset files: /Users/ramundson/.cache/kagglehub/datasets/cryptexcode/mpst-movie-plot-synopses-with-tags/versions/1


In [4]:
import pandas as pd
df = pd.read_csv(f'{path}/mpst_full_data.csv')
df = df[df['plot_synopsis'].notna()] # remove any NaN values as it blows up serialization
data = df.sample(1000).to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

1000

In [16]:
data[0]

{'imdb_id': 'tt0831399',
 'title': 'The Three Musketeers',
 'plot_synopsis': "In Venice, the musketeers Athos, Porthos, and Aramis, with the help of Milady de Winter, steal airship blueprints made by Leonardo da Vinci. However, they are betrayed by Milady, who incapacitates them and sells the blueprints to the Duke of Buckingham.\nA year later, d'Artagnan leaves his village in Gascony for Paris in hopes of becoming a musketeer as his father was, only to learn that they were disbanded. At a rural bar, he challenges Captain Rochefort, leader of Cardinal Richelieu's guard, to a duel after being offended by him, but Rochefort merely shoots him while he's distracted. Once in Paris, d'Artagnan separately encounters Athos, Porthos and Aramis and, accidentally offending all three, schedules duels with each. Athos brings Porthos and Aramis to the duel as his seconds and d'Artagnan realizes who they are. Richelieu's guards arrive to arrest them, but, inspired by d'Artagnan, the musketeers fight 

In [5]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [6]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

In [17]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [18]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="movie_plots",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [19]:
# vectorize!
qdrant.upload_points(
    collection_name="movie_plots",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["plot_synopsis"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [20]:
user_prompt = "Suggest me a movie about kids finding pirate treasure"

In [21]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="movie_plots",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'imdb_id': 'tt0248568', 'title': 'Treasure Island', 'plot_synopsis': 'Young Jim Hawkins Jackie Cooper) and his mother (Dorothy Peterson) run the Admiral Benbow, a tavern near Bristol, England. One dark and stormy night, during a birthday celebration, the mysterious Billy Bones (Lionel Barrymore) arrives and drunkenly talks about treasure. Soon after, Bones is visited by Black Dog (Charles McNaughton) then Pew (William V. Mong), and drops dead, leaving a chest, which he bragged contained gold and jewels. Instead of money, Jim finds a map that his friend Dr. Livesey (Otto Kruger) realizes will lead them to the famous Flint treasure. Squire Trelawney (Nigel Bruce) raises money for a voyage to the treasure island and they set sail on Captain Alexander Smollett\'s (Lewis Stone) ship Hispaniola. Also on board is the one-legged Long John Silver (Wallace Beery) and his cronies. Even though Bones had warned Jim about a sailor with one leg, they become friends.\nDuring the voyage, several fatal

  hits = qdrant.search(


In [22]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]
search_results

[{'imdb_id': 'tt0133240',
  'title': 'Treasure Planet',
  'tags': 'psychedelic, sci-fi, murder, flashback',
  'split': 'train',
  'synopsis_source': 'imdb'},
 {'imdb_id': 'tt0248568',
  'title': 'Treasure Island',
  'plot_synopsis': 'Young Jim Hawkins Jackie Cooper) and his mother (Dorothy Peterson) run the Admiral Benbow, a tavern near Bristol, England. One dark and stormy night, during a birthday celebration, the mysterious Billy Bones (Lionel Barrymore) arrives and drunkenly talks about treasure. Soon after, Bones is visited by Black Dog (Charles McNaughton) then Pew (William V. Mong), and drops dead, leaving a chest, which he bragged contained gold and jewels. Instead of money, Jim finds a map that his friend Dr. Livesey (Otto Kruger) realizes will lead them to the famous Flint treasure. Squire Trelawney (Nigel Bruce) raises money for a voyage to the treasure island and they set sail on Captain Alexander Smollett\'s (Lewis Stone) ship Hispaniola. Also on board is the one-legged Lon

In [26]:
# Now time to connect to the local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://localhost:1234/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
    model="LM_STUDIO_DEEPSEEK",
    messages=[
        {"role": "system", "content": "You are chatbot, a movie specialist. Your top priority is to help find movies, summarize and suggest movies."},
        {"role": "user", "content": "Suggest me a movie about kids finding pirate treasure"},
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content='}\n\nThe user is interested in movies that involve adventure, have high stakes, and have a group of young characters on a quest. The user mentioned specific movies like "Pirates of the Caribbean" and "Treasure Planet," which are both action-packed and have treasure-hunting elements.\n\nSo, I need to find more movies with similar themes: high-stakes adventures, treasure hunting, and groups of young characters. Maybe something with a shipwreck or searching for a hidden treasure. Perhaps including some conflict with pirates or other antagonists.\n\nThinking about this, "The Goonies" comes to mind as well, which is another classic movie involving kids searching for treasure and dealing with criminals.\n\nI should look for movies that feature young characters facing challenges while on an adventure, possibly involving treasure or high-stakes situations. Maybe something with a shipwreck or a mysterious map leading them into danger.\n\nAlso, considering the user