In [1]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding checkpoint used to encode search queries.
# NOTE(review): presumably the same checkpoint that produced the saved
# embeddings / FAISS index loaded later in this notebook — confirm, since
# query vectors and stored vectors are only comparable if it is.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np
import faiss

# Locations of the precomputed search artifacts.
EMBEDDINGS_PATH = "artifacts/movie_embeddings.npy"
FAISS_INDEX_PATH = "artifacts/movie_faiss.index"

# NOTE(review): `embeddings` is not referenced by any cell visible here;
# consider dropping this load if nothing else in the notebook needs it.
embeddings = np.load(EMBEDDINGS_PATH)
# Serialized FAISS index that semantic_search() queries.
index = faiss.read_index(FAISS_INDEX_PATH)


In [3]:
import pandas as pd
# Movie metadata table; semantic_search() looks rows up by position and reads
# the "Title" and "Release Year" columns.
# NOTE(review): positional lookup assumes the rows are in the same order as
# the vectors stored in the FAISS index — verify against the indexing step.
MOVIES_CSV_PATH = "df1_cleaned.csv"
df_processed = pd.read_csv(MOVIES_CSV_PATH)

In [4]:
def semantic_search(query, top_k=5):
    """Return the movies whose indexed vectors best match a text query.

    The query is encoded with the module-level ``model`` (with
    ``normalize_embeddings=True``), searched against the module-level FAISS
    ``index``, and each hit is resolved to a row of ``df_processed`` by
    position.

    Args:
        query: Text to search for.
        top_k: Maximum number of hits to request from the index.

    Returns:
        A list of dicts with keys ``"title"``, ``"year"`` and ``"score"``,
        in the order the index returned them. The list may be shorter than
        ``top_k`` when the index cannot supply that many neighbours.
        NOTE(review): the meaning of ``score`` (similarity vs. distance)
        depends on how the index was built — confirm before thresholding.
    """
    query_vec = model.encode(
        [query],
        normalize_embeddings=True
    )

    scores, indices = index.search(query_vec, top_k)

    results = []
    # A single query was encoded, so row 0 holds all of its hits.
    for idx, score in zip(indices[0], scores[0]):
        # FAISS fills missing neighbours with label -1. Without this guard,
        # iloc[-1] would silently report the last row of the table as a hit.
        if idx < 0:
            continue
        movie = df_processed.iloc[idx]
        results.append({
            "title": movie["Title"],
            "year": movie["Release Year"],
            "score": float(score)
        })

    return results


In [5]:
# Example: free-text query combining two themes; the cell displays the top 5 hits.
semantic_search("movies about war and friendship", top_k=5)


[{'title': 'A Soldier Speaks after Death',
  'year': np.int64(1966),
  'score': 0.5737587213516235},
 {'title': 'The Good Wife',
  'year': np.int64(1987),
  'score': 0.5450994968414307},
 {'title': 'Bless Me, Ultima',
  'year': np.int64(2013),
  'score': 0.5444862842559814},
 {'title': 'Once Upon a Dream',
  'year': np.int64(1949),
  'score': 0.5386888384819031},
 {'title': 'Brothers', 'year': np.int64(2016), 'score': 0.5273308753967285}]

In [6]:
# Example: short single-topic query, again limited to the top 5 hits.
semantic_search("movies related to slum", top_k=5)

[{'title': 'Thakarachenda',
  'year': np.int64(2007),
  'score': 0.540657639503479},
 {'title': 'Walk Like a Dragon',
  'year': np.int64(1960),
  'score': 0.5364879369735718},
 {'title': 'Maa Kasam', 'year': np.int64(1999), 'score': 0.5238276720046997},
 {'title': 'The Bang Bang Club',
  'year': np.int64(2010),
  'score': 0.5139367580413818},
 {'title': 'Krantikaal', 'year': np.int64(2005), 'score': 0.5084155797958374}]

In [7]:

# Example: longer descriptive query, top 5 hits.
# NOTE(review): judging by title alone, the hits recorded below do not look
# space-related — worth checking what text was embedded for each movie.
semantic_search("A movie about space exploration and black holes", top_k=5)

[{'title': "Doraemon the Movie: Nobita's New Great Adventure into the Underworld - The Seven Magic Users",
  'year': np.int64(2007),
  'score': 0.4734650254249573},
 {'title': 'Sweety Nanna Jodi',
  'year': np.int64(2013),
  'score': 0.4657539129257202},
 {'title': 'The 11th Hour',
  'year': np.int64(2007),
  'score': 0.4602738618850708},
 {'title': 'Ultraman Cosmos vs. Ultraman Justice: The Final Battle',
  'year': np.int64(2003),
  'score': 0.4587746262550354},
 {'title': 'The Thousand Faces of Dunjia',
  'year': np.int64(2017),
  'score': 0.4583735764026642}]