In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the cleaned movie table and the precomputed embedding matrix.
df = pd.read_csv("../data/cleaned_movies_30k.csv")
embeddings = np.load("../data/movie_embeddings.npy")

# The similarity code in this notebook indexes `embeddings` by movie row and
# adds per-row similarities to a score vector of length len(df), so the matrix
# must be 2-D with exactly one row per movie. Fail fast here instead of with a
# broadcasting error (or silently misaligned rows) later on.
assert embeddings.ndim == 2 and len(embeddings) == len(df), (
    f"embeddings shape {embeddings.shape} does not match {len(df)} movie rows"
)

In [3]:
from sentence_transformers import SentenceTransformer
# Checkpoint used to embed free-text mood queries at recommendation time.
# NOTE(review): presumably the same model that produced movie_embeddings.npy;
# confirm, otherwise query and movie vectors are not comparable.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [4]:
# Mood keyword -> descriptive phrase. Each phrase spells the mood out in
# several words, which gives a text encoder more to work with than the bare
# keyword.
mood_map = dict(
    happy="feel good comedy light hearted joyful fun",
    sad="emotional sad heartbreaking drama tragedy",
    romantic="romantic love relationship emotional",
    thriller="suspense thriller intense mystery crime",
    motivational="inspiring motivational uplifting",
    dark="dark psychological serious disturbing",
    action="action adventure fast paced excitement",
)

In [5]:
# Divide the rating by 10 so the rating term is on a scale comparable with the
# cosine-similarity terms it is later added to.
# NOTE(review): presumably imdb_rating is on a 0-10 scale; confirm.
df["imdb_norm"] = df["imdb_rating"].div(10)

In [6]:
def get_movie_similarity(movie):
    """Find a movie by title and score every movie against it.

    The lookup is a case-insensitive, literal substring search over
    ``df["title"]``. If some title equals the query exactly, that movie is
    used; otherwise the first title containing the query is used.

    Parameters
    ----------
    movie : str
        Full or partial movie title. Surrounding whitespace is ignored.

    Returns
    -------
    (numpy.ndarray, index label) or (None, None)
        Cosine similarity between the matched movie's embedding and every row
        of ``embeddings``, together with the matched row's label in
        ``df.index``. ``(None, None)`` when the query is blank or no title
        contains it.
    """
    query = movie.strip().lower()
    if not query:
        return None, None

    titles = df["title"].str.lower()
    # regex=False: titles such as "(500) Days of Summer" or "C++" must be
    # matched literally instead of being parsed as regular expressions.
    # na=False: rows with a missing title count as "no match" rather than
    # putting NaN into the boolean mask.
    mask = titles.str.contains(query, regex=False, na=False)
    if not mask.any():
        return None, None

    # Prefer an exact title over an arbitrary earlier substring hit
    # (e.g. "up" should pick "Up", not the first title containing "up").
    exact = mask & titles.eq(query)
    chosen = exact if exact.any() else mask

    # `embeddings` is addressed by row position; the label is returned
    # separately so callers can drop the row from `df`. The two coincide for
    # the default RangeIndex but not after filtering or re-indexing.
    pos = int(np.flatnonzero(chosen.to_numpy(dtype=bool))[0])
    idx = df.index[pos]

    movie_vec = embeddings[pos].reshape(1, -1)
    sim = cosine_similarity(movie_vec, embeddings)[0]

    return sim, idx

In [7]:
def get_mood_similarity(mood_text, model):
    """Score every movie embedding against an encoded mood description.

    Parameters
    ----------
    mood_text : str
        Text describing the desired mood.
    model
        Object whose ``encode`` method accepts a list of strings; it is called
        with ``[mood_text]``.

    Returns
    -------
    The first row of ``cosine_similarity(encoded_text, embeddings)``, i.e. one
    similarity value per row of ``embeddings``.
    """
    query_vec = model.encode([mood_text])
    similarity_matrix = cosine_similarity(query_vec, embeddings)
    return similarity_matrix[0]

In [8]:
def recommend(movie=None, mood=None, top_n=10,
              w_movie=0.4, w_mood=0.4, w_imdb=0.2):
    """Rank movies by a weighted blend of movie similarity, mood similarity and rating.

    The score of each movie is
    ``w_movie * sim_to_movie + w_mood * sim_to_mood + w_imdb * imdb_norm``,
    where a term is omitted when its input is not given (or, for ``movie``,
    not found). The global ``df`` is not modified.

    Parameters
    ----------
    movie : str, optional
        Title, or part of one, to seed the recommendations. It is resolved
        with ``get_movie_similarity`` and the matched movie itself is left out
        of the results. If no title matches, a ``UserWarning`` is issued and
        the movie term is skipped.
    mood : str, optional
        Either a key of ``mood_map`` (matched case-insensitively and expanded
        to its descriptive phrase before encoding) or free text, which is
        encoded as given.
    top_n : int, default 10
        Maximum number of rows to return.
    w_movie, w_mood, w_imdb : float
        Weights of the movie-similarity, mood-similarity and rating terms.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns ``title``, ``genres`` and
        ``imdb_rating``, best score first, indexed from 1 by rank.
    """
    score = np.zeros(len(df))
    drop_idx = None

    if movie:
        movie_sim, drop_idx = get_movie_similarity(movie)
        if movie_sim is None:
            # Do not fall back silently: the caller asked for a movie-based
            # ranking and would otherwise get a mood/rating-only one.
            warnings.warn(
                f"No title matching {movie!r} was found; "
                "ignoring the movie term.",
                stacklevel=2,
            )
        else:
            score += w_movie * movie_sim

    if mood:
        # Known mood keywords are expanded to their richer description;
        # anything else is treated as free text and encoded unchanged.
        mood_text = mood_map.get(mood.strip().lower(), mood)
        score += w_mood * get_mood_similarity(mood_text, model)

    score += w_imdb * df["imdb_norm"].values

    # Attach the score to a new frame rather than writing a column into the
    # shared `df`, so calls leave no hidden state behind.
    scored = df.assign(final_score=score)
    if drop_idx is not None:
        # Never recommend the movie the user searched for.
        scored = scored.drop(index=drop_idx)

    results = (
        scored.sort_values("final_score", ascending=False)
              .head(top_n)
              .reset_index(drop=True)
    )

    # Present ranks starting at 1 instead of 0.
    results.index += 1

    return results[["title", "genres", "imdb_rating"]]

In [10]:
# Example query: seed with the title match for "inception" and the free-text
# mood "angry"; the returned frame is displayed as the cell output.
recommend(
    movie="inception",
    mood="angry",
)

Unnamed: 0,title,genres,imdb_rating
1,Jawan,"Action, Crime, Thriller",6.9
2,Caper,"Comedy, Crime, Drama",9.3
3,American Psycho,"Thriller, Drama, Crime",7.6
4,Sneakers,"Comedy, Crime, Drama",7.1
5,Limitless,"Thriller, Mystery, Science Fiction",7.4
6,Victim,"Drama, Crime, Thriller",7.7
7,The Conversation,"Crime, Drama, Mystery",7.7
8,Memories of Overdevelopment,Drama,7.0
9,Baby Driver,"Action, Crime",7.5
10,Wall Street,"Crime, Drama",7.3
