In [1]:
# 03 Final Score
# Combine Story similarity + Creator similarity + Numeric score
# Produces final ranked recommendations

In [2]:
import os
import joblib
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.preprocessing import MinMaxScaler

In [3]:
# ---------- CONFIG ----------
# Movie metadata table read by the load cell.
META_CSV = "movies_with_content_meta.csv"   # from Content Score notebook
# Column of the metadata table holding the precomputed numeric score.
NUMERIC_COL = "numeric_score"               # from Numeric Score notebook

# Story artifacts (TF-IDF fallback)
STORY_TFIDF_VEC = "story_tfidf_vectorizer.joblib"
STORY_TFIDF_NPZ = "story_tfidf.npz"

# Creator artifacts
CREATORS_VECTORIZER = "creators_tfidf.joblib"
CREATORS_MATRIX = "creators_tfidf.npz"

# Weights of the three score components (must sum to 1)
ALPHA_STORY = 0.55
BETA_CREATORS = 0.25
GAMMA_NUMERIC = 0.20

# Enforce the invariant stated above: each component is scaled to [0, 1], so
# the blended score only stays in [0, 1] while the weights sum to 1.
assert np.isclose(ALPHA_STORY + BETA_CREATORS + GAMMA_NUMERIC, 1.0), (
    "ALPHA_STORY + BETA_CREATORS + GAMMA_NUMERIC must sum to 1"
)


In [4]:
# ---------- Load resources ----------
if not os.path.exists(META_CSV):
    raise FileNotFoundError(f"Missing {META_CSV}. Run Content Score notebook first.")

# Check every artifact up front so a missing file is reported by name, all at
# once, rather than as a loader error for whichever file is read first.
missing_artifacts = [
    path
    for path in (STORY_TFIDF_VEC, STORY_TFIDF_NPZ, CREATORS_VECTORIZER, CREATORS_MATRIX)
    if not os.path.exists(path)
]
if missing_artifacts:
    raise FileNotFoundError(
        "Missing artifact file(s): " + ", ".join(missing_artifacts)
        + ". Generate them before running this notebook."
    )

df = pd.read_csv(META_CSV)
print(f"Loaded metadata: {len(df)} rows")

# Load TF-IDF artifacts
# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code. Only load .joblib files produced by a trusted pipeline.
story_tfidf = joblib.load(STORY_TFIDF_VEC)
story_matrix = sparse.load_npz(STORY_TFIDF_NPZ)

creators_tfidf = joblib.load(CREATORS_VECTORIZER)
creators_matrix = sparse.load_npz(CREATORS_MATRIX)

# The functions below pair matrix row i with metadata row i, so a row-count
# mismatch would silently attach scores to the wrong movies. Fail here instead.
misaligned = [
    f"{name} has {mat.shape[0]} rows"
    for name, mat in ((STORY_TFIDF_NPZ, story_matrix), (CREATORS_MATRIX, creators_matrix))
    if mat.shape[0] != len(df)
]
if misaligned:
    raise ValueError(
        f"Artifacts are not aligned with {META_CSV} ({len(df)} rows): "
        + "; ".join(misaligned)
    )

Loaded metadata: 29937 rows


In [5]:
def _find_indices_from_titles(df, titles):
    titles = [t.strip().lower() for t in titles]
    mask = df['title'].astype(str).str.lower().isin(titles)
    return df[mask].index.tolist()


In [6]:
def _normalize_01(arr):
    arr = np.asarray(arr, dtype=float)
    mn, mx = arr.min(), arr.max()
    if mx - mn > 0:
        return (arr - mn) / (mx - mn)
    return np.zeros_like(arr)

In [7]:
def compute_story_similarity(selected_indices):
    """Score every row of ``story_matrix`` against the mean of the selected rows.

    The selected rows are averaged into one profile vector, and each matrix
    row is scored by its dot product with that profile. This equals cosine
    similarity up to a constant factor only if the stored rows are
    L2-normalised, which cannot be verified from here.

    Parameters
    ----------
    selected_indices : sequence of int
        Row positions in ``story_matrix`` to average.

    Returns
    -------
    numpy.ndarray
        One score per matrix row, min-max scaled by ``_normalize_01``.

    Raises
    ------
    ValueError
        If ``selected_indices`` is empty.
    """
    if len(selected_indices) == 0:
        raise ValueError("selected_indices must contain at least one row position")
    # The mean of a sparse selection is dense; flatten it to a 1-D vector.
    profile = np.asarray(story_matrix[selected_indices].mean(axis=0)).ravel()
    # Sparse matrix times a dense 1-D vector gives a dense 1-D result, so the
    # `.A` shorthand (unavailable on SciPy sparse arrays) is not needed.
    sims = np.asarray(story_matrix.dot(profile)).ravel()
    return _normalize_01(sims)


In [8]:
def compute_creator_similarity(selected_indices):
    """Score every row of ``creators_matrix`` against the mean of the selected rows.

    The selected rows are averaged into one profile vector, and each matrix
    row is scored by its dot product with that profile. This equals cosine
    similarity up to a constant factor only if the stored rows are
    L2-normalised, which cannot be verified from here.

    Parameters
    ----------
    selected_indices : sequence of int
        Row positions in ``creators_matrix`` to average.

    Returns
    -------
    numpy.ndarray
        One score per matrix row, min-max scaled by ``_normalize_01``.

    Raises
    ------
    ValueError
        If ``selected_indices`` is empty.
    """
    if len(selected_indices) == 0:
        raise ValueError("selected_indices must contain at least one row position")
    # The mean of a sparse selection is dense; flatten it to a 1-D vector.
    profile = np.asarray(creators_matrix[selected_indices].mean(axis=0)).ravel()
    # Sparse matrix times a dense 1-D vector gives a dense 1-D result, so the
    # `.A` shorthand (unavailable on SciPy sparse arrays) is not needed.
    sims = np.asarray(creators_matrix.dot(profile)).ravel()
    return _normalize_01(sims)

In [9]:
def recommend_final(user_movies, top_n=10, exclude_user_movies=True):
    """Rank the movies in ``df`` against the user's seed titles.

    The final score is
    ``ALPHA_STORY * story_sim + BETA_CREATORS * creator_sim + GAMMA_NUMERIC * numeric``,
    where each component has been min-max scaled to [0, 1]. The blended score
    itself is not rescaled.

    Parameters
    ----------
    user_movies : str or list of str
        Seed title(s). Matching against ``df['title']`` is case-insensitive
        and ignores surrounding whitespace in the supplied titles. Titles
        that are not in the dataset are ignored as long as at least one
        matches. Every row sharing a matched title is used as a seed.
    top_n : int, default 10
        Maximum number of rows to return.
    exclude_user_movies : bool, default True
        Drop the seed rows from the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``title``, ``final_score``, ``story_sim``,
        ``creator_sim``, ``numeric_score``, sorted by ``final_score``
        in descending order.

    Raises
    ------
    ValueError
        If no titles are given or none of them is found in the dataset.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(user_movies, str):
        user_movies = [user_movies]
    if not user_movies:
        raise ValueError("Provide at least one movie title")

    # Rows of df that act as seeds.
    idx = _find_indices_from_titles(df, user_movies)
    if len(idx) == 0:
        raise ValueError("None of the provided titles found in dataset")

    # Similarity of every movie to the averaged seed profile.
    story_sim = compute_story_similarity(idx)
    creator_sim = compute_creator_similarity(idx)

    # Numeric score: missing values count as 0; an absent column contributes nothing.
    if NUMERIC_COL in df.columns:
        numeric = _normalize_01(df[NUMERIC_COL].fillna(0).values)
    else:
        numeric = np.zeros(len(df))

    # Final score (NO global renormalization afterward)
    final_score = (
        ALPHA_STORY * story_sim +
        BETA_CREATORS * creator_sim +
        GAMMA_NUMERIC * numeric
    )

    res = df[['id', 'title']].copy()
    res['final_score'] = final_score
    res['story_sim'] = story_sim
    res['creator_sim'] = creator_sim
    res['numeric_score'] = numeric

    if exclude_user_movies:
        # Drop exactly the rows used as seeds. Re-matching titles here could
        # disagree with the lookup (for example, on stray whitespace) and
        # recommend a seed back to the user.
        keep = np.ones(len(res), dtype=bool)
        keep[idx] = False
        res = res[keep]

    return res.sort_values('final_score', ascending=False).head(top_n)


In [10]:
if __name__ == '__main__':
    # Demo run: seed the recommender with three Batman films and show the
    # ten best-scoring suggestions as a plain-text table.
    demo_seeds = ['The Dark Knight', 'Batman Returns', 'Batman Begins']
    print("\nExample recommendation:")
    try:
        out = recommend_final(user_movies=demo_seeds, top_n=10)
        print(out.to_string(index=False))
    except Exception as err:
        # Best-effort demo: report the failure and still print the closing banner.
        print("Example failed:", err)

    print("\nFinal Score notebook ready.")



Example recommendation:
    id                 title  final_score  story_sim  creator_sim  numeric_score
 49026 The Dark Knight Rises     0.709001   0.566860     0.837077       0.939793
   268                Batman     0.651706   0.630955     0.554968       0.829693
   414        Batman Forever     0.482834   0.499387     0.267167       0.706900
414906            The Batman     0.474807   0.444443     0.208169       0.891604
 13851 Batman: Gotham Knight     0.441513   0.520782     0.160986       0.574181
 27205             Inception     0.404884   0.013121     0.801760       0.986137
   415        Batman & Robin     0.391793   0.367278     0.254838       0.630406
  1124          The Prestige     0.388271   0.005609     0.807631       0.916391
374720               Dunkirk     0.373896   0.027738     0.736035       0.873158
872585           Oppenheimer     0.366709   0.023553     0.681237       0.917227

Final Score notebook ready.
