# Requirement #2
## DATA IMPORT

In [2]:
import pandas as pd

# Load the data
df_ratings = pd.read_csv('movies_ratings.csv')
df_movies = pd.read_csv('movies_titles.csv')
df_users = pd.read_csv('movies_users.csv')  # Optional for later



## DATA UNDERSTANDING

In [3]:
# Quick preview
print(df_ratings.shape)
print(df_movies.shape)
df_ratings.head()


(2147, 3)
(8508, 42)


Unnamed: 0,user_id,show_id,rating
0,1,s8381,4
1,1,s3466,5
2,1,s3181,4
3,1,s294,3
4,1,s7739,5


## DATA PREPARATION

In [6]:
# Count ratings per show
counts = df_ratings['show_id'].value_counts()
keep_shows = counts[counts >= 1].index

# Filter ratings to keep only those shows
df_ratings = df_ratings[df_ratings['show_id'].isin(keep_shows)].copy()

# Drop duplicates (if any)
df_ratings.drop_duplicates(subset=['user_id', 'show_id'], keep='first', inplace=True)


## MODELING


In [7]:
import numpy as np
from scipy.sparse import csr_matrix

def create_matrix(df, user, item, rating):
    U = df[user].nunique()
    I = df[item].nunique()

    user_mapper = dict(zip(np.unique(df[user]), list(range(U))))
    item_mapper = dict(zip(np.unique(df[item]), list(range(I))))

    user_inv_mapper = dict(zip(list(range(U)), np.unique(df[user])))
    item_inv_mapper = dict(zip(list(range(I)), np.unique(df[item])))

    user_index = [user_mapper[i] for i in df[user]]
    item_index = [item_mapper[i] for i in df[item]]

    X = csr_matrix((df[rating], (item_index, user_index)), shape=(I, U))

    return X, user_mapper, item_mapper, user_inv_mapper, item_inv_mapper


In [8]:
X, user_mapper, item_mapper, user_inv_mapper, item_inv_mapper = create_matrix(
    df_ratings, user='user_id', item='show_id', rating='rating'
)


In [9]:
from sklearn.neighbors import NearestNeighbors

def recommend(itemId, X, item_mapper, item_inv_mapper, k, metric='cosine', messages=True):
    rec_ids = []
    item = item_mapper[itemId]
    item_vector = X[item]

    knn = NearestNeighbors(n_neighbors=k+1, algorithm="brute", metric=metric).fit(X)
    rec = knn.kneighbors(item_vector.reshape(1, -1), return_distance=True)

    rec_indices = rec[1][0]
    rec_distances = rec[0][0]
    rec_distances = np.delete(rec_distances, 0)  # remove the item itself

    for i in range(1, knn.n_neighbors):
        rec_ids.append(item_inv_mapper[rec_indices[i]])

    if messages:
        print(f"Recommended item indices:\n{rec_indices}")
        print(f"Recommended show_ids:\n{rec_ids}")
        print(f"Distances:\n{rec_distances}")

    return rec_ids, rec_distances


In [14]:
import random

def genre_home_screen(user_id, df_ratings, df_movies, X, item_mapper, item_inv_mapper, 
                      genres=['Action', 'Comedies', 'Documentaries'], top_n=50):
    # 1. Get the user's ratings
    df_user = df_ratings[df_ratings['user_id'] == user_id]
    if df_user.empty:
        print("⚠️ This user has no ratings!")
        return None, {}

    # 2. Find top-rated show(s)
    max_rating = df_user['rating'].max()
    top_rated_shows = df_user[df_user['rating'] == max_rating]['show_id'].tolist()

    # 3. Pick a seed show randomly
    seed_show_id = random.choice(top_rated_shows)

    # 4. Get collaborative recommendations
    rec_ids, _ = recommend(seed_show_id, X, item_mapper, item_inv_mapper, k=top_n)

    # 5. Filter out shows the user has already rated
    rec_ids = [sid for sid in rec_ids if sid not in df_user['show_id'].values]

    # 6. Get DataFrame of recommended shows
    rec_df = df_movies[df_movies['show_id'].isin(rec_ids)]

    # 7. Group recommendations by genre
    genre_sections = {}
    for genre in genres:
        if genre in rec_df.columns:
            genre_recs = rec_df[rec_df[genre] == 1][['title', 'description']].head(5)
            if not genre_recs.empty:
                genre_sections[genre] = genre_recs.reset_index(drop=True)

    # 8. Get the name of the seed show for labeling
    seed_title = df_movies[df_movies['show_id'] == seed_show_id]['title'].values[0]

    return seed_title, genre_sections


In [15]:
user_id = df_ratings['user_id'].sample(1).iloc[0]  # pick one randomly

seed_title, genre_lists = genre_home_screen(
    user_id, df_ratings, df_movies, X, item_mapper, item_inv_mapper,
    genres=['Action', 'Comedies', 'Documentaries', 'Children', 'Dramas']
)

# Display it!
print(f"\n🎬 Because you liked **{seed_title}**, here are some picks:")

for genre, recs in genre_lists.items():
    print(f"\n🎞️ {genre} Recommendations:")
    display(recs)


ValueError: Expected n_neighbors <= n_samples_fit, but n_neighbors = 51, n_samples_fit = 6, n_samples = 1