# Requirement #2
## DATA IMPORT

In [19]:
import pandas as pd

# Read the three source tables from CSV files in the working directory.
# df_users is loaded here but kept only for later use.
df_ratings, df_movies, df_users = (
    pd.read_csv(csv_path)
    for csv_path in ('movies_ratings.csv', 'movies_titles.csv', 'movies_users.csv')
)



## DATA UNDERSTANDING

In [20]:
# Print the (rows, columns) of both tables, one per line,
# then display the first rows of the ratings table as the cell output.
print(df_ratings.shape, df_movies.shape, sep='\n')
df_ratings.head()


(2147, 3)
(8508, 42)


Unnamed: 0,user_id,show_id,rating
0,1,s8381,4
1,1,s3466,5
2,1,s3181,4
3,1,s294,3
4,1,s7739,5


## DATA PREPARATION

In [21]:
# Count ratings per show
counts = df_ratings['show_id'].value_counts()

# Minimum number of ratings a show needs to be kept.
# A value of 1 keeps every show that has at least one rating.
MIN_RATINGS_PER_SHOW = 1
keep_shows = counts[counts >= MIN_RATINGS_PER_SHOW].index

# Keep only ratings for those shows, then drop repeated (user, show) pairs,
# keeping the first occurrence. Built as one chained expression instead of
# mutating the frame in place.
df_ratings = (
    df_ratings[df_ratings['show_id'].isin(keep_shows)]
    .drop_duplicates(subset=['user_id', 'show_id'], keep='first')
    .copy()
)


## MODELING


In [22]:
import numpy as np
from scipy.sparse import csr_matrix

def create_matrix(df, user, item, rating):
    """Build a sparse item-by-user rating matrix plus id <-> index lookups.

    Args:
        df: DataFrame holding one rating per row.
        user: name of the column with user ids.
        item: name of the column with item ids.
        rating: name of the column with the rating values.

    Returns:
        A tuple ``(X, user_mapper, item_mapper, user_inv_mapper, item_inv_mapper)``:
        ``X`` is a ``csr_matrix`` of shape (number of distinct items, number of
        distinct users); the two ``*_mapper`` dicts map an id to its row/column
        index (ids taken in ``np.unique`` order); the two ``*_inv_mapper`` dicts
        map an index back to its id.
    """
    n_users = df[user].nunique()
    n_items = df[item].nunique()

    # Sorted distinct ids; the position in these arrays is the matrix index.
    user_labels = np.unique(df[user])
    item_labels = np.unique(df[item])

    user_inv_mapper = dict(zip(range(n_users), user_labels))
    item_inv_mapper = dict(zip(range(n_items), item_labels))

    user_mapper = {label: pos for pos, label in zip(range(n_users), user_labels)}
    item_mapper = {label: pos for pos, label in zip(range(n_items), item_labels)}

    # Translate every rating row into its (item row, user column) coordinates.
    col_positions = list(map(user_mapper.__getitem__, df[user]))
    row_positions = list(map(item_mapper.__getitem__, df[item]))

    X = csr_matrix(
        (df[rating], (row_positions, col_positions)),
        shape=(n_items, n_users),
    )

    return X, user_mapper, item_mapper, user_inv_mapper, item_inv_mapper


In [23]:
# Build the item-by-user rating matrix and its id <-> index lookup tables
# from the prepared ratings frame.
(
    X,
    user_mapper,
    item_mapper,
    user_inv_mapper,
    item_inv_mapper,
) = create_matrix(df_ratings, 'user_id', 'show_id', 'rating')


In [24]:
from sklearn.neighbors import NearestNeighbors

def recommend(itemId, X, item_mapper, item_inv_mapper, k, metric='cosine', messages=True):
    """Return up to ``k`` items whose rows in ``X`` are nearest to ``itemId``.

    Args:
        itemId: id of the query item; must be a key of ``item_mapper``.
        X: matrix whose rows are items; it is both the data the
            nearest-neighbour model is fitted on and the source of the
            query row.
        item_mapper: dict mapping item id -> row index in ``X``.
        item_inv_mapper: dict mapping row index in ``X`` -> item id.
        k: number of neighbours wanted. Fewer are returned when ``X`` has
            fewer than ``k + 1`` rows.
        metric: distance metric handed to ``NearestNeighbors``.
        messages: when true, print the raw neighbour indices, the recommended
            ids and their distances.

    Returns:
        ``(rec_ids, rec_distances)``: a list of item ids and an array of the
        matching distances, in the order returned by the neighbour search,
        with the query item removed.

    Raises:
        KeyError: if ``itemId`` is not in ``item_mapper``.
    """
    item = item_mapper[itemId]
    item_vector = X[item]

    # Ask for one extra neighbour because the query item is itself a row of X,
    # but never for more neighbours than there are rows to search.
    n_neighbors = min(k + 1, X.shape[0])
    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric).fit(X)
    rec = knn.kneighbors(item_vector.reshape(1, -1), return_distance=True)

    rec_indices = rec[1][0]
    rec_distances = rec[0][0]

    # Remove the query item by its index rather than by assuming it is the
    # first hit: with tied distances another row can be returned ahead of it.
    keep = rec_indices != item
    if keep.all():
        # The query item was crowded out by ties; drop the last neighbour
        # instead so the result still has one entry fewer than was requested.
        keep[-1] = False

    rec_ids = [item_inv_mapper[i] for i in rec_indices[keep]]
    rec_distances = rec_distances[keep]

    if messages:
        print(f"Recommended item indices:\n{rec_indices}")
        print(f"Recommended show_ids:\n{rec_ids}")
        print(f"Distances:\n{rec_distances}")

    return rec_ids, rec_distances


In [35]:
import random

def genre_home_screen_smart(user_id, df_ratings, df_movies, X, item_mapper, item_inv_mapper,
                            fallback_n=5, genre_cols=None):
    """Build per-genre recommendation lists seeded by one of a user's top-rated shows.

    One of the user's highest-rated shows is picked at random as the seed.
    Collaborative neighbours of the seed (via ``recommend``) are grouped by the
    seed's genres; a genre with fewer than ``fallback_n`` collaborative hits is
    topped up with content-based results from the module-level
    ``get_recommendations``.

    Args:
        user_id: value to look up in ``df_ratings['user_id']``.
        df_ratings: ratings frame with ``user_id``, ``show_id`` and ``rating`` columns.
        df_movies: catalogue frame with ``show_id``, ``title``, ``description``
            and 0/1 genre indicator columns.
        X: item matrix passed through to ``recommend``.
        item_mapper: dict mapping show id -> row index in ``X``.
        item_inv_mapper: dict mapping row index in ``X`` -> show id.
        fallback_n: maximum number of rows per genre section.
        genre_cols: genre column names; when ``None``, every column of
            ``df_movies`` whose values are all 0 or 1 is used.

    Returns:
        ``(seed_title, genre_sections)`` where ``genre_sections`` maps a genre
        name to a DataFrame with ``title`` and ``description`` columns.
        ``(None, {})`` is returned when the user has no ratings, or when none
        of the rated shows is present in both ``df_movies`` and ``item_mapper``.
    """
    # Step 1: Get user's ratings
    df_user = df_ratings[df_ratings['user_id'] == user_id]
    if df_user.empty:
        print("⚠️ This user has no ratings!")
        return None, {}

    # Step 2: Pick one top-rated show. Only shows that exist in the catalogue
    # and in the matrix are eligible, so the title lookup and recommend()
    # cannot fail on a show that is missing from either.
    usable = df_user[
        df_user['show_id'].isin(df_movies['show_id'])
        & df_user['show_id'].isin(list(item_mapper))
    ]
    if usable.empty:
        print("⚠️ None of this user's rated shows are in the catalog!")
        return None, {}

    top_rated_shows = usable.loc[usable['rating'] == usable['rating'].max(), 'show_id'].tolist()
    seed_show_id = random.choice(top_rated_shows)
    seed_row = df_movies[df_movies['show_id'] == seed_show_id].iloc[0]
    seed_title = seed_row['title']

    # Step 3: Get genre columns if not passed
    if genre_cols is None:
        genre_cols = [col for col in df_movies.columns if df_movies[col].isin([0, 1]).all()]

    # Step 4: Get genres for the seed show
    seed_genres = [g for g in genre_cols if seed_row[g] == 1]

    # Step 5: Get collaborative recommendations, minus shows already rated
    rated_ids = set(df_user['show_id'])
    max_k = min(30, X.shape[0] - 1)
    rec_ids, _ = recommend(seed_show_id, X, item_mapper, item_inv_mapper, k=max_k)
    rec_ids = [sid for sid in rec_ids if sid not in rated_ids]

    # isin() returns rows in catalogue order; restore the neighbour ranking so
    # that .head() below keeps the closest shows rather than arbitrary ones.
    rank = {sid: pos for pos, sid in enumerate(rec_ids)}
    rec_df = df_movies[df_movies['show_id'].isin(rec_ids)]
    rec_df = rec_df.iloc[rec_df['show_id'].map(rank).to_numpy().argsort(kind='stable')]

    # Titles the user already rated, so the content fallback skips them too.
    rated_titles = set(df_movies.loc[df_movies['show_id'].isin(rated_ids), 'title'])

    # Step 6: Build genre sections using only seed show genres
    genre_sections = {}
    content_fallback = None  # computed at most once, on first need

    for genre in seed_genres:
        if genre not in rec_df.columns:
            continue

        collab_recs = rec_df.loc[rec_df[genre] == 1, ['title', 'description']]
        needed = fallback_n - len(collab_recs)

        if needed > 0:
            if content_fallback is None:
                # Same seed for every genre, so one call serves the whole loop.
                content_fallback = get_recommendations(seed_title, top_n=30)

            genre_titles = df_movies.loc[df_movies[genre] == 1, 'title']
            eligible = (
                content_fallback['title'].isin(genre_titles)
                & ~content_fallback['title'].isin(rated_titles)
            )
            fallback_genre = content_fallback.loc[eligible, ['title', 'description']].head(needed)

            merged = pd.concat([collab_recs, fallback_genre]).drop_duplicates().head(fallback_n)
        else:
            merged = collab_recs.head(fallback_n)

        if not merged.empty:
            genre_sections[genre] = merged.reset_index(drop=True)

    return seed_title, genre_sections




In [40]:
# Content-based filter: TF-IDF over show descriptions, used as the fallback recommender

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# 1. Build TF-IDF matrix (once)
# Missing descriptions are replaced with an empty string first, because
# TfidfVectorizer rejects NaN documents.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_movies['description'].fillna(''))

# 2. Build cosine similarity matrix (once)
# Row i / column j is the dot product of the TF-IDF vectors of shows i and j,
# in the row order of df_movies.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# 3. Create function to get content-based recommendations
def get_recommendations(title, top_n=10):
    """Return the ``top_n`` shows whose descriptions are most similar to ``title``.

    Reads the module-level ``df_movies`` and ``cosine_sim``; row ``i`` of
    ``cosine_sim`` is taken to belong to the ``i``-th row of ``df_movies``.

    Args:
        title: show title, matched case-insensitively against
            ``df_movies['title']``. The first match is used.
        top_n: maximum number of rows to return.

    Returns:
        DataFrame with ``title`` and ``description`` columns, ordered from most
        to least similar, never containing the query show itself.

    Raises:
        IndexError: if no show in ``df_movies`` has that title.
    """
    is_match = df_movies['title'].str.lower().eq(title.lower()).fillna(False)
    # Work with row positions, not index labels: cosine_sim and .iloc are both
    # positional, so this stays correct even if df_movies has a non-default index.
    positions = np.flatnonzero(is_match.to_numpy(dtype=bool))
    if positions.size == 0:
        raise IndexError(f"Title not found in df_movies: {title!r}")
    idx = positions[0]

    scores = np.asarray(cosine_sim[idx], dtype=float)
    # Stable sort on the negated scores: highest first, ties in row order.
    order = np.argsort(-scores, kind='stable')
    # Exclude the query row explicitly instead of assuming it sorts first
    # (it does not when other rows tie with it).
    order = order[order != idx][:max(top_n, 0)]
    return df_movies.iloc[order][['title', 'description']]


In [38]:
# Treat a column of df_movies as a genre indicator when all of its values are 0 or 1.
genre_cols = list(
    filter(lambda name: df_movies[name].isin([0, 1]).all(), df_movies.columns)
)
print("Available genre columns:\n", genre_cols)


Available genre columns:
 ['Action', 'Adventure', 'Anime Series International TV Shows', 'British TV Shows Docuseries International TV Shows', 'Children', 'Comedies', 'Comedies Dramas International Movies', 'Comedies International Movies', 'Comedies Romantic Movies', 'Crime TV Shows Docuseries', 'Documentaries', 'Documentaries International Movies', 'Docuseries', 'Dramas', 'Dramas International Movies', 'Dramas Romantic Movies', 'Family Movies', 'Fantasy', 'Horror Movies', 'International Movies Thrillers', 'International TV Shows Romantic TV Shows TV Dramas', "Kids' TV", 'Language TV Shows', 'Musicals', 'Nature TV', 'Reality TV', 'Spirituality', 'TV Action', 'TV Comedies', 'TV Dramas', 'Talk Shows TV Comedies', 'Thrillers']


In [41]:
# Draw one user id at random from the prepared ratings.
user_id = df_ratings['user_id'].sample(n=1).iloc[0]

# Build that user's home screen: the seed title plus one table per genre.
seed_title, genre_lists = genre_home_screen_smart(
    user_id=user_id,
    df_ratings=df_ratings,
    df_movies=df_movies,
    X=X,
    item_mapper=item_mapper,
    item_inv_mapper=item_inv_mapper,
)

print(f"\n🎬 Because you liked **{seed_title}**, you might also enjoy:\n")

# One heading and one rendered table per genre section.
for genre in genre_lists:
    recs = genre_lists[genre]
    print(f"\n🎞️ {genre} Picks:")
    display(recs)


Recommended item indices:
[1 0 2 3 4 5]
Recommended show_ids:
['s2179', 's540', 's6508', 's7748', 's8804']
Distances:
[1. 1. 1. 1. 1.]

🎬 Because you liked **The Stranded**, you might also enjoy:


🎞️ Action Picks:


Unnamed: 0,title,description
0,The New Legends of Monkey,A valiant girl liberates the Monkey King – a g...
1,The Walking Dead,In the wake of a zombie apocalypse survivors h...
2,Naruto Shippûden the Movie: The Will of Fire,When four out of five ninja villages are destr...
3,Ava,An elite assassin wrestling with doubts about ...
4,Kingdom,While strange rumors about their ill king grip...



🎞️ Adventure Picks:


Unnamed: 0,title,description
0,The New Legends of Monkey,A valiant girl liberates the Monkey King – a g...
1,The Walking Dead,In the wake of a zombie apocalypse survivors h...
2,Naruto Shippûden the Movie: The Will of Fire,When four out of five ninja villages are destr...
3,Ava,An elite assassin wrestling with doubts about ...
4,Kingdom,While strange rumors about their ill king grip...



🎞️ Dramas Picks:


Unnamed: 0,title,description
0,The Next Step,This drama series follows young dancers at the...
1,Club Friday To Be Continued - Friend & Enemy,A love triangle spirals out of control wreakin...
2,The Walking Dead,In the wake of a zombie apocalypse survivors h...
3,Moms at War,Two fierce mothers become rivals when a school...
4,Prague,The chaotic ramblings of a young architect's m...



🎞️ TV Action Picks:


Unnamed: 0,title,description
0,The New Legends of Monkey,A valiant girl liberates the Monkey King – a g...
1,The Walking Dead,In the wake of a zombie apocalypse survivors h...
2,Kingdom,While strange rumors about their ill king grip...



🎞️ TV Dramas Picks:


Unnamed: 0,title,description
0,The Next Step,This drama series follows young dancers at the...
1,Club Friday To Be Continued - Friend & Enemy,A love triangle spirals out of control wreakin...
2,The Walking Dead,In the wake of a zombie apocalypse survivors h...
3,Invisible City,After a family tragedy a man discovers mythica...
4,3%,In a future where the elite inhabit an island ...
