In [57]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Data Loading
### Load the MovieLens 100k dataset from multiple raw files (ratings, movies, genres, and users) and merge them into a single unified dataframe (movie_data) containing user interactions enriched with movie metadata and user information.

In [58]:
# 1. Defining path
u_data_path = '/Users/muskan.m/project_helix/data/ml-100k/u.data'
u_item_path = '/Users/muskan.m/project_helix/data/ml-100k/u.item'
u_user_path = '/Users/muskan.m/project_helix/data/ml-100k/u.user'
u_genre_path = '/Users/muskan.m/project_helix/data/ml-100k/u.genre'

# 2. Load ratings
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(u_data_path, sep='\t', names=r_cols)

# 3. Load genre names
genre_cols = ['genre', 'genre_id']
genres = pd.read_csv(u_genre_path, sep='|', names=genre_cols)
genre_names = genres['genre'].tolist()

# 4. Load movies + genre flags
m_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url'] + genre_names

movies = pd.read_csv(
    u_item_path,
    sep='|',
    names=m_cols,
    encoding='latin-1'
)

# 5. Merge ratings + movies
df1 = pd.merge(ratings, movies, on='movie_id')

# 6. Load users
u_cols = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
users = pd.read_csv(u_user_path, sep='|', names=u_cols)

# 7. Merge df1 + users → final dataset
movie_data = pd.merge(df1, users, on='user_id')
movie_data.head(5)


Unnamed: 0,user_id,movie_id,rating,timestamp,title,release_date,video_release_date,imdb_url,unknown,Action,...,Mystery,Romance,Sci-Fi,Thriller,War,Western,age,gender,occupation,zip_code
0,196,242,3,881250949,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,49,M,writer,55105
1,186,302,3,891717742,L.A. Confidential (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?L%2EA%2E+Conf...,0,0,...,1,0,0,1,0,0,39,F,executive,0
2,22,377,1,878887116,Heavyweights (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Heavyweights%...,0,0,...,0,0,0,0,0,0,25,M,writer,40206
3,244,51,2,880606923,Legends of the Fall (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Legends%20of%...,0,0,...,0,1,0,0,1,1,28,M,technician,80525
4,166,346,1,886397596,Jackie Brown (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?imdb-title-11...,0,0,...,0,0,0,0,0,0,47,M,educator,55113


In [59]:
# Convert Unix Timestamp to  Readable Date
movie_data['date'] = pd.to_datetime(movie_data['timestamp'], unit='s')
movie_data.head(2)


Unnamed: 0,user_id,movie_id,rating,timestamp,title,release_date,video_release_date,imdb_url,unknown,Action,...,Romance,Sci-Fi,Thriller,War,Western,age,gender,occupation,zip_code,date
0,196,242,3,881250949,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,49,M,writer,55105,1997-12-04 15:55:49
1,186,302,3,891717742,L.A. Confidential (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?L%2EA%2E+Conf...,0,0,...,0,0,1,0,0,39,F,executive,0,1998-04-04 19:22:22


In [60]:
#droping unnecessary columns
movie_data.drop(columns=['video_release_date', 'imdb_url', 'timestamp', 'unknown'], inplace=True)
movie_data.head(2)

Unnamed: 0,user_id,movie_id,rating,title,release_date,Action,Adventure,Animation,Children's,Comedy,...,Romance,Sci-Fi,Thriller,War,Western,age,gender,occupation,zip_code,date
0,196,242,3,Kolya (1996),24-Jan-1997,0,0,0,0,1,...,0,0,0,0,0,49,M,writer,55105,1997-12-04 15:55:49
1,186,302,3,L.A. Confidential (1997),01-Jan-1997,0,0,0,0,0,...,0,0,1,0,0,39,F,executive,0,1998-04-04 19:22:22


In [61]:
#changing column name 
movie_data = movie_data.rename(columns={
    "Children's": "Children",
    "Film-Noir": "Film_Noir",
    "Sci-Fi": "Sci_Fi"
})


# gouping genre columns
genre_cols = [
    'Action','Adventure','Animation',"Children",'Comedy','Crime',
    'Documentary','Drama','Fantasy','Film_Noir','Horror','Musical',
    'Mystery','Romance','Sci_Fi','Thriller','War','Western'
]


In [62]:
#reordering columns for better understanding 
final_cols = [
    'date','user_id', 'age', 'gender', 'occupation',
    'movie_id', 'title', 'release_date',
    'rating'
] + genre_cols
movie_data = movie_data[final_cols]
movie_data.head()


Unnamed: 0,date,user_id,age,gender,occupation,movie_id,title,release_date,rating,Action,...,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,1997-12-04 15:55:49,196,49,M,writer,242,Kolya (1996),24-Jan-1997,3,0,...,0,0,0,0,0,0,0,0,0,0
1,1998-04-04 19:22:22,186,39,F,executive,302,L.A. Confidential (1997),01-Jan-1997,3,0,...,0,1,0,0,1,0,0,1,0,0
2,1997-11-07 07:18:36,22,25,M,writer,377,Heavyweights (1994),01-Jan-1994,1,0,...,0,0,0,0,0,0,0,0,0,0
3,1997-11-27 05:02:03,244,28,M,technician,51,Legends of the Fall (1994),01-Jan-1994,2,0,...,0,0,0,0,0,1,0,0,1,1
4,1998-02-02 05:33:16,166,47,M,educator,346,Jackie Brown (1997),01-Jan-1997,1,0,...,0,0,0,0,0,0,0,0,0,0


In [63]:
movie_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 27 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   date          100000 non-null  datetime64[ns]
 1   user_id       100000 non-null  int64         
 2   age           100000 non-null  int64         
 3   gender        100000 non-null  object        
 4   occupation    100000 non-null  object        
 5   movie_id      100000 non-null  int64         
 6   title         100000 non-null  object        
 7   release_date  99991 non-null   object        
 8   rating        100000 non-null  int64         
 9   Action        100000 non-null  int64         
 10  Adventure     100000 non-null  int64         
 11  Animation     100000 non-null  int64         
 12  Children      100000 non-null  int64         
 13  Comedy        100000 non-null  int64         
 14  Crime         100000 non-null  int64         
 15  Documentary   1000

In [64]:
movie_data.shape

(100000, 27)

### Removing null and duplicates

In [65]:
#check null values
movie_data.isnull().sum()

date            0
user_id         0
age             0
gender          0
occupation      0
movie_id        0
title           0
release_date    9
rating          0
Action          0
Adventure       0
Animation       0
Children        0
Comedy          0
Crime           0
Documentary     0
Drama           0
Fantasy         0
Film_Noir       0
Horror          0
Musical         0
Mystery         0
Romance         0
Sci_Fi          0
Thriller        0
War             0
Western         0
dtype: int64

In [66]:
# droping null values
movie_data.dropna()

Unnamed: 0,date,user_id,age,gender,occupation,movie_id,title,release_date,rating,Action,...,Fantasy,Film_Noir,Horror,Musical,Mystery,Romance,Sci_Fi,Thriller,War,Western
0,1997-12-04 15:55:49,196,49,M,writer,242,Kolya (1996),24-Jan-1997,3,0,...,0,0,0,0,0,0,0,0,0,0
1,1998-04-04 19:22:22,186,39,F,executive,302,L.A. Confidential (1997),01-Jan-1997,3,0,...,0,1,0,0,1,0,0,1,0,0
2,1997-11-07 07:18:36,22,25,M,writer,377,Heavyweights (1994),01-Jan-1994,1,0,...,0,0,0,0,0,0,0,0,0,0
3,1997-11-27 05:02:03,244,28,M,technician,51,Legends of the Fall (1994),01-Jan-1994,2,0,...,0,0,0,0,0,1,0,0,1,1
4,1998-02-02 05:33:16,166,47,M,educator,346,Jackie Brown (1997),01-Jan-1997,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1997-11-22 05:10:44,880,13,M,student,476,"First Wives Club, The (1996)",14-Sep-1996,3,0,...,0,0,0,0,0,0,0,0,0,0
99996,1997-11-17 19:39:03,716,36,F,administrator,204,Back to the Future (1985),01-Jan-1985,5,0,...,0,0,0,0,0,0,1,0,0,0
99997,1997-09-20 22:49:55,276,21,M,student,1090,Sliver (1993),01-Jan-1993,1,0,...,0,0,0,0,0,0,0,1,0,0
99998,1997-12-17 22:52:36,13,47,M,educator,225,101 Dalmatians (1996),27-Nov-1996,2,0,...,0,0,0,0,0,0,0,0,0,0


In [67]:
#check duplicates
movie_data.duplicated(subset=['user_id','movie_id']).sum()

np.int64(0)

In [68]:
#check Rating Validity
movie_data['rating'].unique()

array([3, 1, 2, 4, 5])

### User & Movie Activity Filtering
- Some users rated only 1 movie- removing those

In [69]:
 
# check
user_counts = movie_data['user_id'].value_counts()
movie_counts = movie_data['movie_id'].value_counts()

In [70]:
# filter
movie_data = movie_data[
    movie_data['user_id'].isin(user_counts[user_counts >= 5].index) &
    movie_data['movie_id'].isin(movie_counts[movie_counts >= 5].index)
]
movie_data.shape

(99287, 27)

# Baseline Recommender #1 - Popularity-Based 
### Idea
The popularity-based recommender suggests movies that are most popular across all users, based on the number of ratings and average rating.
This model does not personalize recommendations and serves as a global benchmark.

### Method
- Group interactions by movie_id
- Compute: Total number of ratings (rating_count)
           & Average rating (avg_rating)
- Filter out movies with very low number of ratings
- Rank movies by popularity and quality

### Output
- The model recommends the same top-N movies to every user.

### Limitation
- No personalization
- Ignores individual user preferences


In [71]:
#creating interations table to ignore the columns which are not required at this point
interactions = movie_data[['user_id', 'movie_id', 'rating', 'date']]


In [72]:
# taking only those interations which have a rating more than equal to three
interactions = interactions[interactions['rating'] >= 3]
interactions.shape


(82128, 4)

In [73]:
# create ta dataframe to calculate the popularity of a movie by using rating count and avg rating
popular = (
    interactions
    .groupby('movie_id')
    .agg(
        rating_count=('rating', 'count'),
        avg_rating=('rating', 'mean')
    )
    .reset_index()
)


In [74]:
# taking only those movies which have a rating count more than equal to fifty

popular = popular[popular['rating_count'] >= 50]


In [75]:
# sorting the popula movies based or rating_count first and then on avg_rating
popular = popular.sort_values(
    by=['rating_count', 'avg_rating'],
    ascending=False
)


In [76]:
# merging the movie title in the popular dataframe
popular = popular.merge(
    movies[['movie_id', 'title']],
    on='movie_id',
    how='left'
)


In [77]:
# creating a function to provide n numbers of top popular movies
def recommend_popular(n=10):
    return popular[['title', 'avg_rating', 'rating_count']].head(n)


In [78]:
recommend_popular(11)


Unnamed: 0,title,avg_rating,rating_count
0,Star Wars (1977),4.480287,558
1,Fargo (1996),4.329832,476
2,Return of the Jedi (1983),4.155462,476
3,Contact (1997),4.048458,454
4,Toy Story (1995),4.055156,417
5,Raiders of the Lost Ark (1981),4.364764,403
6,"English Patient, The (1996)",4.077889,398
7,Scream (1996),3.848718,390
8,"Godfather, The (1972)",4.471354,384
9,Air Force One (1997),3.886842,380


# Baseline Recommender #2 - Item-Based Collaborative Filtering
### Idea
Item-based Collaborative Filtering recommends movies based on similarity between items.
If two movies are rated similarly by many users, they are considered similar.

### Method
-Create a user–movie rating matrix
-Compute movie–movie similarity using Pearson correlation
-Apply a minimum co-rating threshold to reduce noisy similarity
-Recommend movies most similar to a given movie

### Output
-Recommendations depend on the movie a user has already rated, making this approach personalized.

### Limitation
-Sensitive to data sparsity
-Correlation can be unreliable for low-overlap items
-Does not consider the order or timing of user interactions

In [79]:
# creating table of user interation with movies
user_movie_matrix = interactions.pivot_table(
    index='user_id',
    columns='movie_id',
    values='rating'
)
user_movie_matrix.head()


movie_id,1,2,3,4,5,6,7,8,9,10,...,1534,1540,1555,1558,1591,1592,1597,1598,1615,1620
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,


In [80]:
min_common_ratings = 10 # minimum co-rating threshold to ensure similarity is based on sufficient data

# convert ratings into binary form: 1 if user rated the movie, 0 if not
user_movie_binary = user_movie_matrix.notnull().astype(int) 

user_movie_binary.head()


movie_id,1,2,3,4,5,6,7,8,9,10,...,1534,1540,1555,1558,1591,1592,1597,1598,1615,1620
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,1,1,1,1,1,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [81]:
#count how many users have rated each pair of movies
common_ratings = user_movie_binary.T.dot(user_movie_binary)
common_ratings.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1534,1540,1555,1558,1591,1592,1597,1598,1615,1620
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,417,76,43,115,47,9,230,133,159,47,...,0,4,2,2,2,2,1,1,5,5
2,76,106,19,62,25,4,67,48,40,13,...,0,4,3,0,0,0,1,0,3,1
3,43,19,59,30,13,2,47,17,30,11,...,0,0,0,0,0,0,1,0,2,0
4,115,62,30,179,32,4,112,87,88,31,...,0,2,2,0,1,1,3,1,3,1
5,47,25,13,32,71,1,47,31,35,5,...,0,1,0,0,0,0,1,0,0,1


In [82]:
# calculate similarity between movies based on how users rated them
movie_similarity = user_movie_matrix.corr(method='pearson')

movie_similarity.head()


movie_id,1,2,3,4,5,6,7,8,9,10,...,1534,1540,1555,1558,1591,1592,1597,1598,1615,1620
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.217091,0.030035,0.161584,0.423449,0.724882,0.151916,0.215601,0.185538,-0.004843,...,,0.707107,-1.0,,,-1.0,,,0.645497,0.642857
2,0.217091,1.0,-0.020851,0.289994,0.572848,,0.101259,0.283339,-0.111881,0.158114,...,,0.0,,,,,,,,
3,0.030035,-0.020851,1.0,-0.182121,0.28768,,0.063644,0.403275,-0.093175,0.075955,...,,,,,,,,,,
4,0.161584,0.289994,-0.182121,1.0,0.291274,0.333333,0.16958,0.350387,-0.022763,0.062626,...,,-1.0,,,,,-1.0,,1.0,
5,0.423449,0.572848,0.28768,0.291274,1.0,,0.12434,0.308939,-0.001612,,...,,,,,,,,,,


In [83]:
# create funtion that recommends movies that are most similar to a given movie 
# based on user rating patterns and sufficient common user overlap.
def recommend_similar_movies(movie_id, n=10):
    sims = movie_similarity[movie_id].dropna()
    common = common_ratings[movie_id]

    valid = common[common >= min_common_ratings].index
    sims = sims.loc[valid]

    return sims.sort_values(ascending=False).iloc[1:n+1]




In [84]:
# create function that converts movie IDs into readable movie titles 
# for the final recommendation output.
def recommend_similar_with_titles(movie_id, n=10):
    recs = recommend_similar_movies(movie_id, n).reset_index()
    recs.columns = ['movie_id', 'similarity']
    return recs.merge(
        movies[['movie_id', 'title']],
        on='movie_id',
        how='left'
    )


In [85]:
recommend_similar_with_titles(1013, 10)


Unnamed: 0,movie_id,similarity,title
0,678,0.686406,Volcano (1997)
1,1,0.666667,Toy Story (1995)
2,333,0.662122,"Game, The (1997)"
3,597,0.62361,Eraser (1996)
4,298,0.542326,Face/Off (1997)
5,252,0.502331,"Lost World: Jurassic Park, The (1997)"
6,405,0.490653,Mission: Impossible (1996)
7,294,0.460566,Liar Liar (1997)
8,300,0.440086,Air Force One (1997)
9,50,0.360381,Star Wars (1977)


# Baseline Recommender #3 – User-Based Collaborative Filtering

### Idea
User-based Collaborative Filtering recommends movies by finding users with similar rating patterns and suggesting movies that similar users have liked.

### Method
- Create a user–movie rating matrix  
- Mean-center user ratings to reduce individual rating bias  
- Compute user–user similarity using Pearson correlation  
- Select a neighborhood of similar users with sufficient co-rated movies  
- Predict ratings for unseen movies using weighted average of neighbors’ ratings  

### Output
- Personalized list of recommended movies for a given user  
- Each recommendation is accompanied by a predicted preference score  

### Limitation
- Performance degrades with sparse data  
- Similarity computation is expensive for large user sets  
- Does not model sequence, recency, or temporal behavior  


In [86]:
# Calculate the average rating given by each user
user_means = user_movie_matrix.mean(axis=1) 
user_means.head()

user_id
1    4.110092
2    3.929825
3    3.700000
4    4.434783
5    3.756757
dtype: float64

In [87]:
# Subtract each user's average rating from their ratings (mean-centering)
user_movie_centered = user_movie_matrix.sub(user_means, axis=0) 
user_movie_centered.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1534,1540,1555,1558,1591,1592,1597,1598,1615,1620
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.889908,-1.110092,-0.110092,-1.110092,-1.110092,0.889908,-0.110092,,0.889908,-1.110092,...,,,,,,,,,,
2,0.070175,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,0.243243,-0.756757,,,,,,,,,...,,,,,,,,,,


In [88]:
# Compute similarity between users based on their rating patterns
user_similarity = user_movie_centered.T.corr()
user_similarity.shape

(943, 943)

In [89]:
# Convert ratings matrix to binary (1 if rating exists, else 0)
user_binary = user_movie_matrix.notnull().astype(int)

# Count how many movies each pair of users has commonly rated
common_movies = user_binary.dot(user_binary.T)


In [90]:
def recommend_for_user(user_id, n=10, min_common=5):

    sims = user_similarity[user_id].dropna() # Get similarity scores of the target user with other users
    commons = common_movies[user_id]     # Get count of commonly rated movies with other users

    valid_users = commons[commons >= min_common].index # Keep only users who have enough movies in common
    sims = sims.loc[sims.index.intersection(valid_users)]

    sims = sims[sims > 0]  # Keep only positively similar users

    if len(sims) == 0: # If no similar users remain, return empty result
        return pd.Series(dtype=float)

    centered_filled = user_movie_centered.fillna(0) # Replace NaN values with 0 for safe computation

    weighted_ratings = centered_filled.loc[sims.index].T.dot(sims) # Compute weighted sum of ratings from similar users
    score = weighted_ratings / sims.sum()  # Normalize by total similarity

    watched = user_movie_matrix.loc[user_id] # Remove movies already watched by the user
    score = score[watched.isna()]

    return score.sort_values(ascending=False).head(n)


In [91]:
def recommend_user_with_titles(user_id, n=10):
    # Get recommended movie IDs and scores
    recs = recommend_for_user(user_id, n).reset_index() 
    recs.columns = ["movie_id", "score"]
    
    return recs.merge(
        movies[["movie_id", "title"]],
        on="movie_id",
        how="left"
    )


In [92]:
recommend_user_with_titles(7, 10)


Unnamed: 0,movie_id,score,title
0,313,0.223695,Titanic (1997)
1,272,0.114118,Good Will Hunting (1997)
2,302,0.108911,L.A. Confidential (1997)
3,475,0.087404,Trainspotting (1996)
4,1,0.071984,Toy Story (1995)
5,169,0.067782,"Wrong Trousers, The (1993)"
6,408,0.063751,"Close Shave, A (1995)"
7,276,0.059454,Leaving Las Vegas (1995)
8,316,0.056953,As Good As It Gets (1997)
9,124,0.056731,Lone Star (1996)


## Conclusion

### In this notebook, we built and analyzed multiple baseline recommender systems using the MovieLens 100k dataset.  
### We first cleaned and validated the data, created an interaction dataset, and then implemented three baseline models:

- Popularity-Based Recommender  
- Item-Based Collaborative Filtering  
- User-Based Collaborative Filtering  

### These baselines demonstrate the strengths and limitations of traditional recommendation approaches, especially with respect to personalization and data sparsity.

