In [None]:
#Step 1: Setup and Dataset
import pandas as pd

# Toy ratings log, written as one (user_id, movie, rating) record per line.
_rating_rows = [
    (1, 'Batman', 5),
    (1, 'Superman', 4),
    (1, 'Spiderman', 3),
    (2, 'Batman', 5),
    (2, 'Spiderman', 4),
    (3, 'Superman', 5),
    (3, 'Spiderman', 4),
    (4, 'Batman', 3),
]

# Transpose the records into the column-oriented dict that pandas consumes.
data = {
    column: list(values)
    for column, values in zip(('user_id', 'movie', 'rating'), zip(*_rating_rows))
}

df = pd.DataFrame(data)
print(df)

   user_id      movie  rating
0        1     Batman       5
1        1   Superman       4
2        1  Spiderman       3
3        2     Batman       5
4        2  Spiderman       4
5        3   Superman       5
6        3  Spiderman       4
7        4     Batman       3


In [None]:
#Step 2: Understand the Data
#Each row shows a user, a movie, and their rating.
#This is the starting point for most recommendation systems.

#We'll now use user-based Collaborative Filtering: recommend movies that users with similar ratings liked.

In [None]:
# Reshape the long ratings table into a user x movie grid.
# (user, movie) pairs with no rating come out as NaN.
user_movie_matrix = pd.pivot_table(
    df,
    values='rating',
    index='user_id',
    columns='movie',
)
print(user_movie_matrix)

movie    Batman  Spiderman  Superman
user_id                             
1           5.0        3.0       4.0
2           5.0        4.0       NaN
3           NaN        4.0       5.0
4           3.0        NaN       NaN


In [None]:
# Replace the NaN gaps with 0 so every user row is fully numeric.
# From here on, a 0 in the matrix means "this user has not rated the movie".
user_movie_matrix = user_movie_matrix.fillna(value=0)
user_movie_matrix

movie,Batman,Spiderman,Superman
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,5.0,3.0,4.0
2,5.0,4.0,0.0
3,0.0,4.0,5.0
4,3.0,0.0,0.0


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# Calculate the pairwise cosine similarity between the users' rating rows.
similarity = cosine_similarity(user_movie_matrix)
similarity

array([[1.        , 0.81719329, 0.70676177, 0.70710678],
       [0.81719329, 1.        , 0.3902439 , 0.78086881],
       [0.70676177, 0.3902439 , 1.        , 0.        ],
       [0.70710678, 0.78086881, 0.        , 1.        ]])

In [None]:
# Label the raw similarity array with user ids on both axes so it can be indexed by user.
similar_users = pd.DataFrame(
    data=similarity,
    columns=user_movie_matrix.index,
    index=user_movie_matrix.index,
)

In [None]:
# Show the labelled user-to-user similarity table (rows and columns are both user_id).
print(similar_users)

user_id         1         2         3         4
user_id                                        
1        1.000000  0.817193  0.706762  0.707107
2        0.817193  1.000000  0.390244  0.780869
3        0.706762  0.390244  1.000000  0.000000
4        0.707107  0.780869  0.000000  1.000000


In [None]:
def recommend_movies(user_id, user_movie_matrix, similarity_matrix, normalize=False, verbose=False):
  """Score the movies `user_id` has not rated, using the other users' ratings.

  Each other user's ratings are weighted by that user's similarity to
  `user_id` and summed per movie. Only movies whose entry for `user_id`
  is 0 (the "not rated" marker) are returned.

  Parameters
  ----------
  user_id : label
      Target user; must be a column/index label of `similarity_matrix`
      and an index label of `user_movie_matrix`.
  user_movie_matrix : pandas.DataFrame
      Users as rows, movies as columns, 0 where a user has not rated a movie.
  similarity_matrix : pandas.DataFrame
      Square user-by-user similarity table labelled with the same user ids.
  normalize : bool, default False
      If False, return the raw similarity-weighted sum (original behaviour),
      which grows with the number of neighbours who rated a movie.
      If True, divide each sum by the total similarity of the neighbours who
      actually rated that movie, giving a weighted average on the rating
      scale. Movies with no positively-similar rater score 0.
  verbose : bool, default False
      Print the neighbour similarities and the target user's ratings.

  Returns
  -------
  pandas.Series
      Scores indexed by movie, sorted from highest to lowest. Empty when
      there is no other user to learn from.

  Raises
  ------
  KeyError
      If `user_id` is not present in the supplied tables.
  """
  # Similarity of every other user to the target, most similar first.
  neighbor_sims = similarity_matrix[user_id].drop(user_id).sort_values(ascending=False)
  rated_by_user = user_movie_matrix.loc[user_id]

  if verbose:
    print("similar users", neighbor_sims)
    print("rated_by_user", rated_by_user)

  # Nobody else to learn from: keep the historical "empty result" contract.
  if neighbor_sims.empty:
    return pd.Series(dtype=float)

  # Weight each neighbour's row by their similarity and add the rows up per movie.
  neighbor_ratings = user_movie_matrix.loc[neighbor_sims.index]
  scores = neighbor_ratings.mul(neighbor_sims, axis=0).sum(axis=0)

  if normalize:
    # Total similarity of the neighbours who rated each movie (0 == not rated).
    rated_mask = neighbor_ratings.ne(0).astype(float)
    sim_mass = rated_mask.mul(neighbor_sims, axis=0).sum(axis=0)
    # Mask non-positive denominators to NaN to avoid dividing by zero, then score those as 0.
    scores = scores.div(sim_mass.where(sim_mass > 0)).fillna(0.0)

  # Recommend only what the target user has not rated yet.
  return scores[rated_by_user == 0].sort_values(ascending=False)

In [None]:
# Rank the movies that user 4 has not rated yet.
recommend = recommend_movies(
    user_id=4,
    user_movie_matrix=user_movie_matrix,
    similarity_matrix=similar_users,
)
print(recommend)

similar user user_id
2    0.780869
1    0.707107
3    0.000000
Name: 4, dtype: float64
rateed_by_user movie
Batman       3.0
Spiderman    0.0
Superman     0.0
Name: 4, dtype: float64
movie
Spiderman    5.244796
Superman     2.828427
dtype: float64
