In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


In [14]:
ratings = pd.read_csv('/content/drive/MyDrive/Data Mining and Warehouse Lab/ratings.dat',
                      sep='::', engine='python',
                      names=['user_id', 'movie_id', 'rating', 'timestamp'])

movies = pd.read_csv('/content/drive/MyDrive/Data Mining and Warehouse Lab/movies.dat',
                     sep='::', engine='python',
                     names=['movie_id', 'title', 'genres'],
                     encoding='latin-1') # or 'iso-8859-1'

# Step 4: Create a user-movie rating matrix
user_movie_matrix = ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)
movie_features = user_movie_matrix.T  # Transpose for movie-based similarity

# Step 5: Compute cosine similarity between movies
similarity_matrix = cosine_similarity(movie_features)
similarity_matrix = pd.DataFrame(similarity_matrix, index=movie_features.index, columns=movie_features.index)

#Function to Recommend Similar Movies
def recommend_movies(movie_id, top_n=5):
    if movie_id not in similarity_matrix.index:
        print(f"Movie ID {movie_id} not found in similarity matrix.")
        return pd.DataFrame()
    similar_scores = similarity_matrix.loc[movie_id].drop(labels=[movie_id])
    top_similar = similar_scores.sort_values(ascending=False).head(top_n)
    return movies[movies['movie_id'].isin(top_similar.index)]

#  Function for Personalized Recommendations
def personalized_recommendations(user_id, top_n=10):
    user_ratings = ratings[ratings['user_id'] == user_id].sort_values(by='rating', ascending=False)
    top_movies = user_ratings['movie_id'].head(3)
    seen_movies = set(user_ratings['movie_id'])
    unseen_movies = set(movies['movie_id']) - seen_movies

    recommendations = {}

    for movie in top_movies:
        if movie not in similarity_matrix.index:
            continue
        similar_movies = similarity_matrix.loc[movie].drop(labels=[movie])
        for sim_movie, score in similar_movies.items():
            if sim_movie in unseen_movies:
                recommendations[sim_movie] = recommendations.get(sim_movie, 0) + score

    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    top_recs = [movie_id for movie_id, _ in sorted_recommendations[:top_n]]

    avg_ratings = ratings.groupby('movie_id')['rating'].mean()
    top_recs_sorted = sorted(top_recs, key=lambda x: avg_ratings.get(x, 0), reverse=True)
    return movies[movies['movie_id'].isin(top_recs_sorted)]


print("Similar movies to movie ID 1:")
display(recommend_movies(1))

print("Personalized recommendations for user ID 10:")
display(personalized_recommendations(10))


Similar movies to movie ID 1:


Unnamed: 0,movie_id,title,genres
584,588,Aladdin (1992),Animation|Children's|Comedy|Musical
1245,1265,Groundhog Day (1993),Comedy|Romance
1250,1270,Back to the Future (1985),Comedy|Sci-Fi
2286,2355,"Bug's Life, A (1998)",Animation|Children's|Comedy
3045,3114,Toy Story 2 (1999),Animation|Children's|Comedy


Personalized recommendations for user ID 10:


Unnamed: 0,movie_id,title,genres
1226,1246,Dead Poets Society (1989),Drama
1617,1663,Stripes (1981),Comedy
2019,2088,Popeye (1980),Adventure|Comedy|Musical
2081,2150,"Gods Must Be Crazy, The (1980)",Comedy
2337,2406,Romancing the Stone (1984),Action|Adventure|Comedy|Romance
2351,2420,"Karate Kid, The (1984)",Drama
2402,2471,Crocodile Dundee II (1988),Adventure|Comedy
2409,2478,Three Amigos! (1986),Comedy|Western
3379,3448,"Good Morning, Vietnam (1987)",Comedy|Drama|War
3457,3526,Parenthood (1989),Comedy|Drama
