<a href="https://colab.research.google.com/github/sajib-222/Data-Mining-and-warehouse/blob/main/Assignment-1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Assignment 01: Movie Similarity-Based Recommendation System

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean

In [None]:
# Mount Google Drive at /content/drive so the CSV files stored there can be read.
# This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder on the mounted Drive that holds ratings.csv and movies.csv. Change this
# single constant to point the notebook at a different copy of the data.
DATA_DIR = "/content/drive/MyDrive/Data Mining/week 2"

ratings = pd.read_csv(f"{DATA_DIR}/ratings.csv")
movies = pd.read_csv(f"{DATA_DIR}/movies.csv")

# Fail fast with a readable message if the files do not have the columns the
# rest of the notebook relies on.
assert {"userId", "movieId", "rating"} <= set(ratings.columns), \
    "ratings.csv must contain userId, movieId and rating columns"
assert "movieId" in movies.columns, "movies.csv must contain a movieId column"

In [None]:
# Preview the ratings table (pandas shows only the first and last rows of a large frame).
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [None]:
# Preview the movie catalogue: movieId, title and pipe-separated genres.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [None]:
# Build the user x movie ratings matrix: one row per userId, one column per
# movieId, holding the rating. Pairs with no rating are filled with 0, so
# later cells treat 0 as "not rated".
user_movie_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

In [None]:
def movie_similarity(movie1, movie2, matrix=None):
    """Euclidean-distance similarity between two movies, in the range (0, 1].

    Only users who rated BOTH movies are compared (a stored 0 means "not
    rated"). The distance ``d`` between the two rating vectors over those
    users is mapped to ``1 / (1 + d)``, so identical ratings give 1.0 and
    larger disagreements approach 0.

    Parameters
    ----------
    movie1, movie2 : column labels (movieIds) in the ratings matrix.
    matrix : pandas.DataFrame, optional
        User x movie ratings matrix with 0 for missing ratings. Defaults to
        the notebook-level ``user_movie_matrix``.

    Returns
    -------
    float
        The similarity score, or 0.0 when no user rated both movies.

    Raises
    ------
    KeyError
        If either movie is not a column of the matrix.
    """
    ratings_by_movie = user_movie_matrix if matrix is None else matrix

    # Look each column up once instead of re-indexing the frame repeatedly.
    first = ratings_by_movie[movie1]
    second = ratings_by_movie[movie2]

    both_rated = (first != 0) & (second != 0)
    if not both_rated.any():
        # No overlap: return a float so every code path yields the same type.
        return 0.0

    distance = euclidean(first[both_rated].to_numpy(), second[both_rated].to_numpy())
    return 1.0 / (1.0 + distance)

In [None]:
# Item-item similarity: cosine similarity between every pair of movie rating
# vectors, computed for the whole matrix in one call for better performance.
from sklearn.metrics.pairwise import cosine_similarity

movie_features = user_movie_matrix.T  # movies as rows, users as columns

# Wrap the raw array in a DataFrame labelled by movieId on both axes so a
# movie's scores can be looked up with .loc[movieId].
similarity_matrix = pd.DataFrame(
    cosine_similarity(movie_features),
    index=movie_features.index,
    columns=movie_features.index,
)


In [None]:
def recommend_movies(movie_id, top_n=5, similarity=None, catalog=None):
    """Return the ``top_n`` movies most similar to ``movie_id``, best match first.

    The query movie itself is excluded: its self-similarity is always the
    highest score, so leaving it in would waste one of the ``top_n`` slots.

    Parameters
    ----------
    movie_id : label of the movie to find neighbours for.
    top_n : int, default 5
        Maximum number of recommendations; values <= 0 give an empty result.
    similarity : pandas.DataFrame, optional
        Square movie x movie similarity table. Defaults to the notebook-level
        ``similarity_matrix``.
    catalog : pandas.DataFrame, optional
        Movie metadata with a ``movieId`` column. Defaults to the
        notebook-level ``movies``.

    Returns
    -------
    pandas.DataFrame
        Rows of the catalogue (original index and columns preserved), ordered
        from most to least similar.

    Raises
    ------
    KeyError
        If ``movie_id`` has no row in the similarity table.
    """
    sim = similarity_matrix if similarity is None else similarity
    titles = movies if catalog is None else catalog

    if movie_id not in sim.index:
        raise KeyError(f"movieId {movie_id!r} is not present in the similarity matrix")

    # Drop undefined scores and the query movie, then rank the remainder.
    scores = sim.loc[movie_id].dropna().drop(labels=movie_id, errors="ignore")
    best = scores.sort_values(ascending=False, kind="stable").head(max(top_n, 0))
    if best.empty:
        return titles.iloc[0:0]

    # isin() returns rows in catalogue order, so re-order them by similarity rank.
    rows = titles[titles["movieId"].isin(best.index)]
    rank = {movie: position for position, movie in enumerate(best.index)}
    order = rows["movieId"].map(rank).to_numpy().argsort(kind="stable")
    return rows.iloc[order]

In [None]:
def personalized_recommendations(user_id, top_n=11, n_seeds=3,
                                 rating_table=None, catalog=None, similarity=None):
    """Recommend unseen movies similar to a user's highest-rated movies.

    The user's ``n_seeds`` best-rated movies act as seeds. Every catalogue
    movie the user has not rated is scored by its highest similarity to any
    seed, and the ``top_n`` best-scoring movies are returned, best first.
    Each candidate is counted once, so a movie similar to several seeds does
    not take more than one slot.

    Parameters
    ----------
    user_id : value to match against the ``userId`` column.
    top_n : int, default 11
        Maximum number of recommendations; values <= 0 give an empty result.
    n_seeds : int, default 3
        How many of the user's top-rated movies to use as seeds.
    rating_table : pandas.DataFrame, optional
        Table with ``userId``, ``movieId`` and ``rating`` columns. Defaults to
        the notebook-level ``ratings``.
    catalog : pandas.DataFrame, optional
        Movie metadata with a ``movieId`` column. Defaults to the
        notebook-level ``movies``.
    similarity : pandas.DataFrame, optional
        Square movie x movie similarity table. Defaults to the notebook-level
        ``similarity_matrix``.

    Returns
    -------
    pandas.DataFrame
        Rows of the catalogue (original index and columns preserved), ordered
        from highest to lowest score. Empty when the user has no usable seeds.
    """
    table = ratings if rating_table is None else rating_table
    titles = movies if catalog is None else catalog
    sim = similarity_matrix if similarity is None else similarity

    # Stable sort: ties in rating keep their original row order, so the seed
    # choice is deterministic from run to run.
    user_ratings = table[table["userId"] == user_id].sort_values(
        by="rating", ascending=False, kind="stable"
    )
    seeds = [movie for movie in user_ratings["movieId"].head(n_seeds) if movie in sim.index]
    if not seeds or top_n <= 0:
        return titles.iloc[0:0]

    # Best similarity of each movie to any seed, computed for all movies at once.
    best_score = sim.loc[seeds].max(axis=0).dropna()

    # Keep only movies that are in the catalogue and that the user has not rated.
    seen = list(set(user_ratings["movieId"]))
    eligible = best_score.index.isin(titles["movieId"]) & ~best_score.index.isin(seen)
    top = best_score[eligible].sort_values(ascending=False, kind="stable").head(top_n)
    if top.empty:
        return titles.iloc[0:0]

    # isin() returns rows in catalogue order, so re-order them by score rank.
    rows = titles[titles["movieId"].isin(top.index)]
    rank = {movie: position for position, movie in enumerate(top.index)}
    order = rows["movieId"].map(rank).to_numpy().argsort(kind="stable")
    return rows.iloc[order]

In [None]:
# Example usage: item-based lookup of catalogue rows for the movies scoring highest against one movie
recommend_movies(1)  # Find movies similar to movieId 1


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
224,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
418,480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller
615,780,Independence Day (a.k.a. ID4) (1996),Action|Adventure|Sci-Fi|Thriller
2355,3114,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy


In [None]:
# Example usage: recommendations for one user, seeded by that user's highest-rated movies
personalized_recommendations(10)  # Get recommendations for userId 10

Unnamed: 0,movieId,title,genres
6743,59315,Iron Man (2008),Action|Adventure|Sci-Fi
7154,71535,Zombieland (2009),Action|Comedy|Horror
7258,74458,Shutter Island (2010),Drama|Mystery|Thriller
7372,79132,Inception (2010),Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX
7620,87232,X-Men: First Class (2011),Action|Adventure|Sci-Fi|Thriller|War
7693,89745,"Avengers, The (2012)",Action|Adventure|Sci-Fi|IMAX
7888,94864,Prometheus (2012),Action|Horror|Sci-Fi|IMAX
8063,99114,Django Unchained (2012),Action|Drama|Western
8159,102445,Star Trek Into Darkness (2013),Action|Adventure|Sci-Fi|IMAX
8475,112852,Guardians of the Galaxy (2014),Action|Adventure|Sci-Fi
