In [35]:
#a
import pandas as pd

# Load the datasets
links = pd.read_csv('links.csv', sep=',')
movies = pd.read_csv('movies.csv', sep=',')
ratings = pd.read_csv('ratings.csv', sep=',')
tags = pd.read_csv('tags.csv', sep=',')

# Display the first few rows of each dataset
print("Links Dataset:")
print(links.head())

print("\nMovies Dataset:")
print(movies.head())

print("\nRatings Dataset:")
print(ratings.head())

print("\nTags Dataset:")
print(tags.head())

# Get the count of ratings
num_ratings = len(ratings)
print(f'\nTotal number of ratings: {num_ratings}')


Links Dataset:
   movieId  imdbId   tmdbId
0        1  114709    862.0
1        2  113497   8844.0
2        3  113228  15602.0
3        4  114885  31357.0
4        5  113041  11862.0

Movies Dataset:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  

Ratings Dataset:
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  9649838

In [9]:
print(ratings.columns)


Index(['userId,movieId,rating,timestamp'], dtype='object')


In [36]:
from sklearn.metrics.pairwise import cosine_similarity
ratings['rating'] = ratings['rating'].replace(-1, pd.NA)
ratings = ratings.drop_duplicates(['userId', 'movieId'])
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
user_similarity = cosine_similarity(user_item_matrix.fillna(0))

def user_based_recommendations(userId, user_item_matrix, user_similarity, n=5):
    # Get the similarity scores for the target user
    user_scores = user_similarity[userId - 1]
    # Sort users by similarity in descending order
    similar_users = sorted(enumerate(user_scores), key=lambda x: x[1], reverse=True)
    
    # Initialize a list to store recommended anime
    recommended_movie = []
    
    for user, score in similar_users[1:]:  # Exclude the target user
        # Get the anime the similar user has rated positively
        rated_movie = user_item_matrix.loc[user_item_matrix.index[user - 1]]
        positively_rated_movie = rated_movie[rated_movie >= 6].index
        
        # Exclude anime the target user has already rated
        target_user_rated_movie = user_item_matrix.loc[userId].dropna().index
        recommended_movie.extend(set(positively_rated_movie) - set(target_user_rated_movie))
        
        # Limit the number of recommendations
        if len(recommended_movie) >= n:
            break

    return recommended_movie[:n]


In [25]:
# Choose a user ID for testing
userId = 5  # Replace with the user ID you want to test

# Test user-based recommendations
user_recommendations = user_based_recommendations(userId, user_item_matrix, user_similarity, n=5)
print(user_recommendations)


[]


In [19]:
# Display the recommendations
print(f"User-Based Recommendations for User {userId}:")
for movieId in user_recommendations:
    movie_name = movies.loc[movies['movieId'] == movieId, 'title'].values[0]
    print(f"- {movie_name}")

User-Based Recommendations for User 55:


In [None]:
from scipy.stats import pearsonr

# Step 1: Calculate User Similarities using Pearson correlation
def pearson_similarity(ratings):
    user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    similarity_matrix = user_item_matrix.corr(method='pearson', min_periods=1)
    np.fill_diagonal(similarity_matrix.values, 0)  # Set self-correlations to 0
    return similarity_matrix

user_similarity = pearson_similarity(ratings)

# Step 2: Predict Movie Scores
def predict_movie_score(user_id, movie_id):
    rated_movie = user_item_matrix.loc[user_id]
    similar_users = user_similarity[user_id].dropna().index
    similar_users = similar_users[rated_movie[similar_users] != 0]

    if len(similar_users) == 0:
        return np.nan

    weighted_sum = 0
    similarity_sum = 0

    for user in similar_users:
        similarity = user_similarity.loc[user_id, user]
        rating = user_item_matrix.loc[user, movie_id]
        weighted_sum += similarity * rating
        similarity_sum += abs(similarity)

    if similarity_sum == 0:
        return np.nan

    predicted_score = weighted_sum / similarity_sum
    return predicted_score

# Example: Predict the score for user 1 and movie 1
# Example: Predict the score for user 1 and movie 1
user_id = 1
movie_id = 1
predicted_score = predict_movie_score(user_id, movie_id)

if not np.isnan(predicted_score):
    print(f"Predicted score for user {user_id} and movie {movie_id}: {predicted_score}")
else:
    print(f"No prediction available for user {user_id} and movie {movie_id}.")


In [3]:
import pandas as pd
from scipy.stats import pearsonr

# Load the ratings dataset
ratings = pd.read_csv('ratings.csv')

# Create a user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Define a function to calculate Pearson correlation between two users
def pearson_correlation(user1, user2):
    common_movies = user_item_matrix.loc[user1].index.intersection(user_item_matrix.loc[user2].index)
    if len(common_movies) < 2:
        return 0
    user1_ratings = user_item_matrix.loc[user1][common_movies]
    user2_ratings = user_item_matrix.loc[user2][common_movies]
    correlation, _ = pearsonr(user1_ratings, user2_ratings)
    return correlation

# Define a function to get similar users for a given user
def get_similar_users(target_user):
    similarities = {}
    for user in user_item_matrix.index:
        if user != target_user:
            similarity = pearson_correlation(target_user, user)
            similarities[user] = similarity
    return similarities

# Define a function to get top N similar users for a given user
def get_top_similar_users(target_user, n=5):
    similarities = get_similar_users(target_user)
    top_users = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:n]
    return top_users

# Define a function to make recommendations for a user
def user_based_recommendations(target_user, n=5):
    top_users = get_top_similar_users(target_user)
    target_user_movies = user_item_matrix.loc[target_user].index
    recommended_movies = set()
    for user, similarity in top_users:
        similar_user_movies = user_item_matrix.loc[user].index
        new_movies = set(similar_user_movies) - set(target_user_movies)
        recommended_movies.update(new_movies)
        if len(recommended_movies) >= n:
            break
    return recommended_movies

# Example: Make recommendations for user 1
target_user = 5
recommended_movies = user_based_recommendations(target_user, n=5)
print(f"Recommended movies for user {target_user}:")
print(recommended_movies)


Recommended movies for user 5:
set()


In [8]:
##########
import pandas as pd
from scipy.stats import pearsonr

# Load the ratings dataset
ratings = pd.read_csv('ratings.csv')

# Create a user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

In [58]:
# Define a function to calculate Pearson correlation between two users
user_similarity = None
def pearson_correlation(user1, user2):
    common_movies = user_item_matrix.loc[user1].index.intersection(user_item_matrix.loc[user2].index)
    if len(common_movies) < 2:
        return 0
    user1_ratings = user_item_matrix.loc[user1][common_movies]
    user2_ratings = user_item_matrix.loc[user2][common_movies]
    correlation, _ = pearsonr(user1_ratings, user2_ratings)
    user_similarity = correlation
    return correlation
user_similarity = pearson_correlation(2,3)
print(user_similarity)

-0.0025940733283772387


In [62]:
# Define a function to get similar users for a given user
def get_similar_users(target_user):
    similarities = {}
    for user in user_item_matrix.index:
        if user != target_user:
            similarity = pearson_correlation(target_user, user)
            similarities[user] = similarity
    return similarities

In [63]:
# Define a function to get top N similar users for a given user
def get_top_similar_users(target_user, n=5):
    similarities = get_similar_users(target_user)
    top_users = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:n]
    return top_users

print(get_top_similar_users(3,5))

[(313, 0.0708816637198), (532, 0.06777586588571832), (377, 0.06673315321298243), (527, 0.06518229803961956), (312, 0.06154047447052821)]


In [60]:
# Define a function to make predictions using the provided formula on lecture
def predict_rating(target_user, item):
    target_user_movies = user_item_matrix.loc[target_user].index
    common_users = user_item_matrix.loc[:, item].dropna().index
    similarities = {user: pearson_correlation(target_user, user) for user in common_users}
    weighted_sum = 0
    sum_of_similarities = 0

    for user, similarity in similarities.items():
        rb = user_item_matrix.loc[user, item]
        rb_bar = user_item_matrix.loc[user].mean()
        weighted_sum += similarity * (rb - rb_bar)
        sum_of_similarities += abs(similarity)

    ra_bar = user_item_matrix.loc[target_user].mean()
    predicted_rating = ra_bar + (weighted_sum / sum_of_similarities) if sum_of_similarities != 0 else ra_bar

    return predicted_rating

# Predict certain movie rating of certain user
target_user = 88
target_movie = 88
predicted_rating = predict_rating(target_user, target_movie)
print(f"Predicted rating for user {target_user} on movie {target_movie}: {predicted_rating}")

Predicted rating for user 88 on movie 88: 0.03153666282883257


In [37]:
# Define a function to make recommendations for a user
def user_based_recommendations(target_user, n=5):
    top_users = get_top_similar_users(target_user)
    print(top_users)
    target_user_movies = user_item_matrix.loc[target_user].index
    print(target_user_movies)
    recommended_movies = set()
    for user, similarity in top_users:
        similar_user_movies = user_item_matrix.loc[user].index
        new_movies = set(similar_user_movies) - set(target_user_movies)
        print(new_movies)
        recommended_movies.update(new_movies)
        if len(recommended_movies) >= n:
            break
    return recommended_movies
print(user_based_recommendations(88,2))

NameError: name 'get_top_similar_users' is not defined

In [33]:
def user_based_recommendations(user_id, user_item_matrix, user_similarity, n=5):
    # Get the similarity scores for the target user
    user_scores = user_similarity[user_id - 1]
    
    
    # Initialize a list to store recommended anime
    recommended_anime = []
    
    for user, score in similar_users[1:]:  # Exclude the target user
        # Get the anime the similar user has rated positively
        rated_anime = user_item_matrix.loc[user_item_matrix.index[user - 1]]
        positively_rated_anime = rated_anime[rated_anime >= 6].index
        
        # Exclude anime the target user has already rated
        target_user_rated_anime = user_item_matrix.loc[user_id].dropna().index
        recommended_anime.extend(set(positively_rated_anime) - set(target_user_rated_anime))
        
        # Limit the number of recommendations
        if len(recommended_anime) >= n:
            break
    
    return recommended_anime[:n]



In [40]:
# Define a function to make recommendations for a user
def user_based_recommendations(target_user, n=5):
    top_users = get_top_similar_users(target_user)
    print(top_users)
    target_user_movies = user_item_matrix.loc[target_user].index
    print(target_user_movies)
    recommended_movies = set()
    for user, similarity in top_users:
        similar_user_movies = user_item_matrix.loc[user].index
        new_movies = set(similar_user_movies) - set(target_user_movies)
        print(new_movies)
        recommended_movies.update(new_movies)
        if len(recommended_movies) >= n:
            break
    return recommended_movies
print(user_based_recommendations(1,5))

[(266, 0.34498334625399185), (313, 0.33387548288146385), (368, 0.3240414067556353), (57, 0.32394753336932625), (39, 0.3201204083712928)]
Index([     1,      2,      3,      4,      5,      6,      7,      8,      9,
           10,
       ...
       193565, 193567, 193571, 193573, 193579, 193581, 193583, 193585, 193587,
       193609],
      dtype='int64', name='movieId', length=9724)
set()


AttributeError: 'list' object has no attribute 'update'

In [38]:
user_id = 9
n=5
user_recommendations = user_based_recommendations(user_id, user_item_matrix, user_similarity, n) 
print(f"User-Based Recommendations for User {user_id}:")
for movie_id in user_recommendations:
    anime_name = ratings.loc[anime_df['movie_id'] == movie_id, 'name'].values[0]
    print(f"- {anime_name}")

IndexError: invalid index to scalar variable.

In [6]:
import pandas as pd

# Load the ratings dataset
ratings = pd.read_csv('ratings.csv')

# Create a user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Define a function to calculate Pearson correlation between two users
def pearson_correlation(user1, user2):
    common_movies = user_item_matrix.loc[user1].index.intersection(user_item_matrix.loc[user2].index)
    if len(common_movies) < 2:
        return 0
    user1_ratings = user_item_matrix.loc[user1][common_movies]
    user2_ratings = user_item_matrix.loc[user2][common_movies]
    correlation, _ = pearsonr(user1_ratings, user2_ratings)
    return correlation

# Define a function to get similar users for a given user
def get_similar_users(target_user):
    similarities = {}
    for user in user_item_matrix.index:
        if user != target_user:
            similarity = pearson_correlation(target_user, user)
            similarities[user] = similarity
    return similarities

# Define a function to make predictions using the provided formula
def predict_rating(target_user, item):
    target_user_movies = user_item_matrix.loc[target_user].index
    common_users = user_item_matrix.loc[:, item].dropna().index
    similarities = {user: pearson_correlation(target_user, user) for user in common_users}
    weighted_sum = 0
    sum_of_similarities = 0

    for user, similarity in similarities.items():
        rb = user_item_matrix.loc[user, item]
        rb_bar = user_item_matrix.loc[user].mean()
        weighted_sum += similarity * (rb - rb_bar)
        sum_of_similarities += abs(similarity)

    ra_bar = user_item_matrix.loc[target_user].mean()
    predicted_rating = ra_bar + (weighted_sum / sum_of_similarities) if sum_of_similarities != 0 else ra_bar

    return predicted_rating

# Example: Predict rating for user 1 on movie 1
target_user = 4
target_movie = 88
predicted_rating = predict_rating(target_user, target_movie)
print(f"Predicted rating for user {target_user} on movie {target_movie}: {predicted_rating}")


Predicted rating for user 4 on movie 88: 0.09655699132940904


In [50]:
import pandas as pd
from scipy.stats import pearsonr

# Load the ratings dataset
ratings = pd.read_csv('ratings.csv')

# Create a user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Define a function to calculate Pearson correlation between two users
def pearson_correlation(user1, user2):
    common_movies = user_item_matrix.loc[user1].index.intersection(user_item_matrix.loc[user2].index)
    if len(common_movies) < 2:
        return 0
    user1_ratings = user_item_matrix.loc[user1][common_movies]
    user2_ratings = user_item_matrix.loc[user2][common_movies]
    correlation, _ = pearsonr(user1_ratings, user2_ratings)
    return correlation

# Define a function to get similar users for a given user
def get_similar_users(target_user):
    similarities = {}
    for user in user_item_matrix.index:
        if user != target_user:
            similarity = pearson_correlation(target_user, user)
            similarities[user] = similarity
    return similarities

# Define a function to get top N similar users for a given user
def get_top_similar_users(target_user, n=10):
    similarities = get_similar_users(target_user)
    top_users = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:n]
    return top_users

# Define a function to make recommendations for a user
def user_based_recommendations(target_user, n=10):
    top_users = get_top_similar_users(target_user)
    target_user_movies = user_item_matrix.loc[target_user].index
    recommended_movies = set()
    for user, similarity in top_users:
        similar_user_movies = user_item_matrix.loc[user].index
        new_movies = set(similar_user_movies) - set(target_user_movies)
        recommended_movies.update(new_movies)
        if len(recommended_movies) >= n:
            break
    return recommended_movies

# Select a user
#Test the top similar users and top recommendation
target_user = 1

# Get the 10 most similar users
similar_users = get_top_similar_users(target_user, n=10)
print(f"10 most similar users to user {target_user}:")
for user, similarity in similar_users:
    print(f"User {user} - Similarity: {similarity}")

# Get the 10 most relevant movies that the recommender suggests
recommended_movies = user_based_recommendations(target_user, n=10)
print(f"\n10 most relevant movies for user {target_user}:")
for movie_id in recommended_movies:
    movie_title = ratings[ratings['movieId'] == movie_id]['title'].values[0]
    print(f"Movie ID: {movie_id} - Title: {movie_title}")


10 most similar users to user 1:
User 266 - Similarity: 0.34498334625399185
User 313 - Similarity: 0.33387548288146385
User 368 - Similarity: 0.3240414067556353
User 57 - Similarity: 0.32394753336932625
User 39 - Similarity: 0.3201204083712928
User 452 - Similarity: 0.31315004538758084
User 91 - Similarity: 0.3113459316284405
User 469 - Similarity: 0.30908917794063595
User 45 - Similarity: 0.3075681925502507
User 135 - Similarity: 0.306842101856095

10 most relevant movies for user 1:


In [65]:
import numpy as np

def cosine_similarity(user1, user2):
    common_movies = user_item_matrix.loc[user1].index.intersection(user_item_matrix.loc[user2].index)
    if len(common_movies) == 0:
        return 0
    user1_ratings = user_item_matrix.loc[user1][common_movies]
    user2_ratings = user_item_matrix.loc[user2][common_movies]
    similarity = np.dot(user1_ratings, user2_ratings) / (np.linalg.norm(user1_ratings) * np.linalg.norm(user2_ratings))
    return similarity

print(cosine_similarity(3,313))


0.07818732282993371


In [64]:
def content_overlap_similarity(user1_ratings, user2_ratings):
    items_user1 = set(user1_ratings)
    items_user2 = set(user2_ratings)
    
    common_items = len(items_user1.intersection(items_user2))
    union_items = len(items_user1.union(items_user2))
    similarity = common_items / union_items
    return similarity

# Assuming user_ratings is a dictionary where keys are user IDs and values are sets of movie IDs they have rated

user1_ratings = user_ratings[3]
user2_ratings = user_ratings[313]

similarity = content_overlap_similarity(user1_ratings, user2_ratings)

print(f"The similarity between User 2 and User 3 is: {similarity}")


The similarity between User 2 and User 3 is: 0.0215633423180593


In [70]:
def predict_recommended_movies_ratings(self, target_user, user_item_matrix, similar_users, n_recommendations):
        target_ratings = user_item_matrix.loc[target_user]
        recommended_movies = np.empty((0, 2), dtype=float)
        similarity_sum_denominator = sum([s for user, s in similar_users])
        mean_target_ratings = np.mean(target_ratings)

        for movie in user_item_matrix.columns:
            if (user_item_matrix.loc[target_user][movie] == 0):
                numerator_sum = 0
                for user, similarity in similar_users:
                    user_ratings = user_item_matrix.loc[user]
                    if (user_ratings[movie] > 0):
                        #mean_user_ratings = np.mean(user_ratings)
                        numerator_sum += similarity * (user_ratings[movie] - np.mean(user_ratings))

                # Check whether numerator is greater than zero only then add the prediction value
                if numerator_sum > 0:
                    predicted_rating = mean_target_ratings + numerator_sum / similarity_sum_denominator
                    a = np.array([[movie, predicted_rating]])
                    recommended_movies = np.append(recommended_movies, a, axis=0)
        sorted_indices = np.argsort(recommended_movies[:, 1])[::-1]
        top_n_movies = recommended_movies[sorted_indices][:n_recommendations]
        # print(top_n_movies)
        return top_n_movies

In [73]:
# Define necessary variables
target_user = 6
user_item_matrix = ...  # Your user-item matrix
similar_users = [(1, 0.8), (2, 0.6), (3, 0.7)]
n_recommendations = 10

# Call the method and print the result
recommended_movies = predict_recommended_movies_ratings(target_user, user_item_matrix, similar_users, n_recommendations)

print("Top recommended movies:")
for movie_id, predicted_rating in recommended_movies:
    print(f"Movie ID: {movie_id} - Predicted Rating: {predicted_rating}")

TypeError: predict_recommended_movies_ratings() missing 1 required positional argument: 'n_recommendations'

In [82]:


# Define similar users (example data)
similar_users = [(2, 0.8), (3, 0.6), (4, 0.7)]

# Define other variables
target_user = 66
n_recommendations = 10
self = 3

# Call the method and print the result
recommended_movies = predict_recommended_movies_ratings(self, target_user, user_item_matrix, similar_users, n_recommendations)

print("Top recommended movies:")
for movie_id, predicted_rating in recommended_movies:
    print(f"Movie ID: {movie_id} - Predicted Rating: {predicted_rating}")


KeyError: 66