Author: Michele Russo

#Assignment 3 Recommender System

In [51]:
import pandas as pd
import numpy as np

##Helper Functions

In [52]:

def get_common_users_ratings(ratings_matrix, movie_id1, movie_id2):
    """
    Collect the ratings of users who rated both of two movies.

    Args:
    - ratings_matrix: DataFrame with "userId", "movieId" and "rating" columns.
    - movie_id1: ID of the first movie.
    - movie_id2: ID of the second movie.

    Returns:
    - DataFrame with the columns "userId", "rating_movie1" and "rating_movie2",
      restricted to users that appear in the ratings of both movies.
    """
    first_mask = ratings_matrix["movieId"] == movie_id1
    second_mask = ratings_matrix["movieId"] == movie_id2

    # The default inner join on userId keeps only users present on both sides;
    # the suffixes tell the two "rating" columns apart.
    paired = ratings_matrix[first_mask].merge(
        ratings_matrix[second_mask],
        on="userId",
        suffixes=("_movie1", "_movie2"),
    )

    return paired[["userId", "rating_movie1", "rating_movie2"]]


In [53]:

"""
    Generate a matrix of recommended movies with their corresponding titles and recommendation values.

    Args:
    - recommendations: List of tuples containing movie IDs and their recommendation values.
    - movies: DataFrame containing movie information including titles.

    Returns:
    - movie_matrix: A matrix containing movie IDs, titles, and recommendation values.
"""

def find_movies(recommendations, movies):
    """
    Attach movie titles to a list of (movie ID, value) recommendations.

    Args:
    - recommendations: Sequence of items whose first element is a movie ID and
      whose second element is the recommendation value.
    - movies: DataFrame with "movieId" and "title" columns.

    Returns:
    - List of [movie ID, title, value] lists, in the order of the input.

    Raises:
    - IndexError: if a movie ID has no matching row in `movies`.
    """
    def title_of(movie_id):
        # First title among the rows whose movieId matches.
        return movies[movies["movieId"] == movie_id]["title"].values[0]

    return [[entry[0], title_of(entry[0]), entry[1]] for entry in recommendations]


In [54]:
 """
    Get the title of a movie based on its movie ID from a movies matrix.

    Args:
    - movies_matrix: DataFrame containing movie information including titles.
    - movie_id: ID of the movie to retrieve the title for.

    Returns:
    - movie_title: Title of the specified movie.
"""

def get_movie_title(movies_matrix, movie_id):
    """
    Look up the title of a movie by its ID.

    Args:
    - movies_matrix: DataFrame with "movieId" and "title" columns.
    - movie_id: ID of the movie to look up.

    Returns:
    - Title of the first row whose "movieId" equals `movie_id`.

    Raises:
    - IndexError: if no row matches `movie_id`.
    """
    matching_titles = movies_matrix.loc[movies_matrix["movieId"] == movie_id, "title"]
    return matching_titles.values[0]


In [55]:
"""
    Find positively similar items, rated by the user, to a given item.

    Args:
    - user_id: ID of the user whose ratings are considered.
    - item_id: ID of the item to find similar items for.
    - item_similarity: DataFrame containing item-item similarity values.
    - ratings: DataFrame containing user-item ratings.

    Returns:
    - num_similarities: Number of positively similar items found.
    - positive_similarities: List of tuples containing positively similar item IDs and their similarity values.
"""

def get_similar_items(user_id, item_id, item_similarity, ratings):
    """
    Find the items rated by a user that are positively similar to a given item.

    Args:
    - user_id: ID of the user whose rated items are considered.
    - item_id: ID of the item to compare against.
    - item_similarity: DataFrame of item-item similarities, indexed by item ID
      on both axes.
    - ratings: DataFrame with "userId" and "movieId" columns.

    Returns:
    - Tuple (count, pairs) where `pairs` is a list of (item ID, similarity)
      tuples with strictly positive similarity and `count` is its length.
    """
    rated_ids = set(ratings[ratings["userId"] == user_id]["movieId"].tolist())

    # Lazily pair every rated item with its similarity to the target item.
    scored = ((rated, item_similarity.loc[rated, item_id]) for rated in rated_ids)
    positive_similarities = [(rated, score) for rated, score in scored if score > 0]

    return len(positive_similarities), positive_similarities


In [56]:
"""
    Get the similarity value between two items from a similarity matrix.

    Args:
    - item1: ID of the first item.
    - item2: ID of the second item.
    - similarity_matrix: DataFrame containing item-item similarity values.

    Returns:
    - similarity: Similarity value between the two items.
"""

def get_similarity(item1, item2, similarity_matrix):
    """
    Read one entry of an item-item similarity matrix.

    Args:
    - item1: Row label (ID of the first item).
    - item2: Column label (ID of the second item).
    - similarity_matrix: DataFrame of item-item similarities.

    Returns:
    - The value stored at row `item1`, column `item2`.
    """
    return similarity_matrix.loc[item1, item2]


##Assignment

In [57]:
def read_csv(file_path):
    """
    Load a CSV file into a DataFrame, reporting failures instead of raising.

    Args:
    - file_path: Path (or anything accepted by pandas.read_csv) to read from.

    Returns:
    - The loaded DataFrame, or None if reading failed for any reason.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as error:
        # Best-effort loader: every failure is printed and signalled with None.
        print("Error:", error)
    return None


###User User CF

In [58]:

"""
    Calculate the Pearson correlation coefficient between two users based on their ratings.

    Args:
    - user1_id: ID of the first user.
    - user2_id: ID of the second user.
    - df: DataFrame containing user-item ratings.

    Returns:
    - num_common_movies: Number of common movies rated by both users.
    - pearson_corr: Pearson correlation coefficient between the two users.
"""
def person_corr(user1_id, user2_id, df):
    """
    Compute the Pearson correlation between two users over their co-rated movies.

    Deviations are taken from each user's mean over ALL of their ratings (not
    only the co-rated ones), and the sums run over the co-rated movies.

    Args:
    - user1_id: ID of the first user.
    - user2_id: ID of the second user.
    - df: DataFrame with "userId", "movieId" and "rating" columns.

    Returns:
    - Tuple (num_common_movies, pearson_corr). The correlation is 0 when the
      users share at most one movie or when the denominator is zero; the count
      is always the real number of co-rated movies.
    """
    # Ratings of each user, indexed by movie so they can be aligned.
    ratings_1 = df[df['userId'] == user1_id].set_index('movieId')['rating']
    ratings_2 = df[df['userId'] == user2_id].set_index('movieId')['rating']

    common_movies = ratings_1.index.intersection(ratings_2.index)
    num_common = len(common_movies)

    if num_common <= 1:
        # Not enough overlap to correlate; still report the true overlap size.
        return num_common, 0

    # Deviation of each co-rated rating from the user's overall mean rating.
    deviations_1 = ratings_1.loc[common_movies] - ratings_1.mean()
    deviations_2 = ratings_2.loc[common_movies] - ratings_2.mean()

    numerator = (deviations_1 * deviations_2).sum()
    denominator = ((deviations_1 ** 2).sum() * (deviations_2 ** 2).sum()) ** 0.5

    if denominator == 0:
        # At least one user has no variance on the shared movies.
        return num_common, 0

    return num_common, numerator / denominator


In [59]:
"""
    Weight the Pearson correlation coefficient between two users.

    Args:
    - user1_id: ID of the first user.
    - user2_id: ID of the second user.
    - ratings: DataFrame containing user-item ratings.
    - y: Weight parameter (default is 10).

    Returns:
    - weighted_corr: Weighted Pearson correlation coefficient.
"""

def weight_per_corr(user1_id, user2_id, ratings, y=10):
    """
    Apply significance weighting to the Pearson correlation of two users.

    The correlation is scaled by min(y, number of co-rated movies) / y, so
    pairs with fewer than `y` shared movies are damped.

    Args:
    - user1_id: ID of the first user.
    - user2_id: ID of the second user.
    - ratings: DataFrame with "userId", "movieId" and "rating" columns.
    - y: Overlap threshold used for the weighting (default is 10).

    Returns:
    - The weighted Pearson correlation.
    """
    shared_count, correlation = person_corr(user1_id, user2_id, ratings)
    return correlation * min(y, shared_count) / y


In [60]:
"""
    Find the top-k neighbors for a target user who rated a target item based on weighted similarity.

    Args:
    - target_user: ID of the target user.
    - target_item: ID of the target item.
    - df: DataFrame containing user-item ratings.
    - k: Number of neighbors to find (default is 20).
    - mod: Flag to print the number of neighbors with positive correlation (default is 0).

    Returns:
    - sorted_neighbors: List of top-k neighbors with their weighted similarity scores.
"""

def find_neighbors(target_user, target_item, df, k=20, mod=0):
    """
    Select the top-k positively correlated users who rated a given item.

    Args:
    - target_user: ID of the user neighbors are searched for.
    - target_item: ID of the item the neighbors must have rated.
    - df: DataFrame with "userId", "movieId" and "rating" columns.
    - k: Maximum number of neighbors to return (default is 20).
    - mod: If truthy, print how many neighbors have positive similarity.

    Returns:
    - List of (user ID, weighted similarity) tuples, ordered by similarity
      (descending) and then user ID (ascending), truncated to `k` entries.
    """
    # Every user who rated the target item is a candidate neighbor.
    raters = df[df['movieId'] == target_item]['userId'].unique()

    scored = {
        other: weight_per_corr(target_user, other, df)
        for other in raters
        if other != target_user
    }

    ranked = sorted(scored.items(), key=lambda pair: (pair[1], -pair[0]), reverse=True)

    # Only strictly positive similarities are usable as neighbors.
    positive = [pair for pair in ranked if pair[1] > 0]

    if mod:
        print("Number of neighbors with positive correlation:", len(positive))

    # Slicing already returns the whole list when fewer than k are available.
    return positive[:k]




"""
    Predict the rating of a target user for a target item using user-user collaborative filtering.

    Args:
    - target_user: ID of the target user.
    - target_item: ID of the target item.
    - df: DataFrame containing user-item ratings.
    - neighbors: List of user neighbors with their weighted similarity scores.
    - mod: Flag to print intermediate calculation steps (default is 0).

    Returns:
    - prediction: Predicted rating of the target user for the target item.
"""

def uucf_rating_prediction(target_user, target_item, df, neighbors, mod=0):
    """
    Predict a user's rating for an item with user-user collaborative filtering.

    The prediction is the target user's mean rating plus the similarity-weighted
    average of each neighbor's deviation (the neighbor's rating for the item
    minus that neighbor's own mean rating). The result is not clamped to any
    rating scale.

    Args:
    - target_user: ID of the user to predict for.
    - target_item: ID of the item to predict.
    - df: DataFrame with "userId", "movieId" and "rating" columns.
    - neighbors: List of (user ID, similarity) tuples, e.g. from find_neighbors.
    - mod: If truthy, print the intermediate numerator, denominator and
      weighted deviation.

    Returns:
    - The predicted rating. Falls back to the target user's mean rating when
      there are no neighbors, when no neighbor has a rating for the item, or
      when the similarities sum to zero.
    """
    target_user_mean = df[df['userId'] == target_user]['rating'].mean()

    if len(neighbors) == 0:
        return target_user_mean

    weighted_sum = 0
    total_weight = 0
    for neighbor, similarity in neighbors:
        neighbor_rows = df[df['userId'] == neighbor]
        item_ratings = neighbor_rows[neighbor_rows['movieId'] == target_item]['rating'].values
        if len(item_ratings) == 0:
            # A neighbor without a rating for the item cannot contribute.
            continue
        deviation = item_ratings[0] - neighbor_rows['rating'].mean()
        weighted_sum += deviation * similarity
        total_weight += similarity

    if mod:
        print("Numerator:", weighted_sum)
        print("Denominator:", total_weight)

    if total_weight == 0:
        # The weighted average is undefined; avoid dividing by zero.
        return target_user_mean

    weighted_deviation = weighted_sum / total_weight
    if mod:
        print("Weighted average of the deviation from their mean rating:", weighted_deviation)

    return target_user_mean + weighted_deviation


In [61]:
"""
    Generate top-N recommendations for the target user using User-User Collaborative Filtering.

    Args:
    - user: ID of the target user.
    - df: DataFrame containing user-item ratings.
    - n: Number of recommendations to generate (default is 10).

    Returns:
    - top_n_recommendations: List of top-N recommended items with their predicted ratings.
"""

def top_n_recommendations_uucf(user, df, n=10):
    """
    Build the top-N recommendations for a user with user-user collaborative filtering.

    Every item the user has not rated is scored with uucf_rating_prediction,
    using the neighbors returned by find_neighbors for that item.

    Args:
    - user: ID of the target user.
    - df: DataFrame with "userId", "movieId" and "rating" columns.
    - n: Number of recommendations to return (default is 10). A negative value
      returns the full ranked list.

    Returns:
    - List of (item ID, predicted rating) tuples, ordered by predicted rating
      (descending) and then item ID (ascending).
    """
    # A set gives constant-time membership tests while filtering candidates.
    rated_items = set(df[df['userId'] == user]['movieId'].unique())
    candidate_items = [item for item in df['movieId'].unique() if item not in rated_items]

    prediction_scores = {}
    for item in candidate_items:
        neighbors = find_neighbors(user, item, df)
        prediction_scores[item] = uucf_rating_prediction(user, item, df, neighbors)

    # reverse=True on (score, -item) sorts by score descending, item ID ascending.
    sorted_predictions = sorted(prediction_scores.items(), key=lambda x: (x[1], -x[0]), reverse=True)

    if n < 0:
        return sorted_predictions

    return sorted_predictions[:n]


###Item Item CF

In [62]:

"""
    Calculate the cosine similarity matrix for all items based on user-item ratings.

    Args:
    - df: DataFrame containing user-item ratings.

    Returns:
    - correlation_matrix: DataFrame containing the cosine similarity matrix between all items.
"""

def cosine_similarity_tot(df):
    """
    Compute the item-item cosine similarity matrix from mean-centered ratings.

    Each rating is centered by subtracting the mean rating of its item; missing
    user-item pairs count as 0. The input DataFrame is not modified.

    Args:
    - df: DataFrame with "userId", "movieId" and "rating" columns.

    Returns:
    - DataFrame indexed by movieId on both axes holding the similarities. The
      diagonal is 1 and undefined entries (zero denominator) are 0.
    """
    # Center each rating on its item's mean rating.
    item_means = df.groupby('movieId')['rating'].transform("mean")
    centered = df['rating'] - item_means

    # Build the helper columns on a separate frame so the caller's DataFrame
    # does not gain extra columns as a side effect.
    working = df[["movieId", "userId"]].assign(
        rating_rescaled=centered,
        squared_rescaled_ratings=centered ** 2,
    )

    centered_pivot = pd.pivot_table(working, index="movieId", columns="userId", values="rating_rescaled", fill_value=0)
    squared_pivot = pd.pivot_table(working, index="movieId", columns="userId", values="squared_rescaled_ratings", fill_value=0)

    # Numerator: dot product of the centered rating vectors of every item pair.
    numerator = centered_pivot.dot(centered_pivot.T)

    # Denominator: sqrt of the product of the two items' summed squared ratings.
    squared_norms = squared_pivot.sum(axis=1).values
    denominator = np.sqrt(squared_norms[:, None] * squared_norms[None, :])

    # Items with no rating variance yield 0/0; those NaNs are replaced below.
    with np.errstate(divide="ignore", invalid="ignore"):
        similarity = numerator.values / denominator

    # `similarity` is a fresh array, so in-place edits never touch pandas-owned
    # (possibly read-only) buffers.
    np.fill_diagonal(similarity, 1)
    similarity[np.isnan(similarity)] = 0

    return pd.DataFrame(similarity, index=numerator.index, columns=numerator.index)


In [63]:

"""
    Find positively similar items rated by the user to a given item.

    Args:
    - user_id: ID of the user whose ratings are considered.
    - item_id: ID of the item to find similar items for.
    - item_similarity: DataFrame containing item-item similarity values.
    - ratings: DataFrame containing user-item ratings.
    - k: Number of similar items to consider (default is 20).
    - mod: Flag to consider only positive similarities (default is 1).

    Returns:
    - number_elements: Number of positively similar items found.
    - similarities: List of tuples containing positively similar item IDs and their similarity values.
"""

def get_neighborhood(user_id, item_id, item_similarity, ratings, k=20, mod=1):
    """
    Rank the items a user has rated by their similarity to a given item.

    Args:
    - user_id: ID of the user whose rated items are considered.
    - item_id: ID of the item to compare against.
    - item_similarity: DataFrame of item-item similarities, indexed by item ID
      on both axes.
    - ratings: DataFrame with "userId" and "movieId" columns.
    - k: Maximum size of the neighborhood (default is 20); values <= 0 disable
      the truncation.
    - mod: If truthy (default), keep only strictly positive similarities;
      otherwise keep every rated item.

    Returns:
    - Tuple (number_elements, similarities): the number of retained items
      BEFORE truncation to `k`, and the list of (item ID, similarity) tuples
      ordered by similarity (descending) and then item ID (ascending).
    """
    rated_ids = set(ratings[ratings["userId"] == user_id]["movieId"].tolist())

    scored = [(rated, item_similarity.loc[rated, item_id]) for rated in rated_ids]
    # With mod falsy every pair is kept; otherwise only positive scores survive.
    similarities = [pair for pair in scored if not mod or pair[1] > 0]

    similarities.sort(key=lambda pair: (pair[1], -pair[0]), reverse=True)

    number_elements = len(similarities)
    if 0 < k <= number_elements:
        similarities = similarities[:k]

    return number_elements, similarities


"""
    Generate a rating prediction for a user and an item based on the similarity with other items.

    Args:
    - user_id: ID of the target user.
    - item_id: ID of the target item.
    - item_similarity: DataFrame containing item-item similarity values.
    - ratings: DataFrame containing user-item ratings.
    - k: Number of similar items to consider (default is 20).
    - mod: Flag to consider only positive similarities (default is 1).

    Returns:
    - prediction: Predicted rating for the target user and item.
"""

def recommend(user_id, item_id, item_similarity, ratings, k=20, mod=1):
    """
    Predict a user's rating for an item from the user's ratings of similar items.

    The prediction is the similarity-weighted average of the user's ratings
    over the neighborhood returned by get_neighborhood, normalised by the sum
    of absolute similarities.

    Args:
    - user_id: ID of the target user.
    - item_id: ID of the target item.
    - item_similarity: DataFrame of item-item similarities.
    - ratings: DataFrame with "userId", "movieId" and "rating" columns.
    - k: Neighborhood size passed to get_neighborhood (default is 20).
    - mod: Positive-only flag passed to get_neighborhood (default is 1).

    Returns:
    - The predicted rating, or 0 when the neighborhood contributes no weight.
    """
    neighbourhood = get_neighborhood(user_id, item_id, item_similarity, ratings, k, mod)[1]

    weighted_total = 0
    weight_norm = 0
    for neighbour_id, weight in neighbourhood:
        # The user's rating(s) for this neighbouring item; the first one is used.
        found = ratings[(ratings['userId'] == user_id) & (ratings['movieId'] == neighbour_id)]['rating'].values
        if len(found) == 0:
            continue
        weighted_total += weight * found[0]
        weight_norm += abs(weight)

    return weighted_total / weight_norm if weight_norm != 0 else 0


In [64]:
"""
    Get the similarity value between two items from an item-item similarity matrix.

    Args:
    - item_similarity: DataFrame containing item-item similarity values.
    - item1: ID of the first item.
    - item2: ID of the second item.

    Returns:
    - similarity: Similarity value between the two items.
"""
def similarity_value(item_similarity, item1, item2):
    """
    Read one entry of an item-item similarity matrix.

    Args:
    - item_similarity: DataFrame of item-item similarities.
    - item1: Row label (ID of the first item).
    - item2: Column label (ID of the second item).

    Returns:
    - The value stored at row `item1`, column `item2`.
    """
    return item_similarity.loc[item1, item2]


In [65]:
"""
    Generate top-N recommendations for a user based on item-item similarity.

    Args:
    - user_id: ID of the target user.
    - item_similarity: DataFrame containing item-item similarity values.
    - ratings: DataFrame containing user-item ratings.
    - n: Number of recommendations to generate (default is 10).
    - k: Number of similar items to consider for each recommendation (default is 20).
    - mod: Flag to consider only positive similarities (default is 1).

    Returns:
    - recommendations: List of top-N recommended items with their predicted ratings.
"""
def get_top_n_recommendations(user_id, item_similarity, ratings, n=10, k=20, mod=1):
    """
    Build the top-N recommendations for a user with item-item collaborative filtering.

    Every item of the similarity matrix that the user has not rated is scored
    with the similarity-weighted average of the user's ratings over the
    neighborhood returned by get_neighborhood. Items whose neighborhood
    contributes no weight are left out.

    Args:
    - user_id: ID of the target user.
    - item_similarity: DataFrame of item-item similarities, indexed by item ID.
    - ratings: DataFrame with "userId", "movieId" and "rating" columns.
    - n: Number of recommendations to return (default is 10). A negative value
      returns the full ranked list.
    - k: Neighborhood size passed to get_neighborhood (default is 20).
    - mod: Positive-only flag passed to get_neighborhood (default is 1).

    Returns:
    - List of (item ID, predicted rating) tuples, ordered by predicted rating
      (descending) and then item ID (ascending).
    """
    # Index the user's ratings once instead of scanning the whole DataFrame for
    # every neighbor of every candidate. setdefault keeps the FIRST rating per
    # movie, which is the row a `.values[0]` lookup would select.
    user_rows = ratings[ratings['userId'] == user_id]
    user_ratings = {}
    for movie_id, rating in zip(user_rows['movieId'], user_rows['rating']):
        user_ratings.setdefault(movie_id, rating)

    recommendations = []
    for item_id in item_similarity.index:
        if item_id in user_ratings:
            # Already rated by the user: not a candidate.
            continue

        numerator = 0
        denominator = 0
        for sim_item_id, sim_score in get_neighborhood(user_id, item_id, item_similarity, ratings, k, mod)[1]:
            if sim_item_id not in user_ratings:
                continue
            numerator += sim_score * user_ratings[sim_item_id]
            denominator += abs(sim_score)

        if denominator != 0:
            recommendations.append((item_id, numerator / denominator))

    # reverse=True on (score, -item) sorts by score descending, item ID ascending.
    recommendations.sort(key=lambda x: (x[1], -x[0]), reverse=True)

    if n < 0:
        return recommendations

    return recommendations[:n]


###Basket Recommendation

In [66]:
"""
    Generate a recommendation score for an item based on its similarity with items in a basket.

    Args:
    - item_id: ID of the target item or a list of item IDs.
    - basket_list: List of item IDs in the basket.
    - item_item_similarity: DataFrame containing item-item similarity values.
    - mod: If falsy (default 0), only positive similarities are summed; if truthy (e.g. mod=1), all similarities are summed.

    Returns:
    - sim: Recommendation score for the target item.
"""

def basket_recommendation(item_id, basket_list, item_item_similarity, mod=0):
    """
    Score one or more items by their summed similarity to the items of a basket.

    Args:
    - item_id: ID of the target item, or a list of item IDs.
    - basket_list: List of item IDs in the basket.
    - item_item_similarity: DataFrame of item-item similarities, indexed by
      item ID on both axes.
    - mod: If falsy (default), only strictly positive similarities are summed;
      if truthy, all similarities are summed. NOTE: this is the opposite of the
      `mod` convention of get_neighborhood, where a truthy value selects the
      positive-only behaviour.

    Returns:
    - Series indexed by target item ID holding each item's score.
    """
    targets = item_id if isinstance(item_id, list) else [item_id]

    # Rows are the target items, columns the basket items.
    block = item_item_similarity.loc[targets, basket_list]

    if not mod:
        # Mask non-positive entries so they are skipped by the row sum.
        block = block.where(block > 0)

    return block.sum(axis=1)


In [67]:
"""
    Generate top-N recommendations based on the similarity of items with a basket of items.

    Args:
    - basket_list: List of item IDs in the basket.
    - item_item_similarity: DataFrame containing item-item similarity values.
    - n: Number of recommendations to generate (default is 10).
    - mod: Passed to basket_recommendation: if falsy (default 0), only positive similarities are summed; if truthy (e.g. mod=1), all similarities are summed.

    Returns:
    - top_n_list: List of top-N recommended items with their recommendation scores.
"""

def top_n_basket_recommendation(basket_list, item_item_similarity, n=10, mod=0):
    """
    Recommend the items most similar to a basket of items.

    Args:
    - basket_list: List of item IDs in the basket.
    - item_item_similarity: DataFrame of item-item similarities, indexed by
      item ID on both axes.
    - n: Number of recommendations to return (default is 10).
    - mod: Flag forwarded unchanged to basket_recommendation (default is 0).

    Returns:
    - List of (item ID, score) tuples for items outside the basket, ordered by
      score (descending) and then item ID (ascending), cut to `n` entries.
    """
    # Candidates are all items of the matrix that are not already in the basket.
    candidates = [
        item for item in item_item_similarity.index.tolist()
        if item not in basket_list
    ]

    scores = basket_recommendation(candidates, basket_list, item_item_similarity, mod)

    ranked = sorted(
        scores.sort_values(ascending=False).items(),
        key=lambda pair: (pair[1], -pair[0]),
        reverse=True,
    )

    return ranked[:n]


### Hybrid Recommendation

In [68]:
"""
    Generate top-N recommendations using a weighted hybrid approach combining two sets of recommendations.

    Args:
    - UUCF_recommendations: List of user-user collaborative filtering recommendations (item_id, score).
    - IICF_recommendations: List of item-item collaborative filtering recommendations (item_id, score).
    - weight: Weight of UUCF recommendations in the hybrid approach (default is 0.5).
    - N: Number of recommendations to generate (default is 10).

    Returns:
    - top_N_recommendations: List of top-N combined recommendations with their weighted scores.
"""

def weighted_hybrid_recommender(UUCF_recommendations, IICF_recommendations, weight=0.5, N=10):
    """
    Blend two recommendation lists into one ranking with a weighted sum.

    Args:
    - UUCF_recommendations: List of (item ID, score) tuples from user-user CF.
    - IICF_recommendations: List of (item ID, score) tuples from item-item CF.
    - weight: Weight of the UUCF scores; the IICF scores get 1 - weight
      (default is 0.5).
    - N: Number of recommendations to return (default is 10).

    Returns:
    - List of (item ID, combined score) tuples, ordered by combined score
      (descending) and then item ID (ascending), cut to `N` entries. An item
      missing from one list contributes 0 for that list.
    """
    blended = {}
    weighted_sources = (
        (UUCF_recommendations, weight),
        (IICF_recommendations, 1 - weight),
    )
    for source, factor in weighted_sources:
        for item_id, score in source:
            blended[item_id] = blended.get(item_id, 0) + factor * score

    ranked = sorted(blended.items(), key=lambda pair: (pair[1], -pair[0]), reverse=True)

    return ranked[:N]


##Test

In [69]:

# Load the ratings and the movie metadata used by every test cell below.
# The functions above read the "userId", "movieId" and "rating" columns of the
# ratings frame and the "movieId" and "title" columns of the movies frame.
ratings_matrix = pd.read_csv("ratings.csv")
movie_matrix = pd.read_csv("movies.csv")



###8.1 UUCF

In [70]:
# Pearson correlation between user 1 and user 4 (the first returned value is
# the reported number of co-rated movies and is not used here)
movies,person14=person_corr(1,4,ratings_matrix)
print("person correlation users 1 and 4", person14)
# Same pair of users, with significance weighting applied to the correlation
person_weight14=weight_per_corr(1,4,ratings_matrix)
print("person correlation with significance weighting users 1 and 4", person_weight14)
# Neighbors of user 1 for item 10; mod=1 also prints the count of positive neighbors
neighbors = find_neighbors(1, 10, ratings_matrix,mod=1)
print("top k=20 neighbours ",neighbors)
# Predict user 1's rating for item 10; mod=1 prints the intermediate values
prediction = uucf_rating_prediction(1, 10, ratings_matrix, neighbors,mod=1)
print("Predicted rating:", prediction)



person correlation users 1 and 4 0.042136808375910856
person correlation with significance weighting users 1 and 4 0.021068404187955428
Number of neighbors with positive correlation: 30
top k=20 neighbours  [(390, 0.37976211745379407), (312, 0.3477870022545479), (353, 0.2865915778360536), (428, 0.2729107049454655), (177, 0.2630318859067766), (561, 0.25583582053934906), (405, 0.2023994590198254), (458, 0.1999981580316726), (236, 0.19012109129819604), (247, 0.18298057613606222), (430, 0.1769836444763965), (243, 0.16274849900102134), (285, 0.14863349494711478), (574, 0.14297980920771322), (150, 0.14281903684635436), (608, 0.14008707563881007), (295, 0.13295134132714795), (641, 0.12493900951088492), (592, 0.12403408673701903), (384, 0.1208368018000359)]
Numerator: -1.2106211645743594
Denominator: 3.998431192914241
Weighted average of the deviation from their mean rating: -0.3027740396582898
Predicted rating: 2.24722596034171


In [71]:
# Neighbors of user 1 for item 260, followed by the rating prediction
# (mod=1 prints the positive-neighbor count and the intermediate values)
neighbors = find_neighbors(1, 260, ratings_matrix,mod=1)
print("top k=20 neighbours ",neighbors)
prediction = uucf_rating_prediction(1, 260, ratings_matrix, neighbors,mod=1)
print("Predicted rating:", prediction)

Number of neighbors with positive correlation: 63
top k=20 neighbours  [(580, 0.662547274200393), (463, 0.388763881018101), (390, 0.37976211745379407), (585, 0.35062100531729185), (312, 0.3477870022545479), (353, 0.2865915778360536), (394, 0.28168984805199876), (466, 0.28001045251295337), (510, 0.27673467464372237), (428, 0.2729107049454655), (505, 0.2569497023540963), (561, 0.25583582053934906), (533, 0.2485524986721755), (242, 0.24017846543583934), (41, 0.2179899058684344), (22, 0.21035896427251233), (439, 0.20804249079711568), (405, 0.2023994590198254), (452, 0.19218775874226246), (236, 0.19012109129819604)]
Numerator: 2.906446403718343
Denominator: 5.750034695234129
Weighted average of the deviation from their mean rating: 0.5054658898192959
Predicted rating: 3.0554658898192955


In [72]:
# Top-N (default n=10) user-user CF recommendations for user 1,
# printed first as (movie ID, score) pairs and then with the movie titles
n_recommendations1=top_n_recommendations_uucf(1, ratings_matrix)
print("Top-10 recommendations for user 1 ", n_recommendations1)
top_10_recommendation1=find_movies(n_recommendations1,movie_matrix)
print(top_10_recommendation1)


Top-10 recommendations for user 1  [(3216, 4.936834319526627), (40412, 4.936834319526627), (92494, 4.936834319526627), (3320, 4.928235294117647), (4302, 4.928235294117647), (4731, 4.928235294117647), (5071, 4.928235294117647), (86781, 4.928235294117647), (97957, 4.717883488583054), (3414, 4.543621197252207)]
[[3216, 'Vampyros Lesbos (Vampiras, Las) (1971)', 4.936834319526627], [40412, "Dead Man's Shoes (2004)", 4.936834319526627], [92494, 'Dylan Moran: Monster (2004)', 4.936834319526627], [3320, "Mifune's Last Song (Mifunes sidste sang) (1999)", 4.928235294117647], [4302, 'King Is Alive, The (2000)', 4.928235294117647], [4731, 'Innocence (2000)', 4.928235294117647], [5071, 'Maelström (2000)', 4.928235294117647], [86781, 'Incendies (2010)', 4.928235294117647], [97957, 'Excision (2012)', 4.717883488583054], [3414, 'Love Is a Many-Splendored Thing (1955)', 4.543621197252207]]


In [73]:
# Top-N (default n=10) user-user CF recommendations for user 522,
# printed first as (movie ID, score) pairs and then with the movie titles
n_recommendations522=top_n_recommendations_uucf(522, ratings_matrix)
print("Top-10 recommendations for user 522 ", n_recommendations522)
top_10_recommendation522=find_movies(n_recommendations522,movie_matrix)
print(top_10_recommendation522)

Top-10 recommendations for user 522  [(565, 6.180285714285714), (1450, 6.180285714285714), (1563, 6.180285714285714), (1819, 6.180285714285714), (4076, 6.180285714285714), (4591, 6.180285714285714), (4796, 6.180285714285714), (4930, 6.180285714285714), (5427, 6.180285714285714), (3216, 5.552834319526627)]
[[565, 'Cronos (1993)', 6.180285714285714], [1450, 'Prisoner of the Mountains (Kavkazsky plennik) (1996)', 6.180285714285714], [1563, 'Dream With the Fishes (1997)', 6.180285714285714], [1819, 'Storefront Hitchcock (1997)', 6.180285714285714], [4076, 'Two Ninas (1999)', 6.180285714285714], [4591, 'Erik the Viking (1989)', 6.180285714285714], [4796, 'Grass Is Greener, The (1960)', 6.180285714285714], [4930, 'Funeral in Berlin (1966)', 6.180285714285714], [5427, 'Caveman (1981)', 6.180285714285714], [3216, 'Vampyros Lesbos (Vampiras, Las) (1971)', 5.552834319526627]]


###8.2 IICF

In [74]:
# Build the item-item similarity matrix from the ratings; it is reused by
# all item-item CF cells below
II_similarity_matrix=cosine_similarity_tot(ratings_matrix)
print(II_similarity_matrix)

movieId    1         2         3         4         5         6         7       \
movieId                                                                         
1        1.000000  0.143077  0.076882  0.008127  0.093661  0.014571  0.109841   
2        0.143077  1.000000  0.023050  0.042711  0.009544 -0.003685  0.103450   
3        0.076882  0.023050  1.000000  0.015516  0.227926 -0.000419  0.022623   
4        0.008127  0.042711  0.015516  1.000000  0.091506 -0.020982  0.185295   
5        0.093661  0.009544  0.227926  0.091506  1.000000  0.114900  0.005068   
...           ...       ...       ...       ...       ...       ...       ...   
161944   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
162376   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
162542   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
162672   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
163949   0.000000  0.000000 

In [75]:
# Count the strictly positive entries of the similarity matrix
# (the whole matrix is counted, diagonal included)
positive_count = ((II_similarity_matrix > 0).values).sum()
print("Number of strictly positive elements:", positive_count)


Number of strictly positive elements: 8796276


In [76]:
# Similarity between movies 594 and 596. The lookup reads row 596 / column 594
# while the printed label names row 594 / column 596.
# NOTE(review): harmless only if the matrix is symmetric — confirm.
value=similarity_value(II_similarity_matrix,596,594)
print("Element at row", 594, "and column", 596, ":", value)

# Titles of the two movies
name596=get_movie_title(movie_matrix,596 )
name594=get_movie_title(movie_matrix,594 )
print(name596)
print(name594)

# All ratings of each movie
print(ratings_matrix[ratings_matrix["movieId"]==596]["rating"])
print(ratings_matrix[ratings_matrix["movieId"]==594]["rating"])

# Users who have rated both movies
common_users = set(ratings_matrix[ratings_matrix["movieId"] == 596]["userId"]).intersection(set(ratings_matrix[ratings_matrix["movieId"] == 594]["userId"]))

# User IDs and ratings for movie 596, restricted to the common users
print("User IDs and Ratings for Movie 596:")
print(ratings_matrix[(ratings_matrix["movieId"] == 596) & (ratings_matrix["userId"].isin(common_users))][["userId", "rating"]])

# User IDs and ratings for movie 594, restricted to the common users
print("User IDs and Ratings for Movie 594:")
print(ratings_matrix[(ratings_matrix["movieId"] == 594) & (ratings_matrix["userId"].isin(common_users))][["userId", "rating"]])


Element at row 594 and column 596 : 0.31043233186829366
Pinocchio (1940)
Snow White and the Seven Dwarfs (1937)
177      5.0
455      4.0
3296     5.0
5168     4.0
9216     2.0
        ... 
93631    4.0
95797    3.0
96393    2.0
96444    4.0
99337    5.0
Name: rating, Length: 66, dtype: float64
176      5.0
532      4.0
942      1.0
1106     2.5
3294     4.0
        ... 
96442    4.0
97551    4.0
98350    5.0
99335    3.0
99792    4.0
Name: rating, Length: 71, dtype: float64
User IDs and Ratings for Movie 596:
       userId  rating
177         4     5.0
3296       19     5.0
5168       30     4.0
9216       61     2.0
10345      73     3.5
13442      86     4.0
13573      88     4.0
15428     102     4.0
18989     126     4.0
19425     130     4.0
25208     185     3.0
25464     187     3.5
25820     188     5.0
28486     212     3.0
29342     213     2.0
31732     232     5.0
40255     294     3.0
42034     302     4.0
43501     311     2.0
48389     354     4.0
48861     358     3.0


In [77]:
# Build the neighborhood of target item 25 for user 522 (the input for a
# rating prediction; this cell only inspects the neighborhood itself).

# First call with k=0: the first returned value is reported below as the
# number of positive similarities.
# NOTE(review): presumably k=0 disables the neighborhood size limit -- confirm
# against get_neighborhood.
number_positive_sim, positive_similarities = get_neighborhood(
    522, 25, II_similarity_matrix, ratings_matrix, k=0
)
print("number of positive similarities for user 522 give the item 25: ", number_positive_sim)

# Second call with get_neighborhood's default k: this is the neighborhood
# that gets displayed, with movie titles attached.
number_positive_sim_k, positive_similarities_k = get_neighborhood(
    522, 25, II_similarity_matrix, ratings_matrix
)
neighborhood_with_titles = find_movies(positive_similarities_k, movie_matrix)
print(neighborhood_with_titles)

number of positive similarities for user 522 give the item 25:  160
[[4239, 'Blow (2001)', 0.14678734979171315], [44191, 'V for Vendetta (2006)', 0.13850455754010027], [778, 'Trainspotting (1996)', 0.13802688111417477], [1092, 'Basic Instinct (1992)', 0.130573051771941], [485, 'Last Action Hero (1993)', 0.12462113294565984], [81845, "King's Speech, The (2010)", 0.12143250948890247], [165, 'Die Hard: With a Vengeance (1995)', 0.11451716732306033], [122, 'Boomerang (1992)', 0.10486439864836593], [4019, 'Finding Forrester (2000)', 0.0981149558263367], [68157, 'Inglourious Basterds (2009)', 0.09799314504677242], [64839, 'Wrestler, The (2008)', 0.09536885947503582], [6947, 'Master and Commander: The Far Side of the World (2003)', 0.09066515469223772], [1391, 'Mars Attacks! (1996)', 0.08726682721961322], [193, 'Showgirls (1995)', 0.08722732656443696], [30707, 'Million Dollar Baby (2004)', 0.0859246539362791], [2762, 'Sixth Sense, The (1999)', 0.08525999008350205], [1732, 'Big Lebowski, The (

In [78]:
# Top-10 item-item CF recommendations for the user with id = 522,
# printed together with the movie titles.
top_10_recommendation522_iicf = get_top_n_recommendations(
    522, II_similarity_matrix, ratings_matrix, 10
)
print(find_movies(top_10_recommendation522_iicf, movie_matrix))

# Title of the movie with id = 25, printed for reference.
print(get_movie_title(movie_matrix, 25))

[[1232, 'Stalker (1979)', 3.912455651258436], [5690, 'Grave of the Fireflies (Hotaru no haka) (1988)', 3.894039444048318], [58998, 'Forgetting Sarah Marshall (2008)', 3.8797604069770775], [6794, "Beethoven's 2nd (1993)", 3.8737565000535774], [69275, 'Dead Snow (Død snø) (2009)', 3.84812273775676], [72407, 'Twilight Saga: New Moon, The (2009)', 3.845032856608199], [62336, 'FLCL (2000)', 3.830743886234508], [5903, 'Equilibrium (2002)', 3.8225894002109735], [1088, 'Dirty Dancing (1987)', 3.811169352967176], [37475, 'Unfinished Life, An (2005)', 3.809553790636538]]
Leaving Las Vegas (1995)


###8.3 Basket Recommendations

In [79]:
# Basket recommendations with the default arguments, first for the
# single-item basket [1], then for the basket with id=1, id=48, id=239.
for basket_list in ([1], [1, 48, 239]):
    top_recommendation = top_n_basket_recommendation(basket_list, II_similarity_matrix)
    print(top_recommendation)
    print(find_movies(top_recommendation, movie_matrix))

# Same three-item basket again, this time passing 10 and 1 explicitly.
# NOTE(review): presumably these are N and a mode/threshold switch -- confirm
# against top_n_basket_recommendation's signature.
top_recommendation = top_n_basket_recommendation(basket_list, II_similarity_matrix, 10, 1)
print(top_recommendation)
print(find_movies(top_recommendation, movie_matrix))

# Titles of the movies that make up the basket.
for basket_movie_id in (1, 48, 239):
    print(get_movie_title(movie_matrix, basket_movie_id))

[(3114, 0.38751909607714247), (2355, 0.2918505710072837), (364, 0.2679706395354099), (588, 0.2424521970778878), (4886, 0.23609185999396126), (4306, 0.2337924887232159), (58559, 0.22285149745651742), (5349, 0.22253962836326827), (1092, 0.22136134667493135), (8961, 0.22061205440681192)]
[[3114, 'Toy Story 2 (1999)', 0.38751909607714247], [2355, "Bug's Life, A (1998)", 0.2918505710072837], [364, 'Lion King, The (1994)', 0.2679706395354099], [588, 'Aladdin (1992)', 0.2424521970778878], [4886, 'Monsters, Inc. (2001)', 0.23609185999396126], [4306, 'Shrek (2001)', 0.2337924887232159], [58559, 'Dark Knight, The (2008)', 0.22285149745651742], [5349, 'Spider-Man (2002)', 0.22253962836326827], [1092, 'Basic Instinct (1992)', 0.22136134667493135], [8961, 'Incredibles, The (2004)', 0.22061205440681192]]
[(2355, 0.6332192482383019), (3810, 0.5908157457013644), (364, 0.5853370435975439), (3807, 0.5851419830836279), (2089, 0.5754033369218359), (54276, 0.5325588921786969), (2772, 0.5266741503467427), (

In [80]:

# Basket recommendations for two single-item baskets, movie 48 and then
# movie 239, each printed as raw (id, score) pairs and again with titles.
for basket_list in ([48], [239]):
    top_recommendation = top_n_basket_recommendation(basket_list, II_similarity_matrix)
    print(top_recommendation)
    print(find_movies(top_recommendation, movie_matrix))


[(2558, 0.3984611897293433), (888, 0.3459141674116619), (2040, 0.3362803097709172), (2876, 0.32103514015660467), (2017, 0.3032548333720153), (881, 0.2859535574132957), (1021, 0.2804068123262101), (257, 0.27801773796537266), (1760, 0.27062189735913744), (2500, 0.2647288518658158)]
[[2558, 'Forces of Nature (1999)', 0.3984611897293433], [888, 'Land Before Time III: The Time of the Great Giving (1995)', 0.3459141674116619], [2040, 'Computer Wore Tennis Shoes, The (1969)', 0.3362803097709172], [2876, 'Thumbelina (1994)', 0.32103514015660467], [2017, 'Babes in Toyland (1961)', 0.3032548333720153], [881, 'First Kid (1996)', 0.2859535574132957], [1021, 'Angels in the Outfield (1994)', 0.2804068123262101], [257, 'Just Cause (1995)', 0.27801773796537266], [1760, 'Spice World (1997)', 0.27062189735913744], [2500, 'Jawbreaker (1999)', 0.2647288518658158]]
[(3807, 0.5805688433876184), (3810, 0.5795906744610713), (2772, 0.5232126833417525), (2532, 0.48784947653240734), (2531, 0.47680872524790535), 

###8.4 Hybrid Recommendation

In [81]:
# Inputs for the hybrid recommender for the user with id 522.
# NOTE(review): n=-1 presumably asks for the full ranked list rather than a
# top-N cut -- confirm against both recommender functions.

# User-user CF recommendations.
top_recommendations_uucf = top_n_recommendations_uucf(522, ratings_matrix, n=-1)

# Item-item CF recommendations.
top_recommendations_iicf = get_top_n_recommendations(
    522, II_similarity_matrix, ratings_matrix, n=-1
)

In [82]:

# Combine the UUCF and IICF recommendation lists with the weighted hybrid
# recommender and show the result.
top_10_hybrid_recommendatins = weighted_hybrid_recommender(
    top_recommendations_uucf, top_recommendations_iicf
)
print(top_10_hybrid_recommendatins)

# Left as the last bare expression so the notebook renders the titled list.
find_movies(top_10_hybrid_recommendatins, movie_matrix)

[(565, 4.785879967737465), (40412, 4.4899028879097065), (2304, 4.120966372009207), (4518, 4.094669351883896), (85438, 4.085897159325705), (85179, 4.07217115682438), (6063, 4.071367959413578), (26587, 4.059822786332655), (1564, 4.04927569318833), (1860, 4.040194212857106)]


[[565, 'Cronos (1993)', 4.785879967737465],
 [40412, "Dead Man's Shoes (2004)", 4.4899028879097065],
 [2304, 'Love Is the Devil (1998)', 4.120966372009207],
 [4518, 'The Lair of the White Worm (1988)', 4.094669351883896],
 [85438, 'Jane Eyre (2011)', 4.085897159325705],
 [85179, 'Summer Wars (Samâ wôzu) (2009)', 4.07217115682438],
 [6063, 'May (2002)', 4.071367959413578],
 [26587, 'Decalogue, The (Dekalog) (1989)', 4.059822786332655],
 [1564, "For Roseanna (Roseanna's Grave) (1997)", 4.04927569318833],
 [1860, 'Character (Karakter) (1997)', 4.040194212857106]]