In [1]:
# Print a fixed greeting to standard output
print("hello world")

hello world


In [None]:
%pip install pandas

import pandas as pd
import numpy as np

In [None]:
# Load the raw ratings: the file is tab-separated and is read without a header row
df = pd.read_csv('u.data', sep='\t', header=None)

# Label the four columns, then discard the timestamp, which is not used below
df.columns = ["user_id", "item_id", "rating", "timestamp"]
df = df.drop(columns="timestamp")

# Build the user-by-item rating matrix: one row per user, one column per item/movie.
# User/item pairs without a rating are stored as 0.
rating_matrix = (
    df.pivot(index='user_id', columns='item_id', values='rating')
    .fillna(0)
)

# Calculate the Pearson correlation between two users' ratings
def pearson_correlation(user1_id, user2_id):
    """Return the Pearson correlation of two users over their co-rated items.

    Only items that both users have rated (a stored value above 0 in
    ``rating_matrix``) take part in the calculation.

    Args:
        user1_id: Row label of the first user in ``rating_matrix``.
        user2_id: Row label of the second user in ``rating_matrix``.

    Returns:
        The correlation coefficient as a float. ``0.0`` is returned when the
        coefficient is undefined: the users share fewer than two co-rated
        items, or one of them gave the same rating to every co-rated item.

    Raises:
        KeyError: If either id is not a row label of ``rating_matrix``.
    """
    user1_ratings = rating_matrix.loc[user1_id]
    user2_ratings = rating_matrix.loc[user2_id]

    # 0 marks "not rated", so keep only the items both users have rated
    both_rated = (user1_ratings > 0) & (user2_ratings > 0)

    # A correlation needs at least two paired observations
    if both_rated.sum() < 2:
        return 0.0

    user1_common = user1_ratings[both_rated].to_numpy(dtype=float)
    user2_common = user2_ratings[both_rated].to_numpy(dtype=float)

    # With zero variance on either side np.corrcoef divides by zero and yields
    # NaN, which would poison every sort and sum built on top of this value
    if user1_common.min() == user1_common.max():
        return 0.0
    if user2_common.min() == user2_common.max():
        return 0.0

    return float(np.corrcoef(user1_common, user2_common)[0, 1])

def predict_rating(user_id, item_id, n_neighbors=10):
    """Predict the rating ``user_id`` would give to ``item_id``.

    The prediction is the active user's mean rating plus a
    similarity-weighted average of how far each selected neighbor's rating
    of the item deviates from that neighbor's own mean rating. Neighbors are
    the other users who have rated the item; their similarity to the active
    user comes from ``pearson_correlation``.

    Args:
        user_id: Row label of the active user in ``rating_matrix``.
        item_id: Column label of the item in ``rating_matrix``.
        n_neighbors: How many of the most similar neighbors to use.

    Returns:
        The predicted rating, or ``0`` when no prediction can be made (the
        active user has no ratings, or no neighbor has a usable, non-zero
        similarity).
    """
    active_user_ratings = rating_matrix.loc[user_id]

    # 0 marks "not rated"; averaging those entries in would drag the mean
    # towards 0, so only the items the user actually rated are considered
    active_user_rated = active_user_ratings[active_user_ratings > 0]
    if active_user_rated.empty:
        return 0

    # Find the ids of the users who have rated the item
    item_ratings = rating_matrix[item_id]
    neighbors = item_ratings[item_ratings > 0].index

    # Calculate the Pearson correlation for each neighbor
    neighbor_correlations = {}
    for neighbor_id in neighbors:
        # A user is not their own neighbor
        if neighbor_id == user_id:
            continue

        correlation = pearson_correlation(user_id, neighbor_id)

        # An undefined (NaN) similarity cannot be ranked or used as a weight
        if np.isnan(correlation):
            continue

        neighbor_correlations[neighbor_id] = correlation

    # Keep the closest matching neighbors
    top_neighbors = sorted(
        neighbor_correlations, key=neighbor_correlations.get, reverse=True
    )[:n_neighbors]

    weighted_rating_sum = 0
    similarity_sum = 0

    for neighbor_id in top_neighbors:
        similarity = neighbor_correlations[neighbor_id]

        # Mean over the items the neighbor actually rated
        neighbor_ratings = rating_matrix.loc[neighbor_id]
        neighbor_mean = neighbor_ratings[neighbor_ratings > 0].mean()

        # How far the neighbor's rating of this item is from their own average
        rating_difference = rating_matrix.at[neighbor_id, item_id] - neighbor_mean

        # The similarity is the weight; its absolute value is used for
        # normalization so negative correlations do not cancel the total
        weighted_rating_sum += rating_difference * similarity
        similarity_sum += abs(similarity)

    # Avoid dividing by zero when there is no neighbor with a non-zero weight
    if similarity_sum == 0:
        return 0

    active_user_mean = active_user_rated.mean()
    prediction = active_user_mean + (weighted_rating_sum / similarity_sum)

    return prediction

def create_recommendations(target_user, n_recommendations=10):
    """Return the ids of the movies with the highest predicted ratings.

    A rating is predicted with ``predict_rating`` for every movie the target
    user has not rated yet (stored as 0 in ``rating_matrix``).

    Args:
        target_user: Row label of the user in ``rating_matrix``.
        n_recommendations: Maximum number of movie ids to return.

    Returns:
        A list of movie ids ordered from the highest predicted rating down.
    """
    target_ratings = rating_matrix.loc[target_user]

    # Only movies the target user has not rated yet are candidates
    unrated_movies = target_ratings.index[target_ratings == 0]

    movie_recommendations = {}
    for movie_id in unrated_movies:
        prediction = predict_rating(target_user, movie_id)

        # A NaN prediction cannot be ordered against the others, and would
        # make the ranking below unreliable, so it is left out
        if np.isnan(prediction):
            continue

        movie_recommendations[movie_id] = prediction

    # Rank the candidates by predicted rating and keep the best ones
    top_recommendations = sorted(
        movie_recommendations, key=movie_recommendations.get, reverse=True
    )[:n_recommendations]

    return top_recommendations