In [None]:

import numpy as np
import pandas as pd

# Load the ratings file into a DataFrame. Fields are separated by '::'; a
# multi-character separator requires the Python parsing engine. Column names
# are supplied explicitly, so the first line of the file is read as data.
data = pd.read_csv(
    'data/ratings.dat',
    sep='::',
    engine='python',
    encoding='ISO-8859-1',
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
)

# Preparing the user-item interaction matrix.
# The matrix is sized by the largest user / movie ID; IDs are converted to
# array positions by subtracting 1 (i.e. they are treated as 1-based).
num_users = data.user_id.max()
num_items = data.movie_id.max()

# Create a binary interaction matrix (1 for interaction, 0 for no interaction).
# A single fancy-index assignment marks every observed (user, movie) pair at
# once, instead of iterating over the ratings row by row in Python.
interaction_matrix = np.zeros((num_users, num_items))
user_rows = data.user_id.to_numpy() - 1
item_cols = data.movie_id.to_numpy() - 1
interaction_matrix[user_rows, item_cols] = 1

# Negative sampling: mask with 1 for every item a user has NOT interacted with
negative_interactions = 1 - interaction_matrix

# Define BPR parameters
latent_dim = 50  # Number of latent factors
learning_rate = 0.01
reg_lambda = 0.01  # Regularization parameter
num_epochs = 50
SEED = 42  # Fixed seed so a fresh "Restart & Run All" reproduces the same run

# Seed NumPy's global generator: both the factor initialisation below and any
# later np.random.* sampling draw from it.
np.random.seed(SEED)

# Initialize user and item latent factor matrices with small Gaussian noise
# (mean 0, standard deviation 0.1); one row per user / item.
user_factors = np.random.normal(0, 0.1, (num_users, latent_dim))
item_factors = np.random.normal(0, 0.1, (num_items, latent_dim))

# Helper function for sigmoid
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Computed as exp(-log(1 + exp(-x))) via ``np.logaddexp`` so that large
    negative inputs do not overflow ``exp`` (the naive form emits a
    RuntimeWarning there), while large positive inputs still saturate at 1.

    Parameters
    ----------
    x : float or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``x``, with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), evaluated stably.
    return np.exp(-np.logaddexp(0, -x))

# BPR Training Loop
# One SGD step per user per epoch on a sampled (user, positive, negative) triple.

# A user's positive items never change during training, so look them up once
# instead of rescanning the interaction matrix on every epoch.
positive_items_by_user = [
    np.where(interaction_matrix[user, :] == 1)[0] for user in range(num_users)
]
# Users without any non-interacted item have nothing to sample as a negative;
# they are skipped (sampling from an empty set would raise).
has_negative_item = (negative_interactions == 1).any(axis=1)

for epoch in range(num_epochs):
    for user in range(num_users):
        # Sample a positive item (an item the user interacted with)
        positive_items = positive_items_by_user[user]
        if len(positive_items) == 0 or not has_negative_item[user]:
            continue
        pos_item = np.random.choice(positive_items)

        # Sample a negative item (an item the user did not interact with).
        # Rejection sampling: draw uniformly over all items and retry until the
        # draw is a negative. This is uniform over the user's negatives without
        # materialising the full negative index list on every step.
        neg_item = np.random.randint(num_items)
        while negative_interactions[user, neg_item] != 1:
            neg_item = np.random.randint(num_items)

        # Calculate the difference in predicted scores
        item_diff = item_factors[pos_item] - item_factors[neg_item]
        x_uij = np.dot(user_factors[user], item_diff)
        sigmoid_x_uij = sigmoid(x_uij)

        # Compute gradients of ln(sigmoid(x_uij)) minus the L2 penalty.
        # All three are evaluated before any factor is updated, so each uses
        # the pre-update values.
        step_weight = 1 - sigmoid_x_uij
        grad_user = item_diff * step_weight - reg_lambda * user_factors[user]
        grad_pos_item = user_factors[user] * step_weight - reg_lambda * item_factors[pos_item]
        grad_neg_item = -user_factors[user] * step_weight - reg_lambda * item_factors[neg_item]

        # Update factors (gradient ascent)
        user_factors[user] += learning_rate * grad_user
        item_factors[pos_item] += learning_rate * grad_pos_item
        item_factors[neg_item] += learning_rate * grad_neg_item

    # Print progress
    print(f"Epoch {epoch + 1}/{num_epochs} completed")

# Function to recommend top N items for a user
def recommend_items(user_id, top_n=10, exclude_seen=False):
    """Return the indices of the ``top_n`` highest-scoring items for a user.

    Scores are the dot product of the user's latent vector with every item
    vector, read from the module-level ``user_factors`` / ``item_factors``.

    Parameters
    ----------
    user_id : int
        1-based user ID (row ``user_id - 1`` of ``user_factors``).
    top_n : int, default 10
        Maximum number of items to return.
    exclude_seen : bool, default False
        If True, items the user already interacted with (per the module-level
        ``interaction_matrix``) are dropped from the ranking. The default
        keeps them, so existing callers see no change.

    Returns
    -------
    numpy.ndarray
        0-based item indices, best first (add 1 to get the movie ID).

    Raises
    ------
    IndexError
        If ``user_id`` is outside ``1..number of users``. Without this check
        an ID of 0 or below would silently wrap around to a different user
        through negative indexing.
    """
    num_known_users = user_factors.shape[0]
    if not 1 <= user_id <= num_known_users:
        raise IndexError(
            f"user_id must be between 1 and {num_known_users}, got {user_id}"
        )

    user_vector = user_factors[user_id - 1]
    scores = np.dot(item_factors, user_vector)
    ranked_items = np.argsort(-scores)  # descending by score

    if exclude_seen:
        seen_mask = interaction_matrix[user_id - 1] > 0
        ranked_items = ranked_items[~seen_mask[ranked_items]]

    return ranked_items[:top_n]

# Example: ask for a user ID and show that user's top 10 recommended movies.
# recommend_items returns 0-based item indices; adding 1 converts them back
# to movie IDs before printing.
user_id = int(input("Enter the user ID for recommendations: "))
recommended_items = recommend_items(user_id)

print(f"Top 10 recommended items for User {user_id}:")
for movie_id in recommended_items + 1:
    print(f"Movie ID: {movie_id}")




Epoch 1/50 completed
Epoch 2/50 completed
Epoch 3/50 completed
Epoch 4/50 completed
Epoch 5/50 completed
Epoch 6/50 completed
Epoch 7/50 completed
Epoch 8/50 completed
Epoch 9/50 completed
Epoch 10/50 completed
Epoch 11/50 completed
Epoch 12/50 completed
Epoch 13/50 completed
Epoch 14/50 completed
Epoch 15/50 completed
Epoch 16/50 completed
Epoch 17/50 completed
Epoch 18/50 completed
Epoch 19/50 completed
Epoch 20/50 completed
Epoch 21/50 completed
Epoch 22/50 completed
Epoch 23/50 completed
Epoch 24/50 completed
Epoch 25/50 completed
Epoch 26/50 completed
Epoch 27/50 completed
Epoch 28/50 completed
Epoch 29/50 completed
Epoch 30/50 completed
Epoch 31/50 completed
Epoch 32/50 completed
Epoch 33/50 completed
Epoch 34/50 completed
Epoch 35/50 completed
Epoch 36/50 completed
Epoch 37/50 completed
Epoch 38/50 completed
Epoch 39/50 completed
Epoch 40/50 completed
Epoch 41/50 completed
Epoch 42/50 completed
Epoch 43/50 completed
Epoch 44/50 completed
Epoch 45/50 completed
Epoch 46/50 completed

Calculating RMSE, Precision@k, and Recall@k

In [None]:
# Function to calculate RMSE
def calculate_rmse(interaction_matrix, user_factors, item_factors):
    """Root-mean-square error between observed interactions and model scores.

    Only cells where ``interaction_matrix`` is greater than zero are compared;
    the predicted score for a cell is the dot product of the corresponding
    user and item factor rows.

    Parameters
    ----------
    interaction_matrix : numpy.ndarray
        (users x items) matrix of observed interaction values.
    user_factors : numpy.ndarray
        (users x latent) user factor matrix.
    item_factors : numpy.ndarray
        (items x latent) item factor matrix.

    Returns
    -------
    numpy.float64
        RMSE over the observed (positive) cells.
    """
    observed = interaction_matrix > 0
    score_matrix = user_factors @ item_factors.T
    residuals = interaction_matrix[observed] - score_matrix[observed]
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)

# Function to calculate Precision@k and Recall@k
def precision_recall_at_k(interaction_matrix, user_factors, item_factors, k=10):
    """Average Precision@k and Recall@k over all users with interactions.

    For each user, the ``k`` items with the highest predicted score (dot
    product of user and item factors) are compared against the items whose
    entry in ``interaction_matrix`` is greater than zero. Users without any
    such item are skipped.

    Parameters
    ----------
    interaction_matrix : numpy.ndarray
        (users x items) matrix; values > 0 mark relevant items.
    user_factors : numpy.ndarray
        (users x latent) user factor matrix.
    item_factors : numpy.ndarray
        (items x latent) item factor matrix.
    k : int, default 10
        Number of top-ranked items to evaluate. Must be at least 1.

    Returns
    -------
    tuple of float
        ``(avg_precision, avg_recall)``. Both are NaN when no user has any
        interaction, since the averages are undefined in that case.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative ``k`` would otherwise slice
        "all but the last |k|" ranked items and silently produce meaningless
        numbers; ``k == 0`` would divide by zero.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    precision_list = []
    recall_list = []

    predicted_scores = np.dot(user_factors, item_factors.T)

    for user in range(interaction_matrix.shape[0]):
        # Items the user interacted with
        relevant_mask = interaction_matrix[user] > 0
        num_relevant = int(relevant_mask.sum())

        if num_relevant == 0:
            continue  # Skip users with no interactions

        # Top-k predicted items for the user (fewer if there are < k items)
        top_k_items = np.argsort(-predicted_scores[user])[:k]

        # argsort indices are unique, so counting relevant entries among them
        # is the size of the intersection between retrieved and relevant sets.
        true_positives = int(relevant_mask[top_k_items].sum())

        precision_list.append(true_positives / len(top_k_items))
        recall_list.append(true_positives / num_relevant)

    # No evaluable users: report NaN explicitly instead of letting np.mean([])
    # emit a RuntimeWarning.
    if not precision_list:
        return float("nan"), float("nan")

    # Average over all evaluated users
    avg_precision = np.mean(precision_list)
    avg_recall = np.mean(recall_list)

    return avg_precision, avg_recall

# Evaluate the trained factors and report each metric to four decimal places.
rmse = calculate_rmse(interaction_matrix, user_factors, item_factors)
precision, recall = precision_recall_at_k(
    interaction_matrix, user_factors, item_factors, k=10
)

for label, value in (("RMSE", rmse), ("Precision@10", precision), ("Recall@10", recall)):
    print(f"{label}: {value:.4f}")
