In [18]:
import numpy as np
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr

# Create the ratings matrix
ratings = {
    'treadmill': [5, 0, 4, 0, 3],
    'dumbbells': [3, 4, 0, 2, 0],
    'yoga_mat': [0, 3, 5, 1, 4],
    'elliptical': [4, 3, 4, 3, 0],
    'kettlebell': [4, 2, 3, 4, 5]
}

# Convert to numpy array for easier manipulation
users = ['User1', 'User2', 'User3', 'User4', 'User5']
products = ['treadmill', 'dumbbells', 'yoga_mat', 'elliptical', 'kettlebell']
ratings_matrix = np.array([ratings[product] for product in products]).T

def cosine_similarity(user1_ratings, user2_ratings):
    # Handle case where vectors contain zeros
    if np.all(user1_ratings == 0) or np.all(user2_ratings == 0):
        return 0
    return 1 - cosine(user1_ratings, user2_ratings)

def pearson_similarity(user1_ratings, user2_ratings):
    # Remove pairs where either rating is 0
    mask = (user1_ratings != 0) & (user2_ratings != 0)
    if np.sum(mask) < 2:  # Need at least 2 pairs for correlation
        return 0
    correlation, _ = pearsonr(user1_ratings[mask], user2_ratings[mask])
    return correlation if not np.isnan(correlation) else 0

def predict_rating(target_user_idx, target_item_idx, ratings_matrix, similarity_func):
    similarities = []
    for i in range(len(ratings_matrix)):
        if i != target_user_idx:
            sim = similarity_func(ratings_matrix[target_user_idx], ratings_matrix[i])
            similarities.append((sim, i))

    # Sort similarities in descending order
    similarities.sort(reverse=True)

    # Calculate weighted average of ratings
    numerator = 0
    denominator = 0
    for sim, user_idx in similarities:
        if ratings_matrix[user_idx][target_item_idx] != 0:  # Only consider non-zero ratings
            numerator += sim * ratings_matrix[user_idx][target_item_idx]
            denominator += abs(sim)

    if denominator == 0:
        return 0
    return numerator / denominator

# Get target user and item indices
target_user = 'User3'
target_item = 'dumbbells'
target_user_idx = users.index(target_user)
target_item_idx = products.index(target_item)

# Calculate predictions using both similarity measures
cosine_prediction = predict_rating(target_user_idx, target_item_idx, ratings_matrix, cosine_similarity)
pearson_prediction = predict_rating(target_user_idx, target_item_idx, ratings_matrix, pearson_similarity)

print(f"\nPredictions for {target_user}'s rating of {target_item}:")
print(f"User-Based CF (Cosine): {cosine_prediction:.2f}")
print(f"User-Based CF (Pearson): {pearson_prediction:.2f}")

# Print similarity matrices for reference
print("\nCosine Similarity Matrix:")
cosine_sim_matrix = np.zeros((len(users), len(users)))
for i in range(len(users)):
    for j in range(len(users)):
        cosine_sim_matrix[i][j] = cosine_similarity(ratings_matrix[i], ratings_matrix[j])
        print(f"{cosine_sim_matrix[i][j]:.2f}", end=" ")
    print()

print("\nPearson Similarity Matrix:")
pearson_sim_matrix = np.zeros((len(users), len(users)))
for i in range(len(users)):
    for j in range(len(users)):
        pearson_sim_matrix[i][j] = pearson_similarity(ratings_matrix[i], ratings_matrix[j])
        print(f"{pearson_sim_matrix[i][j]:.2f}", end=" ")
    print()


Predictions for User3's rating of dumbbells:
User-Based CF (Cosine): 3.00
User-Based CF (Pearson): 1.28

Cosine Similarity Matrix:
1.00 0.64 0.73 0.76 0.61 
0.64 1.00 0.66 0.83 0.50 
0.73 0.66 1.00 0.65 0.82 
0.76 0.83 0.65 1.00 0.62 
0.61 0.50 0.82 0.62 1.00 

Pearson Similarity Matrix:
1.00 -0.87 0.50 0.87 -1.00 
-0.87 1.00 0.87 -0.63 -1.00 
0.50 0.87 1.00 -0.98 -0.50 
0.87 -0.63 -0.98 1.00 1.00 
-1.00 -1.00 -0.50 1.00 1.00 


In [19]:
import pandas as pd
import numpy as np

# Sample ratings data
data = {
    "product_id": ["treadmill", "dumbbells", "yoga_mat", "elliptical", "kettlebell"],
    "User1": [5, 3, 0, 4, 4],
    "User2": [0, 4, 3, 3, 2],
    "User3": [4, 0, 5, 4, 3],
    "User4": [0, 2, 1, 3, 4],
    "User5": [3, 0, 4, 0, 5]
}

# Create DataFrame
ratings = pd.DataFrame(data).set_index('product_id')

# Function to calculate Pearson Correlation
def pearson_correlation(ratings, user1, user2):
    # Get ratings for both users
    user1_ratings = ratings[user1]
    user2_ratings = ratings[user2]

    # Find common rated items
    common_items = user1_ratings.index.intersection(user2_ratings.index)

    if len(common_items) == 0:
        return 0

    # Calculate means for common items
    user1_mean = user1_ratings[common_items].mean()
    user2_mean = user2_ratings[common_items].mean()

    # Calculate numerator and denominators
    numerator = sum((user1_ratings[common_items] - user1_mean) * (user2_ratings[common_items] - user2_mean))
    denom1 = np.sqrt(sum((user1_ratings[common_items] - user1_mean) ** 2))
    denom2 = np.sqrt(sum((user2_ratings[common_items] - user2_mean) ** 2))

    if denom1 * denom2 == 0:
        return 0.0

    return numerator / (denom1 * denom2)

# Function to calculate Cosine Similarity
def cosine_similarity(ratings, user1, user2):
    # Get ratings for both users
    user1_ratings = ratings[user1]
    user2_ratings = ratings[user2]

    # Find common rated items
    common_items = user1_ratings.index.intersection(user2_ratings.index)

    if len(common_items) == 0:
        return 0

    # Calculate the dot product and magnitudes
    dot_product = sum(user1_ratings[common_items] * user2_ratings[common_items])
    norm_user1 = np.sqrt(sum(user1_ratings[common_items] ** 2))
    norm_user2 = np.sqrt(sum(user2_ratings[common_items] ** 2))

    if norm_user1 == 0 or norm_user2 == 0:
        return 0.0

    return dot_product / (norm_user1 * norm_user2)

# Calculate User-based Similarity for User3
user3 = "User3"
user_similarities = {}
for user in ratings.columns:
    if user != user3:
        pearson_score = pearson_correlation(ratings, user3, user)
        cosine_score = cosine_similarity(ratings, user3, user)
        user_similarities[user] = {"Pearson": pearson_score, "Cosine": cosine_score}

# Display User-based Similarities
print("\nUser-based Similarities to User3:")
for user, scores in user_similarities.items():
    print(f"{user} -> Pearson: {scores['Pearson']:.3f}, Cosine: {scores['Cosine']:.3f}")

# Calculate Item-based Similarity for item2 (dumbbells)
item2 = "dumbbells"
item_similarities = {}
for item in ratings.index:
    if item != item2:
        pearson_score = pearson_correlation(ratings.T, item2, item)  # Transpose to swap rows and columns
        cosine_score = cosine_similarity(ratings.T, item2, item)  # Transpose to swap rows and columns
        item_similarities[item] = {"Pearson": pearson_score, "Cosine": cosine_score}

# Display Item-based Similarities
print("\nItem-based Similarities for item2 (dumbbells):")
for item, scores in item_similarities.items():
    print(f"{item} -> Pearson: {scores['Pearson']:.3f}, Cosine: {scores['Cosine']:.3f}")



User-based Similarities to User3:
User1 -> Pearson: -0.216, Cosine: 0.727
User2 -> Pearson: -0.463, Cosine: 0.659
User4 -> Pearson: -0.247, Cosine: 0.652
User5 -> Pearson: 0.486, Cosine: 0.818

Item-based Similarities for item2 (dumbbells):
treadmill -> Pearson: -0.401, Cosine: 0.394
yoga_mat -> Pearson: -0.634, Cosine: 0.364
elliptical -> Pearson: 0.408, Cosine: 0.788
kettlebell -> Pearson: -0.539, Cosine: 0.621


In [20]:
import pandas as pd
import numpy as np

# Sample ratings data
data = {
    "product_id": ["treadmill", "dumbbells", "yoga_mat", "elliptical", "kettlebell"],
    "User1": [5, 3, 0, 4, 4],
    "User2": [0, 4, 3, 3, 2],
    "User3": [4, 0, 5, 4, 3],
    "User4": [0, 2, 1, 3, 4],
    "User5": [3, 0, 4, 0, 5]
}

# Create DataFrame
ratings = pd.DataFrame(data).set_index('product_id')

# Function to calculate Pearson Correlation
def pearson_correlation(ratings, user1, user2):
    user1_ratings = ratings[user1]
    user2_ratings = ratings[user2]
    common_items = user1_ratings.index.intersection(user2_ratings.index)

    if len(common_items) == 0:
        return 0

    user1_mean = user1_ratings[common_items].mean()
    user2_mean = user2_ratings[common_items].mean()

    numerator = sum((user1_ratings[common_items] - user1_mean) * (user2_ratings[common_items] - user2_mean))
    denom1 = np.sqrt(sum((user1_ratings[common_items] - user1_mean) ** 2))
    denom2 = np.sqrt(sum((user2_ratings[common_items] - user2_mean) ** 2))

    if denom1 * denom2 == 0:
        return 0.0

    return numerator / (denom1 * denom2)

# Function to calculate Cosine Similarity
def cosine_similarity(ratings, user1, user2):
    user1_ratings = ratings[user1]
    user2_ratings = ratings[user2]
    common_items = user1_ratings.index.intersection(user2_ratings.index)

    if len(common_items) == 0:
        return 0

    dot_product = sum(user1_ratings[common_items] * user2_ratings[common_items])
    norm_user1 = np.sqrt(sum(user1_ratings[common_items] ** 2))
    norm_user2 = np.sqrt(sum(user2_ratings[common_items] ** 2))

    if norm_user1 == 0 or norm_user2 == 0:
        return 0.0

    return dot_product / (norm_user1 * norm_user2)

# Calculate User-based Similarity for User3
user3 = "User3"
user_similarities = {}
for user in ratings.columns:
    if user != user3:
        pearson_score = pearson_correlation(ratings, user3, user)
        cosine_score = cosine_similarity(ratings, user3, user)
        user_similarities[user] = {"Pearson": pearson_score, "Cosine": cosine_score}

# Calculate the predicted rating for User3 on item2 (dumbbells)
def predict_rating_user_based(ratings, target_user, item_id, n_neighbors=2):
    similarities = {}
    for user in ratings.columns:
        if user != target_user:
            similarity = pearson_correlation(ratings, target_user, user)
            if similarity > 0:  # Only consider positive similarities
                similarities[user] = similarity

    # Sort by similarity and take the top N neighbors
    neighbors = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:n_neighbors]

    weighted_sum = 0
    similarity_sum = 0

    for neighbor, similarity in neighbors:
        rating = ratings.loc[item_id, neighbor]
        if rating > 0:  # Only consider rated items
            weighted_sum += rating * similarity
            similarity_sum += similarity

    if similarity_sum == 0:
        return None

    return round(weighted_sum / similarity_sum, 2)

# Predict missing value for User3 on item2 (dumbbells)
predicted_rating_user_based = predict_rating_user_based(ratings, user3, "dumbbells")

# Display User-based Predicted Rating
print(f"\nPredicted Rating for User3 on item2 (dumbbells): {predicted_rating_user_based}")

# Calculate Item-based Similarity for item2 (dumbbells)
item2 = "dumbbells"
item_similarities = {}
for item in ratings.index:
    if item != item2:
        pearson_score = pearson_correlation(ratings.T, item2, item)  # Transpose to swap rows and columns
        cosine_score = cosine_similarity(ratings.T, item2, item)  # Transpose to swap rows and columns
        item_similarities[item] = {"Pearson": pearson_score, "Cosine": cosine_score}

# Calculate the predicted rating for User3 on item2 based on similar items
def predict_rating_item_based(ratings, target_user, item_id, n_neighbors=2):
    similarities = {}
    for item in ratings.index:
        if item != item_id:
            similarity = pearson_correlation(ratings.T, item_id, item)  # Transpose for item-based similarity
            if similarity > 0:  # Only consider positive similarities
                similarities[item] = similarity

    # Sort by similarity and take the top N neighbors
    neighbors = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:n_neighbors]

    weighted_sum = 0
    similarity_sum = 0

    for neighbor, similarity in neighbors:
        rating = ratings.loc[neighbor, target_user]
        if rating > 0:  # Only consider rated items
            weighted_sum += rating * similarity
            similarity_sum += similarity

    if similarity_sum == 0:
        return None

    return round(weighted_sum / similarity_sum, 2)

# Predict missing value for User3 on item2 (dumbbells) using item-based approach
predicted_rating_item_based = predict_rating_item_based(ratings, user3, "dumbbells")

# Display Item-based Predicted Rating
print(f"\nPredicted Rating for User3 on item2 (dumbbells) using item-based approach: {predicted_rating_item_based}")



Predicted Rating for User3 on item2 (dumbbells): None

Predicted Rating for User3 on item2 (dumbbells) using item-based approach: 4.0
