#user based

##compute similarity and predict missing values using Cosine Similarity and Pearson Correlation

In [2]:
import numpy as np
import pandas as pd

# Sample DataFrame for demonstration
data = {
    'User': ['User1', 'User1', 'User1', 'User2', 'User2', 'User3', 'User3', 'User3', 'User4', 'User4'],
    'Item': ['Item1', 'Item2', 'Item4', 'Item2', 'Item3', 'Item1', 'Item3', 'Item4', 'Item2', 'Item4'],
    'Rating': [5, 3, 4, 4, 5, 4, 3, 5, 3, 4]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Create the user-item matrix
user_item_matrix = df.pivot_table(index='User', columns='Item', values='Rating')
print("User-Item Matrix:\n", user_item_matrix)

# Function to calculate Cosine Similarity manually
def cosine_similarity(user1, user2, user_item_matrix):
    # Get ratings for both users (only for items both have rated)
    common_items = user_item_matrix.columns[user_item_matrix.loc[user1].notna() & user_item_matrix.loc[user2].notna()]

    if len(common_items) == 0:
        return 0  # No common items, no similarity

    ratings_user1 = user_item_matrix.loc[user1, common_items]
    ratings_user2 = user_item_matrix.loc[user2, common_items]

    # Compute dot product and magnitudes
    dot_product = np.dot(ratings_user1, ratings_user2)
    norm_user1 = np.linalg.norm(ratings_user1)
    norm_user2 = np.linalg.norm(ratings_user2)

    return dot_product / (norm_user1 * norm_user2)

# Function to calculate Pearson Correlation manually
def pearson_correlation(user1, user2, user_item_matrix):
    # Get ratings for both users (only for items both have rated)
    common_items = user_item_matrix.columns[user_item_matrix.loc[user1].notna() & user_item_matrix.loc[user2].notna()]

    if len(common_items) == 0:
        return 0  # No common items, no correlation

    ratings_user1 = user_item_matrix.loc[user1, common_items]
    ratings_user2 = user_item_matrix.loc[user2, common_items]

    mean_user1 = ratings_user1.mean()
    mean_user2 = ratings_user2.mean()

    # Compute the numerator and the denominator
    numerator = np.sum((ratings_user1 - mean_user1) * (ratings_user2 - mean_user2))
    denominator = np.sqrt(np.sum((ratings_user1 - mean_user1) ** 2) * np.sum((ratings_user2 - mean_user2) ** 2))

    if denominator == 0:
        return 0  # Avoid division by zero

    return numerator / denominator

# Function to predict missing rating using user-based CF (Cosine or Pearson)
def predict_rating(user_item_matrix, user_similarity, user, item, method="cosine"):
    if item not in user_item_matrix.columns or user not in user_item_matrix.index:
        return None  # Return None if the user or item is not in the matrix

    # Get the ratings of the item for all users
    ratings = user_item_matrix[item]

    # Get the similarity scores for the target user
    if method == "cosine":
        similarity_scores = user_similarity.loc[user]
    elif method == "pearson":
        similarity_scores = user_similarity.loc[user]

    # Exclude the target user from the similarity calculations
    valid_ratings = ratings[ratings.notna()]
    valid_similarity = similarity_scores[valid_ratings.index]

    # Predict using weighted average of ratings
    if valid_similarity.sum() > 0:
        prediction = np.dot(valid_similarity, valid_ratings) / valid_similarity.sum()
    else:
        prediction = valid_ratings.mean()  # Use mean rating if no similar users

    return prediction

# Compute the similarity matrix for all users using Cosine Similarity
def compute_user_similarity(user_item_matrix, method="cosine"):
    users = user_item_matrix.index
    similarity_matrix = pd.DataFrame(index=users, columns=users)

    for user1 in users:
        for user2 in users:
            if method == "cosine":
                similarity_matrix.loc[user1, user2] = cosine_similarity(user1, user2, user_item_matrix)
            elif method == "pearson":
                similarity_matrix.loc[user1, user2] = pearson_correlation(user1, user2, user_item_matrix)

    return similarity_matrix

# Example of Cosine Similarity-based User Similarity Matrix
cosine_similarity_matrix = compute_user_similarity(user_item_matrix, method="cosine")
print("\nCosine Similarity Matrix:\n", cosine_similarity_matrix)

# Example of Pearson Correlation-based User Similarity Matrix
pearson_similarity_matrix = compute_user_similarity(user_item_matrix, method="pearson")
print("\nPearson Correlation Matrix:\n", pearson_similarity_matrix)

# Example Predictions using Cosine Similarity
user = 'User2'
item = 'Item1'
cosine_prediction = predict_rating(user_item_matrix, cosine_similarity_matrix, user, item, method="cosine")
print(f"\nCosine Similarity-based Prediction for {user} and {item}: {cosine_prediction}")

# Example Predictions using Pearson Correlation
pearson_prediction = predict_rating(user_item_matrix, pearson_similarity_matrix, user, item, method="pearson")
print(f"Pearson Correlation-based Prediction for {user} and {item}: {pearson_prediction}")

User-Item Matrix:
 Item   Item1  Item2  Item3  Item4
User                             
User1    5.0    3.0    NaN    4.0
User2    NaN    4.0    5.0    NaN
User3    4.0    NaN    3.0    5.0
User4    NaN    3.0    NaN    4.0

Cosine Similarity Matrix:
 User     User1 User2    User3 User4
User                               
User1      1.0   1.0  0.97561   1.0
User2      1.0   1.0      1.0   1.0
User3  0.97561   1.0      1.0   1.0
User4      1.0   1.0      1.0   1.0

Pearson Correlation Matrix:
 User  User1 User2 User3 User4
User                         
User1   1.0     0  -1.0   1.0
User2     0   1.0     0     0
User3  -1.0     0   1.0     0
User4   1.0     0     0   1.0

Cosine Similarity-based Prediction for User2 and Item1: 4.5
Pearson Correlation-based Prediction for User2 and Item1: 4.5


# item based

#compute similarity and predict missing values using Adjusted Cosine Similarity:

In [3]:
import numpy as np
import pandas as pd

# Sample DataFrame for demonstration
data = {
    'User': ['User1', 'User1', 'User1', 'User2', 'User2', 'User3', 'User3', 'User3', 'User4', 'User4'],
    'Item': ['Item1', 'Item2', 'Item4', 'Item2', 'Item3', 'Item1', 'Item3', 'Item4', 'Item2', 'Item4'],
    'Rating': [5, 3, 4, 4, 5, 4, 3, 5, 3, 4]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Create the user-item matrix
user_item_matrix = df.pivot_table(index='User', columns='Item', values='Rating')
print("User-Item Matrix:\n", user_item_matrix)

# Function to calculate Adjusted Cosine Similarity manually
def adjusted_cosine_similarity(item1, item2, user_item_matrix):
    # Get ratings for both items (only for users who have rated both items)
    common_users = user_item_matrix.index[user_item_matrix[item1].notna() & user_item_matrix[item2].notna()]

    if len(common_users) == 0:
        return 0  # No common users, no similarity

    ratings_item1 = user_item_matrix.loc[common_users, item1]
    ratings_item2 = user_item_matrix.loc[common_users, item2]

    # Compute average rating for each user
    user_means = user_item_matrix.loc[common_users].mean(axis=1)

    # Adjust ratings by subtracting the user's average rating
    adjusted_ratings_item1 = ratings_item1 - user_means
    adjusted_ratings_item2 = ratings_item2 - user_means

    # Compute dot product and magnitudes
    dot_product = np.dot(adjusted_ratings_item1, adjusted_ratings_item2)
    norm_item1 = np.linalg.norm(adjusted_ratings_item1)
    norm_item2 = np.linalg.norm(adjusted_ratings_item2)

    return dot_product / (norm_item1 * norm_item2)

# Function to compute item similarity matrix using Adjusted Cosine Similarity
def compute_item_similarity(user_item_matrix):
    items = user_item_matrix.columns
    similarity_matrix = pd.DataFrame(index=items, columns=items)

    for item1 in items:
        for item2 in items:
            similarity_matrix.loc[item1, item2] = adjusted_cosine_similarity(item1, item2, user_item_matrix)

    return similarity_matrix

# Function to predict rating using item-based CF with Adjusted Cosine Similarity
def predict_rating(user_item_matrix, item_similarity_matrix, user, item):
    if item not in user_item_matrix.columns or user not in user_item_matrix.index:
        return None  # Return None if the user or item is not in the matrix

    # Get ratings of the user for all items
    ratings = user_item_matrix.loc[user]

    # Get similarity scores for the target item
    similarity_scores = item_similarity_matrix[item]

    # Find items rated by the user
    rated_items = ratings[ratings.notna()]

    # Compute weighted average of ratings using similarity scores
    numerator = np.sum(similarity_scores[rated_items.index] * (rated_items - user_item_matrix.loc[user].mean()))
    denominator = np.sum(np.abs(similarity_scores[rated_items.index]))

    # Predict using weighted average
    if denominator != 0:
        prediction = user_item_matrix.loc[user].mean() + (numerator / denominator)
    else:
        prediction = user_item_matrix.loc[user].mean()  # If no similar items, use the mean rating

    return prediction

# Compute the item-item similarity matrix using Adjusted Cosine Similarity
item_similarity_matrix = compute_item_similarity(user_item_matrix)
print("\nItem Similarity Matrix (Adjusted Cosine Similarity):\n", item_similarity_matrix)

# Example Predictions using Adjusted Cosine Similarity
user = 'User2'
item = 'Item1'
prediction = predict_rating(user_item_matrix, item_similarity_matrix, user, item)
print(f"\nPredicted Rating for {user} and {item}: {prediction}")

User-Item Matrix:
 Item   Item1  Item2  Item3  Item4
User                             
User1    5.0    3.0    NaN    4.0
User2    NaN    4.0    5.0    NaN
User3    4.0    NaN    3.0    5.0
User4    NaN    3.0    NaN    4.0

Item Similarity Matrix (Adjusted Cosine Similarity):
 Item  Item1     Item2 Item3     Item4
Item                                 
Item1   1.0      -1.0   NaN       0.0
Item2  -1.0       1.0  -1.0 -0.447214
Item3   NaN      -1.0   1.0      -1.0
Item4   0.0 -0.447214  -1.0       1.0

Predicted Rating for User2 and Item1: 5.0


  return dot_product / (norm_item1 * norm_item2)
  return dot_product / (norm_item1 * norm_item2)
