Import library

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import rankdata

# Read dataset

In [2]:
df_user_item = pd.read_csv('./dataset/user_rating_Item-based CF.csv')
df_user_item.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
0,2.770272,3.0,3.957966,4.424952,3.85619,4.324151,5.0,5.0,3.42174,4.46844,...,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333
1,2.738733,3.0,5.0,4.402382,2.0,3.0,5.0,5.0,3.461146,5.0,...,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667
2,4.0,5.0,3.918627,5.0,3.815417,1.0,4.616258,5.0,4.0,4.543643,...,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333
3,4.0,3.89603,3.0,5.0,3.921901,4.409185,4.619848,1.0,3.496586,5.0,...,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333
4,1.0,3.886082,5.0,4.434295,4.0,4.347749,4.669703,4.437355,3.287407,5.0,...,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667


convert dataframe to numpy matrix

In [3]:
matrix = df_user_item.to_numpy()
matrix

array([[2.77027174, 3.        , 3.95796556, ..., 3.93333333, 3.93333333,
        3.93333333],
       [2.73873318, 3.        , 5.        , ..., 4.16666667, 4.16666667,
        4.16666667],
       [4.        , 5.        , 3.91862689, ..., 4.53333333, 4.53333333,
        4.53333333],
       ...,
       [2.78127436, 3.        , 3.93475525, ..., 3.93333333, 3.93333333,
        3.93333333],
       [4.        , 4.        , 5.        , ..., 3.76666667, 3.76666667,
        3.76666667],
       [1.        , 3.85808676, 3.99479862, ..., 3.5       , 3.5       ,
        3.5       ]])

# Rekomendasi

In [4]:
n_items = 75
n_users = 100

## 1. Average

### Group Recommendation using Average Method

The average method for group recommendations works by:
1. Taking individual ratings/preferences from each group member
2. Calculating the average rating for each item across all group members
3. Recommending items with the highest average ratings

In [5]:
def recommend_group_average(matrix, group_members, n_recommendations=5):
    # Get ratings for group members
    group_ratings = matrix[group_members]

    # Calculate average rating for each item
    average_ratings = np.mean(group_ratings, axis=0)

    # Get indices of top N items
    top_n_indices = np.argsort(average_ratings)[::-1][:n_recommendations]

    return top_n_indices, average_ratings[top_n_indices]

In [6]:
# Example usage
group = [0, 1, 2]  # Example group with users 0,1,2
recommended_items, scores = recommend_group_average(matrix, group)

print("Recommended items (indices):", recommended_items)
print("Recommendation scores:", scores)

Recommended items (indices): [ 7 40  6 66 24]
Recommendation scores: [5.         5.         4.87208587 4.86421926 4.85514981]


## 2. Least Misery

### Group Recommendation using Least Misery Method

The Least Misery strategy aims to minimize the disagreement within a group by:
1. Taking individual ratings from each group member
2. For each item, using the minimum rating across all group members
3. Recommending items with the highest minimum ratings

This ensures no group member strongly dislikes the recommended items.

In [7]:
def recommend_group_least_misery(matrix, group_members, n_recommendations=5):
    # Get ratings for group members
    group_ratings = matrix[group_members]

    # Calculate minimum rating for each item
    min_ratings = np.min(group_ratings, axis=0)

    # Get indices of top N items
    top_n_indices = np.argsort(min_ratings)[::-1][:n_recommendations]

    return top_n_indices, min_ratings[top_n_indices]

In [8]:
# Example usage with the same group
group = [0, 1, 2]  # Group with users 0,1,2
recommended_items, scores = recommend_group_least_misery(matrix, group)

print("Recommended items (indices):", recommended_items)
print("Recommendation scores:", scores)

Recommended items (indices): [40  7 24 73  6]
Recommendation scores: [5.         5.         4.76717747 4.74959335 4.6162576 ]


## 3. Most Pleasure

### Group Recommendation using Most Pleasure Method

The Most Pleasure strategy focuses on maximizing satisfaction by:
1. Taking individual ratings from each group member
2. For each item, using the maximum rating across all group members
3. Recommending items with the highest maximum ratings

This approach emphasizes the most positive experiences of group members.

In [9]:
def recommend_group_most_pleasure(matrix, group_members, n_recommendations=5):
    # Get ratings for group members
    group_ratings = matrix[group_members]

    # Calculate maximum rating for each item
    max_ratings = np.max(group_ratings, axis=0)

    # Get indices of top N items
    top_n_indices = np.argsort(max_ratings)[::-1][:n_recommendations]

    return top_n_indices, max_ratings[top_n_indices]

In [10]:
# Example usage with the same group
group = [0, 1, 2]  # Group with users 0,1,2
recommended_items, scores = recommend_group_most_pleasure(matrix, group)

print("Recommended items (indices):", recommended_items)
print("Recommendation scores:", scores)


Recommended items (indices): [49 69 40 43 46]
Recommendation scores: [5. 5. 5. 5. 5.]


## 4. Borda

### Group Recommendation using Borda Method

The Borda count method works by:
1. Converting each member's ratings into rankings
2. Assigning points based on rankings (N-rank points, where N is number of items)
3. Summing up points across all members
4. Recommending items with highest total points

In [11]:
def recommend_group_borda(matrix, group_members, n_recommendations=5):
    # Get ratings for group members
    group_ratings = matrix[group_members]

    # Convert ratings to rankings for each user
    n_items = matrix.shape[1]
    borda_scores = np.zeros(n_items)

    for user_ratings in group_ratings:
        # Get ranking of items (handle ties with 'min' method)
        ranking = n_items - rankdata(user_ratings, method='min')
        # Add Borda points
        borda_scores += ranking

    # Get indices of top N items
    top_n_indices = np.argsort(borda_scores)[::-1][:n_recommendations]

    return top_n_indices, borda_scores[top_n_indices]

In [12]:
# Example usage with the same group
group = [0, 1, 2]  # Group with users 0,1,2
recommended_items, scores = recommend_group_borda(matrix, group)

print("Recommended items (indices):", recommended_items)
print("Recommendation scores:", scores)

Recommended items (indices): [10 11  0 56  4]
Recommendation scores: [280. 274. 272. 270. 267.]


# Testing

In [13]:
df_matrix = pd.DataFrame(matrix)
df_matrix.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,2.770272,3.0,3.957966,4.424952,3.85619,4.324151,5.0,5.0,3.42174,4.46844,...,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333,3.933333
1,2.738733,3.0,5.0,4.402382,2.0,3.0,5.0,5.0,3.461146,5.0,...,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667,4.166667
2,4.0,5.0,3.918627,5.0,3.815417,1.0,4.616258,5.0,4.0,4.543643,...,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333,4.533333
3,4.0,3.89603,3.0,5.0,3.921901,4.409185,4.619848,1.0,3.496586,5.0,...,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333,4.333333
4,1.0,3.886082,5.0,4.434295,4.0,4.347749,4.669703,4.437355,3.287407,5.0,...,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667,4.266667


In [14]:
df_matrix.replace(0, np.nan, inplace=True)


In [15]:
def precision_at_n(recommended, relevant, n):
    recommended_n = recommended[:n]
    hits = len(set(recommended_n) & set(relevant))
    return hits / n if n > 0 else 0


def recall_at_n(recommended, relevant, n):
    recommended_n = recommended[:n]
    hits = len(set(recommended_n) & set(relevant))
    return hits / len(relevant) if len(relevant) > 0 else 0


def dcg_at_n(recommended, relevant_scores, n):
    dcg = 0.0
    for i, item in enumerate(recommended[:n]):
        rel = relevant_scores.get(item, 0)
        dcg += (2 ** rel - 1) / np.log2(i + 2)
    return dcg


def idcg_at_n(relevant_scores, n):
    sorted_rels = sorted(relevant_scores.values(), reverse=True)[:n]
    idcg = 0.0
    for i, rel in enumerate(sorted_rels):
        idcg += (2 ** rel - 1) / np.log2(i + 2)
    return idcg


def ndcg_at_n(recommended, relevant_scores, n):
    dcg = dcg_at_n(recommended, relevant_scores, n)
    idcg = idcg_at_n(relevant_scores, n)
    return dcg / idcg if idcg > 0 else 0


def borda_count_aggregation(ratings_group):
    # ratings_group: DataFrame with items as rows, users as columns
    # For each user, rank items (higher rating = better rank)
    # Assign points inversely proportional to rank, sum across users
    n_items = ratings_group.shape[0]
    scores = pd.Series(0, index=ratings_group.index)
    for user in ratings_group.columns:
        user_ratings = ratings_group[user]
        # Rank items: highest rating rank 1
        ranked = user_ratings.rank(ascending=False, method='min')
        # Borda points: n_items - rank + 1
        points = n_items - ranked + 1
        points = points.fillna(0)
        scores += points
    return scores


def aggregate_group_ratings(ratings_matrix, group_users, method):
    # Extract ratings for group users
    ratings_group = ratings_matrix.iloc[:, group_users]

    if method == 'average':
        group_scores = ratings_group.mean(axis=1, skipna=True)
    elif method == 'least_misery':
        group_scores = ratings_group.min(axis=1, skipna=True)
    elif method == 'most_pleasure':
        group_scores = ratings_group.max(axis=1, skipna=True)
    elif method == 'borda':
        group_scores = borda_count_aggregation(ratings_group)
    else:
        raise ValueError(f"Unknown aggregation method: {method}")

    # Replace NaN with a very low score to avoid recommending unrated items
    group_scores = group_scores.fillna(-np.inf)
    return group_scores

def evaluate_methods(ratings_matrix, groups, top_n=10):
    methods = ['average', 'least_misery', 'most_pleasure', 'borda']
    results = {m: {'precision': [], 'recall': [], 'ndcg': [], 'satisfaction': []} for m in methods}

    for group in groups:
        # Define relevant items for the group as items rated by all group members with rating >= threshold (e.g., 3)
        # Here, we consider relevant items as those with average rating >= 3 by group members
        ratings_group = ratings_matrix.iloc[:, group]
        avg_ratings = ratings_group.mean(axis=1, skipna=True)
        relevant_items = avg_ratings[avg_ratings >= 5].index.tolist()

        # For satisfaction score, we will average the ratings of recommended items by group members

        for method in methods:
            group_scores = aggregate_group_ratings(ratings_matrix, group, method)
            recommended_items = group_scores.sort_values(ascending=False).index.tolist()

            # Compute metrics
            prec = precision_at_n(recommended_items, relevant_items, top_n)
            rec = recall_at_n(recommended_items, relevant_items, top_n)

            # Prepare relevance scores dict for NDCG (use average ratings as relevance)
            relevant_scores = avg_ratings.to_dict()
            ndcg = ndcg_at_n(recommended_items, relevant_scores, top_n)

            # Group satisfaction: average rating of recommended items by group members
            recommended_top = recommended_items[:top_n]
            # Extract ratings for recommended items and group users
            ratings_recommended = ratings_group.loc[recommended_top]
            satisfaction = ratings_recommended.mean(axis=1, skipna=True).mean()

            results[method]['precision'].append(prec)
            results[method]['recall'].append(rec)
            results[method]['ndcg'].append(ndcg)
            results[method]['satisfaction'].append(satisfaction)

    # Aggregate results by averaging over groups
    summary = {}
    for method in methods:
        summary[method] = {
            'Precision@N': np.mean(results[method]['precision']),
            'Recall@N': np.mean(results[method]['recall']),
            'NDCG@N': np.mean(results[method]['ndcg']),
            'Group Satisfaction': np.mean(results[method]['satisfaction'])
        }
    return summary

In [16]:
groups = [[1, 2, 3, 4, 5]]
summary_results = evaluate_methods(df_matrix, groups, top_n=5)
print(summary_results)

{'average': {'Precision@N': 0.0, 'Recall@N': 0.0, 'NDCG@N': 1.0, 'Group Satisfaction': 4.562458069433174}, 'least_misery': {'Precision@N': 0.0, 'Recall@N': 0.0, 'NDCG@N': 0.8500627384300327, 'Group Satisfaction': 4.359708337738592}, 'most_pleasure': {'Precision@N': 0.0, 'Recall@N': 0.0, 'NDCG@N': 0.7214602395463532, 'Group Satisfaction': 4.145588943251299}, 'borda': {'Precision@N': 0.0, 'Recall@N': 0.0, 'NDCG@N': 0.9352816847788071, 'Group Satisfaction': 4.491504548425286}}


Jaccard similarity

In [17]:
methods = ['average', 'least_misery', 'most_pleasure', 'borda']
results = {m: [] for m in methods}
top_n = 10

for group in groups:
    # Define relevant items for the group as items rated by all group members with rating >= threshold (e.g., 3)
    # Here, we consider relevant items as those with average rating >= 3 by group members
    ratings_group = df_matrix.iloc[:, group]
    avg_ratings = ratings_group.mean(axis=1, skipna=True)
    relevant_items = avg_ratings[avg_ratings >= 3].index.tolist()

    # For satisfaction score, we will average the ratings of recommended items by group members

    for method in methods:
        group_scores = aggregate_group_ratings(df_matrix, group, method)
        recommended_items = group_scores.sort_values(ascending=False).index.tolist()

        # Compute metrics
        prec = precision_at_n(recommended_items, relevant_items, top_n)
        rec = recall_at_n(recommended_items, relevant_items, top_n)

        # Prepare relevance scores dict for NDCG (use average ratings as relevance)
        relevant_scores = avg_ratings.to_dict()
        ndcg = ndcg_at_n(recommended_items, relevant_scores, top_n)

        # Group satisfaction: average rating of recommended items by group members
        recommended_top = recommended_items[:top_n]
        results[method].append(recommended_top)

for method in methods:
    results[method] = results[method][0]

print(results)

{'average': [8, 20, 53, 7, 18, 60, 63, 65, 56, 39], 'least_misery': [9, 39, 38, 18, 56, 8, 45, 63, 20, 33], 'most_pleasure': [37, 29, 71, 70, 68, 35, 1, 38, 39, 40], 'borda': [39, 18, 20, 71, 8, 53, 56, 22, 38, 15]}
