# Group Counterfactual Explanation in Recommender Systems - Sliding Window Approach

This notebook implements the sliding window approach for counterfactual explanations in group recommendations using the recoXplainer framework.

In [1]:
import pandas as pd
import numpy as np
import math
import operator
import itertools
from tqdm import tqdm

from recoxplainer.config import cfg
from recoxplainer.data_reader import DataReader
from recoxplainer.models import ALS
from recoxplainer.recommender.grouprecommender import GroupRecommender
from recoxplainer.evaluator.splitter import Splitter

In [2]:
class SlidingWindow:
    def __init__(self, arr, window_size):
        self.arr = arr
        self.window_size = window_size
        self.index = 0

    def get_next_window(self):
        if self.index + self.window_size <= len(self.arr):
            window = self.arr[self.index:self.index + self.window_size]
            self.index += 1
            return window
        return None

In [3]:
def changeData(originalData, groupIds, itemIds):
    newData = originalData.drop(
        originalData[(originalData.itemId.isin(itemIds)) & originalData.userId.isin(groupIds)].index)
    return newData

def getGroupMembers(group):
    group = group.strip()
    members = group.split('_')
    membersIds = []
    for m in members:
        membersIds.append(int(m))
    return membersIds

def getRatedItemsByAllGroupmembers(group, originalData):
    movies = originalData[originalData.userId.isin(group)]['itemId'].unique()
    return movies

def getMoviesForRecommendation(originalData, movie_ids, group):
    movie_ids_group = originalData.loc[originalData.userId.isin(group), "itemId"]
    movie_ids_to_pred = np.setdiff1d(movie_ids, movie_ids_group)
    return movie_ids_to_pred

def findAverageItemIntensityExplanation(e, group, data):
    explanationIntensity = []
    groupint64 = [np.int64(g) for g in group]
    
    for item in e:
        tmp = [item]
        intensity = len(data[(data.itemId.isin(tmp) & data.userId.isin(groupint64))])
        intensity = intensity / len(group)
        explanationIntensity.append(intensity)
    return explanationIntensity

def findUserIntensity(e, group, data):
    userIntensity = []
    for mm in group:
        m = np.int64(mm)
        tmp = [m]
        intensity = len(data[(data.itemId.isin(e) & (data.userId.isin(tmp)))])
        intensity = intensity / len(e)
        userIntensity.append(intensity)
    return userIntensity

In [4]:
# Load and prepare data using recoXplainer
data = DataReader(**cfg.data.ml100k)
data.make_consecutive_ids_in_dataset()
data.binarize(binary_threshold=1)

# Split data
sp = Splitter()
train, test = sp.split_leave_n_out(data, frac=0.1)

# Get movie IDs and groups
movie_ids = data.dataset["itemId"].unique()
all_groups = data.read_groups('groupsWithHighRatings5')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [5]:
# Initialize and train ALS model
model = ALS(**cfg.model.als)
model.fit(train)

# Initialize group recommender
group_recommender = GroupRecommender(train, model)

  self._set_arrayXarray(i, j, x)
100%|██████████| 10/10 [00:01<00:00,  8.88it/s]


In [6]:
# Main processing loop - direct adaptation of sliding-window.py logic
results = []
window_size = 3  # Start with small window for testing
max_calls = 10000

for group_str in tqdm(all_groups, desc="Processing groups"):
    # Get group members
    group = getGroupMembers(group_str)
    
    # Get movies for recommendation
    movies_to_recommend = getMoviesForRecommendation(train.dataset, movie_ids, group)
    
    if len(movies_to_recommend) == 0:
        print(f"No movies to recommend for group {group}")
        continue
        
    # Get initial recommendation
    initial_rec = group_recommender.recommend_group_unseen_items(
        group, movies_to_recommend, top_k=0
    )
    
    # Get rated items
    rated_items = getRatedItemsByAllGroupmembers(group, train.dataset)
    
    if len(rated_items) == 0:
        print(f"No rated items for group {group}")
        continue
    
    # Initialize sliding window
    window = SlidingWindow(rated_items, window_size)
    calls = 0
    found_explanation = False
    
    while True:
        if calls >= max_calls:
            break
            
        current_window = window.get_next_window()
        if current_window is None:
            break
            
        # Try removing current window
        modified_data = changeData(train.dataset, group, current_window)
        calls += 1
        
        # Get new recommendation
        new_rec = group_recommender.recommend_group_unseen_items(
            group, movies_to_recommend, top_k=0
        )
        
        if new_rec != initial_rec:
            # Found a window that changes recommendation
            # Try to find minimal subset
            for length in range(1, len(current_window) + 1):
                if found_explanation:
                    break
                    
                for subset in itertools.combinations(current_window, length):
                    if calls >= max_calls:
                        break
                        
                    modified_data = changeData(train.dataset, group, list(subset))
                    calls += 1
                    
                    new_rec = group_recommender.recommend_group_unseen_items(
                        group, movies_to_recommend, top_k=0
                    )
                    
                    if new_rec != initial_rec:
                        # Found minimal explanation
                        explanation = list(subset)
                        
                        # Calculate metrics
                        item_intensity = findAverageItemIntensityExplanation(
                            explanation, group, train.dataset)
                        user_intensity = findUserIntensity(
                            explanation, group, train.dataset)
                        
                        results.append({
                            'group': group,
                            'initial_rec': initial_rec,
                            'new_rec': new_rec,
                            'explanation': explanation,
                            'explanation_size': len(explanation),
                            'item_intensity': np.mean(item_intensity),
                            'user_intensity': np.mean(user_intensity),
                            'calls': calls
                        })
                        
                        found_explanation = True
                        break
                        
            if found_explanation:
                break

Processing groups: 100%|██████████| 17/17 [01:05<00:00,  3.88s/it]


In [7]:
# Analysis of results
results_df = pd.DataFrame(results)

print("Summary Statistics:")
print(f"Number of groups processed: {len(all_groups)}")
print(f"Number of successful explanations: {len(results)}")
print(f"Success rate: {len(results)/len(all_groups)*100:.2f}%")

if len(results) > 0:
    print("\nMetrics:")
    print(f"Average explanation size: {results_df['explanation_size'].mean():.2f}")
    print(f"Average item intensity: {results_df['item_intensity'].mean():.2f}")
    print(f"Average user intensity: {results_df['user_intensity'].mean():.2f}")
    print(f"Average number of calls: {results_df['calls'].mean():.2f}")
    
    print("\nDetailed Results:")
    display(results_df)
else:
    print("\nNo successful explanations were found.")

Summary Statistics:
Number of groups processed: 17
Number of successful explanations: 0
Success rate: 0.00%

No successful explanations were found.
