In [46]:
import pandas as pd
import numpy as np

links = pd.read_csv('ml-latest-small/links.csv')
links.head(5)
movies = pd.read_csv('ml-latest-small/movies.csv')
movies.head(5)
tags = pd.read_csv('ml-latest-small/tags.csv')
tags.head(5)
ratings = pd.read_csv("ml-latest-small/ratings.csv")
ratings.head(5)
#dropping the timestamp column
ratings = ratings.drop(['timestamp'], axis=1)
#movie and ratings dataset
movie_ratings = pd.merge(ratings, movies, on='movieId')
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [47]:
def compute_group_similarity(group_ratings):
    """
    Compute the similarity matrix for a group of users

    Args:
        group_ratings: A DataFrame containing the ratings of a group of users

    Returns:
        A similarity matrix where each element [i, j] represents the similarity between user i and user j
    """
    # Standardize the ratings
    standardized_ratings = (group_ratings - group_ratings.mean()) / group_ratings.std()

    # Compute pairwise cosine similarities
    similarities = standardized_ratings.dot(standardized_ratings.T)

    return similarities

In [65]:
def generate_group_recommendations(group_ratings, similarity_matrix):
    """
    Generate group recommendations using a weighted average of individual preferences

    Args:
        group_ratings: A DataFrame containing the ratings of a group of users
        similarity_matrix: A similarity matrix

    Returns:
        A list of recommended items
    """
    # Calculate weighted averages of individual predictions
   # Calculate weighted averages of individual predictions
    predictions = pd.DataFrame(index=group_ratings['movieId'].unique())
    for user_id, row in group_ratings.iterrows():
        weighted_ratings = row['rating'] * similarity_matrix.loc[user_id, :]
        predictions = predictions.join(weighted_ratings)
    predictions = predictions.mean().sort_values(ascending=False).index[:10]
    print(predictions)
   


    # Select the top-10 recommended items with movie information
    recommended_movies = movie_ratings[movie_ratings['movieId'].isin(predictions)]

    return recommended_movies


# Generate group recommendations
similarity_matrix = compute_group_similarity(group_ratings)
recommended_movies = generate_group_recommendations(group_ratings, similarity_matrix)

  standardized_ratings = (group_ratings - group_ratings.mean()) / group_ratings.std()


Int64Index([0, 215, 267, 369, 572, 776, 831, 854, 1091, 1135], dtype='int64')


In [49]:
def generate_explanations(group_ratings, similarity_matrix, recommended_items):
    """
    Generate explanations for why a specific item was recommended

    Args:
        group_ratings: A DataFrame containing the ratings of a group of users
        similarity_matrix: A similarity matrix
        recommended_items: A list of recommended items

    Returns:
        A list of explanations for each recommended item
    """
    explanations = []
    for recommended_item in recommended_items:
        # Find users who rated the recommended item highly
        high_rating_users = group_ratings[group_ratings['movieId'] == recommended_item]['userId']

        # Calculate the average similarity of these users
        if len(high_rating_users) > 0:
            group_similarity = similarity_matrix[high_rating_users].mean()
            explanation = f"Recommended because it is highly rated by users with a mean similarity of {group_similarity:.2f}"
        else:
            explanation = "Recommended because it is overall popular among similar users"

        explanations.append(explanation)

    return explanations

In [50]:
def generate_why_not_explanations(group_ratings, similarity_matrix, recommended_items, why_not_question):
    """
    Generate explanations for why a specific item is not recommended

    Args:
        group_ratings: A DataFrame containing the ratings of a group of users
        similarity_matrix: A similarity matrix
        recommended_items: A list of recommended items
        why_not_question: A why-not question regarding a specific item

    Returns:
        An explanation for the why-not question
    """
    explanations = []

    for recommended_item in recommended_items:
        if why_not_question == "Why not Matrix?" or why_not_question == "Why not action movies?":
            # Handle atomic or group why-not questions
            explanation = "Not recommended because it does not align with the overall preferences of the group"
        elif why_not_question == "Why not rank Matrix first?":
            # Handle position absenteeism why-not question
            explanation = "Not ranked first because it is not the most popular or highly rated item among similar users"
        else:
            explanation = f"Invalid why-not question: {why_not_question}. Please choose from the following: Why not Matrix, Why not action movies, Why not rank Matrix first?"

        explanations.append(explanation)

    return explanations


In [66]:
# Create a group of 3 users
users = [1,2,3]

# Load the MovieLens dataset
data = movie_ratings

# Filter the data for the selected users
group_ratings = data[data['userId'].isin(users)]


# Generate explanations for the why-not questions
explanations = []

# Print the contents of recommended_movies 
print("\nTop-10 Recommendations:")
print(recommended_movies[['movieId', 'title']])


for why_not_question in ["Why not Matrix?", "Why not action movies?", "Why not rank Matrix first?"]:
    explanations = generate_why_not_explanations(group_ratings, similarity_matrix, recommendations, why_not_question)

    if why_not_question == "Why not Matrix?" or why_not_question == "Why not action movies?":
        atomic_question = why_not_question
        atomic_explanation = explanations[0]
        print("Explanation for atomic question '{}': {}".format(atomic_question, atomic_explanation))
    elif why_not_question == "Why not rank Matrix first?":
        position_question = why_not_question
        group_explanation = explanations[0]
        print("Explanation for position absenteeism '{}': {}".format(position_question, group_explanation))
    else:
        group_question = why_not_question
        group_explanation = explanations[0]
        print("Explanation for group question '{}': {}".format(group_question, group_explanation))




Top-10 Recommendations:
       movieId                    title
19280      215    Before Sunrise (1995)
19281      215    Before Sunrise (1995)
19282      215    Before Sunrise (1995)
19283      215    Before Sunrise (1995)
19284      215    Before Sunrise (1995)
...        ...                      ...
86138     1135  Private Benjamin (1980)
86139     1135  Private Benjamin (1980)
86140     1135  Private Benjamin (1980)
86141     1135  Private Benjamin (1980)
86142     1135  Private Benjamin (1980)

[77 rows x 2 columns]
Explanation for atomic question 'Why not Matrix?': Not recommended because it does not align with the overall preferences of the group
Explanation for atomic question 'Why not action movies?': Not recommended because it does not align with the overall preferences of the group
Explanation for position absenteeism 'Why not rank Matrix first?': Not ranked first because it is not the most popular or highly rated item among similar users
