# **Results analysis**

## Utilities

### Dependencies

In [1]:
import pandas as pd
import os
import random
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

### Constants

In [2]:
def get_root_dir_path(curr_working_dir, no_folders_above):
    dir_path = curr_working_dir
    for _ in range(no_folders_above):
        dir_path = os.path.dirname(dir_path)
    return dir_path

In [3]:
ROOT_DIR = get_root_dir_path(os.getcwd(), 2)

In [4]:
# ----------
# Data paths
# ----------

DATA_DIR = ROOT_DIR + "/data/processed/"
EXTRA_DATA_DIR = ROOT_DIR + "/data/processed/extra_cut/"

BOOKS2_5_DIR = DATA_DIR + "Books2_5/"
BOOKS3_5_DIR = DATA_DIR + "Books3_5/"
EXTRA_BOOKS_DIR = EXTRA_DATA_DIR + "Books/"

CDS2_5_DIR = DATA_DIR + "CDs2_5/"
CDS3_5_DIR = DATA_DIR + "CDs3_5/"
EXTRA_CDS_DIR = EXTRA_DATA_DIR + "CDs/"

MOVIES2_5_DIR = DATA_DIR + "Movies2_5/"
MOVIES3_5_DIR = DATA_DIR + "Movies3_5/"
EXTRA_MOVIES_DIR = EXTRA_DATA_DIR + "Movies/"

In [5]:
# ----------------
# Tested scenarios
# ----------------
TEST_CASES = [                                                                       
    "books35_cds510", "books35_movies510",
    "books35_movies810", "books35_cds810",

    "cds35_books510", "cds35_movies510",
    "cds35_books810", "cds35_movies810",

    "movies35_books510", "movies35_cds510",
    "movies35_books810", "movies35_cds810",
]

TEST_CASES_DICT = {
    "books25_movies510": [BOOKS2_5_DIR + "books_2_5__1", BOOKS2_5_DIR + "movies_5_10"], 
    "books25_movies610": [BOOKS2_5_DIR + "books_2_5__2", BOOKS2_5_DIR + "movies_6_10"],  

    "books35_cds510": [BOOKS3_5_DIR + "books_3_5__1", BOOKS3_5_DIR + "cds_5_10"], 
    "books35_movies510": [BOOKS3_5_DIR + "books_3_5__2", BOOKS3_5_DIR + "movies_5_10"], 
    "books35_movies610": [BOOKS3_5_DIR + "books_3_5__3", BOOKS3_5_DIR + "movies_6_10"], 
    "books35_movies810": [EXTRA_BOOKS_DIR + "books_3_5__6", EXTRA_BOOKS_DIR + "movies_8_10"], 
    "books35_cds810": [EXTRA_BOOKS_DIR + "books_3_5__4", EXTRA_BOOKS_DIR + "cds_8_10"], 

    "cds25_books510": [CDS2_5_DIR + "cds_2_5__1", CDS2_5_DIR + "books5_10"], 
    "cds25_books610": [CDS2_5_DIR + "cds_2_5__2", CDS2_5_DIR + "books6_10"], 
    "cds25_movies510": [CDS2_5_DIR + "cds_2_5__3", CDS2_5_DIR + "movies_5_10"], 
    "cds25_movies610": [CDS2_5_DIR + "cds_2_5__4", CDS2_5_DIR + "movies_6_10"], 

    "cds35_books510": [CDS3_5_DIR + "cds_3_5__1", CDS3_5_DIR + "books5_10"], 
    "cds35_books610": [CDS3_5_DIR + "cds_3_5__2", CDS3_5_DIR + "books6_10"], 
    "cds35_movies510": [CDS3_5_DIR + "cds_3_5__3", CDS3_5_DIR + "movies_5_10"], 
    "cds35_movies610": [CDS3_5_DIR + "cds_3_5__4", CDS3_5_DIR + "movies_6_10"],
    "cds35_books810": [EXTRA_CDS_DIR + "cds_3_5__5", EXTRA_CDS_DIR + "books_8_10"], 
    "cds35_movies810": [EXTRA_CDS_DIR + "cds_3_5__7", EXTRA_CDS_DIR + "movies_8_10"], 

    "movies25_books510": [MOVIES2_5_DIR + "movies_2_5__1", MOVIES2_5_DIR + "books5_10"], 
    "movies25_books610": [MOVIES2_5_DIR + "movies_2_5__2", MOVIES2_5_DIR + "books6_10"], 
    "movies25_cds510": [MOVIES2_5_DIR + "movies_2_5__3", MOVIES2_5_DIR + "cds_5_10"], 
    "movies25_cds610": [MOVIES2_5_DIR + "movies_2_5__4", MOVIES2_5_DIR + "cds_6_10"], 

    "movies35_books510": [MOVIES3_5_DIR + "movies_3_5__1", MOVIES3_5_DIR + "books5_10"], 
    "movies35_books610": [MOVIES3_5_DIR + "movies_3_5__2", MOVIES3_5_DIR + "books6_10"], 
    "movies35_cds510": [MOVIES3_5_DIR + "movies_3_5__3", MOVIES3_5_DIR + "cds_5_10"], 
    "movies35_cds610": [MOVIES3_5_DIR + "movies_3_5__4", MOVIES3_5_DIR + "cds_6_10"],
    "movies35_books810": [EXTRA_MOVIES_DIR + "movies_3_5__5", EXTRA_MOVIES_DIR + "books_8_10"], 
    "movies35_cds810": [EXTRA_MOVIES_DIR + "movies_3_5__7", EXTRA_MOVIES_DIR + "cds_8_10"], 
}

In [6]:
COLUMNS_TYPES =  {
        'user_id': 'string',
        'item_id': 'string',
        'rating': 'string',
        'timestamp': 'string',
        'title': 'string',
        'brand': 'string',
        'category': 'string'
    }

In [7]:
LIKED_THRESHOLD = 4

In [8]:
# -------------------
# Path to predictions
# -------------------
PREDICTIONS_DIR = ROOT_DIR + "/models/predictions/"

BOOKS_35_CDS_510_0S = PREDICTIONS_DIR + "/books35_cds510/books35_cds510_GPT_0s.pkl"
BOOKS_35_CDS_510_1S = PREDICTIONS_DIR + "/books35_cds510/books35_cds510_GPT_1s.pkl"
BOOKS_35_CDS_810_0S = PREDICTIONS_DIR + "/cut_8_10/books35_cds810/books35_cds810_0s.pkl"
BOOKS_35_CDS_810_1S = PREDICTIONS_DIR + "/cut_8_10/books35_cds810/books35_cds810_1s.pkl"

BOOKS_35_MOVIES_510_0S = PREDICTIONS_DIR + "/books35_movies510/books35_movies510_GPT_0s.pkl"
BOOKS_35_MOVIES_510_1S = PREDICTIONS_DIR + "/books35_movies510/books35_movies510_GPT_1s.pkl"
BOOKS_35_MOVIES_810_0S = PREDICTIONS_DIR + "/cut_8_10/books35_movies810/books35_movies810_0s.pkl"
BOOKS_35_MOVIES_810_1S = PREDICTIONS_DIR + "/cut_8_10/books35_movies810/books35_movies810_1s.pkl"

CDS_35_BOOKS_510_0S = PREDICTIONS_DIR + "/cds35_books510/cds35_books510_GPT_0s.pkl"
CDS_35_BOOKS_510_1S = PREDICTIONS_DIR + "/cds35_books510/cds35_books510_GPT_1s.pkl"
CDS_35_BOOKS_810_0S = PREDICTIONS_DIR + "/cut_8_10/cds35_books810/cds35_books810_0s.pkl"
CDS_35_BOOKS_810_1S = PREDICTIONS_DIR + "/cut_8_10/cds35_books810/cds35_books810_1s.pkl"

CDS_35_MOVIES_510_0S = PREDICTIONS_DIR + "/cds35_movies510/cds35_movies510_GPT_0s.pkl"
CDS_35_MOVIES_510_1S = PREDICTIONS_DIR + "/cds35_movies510/cds35_movies510_GPT_1s.pkl"
CDS_35_MOVIES_810_0S = PREDICTIONS_DIR + "/cut_8_10/cds35_movies810/cds35_movies810_0s.pkl"
CDS_35_MOVIES_810_1S = PREDICTIONS_DIR + "/cut_8_10/cds35_movies810/cds35_movies810_1s.pkl"

MOVIES_35_BOOKS_510_0S = PREDICTIONS_DIR + "/movies35_books510/movies35_books510_GPT_0s.pkl"
MOVIES_35_BOOKS_510_1S = PREDICTIONS_DIR + "/movies35_books510/movies35_books510_GPT_1s.pkl"
MOVIES_35_BOOKS_810_0S = PREDICTIONS_DIR + "/cut_8_10/movies35_books810/movies35_books810_0s.pkl"
MOVIES_35_BOOKS_810_1S = PREDICTIONS_DIR + "/cut_8_10/movies35_books810/movies35_books810_1s.pkl"

MOVIES_35_CDS_510_0S = PREDICTIONS_DIR + "/movies35_cds510/movies35_cds510_GPT_0s.pkl"
MOVIES_35_CDS_510_1S = PREDICTIONS_DIR + "/movies35_cds510/movies35_cds510_GPT_1s.pkl"
MOVIES_35_CDS_810_0S = PREDICTIONS_DIR + "/cut_8_10/movies35_cds810/movies35_cds810_0s.pkl"
MOVIES_35_CDS_810_1S = PREDICTIONS_DIR + "/cut_8_10/movies35_cds810/movies35_cds810_1s.pkl"

In [9]:
SKIP_KEYS = ['Mean', 'Max', 'Min']

### Methods

In [10]:
def data_loader(data_paths):
    """
        Method to load datasets given their paths
    """
    liked_items_data = pd.read_csv(data_paths[0] + ".csv", dtype=COLUMNS_TYPES)
    candidate_items_data = pd.read_csv(data_paths[1] + ".csv", dtype=COLUMNS_TYPES)
    print("Datasets loaded!")
    return liked_items_data, candidate_items_data

In [11]:
def split_items_for_user(items):
    """
        Splits the items for a user into liked and disliked items.
    """
    liked_items = items[items["rating"].astype(int) > LIKED_THRESHOLD]
    disliked_items = items[items["rating"].astype(int) <= LIKED_THRESHOLD]

    return liked_items, disliked_items

In [12]:
def count_users_with_empty_splits(data):
    """
        Counts users with empty liked_items and empty disliked_items.

        Parameters:
        - data: DataFrame with user-item ratings.

        Returns:
        - count_empty_liked: Number of users with empty liked_items.
        - count_empty_disliked: Number of users with empty disliked_items.
    """
    count_only_liked = 0
    only_liked_users = []

    count_only_disliked = 0
    only_disliked_users = []

    # Group data by user_id
    grouped_data = data.groupby('user_id')

    for user_id, group in grouped_data:
        liked_items, disliked_items = split_items_for_user(group)

        if liked_items.empty:
            count_only_disliked += 1
            only_disliked_users.append(user_id)

        if disliked_items.empty:
            count_only_liked += 1
            only_liked_users.append(user_id)

    return count_only_disliked, only_disliked_users, count_only_liked, only_liked_users

In [13]:
def count_words(sentence):
  """
    Counts the number of words in a sentence.
  """
  splitted = sentence.split(" ")

  return len(splitted)

In [14]:
def plot_length_distribution(df, name, color):
    """
        Plots the distribution of the length of the reviews.
    """
    train_lens = [count_words(sentence) for sentence in df['Explanation']]

    mean_train_len = np.mean(train_lens)
    max_train_len = max(train_lens)
    min_train_len = min(train_lens)

    fig = px.histogram(train_lens,
                    labels={'value': 'Length', 'count': 'Frequency'},
                    title=f'Length Distribution - {name} [Mean: {int(mean_train_len)} - Max: {max_train_len} - Min: {min_train_len}]',
                    color_discrete_sequence=[color])

    fig.update_layout(xaxis_title='Length', yaxis_title='Frequency')

    fig.show()

In [15]:
def plot_length_distribution_comparison(df_0s, df_1s, name):
    """
        Plots the distribution of the length of the reviews for zero and one shot predictions.
    """
    lens_0s = [count_words(sentence) for sentence in df_0s['Explanation']]
    lens_1s = [count_words(sentence) for sentence in df_1s['Explanation']]

    fig = go.Figure()

    fig.add_trace(go.Histogram(x=lens_0s, name="Zero-Shot", marker_color="#28A08C"))
    fig.add_trace(go.Histogram(x=lens_1s, name="One-Shot", marker_color="#01DB8C"))

    fig.update_layout(
        title=f"{name} - Explanation length distribution",
        xaxis_title="Length",
        yaxis_title="Frequency"
    )

    fig.show()

In [16]:
def compute_statistics_explanations(df):
    """
        Computes statistics for the explanations.
    """
    lens = [count_words(sentence) for sentence in df['Explanation']]

    mean_len = np.mean(lens)
    max_len = max(lens)
    min_len = min(lens)

    # Filter explanations based on criteria
    one_word_explanations = [explanation for explanation in df['Explanation'] if count_words(explanation) == 1]
    short_explanations = [explanation for explanation in df['Explanation'] if count_words(explanation) < 5]
    below_mean_explanations = [explanation for explanation in df['Explanation'] if count_words(explanation) < mean_len]
    range_10_20_explanations = [explanation for explanation in df['Explanation'] if 10 <= count_words(explanation) <= 20]
    above_mean_explanations = [explanation for explanation in df['Explanation'] if count_words(explanation) >= mean_len]
    range_max_explanations = [explanation for explanation in df['Explanation'] if max_len-20 <= count_words(explanation) <= max_len]


    return {
        'Mean': mean_len,
        'Max': max_len,
        'Min': min_len,
        'One_word': len(one_word_explanations),
        'Short': len(short_explanations),
        'Below_mean': len(below_mean_explanations),
        'Range_10_20': len(range_10_20_explanations),
        'Above_mean': len(above_mean_explanations),
        'Range_max': len(range_max_explanations),
    }

In [17]:
def plot_statistics_comparison(stats_0s, stats_1s, name, skip_keys=[]):
    """
        Plots statistics comparison between zero and one shot predictions.
    """
    # Filter keys to skip
    stats_0s_filtered = {key: value for key, value in stats_0s.items() if key not in skip_keys}
    stats_1s_filtered = {key: value for key, value in stats_1s.items() if key not in skip_keys}

    # Convert data to DataFrames
    df_0s = pd.DataFrame(stats_0s_filtered.items(), columns=['Column', 'Value'])
    df_1s = pd.DataFrame(stats_1s_filtered.items(), columns=['Column', 'Value'])

    fig = go.Figure()

    fig.add_trace(go.Bar(x=df_0s['Column'], y=df_0s['Value'], name="Zero-Shot", marker_color="#28A08C"))
    fig.add_trace(go.Bar(x=df_1s['Column'], y=df_1s['Value'], name="One-Shot", marker_color="#01DB8C"))

    fig.update_layout(title=f'{name} - Statistics comparison')
    fig.show()

In [18]:
def compare_statistics(stats_0s, stats_1s):
    """
        Compare statistics between zero and one shot predictions.
    """
    keys = set(stats_0s.keys()).union(stats_1s.keys())

    data = {"Property": [], "Zero-shot": [], "One-shot": [], "Shift": []}

    for key in keys:
        value_0s = stats_0s.get(key, None)
        value_1s = stats_1s.get(key, None)

        data["Property"].append(key)
        data["Zero-shot"].append(int(value_0s) if (value_0s is not None) else None)
        data["One-shot"].append(int(value_1s) if (value_1s is not None) else None)
        data["Shift"].append(int(value_1s - value_0s) if (value_0s is not None and value_1s is not None) else None)

    df = pd.DataFrame(data)

    return df

In [19]:
def get_sample_explanations(explanations):
    """
        Gets a sample of explanations based on their length.
    """
    # Calculate mean and max lengths
    mean_length = np.mean([count_words(explanation) for explanation in explanations])
    max_length = np.max([count_words(explanation) for explanation in explanations])

    # Filter explanations based on criteria
    one_word_explanations = [explanation for explanation in explanations if count_words(explanation) == 1]
    short_explanations = [explanation for explanation in explanations if count_words(explanation) < 5]
    below_mean_explanations = [explanation for explanation in explanations if count_words(explanation) < mean_length]
    range_10_20_explanations = [explanation for explanation in explanations if 10 <= count_words(explanation) <= 20]
    above_mean_explanations = [explanation for explanation in explanations if count_words(explanation) >= mean_length]
    range_max_explanations = [explanation for explanation in explanations if max_length-20 <= count_words(explanation) <= max_length]

    # Sample from the filtered explanations
    sample_below_mean = np.random.choice(below_mean_explanations, size=min(5, len(below_mean_explanations)), replace=False)
    sample_10_20 = np.random.choice(range_10_20_explanations, size=min(5, len(range_10_20_explanations)), replace=False)
    sample_above_mean = np.random.choice(above_mean_explanations, size=min(5, len(above_mean_explanations)), replace=False)
    sample_range_max = np.random.choice(range_max_explanations, size=min(5, len(range_max_explanations)), replace=False)

    return {
        'one_word': one_word_explanations,
        'short': short_explanations,
        'count_below_mean': len(below_mean_explanations),
        'sample_below_mean': sample_below_mean,
        'count_10_20': len(range_10_20_explanations),
        'sample_10_20': sample_10_20,
        'count_above_mean': len(above_mean_explanations),
        'sample_above_mean': sample_above_mean,
        'count_range_max': len(range_max_explanations),
        'sample_range_max': sample_range_max
    }

## Unbalanced users' preferences in base domains

Here is an exploratory analysis on the number of users who have either liked or disliked items exclusively within all the base domains used in our experiments.

In [20]:
results_dict = {}

for scenario in TEST_CASES:
    print("\n-------------------------------------------------")
    print(f"The selected test case is: {scenario}!")

    data_paths_to_load = TEST_CASES_DICT.get(scenario)
    base_domain, _ = data_loader(data_paths_to_load)

    count_only_disliked, only_disliked_users, count_only_liked, only_liked_users = count_users_with_empty_splits(base_domain)
    
    results_dict[scenario] = {
        'count_only_disliked': count_only_disliked,
        'only_disliked_users': only_disliked_users,
        'count_only_liked': count_only_liked,
        'only_liked_users': only_liked_users
    }
    
    print(f"Users with only liked items: {count_only_liked}")
    print(f"Users with only disliked items: {count_only_disliked}")
    print("-------------------------------------------------\n")


-------------------------------------------------
The selected test case is: books35_cds510!
Datasets loaded!
Users with only liked items: 195
Users with only disliked items: 62
-------------------------------------------------


-------------------------------------------------
The selected test case is: books35_movies510!
Datasets loaded!
Users with only liked items: 182
Users with only disliked items: 50
-------------------------------------------------


-------------------------------------------------
The selected test case is: books35_movies810!
Datasets loaded!
Users with only liked items: 69
Users with only disliked items: 19
-------------------------------------------------


-------------------------------------------------
The selected test case is: books35_cds810!
Datasets loaded!
Users with only liked items: 66
Users with only disliked items: 19
-------------------------------------------------


-------------------------------------------------
The selected test case is

Let's determine which are the scenarios mostly affected by this characteristic.

In [21]:
max_liked_scenario = max(results_dict.items(), key=lambda x: x[1]['count_only_liked'])
max_disliked_scenario = max(results_dict.items(), key=lambda x: x[1]['count_only_disliked'])

print(f"The scenario with the maximum count_only_liked is: {max_liked_scenario[0]} with {max_liked_scenario[1]['count_only_liked']} users with only liked items")
print(f"The scenario with the maximum count_only_disliked is: {max_disliked_scenario[0]} with {max_disliked_scenario[1]['count_only_disliked']} users with only liked items")

The scenario with the maximum count_only_liked is: cds35_books510 with 460 users with only liked items
The scenario with the maximum count_only_disliked is: movies35_books510 with 120 users with only liked items


In cases where capturing detailed user preferences is challenging, it's interesting to look at the explanations GPT gives during the recommendation step. 

This analysis aims to assess how GPT uses knowledge to overcome this issue, if at all.
We'll also compare how, for the same user, the given explanation varies according to the adopted prompting technique.

In [22]:
# ------------------------
# Books - Read predictions
# ------------------------
books_35_cds_510_0s_preds = pd.read_pickle(BOOKS_35_CDS_510_0S)
books_35_cds_510_1s_preds = pd.read_pickle(BOOKS_35_CDS_510_1S)
books_35_cds_810_0s_preds = pd.read_pickle(BOOKS_35_CDS_810_0S)
books_35_cds_810_1s_preds = pd.read_pickle(BOOKS_35_CDS_810_1S)

books_35_movies_510_0s_preds = pd.read_pickle(BOOKS_35_MOVIES_510_0S)
books_35_movies_510_1s_preds = pd.read_pickle(BOOKS_35_MOVIES_510_1S)
books_35_movies_810_0s_preds = pd.read_pickle(BOOKS_35_MOVIES_810_0S)
books_35_movies_810_1s_preds = pd.read_pickle(BOOKS_35_MOVIES_810_1S)

In [23]:
# ----------------------
# CDs - Read predictions
# ----------------------
cds_35_books_510_0s_preds = pd.read_pickle(CDS_35_BOOKS_510_0S)
cds_35_books_510_1s_preds = pd.read_pickle(CDS_35_BOOKS_510_1S)
cds_35_books_810_0s_preds = pd.read_pickle(CDS_35_BOOKS_810_0S)
cds_35_books_810_1s_preds = pd.read_pickle(CDS_35_BOOKS_810_1S)

cds_35_movies_510_0s_preds = pd.read_pickle(CDS_35_MOVIES_510_0S)
cds_35_movies_510_1s_preds = pd.read_pickle(CDS_35_MOVIES_510_1S)
cds_35_movies_810_0s_preds = pd.read_pickle(CDS_35_MOVIES_810_0S)
cds_35_movies_810_1s_preds = pd.read_pickle(CDS_35_MOVIES_810_1S)

In [24]:
# -------------------------
# Movies - Read predictions
# -------------------------
movies_35_books_510_0s_preds = pd.read_pickle(MOVIES_35_BOOKS_510_0S)
movies_35_books_510_1s_preds = pd.read_pickle(MOVIES_35_BOOKS_510_1S)
movies_35_books_810_0s_preds = pd.read_pickle(MOVIES_35_BOOKS_810_0S)
movies_35_books_810_1s_preds = pd.read_pickle(MOVIES_35_BOOKS_810_1S)

movies_35_cds_510_0s_preds = pd.read_pickle(MOVIES_35_CDS_510_0S)
movies_35_cds_510_1s_preds = pd.read_pickle(MOVIES_35_CDS_510_1S)
movies_35_cds_810_0s_preds = pd.read_pickle(MOVIES_35_CDS_810_0S)
movies_35_cds_810_1s_preds = pd.read_pickle(MOVIES_35_CDS_810_1S)

In [25]:
# -----------------
# Pick random users
# -----------------
only_liked_sample_users = random.sample(max_liked_scenario[1]['only_liked_users'], 5)
only_disliked_sample_users = random.sample(max_disliked_scenario[1]['only_disliked_users'], 5)

print(f"Sample users' ids with only liked items: {only_liked_sample_users}")
print(f"Sample users' ids with only disliked items: {only_disliked_sample_users}")

Sample users' ids with only liked items: ['AZ14WYRAE0ZLT', 'A13V8R45OBN11B', 'A18PRAQOCXAJ43', 'A8PK56SFLXQ8B', 'A1K53QGOP7B8JL']
Sample users' ids with only disliked items: ['A8CC95A35ET2C', 'AQJP15L6DMEQD', 'A136IQFGB01KQB', 'A21AMFQK5SGWTC', 'AOUL71VSSLB7J']


We'll examine the scenario with most users that have only liked items, and then the scenario with most users that have only disliked items.

In [26]:
for user_id in only_liked_sample_users:
    print("\n-------------------------------------------------")
    print(f"User id: {user_id}")
    explanation_0s = cds_35_books_510_0s_preds[cds_35_books_510_1s_preds['UserId'] == user_id]['Explanation'].values[0]
    explanation_1s = cds_35_books_510_1s_preds[cds_35_books_510_1s_preds['UserId'] == user_id]['Explanation'].values[0]

    print(f"Explanation 0s: \n {explanation_0s}")
    print(f"Explanation 1s: \n {explanation_1s}")
    print("-------------------------------------------------\n")


-------------------------------------------------
User id: AZ14WYRAE0ZLT
Explanation 0s: 
 Based on the user's liked CDs, which include Billie Holiday, Edwin Starr, and Professor Longhair, I recommend the following books:
Explanation 1s: 
 Based on your liked CDs, which include Billie Holiday and Edwin Starr, I have ranked the books that are most likely to align with your preferences. The top-ranked book, "The Fabulous Furry Freak Brothers Omnibus" by Gilbert Shelton, falls under the category of "Comics & Graphic Novels," which may appeal to your interest in music-related content. The second-ranked book, "The Dove of Death: A Mystery of Ancient Ireland" by Peter Tremayne, is a mystery novel that may provide an engaging reading experience. The remaining books also cover a range of genres such as fiction, philosophy, and history, offering a diverse selection for you to explore.
-------------------------------------------------


-------------------------------------------------
User id:

In [27]:
for user_id in only_disliked_sample_users:
    print("\n-------------------------------------------------")
    print(f"User id: {user_id}")
    explanation_0s = movies_35_books_510_0s_preds[movies_35_books_510_0s_preds['UserId'] == user_id]['Explanation'].values[0]
    explanation_1s = movies_35_books_510_1s_preds[movies_35_books_510_1s_preds['UserId'] == user_id]['Explanation'].values[0]

    print(f"Explanation 0s: \n {explanation_0s}")
    print(f"Explanation 1s: \n {explanation_1s}")
    print("-------------------------------------------------\n")


-------------------------------------------------
User id: A8CC95A35ET2C
Explanation 0s: 
 Based on the user's liked movies, which are related to The Beatles and Paul McCartney, I recommend the following books:
Explanation 1s: 
 Based on your liked movies, which are related to music artists like The Beatles and Paul McCartney, I have ranked the books that are also related to music and biographies. The top-ranked book, "Dark Horse: The Life And Art Of George Harrison," is a biography about George Harrison, one of the members of The Beatles. This book provides insights into his life and his contributions to music. The second-ranked book, "Fire And Rain: The James Taylor Story," is a biography about James Taylor, another influential musician. The third-ranked book, "The Rolling Stone Illustrated History of Rock and Roll: The Definitive History of the Most Important Artists and Their Music," is a comprehensive guide to the history of rock and roll, covering various artists and their music

## Explanations analysis

We'll start with the length distribution analysis, so that we can assess how verbose could be GPT while explaining recommendations, or, identify anomalies (if any).

Specifically, we'll plot the explanation length distribution for each tested case, comparing those obtained with zero-shot prompting and those obtained with one-shot prompting.
Then, we'll compute some statistics.

### Length distribution

In [28]:
plot_length_distribution_comparison(books_35_movies_510_0s_preds, books_35_movies_510_1s_preds, "Books_3_5_Movies_5_10")
plot_length_distribution_comparison(books_35_movies_810_0s_preds, books_35_movies_810_1s_preds, "Books_3_5_Movies_8_10")

In [29]:
plot_length_distribution_comparison(books_35_cds_510_0s_preds, books_35_cds_510_1s_preds, "Books_3_5_CDs_5_10")
plot_length_distribution_comparison(books_35_cds_810_0s_preds, books_35_cds_810_1s_preds, "Books_3_5_CDs_8_10")

In [30]:
plot_length_distribution_comparison(cds_35_movies_510_0s_preds, cds_35_movies_510_1s_preds, "CDs_3_5_Movies_5_10")
plot_length_distribution_comparison(cds_35_movies_810_0s_preds, cds_35_movies_810_1s_preds, "CDs_3_5_Movies_8_10")

In [31]:
plot_length_distribution_comparison(cds_35_books_510_0s_preds, cds_35_books_510_1s_preds, "CDs_3_5_Books_5_10")
plot_length_distribution_comparison(cds_35_books_810_0s_preds, cds_35_books_810_1s_preds, "CDs_3_5_Books_8_10")

In [32]:
plot_length_distribution_comparison(movies_35_books_510_0s_preds, movies_35_books_510_1s_preds, "Movies_3_5_Books_5_10")
plot_length_distribution_comparison(movies_35_books_810_0s_preds, movies_35_books_810_1s_preds, "Movies_3_5_Books_8_10")

In [33]:
plot_length_distribution_comparison(movies_35_cds_510_0s_preds, movies_35_cds_510_1s_preds, "Movies_3_5_CDs_5_10")
plot_length_distribution_comparison(movies_35_cds_810_0s_preds, movies_35_cds_810_1s_preds, "Movies_3_5_CDs_8_10")

### Statistics

In [34]:
# ------------------
# Base domain: Books
# ------------------
books_35_cds_510_0s_stats = compute_statistics_explanations(books_35_cds_510_0s_preds)
books_35_cds_510_1s_stats = compute_statistics_explanations(books_35_cds_510_1s_preds)
books_35_cds_810_0s_stats = compute_statistics_explanations(books_35_cds_810_0s_preds)
books_35_cds_810_1s_stats = compute_statistics_explanations(books_35_cds_810_1s_preds)

books_35_movies_510_0s_stats = compute_statistics_explanations(books_35_movies_510_0s_preds)
books_35_movies_510_1s_stats = compute_statistics_explanations(books_35_movies_510_1s_preds)
books_35_movies_810_0s_stats = compute_statistics_explanations(books_35_movies_810_0s_preds)
books_35_movies_810_1s_stats = compute_statistics_explanations(books_35_movies_810_1s_preds)

In [35]:
compare_statistics(books_35_cds_510_0s_stats, books_35_cds_510_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,1,2,1
1,Below_mean,532,408,-124
2,Max,275,236,-39
3,Min,16,15,-1
4,Mean,61,100,39
5,Short,0,0,0
6,One_word,0,0,0
7,Range_10_20,91,21,-70
8,Above_mean,293,417,124


In [36]:
compare_statistics(books_35_cds_810_0s_stats, books_35_cds_810_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,1,1,0
1,Below_mean,182,137,-45
2,Max,223,221,-2
3,Min,16,17,1
4,Mean,54,98,43
5,Short,0,0,0
6,One_word,0,0,0
7,Range_10_20,28,4,-24
8,Above_mean,80,125,45


In [37]:
compare_statistics(books_35_movies_510_0s_stats, books_35_movies_510_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,4,6,2
1,Below_mean,544,304,-240
2,Max,209,197,-12
3,Min,1,11,10
4,Mean,42,98,56
5,Short,8,0,-8
6,One_word,8,0,-8
7,Range_10_20,75,10,-65
8,Above_mean,185,425,240


In [38]:
compare_statistics(books_35_movies_810_0s_stats, books_35_movies_810_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,4,13,9
1,Below_mean,173,98,-75
2,Max,197,166,-31
3,Min,1,18,17
4,Mean,39,97,57
5,Short,7,0,-7
6,One_word,7,0,-7
7,Range_10_20,35,5,-30
8,Above_mean,73,148,75


In [39]:
plot_statistics_comparison(books_35_cds_510_0s_stats, books_35_cds_510_1s_stats, "Books_3_5_CDs_5_10", skip_keys=SKIP_KEYS)
plot_statistics_comparison(books_35_cds_810_0s_stats, books_35_cds_810_1s_stats, "Books_3_5_CDs_8_10", skip_keys=SKIP_KEYS)

In [40]:
plot_statistics_comparison(books_35_movies_510_0s_stats, books_35_movies_510_1s_stats, "Books_3_5_Movies_5_10", skip_keys=SKIP_KEYS)
plot_statistics_comparison(books_35_movies_810_0s_stats, books_35_movies_810_1s_stats, "Books_3_5_Movies_8_10", skip_keys=SKIP_KEYS)

In [41]:
# ----------------
# Base domain: CDs
# ----------------
cds_35_books_510_0s_stats = compute_statistics_explanations(cds_35_books_510_0s_preds)
cds_35_books_510_1s_stats = compute_statistics_explanations(cds_35_books_510_1s_preds)
cds_35_books_810_0s_stats = compute_statistics_explanations(cds_35_books_810_0s_preds)
cds_35_books_810_1s_stats = compute_statistics_explanations(cds_35_books_810_1s_preds)

cds_35_movies_510_0s_stats = compute_statistics_explanations(cds_35_movies_510_0s_preds)
cds_35_movies_510_1s_stats = compute_statistics_explanations(cds_35_movies_510_1s_preds)
cds_35_movies_810_0s_stats = compute_statistics_explanations(cds_35_movies_810_0s_preds)
cds_35_movies_810_1s_stats = compute_statistics_explanations(cds_35_movies_810_1s_preds)

In [42]:
compare_statistics(cds_35_books_510_0s_stats, cds_35_books_510_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,5,4,-1
1,Below_mean,916,669,-247
2,Max,234,222,-12
3,Min,1,17,16
4,Mean,47,93,46
5,Short,23,0,-23
6,One_word,23,0,-23
7,Range_10_20,135,7,-128
8,Above_mean,331,578,247


In [43]:
compare_statistics(cds_35_books_810_0s_stats, cds_35_books_810_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,2,6,4
1,Below_mean,342,236,-106
2,Max,217,190,-27
3,Min,1,20,19
4,Mean,43,89,46
5,Short,17,0,-17
6,One_word,17,0,-17
7,Range_10_20,45,2,-43
8,Above_mean,117,223,106


In [44]:
compare_statistics(cds_35_movies_510_0s_stats, cds_35_movies_510_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,1,1,0
1,Below_mean,917,490,-427
2,Max,221,236,15
3,Min,1,17,16
4,Mean,41,100,59
5,Short,37,0,-37
6,One_word,37,0,-37
7,Range_10_20,124,5,-119
8,Above_mean,298,725,427


In [45]:
compare_statistics(cds_35_movies_810_0s_stats, cds_35_movies_810_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,1,3,2
1,Below_mean,308,169,-139
2,Max,221,202,-19
3,Min,1,20,19
4,Mean,37,95,58
5,Short,32,0,-32
6,One_word,32,0,-32
7,Range_10_20,39,1,-38
8,Above_mean,105,244,139


In [46]:
plot_statistics_comparison(cds_35_books_510_0s_stats, cds_35_books_510_1s_stats, "CDs_3_5_Books_5_10", skip_keys=SKIP_KEYS)
plot_statistics_comparison(cds_35_books_810_0s_stats, cds_35_books_810_1s_stats, "CDs_3_5_Books_8_10", skip_keys=SKIP_KEYS)

In [47]:
plot_statistics_comparison(cds_35_movies_510_0s_stats, cds_35_movies_510_1s_stats, "CDs_3_5_Movies_5_10", skip_keys=SKIP_KEYS)
plot_statistics_comparison(cds_35_movies_810_0s_stats, cds_35_movies_810_1s_stats, "CDs_3_5_Movies_8_10", skip_keys=SKIP_KEYS)

In [48]:
# -------------------
# Base domain: Movies
# -------------------
movies_35_books_510_0s_stats = compute_statistics_explanations(movies_35_books_510_0s_preds)
movies_35_books_510_1s_stats = compute_statistics_explanations(movies_35_books_510_1s_preds)
movies_35_books_810_0s_stats = compute_statistics_explanations(movies_35_books_810_0s_preds)
movies_35_books_810_1s_stats = compute_statistics_explanations(movies_35_books_810_1s_preds)

movies_35_cds_510_0s_stats = compute_statistics_explanations(movies_35_cds_510_0s_preds)
movies_35_cds_510_1s_stats = compute_statistics_explanations(movies_35_cds_510_1s_preds)
movies_35_cds_810_0s_stats = compute_statistics_explanations(movies_35_cds_810_0s_preds)
movies_35_cds_810_1s_stats = compute_statistics_explanations(movies_35_cds_810_1s_preds)

In [49]:
compare_statistics(movies_35_books_510_0s_stats, movies_35_books_510_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,2,1,-1
1,Below_mean,806,633,-173
2,Max,237,232,-5
3,Min,1,16,15
4,Mean,42,95,52
5,Short,2,0,-2
6,One_word,2,0,-2
7,Range_10_20,199,14,-185
8,Above_mean,365,592,227


In [50]:
compare_statistics(movies_35_books_810_0s_stats, movies_35_books_810_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,1,13,12
1,Below_mean,301,239,-62
2,Max,221,191,-30
3,Min,1,16,15
4,Mean,40,92,51
5,Short,2,0,-2
6,One_word,2,0,-2
7,Range_10_20,64,8,-56
8,Above_mean,147,222,75


In [51]:
compare_statistics(movies_35_cds_510_0s_stats, movies_35_cds_510_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,14,2,-12
1,Below_mean,856,627,-229
2,Max,190,227,37
3,Min,1,13,12
4,Mean,52,95,43
5,Short,5,0,-5
6,One_word,5,0,-5
7,Range_10_20,148,19,-129
8,Above_mean,369,598,229


In [52]:
compare_statistics(movies_35_cds_810_0s_stats, movies_35_cds_810_1s_stats)

Unnamed: 0,Property,Zero-shot,One-shot,Shift
0,Range_max,4,3,-1
1,Below_mean,249,192,-57
2,Max,189,203,14
3,Min,1,17,16
4,Mean,47,89,41
5,Short,4,0,-4
6,One_word,4,0,-4
7,Range_10_20,50,6,-44
8,Above_mean,122,179,57


In [53]:
plot_statistics_comparison(movies_35_books_510_0s_stats, movies_35_books_510_1s_stats, "Movies_3_5_Books_5_10", skip_keys=SKIP_KEYS)
plot_statistics_comparison(movies_35_books_810_0s_stats, movies_35_books_810_1s_stats, "Movies_3_5_Books_8_10", skip_keys=SKIP_KEYS)

In [54]:
plot_statistics_comparison(movies_35_cds_510_0s_stats, movies_35_cds_510_1s_stats, "Movies_3_5_CDs_5_10", skip_keys=SKIP_KEYS)
plot_statistics_comparison(movies_35_cds_810_0s_stats, movies_35_cds_810_1s_stats, "Movies_3_5_CDs_8_10", skip_keys=SKIP_KEYS)

### Sample scenarios

For simplicity, we'll use the same scenarios used in previous sections.

In [55]:
cds_35_books_510_0s_explanations = get_sample_explanations(cds_35_books_510_0s_preds['Explanation'].values)
cds_35_books_510_1s_explanations = get_sample_explanations(cds_35_books_510_1s_preds['Explanation'].values)

movies_35_books_510_0s_explanations = get_sample_explanations(movies_35_books_510_0s_preds['Explanation'].values)
movies_35_books_510_1s_explanations = get_sample_explanations(movies_35_books_510_1s_preds['Explanation'].values)

#### Short explanations

In [56]:
# ---------------------------
# Count one-word explanations
# ---------------------------
print("\n-------------------------------------------------")
print("CDs_3_5_Books_5_10 - One word explanations:")
print(f"Zero-shot: {len(cds_35_books_510_0s_explanations['one_word'])}")
print(f"One-shot: {len(cds_35_books_510_1s_explanations['one_word'])}")
print("-------------------------------------------------")

print("\n-------------------------------------------------")
print("Movies_3_5_Books_5_10 - One word explanations:")
print(f"Zero-shot: {len(movies_35_books_510_0s_explanations['one_word'])}")
print(f"One-shot: {len(movies_35_books_510_1s_explanations['one_word'])}")
print("-------------------------------------------------")


-------------------------------------------------
CDs_3_5_Books_5_10 - One word explanations:
Zero-shot: 23
One-shot: 0
-------------------------------------------------

-------------------------------------------------
Movies_3_5_Books_5_10 - One word explanations:
Zero-shot: 2
One-shot: 0
-------------------------------------------------


In [57]:
print(f"CDs_3_5_Books_5_10__0s: \n{cds_35_books_510_0s_explanations['one_word']} \n")
print(f"Movies_3_5_Books_5_10__0s: \n{movies_35_books_510_0s_explanations['one_word']}")

CDs_3_5_Books_5_10__0s: 
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] 

Movies_3_5_Books_5_10__0s: 
['', '']


In [58]:
# ---------------------------
# Count short explanations
# ---------------------------
print("\n-------------------------------------------------")
print("CDs_3_5_Books_5_10 - Short explanations:")
print(f"Zero-shot: {len(cds_35_books_510_0s_explanations['short'])}")
print(f"One-shot: {len(cds_35_books_510_1s_explanations['short'])}")
print("-------------------------------------------------")

print("\n-------------------------------------------------")
print("Movies_3_5_Books_5_10 - Short explanations:")
print(f"Zero-shot: {len(movies_35_books_510_0s_explanations['short'])}")
print(f"One-shot: {len(movies_35_books_510_1s_explanations['short'])}")
print("-------------------------------------------------")


-------------------------------------------------
CDs_3_5_Books_5_10 - Short explanations:
Zero-shot: 23
One-shot: 0
-------------------------------------------------

-------------------------------------------------
Movies_3_5_Books_5_10 - Short explanations:
Zero-shot: 2
One-shot: 0
-------------------------------------------------


Therefore, very short explanation (up to 5 words) are the empty ones.

In [59]:
# ---------------------------
# Count 10-20 words explanations
# ---------------------------
print("\n-------------------------------------------------")
print("CDs_3_5_Books_5_10 - 10-20 words explanations:")
print(f"Zero-shot: {cds_35_books_510_0s_explanations['count_10_20']}")
print(f"One-shot: {cds_35_books_510_1s_explanations['count_10_20']}")
print("-------------------------------------------------")

print("\n-------------------------------------------------")
print("Movies_3_5_Books_5_10 - 10-20 words explanations:")
print(f"Zero-shot: {movies_35_books_510_0s_explanations['count_10_20']}")
print(f"One-shot: {movies_35_books_510_1s_explanations['count_10_20']}")
print("-------------------------------------------------")


-------------------------------------------------
CDs_3_5_Books_5_10 - 10-20 words explanations:
Zero-shot: 135
One-shot: 7
-------------------------------------------------

-------------------------------------------------
Movies_3_5_Books_5_10 - 10-20 words explanations:
Zero-shot: 199
One-shot: 14
-------------------------------------------------


In [60]:
print("\n------------- CDs_3_5_Books_5_10__0s -------------")
for explanation in cds_35_books_510_0s_explanations['sample_10_20']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- CDs_3_5_Books_5_10__1s -------------")
for explanation in cds_35_books_510_1s_explanations['sample_10_20']:
    print(explanation)
print("-------------------------------------------------")


------------- CDs_3_5_Books_5_10__0s -------------
Based on the user's liked CDs from The Rolling Stones, I recommend the following books:
Based on the user's liked CDs, which are in the pop and singer-songwriter genres, I recommend the following books:
Based on the user's liked and disliked CDs, I recommend the following books:
Based on the user's liked and disliked CDs, I have ranked the candidate books.
Based on the user's liked CDs, which include soundtracks and classic rock albums, I recommend the following books:
-------------------------------------------------

------------- CDs_3_5_Books_5_10__1s -------------
Based on your liked CDs, which are mainly in the Pop and Soft Rock genres, I recommend the following books:
Based on your liked CDs, which include classic rock, jazz, and alternative metal, I recommend the following books:
Based on your liked CDs, which include Broadway musicals and classical music, I recommend the following books:
Based on your dislike for CDs related 

In [61]:
print("\n------------- Movies_3_5_Books_5_10__0s -------------")
for explanation in movies_35_books_510_0s_explanations['sample_10_20']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- Movies_3_5_Books_5_10__1s -------------")
for explanation in movies_35_books_510_1s_explanations['sample_10_20']:
    print(explanation)
print("-------------------------------------------------")


------------- Movies_3_5_Books_5_10__0s -------------
Based on the user's liked movies, I have recommended books that are in similar categories or have similar themes.
Based on the user's liked movies, which include action movies and studio specials, I would recommend the following books:
Based on the user's liked movies, which include action, adventure, drama, and fantasy genres, I recommend the following books:
Based on the user's liked and disliked movies, I will recommend books that are similar in genre and themes.
Based on your liked movies, I have identified the following patterns:
-------------------------------------------------

------------- Movies_3_5_Books_5_10__1s -------------
Based on your liked movies, which include action, adventure, and fantasy genres, I recommend the following books:
Based on your liked movies in the reality TV genre, I have recommended books that cover a range of topics.
Based on your liked movies, which are "Reilly: Ace of Spies" and "Producers", 

In [62]:
# ---------------------------
# Count below mean explanations
# ---------------------------
print("\n-------------------------------------------------")
print("CDs_3_5_Books_5_10 - Below mean explanations:")
print(f"Zero-shot: {cds_35_books_510_0s_explanations['count_below_mean']}")
print(f"One-shot: {cds_35_books_510_1s_explanations['count_below_mean']}")
print("-------------------------------------------------")

print("\n-------------------------------------------------")
print("Movies_3_5_Books_5_10 - Below mean explanations:")
print(f"Zero-shot: {movies_35_books_510_0s_explanations['count_below_mean']}")
print(f"One-shot: {movies_35_books_510_1s_explanations['count_below_mean']}")
print("-------------------------------------------------")


-------------------------------------------------
CDs_3_5_Books_5_10 - Below mean explanations:
Zero-shot: 916
One-shot: 669
-------------------------------------------------

-------------------------------------------------
Movies_3_5_Books_5_10 - Below mean explanations:
Zero-shot: 806
One-shot: 633
-------------------------------------------------


In [63]:
print("\n------------- CDs_3_5_Books_5_10__0s -------------")
for explanation in cds_35_books_510_0s_explanations['sample_below_mean']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- CDs_3_5_Books_5_10__1s -------------")
for explanation in cds_35_books_510_1s_explanations['sample_below_mean']:
    print(explanation)
print("-------------------------------------------------")


------------- CDs_3_5_Books_5_10__0s -------------
Based on the user's liked CDs, which are in the category of 'World Music', 'Europe', 'British Isles', 'Britain', I recommend books that are related to literature and fiction.
Based on the user's liked and disliked CDs, we can see that the user has a preference for soundtracks and movie scores. Therefore, it is likely that the user has an interest in books related to music or movies.
Based on the user's liked CDs, which are in the categories of 'Dance & Electronic' and 'Techno', and the disliked CDs, which are in the categories of 'Jazz', it seems that the user has a preference for electronic music and may not be interested in jazz.
Based on the user's liked CDs, which are in the categories of 'Classic Rock' and 'Album-Oriented Rock (AOR)', I recommend books that are related to engineering and transportation. The top 5 recommended books are:
Based on the user's liked CDs, which are mainly in the categories of 'CDs & Vinyl', 'Pop', 'Adu

In [64]:
print("\n------------- Movies_3_5_Books_5_10__0s -------------")
for explanation in movies_35_books_510_0s_explanations['sample_below_mean']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- Movies_3_5_Books_5_10__1s -------------")
for explanation in movies_35_books_510_1s_explanations['sample_below_mean']:
    print(explanation)
print("-------------------------------------------------")


------------- Movies_3_5_Books_5_10__0s -------------
Based on the user's liked movies, which are related to science, astronomy, and documentary, I recommend the following books:
Based on the user's liked movies, which include movies about submarines, action & adventure, and television boxed sets, I recommend books that are related to similar themes.
Based on the user's liked item "Orla Fallon's Celtic Christmas" which falls under the category "Movies & TV, Holidays & Seasonal, Christmas", I recommend the following books that are related to holidays, politics, and social sciences.
Based on the user's liked movies in the comedy genre, I recommend books from the "Comics & Graphic Novels" category. These books are likely to appeal to the user's preference for visual storytelling and entertainment. The top 5 recommended books are:
Based on the user's liked movies, which are in the categories of 'Movies & TV' and 'Drama', I have selected books that are in similar categories. The top 5 reco

#### Long explanations

In [65]:
# ---------------------------
# Count above mean explanations
# ---------------------------
print("\n-------------------------------------------------")
print("CDs_3_5_Books_5_10 - Above mean explanations:")
print(f"Zero-shot: {cds_35_books_510_0s_explanations['count_above_mean']}")
print(f"One-shot: {cds_35_books_510_1s_explanations['count_above_mean']}")
print("-------------------------------------------------")

print("\n-------------------------------------------------")
print("Movies_3_5_Books_5_10 - Above mean explanations:")
print(f"Zero-shot: {movies_35_books_510_0s_explanations['count_above_mean']}")
print(f"One-shot: {movies_35_books_510_1s_explanations['count_above_mean']}")
print("-------------------------------------------------")


-------------------------------------------------
CDs_3_5_Books_5_10 - Above mean explanations:
Zero-shot: 331
One-shot: 578
-------------------------------------------------

-------------------------------------------------
Movies_3_5_Books_5_10 - Above mean explanations:
Zero-shot: 365
One-shot: 592
-------------------------------------------------


In [66]:
print("\n------------- CDs_3_5_Books_5_10__0s -------------")
for explanation in cds_35_books_510_0s_explanations['sample_above_mean']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- CDs_3_5_Books_5_10__1s -------------")
for explanation in cds_35_books_510_1s_explanations['sample_above_mean']:
    print(explanation)
print("-------------------------------------------------")


------------- CDs_3_5_Books_5_10__0s -------------
Based on the user's liked CDs, which are mainly in the Jazz genre, I recommend books that are related to music and specifically guitar. The recommended books are focused on different aspects of guitar playing, such as arpeggios, soloing, classical guitar method, and scales & modes. These books are likely to be of interest to the user as they provide further knowledge and techniques for guitar playing, which aligns with their interest in Jazz music.
Based on the user's liked CDs, which are mainly in the categories of 'Comedy & Spoken Word' and 'Comedic Music', I recommend books that are related to humor and entertainment. The top-ranked book, "The Effortless Sleep Method: The Incredible New Cure for Insomnia and Chronic Sleep Problems", may be of interest to the user as it falls under the category of 'Health, Fitness & Dieting' and addresses a common issue. The next recommended book, "Dr. Peter Scardino's Prostate Book, Revised Edition

In [67]:
print("\n------------- Movies_3_5_Books_5_10__0s -------------")
for explanation in movies_35_books_510_0s_explanations['sample_above_mean']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- Movies_3_5_Books_5_10__1s -------------")
for explanation in movies_35_books_510_1s_explanations['sample_above_mean']:
    print(explanation)
print("-------------------------------------------------")


------------- Movies_3_5_Books_5_10__0s -------------
Based on the user's liked items, which include movies related to music artists and live performances, and the user's disliked items, which include drama TV series, the recommender system suggests books that are related to music, entertainment, and crafts. The top-ranked book, "Backstage Pass: Just Justin," is a biography about Justin Timberlake, which aligns with the user's interest in music artists. "Kardashian Konfidential" is another biography about Kim Kardashian, who is also mentioned in the user's liked item. The other recommended books are related to crafts, teaching, and entertainment, which are relevant to the user's preferences.
Based on the user's liked movies, which are "Mulholland Drive" and "Amelie", I can see that the user has a preference for movies in the "Movies & TV" category, specifically in the "Studio Specials" subcategory. Therefore, I will recommend books that are also in the "Arts & Photography" category, a

In [68]:
# ---------------------------
# Count Range max words explanations
# ---------------------------
print("\n-------------------------------------------------")
print("CDs_3_5_Books_5_10 - Range max words explanations:")
print(f"Zero-shot: {cds_35_books_510_0s_explanations['count_range_max']}")
print(f"One-shot: {cds_35_books_510_1s_explanations['count_range_max']}")
print("-------------------------------------------------")

print("\n-------------------------------------------------")
print("Movies_3_5_Books_5_10 - Range max wordss explanations:")
print(f"Zero-shot: {movies_35_books_510_0s_explanations['count_range_max']}")
print(f"One-shot: {movies_35_books_510_1s_explanations['count_range_max']}")
print("-------------------------------------------------")


-------------------------------------------------
CDs_3_5_Books_5_10 - Range max words explanations:
Zero-shot: 5
One-shot: 4
-------------------------------------------------

-------------------------------------------------
Movies_3_5_Books_5_10 - Range max wordss explanations:
Zero-shot: 2
One-shot: 1
-------------------------------------------------


In [69]:
print("\n------------- CDs_3_5_Books_5_10__0s -------------")
for explanation in cds_35_books_510_0s_explanations['sample_range_max']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- CDs_3_5_Books_5_10__1s -------------")
for explanation in cds_35_books_510_1s_explanations['sample_range_max']:
    print(explanation)
print("-------------------------------------------------")


------------- CDs_3_5_Books_5_10__0s -------------
Based on the user's liked CDs, which are all Christmas-themed, I recommend books that are related to history, landmarks, and relationships. The top-ranked book, 'Hutzler's: Where Baltimore Shops', is a book about the history of a famous department store in Baltimore. This recommendation is based on the user's interest in Christmas music and the category overlap between the CDs and the book. The second-ranked book, 'Baltimore's Bygone Department Stores: Many Happy Returns', is also a book about the history of department stores in Baltimore, which aligns with the user's interest in Christmas music and the category overlap. The third-ranked book, 'Days to Remember: A Keepsake Book for Birthdays, Anniversaries & Special Occasions', is a book about preserving memories and special occasions, which relates to the user's interest in Christmas music and the sentimental nature of the CDs. The fourth-ranked book, 'Friends: An Address Book for Th

In [70]:
print("\n------------- Movies_3_5_Books_5_10__0s -------------")
for explanation in movies_35_books_510_0s_explanations['sample_range_max']:
    print(explanation)
print("-------------------------------------------------")

print("\n------------- Movies_3_5_Books_5_10__1s -------------")
for explanation in movies_35_books_510_1s_explanations['sample_range_max']:
    print(explanation)
print("-------------------------------------------------")


------------- Movies_3_5_Books_5_10__0s -------------
Based on the user's liked movies, which are in the categories of 'Movies & TV', 'Science Fiction & Fantasy', 'Science Fiction', and 'Animation', I have ranked the books that have similar categories. The top recommended book, 'The Evidence Bible: Irrefutable Evidence for the Thinking Mind', falls under the category of 'Books', 'New, Used & Rental Textbooks', and 'Humanities', which aligns with the user's interest in science fiction and fantasy. The second recommended book, 'The Way Of The Master Evidence Bible', is in the categories of 'Books', 'Christian Books & Bibles', and 'Bibles', which may appeal to the user's interest in movies with religious themes. The third recommended book, 'The Harbinger: The Ancient Mystery That Holds the Secret of America's Future (Thorndike Christian Mystery)', is in the categories of 'Books', 'Christian Books & Bibles', and 'Literature & Fiction', which aligns with the user's interest in movies with 