# Book Recommendation System

# Part IV: Dash

### Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import ast
import itertools

from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from io import StringIO

### Loading the Data

In [2]:
# Book metadata. The 'Unnamed: 0' column (presumably an index saved with the CSV) is discarded.
books = pd.read_csv("data/Books_cleaned.csv").drop(columns='Unnamed: 0')
#ratings = pd.read_csv("data_cleaned/Ratings_cleaned.csv").drop('Unnamed: 0', axis = 1)

# The ratings are split across six part files; read each one and stack them into a single dataframe
ratings_files = ['data/Ratings_cleaned_part_' + str(part) + '.csv' for part in range(1, 7)]
ratings_dfs = list(map(pd.read_csv, ratings_files))
ratings = pd.concat(ratings_dfs, ignore_index=True)
ratings = ratings.drop(columns='Unnamed: 0')

# Genre tables (same 'Unnamed: 0' clean-up as above)
books_genres = pd.read_csv("data/Books_genres_cleaned.csv").drop(columns='Unnamed: 0')
books_genres_list = pd.read_csv("data/Books_genres_list_cleaned.csv").drop(columns='Unnamed: 0')

### Functions

In [3]:
def get_csr_matrix_norm(csr_matrix, method='mean_centering'):
    """Return a row-normalised copy of a CSR matrix.

    Only the explicitly stored entries of each row are normalised; implicit
    zeros are left untouched and the input matrix is never modified.

    Parameters
    ----------
    csr_matrix : scipy.sparse CSR matrix
        Matrix to normalise (it is accessed through ``indptr`` and ``data``).
    method : str
        'mean_centering' -> subtract the row mean;
        'z_score'        -> subtract the row mean and divide by the row std
                            (only centred when the std is 0);
        'min_max'        -> (x - min) / (max - min) of the row
                            (only shifted by the min when max == min).

    Returns
    -------
    A new CSR matrix with the same sparsity structure and a floating dtype.

    Raises
    ------
    ValueError
        If ``method`` is not one of the three supported names.
    """
    # Validate up front so an invalid method is reported even when every row is empty
    if method not in ('mean_centering', 'z_score', 'min_max'):
        raise ValueError("Invalid method. Please specify 'mean_centering', 'z_score', or 'min_max'.")

    # The normalised values are fractional. Writing them into an integer data
    # array would silently truncate them, so integer input is promoted to float.
    if np.issubdtype(csr_matrix.dtype, np.inexact):
        csr_matrix_norm = csr_matrix.copy()
    else:
        csr_matrix_norm = csr_matrix.astype(float)

    indptr = csr_matrix_norm.indptr
    data = csr_matrix_norm.data

    for i in range(csr_matrix_norm.shape[0]):
        row_start = indptr[i]
        row_end = indptr[i + 1]

        if row_start >= row_end:  # Empty row: nothing to normalise
            continue

        row_data = data[row_start:row_end]

        if method == 'mean_centering':
            row_data = row_data - row_data.mean()
        elif method == 'z_score':
            mean = row_data.mean()
            std = row_data.std()
            # A constant row has std 0: only centre it to avoid dividing by zero
            row_data = (row_data - mean) / std if std != 0 else row_data - mean
        else:  # 'min_max'
            max_val = row_data.max()
            min_val = row_data.min()
            # A constant row has max == min: only shift it to avoid dividing by zero
            if max_val != min_val:
                row_data = (row_data - min_val) / (max_val - min_val)
            else:
                row_data = row_data - min_val

        data[row_start:row_end] = row_data

    return csr_matrix_norm

In [4]:
def user_dictionary_to_df(user_dict, target_user):
    """Append the target user's ratings to the global ``ratings`` dataframe.

    Parameters
    ----------
    user_dict : dict or None
        Mapping of book title -> rating given by the target user.
        ``None`` is treated as "no ratings".
    target_user :
        Identifier stored in the 'UserID' column of the new rows.

    Returns
    -------
    A new dataframe: the global ``ratings`` followed by one row per
    (title, BookID) match found in the global ``books`` dataframe.
    """
    # The store feeding this function may still be empty (None)
    user_items = list((user_dict or {}).items())
    target_user_ratings = pd.DataFrame(user_items, columns=['Title', 'Rating'])

    # Translate titles into BookIDs. Exact duplicate (Title, BookID) rows are
    # removed first so that one rating cannot be emitted twice for the same book.
    title_to_id = books[['Title', 'BookID']].drop_duplicates()
    target_user_ratings = pd.merge(target_user_ratings, title_to_id, on='Title', how='left').drop('Title', axis=1)

    # Titles that are not in `books` get a NaN BookID after the left merge;
    # such rows cannot be used as ratings, so they are discarded.
    target_user_ratings = target_user_ratings.dropna(subset=['BookID'])

    # Add an ID to the target_user
    target_user_ratings['UserID'] = target_user

    # Concat the target_user ratings to the original ratings dataframe
    return pd.concat([ratings, target_user_ratings], ignore_index=True)

def selected_users_df(ratings_df, target_books, n_max, target_user):
    """Pick the users that share rated books with the target user.

    Returns a tuple ``(selected_users, selected_ratings)``:
    ``selected_users`` has the columns 'UserID' and 'Coincidences' (number of
    rows of ``ratings_df`` whose BookID is in ``target_books``), and
    ``selected_ratings`` holds every row of ``ratings_df`` belonging to those users.
    At most ``n_max`` users are kept before the target user is (if needed) appended.
    """
    # Count, per user, how many of the target books they have rated
    overlapping = ratings_df[ratings_df['BookID'].isin(target_books)]
    coincidences = overlapping.groupby('UserID').size()
    selected_users = (
        pd.DataFrame(coincidences, columns=['Coincidences'])
        .sort_values(by='Coincidences', ascending=False)
        .reset_index()
    )

    if selected_users.shape[0] > n_max:
        # Number of coincidences of the last user that still fits in the top n_max
        cutoff = selected_users.head(n_max)['Coincidences'].min()

        # Everybody strictly above the cutoff is kept ...
        above_cutoff = selected_users[selected_users['Coincidences'] > cutoff]
        # ... and the remaining places are filled with a random draw among the ties
        tied = selected_users[selected_users['Coincidences'] == cutoff]
        tied_sample = tied.sample(n_max - above_cutoff.shape[0])

        selected_users = pd.concat([above_cutoff, tied_sample], ignore_index=True)

    # Make sure the target user is always part of the selection
    if len(selected_users[selected_users['UserID'] == target_user]) == 0:
        target_row = pd.DataFrame({target_user: len(target_books)}.items(), columns=['UserID', 'Coincidences'])
        selected_users = pd.concat([selected_users, target_row], ignore_index=True)

    # All the ratings given by the selected users
    kept_ids = selected_users.UserID.values
    selected_ratings = ratings_df[ratings_df['UserID'].isin(kept_ids)]

    return selected_users, selected_ratings

def get_users_matrix(ratings):
    """Build the user x book rating matrix.

    Returns a tuple ``(normalised_csr, ratings_matrix)`` where ``ratings_matrix``
    is the dense pivot (rows = UserID, columns = BookID, missing ratings = 0) and
    ``normalised_csr`` is its sparse version normalised row-wise with 'min_max'.
    """
    # One row per user, one column per book; books not rated by a user become 0
    user_book_ratings = ratings[['UserID', 'BookID', 'Rating']]
    ratings_matrix = user_book_ratings.pivot(index='UserID', columns='BookID', values='Rating')
    ratings_matrix = ratings_matrix.fillna(0)

    # Sparse representation, normalised per user
    sparse_ratings = csr_matrix(ratings_matrix.values)
    return get_csr_matrix_norm(sparse_ratings, method='min_max'), ratings_matrix

def knn_model(csr_matrix, matrix, target_user, number_neighbours, ratings, exclude):
    """Rank the books rated by the users most similar to ``target_user``.

    Parameters
    ----------
    csr_matrix : sparse matrix
        Matrix the nearest-neighbours model is fitted on (one row per user).
    matrix : pandas.DataFrame
        Dense user x book matrix whose index holds the user ids; its rows are
        expected to be in the same order as the rows of ``csr_matrix``.
    target_user :
        Label of the target user in ``matrix.index``.
    number_neighbours : int
        Desired number of similar users. It is capped at the number of other
        users available.
    ratings : pandas.DataFrame
        Ratings with the columns 'UserID', 'BookID' and 'Rating'.
    exclude : list-like
        BookIDs that must not be recommended.

    Returns
    -------
    pandas.DataFrame sorted by 'Weighted_Rating' (descending) with the columns
    'index', 'BookID', 'Average_Rating', 'Ratings_Count' and 'Weighted_Rating'.
    """
    # Build the KNN model
    model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
    model_knn.fit(csr_matrix)

    # The row of the target user
    query_index = matrix.index.get_loc(target_user)

    # One extra neighbour is requested below (room for the target user itself),
    # so at most n_users - 1 real neighbours can be asked for. Requesting more
    # neighbours than fitted samples makes scikit-learn raise a ValueError.
    number_neighbours = min(number_neighbours, matrix.shape[0] - 1)

    # NOTE(review): the query is the raw (un-normalised) row while the model was
    # fitted on `csr_matrix`; this is kept as in the original, confirm it is intended.
    _, indices = model_knn.kneighbors(
        matrix.iloc[query_index, :].values.reshape(1, -1),
        n_neighbors=number_neighbours + 1,
    )

    # Remove the target user explicitly instead of assuming it is the first hit
    # (it may not be, since the query row and the fitted row can differ),
    # then keep the requested number of neighbours.
    neighbour_rows = [index for index in indices.flatten() if index != query_index][:number_neighbours]

    # Most similar users
    most_similar_users = [matrix.index[index] for index in neighbour_rows]

    # Ratings of the most similar users, without the excluded books
    similar_users_ratings = ratings[(ratings['UserID'].isin(most_similar_users)) & (~ratings['BookID'].isin(exclude))]

    # Number of ratings each book received from the similar users
    count_ratings = similar_users_ratings.groupby('BookID').size()
    count_ratings_df = pd.DataFrame(count_ratings, columns=['Ratings_Count']).reset_index()

    # Average rating each book received from the similar users
    similar_books = pd.DataFrame(similar_users_ratings.groupby('BookID')['Rating'].mean()).reset_index()
    similar_books.columns = ['BookID', 'Average_Rating']
    similar_books = pd.merge(similar_books, count_ratings_df, on='BookID', how='left')

    # Weighted rating that takes into account both the number of votes and the
    # average rating of a book: the "True Bayesian Estimate" used by IMDB
    # (https://stats.stackexchange.com/questions/6418/rating-system-taking-account-of-number-of-votes,
    #  https://github.com/wbotelhos/rating):
    #
    #   WR = (v / (v + m)) * R + (m / (v + m)) * C
    #
    #   R = average rating of the book
    #   v = number of votes for the book
    #   m = minimum votes required to be listed in the top
    #   C = mean vote across the whole set of ratings
    #
    # Intuition (https://stats.stackexchange.com/questions/189658/what-are-good-resources-on-bayesian-rating):
    #   WR = P(enough evidence) * (rating based on evidence) + P(no evidence) * (best guess when no evidence)
    R = similar_books['Average_Rating']
    v = similar_books['Ratings_Count']
    # Several options are possible for m (e.g. the mean count); the 90th percentile is used
    m = similar_books['Ratings_Count'].quantile(0.9)
    C = similar_users_ratings['Rating'].mean()  # Mean of all the ratings, not the mean of the books' means

    similar_books['Weighted_Rating'] = (v / (v + m)) * R + (m / (v + m)) * C

    # reset_index() keeps the previous positions in an 'index' column, as callers received before
    similar_books = similar_books.sort_values(by='Weighted_Rating', ascending=False).reset_index()

    return similar_books

In [5]:
def contains_all_genres(row, genres):
    """Return True when every genre in ``genres`` appears in the row.

    A genre "appears" when it equals the value of at least one of the seven
    columns 'Genre_1' ... 'Genre_7'. An empty ``genres`` list gives True.
    """
    for genre in genres:
        for i in range(1, 8):
            if row[f'Genre_{i}'] == genre:
                break  # This genre was found; move on to the next one
        else:
            # None of the seven columns holds this genre
            return False
    return True

def contains_any_genre(row, genres):
    """Return True when at least one genre column of the row is in ``genres``.

    The seven columns 'Genre_1' ... 'Genre_7' are inspected in order and the
    search stops at the first match. An empty ``genres`` list gives False.
    """
    for i in range(1, 8):
        if row[f'Genre_{i}'] in genres:
            return True
    return False

def ordinal_number(number):
    """Return ``number`` followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, ...)."""
    last_two_digits = number % 100
    # Numbers whose last two digits are between 10 and 20 always take 'th' (11th, 12th, 13th);
    # otherwise the suffix depends on the last digit only.
    is_teen = 10 <= last_two_digits <= 20
    ending = 'th' if is_teen else {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
    return str(number) + ending
    

def books_satisfying_genres(data, genres, exclude=None, combine=False):
    """Return the books of ``data`` (with their genres) satisfying some genre restrictions.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset with the prediction; it must contain a 'BookID' column.
    genres : list
        Genres the user is interested in. An empty list means "no restriction".
    exclude : list, optional
        Genres the user wants to exclude. ``None`` (default) or an empty list
        excludes nothing.
    combine : bool
        If True, a book must have all the genres in ``genres``; otherwise
        having at least one of them is enough.

    Returns
    -------
    pandas.DataFrame with the columns of ``data`` plus 'Genre_1' ... 'Genre_7'
    (taken from the global ``books_genres``), or ``None`` when ``combine`` is
    True and more than 7 genres are requested (a message is printed).
    """
    # A mutable default argument would be shared between calls, so None is used instead
    exclude = [] if exclude is None else exclude

    genre_columns = [f'Genre_{i}' for i in range(1, 8)]
    data = pd.merge(data, books_genres[['BookID'] + genre_columns], on='BookID', how='left')

    # The row-wise filters are skipped on empty frames: DataFrame.apply(axis=1)
    # does not return a boolean Series there and nothing could be filtered anyway.
    if len(genres) > 0:
        if combine:  # The books must include all the genres specified
            if len(genres) > 7:
                print('Books can have, at most, 7 different genres. If you want book recommendations including all the selected genres simultaneously, please, choose a maximum of 7 options.')
                return None
            if not data.empty:
                data = data[data.apply(lambda row: contains_all_genres(row, genres), axis=1)]
        elif not data.empty:  # The books must include at least one of the genres specified
            data = data[data.apply(lambda row: contains_any_genre(row, genres), axis=1)]

    # Drop the books with at least one of their genres in the list exclude
    if len(exclude) > 0 and not data.empty:
        data = data[~data.apply(lambda row: contains_any_genre(row, exclude), axis=1)]

    return data  # This dataframe contains the genres of the books

## Dash

In [6]:
import dash
from dash import dcc, html, Input, Output, State, ALL
import dash_grocery

import json

See this [link](https://community.plotly.com/t/how-to-use-other-peoples-react-components-in-my-dash-app/65627) for an explanation of how to use the rating stars.

In [7]:
#books = books.head(1000)

In [9]:
# Upper bound on the number of users sharing rated books with the target user
# that are kept when looking for neighbours
n_users_upper_limit = 10_000

# Number of nearest neighbours requested by default
default_number_neighbours = 50




# The Dash application. Callback exceptions are suppressed because some
# components referenced by the callbacks are only created at runtime.
app = dash.Dash(name=__name__, suppress_callback_exceptions=True)



# Create an app layout
# The page has three sections (book selection, "please wait" screen and final
# recommendations). Only one of them is visible at a time; the visibility is
# driven by the flags kept in the 'app_state' store.
app.layout = html.Div([
    dcc.Store( # Store to maintain app state
        id='app_state', 
        data={'book_selection_ongoing': True,
              'potential_recommendations_ongoing': False,
              'final_recommendations_ongoing': False}
    ),  
    dcc.Store( # Store to store the ratings 
        id='rating_store'
    ),
    dcc.Store( # Store with the potential recommendations (JSON-serialised dataframe)
        id='potential_recommendations_df'
    ), 
    #
    # Book Selection
    #
    html.Div([
        html.H1("Book selection"),
        html.P("Choose as many books as you want from the list and rate them. Select at least one."),
        dcc.Dropdown(
            id='dropdown_book_titles',
            options=[
                {'label': book_title, 'value': book_title} for book_title in books['Title']
            ],
            multi=True, # Allow multiple selection
            placeholder="Select books...",
            style={'display': 'block'} # Default style to display the dropdown
        ),
        html.Button("Finish selection", id="finish_book_selection_button"),  # Button to finish selection
        html.P( # Warning shown only when the selection is finished without any book
            "No book selected! Please select at least one book.",
            id='text_no_select', 
            style={'display': 'none'}
        ),
        html.Div(id='selected_books_container'), # Container to show the selected books       
    ], id='book_selection', style={'display': 'block'}),
    #
    # Recommender program
    # 
    html.Div([
        html.H1("Obtaining your recommendations"),
        html.P('Wait while the recommendations are obtained...')
    ], id='potential_recommendations_program', style={'display': 'none'}), 
    #
    # Final recommendations
    # 
    html.Div([
        html.H1("Here are your recommendations!"),
        html.Div([
            html.P('If you want your recommendations to satisfy any genre selection, please, select the genres in the dropdown below.'),
            html.Div([
                html.P('Do you want the recommendations to include all the selected genres or just any of them?', 
                       style={'margin-left': '30px'}),
                html.Button("All", id="include_all_genres", n_clicks=0, style={'margin-left': '30px', 'margin-right': '15px'}),
                html.Button("Any", id="include_any_genres", n_clicks=0),
                dcc.Store( # State of the All/Any buttons ("Any" is the initial choice)
                    id='genre_button_state', 
                    data={'include_all_genres': False, 
                          'include_any_genres': True,
                          'have_they_changed': False}
                ),
            ], style={'display': 'flex', 'align-items': 'center'}),
            html.Div([
                dcc.Dropdown( # Its options are filled in by a callback
                    id='dropdown_include_genres',
                    multi=True,
                    placeholder="Select genre(s) to include..."
                )
            ])
        ], style={'display': 'block'}),
        html.Div([
            html.P("If you want your recommendations to exclude any genre selection, please, select the genres in the dropdown below."),
            html.Div([
                dcc.Dropdown( # Its options are filled in by a callback
                    id='dropdown_exclude_genres',
                    multi=True,
                    placeholder="Select genre(s) to exclude..."
                )
            ])
        ], style={'display': 'block'}),
        html.P("Note: Both dropdowns only include genres that are present in your recommendations."),
        html.P( # Hidden by default
            "No recommendations available with your genre selection. Please, change your choice.", 
            id='text_no_recommendations', 
            style={'display': 'none'}
        ),
        html.Div(id='recommended_books_container') # Container to show the recommended books
    ], id='final_recommendations', style={'display': 'none'})
])



# Callback to update app state when finish button is clicked and to hide the "No book selected!" message
@app.callback(
    [Output('app_state', 'data'),
     Output('text_no_select', 'style')],
    [Input('finish_book_selection_button', 'n_clicks'),
     Input('dropdown_book_titles', 'value')],
     State('app_state', 'data')
)
def update_app_state_or_hide_message(n_clicks,  selected_books, app_state):
    """Advance the app state when the selection is finished, or hide the warning.

    Returns ``(app_state, style of the "No book selected!" message)``.
    - Dropdown changed: the state is left untouched and the message is hidden.
    - Finish button clicked without books: the message is shown.
    - Finish button clicked with books: the message is hidden and the state
      moves from book selection to the recommender program.
    Any other situation prevents the update.
    """
    ctx = dash.callback_context

    # Nothing triggered the callback (initial call)
    if not ctx.triggered:
        raise dash.exceptions.PreventUpdate

    # Determine which input triggered the callback
    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if trigger_id == 'dropdown_book_titles':
        # The selection changed: just hide the warning message
        return dash.no_update, {'display': 'none'}

    # Every path must either return both outputs or prevent the update:
    # an unknown trigger or a button that was never clicked changes nothing.
    if trigger_id != 'finish_book_selection_button' or n_clicks is None:
        raise dash.exceptions.PreventUpdate

    if not selected_books:
        # Finish clicked without any book selected: show the warning
        return app_state, {'display': 'block'}

    # Finish clicked with a valid selection: move on to the recommender program
    app_state['book_selection_ongoing'] = False
    app_state['potential_recommendations_ongoing'] = True
    return app_state, {'display': 'none'}



# Callback to show/hide components based on app state
@app.callback(
    [Output('book_selection', 'style'),
     Output('potential_recommendations_program', 'style'),
     Output('final_recommendations', 'style')],
    [Input('app_state', 'data')]
)
def update_components_visibility(app_state):
    """Return the styles of the three page sections: a section is displayed
    only while its flag in ``app_state`` is truthy."""
    # Flags in the same order as the callback outputs
    section_flags = (
        'book_selection_ongoing',
        'potential_recommendations_ongoing',
        'final_recommendations_ongoing',
    )
    return tuple(
        {'display': 'block'} if app_state[flag] else {'display': 'none'}
        for flag in section_flags
    )



###############################################################################
#                                                                             #
#                               BOOK SELECTION                                #
#                                                                             #
###############################################################################


# Callback to display the selected books by the user from the initial dropdown
@app.callback(
    Output('selected_books_container', 'children'),
    [Input('dropdown_book_titles', 'value')],
    [State('rating_store', 'data')] # State is used to access the current state of a component without triggering the callback
)
def display_selected_books(selected_books, rating_store):
    """Render one line per selected book: remove button, cover, title and rating stars.

    Ratings already present in ``rating_store`` are kept; books without a stored
    rating start with 1 star. An empty ``html.Div`` is returned when nothing is selected.
    """
    if not selected_books:
        return html.Div()

    stored_ratings = rating_store if rating_store else {}

    books_info = []
    for book_title in selected_books:
        # First row of `books` with this title
        image_url = books[books['Title'] == book_title].iloc[0]['Image_url']

        remove_button = html.Button(
            'x',
            id={'type': 'remove_book_dropdown', 'index': book_title},
            n_clicks=0,
            style={'margin-right': '10px'},
        )
        cover = html.Img(
            src=image_url,
            style={'width': '50px', 'height': '75px', 'margin-top': '10px', 'margin-right': '20px'},
        )
        title = html.H3(book_title, style={'margin-right': '20px'})
        stars = dash_grocery.Stars(
            id={'type': 'rating', 'index': book_title},
            count=5,
            value=stored_ratings.get(book_title, 1),
            color2="gold",
            size=30,
            edit=True,
            half=False,
        )

        line = html.Div(
            [remove_button, cover, title, stars],
            style={'display': 'flex', 'align-items': 'center'},
        )
        books_info.append(html.Div([line]))

    return books_info



# Callback to handle book removal using the 'x' button
@app.callback(
    Output('dropdown_book_titles', 'value'),
    [Input({'type': 'remove_book_dropdown', 'index': ALL}, 'n_clicks')],
    [State('dropdown_book_titles', 'value')]
)
def remove_selected_book_from_dropdown(n_clicks, selected_books):
    """Remove from the dropdown the book whose 'x' button was clicked.

    Parameters
    ----------
    n_clicks : list
        Click counters of all the 'x' buttons.
    selected_books : list or None
        Titles currently selected in the dropdown.

    Returns the new list of selected titles; the update is prevented when no
    button was really clicked.
    """
    # dash.callback_context gives access to what activated the callback.
    # ctx.triggered is a list of dictionaries with the keys 'prop_id' and 'value'.
    ctx = dash.callback_context

    if not ctx.triggered or not selected_books:
        raise dash.exceptions.PreventUpdate

    # 'prop_id' has the form '<component id>.<property>', where the id of a
    # pattern-matching component is a JSON string. The title inside the id may
    # itself contain periods (e.g. "Mr. Mercedes"), so only the LAST period is
    # the separator.
    component_id = ctx.triggered[0]['prop_id'].rsplit('.', 1)[0]
    try:
        book_to_remove = json.loads(component_id)['index']
    except (ValueError, KeyError, TypeError):
        # Not a pattern-matching id: nothing to remove
        raise dash.exceptions.PreventUpdate

    for position, title in enumerate(selected_books):
        if title != book_to_remove:
            continue
        # Buttons are created with n_clicks=0, which also fires this callback;
        # only a real click (counter > 0) removes the book.
        if position < len(n_clicks) and n_clicks[position]:
            remaining_books = list(selected_books)
            remaining_books.remove(title)
            return remaining_books

    raise dash.exceptions.PreventUpdate



# Callback to update the Store with the values of the ratings
@app.callback(
    Output('rating_store', 'data'),
    [Input({'type': 'rating', 'index': ALL}, 'value')],  # Dynamic input for all the ratings
    [State('dropdown_book_titles', 'value'),  # State for the selected books
     State('rating_store', 'data')]  # Access to the current Store  
)
def update_rating_store(rating_values, selected_books, rating_store):
    """Rebuild the ratings store as a dictionary ``{book title: rating}``.

    The dictionary is always built from scratch (the incoming ``rating_store``
    is ignored), which guarantees that removed books are dropped from it.
    """
    # No selection at all: the store becomes empty
    if selected_books is None:
        return {}

    # Titles and rating values are paired by position
    return dict(zip(selected_books, rating_values))





###############################################################################
#                                                                             #
#                            RECOMMENDATION SYSTEM                            #
#                                                                             #
###############################################################################


@app.callback(
    [Output('potential_recommendations_df', 'data'),
     Output('app_state', 'data', allow_duplicate=True)],
    [Input('app_state', 'data')],
    [State('rating_store', 'data')],
     prevent_initial_call=True
)
def update_intermediate_state(app_state, rating_store):
    """Compute the potential recommendations once the book selection is finished.

    Returns the recommendations dataframe serialised as JSON (orient='split')
    and the app state switched to the final-recommendations stage. The update
    is prevented unless the app is in the recommender-program stage.
    """
    recommender_stage = (
        not app_state['book_selection_ongoing']
        and app_state['potential_recommendations_ongoing']
    )
    if not recommender_stage:
        raise dash.exceptions.PreventUpdate

    target_UserID = 19960808 # This value is arbitrary, but not an existing UserID

    # Ratings dataframe including the ratings of the target user
    ratings_new = user_dictionary_to_df(rating_store, target_UserID)

    # Books rated by the target user
    is_target = ratings_new['UserID'] == target_UserID
    target_books = ratings_new.loc[is_target, 'BookID'].values

    # Users (and their ratings) used to get the recommendations
    selected_users, selected_ratings = selected_users_df(
        ratings_new, target_books, n_users_upper_limit, target_UserID
    )

    # User x book matrix, sparse (normalised) and dense
    ratings_csr_matrix, ratings_matrix = get_users_matrix(selected_ratings)

    # Potential recommendations, without the books already rated by the target user
    potential_recommendations = knn_model(
        ratings_csr_matrix,
        ratings_matrix,
        target_UserID,
        default_number_neighbours,
        selected_ratings,
        target_books,
    )

    # The dense matrix is no longer needed
    del ratings_matrix

    # The process has finished: move on to the final recommendations
    app_state['potential_recommendations_ongoing'] = False
    app_state['final_recommendations_ongoing'] = True

    return potential_recommendations.to_json(orient='split'), app_state




###############################################################################
#                                                                             #
#                            FINAL RECOMMENDATIONS                            #
#                                                                             #
###############################################################################


# Callback to modify the genres options of the dropdown that the recommended books must satisfy
@app.callback(
    [Output('dropdown_include_genres', 'options'),
     Output('dropdown_include_genres', 'value'),
     Output('dropdown_exclude_genres', 'options'),
     Output('dropdown_exclude_genres', 'value'),
     Output('genre_button_state', 'data', allow_duplicate=True)],
    [Input('app_state', 'data'),
     Input('potential_recommendations_df' , 'data'),
     Input('dropdown_include_genres', 'value'),
     Input('dropdown_exclude_genres', 'value'),
     Input('genre_button_state', 'data')],
     prevent_initial_call=True
)
def get_genres_to_include(app_state, pot_recom_json, selected_included_genres, selected_excluded_genres, button_state):
    """Compute the options and values of the two genre dropdowns.

    pot_recom_json           : all the potential recommendations for the user (JSON)
    selected_included_genres : genres currently selected in the included genres dropdown
    selected_excluded_genres : genres currently selected in the excluded genres dropdown
    button_state             : dictionary with the state of the All and Any buttons

    Returns the options and selected values of the include dropdown, the options
    and selected values of the exclude dropdown, and the button state (with
    'have_they_changed' reset to False).
    """
    if pot_recom_json is None or not app_state['final_recommendations_ongoing']:
        raise dash.exceptions.PreventUpdate

    # Potential recommendations together with their genres
    genre_info_columns = ['BookID', 'Genres', 'Genre_1', 'Genre_2', 'Genre_3', 'Genre_4', 'Genre_5', 'Genre_6', 'Genre_7']
    pot_recom = pd.read_json(StringIO(pot_recom_json), orient='split')
    pot_recom = pd.merge(pot_recom, books_genres[genre_info_columns], on='BookID', how='left')

    # Current selections as fresh lists (an untouched dropdown gives None)
    excluded_genres = [] if selected_excluded_genres is None else list(selected_excluded_genres)
    included_genres = [] if selected_included_genres is None else list(selected_included_genres)

    # Keep only the books that do not have any excluded genre
    has_excluded_genre = pot_recom.apply(lambda row: contains_any_genre(row, excluded_genres), axis=1)
    pot_recom = pot_recom[~has_excluded_genre]

    # When the All/Any choice has just changed, the included selection starts over
    if button_state['have_they_changed'] == True:
        included_genres = []
        button_state['have_they_changed'] = False

    # Genre list of every remaining book ('Genres' holds the list as text)
    lists_genres = [ast.literal_eval(genres_text) for genres_text in pot_recom['Genres'].values]

    # Books compatible with the included selection, depending on the All/Any choice
    if button_state['include_all_genres'] == True:
        filtered_lists_genres = [lst for lst in lists_genres if all(genre in lst for genre in included_genres)]
    elif not included_genres:
        filtered_lists_genres = lists_genres
    else:
        filtered_lists_genres = [lst for lst in lists_genres if any(genre in lst for genre in included_genres)]

    # Distinct genres of the compatible books
    include_list_for_dropdowns = list(set(itertools.chain(*filtered_lists_genres)))

    # The already included genres must be among the options to remain selected
    for genre in included_genres:
        if genre not in include_list_for_dropdowns:
            include_list_for_dropdowns.append(genre)

    # Options of the exclude dropdown: the same genres, plus the already excluded
    # ones (so they remain selected), minus the genres selected to be included
    exclude_list_for_dropdowns = include_list_for_dropdowns.copy()
    exclude_list_for_dropdowns.extend(excluded_genres)
    for genre in included_genres:
        if genre in exclude_list_for_dropdowns:
            exclude_list_for_dropdowns.remove(genre)

    # Options for the dropdowns
    options_include = [{'label': genre, 'value': genre} for genre in include_list_for_dropdowns]
    options_exclude = [{'label': genre, 'value': genre} for genre in exclude_list_for_dropdowns]

    return options_include, included_genres, options_exclude, excluded_genres, button_state





@app.callback(
    [Output('genre_button_state', 'data'),
     Output('include_all_genres', 'style'),
     Output('include_any_genres', 'style')],
    [Input('include_all_genres', 'n_clicks'), 
     Input('include_any_genres', 'n_clicks')],
    [State('genre_button_state', 'data')]
)
def toggle_genre_button_and_style(button_all_clicks, button_any_clicks, button_state):
    """Record which of the All/Any buttons is active and highlight it.

    'have_they_changed' is set to True only when the clicked button was not
    already the active one. Returns the button state and the styles of the
    "All" and "Any" buttons.
    """
    changed_id = dash.callback_context.triggered[0]['prop_id']

    # Which button was clicked (if any) and which one is its counterpart
    if 'include_all_genres' in changed_id:
        clicked, counterpart = 'include_all_genres', 'include_any_genres'
    elif 'include_any_genres' in changed_id:
        clicked, counterpart = 'include_any_genres', 'include_all_genres'
    else:
        clicked = counterpart = None

    if clicked is not None:
        # Clicking the button that is already active is not a change
        button_state['have_they_changed'] = not (button_state[clicked] == True)
        button_state[clicked] = True
        button_state[counterpart] = False

    # The active button is highlighted; "All" always keeps its margins
    highlight = {'background-color': 'blue', 'color': 'white'}
    margins_all = {'margin-left': '30px', 'margin-right': '15px'}

    if button_state['include_all_genres']:
        button_all_style = {**highlight, **margins_all}
    else:
        button_all_style = dict(margins_all)

    button_any_style = dict(highlight) if button_state['include_any_genres'] else {}

    return button_state, button_all_style, button_any_style






# Callback that renders the final list of recommended books
@app.callback(
    [
        Output('recommended_books_container', 'children'),
        Output('text_no_recommendations', 'style'),
    ],
    [
        Input('app_state', 'data'),
        Input('potential_recommendations_df', 'data'),
        Input('dropdown_include_genres', 'value'),
        Input('dropdown_exclude_genres', 'value'),
    ],
    State('genre_button_state', 'data'),
)
def get_the_final_recommendations(app_state, pot_recom_json, selected_genres, excluded_genres, button_state):
    """Filter the potential recommendations by genre and build the display rows.

    Args:
        app_state: Dict from the 'app_state' store; only its
            'final_recommendations_ongoing' entry is read.
        pot_recom_json: Potential recommendations as a JSON string in pandas
            'split' orientation, or None when nothing has been stored yet.
        selected_genres: Genres picked in the "include" dropdown (may be None/empty).
        excluded_genres: Genres picked in the "exclude" dropdown (may be None/empty).
        button_state: Dict from the 'genre_button_state' store; its
            'include_all_genres' flag is forwarded as ``combine``.

    Returns:
        Tuple ``(children, style)``: one ``html.Div`` per recommended book (cover
        image plus title), and the style of the "no recommendations" text, which
        is visible only when the list is empty.

    Raises:
        dash.exceptions.PreventUpdate: When there is no stored data or the
            'final_recommendations_ongoing' flag is falsy.
    """
    ready = pot_recom_json is not None and app_state['final_recommendations_ongoing']
    if not ready:
        raise dash.exceptions.PreventUpdate

    # Dropdowns may deliver None; normalise both selections to lists.
    genres_to_include = selected_genres or []
    genres_to_exclude = excluded_genres or []

    # Rebuild the candidates table and attach each book's genre list.
    candidates = pd.read_json(StringIO(pot_recom_json), orient='split').merge(
        books_genres[['BookID', 'Genres']], on='BookID', how='left'
    )

    # The "include all" flag is passed straight through as `combine`.
    require_all = bool(button_state['include_all_genres'] == True)
    filtered = books_satisfying_genres(
        candidates, genres_to_include, genres_to_exclude, combine=require_all
    )
    with_details = pd.merge(
        filtered, books[['BookID', 'Title', 'Image_url']], on='BookID', how='left'
    )

    # Keep only the first rows of the filtered table.
    max_recommendations = 10
    recommendations_list = with_details.head(max_recommendations).to_dict(orient='records')

    # Debugging aid (disabled): dump the final recommendations to disk.
    # with open('recommendations.json', 'w') as f:
    #     json.dump(recommendations_list, f)

    # Build one row (cover + title) per recommendation for the container.
    row_style = {'display': 'flex', 'align-items': 'center', 'margin-bottom': '10px'}
    recommendations_display = [
        html.Div(
            [
                html.Img(
                    src=record['Image_url'],
                    style={'width': '50px', 'height': '75px', 'margin-right': '20px'},
                ),
                html.H4(record['Title'], style={'margin-right': '20px'}),
            ],
            style=dict(row_style),
        )
        for record in recommendations_list
    ]

    # The warning text is shown only when nothing survived the filters.
    visibility = 'none' if recommendations_list else 'block'
    text_no_recommendations_style = {'display': visibility, 'fontSize': 20, 'color': 'red'}

    return recommendations_display, text_no_recommendations_style




# Start the app's server only when this file is executed directly (not on import).
# NOTE(review): newer Dash releases favour `app.run(...)` over `run_server` —
# confirm against the installed Dash version before upgrading.
if __name__ == '__main__':
    app.run_server(debug=True)

TODO:
- Let the user choose the number of recommendations.
- Work out how to take the distance between neighbours into account in the recommendations: the farther away a neighbour is, the less its contribution should weigh. It may be worth using a very large number of neighbours and filtering afterwards by distance.
- Allow searching by author as well.
- Perhaps hide already-selected books from the book-selection dropdown, since they can be removed with the "x" button.
- Don't recommend book packs (box sets)?
- If a second or third part of a series is being recommended and the user has not read the first one, recommend only the first?
- Save the book selection to a JSON file so it can be loaded in future sessions.
- Remove 'Empty' from the genre selection.

In [9]:
# Load the ratings stored in rating_store.json and display them.
with open('rating_store.json') as f:
    rating_store_data = json.loads(f.read())

rating_store_data

{'The Final Empire (Mistborn, #1)': 5,
 'The Well of Ascension (Mistborn, #2)': 5,
 'The Hero of Ages (Mistborn, #3)': 5,
 'The Alloy of Law (Mistborn, #4)': 4}

In [12]:
# Load the records stored in potential_recommendations.json.
with open('potential_recommendations.json') as f:
    recommendation_data_list = json.loads(f.read())

recommendation_data = pd.DataFrame(recommendation_data_list)

# Attach the title of each book.
potential_recommendations = recommendation_data.merge(
    books[['BookID', 'Title']],
    on='BookID',
    how='left',
)

# Attach the genre list and the seven individual genre columns.
potential_recommendations = potential_recommendations.merge(
    books_genres[
        [
            'BookID',
            'Genres',
            'Genre_1',
            'Genre_2',
            'Genre_3',
            'Genre_4',
            'Genre_5',
            'Genre_6',
            'Genre_7',
        ]
    ],
    on='BookID',
    how='left',
)

potential_recommendations.head()

Unnamed: 0,index,BookID,Average_Rating,Ratings_Count,Weighted_Rating,Title,Genres,Genre_1,Genre_2,Genre_3,Genre_4,Genre_5,Genre_6,Genre_7
0,512,862,4.896552,29,4.753295,"Words of Radiance (The Stormlight Archive, #2)","['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Audiobook,Adult,Magic
1,369,562,4.8125,32,4.69052,"The Way of Kings (The Stormlight Archive, #1)","['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Audiobook,Adult,Science Fiction Fantasy
2,717,1374,4.818182,11,4.523915,"A Memory of Light (Wheel of Time, #14)","['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Science Fiction Fantasy,Audiobook,Epic
3,1171,2889,4.875,8,4.488227,"Mistborn Trilogy Boxed Set (Mistborn, #1-3)","['Fantasy', 'Fiction', 'Epic Fantasy', 'Scienc...",Fantasy,Fiction,Epic Fantasy,Science Fiction Fantasy,Magic,High Fantasy,Science Fiction
4,224,307,4.6,20,4.452447,"The Wise Man's Fear (The Kingkiller Chronicle,...","['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Magic,Science Fiction Fantasy,Adventure


In [11]:
# Load the records stored in recommendations.json and preview them as a table.
with open('recommendations.json') as f:
    recommendations_list = json.loads(f.read())

recommendation_df = pd.DataFrame(data=recommendations_list)

recommendation_df.head()

Unnamed: 0,index,BookID,Average_Rating,Ratings_Count,Weighted_Rating,Genres,Genre_1,Genre_2,Genre_3,Genre_4,Genre_5,Genre_6,Genre_7,Title,Image_url
0,512,862,4.896552,29,4.753295,"['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Audiobook,Adult,Magic,"Words of Radiance (The Stormlight Archive, #2)",https://images.gr-assets.com/books/1391535251m...
1,1171,2889,4.875,8,4.488227,"['Fantasy', 'Fiction', 'Epic Fantasy', 'Scienc...",Fantasy,Fiction,Epic Fantasy,Science Fiction Fantasy,Magic,High Fantasy,Science Fiction,"Mistborn Trilogy Boxed Set (Mistborn, #1-3)",https://images.gr-assets.com/books/1257442247m...
2,224,307,4.6,20,4.452447,"['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Magic,Science Fiction Fantasy,Adventure,"The Wise Man's Fear (The Kingkiller Chronicle,...",https://images.gr-assets.com/books/1452624392m...
3,153,192,4.545455,22,4.417643,"['Fantasy', 'Fiction', 'Epic Fantasy', 'High F...",Fantasy,Fiction,Epic Fantasy,High Fantasy,Magic,Science Fiction Fantasy,Adult,The Name of the Wind (The Kingkiller Chronicle...,https://images.gr-assets.com/books/1472068073m...
4,861,1760,5.0,3,4.265532,"['Fantasy', 'Young Adult', 'Fiction', 'Middle ...",Fantasy,Young Adult,Fiction,Middle Grade,Adventure,Magic,Childrens,"Keys to the Demon Prison (Fablehaven, #5)",https://images.gr-assets.com/books/1298081448m...
