#### Imports

In [58]:
import pickle
import nbimporter
import pandas as pd
# from frequent_itemsets import find_associated_items, preprocess_rules
from utility import find_associated_items, preprocess_rules
from similar_items import *

### Load variables from frequent_itemsets notebook

In [59]:
def preprocess_rules(rules):
    """
    Return a copy of the rules DataFrame with string item columns split into lists.

    Each value in the 'antecedents' and 'consequents' columns that is a string
    is split on ", " into a list of items; values of any other type are kept
    unchanged. The DataFrame passed in is not modified.

    Parameters:
        rules (pd.DataFrame): Rules table with 'antecedents' and 'consequents' columns.

    Returns:
        pd.DataFrame: A copy of `rules` with the two item columns normalised.
    """
    def _split_items(value):
        # Only strings are split; anything else passes through untouched.
        if isinstance(value, str):
            return value.split(", ")
        return value

    # Work on a copy so the caller's DataFrame keeps its original values.
    processed = rules.copy()
    for column in ('antecedents', 'consequents'):
        processed[column] = processed[column].apply(_split_items)
    return processed

In [60]:
# Function to find items associated with an input item
def find_associated_items(input_item, rules):
    """
    Find all items associated with an input item based on association rules.

    Only rules whose antecedent consists of exactly the input item are used.
    The antecedent may be stored as a list, tuple, set or frozenset; any other
    value (for example an unsplit string) never matches.

    Parameters:
        input_item (str): The item to find associations for.
        rules (pd.DataFrame): A DataFrame of association rules with columns
            'antecedents', 'consequents' and 'confidence'.

    Returns:
        list: Unique consequent items of the matching rules, ordered by
            descending rule confidence (first occurrence wins).
    """
    # Highest-confidence rules first, so their consequents lead the result.
    ranked_rules = rules.sort_values(by='confidence', ascending=False)

    target = [input_item]
    associated_items = []

    # Walk the two columns in lockstep instead of building a Series per row.
    for antecedents, consequents in zip(ranked_rules['antecedents'],
                                        ranked_rules['consequents']):
        # A plain `list == frozenset` comparison is always False, so normalise
        # the supported container types to a list before comparing.
        if not isinstance(antecedents, (list, tuple, set, frozenset)):
            continue
        if list(antecedents) != target:
            continue

        # Add consequents to the associated items if not already present
        for item in consequents:
            if item not in associated_items:
                associated_items.append(item)

    return associated_items

In [61]:
# Restore the variables saved by the frequent_itemsets notebook.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('rules_display.pkl', 'rb') as f:
    rules_display = pickle.load(f)

# Preprocessing works on the in-memory object, so it runs once the file is closed.
rules = preprocess_rules(rules_display)

with open('filtered_rules.pkl', 'rb') as f:
    filtered_rules = pickle.load(f)

# Product table read from the processed skincare CSV.
skincare_df = pd.read_csv("processed_data/skincare.csv")

## Recommender

In [62]:
"""
Pseudo code:

def recommender(product_id, n=5):
   
    do find_associated_items()

   # check if our product has association rule
   if find_associated_items() does not return empty:
      # get the consequents of the product_id products; if there are fewer than 5 associated items, then output all associated items
      recommendations = similar_items(first 5 outputs of associated_items) + first 5 outputs of associated_items
      
      return recommendations
   
   else if find_associated_items() does return empty:
        perform similar items on all antecedents (unique_values)
        and product_id is now the most similar item

        then do recommender method again with new product_id
"""

'\nPseudo code:\n\ndef recommender(product_id, n=5):\n   \n    do find_associated_items()\n\n   # check if our product has association rule\n   if find_associated_items() does not return empty:\n      # get the consequents of the product_id products, if there´s less than 5 associated items, then output all associated items\n      recommendations = similar_items(first 5 outputs of associated_items) + first 5 outputs of associated_items\n      \n      return recommendations\n   \n   else if find_associated_items() does return empty:\n        perform similar items on all antecedents (unique_values)\n        and product_id is now the most similar item\n\n        then do recommender method again with new product_id\n'

In [63]:
# # Recommender function
# def recommender(product_id, rules, n=5):
#     """
#     Recommend products based on association rules and similarity.

#     Parameters:
#         product_id (str): The product ID for which recommendations are needed.
#         rules (pd.DataFrame): A DataFrame of association rules with columns 'antecedents' and 'consequents'.
#         n (int): Number of recommendations to return.

#     Returns:
#         list: Recommended products.
#     """
#     # Step 1: Find associated items
#     associated_items = find_associated_items(product_id, rules)

#     if associated_items:
#         # Step 2: Direct match found; get recommendations
#         recommendations = list(associated_items)[:n]

#         # Add similar items to recommendations (assuming similar_items works on associated_items)
#         similar_recommendations = get_similar_items(product_id, associated_items, n)
#         return similar_recommendations + recommendations

#     else:
#         # Step 3: No direct match; find the most similar product
#         antecedents = pd.concat([rules_display['antecedents']]).explode().unique()  # All unique antecedent items
#         most_similar = get_similar_items(product_id, set(antecedents), 1)

#         if not most_similar:
#             return []  # No similar items found; cannot make recommendations

#         # Recursive call with the new product ID
#         return #recommender(most_similar[0], rules, n)

# # Example Usage
# # Preprocess the rules DataFrame
# rules_df = preprocess_rules(filtered_rules)

# # Input product
# input_product = 'P500633'

# # Get recommendations
# recommendations = recommender(input_product, rules_df, n=5)

# print(f"Recommendations for product '{input_product}': {recommendations}")

In [64]:
# Recommender function
def recommender(product_id, rules, df, n=5):
    """
    Recommend products based on association rules and similarity.

    The consequents of the rules for `product_id` are used first. If the
    product has no rule, the rules of the most similar single-item antecedent
    are used instead. When fewer than `n` items are found this way, the list
    is topped up with items similar to `product_id`.

    Parameters:
        product_id (str): The product ID for which recommendations are needed.
        rules (pd.DataFrame): A DataFrame of association rules with columns 'antecedents' and 'consequents'.
        df (pd.DataFrame): Product table with a 'product_id' column; it is the
            candidate pool handed to get_similar_items.
        n (int): Maximum number of recommendations to return.

    Returns:
        list: Up to `n` recommended product IDs, without duplicates and
            without `product_id` itself. Empty when `n` is not positive.
    """
    # A non-positive n would otherwise turn into a surprising negative slice.
    if n <= 0:
        return []

    # Step 1: Find associated items
    associated_items = find_associated_items(product_id, rules)

    # no rule for chosen item
    if not associated_items:
        # get all the single antecedents in the rules
        antecedents = [
            list(item)[0]  # antecedent containers hold exactly one element here
            for item in rules["antecedents"]
            if len(item) == 1
        ]

        # filter skincare so it searches for the similar items only among the antecedents and product_id
        antecedents.append(product_id)
        df_antecedents = df[df['product_id'].isin(antecedents)]

        # the most similar antecedent to product_id; the result may be empty,
        # in which case there is nothing to fall back on
        closest_ids = get_similar_items(product_id, df_antecedents, n=1)['product_id']
        if len(closest_ids) > 0:
            associated_items = find_associated_items(closest_ids.iloc[0], rules)

    # get the top n associated items, never recommending the queried product itself
    recommendations = [item for item in associated_items if item != product_id][:n]

    missing = n - len(recommendations)
    if missing == 0:
        return recommendations

    # not enough associated items: top up with items similar to product_id,
    # skipping anything that is already recommended
    similar_ids = get_similar_items(product_id, df, n=missing)['product_id']
    for item in similar_ids:
        if item != product_id and item not in recommendations:
            recommendations.append(item)

    return recommendations
        

# Product to recommend for; the commented line is an alternative product ID.
# input_product = 'P500633'
input_product = "P503814"

# Ask for five recommendations drawn from the rules and the skincare table.
recommendations = recommender(
    product_id=input_product,
    rules=rules,
    df=skincare_df,
    n=5,
)

print(f"Recommendations for product '{input_product}': {recommendations}")



Recommendations for product 'P503814': ['P500633', 'P503936', 'P270594', 'P442540', 'P94421']


In [65]:
"""def recommender(product_id, n=5):
   
   # find frequent items
   association_rules = get_association_rules()
   
   # get the antecedents
   antecedents = association_rules["antecedents"]
   
   # check if our product has association rule
   if product_id in antecedents:
      # get the consequents of the product_id products 
      consequents = association_rules[association_rules['antecedents'] == product_id]
      
      # get top 5 products for each consequents:
      recommendations = DataFrame() # )
      for consequent in consequents:
         similar_items = get_similar_items(consequent, 10) # this function exists, change it so it takes the dataframe as well
         recommendations = pd.concat([recommendations, similar_items], ignore_index=True)
         
      recommendations = recommendations.sort_values(by='rrf_score', ascending=False).reset_index(drop=True)
      recommendations = recommendations[:n]
      
      return recommendations
   
   else:
      # in case it does not exist find the most similar antecedents to this one 
      
      # take the skincare products that are antecedents
      
      # call a new function (the same as previous but input is df as well)
      
      # get the most similar product to this one
      
      # get the consequents of that product
      
      # repeat the same as in the if part"""

'def recommender(product_id, n=5):\n   \n   # find frequent items\n   association_rules = get_association_rules()\n   \n   # get the antecedents\n   antecedents = association_rules["antecedents"]\n   \n   # check if our product has association rule\n   if product_id in antecedents:\n      # get the consequents of the product_id products \n      consequents = association_rules[association_rules[\'antecedents\'] == product_id]\n      \n      # get top 5 products for each consequents:\n      recommendations = DataFrame() # )\n      for consequent in consequents:\n         similar_items = get_similar_items(consequent, 10) # this function exists, change it so it takes the dataframe as well\n         recommendations = pd.concat([recommendations, similar_items], ignore_index=True)\n         \n      recommendations = recommendations.sort_values(by=\'rrf_score\', ascending=False).reset_index(drop=True)\n      recommendations = recommendations[:n]\n      \n      return recommendations\n   \n   e