## Machine Learning and Artificial Intelligence, MSc Business Mathematics
## Online Retail Analysis Project
### Angelos Semoglou, s3332318
#### **`Part 3: Simple Product Recommendation System using Association Rules`**

***

In [1]:
import numpy as np
import pandas as pd

from mlxtend.frequent_patterns import fpgrowth, association_rules

### Load Processed Data (data processed in part 1 ipynb)

In [2]:
# Read the dataset prepared in part 1 and normalise the two columns whose
# dtypes do not survive a CSV round-trip:
#   - customer_id  -> string
#   - invoice_date -> datetime
all_data = pd.read_csv('processed_online_retail.csv').assign(
    customer_id = lambda frame: frame['customer_id'].astype(str),
    invoice_date = lambda frame: pd.to_datetime(frame['invoice_date']),
)

***

### Association Rules Extraction using FP-Growth Algorithm

#### **Key Metrics in Association Rule Mining**


**Support**
- **Definition**: Support is a measure of how frequently an itemset appears in the dataset. 
- **Calculation**: It is calculated as the proportion of transactions in the dataset that contain the itemset.
- **Formula**: 
  $$
  \text{Support}(X) = \frac{\text{Number of transactions containing } X}{\text{Total number of transactions}}
  $$
- **Interpretation**: Higher support indicates that the itemset is more common in the dataset. It's used to filter out the itemsets that are too infrequent to be of interest.

**Confidence**
- **Definition**: Confidence is a measure of the likelihood that an item Y is purchased when item X is purchased. It represents the percentage of transactions containing X that also contain Y.
- **Calculation**: It is calculated as the ratio of the number of transactions that contain both X and Y to the number of transactions that contain X.
- **Formula**:
  $$
  \text{Confidence}(X \rightarrow Y) = \frac{\text{Support}(X \cup Y)}{\text{Support}(X)}
  $$
- **Interpretation**: Higher confidence indicates a stronger association between items X and Y. It helps in identifying rules that have a high likelihood of occurrence.

**Lift**
- **Definition**: Lift is a measure of how much more likely item Y is purchased when item X is purchased, compared to its general likelihood of being purchased.
- **Calculation**: It is calculated as the ratio of the observed support for X and Y to the expected support if X and Y were independent.
- **Formula**:
  $$
  \text{Lift}(X \rightarrow Y) = \frac{\text{Support}(X \cup Y)}{\text{Support}(X) \times \text{Support}(Y)}
  $$
- **Interpretation**: Lift greater than 1 indicates a positive association between X and Y, meaning they occur together more often than expected by chance. Lift less than 1 indicates a negative association, and lift equal to 1 indicates no association.



In [3]:
class AssociationRules():
    """Mine association rules from transaction data with FP-Growth.

    Parameters
    ----------
    min_support : float
        Passed to ``fpgrowth`` as ``min_support``.
    metric : str
        Passed to ``association_rules`` as ``metric``.
    min_metric_threshold : float
        Passed to ``association_rules`` as ``min_threshold``.
    max_len : int or None, optional
        Passed to ``fpgrowth`` as ``max_len``.
    """

    def __init__(self, min_support, metric, min_metric_threshold, max_len = None):
        self.min_support = min_support
        self.metric = metric
        self.min_metric_threshold = min_metric_threshold
        self.max_len = max_len

    def generate_rules(self, data, index = 'invoice_id', columns = 'description'):
        """Return the association rules found in ``data``.

        Parameters
        ----------
        data : pandas.DataFrame
            Long-format transactions: one row per (transaction, item).
        index : str
            Column identifying the transaction.
        columns : str
            Column identifying the item.

        Returns
        -------
        The object returned by ``association_rules`` for the frequent
        itemsets mined from ``data``.
        """
        # Transforming dataset in a form suitable for FPGrowth
        product_matrix = self._build_product_matrix(data, index, columns)

        # Creating frequent itemsets
        frequent_itemsets = fpgrowth(product_matrix,
                                     min_support = self.min_support,
                                     use_colnames = True,
                                     max_len = self.max_len)

        # Extracting Association Rules
        rules = association_rules(frequent_itemsets,
                                  metric = self.metric,
                                  min_threshold = self.min_metric_threshold)
        return rules

    @staticmethod
    def _build_product_matrix(data, index, columns):
        """Build the boolean transaction x item matrix.

        The matrix is built by counting (transaction, item) pairs rather than
        by joining item names with a separator and splitting them again, so an
        item name may contain any character (including '|') and a missing
        item value only drops that one row instead of the whole transaction.
        """
        item_counts = data.groupby([index, columns]).size().unstack(fill_value = 0)

        # Keep every transaction as a row, even one left without any valid
        # item, so the total number of transactions (the support denominator)
        # does not change.
        transaction_ids = data.groupby(index).size().index

        product_matrix = item_counts.reindex(transaction_ids, fill_value = 0).astype(bool)
        product_matrix.columns.name = None
        return product_matrix

In [4]:
# Mining configuration (consumed by the AssociationRules builder below)
max_item_len = 2                  # forwarded to fpgrowth as max_len
chosen_metric = 'lift'            # metric used to filter the rules
minimun_metric_threshold = 1.2    # minimum value of the chosen metric
minimun_support = 0.005           # minimum support for an itemset to be kept

In [5]:
# Mine the rules with the configured thresholds
builder = AssociationRules(min_support = minimun_support,
                           metric = chosen_metric,
                           min_metric_threshold = minimun_metric_threshold,
                           max_len = max_item_len)

raw_rules = builder.generate_rules(all_data,
                                   index = 'invoice_id',
                                   columns = 'description')

# The recommender below matches a single product against a single product,
# so only one-to-one rules are usable. Keep those explicitly instead of
# taking "the first element" of each side: with max_item_len > 2 that would
# silently turn a multi-item rule into a misleading single-item one
# (a frozenset has no defined element order).
is_one_to_one = (
    (raw_rules['antecedents'].map(len) == 1)
    & (raw_rules['consequents'].map(len) == 1)
)

# Select relevant columns and unwrap each single-item frozenset to its item
rules_df = (
    raw_rules
    .loc[is_one_to_one, ['antecedents',
                         'consequents',
                         'support',
                         'confidence',
                         'lift']]
    .assign(
        antecedents = lambda frame: frame['antecedents'].map(lambda items: next(iter(items))),
        consequents = lambda frame: frame['consequents'].map(lambda items: next(iter(items))),
    )
)

In [6]:
# Preview the extracted rules (antecedent, consequent and their metrics)
display(rules_df)

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,HAND WARMER OWL DESIGN,HAND WARMER UNION JACK,0.009658,0.362069,17.652080
1,HAND WARMER UNION JACK,HAND WARMER OWL DESIGN,0.009658,0.470852,17.652080
2,HAND WARMER SCOTTY DOG DESIGN,HAND WARMER UNION JACK,0.008738,0.401691,19.583803
3,HAND WARMER UNION JACK,HAND WARMER SCOTTY DOG DESIGN,0.008738,0.426009,19.583803
4,HAND WARMER BIRD DESIGN,HAND WARMER UNION JACK,0.007542,0.341667,16.657399
...,...,...,...,...,...
2401,TRADITIONAL PICK UP STICKS GAME,TRADITIONAL NAUGHTS & CROSSES,0.005197,0.489177,49.243867
2402,JUMBO BAG PAISLEY PARK,VINTAGE DOILY JUMBO BAG RED,0.006025,0.428105,39.277240
2403,VINTAGE DOILY JUMBO BAG RED,JUMBO BAG PAISLEY PARK,0.006025,0.552743,39.277240
2404,LUNCH BAG PAISLEY PARK,JUMBO BAG PAISLEY PARK,0.006209,0.451505,32.083415


In [7]:
# Rank the rules: highest confidence first, lift as the tie-breaker
rules_df = rules_df.sort_values(['confidence', 'lift'], ascending=False)

In [8]:
# Show the first rows of the ranked rules table
display(rules_df.head())

Unnamed: 0,antecedents,consequents,support,confidence,lift
1151,HERB MARKER THYME,HERB MARKER ROSEMARY,0.00814,0.946524,107.755075
1159,HERB MARKER CHIVES,HERB MARKER PARSLEY,0.007174,0.934132,108.041279
1144,HERB MARKER PARSLEY,HERB MARKER ROSEMARY,0.008048,0.930851,105.970814
1160,HERB MARKER CHIVES,HERB MARKER THYME,0.007128,0.928144,107.922764
1163,HERB MARKER CHIVES,HERB MARKER ROSEMARY,0.007128,0.928144,105.662601


***

### Recommendation Function

In [9]:
def get_recommendations_for_product(product, rules_df, basket_items, top_n=5):
    """Return up to ``top_n`` products recommended for ``product``.

    Parameters
    ----------
    product : str
        Product to look up in the ``antecedents`` column.
    rules_df : pandas.DataFrame
        Rules with one product per ``antecedents`` / ``consequents`` cell.
        When ``confidence`` and/or ``lift`` columns are present they are used
        to rank the candidates (confidence first, then lift, both
        descending); otherwise the row order of ``rules_df`` is kept.
    basket_items : container
        Products already in the basket; they are never recommended.
    top_n : int, optional
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Recommended products, strongest rule first, without duplicates.
        Empty when no rule matches.
    """
    # Find rules where the product is in the antecedents
    matching_rules = rules_df[rules_df['antecedents'] == product]

    # Rank explicitly so the result does not depend on the caller having
    # pre-sorted the rules, nor on how ties are ordered by value_counts().
    ranking_columns = [name for name in ('confidence', 'lift')
                       if name in matching_rules.columns]
    if ranking_columns:
        matching_rules = matching_rules.sort_values(by=ranking_columns, ascending=False)

    # Filter out items already in the cart
    candidates = [rec for rec in matching_rules['consequents']
                  if rec not in basket_items]

    # Drop repeats while keeping the ranking, then take the top_n
    return list(dict.fromkeys(candidates))[:top_n]

In [10]:
def view_basket(user_basket, rules_df):
    """Print the basket contents, then the recommendations for each item.

    For every product in ``user_basket`` the recommendations returned by
    ``get_recommendations_for_product`` are listed; a short notice is printed
    for products that have none.
    """
    def _print_bullets(entries):
        # One "- <entry>" line per element
        for entry in entries:
            print(f"- {entry}")

    print("Items in basket:")
    _print_bullets(user_basket)
    print("\nRecommendations:")

    for product in user_basket:
        suggestions = get_recommendations_for_product(product, rules_df, user_basket)
        if not suggestions:
            print(f"\nNo recommendations found for '{product}'")
            continue
        print(f"\nBecause you liked '{product}', you may like:")
        _print_bullets(suggestions)

In [11]:
# Example basket used to demonstrate the recommender
user_basket = [
    'HAND WARMER OWL DESIGN',
    'HERB MARKER THYME',
    'ALARM CLOCK BAKELIKE PINK',
    'JAM MAKING SET WITH JARS',
    'JUMBO BAG APPLES',
    'RED KITCHEN SCALES',
]

view_basket(user_basket, rules_df)

Items in basket:
- HAND WARMER OWL DESIGN
- HERB MARKER THYME
- ALARM CLOCK BAKELIKE PINK
- JAM MAKING SET WITH JARS
- JUMBO BAG APPLES
- RED KITCHEN SCALES

Recommendations:

Because you liked 'HAND WARMER OWL DESIGN', you may like:
- HAND WARMER SCOTTY DOG DESIGN
- HAND WARMER BIRD DESIGN
- HAND WARMER UNION JACK
- HAND WARMER RED LOVE HEART
- HAND WARMER RED RETROSPOT

Because you liked 'HERB MARKER THYME', you may like:
- HERB MARKER ROSEMARY
- HERB MARKER PARSLEY
- HERB MARKER BASIL
- HERB MARKER MINT
- HERB MARKER CHIVES 

Because you liked 'ALARM CLOCK BAKELIKE PINK', you may like:
- ALARM CLOCK BAKELIKE RED 
- ALARM CLOCK BAKELIKE GREEN
- ALARM CLOCK BAKELIKE IVORY
- ALARM CLOCK BAKELIKE ORANGE
- ALARM CLOCK BAKELIKE CHOCOLATE

Because you liked 'JAM MAKING SET WITH JARS', you may like:
- JAM MAKING SET PRINTED
- SET OF 3 CAKE TINS PANTRY DESIGN 
- RECIPE BOX PANTRY YELLOW DESIGN
- SET OF 4 PANTRY JELLY MOULDS
- REGENCY CAKESTAND 3 TIER

Because you liked 'JUMBO BAG APPLES', yo

In [12]:
# Rules behind the recommendations of three basket items, one frame per product
filtered_df_1, filtered_df_2, filtered_df_3 = (
    rules_df[rules_df['antecedents'] == product_name]
    for product_name in ('JAM MAKING SET WITH JARS',
                         'RED KITCHEN SCALES',
                         'HAND WARMER OWL DESIGN')
)

In [13]:
# Rules whose antecedent is 'JAM MAKING SET WITH JARS'
filtered_df_1

Unnamed: 0,antecedents,consequents,support,confidence,lift
369,JAM MAKING SET WITH JARS,JAM MAKING SET PRINTED,0.014257,0.322581,7.979742
45,JAM MAKING SET WITH JARS,SET OF 3 CAKE TINS PANTRY DESIGN,0.011359,0.257024,4.588447
1245,JAM MAKING SET WITH JARS,RECIPE BOX PANTRY YELLOW DESIGN,0.008968,0.202914,5.05981
1703,JAM MAKING SET WITH JARS,SET OF 4 PANTRY JELLY MOULDS,0.0086,0.194589,4.835591
40,JAM MAKING SET WITH JARS,REGENCY CAKESTAND 3 TIER,0.008232,0.186264,2.175151
1674,JAM MAKING SET WITH JARS,SET OF 6 SPICE TINS PANTRY DESIGN,0.007542,0.170656,5.580052
39,JAM MAKING SET WITH JARS,PACK OF 72 RETROSPOT CAKE CASES,0.00676,0.152966,3.201237
35,JAM MAKING SET WITH JARS,WHITE HANGING HEART T-LIGHT HOLDER,0.005749,0.130073,1.408518
533,JAM MAKING SET WITH JARS,HOMEMADE JAM SCENTED CANDLES,0.005473,0.123829,5.118907
42,JAM MAKING SET WITH JARS,PARTY BUNTING,0.005289,0.119667,1.859928


In [14]:
# Rules whose antecedent is 'RED KITCHEN SCALES'
filtered_df_2

Unnamed: 0,antecedents,consequents,support,confidence,lift
1206,RED KITCHEN SCALES,IVORY KITCHEN SCALES,0.012325,0.580087,20.11707
1238,RED KITCHEN SCALES,MINT KITCHEN SCALES,0.005657,0.266234,18.495166


In [15]:
# Rules whose antecedent is 'HAND WARMER OWL DESIGN'
filtered_df_3

Unnamed: 0,antecedents,consequents,support,confidence,lift
603,HAND WARMER OWL DESIGN,HAND WARMER SCOTTY DOG DESIGN,0.012923,0.484483,22.271867
622,HAND WARMER OWL DESIGN,HAND WARMER BIRD DESIGN,0.012325,0.462069,20.931724
0,HAND WARMER OWL DESIGN,HAND WARMER UNION JACK,0.009658,0.362069,17.65208
2391,HAND WARMER OWL DESIGN,HAND WARMER RED LOVE HEART,0.009336,0.35,20.513208
612,HAND WARMER OWL DESIGN,HAND WARMER RED RETROSPOT,0.0086,0.322414,20.32048
601,HAND WARMER OWL DESIGN,HOT WATER BOTTLE KEEP CALM,0.006485,0.243103,7.281049
598,HAND WARMER OWL DESIGN,PAPER CHAIN KIT 50'S CHRISTMAS,0.006301,0.236207,5.198464
597,HAND WARMER OWL DESIGN,SCOTTIE DOG HOT WATER BOTTLE,0.005519,0.206897,6.816301
