In [18]:
#import libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
from mlxtend.preprocessing import TransactionEncoder

In [19]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [20]:
#load dataset 
data = pd.read_csv(r"D:\assignment qns\Association Rules\Online retail.csv")
data.head(3)

Unnamed: 0,"shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil"
0,"burgers,meatballs,eggs"
1,chutney
2,"turkey,avocado"


Drop the null values :

In [22]:
data.dropna(inplace = True)

In [23]:
data.drop_duplicates(inplace = True)

In [24]:
#convert the data to the appropriate format 
transactions = data.apply(lambda x: x.str.split(','))

In [25]:
#convert to a list of lists
transactions_list = transactions.values.tolist()
transactions_list = [item for sublist in transactions_list for item in sublist]

In [26]:
#One -hot encode the transaction data 
te = TransactionEncoder()
te_ary = te.fit(transactions_list).transform(transactions_list)
data_ohe = pd.DataFrame(te_ary, columns = te.columns_)
data_ohe.head(3)

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False


In [27]:
data_ohe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5175 entries, 0 to 5174
Columns: 120 entries,  asparagus to zucchini
dtypes: bool(120)
memory usage: 606.6 KB


In [28]:
data_ohe.columns

Index([' asparagus', 'almonds', 'antioxydant juice', 'asparagus', 'avocado',
       'babies food', 'bacon', 'barbecue sauce', 'black tea', 'blueberries',
       ...
       'turkey', 'vegetables mix', 'water spray', 'white wine',
       'whole weat flour', 'whole wheat pasta', 'whole wheat rice', 'yams',
       'yogurt cake', 'zucchini'],
      dtype='object', length=120)

In [29]:
data_ohe.describe()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
count,5175,5175,5175,5175,5175,5175,5175,5175,5175,5175,...,5175,5175,5175,5175,5175,5175,5175,5175,5175,5175
unique,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
top,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
freq,5174,5024,5118,5141,4938,5144,5110,5095,5069,5107,...,4744,5002,5172,5061,5108,4965,4772,5097,5005,5105


## Apply Apriori algorithm  and generate association rules :

In [30]:
from mlxtend.frequent_patterns import apriori, association_rules

In [31]:
frequent_itemsets = apriori(data_ohe, min_support = 0.01, use_colnames = True)

In [32]:
# Generate the association rules
rules = association_rules(frequent_itemsets, metric ='lift', min_threshold = 1.0)

In [33]:
# Filter the rules based on confidence and lift
rules = rules[(rules['confidence'] >= 0.2) & (rules['lift'] >= 1.2)]

In [34]:
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(almonds),(mineral water),0.029179,0.29971,0.010821,0.370861,1.237399,0.002076,1.113092,0.197619
4,(avocado),(french fries),0.045797,0.192657,0.011594,0.253165,1.314069,0.002771,1.081019,0.250476
6,(avocado),(milk),0.045797,0.170048,0.010821,0.236287,1.389528,0.003034,1.086732,0.293786
14,(brownies),(french fries),0.045024,0.192657,0.011208,0.248927,1.292074,0.002534,1.07492,0.236708
24,(burgers),(eggs),0.113816,0.208116,0.036135,0.317487,1.525531,0.012448,1.160248,0.388735


In [35]:
def interpret_rules(rules):
    for _, row in rules.iterrows():
        antecedents = ', '.join(list(row['antecedents']))
        consequents = ', '.join(list(row['consequents']))
        support = row['support']
        confidence = row['confidence']
        lift = row['lift']
        
        print(f"Rule: If a customer buys [{antecedents}], they are likely to also buy [{consequents}]")
        print(f"Support: {support:.2%} of all transactions contain both {antecedents} and {consequents}.")
        print(f"Confidence: When {antecedents} is purchased, there's a {confidence:.2%} chance that {consequents} is also purchased.")
        print(f"Lift: The likelihood of buying {consequents} increases by {lift:.2f} times when {antecedents} is bought.")
        print("------")

interpret_rules(rules.head(3))

Rule: If a customer buys [almonds], they are likely to also buy [mineral water]
Support: 1.08% of all transactions contain both almonds and mineral water.
Confidence: When almonds is purchased, there's a 37.09% chance that mineral water is also purchased.
Lift: The likelihood of buying mineral water increases by 1.24 times when almonds is bought.
------
Rule: If a customer buys [avocado], they are likely to also buy [french fries]
Support: 1.16% of all transactions contain both avocado and french fries.
Confidence: When avocado is purchased, there's a 25.32% chance that french fries is also purchased.
Lift: The likelihood of buying french fries increases by 1.31 times when avocado is bought.
------
Rule: If a customer buys [avocado], they are likely to also buy [milk]
Support: 1.08% of all transactions contain both avocado and milk.
Confidence: When avocado is purchased, there's a 23.63% chance that milk is also purchased.
Lift: The likelihood of buying milk increases by 1.39 times whe

## Conclusion


> Bundling Opportunities:

Create bundles or promotions combining mineral water with salmon and shrimp with avocado. These combinations are frequently bought together, and promoting them together can enhance sales.

> Cross-Selling Strategies:

Train sales staff to suggest eggs when customers buy burgers and meatballs. This can capitalize on the existing associations and potentially increase sales.

> Inventory Management:

Ensure that associated items (like mineral water and salmon) are stocked together or in nearby locations to facilitate easy access for customers.