# Association Rules for Market Basket Analysis

* Objective:
To identify interesting associations or relationships between products in the retail dataset using association rule mining techniques. The goal is to understand customer behavior and identify frequently co-purchased items.

In [3]:
# Install required library
!pip install mlxtend


Defaulting to user installation because normal site-packages is not writeable


In [4]:
# Import Libraries
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import association_rules, apriori
from mlxtend.preprocessing import TransactionEncoder
import warnings
warnings.filterwarnings('ignore')

# Dataset

In [6]:
df = pd.read_excel("https://raw.githubusercontent.com/snehuuu28/Excelr_Data_Science/main/Assignments/Assignment_10/Online%20retail.xlsx", header=None)

# Data Preprocessing

In [8]:
# Rename the column to 'Products' for better readability
df.columns = ['Products']

In [9]:
# Display the structure and summary of the dataset
print("Dataset Information:")
df.info()

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7501 entries, 0 to 7500
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Products  7501 non-null   object
dtypes: object(1)
memory usage: 58.7+ KB


In [10]:
print("\nMissing Values Count:")
print(df.isnull().sum())


Missing Values Count:
Products    0
dtype: int64


In [11]:
print("\nDataset Summary:")
print(df.describe())


Dataset Summary:
       Products
count      7501
unique     5176
top     cookies
freq        223


In [12]:
# Check a sample row for verification
print("\nSample Row:")
print(df.iloc[1])


Sample Row:
Products    burgers,meatballs,eggs
Name: 1, dtype: object


In [13]:
# Split the 'Products' column into individual items
# Each row is a transaction, with items separated by commas
df = df['Products'].apply(lambda x: x.split(','))

In [14]:
# Display the transformed transactions
print("\nTransformed Transactions (First 5 rows):")
print(df.head())


Transformed Transactions (First 5 rows):
0    [shrimp, almonds, avocado, vegetables mix, gre...
1                           [burgers, meatballs, eggs]
2                                            [chutney]
3                                    [turkey, avocado]
4    [mineral water, milk, energy bar, whole wheat ...
Name: Products, dtype: object


In [15]:
# Encode the transactional data using TransactionEncoder
te = TransactionEncoder()
encoded_data = te.fit_transform(df)
encoded_df = pd.DataFrame(encoded_data, columns=te.columns_)

In [16]:
# Replace True/False with 1/0 for numerical processing
encoded_df.replace([True, False], [1, 0], inplace=True)

In [17]:
# Display the shape and a preview of the encoded dataframe
print("\nEncoded DataFrame Shape:")
print(encoded_df.shape)


Encoded DataFrame Shape:
(7501, 120)


In [18]:
print("\nEncoded DataFrame Preview (First 5 rows):")
encoded_df.head()


Encoded DataFrame Preview (First 5 rows):


Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,0,1,1,0,1,0,0,0,0,0,...,0,1,0,0,1,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


# Association Rule Mining

In [20]:
# Apply Apriori Algorithm with a lower support threshold of 0.05 and allowing larger itemsets
frequent_itemsets = apriori(encoded_df, min_support=0.05, use_colnames=True, max_len=3)

# Display frequent itemsets
print("\nFrequent Itemsets:")
print(frequent_itemsets.head())

# Ensure itemsets are in frozenset format (necessary for association_rules)
frequent_itemsets['itemsets'] = frequent_itemsets['itemsets'].apply(frozenset)

# Number of itemsets in the frequent_itemsets
num_itemsets = len(frequent_itemsets)

# Generate Association Rules with Lift as the metric and lower confidence threshold (e.g., 0.3)
if not frequent_itemsets.empty:
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1, num_itemsets=num_itemsets)

    # Sort rules by lift for better interpretation
    sorted_rules = rules.sort_values(by='lift', ascending=False)

    # Display the sorted rules
    print("\nSorted Association Rules (Top 5):")
    print(sorted_rules.head())

    # Provide insights from the rules
    print("\nInsights from Association Rules:")
    if not sorted_rules.empty:
        for index, rule in sorted_rules.head(5).iterrows():
            print(f"Rule: If a customer buys {rule['antecedents']}, they are likely to buy {rule['consequents']}.")
            print(f" - Support: {rule['support']:.2f}, Confidence: {rule['confidence']:.2f}, Lift: {rule['lift']:.2f}\n")
    else:
        print("No significant rules found. Adjust thresholds for better results.")
else:
    print("No frequent itemsets found. Adjust the minimum support threshold.")



Frequent Itemsets:
    support     itemsets
0  0.087188    (burgers)
1  0.081056       (cake)
2  0.059992    (chicken)
3  0.163845  (chocolate)
4  0.080389    (cookies)

Sorted Association Rules (Top 5):
       antecedents      consequents  antecedent support  consequent support  \
4  (mineral water)      (spaghetti)            0.238368            0.174110   
5      (spaghetti)  (mineral water)            0.174110            0.238368   
0      (chocolate)  (mineral water)            0.163845            0.238368   
1  (mineral water)      (chocolate)            0.238368            0.163845   
2           (eggs)  (mineral water)            0.179709            0.238368   

    support  confidence      lift  representativity  leverage  conviction  \
4  0.059725    0.250559  1.439085               1.0  0.018223    1.102008   
5  0.059725    0.343032  1.439085               1.0  0.018223    1.159314   
0  0.052660    0.321400  1.348332               1.0  0.013604    1.122357   
1  0.052660 

# Analysis and Interpretation:

* Spaghetti and Mineral Water: The top two rules suggest that spaghetti and mineral water are frequently bought together. This could be due to customer preferences for pairing certain foods (e.g., pasta and drinks) during a shopping trip.
  
* Chocolate and Mineral Water: There is also a relationship between chocolate and mineral water, which may indicate that customers often buy chocolate and drinks in similar shopping baskets.
  
* Eggs and Mineral Water: The relationship between eggs and mineral water is also interesting but less strong than the others. It could reflect broader customer shopping patterns, where mineral water is a staple that gets bought along with other products.