In [22]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Read the transaction export, decoding the file as Latin-1.
data = pd.read_csv("Online_Retail.csv", encoding='latin1')

# Discard every row that has a missing value in any column.
data = data.dropna()

# Keep only rows whose invoice number does not start with 'C'
# (presumably cancellations -- confirm against the dataset description)
# and whose quantity is strictly positive.
is_c_invoice = data['InvoiceNo'].astype(str).str.startswith('C')
has_positive_qty = data['Quantity'] > 0
data = data[~is_c_invoice & has_positive_qty]

# Build the invoice-by-item table for United Kingdom rows only: one row per
# InvoiceNo, one column per Description, cells holding the summed Quantity
# (0 where the item does not appear on the invoice).
uk_rows = data[data['Country'] == "United Kingdom"]
qty_per_item = uk_rows.groupby(['InvoiceNo', 'Description'])['Quantity'].sum()
basket = qty_per_item.unstack().reset_index().fillna(0).set_index('InvoiceNo')

def encode_units(x):
    """Return 1 when *x* is at least 1, otherwise 0."""
    if x >= 1:
        return 1
    return 0

# One-hot encode the basket as booleans: True where the invoice holds at
# least one unit of the item. A single vectorized comparison replaces the
# per-cell Python callback, and it yields a bool frame -- mlxtend warns that
# non-bool input is slower and may lose support in a future release.
basket_sets = basket >= 1

# Itemsets present in at least 2% of the rows (invoices) of basket_sets.
frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames=True)

# Derive association rules, keeping those whose lift meets the threshold of 1.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

print("Frequent Itemsets:\n", frequent_itemsets.head())
print("\nAssociation Rules:\n", rules[['antecedents','consequents','support','confidence','lift']].head())




Frequent Itemsets:
     support                           itemsets
0  0.022404         (3 STRIPEY MICE FELTCRAFT)
1  0.037720           (6 RIBBONS RUSTIC CHARM)
2  0.025767  (60 CAKE CASES VINTAGE CHRISTMAS)
3  0.035257      (60 TEATIME FAIRY CAKE CASES)
4  0.026668   (72 SWEETHEART FAIRY CAKE CASES)

Association Rules:
                             antecedents                           consequents  \
0           (ALARM CLOCK BAKELIKE RED )          (ALARM CLOCK BAKELIKE GREEN)   
1          (ALARM CLOCK BAKELIKE GREEN)           (ALARM CLOCK BAKELIKE RED )   
2  (GARDENERS KNEELING PAD CUP OF TEA )   (GARDENERS KNEELING PAD KEEP CALM )   
3   (GARDENERS KNEELING PAD KEEP CALM )  (GARDENERS KNEELING PAD CUP OF TEA )   
4     (GREEN REGENCY TEACUP AND SAUCER)      (PINK REGENCY TEACUP AND SAUCER)   

    support  confidence       lift  
0  0.027269    0.598945  14.451925  
1  0.027269    0.657971  14.451925  
2  0.027509    0.730463  16.390122  
3  0.027509    0.617251  16.390122  
4  0.