# Chapter 13: Apriori Algorithm

Reference: https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Pull the "Online Retail" workbook from the UCI archive URL into a
# DataFrame, then preview its first rows.
data = pd.read_excel(
    io="https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx"
)
data.head(5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [4]:
# Show the column labels of the loaded frame.
data.columns

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [5]:
# Build the United Kingdom basket table: keep only rows whose Country is
# "United Kingdom", total the Quantity per (InvoiceNo, Description) pair,
# pivot Description into columns, and fill missing cells with 0.
basket_UK = (
    data
    .loc[data['Country'].eq("United Kingdom")]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [6]:
# One-hot encoding helper that turns a quantity into a 0/1 flag, making the
# data suitable for the frequent-pattern libraries used below.
def hot_encode(x):
    """Map a quantity to a binary "purchased" flag.

    Parameters
    ----------
    x : int or float
        Quantity value of a single basket cell.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    The result is always 0 or 1. Values that are neither ``<= 0`` nor
    ``>= 1`` (fractions strictly between 0 and 1, or NaN) are encoded as 0
    rather than falling through and yielding ``None``.
    """
    # A single total expression guarantees every input gets an int back.
    return 1 if x >= 1 else 0
  
# Encoding the datasets
# Apply hot_encode to every cell, then cast the result to bool so the frame
# holds True/False flags (the boolean form mlxtend recommends as apriori
# input; the frequent itemsets found are the same as with 0/1 integers).
basket_encoded = basket_UK.applymap(hot_encode).astype(bool)
# Downstream cells read the encoded table through the basket_UK name.
basket_UK = basket_encoded

In [7]:
# Mine frequent itemsets from the encoded UK baskets (min_support of 0.01),
# keeping item names rather than column indices.
frq_items = apriori(basket_UK, min_support=0.01, use_colnames=True)

# Derive association rules using the lift metric with a threshold of 1, then
# order them by confidence and lift, both descending.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                            antecedents  \
1694  (REGENCY TEA PLATE ROSES , REGENCY TEA PLATE P...   
1727  (CHARLOTTE BAG PINK POLKADOT, WOODLAND CHARLOT...   
1693  (REGENCY TEA PLATE GREEN , REGENCY TEA PLATE P...   
1700  (CHARLOTTE BAG PINK POLKADOT, CHARLOTTE BAG SU...   
1757  (REGENCY CAKESTAND 3 TIER, PINK REGENCY TEACUP...   

                            consequents  antecedent support  \
1694         (REGENCY TEA PLATE GREEN )            0.010708   
1727      (RED RETROSPOT CHARLOTTE BAG)            0.011343   
1693         (REGENCY TEA PLATE ROSES )            0.010935   
1700      (RED RETROSPOT CHARLOTTE BAG)            0.011797   
1757  (GREEN REGENCY TEACUP AND SAUCER)            0.013203   

      consequent support   support  confidence       lift  leverage  \
1694            0.015245  0.010118    0.944915  61.981941  0.009955   
1727            0.041062  0.010526    0.928000  22.600133  0.010061   
1693            0.017287  0.010118    0.925311  53.52

In [8]:
# Rich (tabular) display of the first rows of the rules table.
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1694,"(REGENCY TEA PLATE ROSES , REGENCY TEA PLATE P...",(REGENCY TEA PLATE GREEN ),0.010708,0.015245,0.010118,0.944915,61.981941,0.009955,17.877091
1727,"(CHARLOTTE BAG PINK POLKADOT, WOODLAND CHARLOT...",(RED RETROSPOT CHARLOTTE BAG),0.011343,0.041062,0.010526,0.928,22.600133,0.010061,13.318587
1693,"(REGENCY TEA PLATE GREEN , REGENCY TEA PLATE P...",(REGENCY TEA PLATE ROSES ),0.010935,0.017287,0.010118,0.925311,53.527189,0.009929,13.157438
1700,"(CHARLOTTE BAG PINK POLKADOT, CHARLOTTE BAG SU...",(RED RETROSPOT CHARLOTTE BAG),0.011797,0.041062,0.010799,0.915385,22.292903,0.010314,11.332907
1757,"(REGENCY CAKESTAND 3 TIER, PINK REGENCY TEACUP...",(GREEN REGENCY TEACUP AND SAUCER),0.013203,0.042377,0.011978,0.907216,21.407978,0.011419,10.321043
