In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# hide warnings
import warnings
warnings.filterwarnings('ignore')

# print all columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

- The data for Market Basket Analysis comes in many form which has to be transformed to use with the `apyori` package.

# 1. Basic Example

Comma separated dataset having each record as products sold in a single transaction.

In [2]:
# import data
df1 = pd.read_csv('online_retail_basic.csv', header = None)
df1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice
1,burgers,meatballs,eggs,,,,,,,
2,chutney,,,,,,,,,
3,turkey,avocado,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,


In [3]:
df1.shape

(20, 10)

- We have a dataset with **7,501 transactions** and each transaction has **at most 20 products**.
- We will now convert this data into the structure required for `apyori` package i.e. `list` of transactions and each transaction represnted as a `tuple`.

In [4]:
%%time

records = []
for i in range(len(df1)):
    records.append(tuple(df1.loc[i, pd.notna(df1.iloc[i, :])]))

CPU times: total: 31.2 ms
Wall time: 19 ms


In [5]:
for e in range(5):
    print(e+1, records[e], sep = " - ")

1 - ('shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice')
2 - ('burgers', 'meatballs', 'eggs')
3 - ('chutney',)
4 - ('turkey', 'avocado')
5 - ('mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea')


### `apyori.apriori`

In [6]:
# Apriori Algorithm
from apyori import apriori

association_rules = apriori(transactions = records,
                            min_support = .0045,     # consider items(sets) appearing 0.45% of the time
                            min_confidence = 0.2,
                            min_lift = 3,
                            min_length = 2     # minimum items in an itemset
                           )

In [7]:
association_results = list(association_rules)

In [8]:
len(association_results)

2218

In [9]:
for e in range(5):
    print(e+1, association_results[e], sep = " - ")

1 - RelationRecord(items=frozenset({'almonds', 'avocado'}), support=0.05, ordered_statistics=[OrderedStatistic(items_base=frozenset({'almonds'}), items_add=frozenset({'avocado'}), confidence=1.0, lift=10.0), OrderedStatistic(items_base=frozenset({'avocado'}), items_add=frozenset({'almonds'}), confidence=0.5, lift=10.0)])
2 - RelationRecord(items=frozenset({'almonds', 'cottage cheese'}), support=0.05, ordered_statistics=[OrderedStatistic(items_base=frozenset({'almonds'}), items_add=frozenset({'cottage cheese'}), confidence=1.0, lift=20.0), OrderedStatistic(items_base=frozenset({'cottage cheese'}), items_add=frozenset({'almonds'}), confidence=1.0, lift=20.0)])
3 - RelationRecord(items=frozenset({'energy drink', 'almonds'}), support=0.05, ordered_statistics=[OrderedStatistic(items_base=frozenset({'almonds'}), items_add=frozenset({'energy drink'}), confidence=1.0, lift=20.0), OrderedStatistic(items_base=frozenset({'energy drink'}), items_add=frozenset({'almonds'}), confidence=1.0, lift=20.

In [10]:
# print rules in proper format
for item in association_results[:5]:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(item[1]))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")

Rule: almonds -> avocado
Support: 0.05
Confidence: 1.0
Lift: 10.0
Rule: almonds -> cottage cheese
Support: 0.05
Confidence: 1.0
Lift: 20.0
Rule: energy drink -> almonds
Support: 0.05
Confidence: 1.0
Lift: 20.0
Rule: almonds -> green grapes
Support: 0.05
Confidence: 1.0
Lift: 20.0
Rule: almonds -> shrimp
Support: 0.05
Confidence: 1.0
Lift: 10.0


# THE END