# Apriori

## Importing the libraries

Install the required library from the console:

**pip install apyori**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [2]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
# Apyori expects the transactions as a list of lists of strings, so every cell
# of the frame is passed through str(). Cells without a product are expected
# to come out as the string 'nan' and are filtered in a later step.
# The frame's array is taken once and walked row by row, instead of evaluating
# dataset.values again for every single (row, column) lookup.
transactions_with_nans = [[str(cell) for cell in row] for row in dataset.values]

Let's get rid of the `'nan'` strings that stand in for missing values

In [3]:
# Rebuild every basket without the 'nan' placeholder strings, keeping the
# remaining products in their original order.
transactions = [
    [product for product in basket if str(product) != 'nan']
    for basket in transactions_with_nans
]

## Training the Apriori model on the dataset

In [4]:
from apyori import apriori

# Threshold choices:
#   min_support    - e.g. a product bought 3 times a day over 7 days of data:
#                    3 * 7 / len(dataset); 0.003 is the value used below.
#   min_confidence - start high (e.g. 0.8); if that yields too few rules,
#                    lower it step by step (0.4, 0.2, ...).
#   min_lift       - 3 or more is generally a good value.
#   min_length /   - driven by the business problem. Here: "buy one product,
#   max_length       get one free", i.e. rules over exactly two products.
#                    For "buy ten, get one free" use min_length 2, max_length 11.
# NOTE(review): apyori's documented keyword arguments are min_support,
# min_confidence, min_lift and max_length; min_length may be silently
# ignored - TODO confirm against the installed apyori version.
rules = apriori(
    transactions = transactions,
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 3,
    min_length = 2,
    max_length = 2,
)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [5]:
# Materialise the rules into a list so they can be displayed here and
# iterated over more than once by the cells that follow.
results = list(rules)
results

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

### Putting the results well organised into a Pandas DataFrame

In [6]:
def inspect(results):
    """Flatten apriori output into one row per association rule.

    Parameters
    ----------
    results : iterable
        Records indexable as ``(items, support, ordered_statistics)``, where
        every ordered statistic is indexable as
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        ``(lhs, rhs, support, confidence, lift)`` rows, in input order.

    Notes
    -----
    Only one item is reported for each side of a rule (``tuple(side)[0]``),
    so this is meant for rules with a single product on each side. For
    larger sides the reported item depends on set iteration order.
    """
    rows = []
    for result in results:
        support = result[1]
        # A record may carry several ordered statistics (e.g. A -> B and
        # B -> A). Emit a row for each one instead of keeping only the first
        # and silently dropping the rest; a record without any is skipped.
        for statistic in result[2]:
            lhs = tuple(statistic[0])[0]
            rhs = tuple(statistic[1])[0]
            rows.append((lhs, rhs, support, statistic[2], statistic[3]))
    return rows
# Tabulate the flattened rules, one row per rule, with readable column names.
resultsinDataFrame = pd.DataFrame(
    data = inspect(results),
    columns = [
        'Left Hand Side',
        'Right Hand Side',
        'Support',
        'Confidence',
        'Lift',
    ],
)

### Displaying the results non sorted

In [7]:
# Show the rule table as built, without any sorting applied.
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
6,light cream,olive oil,0.0032,0.205128,3.11471
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
8,pasta,shrimp,0.005066,0.322034,4.506672


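To interpret the first row, for example: when a person buys light cream, they have a 29% chance of also buying chicken (confidence); 0.45% of all transactions contain both products (support); and the lift of about 4.84 means chicken is bought roughly 4.8 times more often together with light cream than it is bought overall, which is what indicates the strength of the rule.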

### Displaying the results sorted by descending lifts

In [8]:
# Up to 10 rules with the highest lift, strongest first.
resultsinDataFrame.nlargest(n = 10, columns = "Lift")

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471


## Some changes have to be made for more products

In [9]:
from apyori import apriori

# Re-run apriori for rules that may involve more than two products:
# min_lift is raised to 4, and max_length is set to the number of columns
# in the raw dataset instead of 2.
rules = apriori(
    transactions = transactions,
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 4,
    min_length = 2,
    max_length = len(dataset.columns),
)
# Materialise the rules so they can be shown and re-used below.
results = list(rules)
results

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0.2450980392156863, lift=5.164270764485569)]),
 RelationRecord(items=frozenset({'whole wheat pasta', 'olive oil'}), support=0.007998933475536596, ordered_statistics=[OrderedStatistic(items_base=frozenset({'whole wheat pasta'}), items_add=frozenset({'olive oil'}), confidence=0.271

In [10]:
def inspect(results):
    """Flatten apriori output into one row per rule, keeping every item.

    This definition replaces the earlier single-item ``inspect`` of this
    notebook: each side of a rule is returned as a tuple of all its items,
    so rules with several products on a side are shown in full.

    Parameters
    ----------
    results : iterable
        Records indexable as ``(items, support, ordered_statistics)``, where
        every ordered statistic is indexable as
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        ``(lhs_items, rhs_items, support, confidence, lift)`` rows, in input
        order; ``lhs_items`` and ``rhs_items`` are tuples.
    """
    rows = []
    for result in results:
        support = result[1]
        # A record may carry several ordered statistics (different splits of
        # the same item set). Emit a row for each one instead of keeping only
        # the first and silently dropping the rest.
        for statistic in result[2]:
            rows.append((
                tuple(statistic[0]),
                tuple(statistic[1]),
                support,
                statistic[2],
                statistic[3],
            ))
    return rows
# Tabulate the multi-item rules, then show up to 10 with the highest lift.
resultsinDataFrame = pd.DataFrame(
    data = inspect(results),
    columns = [
        'Left Hand Side',
        'Right Hand Side',
        'Support',
        'Confidence',
        'Lift',
    ],
)
resultsinDataFrame.nlargest(n = 10, columns = 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
20,"(frozen vegetables, soup)","(milk, mineral water)",0.003066,0.383333,7.987176
19,"(frozen vegetables, olive oil)","(milk, mineral water)",0.003333,0.294118,6.128268
14,"(whole wheat pasta, mineral water)","(olive oil,)",0.003866,0.402778,6.115863
11,"(tomato sauce,)","(spaghetti, ground beef)",0.003066,0.216981,5.535971
2,"(fromage blanc,)","(honey,)",0.003333,0.245098,5.164271
0,"(light cream,)","(chicken,)",0.004533,0.290598,4.843951
1,"(pasta,)","(escalope,)",0.005866,0.372881,4.700812
6,"(spaghetti, cereals)","(ground beef,)",0.003066,0.46,4.681764
9,"(french fries, ground beef)","(herb & pepper,)",0.0032,0.230769,4.665768
15,"(frozen vegetables, mineral water, chocolate)","(shrimp,)",0.0032,0.328767,4.6009
