## Libraries and Data Loading

In [None]:
# Required libraries
# !pip install apyori
# !pip install pyfpgrowth

import numpy as np
import pandas as pd
from apyori import apriori
import pyfpgrowth as fp

In [None]:
# Load the market-basket data (via the Machine Learning Quick Reference book).
# header=None makes pandas treat the first line of the file as data, not as column names.
data = pd.read_csv(
    "data/Market_Basket_Optimisation.csv",
    header=None,
)
data.shape

In [None]:
# Preview the first five rows of the raw frame
data.head(n=5)

## Pre-Processing

Apriori requires a list as input format, whereas we have a Pandas DataFrame here.
We need to convert our DataFrame into a list of sub-lists (one sub-list of item names per transaction), as follows:
1. Iterate over all rows
2. For each row, collect every non-missing item from its columns

In [None]:
# Read the underlying array once, rather than on every single cell access
basket_values = data.values

# Build the transaction "container": one sub-list per row, holding that row's
# non-missing cells converted to strings
transactions_list = [
    [str(item) for item in row if not pd.isna(item)]
    for row in basket_values
]

# Let's take a look at the first transaction
print(", ".join(transactions_list[0]))

## Recap

![img/rules.png](img/rules.png)

## Apriori Algorithm

In [None]:
# Run apriori on our transactions list to get the rules that satisfy the thresholds below.
# Note: apyori's apriori() reads min_support, min_confidence, min_lift and max_length;
# it has no min_length option (unrecognised keywords are ignored), so none is passed.
# Rules with an empty left-hand side have lift 1 and are excluded by min_lift=3 anyway.
rules_generator = apriori(
    transactions_list,
    min_support=0.004,
    min_confidence=0.2,
    min_lift=3,
)
# Materialise the generator so the rules can be indexed and reused in later cells
apriori_rules = list(rules_generator)

# Let's take a look at the first rule
apriori_rules[0]

Some considerations on the first rule:
* We have {'chicken', 'light cream'} with support=0.0045, confidence=0.291 and lift=4.84
* items_base is {'light cream'} and items_add is {'chicken'}
* There is a 29% chance (confidence) that a customer will buy chicken if they have already bought light cream

In [None]:
# Let's extract our rules and visualize them

def inspect(results):
    """Flatten apriori results into (lhs, rhs, support, confidence, lift) tuples.

    Each element of ``results`` is indexed as ``(items, support, ordered_statistics)``
    and each ordered statistic as ``(items_base, items_add, confidence, lift)``.
    Only the first ordered statistic of every record is reported, so the output
    contains exactly one tuple per record.

    Both rule sides are rendered as the comma-separated, alphabetically sorted
    names of all their items. A multi-item side is therefore shown in full rather
    than reduced to one arbitrary member, and an empty side yields ``""``.

    Args:
        results: iterable of apriori records (e.g. ``list(apriori(...))``).

    Returns:
        A list of ``(lhs, rhs, support, confidence, lift)`` tuples.
    """
    def side_label(items):
        # Sets have no stable iteration order; sort so the label is deterministic
        return ", ".join(sorted(str(item) for item in items))

    rows = []
    for result in results:
        first_stat = result[2][0]
        rows.append((side_label(first_stat[0]),
                     side_label(first_stat[1]),
                     result[1],
                     first_stat[2],
                     first_stat[3]))
    return rows

# Tabulate the extracted tuples, one row per rule
df_results = pd.DataFrame(
    data=inspect(apriori_rules),
    columns=['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
)
df_results.head()

In [None]:
# Top-5 rules ranked by support
# (swap 'Support' for 'Confidence' or 'Lift' to rank by another metric)
df_results.nlargest(5, 'Support')

## FP-Growth Algorithm

In [None]:
# Small hand-made set of baskets for the FP-Growth demo.
# NOTE: this rebinds transactions_list, replacing the market-basket
# transactions built in the pre-processing section.
transactions_list = [
    ["bread", "butter", "cereal"],
    ["butter", "milk"],
    ["bread", "milk"],
    ["butter", "cereal", "milk"],
    ["egg", "bread"],
    ["egg", "butter"],
    ["cereal", "milk"],
    ["bread", "butter", "cereal", "egg"],
    ["cereal", "bread", "butter"],
]

In [None]:
# Mine the frequent patterns: item combinations whose count across the baskets
# meets the support threshold
frequent_patterns = fp.find_frequent_patterns(
    transactions=transactions_list,
    support_threshold=2,
)

# Turn those patterns into association rules, keeping only the ones that reach
# the minimum confidence
rules = fp.generate_association_rules(
    patterns=frequent_patterns,
    confidence_threshold=0.5,
)

In [None]:
# Print every association rule as "left items -> right items (rounded score)"
for antecedent, outcome in rules.items():
    # Each value holds the right-hand items at index 0 and the rule's score at index 1
    consequent, confidence = outcome[0], outcome[1]
    print("{} -> {} ({})".format(", ".join(antecedent),
                                 ", ".join(consequent),
                                 round(confidence, 3)))