# Market Basket Analysis with Python

## 1. Load the Dataset

In [1]:
# Import Pandas and Numpy
import pandas as pd
import numpy as np

# Load the Dataset
# header=None: the first line of the file is read as data, not as column names.
csv_path = "input/Market_Basket_Optimisation.csv"
df = pd.read_csv(csv_path, header=None)
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'input/Market_Basket_Optimisation.csv'

## 2. Data Preparation

In [None]:
# Create a list of transaction
# Build each row's list from the item columns only: leaving out an existing
# 'Transactions' column keeps this cell safe to re-run (otherwise the lists
# from the previous run would end up nested inside the new ones).
item_columns = df.drop(columns='Transactions', errors='ignore')
df['Transactions'] = item_columns.values.tolist()

In [None]:
# Display the 'Transactions' column (one list of cell values per row)
df['Transactions']

In [None]:
# Delete NaN from the transaction list
# pd.notna tests for a missing value directly, instead of comparing the
# string form of every item with "nan".
df['Transactions'] = df['Transactions'].apply(
    lambda basket: [item for item in basket if pd.notna(item)]
)

In [None]:
# Display the 'Transactions' column again to inspect the result
df['Transactions']

In [None]:
# Pull the column out of the DataFrame as a plain Python list
# (one inner list of items per transaction)
transactions = df['Transactions'].tolist()

In [None]:
# Count the transactions that are exactly ['burgers', 'meatballs', 'eggs']
# (list.count compares whole lists, so both the items and their order must match)
target_basket = ['burgers', 'meatballs', 'eggs']
transactions.count(target_basket)

In [None]:
# Count the number of rules

# Import library to count the number of permutations
from itertools import permutations

# Flatten all transactions into one list of items (duplicates still included).
unique_items = [item for transaction in transactions for item in transaction]

# De-duplicate the items. Sorting fixes the order, so the generated rules come
# out in the same order on every run (iteration order of a bare set of strings
# can differ between interpreter sessions).
unique_item_list = sorted(set(unique_items))

# Every ordered pair of two distinct items is one candidate rule.
rules = list(permutations(unique_item_list, 2))
print(rules)

In [None]:
# Print the number of rules with length 2
# (how many 2-item tuples the `rules` list holds)
print(len(rules))

## 3. Basic Metrics

### 3.1. Support

In [None]:
# Import the library for encoding
from mlxtend.preprocessing import TransactionEncoder

# Create the encoder, then fit it on the transactions
encoder = TransactionEncoder()
encoder = encoder.fit(transactions)

In [None]:
# One-hot encode the transactions with the already fitted encoder
encoded_array = encoder.transform(transactions)

# Wrap the encoded array in a DataFrame whose columns are the encoder's item names
onehot = pd.DataFrame(encoded_array, columns=encoder.columns_)
print(onehot)

In [None]:
# Computing Support for Single Items
# The mean of each one-hot column is the share of rows (transactions) in which
# that item is present, i.e. the item's support.
print(onehot.mean())

In [None]:
# Add a helper column that is True where a transaction has both eggs and ground beef
pair_column = 'eggs_&_ground beef'
onehot[pair_column] = np.logical_and(onehot['eggs'], onehot['ground beef'])

# The mean of that column is the support of the {eggs, ground beef} itemset
print(onehot[pair_column].mean())

In [None]:
# Remove the temporary "eggs_&_ground beef" column again to keep the dataset simple
onehot = onehot.drop(columns='eggs_&_ground beef')
onehot

### 3.2. Confidence

In [None]:
# Support of each single item: the share of transactions containing it
support_eggs = onehot['eggs'].mean()
support_groundbeef = onehot['ground beef'].mean()

# Support of the pair: the share of transactions containing both items
contains_both = np.logical_and(onehot['eggs'], onehot['ground beef'])
support_eggs_groundbeef = contains_both.mean()

In [None]:
# Compute and print Confidence {eggs -> ground beef}
# confidence = support(eggs and ground beef) / support(eggs)
confidence_eggs_to_groundbeef = support_eggs_groundbeef / support_eggs
print(confidence_eggs_to_groundbeef)

### 3.3. Lift

In [None]:
# Compute and print Lift {eggs -> ground beef}
# lift = support(eggs and ground beef) / (support(eggs) * support(ground beef))
lift_eggs_to_groundbeef = support_eggs_groundbeef / (support_eggs * support_groundbeef)
print(lift_eggs_to_groundbeef)

In [None]:
# Same lift written as confidence(eggs -> ground beef) / support(ground beef);
# algebraically this equals the support-based formula, so the value should
# agree up to floating-point rounding.
lift2 = confidence_eggs_to_groundbeef / support_groundbeef
print(lift2)

## 4. Apriori Algorithm

In [None]:
# Import Apriori algorithm
from mlxtend.frequent_patterns import apriori

In [None]:
# Mine the frequent itemsets: up to 4 items each, minimum support 0.0005;
# use_colnames=True reports item names rather than column indices
frequent_itemsets = apriori(
    onehot,
    min_support=0.0005,
    max_len=4,
    use_colnames=True,
)

# Report how many itemsets were found
print(len(frequent_itemsets))

In [None]:
# Print frequent itemsets (first rows only, via head())
print(frequent_itemsets.head())

In [None]:
# Import the frequent-itemset miner and the association-rule generator
from mlxtend.frequent_patterns import apriori, association_rules

# Derive association rules from the frequent itemsets, using support as the
# selection metric with a minimum threshold of 0.005
Rules = association_rules(
    frequent_itemsets,
    metric="support",
    min_threshold=0.005,
)

In [None]:
# Display the association rules table as the cell output
Rules

In [None]:
# Print the rules as plain text.
print(Rules)

In [None]:
# Keep only the rules that clear all four thresholds at once
antecedent_is_common = Rules['antecedent support'] > 0.01
rule_is_frequent = Rules['support'] > 0.009
rule_is_confident = Rules['confidence'] > 0.5
rule_has_positive_lift = Rules['lift'] > 1.00

filtered_rules = Rules[
    antecedent_is_common
    & rule_is_frequent
    & rule_is_confident
    & rule_has_positive_lift
]

In [None]:
# Display the rules that passed the filter
filtered_rules

In [None]:
# Supports for the rule {asparagus -> almonds}: the pair and each single item.
supportASAL = np.logical_and(onehot['asparagus'], onehot['almonds']).mean()
supportAS = onehot['asparagus'].mean()
supportAL = onehot['almonds'].mean()

# Compute confidence and lift.
confidence = supportASAL / supportAS
lift = supportASAL / (supportAS * supportAL)

# Print the rule's own metrics: joint support, confidence, lift.
# (supportAL is only the support of almonds alone, an input to the lift.)
print(supportASAL, confidence, lift)