# Market Basket Analysis with Python

## 1. Load the Dataset

In [1]:
# Import Pandas and Numpy
import pandas as pd
import numpy as np

# Load the Dataset
df = pd.read_csv("input/Market_Basket_Optimisation.csv", header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


## 2. Data Preparation

In [2]:
# Create a list of transaction
df['Transactions']= df.values.tolist()

In [3]:
df['Transactions']

0       [shrimp, almonds, avocado, vegetables mix, gre...
1       [burgers, meatballs, eggs, nan, nan, nan, nan,...
2       [chutney, nan, nan, nan, nan, nan, nan, nan, n...
3       [turkey, avocado, nan, nan, nan, nan, nan, nan...
4       [mineral water, milk, energy bar, whole wheat ...
                              ...                        
7496    [butter, light mayo, fresh bread, nan, nan, na...
7497    [burgers, frozen vegetables, eggs, french frie...
7498    [chicken, nan, nan, nan, nan, nan, nan, nan, n...
7499    [escalope, green tea, nan, nan, nan, nan, nan,...
7500    [eggs, frozen smoothie, yogurt cake, low fat y...
Name: Transactions, Length: 7501, dtype: object

In [4]:
# Delete NaN from the transaction list
df['Transactions'] = df['Transactions'].apply(lambda x: [i for i in x if str(i) != "nan"])

In [5]:
df['Transactions']

0       [shrimp, almonds, avocado, vegetables mix, gre...
1                              [burgers, meatballs, eggs]
2                                               [chutney]
3                                       [turkey, avocado]
4       [mineral water, milk, energy bar, whole wheat ...
                              ...                        
7496                    [butter, light mayo, fresh bread]
7497    [burgers, frozen vegetables, eggs, french frie...
7498                                            [chicken]
7499                                [escalope, green tea]
7500    [eggs, frozen smoothie, yogurt cake, low fat y...
Name: Transactions, Length: 7501, dtype: object

In [6]:
# Convert the transaction list from a DataFrame column into a list of strings
transactions = list(df['Transactions'])

In [7]:
# Count a transaction which contains burgers, meatballs, and eggs
transactions.count(['burgers', 'meatballs', 'eggs'])

1

In [8]:
# Count the number of rules

# Import library to count the number of permutations
from itertools import permutations

# Extract unique items.
unique_items = [item for transaction in transactions for item in transaction]

# Convert the unique item list from a string to a list
unique_item_list = list(set(unique_items))

# Compute and print rules.
rules = list(permutations(unique_item_list, 2))
print(rules)

[('soup', 'salt'), ('soup', 'burger sauce'), ('soup', 'nonfat milk'), ('soup', 'extra dark chocolate'), ('soup', 'rice'), ('soup', 'hand protein bar'), ('soup', 'tomatoes'), ('soup', 'corn'), ('soup', 'flax seed'), ('soup', 'sandwich'), ('soup', 'spaghetti'), ('soup', 'chocolate bread'), ('soup', 'almonds'), ('soup', 'avocado'), ('soup', 'mashed potato'), ('soup', 'soda'), ('soup', 'green beans'), ('soup', 'chocolate'), ('soup', 'brownies'), ('soup', 'cider'), ('soup', 'green grapes'), ('soup', 'whole wheat rice'), ('soup', 'shrimp'), ('soup', 'ground beef'), ('soup', 'cookies'), ('soup', 'mayonnaise'), ('soup', 'babies food'), ('soup', 'mint'), ('soup', 'bramble'), ('soup', 'yams'), ('soup', 'whole weat flour'), ('soup', 'french wine'), ('soup', 'eggplant'), ('soup', 'yogurt cake'), ('soup', 'meatballs'), ('soup', 'champagne'), ('soup', 'turkey'), ('soup', 'mineral water'), ('soup', 'cream'), ('soup', 'low fat yogurt'), ('soup', 'black tea'), ('soup', 'gluten free bar'), ('soup', 'muf

In [9]:
# Print the number of rules with length 2
print(len(rules))

14280


## 3. Basic Metrics

### 3.1. Support

In [10]:
# Import the library for encoding
from mlxtend.preprocessing import TransactionEncoder

# Instantiate transaction encoder
encoder = TransactionEncoder().fit(transactions)

ModuleNotFoundError: No module named 'mlxtend'

In [None]:
# One-hot encode itemsets by applying fit and transform
onehot = encoder.transform(transactions)

# Convert one-hot encoded data to DataFrame
onehot = pd.DataFrame(onehot, columns = encoder.columns_)
print(onehot)

In [None]:
# Computing Support for Single Items
print(onehot.mean())

In [None]:
# Define itemset that contains both eggs and ground beef
onehot['eggs_&_ground beef'] = np.logical_and(onehot['eggs'], onehot['ground beef'])

# Compute Support for itemset that contains both eggs and ground beef 
print(onehot['eggs_&_ground beef'].mean())

In [None]:
# Drop the column of "eggs_&_ground beef" to keep the dataset simple 
onehot=onehot.drop('eggs_&_ground beef', axis=1)
onehot

### 3.2. Confidence

In [None]:
# Compute Support for the itemsets that contains eggs and ground beef
support_eggs_groundbeef = np.logical_and(onehot['eggs'], onehot['ground beef']).mean()
support_eggs = onehot['eggs'].mean()
support_groundbeef = onehot['ground beef'].mean()

In [None]:
# Compute and print Confidence {eggs -> ground beef}
confidence_eggs_to_groundbeef = support_eggs_groundbeef / support_eggs
print(confidence_eggs_to_groundbeef)

### 3.3. Lift

In [None]:
# Compute and print Lift {eggs -> ground beef}
lift_eggs_to_groundbeef = support_eggs_groundbeef / (support_eggs * support_groundbeef)
print(lift_eggs_to_groundbeef)

In [None]:
lift2 = confidence_eggs_to_groundbeef / support_groundbeef
print(lift2)

## 4. Apriori Algorithm

In [None]:
# Import Apriori algorithm
from mlxtend.frequent_patterns import apriori

In [None]:
# Compute frequent itemsets
frequent_itemsets = apriori(onehot, min_support = 0.0005,max_len = 4, use_colnames = True)

# Print number of itemsets
print(len(frequent_itemsets))

In [None]:
# Print frequent itemsets
print(frequent_itemsets.head())

In [None]:
# Import Apriori algorithm
from mlxtend.frequent_patterns import apriori, association_rules

# Compute association rules
Rules = association_rules(frequent_itemsets,
                          metric = "support",
                          min_threshold = 0.005)

In [None]:
# Print association rules
Rules

In [None]:
# Print the rules.
print(Rules)

In [None]:
filtered_rules = Rules[(Rules['antecedent support'] > 0.01) & 
                       (Rules['support'] > 0.009) & 
                       (Rules['confidence'] > 0.5) & 
                       (Rules['lift'] > 1.00)]

In [None]:
filtered_rules

In [None]:
# Computing support.
supportASAL = np.logical_and(onehot['asparagus'],onehot['almonds']).mean()
supportAS = onehot['asparagus'].mean()
supportAL = onehot['almonds'].mean()

# Compute and print confidence and lift.
confidence = supportASAL / supportAS
lift = supportASAL / (supportAS * supportAL)

# Print results.
print(supportAL, confidence, lift)