# Market Basket Optimization

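This project identifies strong association rules between products in order to design a marketing campaign in which purchasing one item earns a discount on another. Both the Apriori and ECLAT algorithms are applied to the same transactions so their results can be compared and turned into actionable insights: the Apriori table ranks rules by lift (alongside support and confidence), while the ECLAT table ranks frequent item pairs by support alone.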

## Apriori

In [26]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori

# Preprocess Dataset
# header = None: the first line of the file is read as data, not as column names.
dataset = pd.read_csv('raw_data/Market_Basket_Optimisation.csv', header = None)

# Convert the dataset into a list of transactions.
# Rows with fewer items than the widest row carry NaN in the unused columns;
# those cells are skipped so the literal string 'nan' is never mined as a product.
# Iterating row tuples also avoids re-evaluating `dataset.values` for every cell.
transactions = [
    [str(item) for item in basket if pd.notna(item)]
    for basket in dataset.itertuples(index = False, name = None)
]

# Training the Apriori model
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length; `min_length` looks to be silently ignored - confirm. It is kept
# here so the call is unchanged.
rules = apriori(transactions = transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)

# Organizing the results into a table
# Materialise the returned iterable so it can be traversed more than once.
results = list(rules)
def inspect(results):
    """Flatten association-rule records into rows for a results table.

    Parameters
    ----------
    results : iterable
        Records indexable as ``(items, support, ordered_statistics)``, where
        every entry of ``ordered_statistics`` is indexable as
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` row per ordered
        statistic. Every statistic of a record is emitted (not only the
        first), and a record without statistics contributes no row instead
        of raising ``IndexError``.
    """
    def side_label(items):
        # A single-item side is returned unchanged; several items are joined in
        # sorted order so the label does not depend on set iteration order.
        members = sorted(items, key = str)
        if len(members) == 1:
            return members[0]
        return ', '.join(str(member) for member in members)

    rows = []
    for record in results:
        support = record[1]
        for statistic in record[2]:
            rows.append((
                side_label(statistic[0]),
                side_label(statistic[1]),
                support,
                statistic[2],
                statistic[3],
            ))
    return rows
# Build the rules table. The metric columns are cast to float explicitly: with
# no rules the frame would otherwise have object-dtype columns, which
# `nlargest` rejects with a TypeError.
resultsinDataFrame_apriori = pd.DataFrame(
    inspect(results),
    columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
).astype({'Support': 'float64', 'Confidence': 'float64', 'Lift': 'float64'})

# Show the ten rules with the highest lift (the cell's last expression is rendered).
resultsinDataFrame_apriori.nlargest(n = 10, columns = 'Lift')


Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471


## ECLAT

In [15]:
# Import Libraries
import pandas as pd
from pyECLAT import ECLAT

# Load and preprocess the dataset
# header=None: the first line of the file is read as data, not as column names.
dataset = pd.read_csv('raw_data/Market_Basket_Optimisation.csv', header=None)

# Convert the dataset into a list of transactions.
# `dataset.values` is evaluated once and iterated row by row instead of being
# looked up again for every single cell.
# NOTE(review): missing cells become the literal string 'nan' here, exactly as
# before. The displayed top-10 holds no 'nan' pair, so pyECLAT presumably
# ignores that token - confirm before changing this conversion.
transactions = [[str(item) for item in basket] for basket in dataset.values]

# Initialize the ECLAT model with the processed transactions
eclat_instance = ECLAT(data=pd.DataFrame(transactions), verbose=True)

# Get frequent itemsets of length 2
rule_indices, supports = eclat_instance.fit(min_support=0.003, min_combination=2, max_combination=2)

# Convert results to a DataFrame sorted by decreasing support.
# The dict views are materialised as lists so construction does not rely on
# pandas accepting view objects, and the build/sort is one chained expression
# so re-running the cell always starts from `supports`.
eclat_results = (
    pd.DataFrame({
        'Itemset': list(supports.keys()),
        'Support': list(supports.values())
    })
    .sort_values(by='Support', ascending=False)
    .reset_index(drop=True)
)

# Display the top 10 frequent itemsets
display(eclat_results.head(10))


100%|██████████| 120/120 [00:01<00:00, 62.29it/s]
100%|██████████| 120/120 [00:00<00:00, 5915.59it/s]
100%|██████████| 120/120 [00:00<00:00, 3831.70it/s]


Combination 2 by 2


6555it [01:44, 62.89it/s]


Unnamed: 0,Itemset,Support
0,spaghetti & mineral water,0.059725
1,chocolate & mineral water,0.05266
2,eggs & mineral water,0.050927
3,mineral water & milk,0.047994
4,ground beef & mineral water,0.040928
5,chocolate & spaghetti,0.039195
6,spaghetti & ground beef,0.039195
7,eggs & spaghetti,0.036528
8,french fries & eggs,0.036395
9,frozen vegetables & mineral water,0.035729
