In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import OneHotEncoder

!pip install mlxtend==0.23.1

Collecting mlxtend==0.23.1
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mlxtend
  Attempting uninstall: mlxtend
    Found existing installation: mlxtend 0.23.3
    Uninstalling mlxtend-0.23.3:
      Successfully uninstalled mlxtend-0.23.3
Successfully installed mlxtend-0.23.1


# Association Rule for Store Dataset

In this case study, we will explore how association rule can be used to analyze the items that are usualy purcased together.

you can refer to this article to find out about apriori and association rule:
https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/
https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/

## Load Data

We will use the dataset of the transaction in a certain store. You can get the dataset here:
https://gist.githubusercontent.com/Harsh-Git-Hub/2979ec48043928ad9033d8469928e751/raw/72de943e040b8bd0d087624b154d41b2ba9d9b60/retail_dataset.csv

In [3]:
 # load the data set ans show the first five transaction

# Load the dataset
df = pd.read_csv('https://gist.githubusercontent.com/Harsh-Git-Hub/2979ec48043928ad9033d8469928e751/raw/72de943e040b8bd0d087624b154d41b2ba9d9b60/retail_dataset.csv')

# Display the first five transactions
df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,Bread,Wine,Eggs,Meat,Cheese,Pencil,Diaper
1,Bread,Cheese,Meat,Diaper,Wine,Milk,Pencil
2,Cheese,Meat,Eggs,Milk,Wine,,
3,Cheese,Meat,Eggs,Milk,Wine,,
4,Meat,Pencil,Wine,,,,


# Get the set of product that has been purchased


In [4]:
# Mengonversi data ke NumPy array
array = np.array(df)

# Meratakan array menjadi satu dimensi
flattened = np.ravel(array)

# Mengambil produk unik (mengabaikan NaN/None)
unique_products = set(flattened)

print(unique_products)

{'Diaper', 'Bagel', 'Wine', 'Cheese', 'Pencil', 'Bread', 'Meat', 'Eggs', 'Milk', nan}


## Preprocess Data

In this step, we will transform our dataset so that we will have a one hot encoding based on the purchased products.

In [5]:
# Mapping encoding
encoding = {
    'Milk': 0,
    'Bagel': 0,
    np.nan: 0,  # Encoding untuk nilai NaN
    'Wine': 1,
    'Cheese': 1,
    'Diaper': 1,
    'Meat': 1,
    'Eggs': 1,
    'Bread': 1,
    'Pencil': 1
}

# Membentuk itemset
def create_itemset(df, encoding):
    itemset = {}
    for col in df.columns:
        for product in df[col].unique():
            # Menambahkan produk dengan encoding-nya
            itemset[product] = encoding.get(product, 0)
    return itemset

# Membuat itemset berdasarkan DataFrame dan encoding
itemset = create_itemset(df, encoding)

# Menampilkan itemset
itemset

{'Bread': 1,
 'Cheese': 1,
 'Meat': 1,
 'Eggs': 1,
 'Wine': 1,
 'Bagel': 0,
 'Pencil': 1,
 'Diaper': 1,
 'Milk': 0,
 nan: 0}

In [6]:
# Daftar produk unik dari DataFrame
products = pd.unique(df.values.ravel())

# Buat DataFrame kosong dengan kolom produk unik
encoded_df = pd.DataFrame(0, index=df.index, columns=products)

# Isi DataFrame baru dengan encoding
for col in df.columns:
    for idx, product in enumerate(df[col]):
        if pd.notna(product):  # Abaikan NaN
            encoded_df.loc[idx, product] = 1

# Tampilkan DataFrame hasil encoding
encoded_df.head()

Unnamed: 0,Bread,Wine,Eggs,Meat,Cheese,Pencil,Diaper,Milk,NaN,Bagel
0,1,1,1,1,1,1,1,0,0,0
1,1,1,0,1,1,1,1,1,0,0
2,0,1,1,1,1,0,0,1,0,0
3,0,1,1,1,1,0,0,1,0,0
4,0,1,0,1,0,1,0,0,0,0


Since, the encoded dataframe consist of the empty column. We will drop the NaN column or select all columns other than the first column.

## Apriori Algorithm

We will use appriori algorithm to determine the frequently purchased products.
For this case study, we will min_support=0.2

In [14]:
from mlxtend.frequent_patterns import apriori, fpmax, fpgrowth

frequent_itemsets = apriori(encoded_df, min_support=0.2, use_colnames=True)


frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.504762,(Bread)
1,0.438095,(Wine)
2,0.438095,(Eggs)
3,0.47619,(Meat)
4,0.501587,(Cheese)
5,0.361905,(Pencil)
6,0.406349,(Diaper)
7,0.501587,(Milk)
8,0.425397,(Bagel)
9,0.244444,"(Bread, Wine)"


In [17]:
from mlxtend.frequent_patterns import association_rules

association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Bagel),(Bread),0.425397,0.504762,0.279365,0.656716,1.301042,0.064641,1.44265,0.402687
1,(Wine),(Cheese),0.438095,0.501587,0.269841,0.615942,1.227986,0.050098,1.297754,0.330409
2,(Eggs),(Meat),0.438095,0.47619,0.266667,0.608696,1.278261,0.05805,1.338624,0.387409
3,(Eggs),(Cheese),0.438095,0.501587,0.298413,0.681159,1.358008,0.07867,1.563203,0.469167
4,(Meat),(Cheese),0.47619,0.501587,0.32381,0.68,1.355696,0.084958,1.55754,0.500891
5,(Cheese),(Meat),0.501587,0.47619,0.32381,0.64557,1.355696,0.084958,1.477891,0.526414
6,(Milk),(Cheese),0.501587,0.501587,0.304762,0.607595,1.211344,0.053172,1.270148,0.350053
7,(Cheese),(Milk),0.501587,0.501587,0.304762,0.607595,1.211344,0.053172,1.270148,0.350053
8,"(Meat, Eggs)",(Cheese),0.266667,0.501587,0.215873,0.809524,1.613924,0.082116,2.616667,0.518717
9,"(Meat, Cheese)",(Eggs),0.32381,0.438095,0.215873,0.666667,1.521739,0.074014,1.685714,0.507042


The we will generate association rule of the frequent itemset based on confidence level with the threshold=0.6

Provide explanation about __antecedent support__, __consequent support__, __support__, __confidence__, __lift__, __leverage__, __conviction__, __conviction__ and the interpretation from the case above (please use text section)

Confidence Tertinggi (0.809524):

Aturan: {Meat} -> {Cheese}
Penjelasan: Jika pelanggan membeli Meat, maka ada kemungkinan 80.95% pelanggan tersebut juga membeli Cheese. Aturan ini memiliki Lift = 1.613924, yang menunjukkan bahwa aturan ini lebih baik dibandingkan prediksi acak.
Lift Tertinggi (1.685714):

Aturan: {Meat, Cheese} -> {Eggs}
Penjelasan: Kombinasi Meat dan Cheese meningkatkan peluang pembelian Eggs sebesar 1.685 kali dibandingkan pembelian acak. Confidence dari aturan ini adalah 72.34%.
Aturan dengan Dukungan Tinggi:

Aturan: {Bagel} -> {Bread}
Dukungan: 0.279365
Penjelasan: Bagel dan Bread sering dibeli bersama, dengan confidence 65.67% dan lift 1.301042.
Aturan Menarik Lainnya:

Aturan: {Eggs} -> {Meat}
Confidence: 60.86%
Lift: 1.278261
Penjelasan: Pelanggan yang membeli Eggs memiliki kemungkinan 60.86% juga membeli Meat.
Kesimpulan Umum:

Hubungan antara Meat, Cheese, dan Eggs sangat kuat, baik dalam confidence maupun lift, menunjukkan bahwa produk-produk ini sering dibeli bersama.
Kombinasi Bagel dan Bread juga memiliki hubungan yang cukup signifikan dengan confidence 65.67%.
Rekomendasi:
Bundling Produk:
Produk seperti Meat, Cheese, dan Eggs dapat dipaketkan bersama untuk meningkatkan penjualan.
Penempatan di Rak:
Tempatkan Bagel berdekatan dengan Bread, karena sering dibeli bersama.
Promosi Khusus:
Tawarkan diskon pada salah satu produk dalam aturan yang memiliki confidence tinggi (misalnya Meat atau Cheese) untuk mendorong pembelian produk lainnya.