# Persoalan 5
Carilah data transaksi pembelian di online shopping lalu lakukan association rule mining dengan menentukan terlebih dahulu minsupp dan minconf. 

In [146]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import chardet
import re

# Sniff the file encoding from the first 10,000 bytes only; the guess reflects
# that sample, not the whole file.
with open('../dataset/Online Retail Data Set.csv', 'rb') as file:
    head_bytes = file.read(10000)
result = chardet.detect(head_bytes)
print(result)


{'encoding': 'ascii', 'confidence': 1.0, 'language': ''}


In [147]:
# Load the raw transactions, decoding the file explicitly as ISO-8859-1.
df = pd.read_csv(
    '../dataset/Online Retail Data Set.csv',
    sep=',',
    encoding='ISO-8859-1',
)

In [148]:
# Normalise the product descriptions in a single chained assignment:
#   1. convert the text to upper case,
#   2. strip leading and trailing whitespace,
#   3. apply the regex replacements held in `synonym_mapping`.
# NOTE(review): `synonym_mapping` is not defined in the visible cells; it must
# already exist in the kernel before this cell runs — confirm where it comes from.
df['Description'] = (
    df['Description']
    .str.upper()
    .str.strip()
    .replace(synonym_mapping, regex=True)
)

In [149]:
# StockCode -> Description lookup, taken from the first described row of each StockCode.
lookup_table = (
    df.dropna(subset=['Description'])
    .drop_duplicates('StockCode')
    .set_index('StockCode')['Description']
)

# Fill missing descriptions without a row-wise Python lambda:
#   1. use the description recorded elsewhere for the same StockCode, if any;
#   2. otherwise fall back to the StockCode itself, but only when it is truthy
#      (same guard as the original `and row['StockCode']` check);
#   3. otherwise the description stays missing.
# Rows that already have a description are left untouched by fillna().
stock_codes = df['StockCode']
fallback_codes = stock_codes.where(stock_codes.map(bool))
df['Description'] = (
    df['Description']
    .fillna(stock_codes.map(lookup_table))
    .fillna(fallback_codes)
)

In [150]:
# Frequency of each description after cleaning and filling.
df['Description'].value_counts()

Description
WHITE HANGING HEART T-LIGHT HOLDER    2369
REGENCY CAKESTAND 3 TIER              2200
JUMBO BAG RED RETROSPOT               2159
PARTY BUNTING                         1727
LUNCH BAG RED RETROSPOT               1638
                                      ... 
84971L                                   1
????MISSING                              1
CROCHET LILAC/RED BEAR KEYRING           1
LARGE HEART FLOWERS HOOK                 1
SET 10 CARDS 3 WISE MEN 17107            1
Name: count, Length: 4306, dtype: int64

In [151]:
# One row per invoice, holding the list of descriptions that appear on it.
basket = (
    df.groupby('InvoiceNo')['Description']
    .apply(list)
    .reset_index()
)

In [152]:
# Encode the per-invoice item lists into a transaction matrix whose columns
# are the item names learned by the encoder.
te = TransactionEncoder()
te.fit(basket['Description'])
te_array = te.transform(basket['Description'])
df_trans = pd.DataFrame(te_array, columns=te.columns_)

In [153]:
# Mining thresholds used by the cells below.
min_support: float = 0.025  # passed to apriori() as the minimum itemset support
min_treshold: float = 0.5  # passed to association_rules() as the minimum confidence

In [154]:
# Echo the chosen minimum support.
min_support

0.025

In [155]:
# Mine the itemsets whose support reaches `min_support`; `use_colnames=True`
# labels them with the column names of `df_trans`.
frequent_itemsets = apriori(
    df_trans,
    min_support=min_support,
    use_colnames=True,
)

In [156]:
# Summary statistics of the mined itemsets' support values.
frequent_itemsets.describe()

Unnamed: 0,support
count,119.0
mean,0.035978
std,0.011575
min,0.025019
25%,0.027954
50%,0.032741
75%,0.040676
max,0.08888


In [157]:
# Derive the rules whose confidence is at least `min_treshold`.
# mlxtend documents `num_itemsets` as the number of transactions in the
# original input data, so pass the row count of the encoded transaction frame
# instead of a hard-coded 2.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=min_treshold,
    num_itemsets=len(df_trans),
)

In [158]:
# Display the generated association rules.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(ROSES REGENCY TEACUP AND SAUCER),(GREEN REGENCY TEACUP AND SAUCER),0.043243,0.040811,0.03027,0.7,17.152318,1.0,0.028505,3.197297,0.984262,0.562814,0.687236,0.720861
1,(GREEN REGENCY TEACUP AND SAUCER),(ROSES REGENCY TEACUP AND SAUCER),0.040811,0.043243,0.03027,0.741722,17.152318,1.0,0.028505,3.704366,0.981765,0.562814,0.730048,0.720861
2,(JUMBO BAG PINK POLKADOT),(JUMBO BAG RED RETROSPOT),0.047529,0.082432,0.032162,0.676686,8.208973,1.0,0.028244,2.838004,0.922004,0.328859,0.64764,0.533425
3,(JUMBO SHOPPER VINTAGE RED PAISLEY),(JUMBO BAG RED RETROSPOT),0.04583,0.082432,0.026371,0.5754,6.980264,1.0,0.022593,2.161017,0.897889,0.25881,0.537255,0.447653
4,(JUMBO STORAGE BAG SUKI),(JUMBO BAG RED RETROSPOT),0.046371,0.082432,0.028301,0.610325,7.403939,1.0,0.024479,2.354698,0.906995,0.281598,0.575317,0.476825
5,(LUNCH BAG BLACK SKULL.),(LUNCH BAG RED RETROSPOT),0.05,0.062046,0.025019,0.500386,8.064717,1.0,0.021917,1.877357,0.922109,0.287489,0.467336,0.451811


In [159]:
def find_closed_patterns(frequent_itemsets):
    """
    Return the closed patterns among the given frequent itemsets.

    An itemset is closed when no proper superset present in
    ``frequent_itemsets`` has exactly the same support.

    Parameters
    ----------
    frequent_itemsets : pandas.DataFrame
        Frame with a ``support`` column and an ``itemsets`` column whose
        values are iterables of items (e.g. frozensets).

    Returns
    -------
    pandas.DataFrame
        A copy of the rows of ``frequent_itemsets`` that are closed, keeping
        the original index, columns and dtypes. When no row qualifies (or the
        input is empty) the result is an empty frame that still carries the
        input's columns.
    """
    # Build each itemset's frozenset once instead of rebuilding sets inside
    # the pairwise comparison.
    candidates = [
        (frozenset(items), support)
        for items, support in zip(frequent_itemsets['itemsets'],
                                  frequent_itemsets['support'])
    ]
    # `a < b` on frozensets is the proper-subset test (subset and not equal).
    is_closed = [
        not any(items < other_items and support == other_support
                for other_items, other_support in candidates)
        for items, support in candidates
    ]
    mask = pd.Series(is_closed, index=frequent_itemsets.index, dtype=bool)
    return frequent_itemsets.loc[mask].copy()

def find_maximal_closed_patterns(frequent_itemsets):
    """
    Return the maximal patterns among the given frequent itemsets.

    An itemset is kept when no other itemset in ``frequent_itemsets`` is a
    proper superset of it; support values are not compared.

    Parameters
    ----------
    frequent_itemsets : pandas.DataFrame
        Frame with an ``itemsets`` column whose values are iterables of items
        (e.g. frozensets).

    Returns
    -------
    pandas.DataFrame
        A copy of the rows of ``frequent_itemsets`` that are maximal, keeping
        the original index, columns and dtypes. When no row qualifies (or the
        input is empty) the result is an empty frame that still carries the
        input's columns.
    """
    # Build each itemset's frozenset once instead of rebuilding sets inside
    # the pairwise comparison.
    itemsets = [frozenset(items) for items in frequent_itemsets['itemsets']]
    # `a < b` on frozensets is the proper-subset test (subset and not equal).
    is_maximal = [
        not any(items < other_items for other_items in itemsets)
        for items in itemsets
    ]
    mask = pd.Series(is_maximal, index=frequent_itemsets.index, dtype=bool)
    return frequent_itemsets.loc[mask].copy()

In [160]:
# Compute the closed patterns and print them under a heading.
closed_patterns = find_closed_patterns(frequent_itemsets)
print("Closed Patterns:", closed_patterns, sep="\n")

Closed Patterns:
      support                                           itemsets
0    0.037104                           (6 RIBBONS RUSTIC CHARM)
1    0.032278                      (60 TEATIME FAIRY CAKE CASES)
2    0.038726                       (ALARM CLOCK BAKELIKE GREEN)
3    0.030849                        (ALARM CLOCK BAKELIKE PINK)
4    0.041737                         (ALARM CLOCK BAKELIKE RED)
..        ...                                                ...
114  0.030270  (ROSES REGENCY TEACUP AND SAUCER, GREEN REGENC...
115  0.032162  (JUMBO BAG RED RETROSPOT, JUMBO BAG PINK POLKA...
116  0.026371  (JUMBO BAG RED RETROSPOT, JUMBO SHOPPER VINTAG...
117  0.028301  (JUMBO BAG RED RETROSPOT, JUMBO STORAGE BAG SUKI)
118  0.025019  (LUNCH BAG RED RETROSPOT, LUNCH BAG  BLACK SKU...

[119 rows x 2 columns]


In [161]:
# Find the maximal closed patterns and print them under a heading.
maximal_patterns = find_maximal_closed_patterns(frequent_itemsets)
print("\nMaximal Closed Patterns:", maximal_patterns, sep="\n")


Maximal Closed Patterns:
      support                                           itemsets
0    0.037104                           (6 RIBBONS RUSTIC CHARM)
1    0.032278                      (60 TEATIME FAIRY CAKE CASES)
2    0.038726                       (ALARM CLOCK BAKELIKE GREEN)
3    0.030849                        (ALARM CLOCK BAKELIKE PINK)
4    0.041737                         (ALARM CLOCK BAKELIKE RED)
..        ...                                                ...
114  0.030270  (ROSES REGENCY TEACUP AND SAUCER, GREEN REGENC...
115  0.032162  (JUMBO BAG RED RETROSPOT, JUMBO BAG PINK POLKA...
116  0.026371  (JUMBO BAG RED RETROSPOT, JUMBO SHOPPER VINTAG...
117  0.028301  (JUMBO BAG RED RETROSPOT, JUMBO STORAGE BAG SUKI)
118  0.025019  (LUNCH BAG RED RETROSPOT, LUNCH BAG  BLACK SKU...

[111 rows x 2 columns]
