In [4]:
#mengimpor library yang dibutuhkan
import pandas as pd
import numpy as np
from mlxtend. frequent_patterns import fpgrowth, association_rules

In [5]:
#Data Collection
#mengimpor dataset dan menyimpan dalam dataframe
path = "../Dataset/API_Kaggle/market_basket_dataset.csv"
df = pd.read_csv(path)
df

Unnamed: 0,BillNo,Itemname,Quantity,Price,CustomerID
0,1000,Apples,5,8.30,52299
1,1000,Butter,4,6.06,11752
2,1000,Eggs,4,2.66,16415
3,1000,Potatoes,4,8.10,22889
4,1004,Oranges,2,7.26,52255
...,...,...,...,...,...
495,1493,Juice,2,4.24,55321
496,1493,Bread,5,7.05,14479
497,1497,Coffee,3,2.01,25378
498,1497,Pasta,3,2.64,53334


In [6]:
#Data Preprocessing
#mengganti nama kolom
df.rename(columns={'BillNo': 'Bill', 'Itemname': 'Item'}, inplace=True)
df

Unnamed: 0,Bill,Item,Quantity,Price,CustomerID
0,1000,Apples,5,8.30,52299
1,1000,Butter,4,6.06,11752
2,1000,Eggs,4,2.66,16415
3,1000,Potatoes,4,8.10,22889
4,1004,Oranges,2,7.26,52255
...,...,...,...,...,...
495,1493,Juice,2,4.24,55321
496,1493,Bread,5,7.05,14479
497,1497,Coffee,3,2.01,25378
498,1497,Pasta,3,2.64,53334


In [7]:
#mengecek apakah ada kolom yang bernilai NaN
df.isna().sum()

Bill          0
Item          0
Quantity      0
Price         0
CustomerID    0
dtype: int64

In [8]:
#Mengecek apakah ada data transaksi dan item yang duplikat
print('Jumlah duplikasi: ', df.duplicated(subset=['Bill','Item'], keep=False).sum())
#Menampilkan data yang duplikat
print(df[df.duplicated(subset=['Bill','Item'], keep=False)])

Jumlah duplikasi:  0
Empty DataFrame
Columns: [Bill, Item, Quantity, Price, CustomerID]
Index: []


In [9]:
#menggabungkan data dalam satu transaksi menjadi 1 baris data
df_grouped = df.groupby('Bill')['Item'].apply(lambda x: ','.join(x)).reset_index()
df_grouped.head()

Unnamed: 0,Bill,Item
0,1000,"Apples,Butter,Eggs,Potatoes"
1,1004,Oranges
2,1005,"Milk,Onions,Cereal"
3,1008,"Tomatoes,Potatoes,Cereal"
4,1011,Bananas


In [10]:
#mengubah bentuk dari dataframe menjadi list
data = list(df_grouped["Item"].apply(lambda x:x.split(",") ))
data

[['Apples', 'Butter', 'Eggs', 'Potatoes'],
 ['Oranges'],
 ['Milk', 'Onions', 'Cereal'],
 ['Tomatoes', 'Potatoes', 'Cereal'],
 ['Bananas'],
 ['Tomatoes'],
 ['Pasta',
  'Onions',
  'Bread',
  'Bananas',
  'Coffee',
  'Sugar',
  'Potatoes',
  'Oranges'],
 ['Bananas', 'Oranges', 'Potatoes'],
 ['Chicken', 'Cereal', 'Bananas'],
 ['Cheese', 'Pasta', 'Cereal', 'Onions', 'Bananas', 'Chicken'],
 ['Sugar', 'Eggs'],
 ['Onions', 'Cereal', 'Cheese'],
 ['Cereal', 'Coffee', 'Bread', 'Onions'],
 ['Chicken', 'Pasta'],
 ['Eggs', 'Butter', 'Bananas', 'Chicken', 'Tomatoes'],
 ['Tea', 'Bananas', 'Pasta', 'Eggs', 'Cereal'],
 ['Sugar', 'Coffee'],
 ['Apples', 'Tomatoes', 'Chicken', 'Pasta', 'Bread', 'Sugar', 'Potatoes'],
 ['Yogurt', 'Butter', 'Bananas'],
 ['Milk', 'Eggs', 'Sugar', 'Juice', 'Tomatoes', 'Butter', 'Onions'],
 ['Coffee', 'Bananas'],
 ['Onions'],
 ['Coffee', 'Tea', 'Bananas', 'Onions', 'Juice', 'Pasta'],
 ['Tea', 'Juice', 'Cereal', 'Butter'],
 ['Yogurt', 'Juice'],
 ['Pasta',
  'Onions',
  'Chicken'

In [11]:
#Data Transformation
#mengubah kolom dengan nilai False menjadi 0
from mlxtend.preprocessing import TransactionEncoder
a = TransactionEncoder()
a_data = a.fit(data).transform(data)
df = pd.DataFrame(a_data,columns=a.columns_)
df = df.replace(False,0)
df

Unnamed: 0,Apples,Bananas,Bread,Butter,Cereal,Cheese,Chicken,Coffee,Eggs,Juice,Milk,Onions,Oranges,Pasta,Potatoes,Sugar,Tea,Tomatoes,Yogurt
0,True,0,0,True,0,0,0,0,True,0,0,0,0,0,True,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,True,0,0,0,0,0,0
2,0,0,0,0,True,0,0,0,0,0,True,True,0,0,0,0,0,0,0
3,0,0,0,0,True,0,0,0,0,0,0,0,0,0,True,0,0,True,0
4,0,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148,0,True,True,True,True,True,True,True,True,True,0,0,True,True,0,0,0,0,True
149,0,0,0,0,0,0,0,True,0,0,0,0,0,0,0,0,0,True,0
150,0,True,True,True,0,0,True,0,0,True,True,0,True,0,True,0,0,0,0
151,0,True,True,0,0,0,True,0,0,True,0,0,0,0,0,0,0,0,0


In [12]:
#Association Analysis
#dengan FP Growth
#nilai minimal support di-set dengan 0.04
df_fp =fpgrowth(df.astype('bool'),min_support=0.04, use_colnames=True)
df_fp

Unnamed: 0,support,itemsets
0,0.176471,(Potatoes)
1,0.176471,(Eggs)
2,0.163399,(Butter)
3,0.163399,(Apples)
4,0.189542,(Oranges)
...,...,...
81,0.045752,"(Juice, Eggs)"
82,0.058824,"(Juice, Coffee)"
83,0.052288,"(Juice, Tea)"
84,0.052288,"(Juice, Bananas)"


In [13]:
#Melakukan evaluasi terhadap hasil association analysis
rules = association_rules(df_fp, metric = "confidence", min_threshold = 0.4)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Oranges),(Milk),0.189542,0.156863,0.078431,0.413793,2.637931,1.0,0.048699,1.438293,0.766129,0.292683,0.304731,0.456897
1,(Milk),(Oranges),0.156863,0.189542,0.078431,0.5,2.637931,1.0,0.048699,1.620915,0.736434,0.292683,0.383065,0.456897
2,(Onions),(Cereal),0.150327,0.202614,0.065359,0.434783,2.145863,1.0,0.034901,1.410759,0.628462,0.227273,0.291162,0.378682
3,(Bread),(Coffee),0.150327,0.215686,0.071895,0.478261,2.217391,1.0,0.039472,1.503268,0.646154,0.244444,0.334783,0.405797
4,(Cheese),(Cereal),0.183007,0.202614,0.078431,0.428571,2.115207,1.0,0.041352,1.395425,0.645333,0.255319,0.283372,0.407834


In [14]:
#Melakukan evaluasi terhadap hasil association analysis
rules = association_rules(df_fp, num_itemsets=len(df_fp), metric="confidence", min_threshold=0.4)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Oranges),(Milk),0.189542,0.156863,0.078431,0.413793,2.637931,1.0,0.048699,1.438293,0.766129,0.292683,0.304731,0.456897
1,(Milk),(Oranges),0.156863,0.189542,0.078431,0.5,2.637931,1.0,0.048699,1.620915,0.736434,0.292683,0.383065,0.456897
2,(Onions),(Cereal),0.150327,0.202614,0.065359,0.434783,2.145863,1.0,0.034901,1.410759,0.628462,0.227273,0.291162,0.378682
3,(Bread),(Coffee),0.150327,0.215686,0.071895,0.478261,2.217391,1.0,0.039472,1.503268,0.646154,0.244444,0.334783,0.405797
4,(Cheese),(Cereal),0.183007,0.202614,0.078431,0.428571,2.115207,1.0,0.041352,1.395425,0.645333,0.255319,0.283372,0.407834


In [15]:
#Fungsi untuk menghitung waktu eksekusi algoritma
def perform_rule_calculation(transact_items_matrix, rule_type="fpgrowth", min_support=0.001):
    """
        desc: this function performs the association rule calculation
        @params:
            - transact_items_matrix: the transaction X Items matrix
            - rule_type:
            - apriori or Growth algorithms (default="fpgrowth")

            - min_support: minimum support threshold value (default = 0.001)

        @returns:
            - the matrix containing 3 columns:
            - support: support values for each combination of items
            - itemsets: the combination of items
            - number_of_items: the number of items in each combination of items

            - the excution time for the corresponding algorithm
    """
    start_time = 0
    total_execution = 0

    if(not rule_type == "fpgrowth"):
        start_time = time.time()
        rule_items = apriori(transact_items_matrix,
                                min_support=min_support,
                                use_colnames=True)
        total_execution = time.time() - start_time
        print("Computed Apriori!")

    else:
        start_time = time.time()
        rule_items = fpgrowth(transact_items_matrix,
                                min_support=min_support,
                                use_colnames=True)
        total_execution = time.time() - start_time
        print("Computed Fp Growth!")

    rule_items['number_of_items'] = rule_items['itemsets'].apply(lambda x: len(x))

    return rule_items, total_execution

In [16]:
#Menghitung waktu eksekusi algoritma FP-Growth
#MemanggiL fungsi perform_rule_calculation
import time
fpgrowth_matrix, fp_growth_exec_time = perform_rule_calculation(df.astype('bool'))
print("Waktu eksekusi FP-Growth : {} detik".format(fp_growth_exec_time))

Computed Fp Growth!
Waktu eksekusi FP-Growth : 4.276700258255005 detik


In [17]:
from mlxtend.frequent_patterns import apriori
apriori_matrix, apriori_exec_time = perform_rule_calculation(df.astype('bool'), rule_type="apriori")
print("Waktu eksekusi Algoritma Apriori : {} detik".format(apriori_exec_time))

Computed Apriori!
Waktu eksekusi Algoritma Apriori : 0.10515975952148438 detik


In [18]:
#Rekomendasi
rules['antecedents'] = rules['antecedents'].apply(lambda x: {item. lower() for item in x})
rules['consequents' ] = rules['consequents' ]. apply(lambda x: {item. lower() for item in x})
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,{oranges},{milk},0.189542,0.156863,0.078431,0.413793,2.637931,1.0,0.048699,1.438293,0.766129,0.292683,0.304731,0.456897
1,{milk},{oranges},0.156863,0.189542,0.078431,0.5,2.637931,1.0,0.048699,1.620915,0.736434,0.292683,0.383065,0.456897
2,{onions},{cereal},0.150327,0.202614,0.065359,0.434783,2.145863,1.0,0.034901,1.410759,0.628462,0.227273,0.291162,0.378682
3,{bread},{coffee},0.150327,0.215686,0.071895,0.478261,2.217391,1.0,0.039472,1.503268,0.646154,0.244444,0.334783,0.405797
4,{cheese},{cereal},0.183007,0.202614,0.078431,0.428571,2.115207,1.0,0.041352,1.395425,0.645333,0.255319,0.283372,0.407834


In [19]:
# Fungsi untuk merekomendasikan item
def recommend_items(user_items, rules):
    user_items = set(user_items) # Ubah input menjadi set
    recommendations = set()

    for _, row in rules.iterrows():
        if row['antecedents'].issubset(user_items): # Cek apakah item user cocok dengan antecedents
            recommendations.update(row['consequents'] - user_items)# Tambahkan consequents ke rekomendasi

    # Hapus item yang sudah dimiliki pengguna dari rekomendasi
    recommendations = recommendations - user_items

    return list(recommendations) if recommendations else ["Tidak ada rekomendasi"]

In [20]:
# Input dari user
user_input = input("Masukkan item yang Anda beli (pisahkan dengan koma): ").split(",")
user_items = [item.strip() for item in user_input] # Bersihkan spasi tambahan
user_items = [item.strip().lower() for item in user_input]

# Dapatkan rekomendasi
rekomendasi = recommend_items(user_items, rules)
print("Rekomendasi untuk Anda:", rekomendasi)

Rekomendasi untuk Anda: ['coffee']


In [21]:
# Input dari user
user_input = input("Masukkan item yang Anda beli (pisahkan dengan koma): ").split(",")
user_items = [item.strip() for item in user_input] # Bersihkan spasi tambahan
user_items = [item.strip().lower() for item in user_input]

# Dapatkan rekomendasi
rekomendasi = recommend_items(user_items, rules)
print("Rekomendasi untuk Anda:", rekomendasi)

Rekomendasi untuk Anda: ['cereal']


In [22]:
# Input dari user
user_input = input("Masukkan item yang Anda beli (pisahkan dengan koma): ").split(",")
user_items = [item.strip() for item in user_input] # Bersihkan spasi tambahan
user_items = [item.strip().lower() for item in user_input]

# Dapatkan rekomendasi
rekomendasi = recommend_items(user_items, rules)
print("Rekomendasi untuk Anda:", rekomendasi)

Rekomendasi untuk Anda: ['Tidak ada rekomendasi']
