# Tugas Akhir TA-11 Sistem Informasi T.A. 2019/2020
- Association Analysis using Frequent Pattern Growth-Genetic Algorithm for Restaurant Recommendation System
- (12S16013) Panji Jonatan Situmorang dan (12S16051) Hesti Rugun Juli Arta Siagian
- Notebook ini ditujukan untuk mendemonstrasikan sistem yang dirancang.

## 1. Muat Pustaka Python

In [None]:
import ast
import math
import numpy as np
import pandas as pd
import random
import re
import time

# Gunakan modul fpgrowth dalam mlxtend library untuk melakukan
# FP-Growth
try:
    import mlxtend
    from mlxtend.preprocessing import TransactionEncoder
    from mlxtend.frequent_patterns import fpgrowth
    from mlxtend.frequent_patterns import association_rules
except:
    %pip install mlxtend --upgrade
    import mlxtend
    from mlxtend.preprocessing import TransactionEncoder
    from mlxtend.frequent_patterns import fpgrowth
    from mlxtend.frequent_patterns import association_rules

# Gunakan deap library untuk melakukan
# Genetic Algorithm
try:
    # Gunakan deap library untuk melakukan
    # Genetic Algorithm
    from deap import base
    from deap import creator
    from deap import tools
except:
    %pip install deap --upgrade
    from deap import base
    from deap import creator
    from deap import tools

## 2. Muat Data

Pada _cell_ berikut ini dilakukan pembacaan set data **"Restaurant reviews.csv"** dan **"Restaurant names and Metadata.csv"**. Keduanya digabungkan dan disimpan dalam struktur data _DataFrame_ bernama <code>df_tr_restoran</code>.

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the CSV files
# loaded by this notebook are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Restaurant reviews: read only the restaurant, reviewer and rating columns.
df_review_restoran = pd.read_csv(
    "/content/drive/MyDrive/Restaurant reviews.csv",
    usecols=["Restaurant", "Reviewer", "Rating"],
)

# Restaurant metadata: read only the restaurant name and its cuisines.
df_md_restoran = pd.read_csv(
    "/content/drive/MyDrive/Restaurant names and Metadata.csv",
    usecols=["Name", "Cuisines"],
)

# Inner-join metadata with reviews on the restaurant name, drop the redundant
# "Name" key and expose "Restaurant" as "Preferred Restaurant".
df_tr_restoran = (
    df_md_restoran
    .merge(df_review_restoran, how="inner", left_on="Name", right_on="Restaurant")
    .drop(columns=["Name"])
    .rename(columns={"Restaurant": "Preferred Restaurant"})
)

## 3. Prapemrosesan

In [None]:
# Discard every row with at least one missing attribute value (NaN / None),
# then renumber the remaining rows from zero.
df_tr_restoran = df_tr_restoran.dropna(axis=0, how='any').reset_index(drop=True)

In [None]:
# Make sure Rating is numeric; values that cannot be parsed are coerced to NaN.
df_tr_restoran['Rating'] = pd.to_numeric(df_tr_restoran['Rating'], errors='coerce')

# Keep only well-rated reviews (rating >= 3); the Rating column is not needed
# afterwards, so it is dropped and the index is renumbered.
df_tr_restoran = (
    df_tr_restoran
    .loc[lambda frame: frame['Rating'] >= 3]
    .drop(columns=['Rating'])
    .reset_index(drop=True)
)

In [None]:
# Group restaurants by reviewer: one row per reviewer holding the list of
# restaurants that reviewer rated well.
df_tr_restoran = (
    df_tr_restoran
    .groupby(by=["Reviewer"])["Preferred Restaurant"]
    .apply(list)
    .reset_index()
)
display(df_tr_restoran.head())

Unnamed: 0,Reviewer,Preferred Restaurant
0,#FOODPORN,[Zega - Sheraton Hyderabad Hotel]
1,#chloesviews // CAH,[eat.fit]
2,$ign,[SKYHY]
3,@FuloriBinaChutneyKaiseBani,[Chinese Pavilion]
4,@Vignesh #FoodMad,[Owm Nom Nom]


In [None]:
# Label-encode the restaurant names: collect the sorted distinct names and
# number them from 1. The name -> id mapping is kept in
# restaurant_replace_map_comp.
restaurant_labels = np.unique(
    np.concatenate([np.unique(names) for names in df_tr_restoran['Preferred Restaurant']])
).tolist()
restaurant_replace_map_comp = {
    label: code for code, label in enumerate(restaurant_labels, start=1)
}

# Replace every restaurant name with its id (names without a mapping are kept
# unchanged) and store each transaction as a NumPy array.
for row_label, record in df_tr_restoran.iterrows():
    df_tr_restoran.at[row_label, 'Preferred Restaurant'] = np.array(
        [restaurant_replace_map_comp.get(name, name) for name in record['Preferred Restaurant']]
    )

In [None]:
display(df_tr_restoran.tail())

Unnamed: 0,Reviewer,Preferred Restaurant
5626,విజయ్ కుమార్ తెజవత్,[44]
5627,ಅಭಿಷೇಕ್ ಉಪಾಧ್ಯ,[80]
5628,✌️✌️Reddy 🥂🍽️,[47]
5629,✔️ Sonu,[76]
5630,🍛🍲🥗,"[67, 34]"


In [None]:
# Keep the preprocessed transactions in the NumPy array np_transaksi_restoran
np_transaksi_restoran = df_tr_restoran['Preferred Restaurant'].to_numpy()

## 4. Penerapan FP-Growth

In [None]:
# Transactions as a plain Python list (one entry per reviewer)
dataset = df_tr_restoran['Preferred Restaurant'].tolist()

In [None]:
# Encode the transactions with TransactionEncoder and wrap the result in a
# DataFrame whose columns are the items the encoder learned (te.columns_).
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
display(df.tail())

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
5626,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5627,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5628,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5629,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5630,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
# Minimum support is 2 / (number of transactions), i.e. an itemset has to
# occur at least twice across all transactions.
# NOTE(review): max_len=2 presumably caps itemsets at two items -- confirm
# against the mlxtend fpgrowth documentation.
df_frequent_itemsets = fpgrowth(df, min_support=(2/len(df)), max_len=2, use_colnames=True)

In [None]:
# Derive association rules from the frequent itemsets, thresholding on
# confidence with the value 2 / len(df).
df_rules = association_rules(df_frequent_itemsets, metric="confidence", min_threshold=(2/len(df)))
# Alternative kept for reference (confidence threshold of 0.5):
# df_rules = association_rules(df_frequent_itemsets, metric="confidence", min_threshold=0.5)

In [None]:
df_rules.tail()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1709,(61),(67),0.013142,0.009235,0.000355,0.027027,2.926715,0.000234,1.018287
1710,(26),(67),0.015095,0.009235,0.000355,0.023529,2.547964,0.000216,1.014639
1711,(67),(26),0.009235,0.015095,0.000355,0.038462,2.547964,0.000216,1.024301
1712,(67),(95),0.009235,0.011898,0.000355,0.038462,3.232491,0.000245,1.027626
1713,(95),(67),0.011898,0.009235,0.000355,0.029851,3.232491,0.000245,1.02125


In [None]:
# Fungsi _Encoding_ dan _Decoding_ pada Aturan Asosiasi
# Michigan Encoding
def michigan_encoding(df_association_rules, list_items):
    """Encode association rules as Michigan-style chromosomes.

    Every rule becomes a list with one two-character gene per entry of
    ``list_items`` (in that order): ``'01'`` when the item is in the rule's
    antecedents, ``'10'`` when it is in the consequents and ``'00'``
    otherwise. The antecedent test is made first, so an item present on both
    sides is encoded as ``'01'``.

    Args:
        df_association_rules: DataFrame whose rows provide ``antecedents``
            and ``consequents`` collections.
        list_items: Ordered items that define the gene positions.

    Returns:
        A list with one chromosome (list of gene strings) per rule.
    """
    def _gene(item, rule):
        if item in rule.antecedents:
            return '01'
        if item in rule.consequents:
            return '10'
        return '00'

    return [
        [_gene(item, rule) for item in list_items]
        for rule in df_association_rules.itertuples()
    ]

# Michigan Decoding
def michigan_decoding(list_encoded_association_rules, list_items):
    """Decode Michigan-style chromosomes back into association rules.

    For each chromosome, the items at positions holding ``'01'`` form the
    antecedent and the items at positions holding ``'10'`` form the
    consequent; every other gene value is ignored.

    Args:
        list_encoded_association_rules: Iterable of chromosomes (sequences of
            gene strings).
        list_items: Ordered items that define the gene positions.

    Returns:
        A DataFrame with the columns ``antecedents``, ``consequents`` and
        ``chromosome`` (the chromosome object itself), one row per rule.
    """
    rows = []
    for chromosome in list_encoded_association_rules:
        premise = [list_items[pos] for pos, gene in enumerate(chromosome) if gene == '01']
        conclusion = [list_items[pos] for pos, gene in enumerate(chromosome) if gene == '10']
        rows.append([premise, conclusion, chromosome])

    return pd.DataFrame(rows, columns=['antecedents', 'consequents', 'chromosome'])

Jalankan _encoding_.

In [None]:
# Encode every rule in df_rules; gene positions follow the order of the
# restaurant ids stored as values of restaurant_replace_map_comp.
list_encoded_association_rules = michigan_encoding(df_rules, [*restaurant_replace_map_comp.values()])

In [None]:
# Preview of the encoded association rules: print only the first two
for index, rule in enumerate(list_encoded_association_rules[:2]):
    print(index, ":", str(rule))

0 : ['00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '10', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '01']
1 : ['00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '01', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', 

## 5. Penerapan Genetic Algorithm

**Creator**

Karena struktur aktual individu yang diperlukan dalam _genetic algorithm_ sangat bergantung pada kasus yang akan diselesaikan, DEAP _library_ tidak menyediakan struktur eksplisit apa pun. Sebagai gantinya, DEAP _library_ menyediakan modul <code>deap.creator</code> untuk membuat wadah bagi atribut-atribut individu beserta _fitness_-nya.

Fungsi <code>creator.create</code> adalah "pabrik kelas" (_class factory_) yang dapat kita gunakan untuk membuat kelas baru pada saat program dijalankan. Argumen pertamanya adalah nama kelas baru yang akan dibuat, argumen kedua adalah kelas dasar yang akan diwarisi, dan argumen-argumen berikutnya menjadi atribut kelas baru tersebut.

Pada _cell_ berikut ini, kita mendefinisikan sebuah kelas bernama **"FitnessMax"**. Kelas ini akan mewarisi kelas **"Fitness"** dari modul <code>deap.base</code>. Kelas ini juga akan memiliki atribut tambahan yang disebut **"weights"**. Perhatikan bahwa nilai **"weights"** disajikan dengan bentuk _tuple_ (1.0,). Dengan cara ini, kita secara eksplisit menyatakan bahwa kasus optimasi yang dikerjakan adalah memaksimalkan _fitness_ objektif tunggal.

In [None]:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))

Selanjutnya, kita buat kelas bernama **"Individual"**, yang akan mewarisi kelas **"list"** dan mengandung kelas **"FitnessMax"** yang telah kita definisikan sebelumnya dalam atribut **"fitness"**.

In [None]:
creator.create("Individual", list, fitness=creator.FitnessMax)

**Toolbox**

In [None]:
toolbox = base.Toolbox()

**Evaluation Function**

- $ \mathrm{support}(A \rightarrow C) = \mathrm{support}(A \cup C)$, range: $[0, 1]$



- $ \mathrm{confidence}(A \rightarrow C) = \frac{\mathrm{support}(A \cup C)}{\mathrm{support}(A)}$, range: $[0, 1]$



- $ \mathrm{lift}(A \rightarrow C) = \frac{\mathrm{confidence}(A \rightarrow C)}{\mathrm{support}(C)}$, range: $[0, \infty)$



- $ \mathrm{kulczynski}(A \rightarrow C) = \frac{1}{2} \left( \frac{\mathrm{support}(A \cup C)}{\mathrm{support}(A)} + \frac{\mathrm{support}(A \cup C)}{\mathrm{support}(C)} \right) $, range: $[0, 1]$

In [None]:
def safe_div(x, y):
    """Return ``x / y``, or ``0`` when the divisor ``y`` equals zero."""
    return 0 if y == 0 else x / y

def kulczynski(encoded_rule, np_data_transaksi, list_items):
    """Compute the Kulczynski measure of a Michigan-encoded rule.

    The rule is decoded first: items at positions holding ``'01'`` form the
    antecedent, items at positions holding ``'10'`` form the consequent. The
    measure is ``0.5 * (support(A u C) / support(A) + support(A u C) /
    support(C))``, where a support is the fraction of transactions containing
    every item of the set. An empty item set has support 0, and a term whose
    denominator is 0 contributes 0.

    Args:
        encoded_rule: Sequence of gene strings, one per entry of
            ``list_items``.
        np_data_transaksi: Sized collection of transactions, each an iterable
            of items.
        list_items: Ordered items that define the gene positions.

    Returns:
        The Kulczynski value of the rule; ``0.0`` when there are no
        transactions.
    """
    antecedent = set()
    consequent = set()
    for idx, gene in enumerate(encoded_rule):
        if gene == '01':
            antecedent.add(list_items[idx])
        elif gene == '10':
            consequent.add(list_items[idx])
    rule_items = antecedent | consequent

    total = len(np_data_transaksi)
    if total == 0:
        # No transactions: every support is undefined, report zero fitness
        # instead of dividing by zero.
        return 0.0

    # Count all three supports in one pass so each transaction is converted
    # to a set only once.
    count_rule = count_antecedent = count_consequent = 0
    for transaction in np_data_transaksi:
        basket = set(transaction)
        if rule_items and rule_items <= basket:
            count_rule += 1
        if antecedent and antecedent <= basket:
            count_antecedent += 1
        if consequent and consequent <= basket:
            count_consequent += 1

    support_rule = count_rule / total
    support_antecedent = count_antecedent / total
    support_consequent = count_consequent / total

    return (1/2) * (safe_div(support_rule, support_antecedent)
                    + safe_div(support_rule, support_consequent))

def evalOneMax(individual):
    """Return the fitness of ``individual`` as a one-element tuple.

    The fitness is the Kulczynski measure of the encoded rule, evaluated on
    ``np_transaksi_restoran`` with the ids of ``restaurant_replace_map_comp``
    as gene positions.
    """
    item_ids = [*restaurant_replace_map_comp.values()]
    fitness = kulczynski(individual, np_transaksi_restoran, item_ids)
    return (fitness,)

**The Genetic Operators**

In [None]:
#----------
# Genetic operator registration
#----------
# Register the objective / fitness function 'evalOneMax'
toolbox.register("evaluate", evalOneMax)

# Register the crossover operator: one-point crossover
toolbox.register("mate", tools.cxOnePoint)

# Register the mutation operator: shuffle the genes of an individual,
# with indpb as the probability of each gene being moved
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)

# Register tournament selection for choosing the individuals of the
# next generation.
# NOTE(review): tournsize=2 presumably means each pick is the fittest of
# 2 randomly drawn individuals -- confirm against the DEAP documentation
toolbox.register("select", tools.selTournament, tournsize=2)

#----------

**Creating the Population**

In [None]:
mins=[]
maxs=[]
means=[]
stds=[]

def _fitness_stats(fits):
    """Return ``(min, max, mean, std)`` for a list of fitness values."""
    length = len(fits)
    mean = sum(fits) / length
    sum2 = sum(x * x for x in fits)
    # Population standard deviation: sqrt(E[x^2] - mean^2). abs() guards
    # against a tiny negative difference caused by floating-point rounding.
    std = abs(sum2 / length - mean ** 2) ** 0.5
    return min(fits), max(fits), mean, std


def _record_and_print_stats(fits):
    """Append the statistics of ``fits`` to the history lists and print them."""
    lowest, highest, mean, std = _fitness_stats(fits)

    mins.append(lowest)
    maxs.append(highest)
    means.append(mean)
    stds.append(std)

    print('  Min %s' % lowest)
    print('  Max %s' % highest)
    print('  Avg %s' % mean)
    print('  Std %s' % std)


def main(ngen=1000, cxpb=0.5, mutpb=0.2):
    """Evolve the encoded association rules with a genetic algorithm.

    The initial population is built from ``list_encoded_association_rules``.
    Each generation applies the ``select``, ``mate`` and ``mutate`` operators
    registered on ``toolbox``, re-evaluates the individuals whose fitness was
    invalidated and replaces the whole population with the offspring. The
    min / max / mean / standard deviation of the fitness values are appended
    to the module-level lists ``mins``, ``maxs``, ``means`` and ``stds`` once
    for the initial population and once per generation.

    Args:
        ngen: Number of generations to run.
        cxpb: Probability that a pair of individuals is crossed.
        mutpb: Probability that an individual is mutated.

    Returns:
        A DataFrame describing the final population with the columns
        ``antecedents``, ``consequents``, ``chromosome`` and ``kulczynski``.
    """
    # Convert every chromosome from 'list' to 'deap.creator.Individual' so
    # that it can carry a fitness attribute.
    pop = [creator.Individual(rule) for rule in list_encoded_association_rules]

    print('Start of evolution')

    # Evaluate the fitness of the whole initial population.
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    print('  Evaluated %i individuals' % len(pop))

    fits = [ind.fitness.values[0] for ind in pop]
    _record_and_print_stats(fits)

    # One generation equals one iteration of this loop.
    for g in range(1, ngen + 1):
        print('-- Generation %i --' % g)

        # Select the individuals of the next generation and clone them so
        # that the operators below do not modify the current population.
        offspring = toolbox.select(pop, len(pop))
        offspring = list(map(toolbox.clone, offspring))

        # Cross neighbouring pairs of offspring with probability cxpb.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxpb:
                toolbox.mate(child1, child2)

                # The fitness of crossed individuals is no longer valid.
                del child1.fitness.values
                del child2.fitness.values

        # Mutate each offspring with probability mutpb.
        for mutant in offspring:
            if random.random() < mutpb:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Re-evaluate only the individuals whose fitness was invalidated.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        print('  Evaluated %i individuals' % len(invalid_ind))

        # Replace the population entirely with the offspring.
        pop[:] = offspring

        fits = [ind.fitness.values[0] for ind in pop]
        _record_and_print_stats(fits)

    print('-- End of (successful) evolution --')
    print('\n')

    # Decode the final population and attach its fitness values.
    print('-- Final Population --')
    df_fits = pd.DataFrame(fits, columns =['kulczynski'])
    df_pop = michigan_decoding(pop, [*restaurant_replace_map_comp.values()])
    df_pop = pd.concat([df_pop, df_fits], axis=1)
    print('\n')

    return df_pop

if __name__ == '__main__':
    df_hasil = main()

[1;30;43mOutput streaming akan dipotong hingga 5000 baris terakhir.[0m
  Avg 0.9231453481683567
  Std 0.4618461137632904
-- Generation 169 --
  Evaluated 1063 individuals
  Min 0.0
  Max 1.0
  Avg 0.9179071979388157
  Std 0.45955516968232357
-- Generation 170 --
  Evaluated 1025 individuals
  Min 0.0
  Max 1.0
  Avg 0.930308618703275
  Std 0.465664710843739
-- Generation 171 --
  Evaluated 1016 individuals
  Min 0.0
  Max 1.0
  Avg 0.9348173284739231
  Std 0.46786441613544827
-- Generation 172 --
  Evaluated 1032 individuals
  Min 0.0
  Max 1.0
  Avg 0.9260542836951698
  Std 0.463154752818024
-- Generation 173 --
  Evaluated 1027 individuals
  Min 0.0
  Max 1.0
  Avg 0.9317429130345254
  Std 0.46605376279913435
-- Generation 174 --
  Evaluated 1005 individuals
  Min 0.0
  Max 1.0
  Avg 0.9362687898963554
  Std 0.46833491766361307
-- Generation 175 --
  Evaluated 992 individuals
  Min 0.0
  Max 1.0
  Avg 0.9364144662746193
  Std 0.46835306054131237
-- Generation 176 --
  Evaluated 988

In [None]:
display(df_hasil.head())

Unnamed: 0,antecedents,consequents,chromosome,kulczynski
0,"[39, 73, 77]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
1,"[39, 73]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
2,"[39, 73]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
3,"[39, 73]","[5, 67]","[00, 00, 00, 00, 10, 00, 00, 00, 00, 00, 00, 0...",0.0
4,"[39, 73]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0


In [None]:
# Compare the fitness statistics before the GA (first recorded entry of each
# history list) with those after the GA (last recorded entry), rounded to
# four decimals.
print('Nilai fitness minimum pada populasi sebelum GA %s' %round(mins[0], 4))
print('Nilai fitness minimum pada populasi setelah GA %s' %round(mins[-1], 4))

print("Nilai fitness maksimum pada populasi sebelum GA %s" %round(maxs[0], 4))
print("Nilai fitness maksimum pada populasi setelah GA %s" %round(maxs[-1], 4))

print("Rata-rata nilai fitness pada populasi sebelum GA %s" %round(means[0], 4))
print("Rata-rata nilai fitness pada populasi setelah GA %s" %round(means[-1], 4))

print("Standar deviasi nilai fitness pada populasi sebelum GA %s" %round(stds[0], 4))
print("Standar deviasi nilai fitness pada populasi setelah GA %s" %round(stds[-1], 4))

Nilai fitness minimum pada populasi sebelum GA 0.0212
Nilai fitness minimum pada populasi setelah GA 0.0
Nilai fitness maksimum pada populasi sebelum GA 0.1165
Nilai fitness maksimum pada populasi setelah GA 1.0
Rata-rata nilai fitness pada populasi sebelum GA 0.0334
Rata-rata nilai fitness pada populasi setelah GA 0.9245
Standar deviasi nilai fitness pada populasi sebelum GA 0.0328
Standar deviasi nilai fitness pada populasi setelah GA 0.4625


In [None]:
# Remove duplicate association rules. Lists are unhashable, so a tuple copy
# of each chromosome (column 'new') serves as the de-duplication key.
df_hasil['new'] = df_hasil.chromosome.apply(tuple)
df_hasil_akhir = (
    df_hasil
    .sort_values('kulczynski', ascending=False)
    .drop_duplicates('new')
    .drop(['new'], axis=1)
)

# Discard rules whose Kulczynski value is zero.
df_hasil_akhir = df_hasil_akhir[df_hasil_akhir["kulczynski"] != 0.00].reset_index(drop=True)

# Discard rules with an empty antecedent or an empty consequent. Rules with
# several items on either side are kept. A boolean mask is used instead of
# dropping rows while iterating, and no loop variable shadows a builtin.
has_antecedent = df_hasil_akhir['antecedents'].apply(len).gt(0)
has_consequent = df_hasil_akhir['consequents'].apply(len).gt(0)
df_hasil_akhir = df_hasil_akhir[has_antecedent & has_consequent]

In [None]:
display(df_hasil_akhir.head())

Unnamed: 0,antecedents,consequents,chromosome,kulczynski
0,"[39, 73, 77]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
1,"[39, 73]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
2,"[39, 77]","[54, 67]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
3,"[39, 73]","[54, 100]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
4,"[39, 73]","[54, 67, 100]","[00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0


In [None]:
# Check whether the antecedents of the final rules cover every restaurant in
# the data set, and report each restaurant id that is missing. A restaurant
# counts as covered when it occurs inside the antecedent of at least one
# rule. The ids come from restaurant_replace_map_comp, so the check is not
# tied to a fixed number of restaurants.
covered_ids = {
    item
    for antecedent in df_hasil_akhir['antecedents']
    for item in antecedent
}
for restaurant_id in restaurant_replace_map_comp.values():
    if restaurant_id not in covered_ids:
        print(restaurant_id, " tidak ada")

In [None]:
def lookup_item(k):
    """Return the restaurant name whose id in ``restaurant_replace_map_comp`` is ``k``.

    Raises:
        ValueError: If no restaurant is mapped to ``k``.
    """
    names = list(restaurant_replace_map_comp.keys())
    ids = list(restaurant_replace_map_comp.values())
    position = ids.index(k)
    return names[position]

# Print every final rule in a human-readable form, translating restaurant ids
# back to restaurant names.
for baris in df_hasil_akhir.itertuples():
    print("Aturan Asosiasi #{0}".format(baris.Index + 1))
    premise_names = ", ".join(map(lookup_item, baris.antecedents))
    conclusion_names = ", ".join(map(lookup_item, baris.consequents))
    print("Aturan Asosiasi: Jika seseorang merekomendasi {0}, maka ia juga akan merekomendasi {1}".format(premise_names, conclusion_names))
    print(" - Kulczynski: {0:.2f}".format(baris.kulczynski))
    print("\n")

Aturan Asosiasi #1
Aturan Asosiasi: Jika seseorang merekomendasi Gal Punjab Di, Royal Spicy Restaurant, Shah Ghouse Spl Shawarma, maka ia juga akan merekomendasi Kritunga Restaurant, Pakwaan Grand
 - Kulczynski: 1.00


Aturan Asosiasi #2
Aturan Asosiasi: Jika seseorang merekomendasi Gal Punjab Di, Royal Spicy Restaurant, maka ia juga akan merekomendasi Kritunga Restaurant, Pakwaan Grand
 - Kulczynski: 1.00


Aturan Asosiasi #3
Aturan Asosiasi: Jika seseorang merekomendasi Gal Punjab Di, Shah Ghouse Spl Shawarma, maka ia juga akan merekomendasi Kritunga Restaurant, Pakwaan Grand
 - Kulczynski: 1.00


Aturan Asosiasi #4
Aturan Asosiasi: Jika seseorang merekomendasi Gal Punjab Di, Royal Spicy Restaurant, maka ia juga akan merekomendasi Kritunga Restaurant, eat.fit
 - Kulczynski: 1.00


Aturan Asosiasi #5
Aturan Asosiasi: Jika seseorang merekomendasi Gal Punjab Di, Royal Spicy Restaurant, maka ia juga akan merekomendasi Kritunga Restaurant, Pakwaan Grand, eat.fit
 - Kulczynski: 1.00


Atur

## 6. Rekomendasi Restoran

In [None]:
def lookup_id_restoran(nama_restoran):
    """Return the integer id mapped to ``nama_restoran`` in ``restaurant_replace_map_comp``."""
    restaurant_id = restaurant_replace_map_comp.get(nama_restoran)
    return int(restaurant_id)

def rekomendasi_restoran(id_restoran_prefensi):
    """Recommend restaurants for a preferred restaurant id.

    Every rule of ``df_hasil_akhir`` whose antecedent contains
    ``id_restoran_prefensi`` is collected and ordered by descending
    Kulczynski value. The consequent items of those rules are translated to
    restaurant names. All matching rules are used; no top-N cut-off is
    applied.

    Args:
        id_restoran_prefensi: Id of the preferred restaurant.

    Returns:
        A list of recommended restaurant names without duplicates, in order
        of first appearance.
    """
    matching_rules = [
        rule
        for _, rule in df_hasil_akhir.iterrows()
        if id_restoran_prefensi in rule['antecedents']
    ]
    rule_columns = ['antecedents', 'consequents', 'chromosome', 'kulczynski']
    ranked_rules = (
        pd.DataFrame(matching_rules, columns=rule_columns)
        .reset_index(drop=True)
        .sort_values('kulczynski', ascending=False)
    )

    # Flatten the consequents of the ranked rules and map ids to names.
    recommended_names = [
        lookup_item(item)
        for consequent in ranked_rules['consequents']
        for item in consequent
    ]

    # dict.fromkeys removes duplicates while keeping the first-seen order.
    return list(dict.fromkeys(recommended_names))

In [None]:
# Read the preferred restaurant name from the user (the prompt text below is
# disabled) and print the recommendations derived from it.
# print('Masukkan restoran preferensi:')
x = input('')
print('Rekomendasi adalah', rekomendasi_restoran(lookup_id_restoran(x)))

Pakwaan Grand
Rekomendasi adalah ['Gal Punjab Di', 'Kritunga Restaurant']
