# Tugas Akhir TA-11 Sistem Informasi T.A. 2019/2020
- Association Analysis using Frequent Pattern Growth-Genetic Algorithm for Restaurant Recommendation System
- (12S16013) Panji Jonatan Situmorang dan (12S16051) Hesti Rugun Juli Arta Siagian
- Notebook ini ditujukan untuk mendemonstrasikan sistem yang dirancang.

## 1. Muat Pustaka Python

In [None]:
import ast
import math
import numpy as np
import pandas as pd
import random
import re
import time

# Gunakan modul fpgrowth dalam mlxtend library untuk melakukan
# FP-Growth
try:
    import mlxtend
    from mlxtend.preprocessing import TransactionEncoder
    from mlxtend.frequent_patterns import fpgrowth
    from mlxtend.frequent_patterns import association_rules
except:
    %pip install mlxtend --upgrade
    import mlxtend
    from mlxtend.preprocessing import TransactionEncoder
    from mlxtend.frequent_patterns import fpgrowth
    from mlxtend.frequent_patterns import association_rules

# Gunakan deap library untuk melakukan
# Genetic Algorithm
try:
    # Gunakan deap library untuk melakukan
    # Genetic Algorithm
    from deap import base
    from deap import creator
    from deap import tools
except:
    %pip install deap --upgrade
    from deap import base
    from deap import creator
    from deap import tools

## 2. Muat Data

Pada _cell_ berikut ini dilakukan pembacaan set data **"Restaurant reviews"** dan **"Restaurant names and Metadata.csv"**. Keduanya digabungkan dan disimpan dalam struktur data _DataFrame_ bernama <code>df_tr_restoran</code>.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Menyimpan set data review restoran dalam DataFrame df_review_restoran
df_review_restoran = pd.read_csv("/content/drive/MyDrive/Restaurant reviews.csv",
                                 usecols = ["Restaurant", "Reviewer", "Rating"])

# Menyimpan set data metadata restoran dalam DataFrame df_md_restoran
df_md_restoran = pd.read_csv("/content/drive/MyDrive/Restaurant names and Metadata.csv",
                             usecols = ["Name", "Cuisines"])

# Menyimpan gabungan df_review_restoran dan df_md_restoran dalam DataFrame df_tr_restoran
df_tr_restoran = df_md_restoran.merge(df_review_restoran,
                                      left_on = "Name",
                                      right_on = "Restaurant",
                                      how = "inner").drop(["Name"], axis = 1)

# Ganti nama kolom "Restaurant" menjadi "Preferred Restaurant"
df_tr_restoran = df_tr_restoran.rename(columns={"Restaurant": "Preferred Restaurant"})

## 3. Prapemrosesan

In [None]:
# Hapus baris data yang memiliki setidaknya satu nilai atribut hilang (NaN / None)
df_tr_restoran = df_tr_restoran.dropna(axis = 0, how ='any')
df_tr_restoran = df_tr_restoran.reset_index(drop=True)

In [None]:
# Pastikan tipe data Rating adalah numerik 
df_tr_restoran['Rating'] = pd.to_numeric(df_tr_restoran['Rating'], errors='coerce')

# Pilih restoran dengan rating baik (rating >= 3)
df_tr_restoran = df_tr_restoran[df_tr_restoran['Rating'] >= 3]
df_tr_restoran = df_tr_restoran.drop(['Rating'], axis=1)
df_tr_restoran = df_tr_restoran.reset_index(drop=True)

In [None]:
# Pengelompokkan restoran berdasarkan reviewer
df_tr_restoran = df_tr_restoran.groupby(by = ["Reviewer"])["Preferred Restaurant"].apply(list).reset_index()
display(df_tr_restoran.head())

Unnamed: 0,Reviewer,Preferred Restaurant
0,#FOODPORN,[Zega - Sheraton Hyderabad Hotel]
1,#chloesviews // CAH,[eat.fit]
2,$ign,[SKYHY]
3,@FuloriBinaChutneyKaiseBani,[Chinese Pavilion]
4,@Vignesh #FoodMad,[Owm Nom Nom]


In [None]:
# Pengkodean Label pada nama restoran  
# Simpan set pemetaan dalam restaurant_replace_map_comp
restaurant_labels = np.unique(np.concatenate([np.unique(row) for row in df_tr_restoran['Preferred Restaurant']])).tolist()
restaurant_replace_map_comp = {v: k for k, v in zip(list(range(1,len(restaurant_labels)+1)), restaurant_labels)}

# Ganti setiap nama restoran dengan indeks yang sesuai 
for i, row in df_tr_restoran.iterrows():
    res = [restaurant_replace_map_comp.get(ele, ele) for ele in row['Preferred Restaurant']]
    df_tr_restoran.at[i, 'Preferred Restaurant'] = np.array(res)

In [None]:
display(df_tr_restoran.tail())

Unnamed: 0,Reviewer,Preferred Restaurant
5626,విజయ్ కుమార్ తెజవత్,[44]
5627,ಅಭಿಷೇಕ್ ಉಪಾಧ್ಯ,[80]
5628,✌️✌️Reddy 🥂🍽️,[47]
5629,✔️ Sonu,[76]
5630,🍛🍲🥗,"[67, 34]"


In [None]:
# Simpan hasil prapemrosesan dalam Numpy Array np_transaksi_restoran
np_transaksi_restoran = df_tr_restoran['Preferred Restaurant'].to_numpy()

## 4. Penerapan FP-Growth

In [None]:
dataset = df_tr_restoran['Preferred Restaurant'].tolist()

In [None]:
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
display(df.tail())

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
5626,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5627,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5628,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5629,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5630,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
# minimal 2 kali dari seluruh transaksi
df_frequent_itemsets = fpgrowth(df, min_support=(2/len(df)), max_len=2, use_colnames=True)

In [None]:
df_rules = association_rules(df_frequent_itemsets, metric="confidence", min_threshold=(2/len(df)))
# df_rules = association_rules(df_frequent_itemsets, metric="confidence", min_threshold=0.5)

In [None]:
df_rules.tail()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1709,(61),(67),0.013142,0.009235,0.000355,0.027027,2.926715,0.000234,1.018287
1710,(26),(67),0.015095,0.009235,0.000355,0.023529,2.547964,0.000216,1.014639
1711,(67),(26),0.009235,0.015095,0.000355,0.038462,2.547964,0.000216,1.024301
1712,(67),(95),0.009235,0.011898,0.000355,0.038462,3.232491,0.000245,1.027626
1713,(95),(67),0.011898,0.009235,0.000355,0.029851,3.232491,0.000245,1.02125


In [None]:
# Fungsi _Encoding_ dan _Decoding_ pada Aturan Asosiasi
# Michigan Encoding
def michigan_encoding(df_association_rules, list_items):
    list_encoded_association_rules = []

    for tuple_rule in df_association_rules.itertuples():
        list_encoded_rule = []
        
        for item in list_items:
            if item in tuple_rule.antecedents:
                list_encoded_rule.append('01')
            elif item in tuple_rule.consequents:
                list_encoded_rule.append('10')
            else:
                list_encoded_rule.append('00')
        list_encoded_association_rules.append(list_encoded_rule)
    return list_encoded_association_rules

# Michigan Decoding
def michigan_decoding(list_encoded_association_rules, list_items):
    list_rules = []
    
    for encoded_rule in list_encoded_association_rules:
        antecedent = []
        consequent = []

        for idx, ele in enumerate(encoded_rule):
            if ele == '01':
                antecedent.append(list_items[idx])
            elif ele == '10':
                consequent.append(list_items[idx])
        list_rules.append([antecedent, consequent, encoded_rule])
    
    return pd.DataFrame(list_rules, columns=['antecedents', 'consequents', 'chromosome'])

Jalankan _encoding_.

In [None]:
list_encoded_association_rules = michigan_encoding(df_rules, [*restaurant_replace_map_comp.values()])

In [None]:
# cuplikan aturan asosiasi yang telah di-encode 
for index, rule in enumerate(list_encoded_association_rules):
    print(index, ":", str(rule))
    if index == 1:
        break

0 : ['00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '10', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '01']
1 : ['00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '01', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', '00', 

## 5. Penerapan Genetic Algorithm

**Creator**

Karena struktur aktual individu yang diperlukan dalam _genetic algorithm_ sangat bergantung pada kasus yang akan diselesaikan, maka DEAP _library_ tidak menyediakan struktur eksplisit apa pun. Namun, DEAP _library_ menyediakan _method_ untuk membuat wadah bagi atribut, terkait dengan _fitness_. _Method_ ini disebut <code>deap.creator</code>.

<code>creator</code> adalah "pabrik kelas" yang dapat kita gunakan untuk membuat kelas baru pada saat program dijalankan. Pemanggilannya dilengkapi dengan beberapa argumen. Argumen pertama yaitu nama yang diinginkan dari kelas baru yang akan dibuat, argumen kedua adalah kelas dasar yang akan diwarisi, dan argumen berikutnya adalah atribut kelas baru yang dibuat.

Pada _cell_ berikut ini, kita mendefinisikan sebuah kelas bernama **"FitnessMax"**. Kelas ini akan mewarisi kelas **"Fitness"** dari modul <code>deap.base</code>. Kelas ini juga akan memiliki atribut tambahan yang disebut **"weights"**. Perhatikan bahwa nilai **"weights"** disajikan dengan bentuk _tuple_ (1.0,). Dengan cara ini, kita secara eksplisit menyatakan bahwa kasus optimasi yang dikerjakan adalah memaksimalkan _fitness_ objektif tunggal.

In [None]:
# # In the following cell, we define a class named "FitnessMax". 
# This class will inherit the "Fitness" class from the deep.base module. 
# This class will also have an additional attribute called "weights".

creator.create("FitnessMax", base.Fitness, weights=(1.0,))

Selanjutnya, kita buat kelas bernama **"Individual"**, yang akan mewarisi kelas **"list"** dan mengandung kelas **"FitnessMax"** yang telah kita definisikan sebelumnya dalam atribut **"fitness"**.

In [None]:
# Next, we create a class named "Individual", 
# which will inherit the "list" class and contain the "FitnessMax" class 
# we defined earlier in the "fitness" attribute.

creator.create("Individual", list, fitness=creator.FitnessMax)

**Toolbox**

In [None]:
toolbox = base.Toolbox()

**Evaluation Function**

- $ \mathrm{support}(A->C) = \mathrm{support}(A \cup C)$, range: [0, 1]



- $ \mathrm{confidence}(A->C) = \frac{\mathrm{support}(A \cup C)}{\mathrm{support}(A)}$, range: [0, 1]



- $ \mathrm{lift}(A->C) = \frac{\mathrm{confidence}(A->C)}{\mathrm{support}(C)}$, range: [0, inf]



- $ \mathrm{kulczynski}(A->C) = \frac{1}{2} (\frac{\mathrm{support}(A \cup C)}{\mathrm{support}(A)} + \frac{\mathrm{support}(A \cup C)}{\mathrm{support}(C)}) $, range: [0, 1]

In [None]:
def safe_div(x,y):
    if y==0: return 0
    return x/y

def kulczynski(encoded_rule, np_data_transaksi, list_items):
    antecedent = []
    consequent = []

    for idx, ele in enumerate(encoded_rule):
        if ele == '01':
            antecedent.append(list_items[idx])
        elif ele == '10':
            consequent.append(list_items[idx])
    decoded_rule = antecedent + consequent

    support_rule = [bool(set(decoded_rule) and set(decoded_rule).issubset(set(sublist))) \
                    for sublist in np_data_transaksi].count(True) / len(np_data_transaksi)
    support_antecedent = [bool(set(antecedent) and set(antecedent).issubset(set(sublist))) \
                    for sublist in np_data_transaksi].count(True) / len(np_data_transaksi)
    support_consequent = [bool(set(consequent) and set(consequent).issubset(set(sublist))) \
                    for sublist in np_data_transaksi].count(True) / len(np_data_transaksi)
    kulczynski_rule = (1/2) * (safe_div(support_rule, support_antecedent) + safe_div(support_rule, support_consequent))
    
    return kulczynski_rule

def evalOneMax(individual):
    return kulczynski(individual, np_transaksi_restoran, [*restaurant_replace_map_comp.values()]),

**The Genetic Operators**

In [None]:
#----------
# Registrasi Operator Genetik
#----------
# Goal registration / fitness function 'evalOneMax'
toolbox.register("evaluate", evalOneMax)

# Rcrossover operator registration with
# one point crossover technique
toolbox.register("mate", tools.cxOnePoint)

# Registration of mutation operators with DNA shuffling technique
# and probability of each DNA being transferred
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)

# Operator registration with tournament selection technique
# to choose individuals in the next generation
# where each individual from the previous generation is replaced
# with 2 individuals of the current generation
toolbox.register("select", tools.selTournament, tournsize=2)

#----------

**Creating the Population**

In [None]:
mins=[]
maxs=[]
means=[]
stds=[]

def main(): 
    pop = []
    # Conversion of data structure of each chromosome from 'list'
     # become 'deap.creator.Individual' so we can
     # used in GA algorithm in DEAP library
    for rule in list_encoded_association_rules:
        pop.append(creator.Individual(rule))
    
    # CXPB is the probability of two individuals
     # crossed
     #
     # MUTPB is the probability of an individual
     # have mutation
    CXPB, MUTPB = 0.5, 0.2

    print('Start of evolution')

    # Calculate the fitness value of all individuals in the population
     # by using saved 'evalOneMax'
     # in 'toolbox.evaluate' in cell
     # 'Genetic Operator Registration'
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    print('  Evaluated %i individuals' % len(pop))

    # Extraction of the fitness value of each individual 'ind'
     # in population 'pop' and save in list 'fits'
    fits = [ind.fitness.values[0] for ind in pop]
    
    length = len(pop)
    mean = sum(fits) / length
    sum2 = sum(x*x for x in fits)
    std = abs(sum2 / length - mean*2)*0.5
        
    mins.append(min(fits))
    maxs.append(max(fits))
    means.append(mean)
    stds.append(std)
    
    print('  Min %s' % min(fits))
    print('  Max %s' % max(fits))
    print('  Avg %s' % mean)
    print('  Std %s' % std)        
    
    # Variable 'g' for total generation monitoring
     # where one generation equals one iteration
    g = 0

    # Perform evolution during 'g' generation
     # smaller than 100
    while g < 100:
        # Sthe new generation
        g = g + 1
        print('-- Generation %i --' % g)

        # Select individuals for the next generation
        offspring = toolbox.select(pop, len(pop))
        # Clone selected individuals
        offspring = list(map(toolbox.clone, offspring))

        # Apply crosses and mutations to
         # individuals that are 'offspring' offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):

            # Cross two individuals using
             # CXPB
            if random.random() < CXPB:
                toolbox.mate(child1, child2)

                # Delete the result's fitness value
                 # crosses because they are invalid
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:

            # Mutation of an individual using
             # MUTPB
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Recalculate individual fitness values
         # 'offspring'
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        print('  Evaluated %i individuals' % len(invalid_ind))

        # Replace the whole population using
         # offspring
        pop[:] = offspring

        # Collect all fitness in a list
         # and to display statistics like
         # min, max, mean and standard deviation
        fits = [ind.fitness.values[0] for ind in pop]

        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean*2)*0.5
        
        mins.append(min(fits))
        maxs.append(max(fits))
        means.append(mean)
        stds.append(std)
        
        print('  Min %s' % min(fits))
        print('  Max %s' % max(fits))
        print('  Avg %s' % mean)
        print('  Std %s' % std)

    print('-- End of (successful) evolution --')
    print('\n')
    
    # Show final population
    print('-- Final Population --')
    df_fits = pd.DataFrame(fits, columns =['kulczynski']) 
    df_pop = michigan_decoding(pop, [*restaurant_replace_map_comp.values()])
    df_pop = pd.concat([df_pop, df_fits], axis=1)
    print('\n')
    
    return df_pop

if __name__ == '__main__':
    df_hasil = main()

Start of evolution
  Evaluated 1714 individuals
  Min 0.02119309262166405
  Max 0.11652188439692615
  Avg 0.03343791297628511
  Std 0.03280754231621501
-- Generation 1 --
  Evaluated 1016 individuals
  Min 0.0
  Max 0.5068493150684932
  Avg 0.08699949304954528
  Std 0.07178488853854795
-- Generation 2 --
  Evaluated 1040 individuals
  Min 0.0
  Max 0.5092592592592593
  Avg 0.165760018824214
  Std 0.13006193428371016
-- Generation 3 --
  Evaluated 978 individuals
  Min 0.0
  Max 0.5074626865671642
  Avg 0.25588368714536086
  Std 0.1963931945098193
-- Generation 4 --
  Evaluated 1058 individuals
  Min 0.0
  Max 0.5074626865671642
  Avg 0.33340977836641994
  Std 0.25286279052435445
-- Generation 5 --
  Evaluated 1010 individuals
  Min 0.0
  Max 0.5074626865671642
  Avg 0.39441311597618167
  Std 0.29716966181869575
-- Generation 6 --
  Evaluated 993 individuals
  Min 0.0
  Max 0.5073529411764706
  Avg 0.42086308355175606
  Std 0.31617176612594444
-- Generation 7 --
  Evaluated 1020 individ

In [None]:
display(df_hasil.head())

Unnamed: 0,antecedents,consequents,chromosome,kulczynski
0,"[2, 84, 92]","[32, 83]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
1,"[2, 92]","[32, 62]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
2,"[2, 92]","[32, 62, 71]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
3,"[2, 92]","[32, 62]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
4,"[2, 92]","[32, 62]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0


In [None]:
print('Nilai fitness minimum pada populasi sebelum GA %s' %round(mins[0], 4))
print('Nilai fitness minimum pada populasi setelah GA %s' %round(mins[-1], 4))
        
print("Nilai fitness maksimum pada populasi sebelum GA %s" %round(maxs[0], 4))
print("Nilai fitness maksimum pada populasi setelah GA %s" %round(maxs[-1], 4))
        
print("Rata-rata nilai fitness pada populasi sebelum GA %s" %round(means[0], 4))
print("Rata-rata nilai fitness pada populasi setelah GA %s" %round(means[-1], 4))
        
print("Standar deviasi nilai fitness pada populasi sebelum GA %s" %round(stds[0], 4))
print("Standar deviasi nilai fitness pada populasi setelah GA %s" %round(stds[-1], 4))

Nilai fitness minimum pada populasi sebelum GA 0.0212
Nilai fitness minimum pada populasi setelah GA 0.0
Nilai fitness maksimum pada populasi sebelum GA 0.1165
Nilai fitness maksimum pada populasi setelah GA 1.0
Rata-rata nilai fitness pada populasi sebelum GA 0.0334
Rata-rata nilai fitness pada populasi setelah GA 0.9153
Standar deviasi nilai fitness pada populasi sebelum GA 0.0328
Standar deviasi nilai fitness pada populasi setelah GA 0.4591


In [None]:
# remove duplicate association rules
df_hasil['new'] = df_hasil.chromosome.apply(tuple)
df_hasil_akhir = df_hasil.sort_values('kulczynski', ascending=False).drop_duplicates('new').drop(['new'], axis=1)
df_hasil_akhir = df_hasil_akhir[df_hasil_akhir["kulczynski"] != 0.00].reset_index(drop=True)

# remove association rules with missing antecedent/consequent values
# or with more than one item in antecedent / consequent
for id, baris in df_hasil_akhir.iterrows():
    if len(baris['antecedents']) == 0 or len(baris['consequents']) == 0:
        df_hasil_akhir.drop(id, inplace=True)

In [None]:
display(df_hasil_akhir.head())

Unnamed: 0,antecedents,consequents,chromosome,kulczynski
0,"[2, 84, 92]","[32, 83]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
1,"[2, 84, 92]","[32, 62]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
2,"[2, 92]","[32, 62]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
3,"[2, 92]","[32, 62, 83]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0
4,"[2, 84]","[32, 62]","[00, 01, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0...",1.0


In [None]:
# Check the association rules whether the antecedent contains all restaurants
# in the dataset
for i in range(100):
    if [i+1] in df_hasil_akhir['antecedents'].tolist() == False:
        print(i+1, " tidak ada")

In [None]:
def lookup_item(k):
    return list(restaurant_replace_map_comp.keys())[list(restaurant_replace_map_comp.values()).index(k)]

for baris in df_hasil_akhir.itertuples():
    print("Aturan Asosiasi #{0}".format(baris.Index + 1))
    (premise, conclusion) = (baris.antecedents, baris.consequents)
    premise_names = ", ".join(lookup_item(elemen) for elemen in premise)
    conclusion_names = ", ".join(lookup_item(elemen) for elemen in conclusion)
    print("Aturan Asosiasi: Jika seseorang merekomendasi {0}, maka ia juga akan merekomendasi {1}".format(premise_names, conclusion_names))
    print(" - Kulczynski: {0:.2f}".format(baris.kulczynski))
    print("\n")

Aturan Asosiasi #1
Aturan Asosiasi: Jika seseorang merekomendasi 13 Dhaba, The Chocolate Room, Tiki Shack, maka ia juga akan merekomendasi Dunkin' Donuts, Tempteys
 - Kulczynski: 1.00


Aturan Asosiasi #2
Aturan Asosiasi: Jika seseorang merekomendasi 13 Dhaba, The Chocolate Room, Tiki Shack, maka ia juga akan merekomendasi Dunkin' Donuts, Mustang Terrace Lounge
 - Kulczynski: 1.00


Aturan Asosiasi #3
Aturan Asosiasi: Jika seseorang merekomendasi 13 Dhaba, Tiki Shack, maka ia juga akan merekomendasi Dunkin' Donuts, Mustang Terrace Lounge
 - Kulczynski: 1.00


Aturan Asosiasi #4
Aturan Asosiasi: Jika seseorang merekomendasi 13 Dhaba, Tiki Shack, maka ia juga akan merekomendasi Dunkin' Donuts, Mustang Terrace Lounge, Tempteys
 - Kulczynski: 1.00


Aturan Asosiasi #5
Aturan Asosiasi: Jika seseorang merekomendasi 13 Dhaba, The Chocolate Room, maka ia juga akan merekomendasi Dunkin' Donuts, Mustang Terrace Lounge
 - Kulczynski: 1.00


Aturan Asosiasi #6
Aturan Asosiasi: Jika seseorang merek

## 6. Rekomendasi Restoran

In [1]:
def lookup_id_restoran(nama_restoran):
    return int(restaurant_replace_map_comp.get(nama_restoran))

def rekomendasi_restoran(id_restoran_prefensi):
    list_temp = []

    for indeks, baris in df_hasil_akhir.iterrows():
        if id_restoran_prefensi in baris['antecedents']:
            list_temp.append(baris)
    df_extract = pd.DataFrame(list_temp,
                           columns = ['antecedents',
                                      'consequents',
                                      'chromosome',
                                      'kulczynski']).reset_index(drop=True)
    
    # sort rows in df_extract by column kulczynski
    df_extract = df_extract.sort_values('kulczynski', ascending=False)
    
    # select the top 3 association rules
     # df_extract = df_extract.head(3)

    # combine each item in consequent
    list_top_restaurant = []
    for i in df_extract['consequents']:
        list_top_restaurant = list_top_restaurant + i

    list_rekomendasi = []
    for i in list_top_restaurant:
        list_rekomendasi = list_rekomendasi + [lookup_item(i)]

    return list(dict.fromkeys(list_rekomendasi))

In [None]:
# Enter preferred restaurant
x = input('')
print('Rekomendasi adalah', rekomendasi_restoran(lookup_id_restoran(x)))