In [118]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import re
import random
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

In [120]:
# Read the cafe dataset from its CSV file into a DataFrame.
CAFE_CSV_PATH = 'dataset/New_KafeJakarta.csv'
cafe_df = pd.read_csv(CAFE_CSV_PATH)

In [121]:
# Summarize the frame: row count, column names, non-null counts and dtypes.
cafe_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 33 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   cafe_id                 102 non-null    object
 1   name                    102 non-null    object
 2   description             41 non-null     object
 3   website                 68 non-null     object
 4   featured_image          102 non-null    object
 5   main_category           102 non-null    object
 6   categories              102 non-null    object
 7   workday_timing          99 non-null     object
 8   closed_on               15 non-null     object
 9   phone                   90 non-null     object
 10  address                 102 non-null    object
 11  review_keywords         89 non-null     object
 12  link                    102 non-null    object
 13  outdoor                 102 non-null    int64 
 14  Indoor                  102 non-null    int64 
 15  RetroV

In [122]:
# List every column label of the loaded dataset.
print(cafe_df.columns)

Index(['cafe_id', 'name', 'description', 'website', 'featured_image',
       'main_category', 'categories', 'workday_timing', 'closed_on', 'phone',
       'address', 'review_keywords', 'link', 'outdoor', 'Indoor',
       'RetroVintage', 'MinimalisCafe', 'IndustrialCafe', 'ModernCafe',
       'ArtCafe', 'PetCafe', 'BooksCafe', '24HoursCafe', 'MeetingCafe',
       'StudyCafe', 'Cafe with a good views', 'FamilyCafe', 'CountryFood',
       'Smoking', 'NonSmoking', 'Coffee', 'NonCoffee', 'GardenCafe'],
      dtype='object')


In [123]:
# Replace missing descriptions and websites with the placeholder text
# "Tidak tersedia" ("not available").
# The filled column is assigned back instead of calling
# `fillna(..., inplace=True)` on a column selection: recent pandas versions
# warn about that pattern, and with copy-on-write enabled it no longer
# updates `cafe_df` at all.
cafe_df["description"] = cafe_df["description"].fillna("Tidak tersedia")
cafe_df["website"] = cafe_df["website"].fillna("Tidak tersedia")

# Remaining null counts for the two columns; both should print 0.
print(cafe_df["description"].isnull().sum())
print(cafe_df["website"].isnull().sum())

cafe_df.info()

0
0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 33 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   cafe_id                 102 non-null    object
 1   name                    102 non-null    object
 2   description             102 non-null    object
 3   website                 102 non-null    object
 4   featured_image          102 non-null    object
 5   main_category           102 non-null    object
 6   categories              102 non-null    object
 7   workday_timing          99 non-null     object
 8   closed_on               15 non-null     object
 9   phone                   90 non-null     object
 10  address                 102 non-null    object
 11  review_keywords         89 non-null     object
 12  link                    102 non-null    object
 13  outdoor                 102 non-null    int64 
 14  Indoor                  102 non-null    int64 
 15  Re

In [124]:
def print_description(index):
    """Print the description, name and address of the cafe at ``index``.

    Looks up the rows of the module-level ``cafe_df`` whose index label equals
    ``index`` and prints the first match: the description on the first line,
    followed by ``Nama:`` (name) and ``Alamat:`` (address) lines.

    ``cafe_df`` must have ``name``, ``address`` and ``description`` as regular
    columns when this is called.

    Parameters
    ----------
    index
        Index label of the row to show.

    Returns
    -------
    None
        Output goes to stdout. Nothing is printed when no row has the
        requested label.
    """
    matches = cafe_df.loc[cafe_df.index == index, ['name', 'address', 'description']]

    # Check for a match *before* taking the first row. Indexing an empty
    # result with [0] raises IndexError, and a length check on the row itself
    # is always true because a row always holds the three selected columns.
    if matches.empty:
        return

    name, address, description = matches.iloc[0]
    print(description)
    print('Nama:', name)
    print('Alamat:', address)

In [125]:
# Show the row at index 0, which carries a real description.
print_description(0)

Tempat mengolah kopi yang menyajikan kopi klasik & campuran, serta makanan Indonesia, di ruang luas & klasik.
Nama: Bakoel Koffie Cikini
Alamat: Bakoel Koffie Cikini, Jl. Cikini Raya No.25, RT.16/RW.1, Cikini, Kec. Menteng, Kota Jakarta Pusat, Daerah Khusus Ibukota Jakarta 10330


In [126]:
# Show the row at index 1; its description prints as the "Tidak tersedia" placeholder.
print_description(1)

Tidak tersedia
Nama: Anomali Coffee Menteng
Alamat: Anomali Coffee Menteng, Jl. Teuku Cik Ditiro No.52, RT.10/RW.5, Menteng, Kec. Menteng, Kota Jakarta Pusat, Daerah Khusus Ibukota Jakarta 10310


In [127]:
# Spot-check another row further down the table (index 40).
print_description(40)

Menawarkan aneka minuman kopi yg terbuat dari biji kopi pilihan dari berbagai daerah. Tersedia kopi luwak.
Nama: Blumchen Coffee
Alamat: Blumchen Coffee, Fairground SCBD Lot 14, Jl. Jend. Sudirman kav 52-53 No.Kav 52-53, RT.5/RW.3, Senayan, Kec. Kby. Baru, Kota Jakarta Selatan, Daerah Khusus Ibukota Jakarta 12190


In [128]:
# Index the frame by cafe name so that row positions in the similarity matrix
# can be mapped back to names.
# The guard keeps this cell re-runnable: an unconditional
# `set_index('name', inplace=True)` raises KeyError on a second run because
# 'name' is no longer a column by then.
if 'name' in cafe_df.columns:
    cafe_df = cafe_df.set_index('name')

# TF-IDF over word 1- to 3-grams of the description text. `min_df=0.01` is a
# document-frequency fraction: terms found in less than 1% of the
# descriptions are left out of the vocabulary.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0.01)
tfidf_matrix = tf.fit_transform(cafe_df['description'])

# Pairwise cosine similarity between all description vectors (square matrix,
# one row/column per cafe, in `cafe_df` row order).
# NOTE(review): rows whose description is the shared "Tidak tersedia"
# placeholder vectorize identically, so they score exactly 1.0 against each
# other, which is a filler artifact rather than real similarity. Consider
# adding other text columns or excluding placeholder rows; confirm with the
# author.
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cos_sim

array([[1.        , 0.        , 0.31700413, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.31700413, 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 1.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.        , 1.        , 0.        , ..., 1.        , 1.        ,
        1.        ],
       [0.        , 1.        , 0.        , ..., 1.        , 1.        ,
        1.        ]])

In [129]:
# Positional lookup table: row position (0, 1, 2, ...) -> cafe name, built
# from the frame's index.
indices = cafe_df.index.to_series().reset_index(drop=True)
indices.head(50)

0                     Bakoel Koffie Cikini
1                   Anomali Coffee Menteng
2               Lucky Cat Coffee & Kitchen
3                           Anomali Coffee
4                  Giyanti Coffee Roastery
5                                 The Cafe
6                             Arborea Cafe
7                            Walking Drums
8                       Shisha Cafe Kemang
9     Monolog Coffee Company Plaza Senayan
10                           Kedai Tjikini
11                           Hause Rooftop
12                             Kopi Kalyan
13                    Langit Seduh Rooftop
14                    Saudagar Kopi Sabang
15                    1/15 Coffee, Menteng
16                                Goedkoop
17               Fami Cafe grand indonesia
18                    Pison Coffee Jakarta
19                    One Fifteenth Coffee
20                             Crematology
21                         Djournal Coffee
22                               Starbucks
23         

In [130]:
def recommendations(name, cos_sim = cos_sim):
    """Return up to ten cafes most similar to the cafe called ``name``.

    Parameters
    ----------
    name
        Cafe name, matched exactly against the module-level ``indices``
        series. If the name occurs more than once, the first occurrence is
        used.
    cos_sim
        Square similarity matrix indexed by row position, in the same order
        as ``indices`` and ``cafe_df.index``. Defaults to the module-level
        matrix that exists when this function is defined.

    Returns
    -------
    list
        Cafe names taken from ``cafe_df.index``, highest similarity first.
        The queried cafe itself is never part of the result.

    Raises
    ------
    IndexError
        If ``name`` does not occur in ``indices``.
    """
    # Row position of the requested cafe.
    matches = indices[indices == name]
    if matches.empty:
        raise IndexError(f"cafe {name!r} not found in indices")
    idx = matches.index[0]

    # Similarity of the requested cafe to every cafe, labelled by row position.
    score_series = pd.Series(cos_sim[idx])

    # Drop the queried cafe by position rather than assuming it sorts first.
    # When other rows tie with it at similarity 1.0, skipping "the first
    # sorted entry" can discard a real match and recommend the cafe to itself.
    # nlargest keeps earlier rows first among equal scores, so the order of
    # ties is deterministic.
    top_10_indexes = score_series.drop(idx).nlargest(10).index

    # Map row positions back to cafe names.
    cafe_names = cafe_df.index
    return [cafe_names[i] for i in top_10_indexes]

In [131]:
# Cafes whose description text is most similar to this cafe's description.
recommendations('Lucky Cat Coffee & Kitchen')

["D'Bubbles shisha Cafe Tebet",
 'Kopi Kalyan',
 'Bakoel Koffie Cikini',
 'CHIEF BARBERSHOP & COFFEE - CIRAGIL',
 'Twin House Cipete',
 'Saudagar Kopi Sabang',
 'Walking Drums',
 'Djakarta Kafe',
 'Antipodean Coffee',
 'KLTR Jakarta']

In [134]:
# Same lookup for a second cafe.
recommendations('The Cat Cabin')

['Crematology',
 'Djakarta Kafe',
 'REN Coffee & Eatery',
 'KEN ALIY COFFEE - KOPI ENAK SPACES',
 'Hause Rooftop',
 'Monolog Coffee Company Plaza Senayan',
 'Pison Coffee Jakarta',
 'CHIEF COFFEE - KEMANG',
 'Arborea Cafe',
 'Walking Drums']