In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import re
import random


In [None]:
# Load the hotel dataset into `df`.
# NOTE(review): the absolute /content/ path presumably refers to a Colab runtime upload —
# adjust it when running elsewhere.
df = pd.read_csv('/content/hotel_bandung_english.csv')
# Preview up to the first 100 rows.
df.head(100)

Unnamed: 0,name,address,description
0,Capital O 253 Topas Galeria Hotel,"Jl. Dr. Djundjunan No. 153, 40173 Bandung, Ind...","A 10-minute drive from Bandung Airport, Topas ..."
1,Sheraton Bandung Hotel & Towers,"Jl. Ir H Juanda 390, 40135 Bandung, Indonesia",Sheraton Hotel & Towers offers 5-star accommod...
2,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO..."
3,OYO 226 LJ hotel,"Jl. Malabar No.2, Malabar, Lengkong, Dago, Asi...","Featuring a shared lounge, OYO 226 LJ hotel is..."
4,OYO 230 Maleo Residence,"JI. Dangeur Indah II No. 15, Sukagalih, Sukaja...",Attractively set in the Sukajadi district of B...
...,...,...,...
95,De Batara Hotel,"Jl. Cihampelas no. 112, Bandung Wetan, 40131 B...",In a great location in the center of Bandung C...
96,Hemangini Hotel Bandung,"Jl. Setiabudhi No. 66, 40141 Bandung, Indonesia","Set in Bandung, Hemangini Hotel Bandung offers..."
97,Vio Cihampelas,"Jl. Cihampelas No. 108, 40116 Bandung, Indonesia",A 3-minute walk from Cihampelas Walk and the w...
98,OYO 260 Home 33,"Jl. Terusan Babakan Jeruk IV No.33, Sukagalih,...","Located in Bandung, 4.5 km from Villa Isola, O..."


In [None]:
# Per-column summary (count, unique values, most frequent value and its frequency).
# Fewer unique names than rows in the output means some hotels are listed more than once.
df.describe()

Unnamed: 0,name,address,description
count,105,105,105
unique,101,102,103
top,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO..."
freq,3,3,2


In [None]:
# Column dtypes and non-null counts, to check for missing values before text processing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105 entries, 0 to 104
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   name         105 non-null    object
 1   address      105 non-null    object
 2   description  105 non-null    object
dtypes: object(3)
memory usage: 2.6+ KB


In [None]:
def print_description(index):
  """Print the description, name and address of the hotel at row label `index`.

  The row is looked up in the module-level `df` by comparing `df.index` with
  `index`, and the 'description', 'name' and 'address' columns are read, so
  'name' must still be a regular column of `df` when this is called.

  Args:
    index: Row label to look up in `df.index`.

  Returns:
    None. Prints three lines when a row matches and nothing otherwise.
  """
  rows = df.loc[df.index == index, ['description', 'name', 'address']].values
  # Test for a match *before* taking the first row: indexing `.values[0]`
  # first raised IndexError on an unknown label, so the guard never fired.
  if len(rows) == 0:
    return
  description, name, address = rows[0]
  print(description)
  print('Nama:', name)
  print('Alamat:', address)

In [None]:
# Show the raw description, name and address of the hotel at row label 1.
print_description(1)

Sheraton Hotel & Towers offers 5-star accommodation in the middle of a green landscape in Bandung. All spacious rooms come with a flat-screen cable TV. The hotel offers an outdoor pool, spa center and restaurant with mountain views. Wi-Fi access is available free in all areas of the hotel. Elegant rooms have modern interiors, light wood furnishings and large windows. Each provides a comfortable seating area, DVD player and private bathroom with shower. You can work out in the gym or enjoy body treatments at the spa. Reception staff are ready to serve your needs for 24 hours. International and Asian dishes are offered at Feast Restaurant, while soft drinks are served at Samsara Lounge. A variety of cocktails and snacks are also available at Poolside Terrace. Sheraton Bandung Hotel & Towers is a 10-minute drive from Juanda Culture Park and Dago area, where various factory outlets are located. Husein Sastranegara Airport is a 30-minute drive away.
Nama: Sheraton Bandung Hotel & Towers
Alamat: Jl. Ir H Juanda 390, 40135 Bandung, Indonesia

In [None]:
# Show the raw description, name and address of the hotel at row label 50.
print_description(50)

Featuring an outdoor pool and a restaurant, House-Sangkuriang is conveniently located just a 5-minute walk from Dago’s factory outlets. It has a 24-hour front desk and provides free Wi-Fi access in all areas. Elegant and warmly lit, the air-conditioned rooms in House-Sangkuriang include hardwood floors. A flat-screen satellite TV, an electric kettle and a free one-time minibar are among the in-room comforts, and a shower, slippers and a hairdryer are included in the private bathrooms. The hotel also serves daily afternoon tea in the lobby and on the pool terrace. Cihampelas Walk Mall is a 10-minute drive from the property, and Husein Sastranegara Airport is a 20-minute drive away. Airport transportation can be arranged upon request. The staff at the front desk can assist with valet parking and luggage storage. Housing a business center, the hotel also provides laundry service for a fee. International dishes are served at Dining Room. Guests can also dine in the comfort of their rooms.


In [None]:
import nltk
nltk.download('stopwords')

# Raw string: `\[`, `\]` and `\|` are regex escapes, not Python string escapes;
# in a plain literal they trigger an "invalid escape sequence" warning.
clean_spcl = re.compile(r'[/(){}\[\]\|@,;]')
# Anything that is not a digit, lowercase ASCII letter, space, '#', '+' or '_'.
clean_symbol = re.compile('[^0-9a-z #+_]')
stopworda = set(stopwords.words('english'))

def clean_text(text):
  """Normalise a description for vectorisation.

  Lowercases the text, replaces the characters matched by `clean_spcl` and
  `clean_symbol` with spaces, then drops every whitespace-separated token
  found in the NLTK English stopword set.

  Args:
    text: Raw description string.

  Returns:
    The cleaned text with tokens joined by single spaces. Non-string input
    (for example a NaN from a missing CSV cell) yields an empty string
    instead of raising AttributeError.
  """
  if not isinstance(text, str):
    return ''
  text = text.lower()
  text = clean_spcl.sub(' ', text)
  text = clean_symbol.sub(' ', text)
  return ' '.join(word for word in text.split() if word not in stopworda)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Add a cleaned copy of every description (lowercased, symbols and stopwords removed by clean_text).
df['desc_clean'] = df['description'].apply(clean_text)
# Preview the first rows with the new 'desc_clean' column.
df.head()

Unnamed: 0,name,address,description,desc_clean
0,Capital O 253 Topas Galeria Hotel,"Jl. Dr. Djundjunan No. 153, 40173 Bandung, Ind...","A 10-minute drive from Bandung Airport, Topas ...",10 minute drive bandung airport topas galeria ...
1,Sheraton Bandung Hotel & Towers,"Jl. Ir H Juanda 390, 40135 Bandung, Indonesia",Sheraton Hotel & Towers offers 5-star accommod...,sheraton hotel towers offers 5 star accommodat...
2,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO...",conveniently located sukajadi bandung oyo 794 ...
3,OYO 226 LJ hotel,"Jl. Malabar No.2, Malabar, Lengkong, Dago, Asi...","Featuring a shared lounge, OYO 226 LJ hotel is...",featuring shared lounge oyo 226 lj hotel locat...
4,OYO 230 Maleo Residence,"JI. Dangeur Indah II No. 15, Sukagalih, Sukaja...",Attractively set in the Sukajadi district of B...,attractively set sukajadi district bandung oyo...


In [None]:
def print_description_clean(index):
  """Print the cleaned description, name and address of the hotel at row label `index`.

  The row is looked up in the module-level `df` by comparing `df.index` with
  `index`, and the 'desc_clean', 'name' and 'address' columns are read, so
  'name' must still be a regular column of `df` when this is called.

  Args:
    index: Row label to look up in `df.index`.

  Returns:
    None. Prints three lines when a row matches and nothing otherwise.
  """
  rows = df.loc[df.index == index, ['desc_clean', 'name', 'address']].values
  # Test for a match *before* taking the first row: indexing `.values[0]`
  # first raised IndexError on an unknown label, so the guard never fired.
  if len(rows) == 0:
    return
  desc_clean, name, address = rows[0]
  print(desc_clean)
  print('Nama : ', name)
  print('Alamat :', address)

In [None]:
# Show the cleaned description for row label 1, for comparison with the raw text printed earlier.
print_description_clean(1)

sheraton hotel towers offers 5 star accommodation middle green landscape bandung spacious rooms come flat screen cable tv hotel offers outdoor pool spa center restaurant mountain views wi fi access available free areas hotel elegant rooms modern interiors light wood furnishings large windows provides comfortable seating area dvd player private bathroom shower work gym enjoy body treatments spa reception staff ready serve needs 24 hours international asian dishes offered feast restaurant soft drinks served samsara lounge variety cocktails snacks also available poolside terrace sheraton bandung hotel towers 10 minute drive juanda culture park dago area various factory outlets located husein sastranegara airport 30 minute drive away
Nama :  Sheraton Bandung Hotel & Towers
Alamat : Jl. Ir H Juanda 390, 40135 Bandung, Indonesia


In [None]:
# Index the frame by hotel name so similarity rows can be mapped back to names.
# Guarded so that re-running this cell does not raise KeyError once 'name'
# has already been moved into the index.
if 'name' in df.columns:
  df.set_index('name', inplace=True)

# TF-IDF over word uni-, bi- and tri-grams of the cleaned descriptions.
# min_df=1 keeps every term that occurs in at least one document, which is the
# same filtering as the previous integer 0; recent scikit-learn releases
# validate parameters and reject an integer min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['desc_clean'])

# Pairwise cosine similarity between all descriptions; row/column i
# corresponds to the i-th row of `df`.
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cos_sim


array([[1.        , 0.03481248, 0.0199805 , ..., 0.01324636, 0.04176129,
        0.03686415],
       [0.03481248, 1.        , 0.02372401, ..., 0.01972437, 0.03977114,
        0.02362085],
       [0.0199805 , 0.02372401, 1.        , ..., 0.18021868, 0.01812967,
        0.0314635 ],
       ...,
       [0.01324636, 0.01972437, 0.18021868, ..., 1.        , 0.01098484,
        0.02875464],
       [0.04176129, 0.03977114, 0.01812967, ..., 0.01098484, 1.        ,
        0.0357502 ],
       [0.03686415, 0.02362085, 0.0314635 , ..., 0.02875464, 0.0357502 ,
        1.        ]])

In [None]:
# Positional lookup table: integer position -> value of df.index at that position.
# recommendations() searches it to find the row of cos_sim that belongs to a hotel name.
indices = pd.Series(df.index)
# Preview the first 50 entries.
indices[:50]

0                Capital O 253 Topas Galeria Hotel
1                  Sheraton Bandung Hotel & Towers
2                   OYO 794 Ln 9 Bandung Residence
3                                 OYO 226 LJ hotel
4                          OYO 230 Maleo Residence
5                        OYO 167 Dago's Hill Hotel
6                   OYO 794 Ln 9 Bandung Residence
7                       OYO 196 Horizone Residence
8     OYO 483 Flagship Tamansari Panoramic Bandung
9               OYO 295 Grha Ciumbuleuit Residence
10                            OYO 193 SM Residence
11              Capital O 874 Hotel Nyland Pasteur
12                            OYO 352 Sabang Hotel
13                                  Hilton Bandung
14             InterContinental Bandung Dago Pakar
15                                Aryaduta Bandung
16               Art Deco Luxury Hotel & Residence
17                            Crowne Plaza Bandung
18          Best Western Premier La Grande Bandung
19                         éL R

In [None]:
def recommendations(name, cos_sim = cos_sim):
  """Return up to 10 hotel names whose descriptions are most similar to `name`.

  Args:
    name: Hotel name to look up in the module-level `indices` series.
    cos_sim: Similarity matrix indexed by row position; assumed to be square
      and aligned with `indices` / `df.index` — TODO confirm if a custom
      matrix is passed. The default is bound when this function is defined,
      so re-run this cell after recomputing `cos_sim`.

  Returns:
    A list of at most 10 distinct hotel names ordered by decreasing
    similarity. The queried name itself is never included, even when the
    dataset lists it more than once.

  Raises:
    IndexError: If `name` is not present in `indices` (same exception type as
      the bare lookup raised before, now with an explanatory message).
  """
  matches = indices[indices == name].index
  if len(matches) == 0:
    raise IndexError(f'hotel name {name!r} not found in indices')
  idx = matches[0]

  score_series = pd.Series(cos_sim[idx]).sort_values(ascending = False)

  # Build the name list once instead of on every loop iteration.
  hotel_names = list(df.index)

  recommended_hotel = []
  seen = {name}  # skip the query hotel and any repeated listing of it
  # Walk the whole ranking rather than slicing [1:11]: with duplicated rows
  # the query hotel is not guaranteed to sit at rank 0, and the same name can
  # appear at several positions.
  for i in score_series.index:
    candidate = hotel_names[i]
    if candidate in seen:
      continue
    seen.add(candidate)
    recommended_hotel.append(candidate)
    if len(recommended_hotel) == 10:
      break

  return recommended_hotel

In [None]:
# Example query: hotels whose descriptions are most similar to 'OYO 226 LJ hotel'.
recommendations('OYO 226 LJ hotel')

['OYO 794 Ln 9 Bandung Residence',
 'P Hostel',
 'OYO 352 Sabang Hotel',
 'OYO 260 Home 33',
 'Mogens Guesthouse',
 'OYO 569 Cigadung Residence',
 'Hotel Cemerlang',
 'OYO 196 Horizone Residence',
 'Kampioen Bed & Breakfast',
 'OYO 794 Ln 9 Bandung Residence']