In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [2]:
df_tourism_rating = pd.read_csv('../../Dataset/Tourism Rating/raw/tourism_rating.csv')
df_tourism  = pd.read_csv('../../Dataset/Tourism/tourism_encoded.csv')
df_user = pd.read_csv('../../Dataset/User/raw/user.csv')

In [3]:
df_tourism.head()

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,...,_1,Rating_Count,Alam,Hiburan,Kuliner,Olahraga,Rekreasi,Religius,Sejarah Edukasi,Seni Budaya
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Sejarah Edukasi,Jakarta,20000,46,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-61753924,...,1,18,0,0,0,0,0,0,1,0
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...","Sejarah Edukasi,Seni Budaya,Kuliner",Jakarta,0,46,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-61376448,...,2,25,0,0,1,0,0,0,1,1
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,"Rekreasi,Hiburan",Jakarta,270000,46,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-61253124,...,3,19,0,1,0,0,1,0,0,0
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,"Sejarah Edukasi,Seni Budaya,Rekreasi",Jakarta,10000,45,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-63024459,...,4,21,0,0,0,0,1,0,1,1
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,"Rekreasi,Olahraga",Jakarta,94000,45,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-612419,...,5,24,0,0,0,1,1,0,0,0


In [4]:
df_tourism.columns

Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',
       'Rating', 'Time_Minutes', 'Coordinate', 'Lat', 'Long', 'Column1', '_1',
       'Rating_Count', 'Alam', 'Hiburan', 'Kuliner', 'Olahraga', 'Rekreasi',
       'Religius', 'Sejarah Edukasi', 'Seni Budaya'],
      dtype='object')

In [5]:
drop_columns = df_tourism.columns[2:14]
df_tourism.drop(drop_columns, axis=1, inplace=True)
df_tourism.head()

Unnamed: 0,Place_Id,Place_Name,Alam,Hiburan,Kuliner,Olahraga,Rekreasi,Religius,Sejarah Edukasi,Seni Budaya
0,1,Monumen Nasional,0,0,0,0,0,0,1,0
1,2,Kota Tua,0,0,1,0,0,0,1,1
2,3,Dunia Fantasi,0,1,0,0,1,0,0,0
3,4,Taman Mini Indonesia Indah (TMII),0,0,0,0,1,0,1,1
4,5,Atlantis Water Adventure,0,0,0,1,1,0,0,0


In [6]:
category_similarity = cosine_similarity(df_tourism.iloc[:, 2:])
category_similarity.shape

(437, 437)

In [7]:
def similarity_tourism(tourism_name):
    index = df_tourism[df_tourism['Place_Name'] == tourism_name].index[0]
    similarity_score = category_similarity[index]
    similarity_place = sorted(list(enumerate(similarity_score)),key=lambda x: x[1],reverse=True)[1:11]
    similarity_list = []    
    for i in similarity_place:
        similarity_list.append([df_tourism.iloc[i[0], 1]] + [i[1]])
    return similarity_list

In [8]:
similarity_place = similarity_tourism('Dunia Fantasi')
similarity_place

[['Grand Indonesia Mall', 0.9999999999999998],
 ['The Escape Hunt', 0.9999999999999998],
 ['Taman Legenda Keong Emas', 0.9999999999999998],
 ['Waterboom PIK (Pantai Indah Kapuk)', 0.9999999999999998],
 ['SnowBay Waterpark', 0.9999999999999998],
 ['The Lost World Castle', 0.9999999999999998],
 ['Blue Lagoon Jogja', 0.9999999999999998],
 ['Jogja Bay Pirates Adventure Waterpark', 0.9999999999999998],
 ['Galaxy Waterpark Jogja', 0.9999999999999998],
 ['Trans Studio Bandung', 0.9999999999999998]]

In [9]:
df_collaborative = df_tourism_rating.copy()
df_collaborative

Unnamed: 0,User_Id,Place_Id,Place_Ratings
0,1,179,3
1,1,344,2
2,1,5,5
3,1,373,3
4,1,101,4
...,...,...,...
9995,300,425,2
9996,300,64,4
9997,300,311,3
9998,300,279,4


In [10]:
user_id = df_collaborative['User_Id'].unique().tolist()
user_id_encoded = {value: index for index, value in enumerate(user_id)}
print(user_id_encoded)

place_id = df_collaborative['Place_Id'].unique().tolist()
place_id_encoded = {value: index for index, value in enumerate(place_id)}
print(place_id_encoded)


{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 32: 31, 33: 32, 34: 33, 35: 34, 36: 35, 37: 36, 38: 37, 39: 38, 40: 39, 41: 40, 42: 41, 43: 42, 44: 43, 45: 44, 46: 45, 47: 46, 48: 47, 49: 48, 50: 49, 51: 50, 52: 51, 53: 52, 54: 53, 55: 54, 56: 55, 57: 56, 58: 57, 59: 58, 60: 59, 61: 60, 62: 61, 63: 62, 64: 63, 65: 64, 66: 65, 67: 66, 68: 67, 69: 68, 70: 69, 71: 70, 72: 71, 73: 72, 74: 73, 75: 74, 76: 75, 77: 76, 78: 77, 79: 78, 80: 79, 81: 80, 82: 81, 83: 82, 84: 83, 85: 84, 86: 85, 87: 86, 88: 87, 89: 88, 90: 89, 91: 90, 92: 91, 93: 92, 94: 93, 95: 94, 96: 95, 97: 96, 98: 97, 99: 98, 100: 99, 101: 100, 102: 101, 103: 102, 104: 103, 105: 104, 106: 105, 107: 106, 108: 107, 109: 108, 110: 109, 111: 110, 112: 111, 113: 112, 114: 113, 115: 114, 116: 115, 117: 116, 118: 117, 119: 118, 120: 119, 121: 120, 122: 12

In [11]:
scaler = MinMaxScaler()
place_ratings = df_collaborative['Place_Ratings'].tolist()
place_ratings_scaled = scaler.fit_transform(np.array(place_ratings).reshape(-1, 1))
df_collaborative['Place_Ratings'] = place_ratings_scaled


In [12]:
df_collaborative['User'] = df_collaborative['User_Id'].map(user_id_encoded)
df_collaborative['Place'] = df_collaborative['Place_Id'].map(place_id_encoded)
df_collaborative

Unnamed: 0,User_Id,Place_Id,Place_Ratings,User,Place
0,1,179,0.50,0,0
1,1,344,0.25,0,1
2,1,5,1.00,0,2
3,1,373,0.50,0,3
4,1,101,0.75,0,4
...,...,...,...,...,...
9995,300,425,0.25,299,324
9996,300,64,0.75,299,132
9997,300,311,0.50,299,348
9998,300,279,0.75,299,290


In [13]:
x_train, x_test, y_train, y_test = train_test_split(df_collaborative[['User', 'Place']].values, df_collaborative['Place_Ratings'].values, test_size=0.2, random_state=42,shuffle=True)

In [14]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((8000, 2), (2000, 2), (8000,), (2000,))

In [15]:
class RecommenderNet(tf.keras.Model):
 
  def __init__(self, num_users, num_place, embedding_size, **kwargs):
    super(RecommenderNet, self).__init__(**kwargs)
    self.num_users = num_users
    self.num_place = num_place
    self.embedding_size = embedding_size
    self.user_embedding = tf.keras.layers.Embedding(
        num_users,
        embedding_size,
        embeddings_initializer = 'he_normal',
        embeddings_regularizer = tf.keras.regularizers.l2(1e-6)
    )
    self.user_bias = tf.keras.layers.Embedding(num_users, 1)
    self.place_embedding = tf.keras.layers.Embedding(
        num_place,
        embedding_size,
        embeddings_initializer = 'he_normal',
        embeddings_regularizer = tf.keras.regularizers.l2(1e-6)
    )
    self.place_bias = tf.keras.layers.Embedding(num_place, 1)
    self.dense_1 = tf.keras.layers.Dense(128, activation='relu')
    self.dense_2 = tf.keras.layers.Dense(64, activation='relu')
    self.dense_3 = tf.keras.layers.Dense(1, activation='sigmoid')
 
  def call(self, inputs):
    user_vector = self.user_embedding(inputs[:,0])
    user_bias = self.user_bias(inputs[:, 0])
    place_vector = self.place_embedding(inputs[:, 1])
    place_bias = self.place_bias(inputs[:, 1])
 
    dot_user_place = tf.tensordot(user_vector, place_vector, 2) 
 
    x = dot_user_place + user_bias + place_bias
    x = self.dense_1(x)
    x = self.dense_2(x)
    x = self.dense_3(x)
  
    return x

In [16]:
model = RecommenderNet(num_users=len(user_id_encoded), num_place=len(place_id_encoded), embedding_size=128)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [17]:
model.fit(x_train, y_train, batch_size=128,epochs=100, verbose=2, validation_data=(x_test, y_test))

Epoch 1/100
63/63 - 3s - loss: 0.6943 - root_mean_squared_error: 0.3463 - val_loss: 0.6937 - val_root_mean_squared_error: 0.3434 - 3s/epoch - 40ms/step
Epoch 2/100
63/63 - 1s - loss: 0.6938 - root_mean_squared_error: 0.3459 - val_loss: 0.6938 - val_root_mean_squared_error: 0.3435 - 586ms/epoch - 9ms/step
Epoch 3/100
63/63 - 1s - loss: 0.6933 - root_mean_squared_error: 0.3456 - val_loss: 0.6940 - val_root_mean_squared_error: 0.3436 - 549ms/epoch - 9ms/step
Epoch 4/100
63/63 - 1s - loss: 0.6931 - root_mean_squared_error: 0.3455 - val_loss: 0.6938 - val_root_mean_squared_error: 0.3435 - 585ms/epoch - 9ms/step
Epoch 5/100
63/63 - 1s - loss: 0.6932 - root_mean_squared_error: 0.3456 - val_loss: 0.6940 - val_root_mean_squared_error: 0.3436 - 585ms/epoch - 9ms/step
Epoch 6/100
63/63 - 1s - loss: 0.6934 - root_mean_squared_error: 0.3457 - val_loss: 0.6942 - val_root_mean_squared_error: 0.3438 - 584ms/epoch - 9ms/step
Epoch 7/100
63/63 - 1s - loss: 0.6932 - root_mean_squared_error: 0.3455 - val_

<keras.callbacks.History at 0x21555a38d90>

In [18]:
user_id = df_tourism_rating['User_Id'].sample(1).iloc[0]
place_visited = df_tourism_rating[df_tourism_rating['User_Id'] == user_id]
place_visited.head()

Unnamed: 0,User_Id,Place_Id,Place_Ratings
8357,252,145,3
8358,252,114,1
8359,252,328,1
8360,252,169,3
8361,252,200,1


In [19]:
place_not_visited = df_tourism[~df_tourism['Place_Id'].isin(place_visited['Place_Id'])]['Place_Id']
place_not_visited = list(set(place_not_visited).intersection(set(place_id_encoded.keys())))
place_not_visited = [[place_id_encoded.get(x)] for x in place_not_visited]
user_encoder = user_id_encoded.get(user_id)
user_place_array = np.hstack(([[user_encoder]] * len(place_not_visited), place_not_visited))

In [20]:
ratings = model.predict(user_place_array).flatten()

 1/13 [=>............................] - ETA: 2s



In [21]:
top_ratings_indices = ratings.argsort()[-10:][::-1]
recommended_place_ids = [place_id_encoded.get(place_not_visited[x][0]) for x in top_ratings_indices]


In [22]:
print('Showing recommendations for user: {}'.format(user_id))
print('===' * 9)
print('Places with high ratings from user')
print('---' * 8)
top_ratings_place = (place_visited.sort_values(by='Place_Ratings', ascending=False).head(5)['Place_Id'].values)
df_tourism_rows = df_tourism[df_tourism['Place_Id'].isin(top_ratings_place)]
for row in df_tourism_rows.itertuples():
  print(row.Place_Name)
print('---' * 8)
print('Top 10 place recommendations')
print('---' * 8)
recommended_place = df_tourism[df_tourism['Place_Id'].isin(recommended_place_ids)]
for row in recommended_place.itertuples():
  print(row.Place_Name)
print('---' * 8)



Showing recommendations for user: 252
Places with high ratings from user
------------------------
Galeri Nasional Indonesia
Museum Sasmita Loka Ahmad Yani
Alun-alun Utara Keraton Yogyakarta
Grojogan Watu Purbo Bangunrejo
Pasar Baru
------------------------
Top 10 place recommendations
------------------------
Museum Fatahillah
Puncak Pinus Becici
Puncak Kebun Buah Mangunan
Hutan Pinus Asri
Pintoe Langit Dahromo
Museum Pos Indonesia
Wisata Mangrove Tapak
Waduk Jatibarang
Masjid Muhammad Cheng Hoo
Monumen Jalesveva Jayamahe
------------------------


In [23]:
model.save('model.h5')

NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.