In [8]:
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten
from tensorflow.keras.models import Sequential

# from keras_tuner import RandomSearch, HyperModel

# Load the dataset
file_path = 'combined-dataset/final_reviews_data.csv'
data = pd.read_csv(file_path)

# Encode the 'types' column
label_encoder = LabelEncoder()
data['types_encoded'] = label_encoder.fit_transform(data['types'])

# Tokenize the 'review' column
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad the sequences
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Create the feature set
X = {
    'review': padded_sequences,
    'types': data['types_encoded'].values,
}

# Normalize the sentiment scores
y = data['sentiment'].values


In [9]:
# Define input layers
review_input = Input(shape=(max_sequence_length,), name='review')
types_input = Input(shape=(1,), name='types')

# Define embedding and LSTM layers for review input
review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(review_input)
review_lstm = LSTM(128)(review_embedding)

# Define embedding layer for types input
types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
types_flat = Flatten()(types_embedding)

# Concatenate the review and types embeddings
concatenated = Concatenate()([review_lstm, types_flat])

# Sequential part of the model
sequential_model = Sequential([
    Input(shape=(concatenated.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Full model combining the inputs and sequential model
output = sequential_model(concatenated)
full_model = Model(inputs=[review_input, types_input], outputs=output)

# Compile the model
full_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
full_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
full_model.save('39_test_modelV3.keras')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [ ]:
# class SentimentHyperModel(HyperModel):
#     def build(self, hp):
#         review_input = Input(shape=(max_sequence_length,), name='review')
#         types_input = Input(shape=(1,), name='types')
# 
#         # Define embedding and LSTM layers for review input
#         embedding_output_dim = hp.Int('embedding_output_dim', min_value=64, max_value=256, step=32)
#         lstm_units = hp.Int('lstm_units', min_value=64, max_value=256, step=32)
#         review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_output_dim)(review_input)
#         review_lstm = LSTM(units=lstm_units)(review_embedding)
# 
#         # Define embedding layer for types input
#         types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
#         types_flat = Flatten()(types_embedding)
# 
#         # Concatenate the review and types embeddings
#         concatenated = Concatenate()([review_lstm, types_flat])
# 
#         # Add dense layers for final prediction
#         dense_units_1 = hp.Int('dense_units_1', min_value=64, max_value=256, step=32)
#         dense_units_2 = hp.Int('dense_units_2', min_value=32, max_value=128, step=16)
#         dense_1 = Dense(units=dense_units_1, activation='relu')(concatenated)
#         dense_2 = Dense(units=dense_units_2, activation='relu')(dense_1)
#         output = Dense(1, activation='linear')(dense_2)
# 
#         # Choose an optimizer
#         optimizer_choice = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
# 
#         if optimizer_choice == 'adam':
#             optimizer = tf.keras.optimizers.Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'))
#         elif optimizer_choice == 'sgd':
#             optimizer = tf.keras.optimizers.SGD(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'))
#         elif optimizer_choice == 'rmsprop':
#             optimizer = tf.keras.optimizers.RMSprop(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'))
# 
#         # Create the model
#         model = Model(inputs=[review_input, types_input], outputs=output)
#         model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
# 
#         return model

In [ ]:
# tuner = RandomSearch(
#     hypermodel=SentimentHyperModel(),
#     objective='val_loss',
#     max_trials=10,  # Number of different hyperparameter sets to try
#     executions_per_trial=2,  # Number of models to train with the same hyperparameters
#     directory='model-testing',
#     project_name='sentiment_tuning'
# )
# 
# # %%
# # Search for the best hyperparameters
# tuner.search([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
# 
# # Get the optimal hyperparameters
# best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
# 
# # Build the best model
# best_model = tuner.hypermodel.build(best_hps)

In [ ]:
# best_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
# 
# # %%
# # Save the best model
# best_model.save('best_sentiment_model.keras')

In [ ]:
import tensorflow as tf
model = tf.keras.models.load_model('39_test_modelV2.keras')

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X['review'][place_idx]
    place_types = X['types'][place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Example 
place_id = 'ChIJIaGQ-Eg60i0RnT9pzyD_gvM'  # Replace with an actual place ID from your dataset
recommendations = get_recommendations(place_id, data, model, top_n=10)
print(recommendations)


In [None]:
# Save the model
model.save('39_test_model.keras')

In [None]:
# Load the model
# model = tf.keras.models.load_model('39_test_model.keras')

In [None]:
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations(rand_id, df_review, model, top_n=10)

In [None]:

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# sort reccomendations by sentiment
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print the recommendations with place names with out rand_ind
print(sorted_reccomendations[['name','types_x', 'rating']])

In [14]:
# Test cell, Run this cell to get recommendations for a random place in the dataset

import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten


print(tf.__version__)

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Create the feature set
    X = {
        'review': padded_sequences,
        'types': data['types_encoded'].values,
    }
    
    # Normalize the sentiment scores
    y = data['sentiment'].values

    place_idx = data[data['id'] == place_id].index[0]
    place_review = X['review'][place_idx]
    place_types = X['types'][place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# model = tf.keras.models.load_model('best_sentiment_model.keras')
model = tf.keras.models.load_model('39_test_modelV3.keras')

df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)[0]

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# sort reccomendations by sentiment
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print the recommendations with place names with out rand_ind
print(sorted_reccomendations[['name','types_x', 'rating']])

2.15.0
Random place :ChIJF70-LKBG0i0RUO3m3-Jy9BM The Anvaya Beach Resort Bali
                                  name  \
2              Teba Junjungan Cottages   
5  Griya Santrian a Beach Resort & Spa   
9              Goa Rang Reng Waterfall   
6                    Matahari Bungalow   
4                         Umah ketipat   
8                       Hot Stone Club   
1                    Wyn's Penida Cafe   
0   Karsawabali Khos dan Innoka Coffee   
7                Wr. Nasi Lukluk (WNL)   
3            Warung Banyuwangi Bu Doni   

                                             types_x  rating  
2                         bed_and_breakfast, lodging     4.5  
5  hotel, resort_hotel, spa, lodging, restaurant,...     4.5  
9                                 tourist_attraction     4.7  
6                                            lodging     4.2  
4                                   restaurant, food     4.5  
8  fitness_center, hotel, spa, gym, lodging, spor...     4.6  
1                 

In [15]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    # Encode the 'types' column
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values

    # Get the index of the specified place_id
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X_review[place_idx]
    place_types = X_types[place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types], batch_size=128, verbose=0)

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X_review, X_types.reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n-1:][::-1]
    similar_indices = similar_indices[similar_indices != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations
recommendations, predicted_sentiments = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by sentiment
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print the recommendations with place names without rand_id
print(sorted_recommendations[['name', 'types_x', 'rating']])


Random place: ChIJ04MsoFw60i0Re1uww-1VfY0 Warung DKI
                                  name  \
1              Teba Junjungan Cottages   
4  Griya Santrian a Beach Resort & Spa   
8              Goa Rang Reng Waterfall   
5                    Matahari Bungalow   
3                         Umah ketipat   
7                       Hot Stone Club   
0                    Wyn's Penida Cafe   
6                Wr. Nasi Lukluk (WNL)   
9                  The Upper Deck Ubud   
2            Warung Banyuwangi Bu Doni   

                                             types_x  rating  
1                         bed_and_breakfast, lodging     4.5  
4  hotel, resort_hotel, spa, lodging, restaurant,...     4.5  
8                                 tourist_attraction     4.7  
5                                            lodging     4.2  
3                                   restaurant, food     4.5  
7  fitness_center, hotel, spa, gym, lodging, spor...     4.6  
0                                   restaur

In [16]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import faiss

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    # Encode the 'types' column
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values

    # Get the index of the specified place_id
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X_review[place_idx]
    place_types = X_types[place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types], batch_size=128, verbose=0)

    # Combine review and types vectors
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X_review, X_types.reshape(-1, 1)])

    # Using Faiss for approximate nearest neighbors
    d = all_vectors.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(all_vectors.astype(np.float32))
    D, I = index.search(np.array([place_vector.astype(np.float32)]), top_n + 1)

    # Get top N similar places (excluding the place itself)
    similar_indices = I[0][I[0] != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations
recommendations, predicted_sentiments = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by sentiment
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print the recommendations with place names without rand_id
print(sorted_recommendations[['name', 'types_x', 'rating']])


Random place: ChIJ3bZRNeZA0i0RAYfkeurNqDo Wiracana HandFan Sesetan
                                   name  \
1               Teba Junjungan Cottages   
3   Griya Santrian a Beach Resort & Spa   
8                     Salad Lab Ungasan   
2                          Umah ketipat   
6  Abian Harmony Hotel Restaurant & Spa   
9                        Hot Stone Club   
7             Sea View Villa Bloom Bali   
0                     Wyn's Penida Cafe   
5                     Cici Claypot Bali   
4                 Wr. Nasi Lukluk (WNL)   

                                             types_x  rating  
1                         bed_and_breakfast, lodging     4.5  
3  hotel, resort_hotel, spa, lodging, restaurant,...     4.5  
8                                   restaurant, food     4.4  
2                                   restaurant, food     4.5  
6                                     hotel, lodging     3.9  
9  fitness_center, hotel, spa, gym, lodging, spor...     4.6  
7                 

In [17]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import faiss

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    # Encode the 'types' column
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values.reshape(-1, 1)

    # Combine review and types vectors
    combined_vectors = np.hstack([X_review, X_types])

    # Dimensionality reduction using PCA
    pca = PCA(n_components=50)  # Adjust the number of components as needed
    reduced_vectors = pca.fit_transform(combined_vectors)

    # Get the index of the specified place_id
    place_idx = data[data['id'] == place_id].index[0]
    place_vector = reduced_vectors[place_idx]

    # Using Faiss for approximate nearest neighbors
    d = reduced_vectors.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(reduced_vectors.astype(np.float32))
    D, I = index.search(np.array([place_vector.astype(np.float32)]), top_n + 1)

    # Get top N similar places (excluding the place itself)
    similar_indices = I[0][I[0] != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types.squeeze()], batch_size=128, verbose=0)

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations
recommendations, predicted_sentiments = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by sentiment
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print the recommendations with place names without rand_id
print(sorted_recommendations[['name', 'types_x', 'rating']])


Random place: ChIJGwTW4RlH0i0RSU-EOx8spQ0 Amadea Resort & Villas
                               name  \
2                 Salad Lab Ungasan   
5     Dwa Chandra Villas & Retreats   
7  Marriott’s Bali Nusa Dua Terrace   
0                   Ubud Batan Nyuh   
8                    Ritatkala Cafe   
4                    Ubud Poke Bowl   
3                    INFINITY8 BALI   
1               Alassari Plantation   
6                      Villa Mathis   
9               Taman Bintang Villa   

                                             types_x  rating  
2                                   restaurant, food     4.4  
5    bed_and_breakfast, event_venue, lodging, health     5.0  
7         hotel, wedding_venue, lodging, event_venue     4.7  
0                                     hotel, lodging     4.3  
8  cafe, coffee_shop, indonesian_restaurant, stor...     4.8  
4                indian_restaurant, restaurant, food     4.7  
3  hotel, bed_and_breakfast, coffee_shop, swimmin...     4.4  
1