In [1]:
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten
from keras_tuner import RandomSearch, HyperModel

# Load the review-level dataset. Later cells read the columns
# 'types', 'review', 'sentiment' and 'id' from this frame.
file_path = 'combined-dataset/final_reviews_data.csv'
data = pd.read_csv(file_path)

# Encode the 'types' column as integer class ids and store them in a new
# 'types_encoded' column (this mutates `data` in place).
label_encoder = LabelEncoder()
data['types_encoded'] = label_encoder.fit_transform(data['types'])

# Tokenize the 'review' column: fit the vocabulary on every review, then
# convert each review into a list of integer token ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad the sequences to the length of the longest review so they form one
# rectangular array (pad_sequences is used with its defaults besides maxlen).
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Create the feature set. The keys match the names of the two Keras Input
# layers defined further down ('review' and 'types').
X = {
    'review': padded_sequences,
    'types': data['types_encoded'].values,
}

# Regression target: the 'sentiment' column taken as-is.
# NOTE: no normalization is applied here, despite what an earlier comment said.
y = data['sentiment'].values


2024-06-13 03:10:00.382762: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-13 03:10:00.382816: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-13 03:10:00.383357: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-13 03:10:00.387139: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# # Define input layers
# review_input = Input(shape=(max_sequence_length,), name='review')
# types_input = Input(shape=(1,), name='types')
# 
# # Define embedding and LSTM layers for review input
# review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(review_input)
# review_lstm = LSTM(128)(review_embedding)
# 
# # Define embedding layer for types input
# types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
# types_flat = tf.keras.layers.Flatten()(types_embedding)
# 
# # Concatenate the review and types embeddings
# concatenated = Concatenate()([review_lstm, types_flat])
# 
# # Add dense layers for final prediction
# dense_1 = Dense(128, activation='relu')(concatenated)
# dense_2 = Dense(64, activation='relu')(dense_1)
# output = Dense(1, activation='linear')(dense_2)
# 
# # Create the model
# model = Model(inputs=[review_input, types_input], outputs=output)
# model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# 
# # Train the model
# model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)


In [3]:
class SentimentHyperModel(HyperModel):
    """KerasTuner hypermodel for the two-input sentiment regressor.

    The network has a text branch (Embedding -> LSTM over the padded review
    tokens) and a category branch (Embedding -> Flatten over the encoded
    place type). Both branches are concatenated and passed through two ReLU
    dense layers into a single linear output.

    The model reads the notebook-level names ``max_sequence_length``,
    ``tokenizer`` and ``data``; they must exist before ``build`` is called.
    """

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: KerasTuner ``HyperParameters`` object used to sample the
                embedding size, LSTM width, dense widths, optimizer and
                learning rate.

        Returns:
            A compiled ``tf.keras`` ``Model`` with inputs named 'review' and
            'types', trained with MSE loss and reporting MAE.
        """
        text_in = Input(shape=(max_sequence_length,), name='review')
        category_in = Input(shape=(1,), name='types')

        # Text branch: token ids -> dense vectors -> LSTM summary vector.
        embed_dim = hp.Int('embedding_output_dim', min_value=64, max_value=256, step=32)
        n_lstm = hp.Int('lstm_units', min_value=64, max_value=256, step=32)
        # +1 leaves room for index 0 in addition to the tokenizer's word ids.
        vocab_size = len(tokenizer.word_index) + 1
        text_embedded = Embedding(input_dim=vocab_size, output_dim=embed_dim)(text_in)
        text_features = LSTM(units=n_lstm)(text_embedded)

        # Category branch: one learned 10-d vector per encoded place type.
        n_type_classes = data['types_encoded'].nunique()
        category_embedded = Embedding(input_dim=n_type_classes, output_dim=10)(category_in)
        category_features = Flatten()(category_embedded)

        merged = Concatenate()([text_features, category_features])

        # Regression head.
        width_1 = hp.Int('dense_units_1', min_value=64, max_value=256, step=32)
        width_2 = hp.Int('dense_units_2', min_value=32, max_value=128, step=16)
        hidden = Dense(units=width_1, activation='relu')(merged)
        hidden = Dense(units=width_2, activation='relu')(hidden)
        prediction = Dense(1, activation='linear')(hidden)

        # Optimizer: pick the class by name, then sample one shared
        # log-uniform learning rate for whichever class was chosen.
        optimizer_classes = {
            'adam': tf.keras.optimizers.Adam,
            'sgd': tf.keras.optimizers.SGD,
            'rmsprop': tf.keras.optimizers.RMSprop,
        }
        optimizer_name = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
        step_size = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
        optimizer = optimizer_classes[optimizer_name](learning_rate=step_size)

        network = Model(inputs=[text_in, category_in], outputs=prediction)
        network.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
        return network

In [4]:
# Random search over the hyperparameter space declared in
# SentimentHyperModel.build, minimising validation loss (MSE).
# Trial results are stored under model-testing/sentiment_tuning.
tuner = RandomSearch(
    hypermodel=SentimentHyperModel(),
    objective='val_loss',
    max_trials=10,  # Number of different hyperparameter sets to try
    executions_per_trial=2,  # Number of models to train with the same hyperparameters
    directory='model-testing',
    project_name='sentiment_tuning'
)

# %%
# Search for the best hyperparameters.
# The inputs are passed as a list in the same order as the model's inputs
# (review first, then types); 20% of the rows are held out for validation.
# This is the expensive step: the captured run below took close to 3 hours.
tuner.search([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the best model: a freshly built model with the winning
# hyperparameters. It is fitted in a separate step after this one.
best_model = tuner.hypermodel.build(best_hps)

Trial 10 Complete [00h 19m 27s]
val_loss: 0.044572435319423676

Best val_loss So Far: 0.021111944690346718
Total elapsed time: 02h 56m 37s


In [5]:
# Fit the model built from the best hyperparameters, using the same
# epochs / batch size / validation split as the tuner search.
best_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)

# %%
# Save the best model to the working directory in the .keras format
best_model.save('best_sentiment_model.keras')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# Reload the model from disk; `model` is what the later cells pass to
# get_recommendations.
model = tf.keras.models.load_model('best_sentiment_model.keras')

In [7]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    """Return the ``top_n`` rows of ``data`` most similar to a given place.

    Similarity is the cosine similarity between feature rows, where a feature
    row is the padded review token ids followed by the encoded place type.
    The features are read from the notebook-level ``X`` dict (keys 'review'
    and 'types'), so ``data`` must have exactly one row per row of ``X``, in
    the same order.

    NOTE(review): the similarity is computed on raw token ids and a label
    encoded category id, not on learned embeddings - confirm this is the
    intended notion of "similar".

    Args:
        place_id: Value looked up in ``data['id']``. When several rows share
            the id, the first one is used as the query.
        data: DataFrame with an 'id' column, row-aligned with ``X``.
        model: Accepted for call compatibility only; it is not used. Earlier
            code ran ``model.predict`` over the whole dataset here and then
            discarded the result.
        top_n: Maximum number of rows to return. Values <= 0 give an empty
            frame.

    Returns:
        DataFrame of up to ``top_n`` rows of ``data``, most similar first.
        The query row itself is part of the ranking and is not filtered out.

    Raises:
        ValueError: If ``data`` and ``X`` do not have the same number of rows.
        IndexError: If ``place_id`` does not occur in ``data['id']``.
    """
    review_matrix = X['review']
    type_codes = X['types']

    # Guard against hidden-state bugs: a frame that is not row-aligned with
    # X would silently produce recommendations for the wrong places.
    if len(data) != len(review_matrix):
        raise ValueError(
            f"data has {len(data)} rows but X has {len(review_matrix)}; "
            "they must be row-aligned"
        )

    # Use the *position* of the first match, not its index label: X holds
    # plain NumPy arrays and the result below is selected with .iloc.
    matches = np.flatnonzero((data['id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"place_id {place_id!r} not found in data['id']")
    place_pos = matches[0]

    # A slice of [-0:] would select every row, so handle it explicitly.
    if top_n <= 0:
        return data.iloc[:0]

    # Calculate similarity between the query row and every row.
    place_vector = np.concatenate([review_matrix[place_pos], [type_codes[place_pos]]])
    all_vectors = np.hstack([review_matrix, type_codes.reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # argsort is ascending: take the last top_n and reverse for best-first.
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    return data.iloc[similar_indices]

# Example usage: recommend places similar to one hard-coded place id.
# `data` is the same frame the feature dict X was built from, so its rows
# line up with X as get_recommendations requires.
place_id = 'ChIJYcGr7GSb0S0RckePBrCWikw'  # Replace with an actual place ID from your dataset
recommendations = get_recommendations(place_id, data, model, top_n=10)
print(recommendations)


                                id                                    types  \
0      ChIJYcGr7GSb0S0RckePBrCWikw                           hotel, lodging   
9397   ChIJMx5rxaE_0i0Rs0e7AyFBAc4                         restaurant, food   
26459  ChIJs5sHkDub0S0Rs6_ArAb6GYw                         restaurant, food   
24537  ChIJy1yJjCFH0i0REyjNQfSK0Vw            cafe, store, restaurant, food   
2445   ChIJVWVU7lI60i0ROjIjerOky-4  indonesian_restaurant, restaurant, food   
26714  ChIJ5ZNHoDeJ0S0RhyP0FmUFXBQ   fast_food_restaurant, restaurant, food   
7369   ChIJ-UX-z8wg0i0R2t1WpZDLVDA  indonesian_restaurant, restaurant, food   
17727  ChIJpXrQ0Hhz0i0RlgGmL0UEIcE                          hostel, lodging   
14808  ChIJ0__EiSM90i0Rh-u4ELIOCcQ        thai_restaurant, restaurant, food   
21670  ChIJzUP7KWk90i0RawIr0Sl0yyM                           hotel, lodging   

      review_number                                             review  \
0          review 1  It has quite small room, and the ha

In [8]:
# Save the model
# model.save('39_test_model.keras')

In [9]:
# Load the model
# model = tf.keras.models.load_model('39_test_model.keras')

In [10]:
# df_review is read from the same CSV path that `data` / X were built from.
# get_recommendations indexes the notebook-level X, so the frame passed to it
# must keep that file's row order.
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
# Place-level table; the code below reads its 'id' and 'name' columns.
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Pick one random review row (unseeded, so the choice differs per run) and
# use its place id as the query.
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
# Look up the place name; .values[0] raises IndexError if the id is missing
# from df_place.
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations(rand_id, df_review, model, top_n=10)

Random place :ChIJE6JRPEv10S0RXGpICmBZF0g Mutiara Resto Kintamani


In [11]:

# Merge the recommendations with the place table on 'id' to attach place
# names (and the other df_place columns).
merged_recommendations = recommendations.merge(df_place, on='id')
# Sort recommendations by sentiment, highest first
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print name, type and rating of the recommendations.
# NOTE(review): the queried place (rand_id) is NOT filtered out here, so it
# can appear in the list - confirm whether it should be excluded.
# 'types_x' is a suffixed column name produced by the merge; presumably both
# frames carry a 'types' column - verify against df_place.
print(sorted_reccomendations[['name','types_x', 'rating']])

                       name  \
8          Kampuak Cottages   
4                Tulen Ubud   
1   Teba Junjungan Cottages   
7               Il Pomodoro   
3             Ayu Tamansari   
0   Mutiara Resto Kintamani   
9     Guling Samsam Merekak   
2  Sea Breeze Cafe Ceningan   
6            Villa Anjing 2   
5           Taman Indrakila   

                                             types_x  rating  
8                                            lodging     4.9  
4                                   restaurant, food     4.7  
1                         bed_and_breakfast, lodging     4.5  
7  italian_restaurant, pizza_restaurant, restaura...     4.5  
3                                     hotel, lodging     4.5  
0                                   restaurant, food     4.0  
9            indonesian_restaurant, restaurant, food     4.6  
2                              restaurant, bar, food     4.5  
6                                     hotel, lodging     3.5  
5                           