In [1]:
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten
from tensorflow.keras.models import Sequential

# from keras_tuner import RandomSearch, HyperModel

# Load the dataset
file_path = 'combined-dataset/final_reviews_data.csv'
data = pd.read_csv(file_path)

# Encode the 'types' column
label_encoder = LabelEncoder()
data['types_encoded'] = label_encoder.fit_transform(data['types'])

# Tokenize the 'review' column
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad the sequences
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Create the feature set
X = {
    'review': padded_sequences,
    'types': data['types_encoded'].values,
}

# Normalize the sentiment scores
y = data['sentiment'].values


2024-06-15 01:53:38.774394: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Define input layers
review_input = Input(shape=(max_sequence_length,), name='review')
types_input = Input(shape=(1,), name='types')

# Define embedding and LSTM layers for review input
review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(review_input)
review_lstm = LSTM(128)(review_embedding)

# Define embedding layer for types input
types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
types_flat = Flatten()(types_embedding)

# Concatenate the review and types embeddings
concatenated = Concatenate()([review_lstm, types_flat])

# Sequential part of the model
sequential_model = Sequential([
    Input(shape=(concatenated.shape[1],)),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Full model combining the inputs and sequential model
output = sequential_model(concatenated)
full_model = Model(inputs=[review_input, types_input], outputs=output)

# Compile the model
full_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
full_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
full_model.save('39_test_modelV2.keras')

2024-06-15 01:53:42.392919: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-15 01:53:42.500449: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-15 01:53:42.500511: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-15 01:53:42.503346: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-15 01:53:42.503414: I external/local_xla/xla/stream_executor

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-06-15 01:53:45.131198: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 71ms/step - loss: 1.0101 - mae: 0.5668 - val_loss: 0.0912 - val_mae: 0.2280
Epoch 2/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 69ms/step - loss: 0.0616 - mae: 0.1822 - val_loss: 0.0610 - val_mae: 0.1783
Epoch 3/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 70ms/step - loss: 0.0331 - mae: 0.1314 - val_loss: 0.0598 - val_mae: 0.1736
Epoch 4/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 71ms/step - loss: 0.0221 - mae: 0.1073 - val_loss: 0.0551 - val_mae: 0.1703
Epoch 5/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 72ms/step - loss: 0.0167 - mae: 0.0929 - val_loss: 0.0388 - val_mae: 0.1322
Epoch 6/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 72ms/step - loss: 0.0137 - mae: 0.0851 - val_loss: 0.0391 - val_mae: 0.1368
Epoch 7/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 69ms/step

In [3]:
# class SentimentHyperModel(HyperModel):
#     def build(self, hp):
#         review_input = Input(shape=(max_sequence_length,), name='review')
#         types_input = Input(shape=(1,), name='types')
# 
#         # Define embedding and LSTM layers for review input
#         embedding_output_dim = hp.Int('embedding_output_dim', min_value=64, max_value=256, step=32)
#         lstm_units = hp.Int('lstm_units', min_value=64, max_value=256, step=32)
#         review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_output_dim)(review_input)
#         review_lstm = LSTM(units=lstm_units)(review_embedding)
# 
#         # Define embedding layer for types input
#         types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
#         types_flat = Flatten()(types_embedding)
# 
#         # Concatenate the review and types embeddings
#         concatenated = Concatenate()([review_lstm, types_flat])
# 
#         # Add dense layers for final prediction
#         dense_units_1 = hp.Int('dense_units_1', min_value=64, max_value=256, step=32)
#         dense_units_2 = hp.Int('dense_units_2', min_value=32, max_value=128, step=16)
#         dense_1 = Dense(units=dense_units_1, activation='relu')(concatenated)
#         dense_2 = Dense(units=dense_units_2, activation='relu')(dense_1)
#         output = Dense(1, activation='linear')(dense_2)
# 
#         # Choose an optimizer
#         optimizer_choice = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
# 
#         if optimizer_choice == 'adam':
#             optimizer = tf.keras.optimizers.Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'))
#         elif optimizer_choice == 'sgd':
#             optimizer = tf.keras.optimizers.SGD(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'))
#         elif optimizer_choice == 'rmsprop':
#             optimizer = tf.keras.optimizers.RMSprop(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'))
# 
#         # Create the model
#         model = Model(inputs=[review_input, types_input], outputs=output)
#         model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
# 
#         return model

In [4]:
# tuner = RandomSearch(
#     hypermodel=SentimentHyperModel(),
#     objective='val_loss',
#     max_trials=10,  # Number of different hyperparameter sets to try
#     executions_per_trial=2,  # Number of models to train with the same hyperparameters
#     directory='model-testing',
#     project_name='sentiment_tuning'
# )
# 
# # %%
# # Search for the best hyperparameters
# tuner.search([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
# 
# # Get the optimal hyperparameters
# best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
# 
# # Build the best model
# best_model = tuner.hypermodel.build(best_hps)

In [5]:
# best_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
# 
# # %%
# # Save the best model
# best_model.save('best_sentiment_model.keras')

In [8]:
import tensorflow as tf
model = tf.keras.models.load_model('39_test_modelV2.keras')

In [9]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X['review'][place_idx]
    place_types = X['types'][place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Example 
place_id = 'ChIJIaGQ-Eg60i0RnT9pzyD_gvM'  # Replace with an actual place ID from your dataset
recommendations = get_recommendations(place_id, data, model, top_n=10)
print(recommendations)


[1m1024/1024[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 23ms/step
(                                id                                    types  \
2552   ChIJIaGQ-Eg60i0RnT9pzyD_gvM                               cafe, food   
18987  ChIJjT4DJK5G0i0R3pksi46oHZY                               cafe, food   
23697  ChIJ1dMM21FH0i0Ru3XNx9p8_S0  indonesian_restaurant, restaurant, food   
31801  ChIJAe-Pc09F0i0RFm0SEDsDyU8                       tourist_attraction   
13981  ChIJxdi5l84n0i0RyUyMbwuSf1w                         restaurant, food   
20320  ChIJqeoHnm2H0S0Re7c7kU8NUtE                 park, tourist_attraction   
2716   ChIJ65f5180V0i0RkMx79fIo0Ts                         restaurant, food   
4039   ChIJz0XREtZH0i0RowHdlImQUYQ           coffee_shop, cafe, store, food   
30428  ChIJQ7sXNoZB0i0RLRxhrTl5500                           hotel, lodging   
31575  ChIJt_0lSYZz0i0RfM-BdC8kMhU                         restaurant, food   

      review_number                               

In [None]:
# Save the model
model.save('39_test_model.keras')

In [None]:
# Load the model
# model = tf.keras.models.load_model('39_test_model.keras')

In [None]:
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations(rand_id, df_review, model, top_n=10)

In [None]:

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# sort reccomendations by sentiment
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print the recommendations with place names with out rand_ind
print(sorted_reccomendations[['name','types_x', 'rating']])

In [None]:
# Test cell, Run this cell to get recommendations for a random place in the dataset

import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Create the feature set
    X = {
        'review': padded_sequences,
        'types': data['types_encoded'].values,
    }
    
    # Normalize the sentiment scores
    y = data['sentiment'].values

    place_idx = data[data['id'] == place_id].index[0]
    place_review = X['review'][place_idx]
    place_types = X['types'][place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# model = tf.keras.models.load_model('best_sentiment_model.keras')
model = tf.keras.models.load_model('39_test_model.keras')

df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations(rand_id, df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# sort reccomendations by sentiment
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print the recommendations with place names with out rand_ind
print(sorted_reccomendations[['name','types_x', 'rating']])