In [None]:
# Quiet TensorFlow's native logging; set here so it is in place before the TensorFlow imports that follow
import os

os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '5'})

In [None]:
# Import libraries
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten, BatchNormalization, Dropout

In [None]:
# Load reviews dataset
# Later cells read the 'types', 'review', 'sentiment' and 'id' columns from this frame.
reviews_path = 'combined-dataset/final_reviews_data.csv'  # relative path, resolved against the working directory
data = pd.read_csv(reviews_path)

In [None]:
# Turn the categorical 'types' column into integer codes
encoder = LabelEncoder().fit(data['types'])
data['types_encoded'] = encoder.transform(data['types'])

# Build the vocabulary from the review texts and map every review to a list of word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad every sequence to the length of the longest review (0 when there are no reviews)
max_sequence_length = max((len(seq) for seq in sequences), default=0)
padded_sequences = pad_sequences(sequences=sequences, maxlen=max_sequence_length)

In [None]:
# Assemble the model inputs and the regression target.
# The dict keys mirror the names given to the Input layers; the sentiment
# column is used as-is (no rescaling happens in this cell).
X = dict(
    review=padded_sequences,
    types=data['types_encoded'].values,
)

Y = data['sentiment'].values

In [None]:
## Define Models Layer
# Two inputs: the padded word-index sequence of a review and a single encoded type id
review_input = Input(name='review', shape=(max_sequence_length,))
types_input = Input(name='types', shape=(1,))

# Review branch: learned word embeddings summarised by an LSTM into one 64-d vector
vocab_size = len(tokenizer.word_index) + 1  # one extra slot beyond the tokenizer's vocabulary
review_embedding = Embedding(input_dim=vocab_size, output_dim=64)(review_input)
review_lstm = LSTM(units=64)(review_embedding)

# Types branch: embed the type id, then flatten the (1, 64) result to a flat vector
num_types = len(encoder.classes_)
types_embedding = Embedding(input_dim=num_types, output_dim=64)(types_input)
types_flat = Flatten()(types_embedding)

# Join both branches into a single feature vector
concatenated = Concatenate()([review_lstm, types_flat])

# Single Dense -> BatchNorm -> Dropout block; no further hidden blocks are active
dense_1 = Dense(units=64, activation='relu')(concatenated)
batch_1 = BatchNormalization()(dense_1)
dropout_1 = Dropout(rate=0.2)(batch_1)

# One linear unit: the model regresses a single sentiment value
output = Dense(units=1, activation='linear')(dropout_1)

In [None]:
# Wire the two inputs to the output, print the architecture, then configure training
model = Model(
    inputs=[review_input, types_input],
    outputs=output,
)
model.summary()
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae'],
)

In [None]:
# Stop once validation loss has not improved for 10 epochs and roll back to the best weights seen
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Train for up to 100 epochs; 20% of the rows are set aside for validation
# and early stopping may end training sooner.
model.fit(
    [X['review'], X['types']],
    Y,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Report metrics on held-out rows only. Evaluating on the full dataset would mix
# in the samples the model was trained on and make loss/MAE look better than
# they are. Keras carves `validation_split` off the END of the arrays (before
# shuffling), so the same tail slice is reproduced here.
holdout_fraction = 0.2  # keep in sync with validation_split passed to model.fit
holdout_start = int(len(Y) * (1.0 - holdout_fraction))

loss, mae = model.evaluate(
    [X['review'][holdout_start:], X['types'][holdout_start:]],
    Y[holdout_start:],
)
print(f'Model has a loss of {loss} and a mean absolute error of {mae}')

In [None]:
# Persist the trained model to disk (path is relative to the working directory)
model.save('ml_model.keras')

In [None]:
# Reload the saved model under a separate name; the recommendation call below uses this copy
content_model = load_model('ml_model.keras')

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def get_place_recommendations(model, place_id, dataset, top_n=10, features=None):
    """Return the rows most similar to a place, with their predicted sentiment.

    The first row of ``dataset`` whose ``'id'`` equals ``place_id`` is used as
    the query. Every row is represented by its padded review sequence with the
    encoded type appended, and rows are ranked by cosine similarity to the
    query vector. Rows belonging to the query place itself are excluded so a
    place is never recommended to itself.

    NOTE(review): similarity is computed on raw word indices and label codes,
    not on learned embeddings, so it reflects token-id overlap rather than
    semantic closeness -- confirm this is the intended notion of similarity.
    NOTE(review): if ``dataset`` holds several rows per place, the result can
    still contain more than one row for the same (other) place.

    Args:
        model: Object with a Keras-style ``predict([reviews, types])`` method.
        place_id: Value to look up in ``dataset['id']``.
        dataset: DataFrame with an ``'id'`` column, row-aligned with ``features``.
        top_n: Maximum number of rows to return; values <= 0 return no rows.
        features: Optional dict with ``'review'`` (2-D array) and ``'types'``
            (1-D array). Defaults to the notebook-level ``X``.

    Returns:
        Tuple ``(similar_places, predicted_sentiments)``: the selected rows of
        ``dataset`` ordered from most to least similar, and the model output
        for those rows (one row per selected place).

    Raises:
        IndexError: If ``place_id`` does not occur in ``dataset['id']``.
        ValueError: If ``dataset`` and ``features`` have different row counts.
    """
    if features is None:
        features = X  # fall back to the feature dict built earlier in the notebook

    review_matrix = np.asarray(features['review'])
    type_codes = np.asarray(features['types'])

    if len(dataset) != len(review_matrix) or len(dataset) != len(type_codes):
        raise ValueError('dataset and features must have the same number of rows')

    # Work with row POSITIONS, not index labels: the feature arrays are
    # positional, so this stays correct even if the DataFrame index is not 0..n-1.
    is_query_place = (dataset['id'] == place_id).to_numpy()
    query_positions = np.flatnonzero(is_query_place)
    if query_positions.size == 0:
        raise IndexError(f'place_id {place_id!r} not found in dataset')
    query_pos = query_positions[0]

    all_vectors = np.hstack([review_matrix, type_codes.reshape(-1, 1)])
    similarities = cosine_similarity(all_vectors[[query_pos]], all_vectors)[0]

    # Rank only rows of OTHER places; slicing from the front of a descending
    # sort also makes top_n=0 yield nothing (a trailing [-0:] slice would
    # return every row).
    candidate_positions = np.flatnonzero(~is_query_place)
    limit = max(int(top_n), 0)
    ranking = np.argsort(-similarities[candidate_positions], kind='stable')[:limit]
    similar_indices = candidate_positions[ranking]

    similar_places = dataset.iloc[similar_indices]
    if similar_indices.size == 0:
        # Nothing to score; skip predict(), which is not meant for empty batches.
        return similar_places, np.empty((0, 1), dtype=np.float32)

    # Score only the selected rows instead of the whole dataset on every call.
    predicted_sentiments = model.predict(
        [review_matrix[similar_indices], type_codes[similar_indices]]
    )

    return similar_places, predicted_sentiments

In [None]:
# Look up recommendations for one example place using the reloaded model
place_id = 'ChIJIaGQ-Eg60i0RnT9pzyD_gvM'

recommendations = get_place_recommendations(
    model=content_model,
    place_id=place_id,
    dataset=data,
)
print(recommendations)