In [3]:
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten
from tensorflow.keras.models import Sequential

# from keras_tuner import RandomSearch, HyperModel

# Load the review dataset (path is relative to the notebook's working directory)
file_path = 'final-dataset/review_dataset.csv'
data = pd.read_csv(file_path)

# Encode the 'types' column as integer class labels
label_encoder = LabelEncoder()
data['types_encoded'] = label_encoder.fit_transform(data['types'])

# Tokenize the 'review' column into sequences of integer word ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad every sequence to the length of the longest review in the dataset
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Create the feature set; the keys are the same names given to the model's
# Input layers ('review' and 'types')
X = {
    'review': padded_sequences,
    'types': data['types_encoded'].values,
}

# Regression target: the 'sentiment' column, used as-is
# (no normalization is applied here)
y = data['sentiment'].values


FileNotFoundError: [Errno 2] No such file or directory: 'final-dataset/review_dataset.csv'

In [None]:
# Display the loaded review DataFrame as this cell's output
data

In [None]:
# Define input layers: one padded token-id sequence per review, and one
# integer type id per review
review_input = Input(shape=(max_sequence_length,), name='review')
types_input = Input(shape=(1,), name='types')

# Define embedding and LSTM layers for review input
# (+1 on the vocabulary size because Tokenizer word indices start at 1;
# index 0 is what pad_sequences fills with)
review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(review_input)
review_lstm = LSTM(128)(review_embedding)

# Define embedding layer for types input; Flatten drops the length-1
# sequence axis so it can be concatenated with the LSTM output
types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
types_flat = Flatten()(types_embedding)

# Concatenate the review and types embeddings
concatenated = Concatenate()([review_lstm, types_flat])

# Sequential part of the model: a small dense head ending in a single
# linear unit (regression output)
sequential_model = Sequential([
    Input(shape=(concatenated.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Full model combining the inputs and sequential model
output = sequential_model(concatenated)
full_model = Model(inputs=[review_input, types_input], outputs=output)

# Compile the model for regression: MSE loss, MAE reported as a metric
full_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model (20% of the rows held out for validation), then save it
full_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
full_model.save('test.keras')

In [14]:
import tensorflow as tf
# Load a previously saved Keras model from disk.
# NOTE(review): the training cell saves to 'test.keras', not to this file
# name -- confirm which artifact is the intended one.
model = tf.keras.models.load_model('39_test_modelV4.keras')

In [15]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    """Return the ``top_n`` rows of ``data`` most similar to ``place_id``.

    Similarity is the cosine similarity between raw feature rows: the padded
    review token ids with the encoded type appended as one extra column.
    The features are read from the notebook-level ``X`` dict, so ``data``
    must be row-aligned with it (row ``i`` of ``data`` <-> row ``i`` of
    ``X['review']`` / ``X['types']``).

    Args:
        place_id: Value looked up in ``data['id']``; the first matching row
            is used as the query.
        data: DataFrame with an ``id`` column.
        model: Object whose ``predict`` accepts ``[X['review'], X['types']]``.
        top_n: Number of rows to return. The query row itself is not
            filtered out of the result.

    Returns:
        Tuple ``(similar_places, predictions)``: the selected rows of
        ``data`` in descending similarity order, and the corresponding rows
        of ``model.predict``'s output.

    Raises:
        IndexError: If ``place_id`` does not occur in ``data['id']``.
    """
    # Use the row *position*, not the index label: the feature arrays and
    # ``data.iloc`` below are positional, and the two only coincide for a
    # default RangeIndex.
    matches = np.flatnonzero((data['id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"place_id {place_id!r} not found in data['id']")
    place_idx = int(matches[0])

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # One feature row per review: padded token ids + encoded type
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    place_vector = all_vectors[place_idx]
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Descending similarity, then keep the first ``top_n``. Slicing after the
    # reversal (instead of ``[-top_n:]``) also yields an empty result for
    # ``top_n == 0`` rather than every row.
    similar_indices = np.argsort(similarities)[::-1][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Example: query a single place and print the raw result, which is the
# (similar_places, predicted_sentiments) tuple returned by get_recommendations
place_id = 'ChIJIaGQ-Eg60i0RnT9pzyD_gvM'  # Replace with an actual place ID from your dataset
recommendations = get_recommendations(place_id, data, model, top_n=10)
print(recommendations)


2024-06-17 19:23:37.148427: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907


(                                id  \
2552   ChIJIaGQ-Eg60i0RnT9pzyD_gvM   
18987  ChIJjT4DJK5G0i0R3pksi46oHZY   
31801  ChIJAe-Pc09F0i0RFm0SEDsDyU8   
13981  ChIJxdi5l84n0i0RyUyMbwuSf1w   
20320  ChIJqeoHnm2H0S0Re7c7kU8NUtE   
2716   ChIJ65f5180V0i0RkMx79fIo0Ts   
31575  ChIJt_0lSYZz0i0RfM-BdC8kMhU   
30428  ChIJQ7sXNoZB0i0RLRxhrTl5500   
16520  ChIJh_J43ZI40i0RSVe_79mX-BM   
26908  ChIJIyxSdyFi0S0RouWBS4YwZtw   

                                              types review_number  \
2552                                     cafe, food      review 1   
18987                                    cafe, food      review 3   
31801                            tourist_attraction      review 5   
13981                              restaurant, food      review 3   
20320                      park, tourist_attraction      review 4   
2716                               restaurant, food      review 1   
31575                              restaurant, food      review 5   
30428                       

In [None]:
# Save the model currently bound to `model` under a new file name
model.save('39_test_model.keras')

In [None]:
# Load the model
# model = tf.keras.models.load_model('39_test_model.keras')

In [None]:
# Load the per-review table and the per-place table (paths are relative to
# the notebook's working directory)
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Pick one random review row and use its place id as the query
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# get_recommendations returns (similar_places, predicted_sentiments).
# Unpack it so `recommendations` is the DataFrame that the following cell
# calls `.merge` on; binding the whole tuple makes that merge fail.
recommendations, predicted_sentiments = get_recommendations(rand_id, df_review, model, top_n=10)

In [None]:

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# Sort recommendations by the 'sentiment' column, highest first
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print name, types and rating of the recommended places
# ('types_x' is the 'types' column of the left-hand frame after the merge)
print(sorted_reccomendations[['name','types_x', 'rating']])

In [17]:
# Test cell, Run this cell to get recommendations for a random place in the dataset

import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten


# Record the TensorFlow version in the cell output
print(tf.__version__)

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    """Return the ``top_n`` rows of ``data`` most similar to ``place_id``.

    Features are rebuilt from ``data`` on every call: ``types`` is
    label-encoded, ``review`` is tokenized and padded to the longest review,
    and the two are stacked into one row per review. Rows are ranked by
    cosine similarity to the query row.

    NOTE(review): the label encoder and tokenizer are re-fit on ``data``
    here. The ids passed to ``model.predict`` only mean what the model
    expects if the model was trained with the same fit -- confirm.

    Args:
        place_id: Value looked up in ``data['id']``; the first matching row
            is used as the query.
        data: DataFrame with ``id``, ``types`` and ``review`` columns. It is
            not modified.
        model: Object whose ``predict`` accepts ``[reviews, types]``.
        top_n: Number of rows to return. The query row itself is not
            filtered out of the result.

    Returns:
        Tuple ``(similar_places, predictions)``: the selected rows (with an
        added ``types_encoded`` column) in descending similarity order, and
        the corresponding rows of ``model.predict``'s output.

    Raises:
        IndexError: If ``place_id`` does not occur in ``data['id']``.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame does not
    # silently gain a 'types_encoded' column.
    label_encoder = LabelEncoder()
    data = data.assign(types_encoded=label_encoder.fit_transform(data['types']))

    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])

    # Pad the sequences to the longest review
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

    # Create the feature set
    X = {
        'review': padded_sequences,
        'types': data['types_encoded'].values,
    }
    print('len of X feautre set',len(X['review']))
    print('shape of X feautre set', X['review'].shape)

    # Use the row *position*, not the index label: the feature arrays and
    # ``data.iloc`` below are positional.
    matches = np.flatnonzero((data['id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"place_id {place_id!r} not found in data['id']")
    place_idx = int(matches[0])

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # One feature row per review: padded token ids + encoded type
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    place_vector = all_vectors[place_idx]
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Descending similarity, then keep the first ``top_n`` (slicing after the
    # reversal also gives an empty result for ``top_n == 0``).
    similar_indices = np.argsort(similarities)[::-1][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# model = tf.keras.models.load_model('best_sentiment_model.keras')
# Load the saved Keras model used for the predictions
model = tf.keras.models.load_model('39_test_modelV4.keras')

# Per-review table (one row per review) and per-place table
df_review = pd.read_csv('combined-dataset/final_reviews_datav2.csv')
print('len of df', len(df_review))
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Pick one random review row and use its place id as the query
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Keep only the DataFrame of similar rows; the model predictions (second
# element of the returned tuple) are discarded here
recommendations = get_recommendations(rand_id, df_review, model, top_n=10)[0]

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# Sort recommendations by the 'sentiment' column, highest first
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print name, types and rating of the recommended places
# ('types_x' is the 'types' column of the left-hand frame after the merge)
print(sorted_reccomendations[['name','types_x', 'rating']])

2.15.0
len of df 32745
Random place :ChIJ4e5z3slB0i0RD9knyHb6U6E TAN-PANAMA COFFEE
len of X feautre set 32745
shape of X feautre set (32745, 779)
                                   name  \
6                       House of hobbit   
2                           Nusa Penida   
0                     TAN-PANAMA COFFEE   
1                      Wild Habit Pizza   
7  Oribinal Burger (by Ketumbar Studio)   
9               25:PM Coffee - Nusa Dua   
5                           Ampik Batur   
4                     Damuh Guest House   
3   IBB Waroeng ( Ikan Bakar Buleleng )   
8                  Vrindavan Ubud Villa   

                                             types_x  rating  
6                                            lodging     4.4  
2                                     hotel, lodging     4.9  
0                     coffee_shop, cafe, store, food     4.6  
1  pizza_restaurant, italian_restaurant, restaura...     4.8  
7  hamburger_restaurant, american_restaurant, res...     4.9  
9 

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    """Return the ``top_n`` rows of ``data`` most similar to ``place_id``.

    Features are rebuilt from ``data`` on every call: ``types`` is
    label-encoded, ``review`` is tokenized and padded to the longest review,
    and the two are stacked into one row per review. Rows are ranked by
    cosine similarity to the query row, and the query row is excluded.

    NOTE(review): the label encoder and tokenizer are re-fit on ``data``
    here. The ids passed to ``model.predict`` only mean what the model
    expects if the model was trained with the same fit -- confirm.

    Args:
        place_id: Value looked up in ``data['id']``; the first matching row
            is used as the query.
        data: DataFrame with ``id``, ``types`` and ``review`` columns. It is
            not modified.
        model: Object whose ``predict`` accepts ``[reviews, types]``.
        top_n: Maximum number of rows to return.

    Returns:
        Tuple ``(similar_places, predictions)``: the selected rows (with an
        added ``types_encoded`` column) in descending similarity order, and
        the corresponding rows of ``model.predict``'s output.

    Raises:
        IndexError: If ``place_id`` does not occur in ``data['id']``.
    """
    # Encode the 'types' column. ``assign`` returns a new frame, so the
    # caller's DataFrame does not silently gain a 'types_encoded' column.
    label_encoder = LabelEncoder()
    data = data.assign(types_encoded=label_encoder.fit_transform(data['types']))

    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])

    # Pad the sequences to the longest review
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values

    # Use the row *position*, not the index label: the feature arrays and
    # ``data.iloc`` below are positional.
    matches = np.flatnonzero((data['id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"place_id {place_id!r} not found in data['id']")
    place_idx = int(matches[0])

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types], batch_size=128, verbose=0)

    # One feature row per review: padded token ids + encoded type
    all_vectors = np.hstack([X_review, X_types.reshape(-1, 1)])
    place_vector = all_vectors[place_idx]
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Rank by descending similarity, drop the query row, keep ``top_n``
    ranked = np.argsort(similarities)[::-1]
    similar_indices = ranked[ranked != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets (per-review table and per-place table)
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations for the place announced above. Querying a fixed,
# hard-coded id here would make the printed "Random place" line describe a
# different place than the one the results are for.
recommendations, predicted_sentiments = get_recommendations(rand_id, df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by the 'sentiment' column, highest first
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print name, types and rating of the recommended places
# ('types_x' is the 'types' column of the left-hand frame after the merge)
print(sorted_recommendations[['name', 'types_x', 'rating']])


In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import faiss

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    """Return the ``top_n`` rows of ``data`` nearest to ``place_id``.

    Features are rebuilt from ``data`` on every call: ``types`` is
    label-encoded, ``review`` is tokenized and padded to the longest review,
    and the two are stacked into one row per review. Neighbours are found
    with a faiss ``IndexFlatL2`` (L2 distance) and the query row is excluded.

    NOTE(review): the label encoder and tokenizer are re-fit on ``data``
    here. The ids passed to ``model.predict`` only mean what the model
    expects if the model was trained with the same fit -- confirm.

    Args:
        place_id: Value looked up in ``data['id']``; the first matching row
            is used as the query.
        data: DataFrame with ``id``, ``types`` and ``review`` columns. It is
            not modified.
        model: Object whose ``predict`` accepts ``[reviews, types]``.
        top_n: Maximum number of rows to return.

    Returns:
        Tuple ``(similar_places, predictions)``: the selected rows (with an
        added ``types_encoded`` column) in the order faiss returned them,
        and the corresponding rows of ``model.predict``'s output.

    Raises:
        IndexError: If ``place_id`` does not occur in ``data['id']``.
    """
    # Encode the 'types' column. ``assign`` returns a new frame, so the
    # caller's DataFrame does not silently gain a 'types_encoded' column.
    label_encoder = LabelEncoder()
    data = data.assign(types_encoded=label_encoder.fit_transform(data['types']))

    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])

    # Pad the sequences to the longest review
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values

    # Use the row *position*, not the index label: the feature arrays, the
    # faiss ids and ``data.iloc`` below are all positional.
    matches = np.flatnonzero((data['id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"place_id {place_id!r} not found in data['id']")
    place_idx = int(matches[0])

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types], batch_size=128, verbose=0)

    # One feature row per review: padded token ids + encoded type
    all_vectors = np.hstack([X_review, X_types.reshape(-1, 1)])
    place_vector = all_vectors[place_idx]

    # Nearest-neighbour search with faiss. IndexFlatL2 is a flat
    # (brute-force) index, so the search is exact rather than approximate.
    d = all_vectors.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(all_vectors.astype(np.float32))
    # Ask for one extra neighbour (the query itself), but never for more
    # vectors than the index holds.
    k = min(top_n + 1, len(all_vectors))
    _, I = index.search(np.array([place_vector.astype(np.float32)]), k)

    # Drop the query row and any -1 ids (faiss pads with -1 when fewer than
    # k results exist; -1 would otherwise select the last row via iloc).
    neighbours = I[0]
    similar_indices = neighbours[(neighbours >= 0) & (neighbours != place_idx)][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets (per-review table and per-place table)
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations for the place announced above. Querying a fixed,
# hard-coded id here would make the printed "Random place" line describe a
# different place than the one the results are for.
recommendations, predicted_sentiments = get_recommendations(rand_id, df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by the 'sentiment' column, highest first
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print name, types and rating of the recommended places
# ('types_x' is the 'types' column of the left-hand frame after the merge)
print(sorted_recommendations[['name', 'types_x', 'rating']])


In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import faiss

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    """Return the ``top_n`` rows of ``data`` nearest to ``place_id``.

    Features are rebuilt from ``data`` on every call: ``types`` is
    label-encoded, ``review`` is tokenized and padded to the longest review,
    and the two are stacked into one row per review. The rows are reduced
    with PCA (up to 50 components) and neighbours are found with a faiss
    ``IndexFlatL2`` (L2 distance). The query row is excluded.

    NOTE(review): the label encoder and tokenizer are re-fit on ``data``
    here. The ids passed to ``model.predict`` only mean what the model
    expects if the model was trained with the same fit -- confirm.

    Args:
        place_id: Value looked up in ``data['id']``; the first matching row
            is used as the query.
        data: DataFrame with ``id``, ``types`` and ``review`` columns. It is
            not modified.
        model: Object whose ``predict`` accepts ``[reviews, types]``.
        top_n: Maximum number of rows to return.

    Returns:
        Tuple ``(similar_places, predictions)``: the selected rows (with an
        added ``types_encoded`` column) in the order faiss returned them,
        and the corresponding rows of ``model.predict``'s output.

    Raises:
        IndexError: If ``place_id`` does not occur in ``data['id']``.
    """
    # Encode the 'types' column. ``assign`` returns a new frame, so the
    # caller's DataFrame does not silently gain a 'types_encoded' column.
    label_encoder = LabelEncoder()
    data = data.assign(types_encoded=label_encoder.fit_transform(data['types']))

    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])

    # Pad the sequences to the longest review
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

    # Prepare the input features (types as a column vector for stacking)
    X_review = padded_sequences
    X_types = data['types_encoded'].values.reshape(-1, 1)

    # Combine review and types vectors
    combined_vectors = np.hstack([X_review, X_types])

    # Dimensionality reduction using PCA. The component count is capped by
    # the data's own dimensions so small inputs do not make PCA reject it.
    n_components = min(50, *combined_vectors.shape)
    pca = PCA(n_components=n_components)
    reduced_vectors = pca.fit_transform(combined_vectors)

    # Use the row *position*, not the index label: the feature arrays, the
    # faiss ids and ``data.iloc`` below are all positional.
    matches = np.flatnonzero((data['id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"place_id {place_id!r} not found in data['id']")
    place_idx = int(matches[0])
    place_vector = reduced_vectors[place_idx]

    # Nearest-neighbour search with faiss. IndexFlatL2 is a flat
    # (brute-force) index, so the search is exact rather than approximate.
    d = reduced_vectors.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(reduced_vectors.astype(np.float32))
    # Ask for one extra neighbour (the query itself), but never for more
    # vectors than the index holds.
    k = min(top_n + 1, len(reduced_vectors))
    _, I = index.search(np.array([place_vector.astype(np.float32)]), k)

    # Drop the query row and any -1 ids (faiss pads with -1 when fewer than
    # k results exist; -1 would otherwise select the last row via iloc).
    neighbours = I[0]
    similar_indices = neighbours[(neighbours >= 0) & (neighbours != place_idx)][:top_n]
    similar_places = data.iloc[similar_indices]

    # Predict the sentiment for all places. ``ravel`` keeps a 1-D array even
    # for a single row, where ``squeeze`` would collapse it to a scalar.
    predicted_sentiments = model.predict([X_review, X_types.ravel()], batch_size=128, verbose=0)

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets (per-review table and per-place table)
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations for the place announced above. Querying a fixed,
# hard-coded id here would make the printed "Random place" line describe a
# different place than the one the results are for.
recommendations, predicted_sentiments = get_recommendations(rand_id, df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by the 'sentiment' column, highest first
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print name, types and rating of the recommended places
# ('types_x' is the 'types' column of the left-hand frame after the merge)
print(sorted_recommendations[['name', 'types_x', 'rating']])


2024-06-16 00:47:52.505285: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-16 00:47:52.614954: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-16 00:47:52.615008: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-16 00:47:52.616515: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-16 00:47:52.625781: I tensorflow/core/platform/cpu_feature_guar

ModuleNotFoundError: No module named 'faiss'

In [2]:
import onnxruntime as ort

# Open the exported ONNX model, requesting the CUDA execution provider
session = ort.InferenceSession('test.onnx', providers=['CUDAExecutionProvider'])
# Run inference, feeding inputs by name: "types" as a column vector and
# "review" cast to float32.
# NOTE(review): this cell uses `X` and `np` without defining or importing
# them, so it only runs after the cells that create them (the recorded run
# failed with NameError on `X`).
results_ort = session.run(None, {"types": X['types'].reshape(-1, 1), "review": X['review'].astype(np.float32)})
# Debug output: type and contents of the encoded types array
print(type(X['types']))
X['types']

[1;31m2024-06-16 00:47:34.827301102 [E:onnxruntime:Default, provider_bridge_ort.cc:1744 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1426 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcudnn.so.8: cannot open shared object file: No such file or directory
[m
[0;93m2024-06-16 00:47:34.827376805 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:870 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Please reference https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirementsto ensure all dependencies are met.[m


NameError: name 'X' is not defined