In [1]:
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten
from tensorflow.keras.models import Sequential

# from keras_tuner import RandomSearch, HyperModel

# Load the dataset
file_path = 'combined-dataset/final_reviews_data.csv'
data = pd.read_csv(file_path)

# Encode the 'types' column
label_encoder = LabelEncoder()
data['types_encoded'] = label_encoder.fit_transform(data['types'])

# Tokenize the 'review' column
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad the sequences
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Create the feature set
X = {
    'review': padded_sequences,
    'types': data['types_encoded'].values,
}

# Normalize the sentiment scores
y = data['sentiment'].values


2024-06-15 22:02:58.386681: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-15 22:02:58.386749: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-15 22:02:58.387338: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-15 22:02:58.391827: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Define input layers
review_input = Input(shape=(max_sequence_length,), name='review')
types_input = Input(shape=(1,), name='types')

# Define embedding and LSTM layers for review input
review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(review_input)
review_lstm = LSTM(128)(review_embedding)

# Define embedding layer for types input
types_embedding = Embedding(input_dim=data['types_encoded'].nunique(), output_dim=10)(types_input)
types_flat = Flatten()(types_embedding)

# Concatenate the review and types embeddings
concatenated = Concatenate()([review_lstm, types_flat])

# Sequential part of the model
sequential_model = Sequential([
    Input(shape=(concatenated.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Full model combining the inputs and sequential model
output = sequential_model(concatenated)
full_model = Model(inputs=[review_input, types_input], outputs=output)

# Compile the model
full_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
full_model.fit([X['review'], X['types']], y, epochs=10, batch_size=32, validation_split=0.2)
full_model.save('test.keras')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [4]:
import tensorflow as tf
model = tf.keras.models.load_model('test.keras')

ValueError: Layer 'embedding_2' expected 1 variables, but received 0 variables during loading. Expected: ['embedding_2/embeddings:0']

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X['review'][place_idx]
    place_types = X['types'][place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Example 
place_id = 'ChIJIaGQ-Eg60i0RnT9pzyD_gvM'  # Replace with an actual place ID from your dataset
recommendations = get_recommendations(place_id, data, model, top_n=10)
print(recommendations)


In [None]:
# Save the model
model.save('39_test_model.keras')

In [None]:
# Load the model
# model = tf.keras.models.load_model('39_test_model.keras')

In [None]:
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations(rand_id, df_review, model, top_n=10)

In [None]:

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# sort reccomendations by sentiment
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print the recommendations with place names with out rand_ind
print(sorted_reccomendations[['name','types_x', 'rating']])

In [None]:
# Test cell, Run this cell to get recommendations for a random place in the dataset

import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten


print(tf.__version__)

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Create the feature set
    X = {
        'review': padded_sequences,
        'types': data['types_encoded'].values,
    }
    
    # Normalize the sentiment scores
    y = data['sentiment'].values

    place_idx = data[data['id'] == place_id].index[0]
    place_review = X['review'][place_idx]
    place_types = X['types'][place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X['review'], X['types']])

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X['review'], X['types'].reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n:][::-1]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# model = tf.keras.models.load_model('best_sentiment_model.keras')
model = tf.keras.models.load_model('39_test_modelV3.keras')

df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place :{rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

recommendations = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)[0]

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')
# sort reccomendations by sentiment
sorted_reccomendations = merged_recommendations.sort_values(by='sentiment', ascending=False)
# Print the recommendations with place names with out rand_ind
print(sorted_reccomendations[['name','types_x', 'rating']])

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    # Encode the 'types' column
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values

    # Get the index of the specified place_id
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X_review[place_idx]
    place_types = X_types[place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types], batch_size=128, verbose=0)

    # Calculate similarity
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X_review, X_types.reshape(-1, 1)])
    similarities = cosine_similarity([place_vector], all_vectors)[0]

    # Get top N similar places
    similar_indices = np.argsort(similarities)[-top_n-1:][::-1]
    similar_indices = similar_indices[similar_indices != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations
recommendations, predicted_sentiments = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by sentiment
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print the recommendations with place names without rand_id
print(sorted_recommendations[['name', 'types_x', 'rating']])


In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import faiss

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    # Encode the 'types' column
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values

    # Get the index of the specified place_id
    place_idx = data[data['id'] == place_id].index[0]
    place_review = X_review[place_idx]
    place_types = X_types[place_idx]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types], batch_size=128, verbose=0)

    # Combine review and types vectors
    place_vector = np.concatenate([place_review, [place_types]])
    all_vectors = np.hstack([X_review, X_types.reshape(-1, 1)])

    # Using Faiss for approximate nearest neighbors
    d = all_vectors.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(all_vectors.astype(np.float32))
    D, I = index.search(np.array([place_vector.astype(np.float32)]), top_n + 1)

    # Get top N similar places (excluding the place itself)
    similar_indices = I[0][I[0] != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations
recommendations, predicted_sentiments = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by sentiment
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print the recommendations with place names without rand_id
print(sorted_recommendations[['name', 'types_x', 'rating']])


In [9]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import faiss

# Function to get recommendations based on a place ID
def get_recommendations(place_id, data, model, top_n=10):
    # Encode the 'types' column
    label_encoder = LabelEncoder()
    data['types_encoded'] = label_encoder.fit_transform(data['types'])
    
    # Tokenize the 'review' column
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data['review'])
    sequences = tokenizer.texts_to_sequences(data['review'])
    
    # Pad the sequences
    max_sequence_length = max(len(seq) for seq in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
    
    # Prepare the input features
    X_review = padded_sequences
    X_types = data['types_encoded'].values.reshape(-1, 1)

    # Combine review and types vectors
    combined_vectors = np.hstack([X_review, X_types])

    # Dimensionality reduction using PCA
    pca = PCA(n_components=50)  # Adjust the number of components as needed
    reduced_vectors = pca.fit_transform(combined_vectors)

    # Get the index of the specified place_id
    place_idx = data[data['id'] == place_id].index[0]
    place_vector = reduced_vectors[place_idx]

    # Using Faiss for approximate nearest neighbors
    d = reduced_vectors.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(reduced_vectors.astype(np.float32))
    D, I = index.search(np.array([place_vector.astype(np.float32)]), top_n + 1)

    # Get top N similar places (excluding the place itself)
    similar_indices = I[0][I[0] != place_idx][:top_n]
    similar_places = data.iloc[similar_indices]

    # Predict the sentiment for all places
    predicted_sentiments = model.predict([X_review, X_types.squeeze()], batch_size=128, verbose=0)

    return similar_places, predicted_sentiments[similar_indices]

# Load the model
model = tf.keras.models.load_model('39_test_modelV3.keras')

# Load the datasets
df_review = pd.read_csv('combined-dataset/final_reviews_data.csv')
df_place = pd.read_csv('combined-dataset/combined_datasetV2.csv')

# Get a random place ID
random_place = df_review.sample(1)
rand_id = random_place['id'].values[0]
print(f'Random place: {rand_id}', df_place[df_place['id'] == rand_id]['name'].values[0])

# Get recommendations
recommendations, predicted_sentiments = get_recommendations('ChIJQ5jInls_0i0Ra53iWVquuq8', df_review, model, top_n=10)

# Merge the recommendations with place names based on 'id'
merged_recommendations = recommendations.merge(df_place, on='id')

# Sort recommendations by sentiment
sorted_recommendations = merged_recommendations.sort_values(by='sentiment', ascending=False)

# Print the recommendations with place names without rand_id
print(sorted_recommendations[['name', 'types_x', 'rating']])


NameError: name 'results_ort' is not defined

In [23]:
import onnxruntime as ort

session = ort.InferenceSession('model.onnx', providers=['CUDAExecutionProvider'])

[0;93m2024-06-15 17:15:29.548250196 [W:onnxruntime:, session_state.cc:1166 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.[m
[0;93m2024-06-15 17:15:29.548294410 [W:onnxruntime:, session_state.cc:1168 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.[m


In [24]:
results_ort = session.run(None, {"types": X['types'].reshape(-1, 1), "review": X['review'].astype(np.float32)})

InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Unexpected input data type. Actual: (tensor(int64)) , expected: (tensor(float))

In [16]:
print(type(X['types']))

<class 'numpy.ndarray'>


In [17]:
X['types']

array([381, 469, 896, ..., 860,  59, 560])