In [None]:
# Import libraries used for collaborative filtering

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Flatten, Dense, Input, Concatenate
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

In [None]:
# Read the combined reviews CSV into a DataFrame and preview its first ten rows
reviews_path = 'combined-dataset/final_reviews_dataV2.csv'
df = pd.read_csv(reviews_path)
df.head(10)

In [None]:
import random
import string
import hashlib
import time

def generate_cuid():
    """Return a random, fixed-width, CUID-shaped identifier.

    The result is the letter ``'c'`` followed by four randomly generated
    segments: 8 lowercase-alphanumeric characters (standing in for a
    timestamp), a counter drawn from ``[0, 9999]`` and zero-padded to 4
    digits, 4 lowercase-alphanumeric characters (standing in for a client
    fingerprint) and 8 more lowercase-alphanumeric characters.

    Every segment comes from the ``random`` module; nothing is derived from
    the clock or the host, so uniqueness is probabilistic only.

    Returns:
        str: A 25-character identifier.
    """
    alphabet = string.ascii_lowercase + string.digits

    c = 'c'
    timestamp_str = ''.join(random.choices(alphabet, k=8))  # Random stand-in for the timestamp segment
    counter = random.randint(0, 9999)  # Random counter
    client_fingerprint = ''.join(random.choices(alphabet, k=4))  # Random 4 char string
    random_string = ''.join(random.choices(alphabet, k=8))  # Random 8 char string

    # Zero-pad the counter: without padding, counters below 1000 would shorten
    # the identifier and make its length vary between calls.
    return f'{c}{timestamp_str}{counter:04d}{client_fingerprint}{random_string}'

# Give every distinct original user id its own freshly generated CUID
unique_user_ids = df['user_id'].unique()
user_id_to_cuid = dict(
    (original_id, generate_cuid()) for original_id in unique_user_ids
)

# Store the CUID for each row's user in a new column next to the original id
df['user_cuid'] = df['user_id'].map(user_id_to_cuid)

In [None]:

# Integer-encode place types and place ids, each with its own label encoder
type_encoder = LabelEncoder()
place_encoder = LabelEncoder()
df['types_encoded'] = type_encoder.fit_transform(df['types'])
df['place_id_encoded'] = place_encoder.fit_transform(df['id'])

# Features are the encoded place type plus the raw review text;
# the target is the encoded place id
feature_columns = ['types_encoded', 'review']
X = df[feature_columns].copy()
y = df['place_id_encoded'].values

# Hold out 20% of the rows for testing, with a fixed seed for the split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the model
class RecommenderModel(tf.keras.Model):
    """Keras model that scores places from a place type and a review text.

    The encoded place type goes through an embedding layer, the review text
    through a TF-IDF ``TextVectorization`` layer (at most 10000 tokens); both
    are concatenated, passed through a 128-unit ReLU layer and finally a
    softmax layer with one output per place.

    Args:
        num_places: Number of output classes (size of the softmax layer).
        num_types: Size of the place-type vocabulary for the embedding.
        embedding_dim: Width of the place-type embedding.
        review_texts: Optional collection of review strings used to adapt the
            text vectorizer. When omitted, the module-level
            ``X_train['review']`` is used, which keeps existing three-argument
            calls working.
    """

    def __init__(self, num_places, num_types, embedding_dim, review_texts=None):
        super().__init__()
        self.type_embedding = tf.keras.layers.Embedding(num_types, embedding_dim)
        self.review_vectorizer = tf.keras.layers.TextVectorization(max_tokens=10000, output_mode='tf_idf')
        self.dense = tf.keras.layers.Dense(128, activation='relu')
        self.output_layer = tf.keras.layers.Dense(num_places, activation='softmax')

        if review_texts is None:
            # Backward-compatible default: fall back to the notebook's global
            # training split so RecommenderModel(a, b, c) behaves as before.
            review_texts = X_train['review']
        # The TF-IDF vocabulary and weights must be learned before the layer is called.
        self.review_vectorizer.adapt(review_texts)

    def call(self, inputs):
        """Return the softmax output for a batch.

        Args:
            inputs: Mapping with the keys ``'types_encoded'`` and ``'review'``.

        Returns:
            The output of the final softmax layer.
        """
        type_embedding = self.type_embedding(inputs['types_encoded'])
        review_embedding = self.review_vectorizer(inputs['review'])
        # NOTE(review): concatenating on axis 1 assumes both tensors are
        # (batch, features), i.e. 'types_encoded' arrives as a 1-D batch of
        # integers -- confirm against every caller.
        x = tf.concat([type_embedding, review_embedding], axis=1)
        x = self.dense(x)
        return self.output_layer(x)

# Size the model from the number of distinct encoded places and types
embedding_dim = 50
num_types = df['types_encoded'].nunique()
num_places = df['place_id_encoded'].nunique()

model = RecommenderModel(
    num_places=num_places,
    num_types=num_types,
    embedding_dim=embedding_dim,
)

# Sparse categorical cross-entropy with Adam, tracking accuracy
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Create a custom data generator
def data_generator(X, y, batch_size=2):
    """Yield shuffled mini-batches of model inputs and targets, forever.

    Each pass visits every row exactly once in a freshly shuffled order
    (using NumPy's global random state) and then starts over.

    Args:
        X: DataFrame with the columns ``'types_encoded'`` and ``'review'``.
        y: Array-like of targets aligned by position with the rows of ``X``.
        batch_size: Maximum number of rows per batch; the last batch of a
            pass is smaller when the row count is not a multiple of it.

    Yields:
        tuple: ``(batch_X, batch_y)`` where ``batch_X`` is a dict with the
        keys ``'types_encoded'`` and ``'review'`` mapping to NumPy arrays and
        ``batch_y`` is a NumPy array of the matching targets.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``X`` has no rows.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    num_samples = len(X)
    if num_samples == 0:
        # With no rows the batching loop below would never yield, so the
        # surrounding `while True` would spin forever; fail fast instead.
        raise ValueError("data_generator requires at least one sample")

    # Extract the columns once instead of slicing the DataFrame twice per batch.
    types_encoded = np.asarray(X['types_encoded'])
    reviews = np.asarray(X['review'])
    # Coerce to an array so indexing below is positional even for a pandas Series.
    targets = np.asarray(y)

    while True:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        for start in range(0, num_samples, batch_size):
            end = min(start + batch_size, num_samples)
            batch_indices = indices[start:end]
            batch_X = {
                'types_encoded': types_encoded[batch_indices],
                'review': reviews[batch_indices]
            }
            batch_y = targets[batch_indices]
            yield batch_X, batch_y

# Endless shuffled batch streams for the training and held-out splits
train_gen = data_generator(X_train, y_train)
test_gen = data_generator(X_test, y_test)

# Train for 10 epochs; the "// 2" step counts correspond to the generator's
# default batch size of 2
model.fit(
    train_gen,
    steps_per_epoch=len(X_train) // 2,
    epochs=10,
    validation_data=test_gen,
    validation_steps=len(X_test) // 2,
)

# Make recommendations
def recommend(user_cuid, model, df, place_encoder, type_encoder):
    """Return place ids suggested by the model for one user's reviews.

    For every row of ``df`` belonging to ``user_cuid`` the model is queried
    with that row's place type and review text, and the five highest-scoring
    places are collected. The combined list is de-duplicated, keeping the
    order in which places were first suggested.

    Args:
        user_cuid: Value to match against ``df['user_cuid']``.
        model: Object whose ``predict(inputs)`` returns one row of scores per
            input row, indexed by encoded place id.
        df: DataFrame with the columns ``'user_cuid'``, ``'review'`` and
            ``'types'``.
        place_encoder: Encoder whose ``inverse_transform`` maps encoded place
            indices back to place ids.
        type_encoder: Encoder whose ``transform`` maps place types to the
            encoded values the model expects.

    Returns:
        list: Unique place ids, best-first per review; empty when the user
        has no rows in ``df``.
    """
    # Filter once and reuse the rows for both columns.
    user_rows = df[df['user_cuid'] == user_cuid]
    user_reviews = user_rows['review'].values
    user_types = user_rows['types'].values

    recommendations = []
    for review, place_type in zip(user_reviews, user_types):
        type_encoded = type_encoder.transform([place_type])
        user_input = {
            'types_encoded': np.array(type_encoded),
            'review': np.array([review])
        }
        predictions = model.predict(user_input)
        # argsort is ascending: the last five indices reversed are the top five by score.
        recommended_place_ids = np.argsort(predictions[0])[-5:][::-1]
        recommendations.extend(place_encoder.inverse_transform(recommended_place_ids))

    # Different reviews often point at the same places; drop repeats while
    # keeping first-seen order (dict keys preserve insertion order).
    return list(dict.fromkeys(recommendations))

# Example of making recommendations
user_cuid = "cp8gyzxgi9766uylq0jc3hkrq"
if not (df['user_cuid'] == user_cuid).any():
    # CUIDs are generated from unseeded random values each time the mapping
    # cell runs, so a hard-coded id is normally absent after a kernel restart.
    # Fall back to a user that exists rather than silently printing an empty list.
    user_cuid = df['user_cuid'].iloc[0]
recommendations = recommend(user_cuid, model, df, place_encoder, type_encoder)
print("Recommended places:", recommendations)


In [None]:

# Make recommendations
def recommend(user_cuid, model, df, place_encoder, type_encoder):
    """Return unique place ids suggested by the model for one user's reviews.

    This redefines the ``recommend`` helper from the previous cell so that
    the returned list contains no duplicates. For every row of ``df``
    belonging to ``user_cuid`` the model is queried with that row's place
    type and review text, and the five highest-scoring places are collected.
    Duplicates are removed once at the end, keeping the order in which
    places were first suggested so the result is the same on every run.

    Args:
        user_cuid: Value to match against ``df['user_cuid']``.
        model: Object whose ``predict(inputs)`` returns one row of scores per
            input row, indexed by encoded place id.
        df: DataFrame with the columns ``'user_cuid'``, ``'review'`` and
            ``'types'``.
        place_encoder: Encoder whose ``inverse_transform`` maps encoded place
            indices back to place ids.
        type_encoder: Encoder whose ``transform`` maps place types to the
            encoded values the model expects.

    Returns:
        list: Unique place ids, best-first per review; empty when the user
        has no rows in ``df``.
    """
    # Filter once and reuse the rows for both columns.
    user_rows = df[df['user_cuid'] == user_cuid]
    user_reviews = user_rows['review'].values
    user_types = user_rows['types'].values

    recommendations = []
    for review, place_type in zip(user_reviews, user_types):
        type_encoded = type_encoder.transform([place_type])
        user_input = {
            'types_encoded': np.array(type_encoded),
            'review': np.array([review])
        }
        predictions = model.predict(user_input)
        # argsort is ascending: the last five indices reversed are the top five by score.
        recommended_place_ids = np.argsort(predictions[0])[-5:][::-1]
        recommendations.extend(place_encoder.inverse_transform(recommended_place_ids))

    # drop duplicates once, after the loop; dict keys keep insertion order,
    # whereas a set would return the ids in arbitrary hash order.
    return list(dict.fromkeys(recommendations))

# Example of making recommendations
selected_cuid = "cdmnbu1268093imcrbhl9wsu9"

# Pick one row at random and use the user it belongs to
sampled_row = df.sample(1)
user_cuid = sampled_row['user_cuid'].values[0]

# Place table used to look up names by place id
df_place = pd.read_csv('final-dataset/main_dataset.csv')

recommendations = recommend(user_cuid, model, df, place_encoder, type_encoder)
print(f"Recommended places for {user_cuid}:")

# Show the reviews written by that user
user_mask = df['user_cuid'] == user_cuid
print(df.loc[user_mask, 'review'].values)

# Print the name(s) stored for each recommended place id
for place_id in recommendations:
    name_matches = df_place.loc[df_place['id'] == place_id, 'name']
    print(name_matches.values)
    



In [None]:
# Persist the trained model in the native Keras format.
# NOTE(review): RecommenderModel is a subclassed model that defines no
# get_config -- confirm the saved file can actually be loaded back.
model_path = 'collab_modelVwhatever.keras'
model.save(model_path)