In [None]:
# Import necessary libraries
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Concatenate

In [None]:
# Constants
PLACE_TYPES = [
    "car_rental", "art_gallery", "museum", "performing_arts_theater", "hiking_area",
    "national_park", "night_club", "park", "tourist_attraction", "zoo",
    "american_restaurant", "bar", "barbecue_restaurant", "cafe", "chinese_restaurant",
    "coffee_shop", "fast_food_restaurant", "french_restaurant", "greek_restaurant",
    "indian_restaurant", "indonesian_restaurant", "italian_restaurant",
    "japanese_restaurant", "korean_restaurant", "lebanese_restaurant",
    "mediterranean_restaurant", "mexican_restaurant", "middle_eastern_restaurant",
    "restaurant", "seafood_restaurant", "spanish_restaurant", "steak_house",
    "sushi_restaurant", "thai_restaurant", "turkish_restaurant", "vietnamese_restaurant",
    "cottage", "guest_house", "hostel", "hotel", "lodging", "motel", "private_guest_room",
    "resort_hotel"
]

In [None]:
# Function to load and preprocess data
def load_and_preprocess_data():
    places_data = pd.read_csv("final-dataset/dataset_main.csv")
    reviews_data = pd.read_csv("final-dataset/dataset_reviews.csv")

    # Merge datasets on 'id'
    user_place_reviews = pd.merge(reviews_data, places_data, on='id')

    # Drop irrelevant columns
    user_place_reviews = user_place_reviews.drop(columns=[
        'review_number', 'latitude', 'longitude', 'address', 'url', 'status', 'phone',
        'types_y', 'price-level', 'review 1', 'review 2', 'review 3', 'review 4', 'review 5'
    ])

    # Split 'types_x' into multiple types
    user_place_reviews['types_x'] = user_place_reviews['types_x'].str.split(', ')

    # One-hot encode 'types_x'
    mlb = MultiLabelBinarizer()
    one_hot = mlb.fit_transform(user_place_reviews['types_x'])
    one_hot_df = pd.DataFrame(one_hot, columns=mlb.classes_)
    user_place_reviews = pd.concat([user_place_reviews, one_hot_df], axis=1)

    # Remove duplicates
    user_place_reviews = user_place_reviews.drop_duplicates(subset=['user_id', 'id'])

    # Compute user preferences
    user_preferences = user_place_reviews.groupby('user_id')[PLACE_TYPES].sum().reset_index()
    user_preferences[PLACE_TYPES] = user_preferences[PLACE_TYPES].div(
        user_preferences[PLACE_TYPES].sum(axis=1), axis=0
    )
    user_preferences[PLACE_TYPES] = user_preferences[PLACE_TYPES].applymap(lambda x: 1 if x > 0 else 0)
    
    user_place_reviews.to_csv('other-dataset/place_reviews.csv', index=False)
    user_preferences.to_csv('other-dataset/user_preferences.csv', index=False)

    return user_place_reviews, user_preferences

In [None]:
def create_model(input_dim, embedding_dim=50):
    """Build and compile the two-input user/place scoring network.

    Each input vector goes through its own ReLU dense layer of width
    ``embedding_dim``; the two results are concatenated and passed through
    dense layers of 128 and 64 ReLU units and a single sigmoid output unit.

    Args:
        input_dim: Length of both the user and the place feature vectors.
        embedding_dim: Width of the per-input dense layer.

    Returns:
        A compiled Keras ``Model`` taking ``[user_input, place_input]``,
        using SGD (learning rate 0.01), binary cross-entropy loss and the
        accuracy metric.
    """
    # User branch
    user_in = Input(shape=(input_dim,), name='user_input')
    user_vec = Dense(embedding_dim, activation='relu')(user_in)

    # Place branch
    place_in = Input(shape=(input_dim,), name='place_input')
    place_vec = Dense(embedding_dim, activation='relu')(place_in)

    # Joint head: concatenate both branches, then narrow down to one score
    hidden = Concatenate()([user_vec, place_vec])
    for units in (128, 64):
        hidden = Dense(units, activation='relu')(hidden)
    score = Dense(1, activation='sigmoid')(hidden)

    # Assemble and compile
    network = Model(inputs=[user_in, place_in], outputs=score)
    sgd = tf.keras.optimizers.SGD(learning_rate=0.01)
    network.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

    return network

In [None]:
# Function to recommend places
def recommend_places(model, user_preferences, user_place_reviews, selected_types, scaler, user_id=None, top_n=20):
    """Score candidate places for one user and return the best-scoring ones.

    Args:
        model: Object with a ``predict([user_batch, place_batch])`` method
            returning one score per row in column 0.
        user_preferences: Frame with ``user_id`` plus one column per entry
            of ``PLACE_TYPES``.
        user_place_reviews: Frame with at least ``id``, ``name``,
            ``primary-type``, ``rating``, the ``PLACE_TYPES`` columns and
            the ``selected_types`` columns.
        selected_types: Type columns to filter on; a place qualifies when
            any of them is non-zero.
        scaler: Fitted transformer applied to both the user and the place
            feature matrices.
        user_id: User to recommend for. ``None`` draws a random 0/1
            preference vector instead.
        top_n: Maximum number of places to pick by model score.

    Returns:
        pd.DataFrame: Columns ``name``, ``primary-type`` and ``rating`` for
        the ``top_n`` highest-scoring places, ordered by ``rating``
        (descending) and de-duplicated by ``name``. Empty when no place
        matches ``selected_types`` or ``top_n`` is not positive.

    Raises:
        ValueError: If ``user_id`` is given but has no row in
            ``user_preferences``.
    """
    result_columns = ['name', 'primary-type', 'rating']

    # Get user preferences. Compare against None so a legitimate id such as
    # 0 is not mistaken for "no user given".
    if user_id is not None:
        user_rows = user_preferences[user_preferences['user_id'] == user_id]
        if user_rows.empty:
            raise ValueError(f"No preferences found for user_id {user_id!r}")
        user_pref = user_rows[PLACE_TYPES].values[:1]
    else:
        user_pref = np.random.randint(2, size=(1, len(PLACE_TYPES)))

    # Filter places by selected types
    filtered_places = user_place_reviews[user_place_reviews[selected_types].any(axis=1)]

    # One candidate row per place. Features and ids must come from the SAME
    # de-duplicated frame so that prediction index i refers to place_ids[i].
    candidates = filtered_places.drop_duplicates(subset=['id'])
    if candidates.empty or top_n <= 0:
        return filtered_places.iloc[0:0][result_columns]

    place_features = candidates[PLACE_TYPES].values
    place_ids = candidates['id'].values

    place_features_scaled = scaler.transform(place_features)
    user_pref_scaled = scaler.transform(np.repeat(user_pref, len(place_features), axis=0))

    predictions = model.predict([user_pref_scaled, place_features_scaled])

    # Highest score first, then cut to top_n
    top_indices = np.argsort(predictions[:, 0])[::-1][:top_n]
    recommended_place_ids = place_ids[top_indices]

    # The final ordering is by rating, not by model score.
    recommended_places = filtered_places[filtered_places['id'].isin(recommended_place_ids)]
    sorted_recommendations = recommended_places.sort_values(by='rating', ascending=False)
    sorted_recommendations = sorted_recommendations.drop_duplicates(subset=['name'])

    return sorted_recommendations[result_columns]

In [None]:
# Main execution
def main():
    """Run the pipeline end to end: load data, train, save, recommend.

    Steps:
        1. Seed NumPy and TensorFlow.
        2. Build the review and preference tables and join them on
           ``user_id`` (user-side type columns get the ``_user`` suffix);
           the joined table is written to ``other-dataset/merged_data.csv``.
        3. Split into train/test, then fit ONE scaler on the training rows
           of both feature matrices and apply it to everything.
        4. Train the model for 15 epochs and print recommendations for one
           sampled user.
        5. Save the model to ``other-dataset/user_pref.keras``.
    """
    np.random.seed(42)
    tf.random.set_seed(42)

    # Load and preprocess data
    user_place_reviews, user_preferences = load_and_preprocess_data()

    # Merge data with user preferences
    merged_data = user_place_reviews.merge(user_preferences, on='user_id', suffixes=('', '_user'))

    merged_data.to_csv('other-dataset/merged_data.csv', index=False)

    # Prepare features
    user_features = merged_data[[f'{ptype}_user' for ptype in PLACE_TYPES]].values
    place_features = merged_data[PLACE_TYPES].values

    # Generate labels
    # NOTE(review): the labels are random noise, so the network cannot learn
    # a real user/place signal and accuracy should hover around 0.5. Replace
    # with a genuine target (e.g. observed interactions plus sampled
    # negatives) once one is available.
    labels = np.random.randint(2, size=(len(user_features),))

    # Split data into train and test sets BEFORE scaling so that test rows
    # do not leak into the scaler statistics.
    user_train_raw, user_test_raw, place_train_raw, place_test_raw, y_train, y_test = train_test_split(
        user_features, place_features, labels, test_size=0.3, random_state=42
    )

    # Normalize features with a single scaler fitted on both matrices.
    # recommend_places() applies the same scaler to user and place vectors,
    # so fitting it twice (user, then place) would leave it holding only the
    # place statistics and scale users differently than during training.
    scaler = StandardScaler()
    scaler.fit(np.vstack([user_train_raw, place_train_raw]))
    user_train = scaler.transform(user_train_raw)
    user_test = scaler.transform(user_test_raw)
    place_train = scaler.transform(place_train_raw)
    place_test = scaler.transform(place_test_raw)

    # Create and train the model
    model = create_model(input_dim=len(PLACE_TYPES))
    model.summary()

    history = model.fit(
        [user_train, place_train], y_train,
        epochs=15, batch_size=32,
        validation_data=([user_test, place_test], y_test)
    )

    # Make recommendations
    random_user = user_place_reviews['user_id'].sample(1).values[0]
    selected_types = ['hotel', 'lodging']

    recommendations = recommend_places(model, user_preferences, user_place_reviews, selected_types, scaler, user_id=random_user, top_n=5)

    # Save the model
    model.save('other-dataset/user_pref.keras')

    print(f"Top Recommendations for user {random_user}:")
    print(recommendations)

In [None]:
# Entry point: run the whole pipeline when this code is executed directly
# (as a script, or as a cell in a notebook kernel) rather than imported.
if __name__ == "__main__":
    main()