In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate

# Load the place catalogue and the review table from the dataset folder.
places_data = pd.read_csv("final-dataset/main_dataset.csv")
reviews_data = pd.read_csv("final-dataset/review_dataset.csv")

# Join every review to its place on the shared `id` column (reviews on the
# left, so clashing column names get `_x` for reviews and `_y` for places),
# then discard the columns that are not used further down.
user_place_reviews = (
    reviews_data
    .merge(places_data, on='id')
    .drop(columns=[
        'review_number',
        'latitude',
        'longitude',
        'address',
        'url',
        'status',
        'phone',
        'types_y',
        'price-level',
        'review 1',
        'review 2',
        'review 3',
        'review 4',
        'review 5',
    ])
)
user_place_reviews.head()




Unnamed: 0,id,types_x,review,user_id,sentiment-score,name,primary-type,rating,rating-count
0,ChIJYcGr7GSb0S0RckePBrCWikw,"hotel, lodging","It has quite small room, and the hallway is qu...",user_2573,0.50625,Singaraja Hotel (ex- POP! Hotel Hardys Singara...,hotel,4.1,2581.0
1,ChIJZbWX6Aia0S0R0tM3h1RZ1h8,"indonesian_restaurant, restaurant, food","Surprisingly, a really good warung that’s hidd...",user_2062,0.60625,Warung Bik Juk,indonesian_restaurant,4.5,648.0
2,ChIJYyHbhgia0S0RzdjNXLmcf54,"tourist_attraction, restaurant, food","Only had a fleeting visit here, came by coach,...",user_2348,0.491667,Harbour Tourist Area of Buleleng,tourist_attraction,4.3,2800.0
3,ChIJ6zf9LJCb0S0RFv3BdLl61ZY,"coffee_shop, cafe, food, store","One word, underrated! How come place like this...",user_1448,0.672338,Abuela Coffee,coffee_shop,4.9,164.0
4,ChIJxaITmQia0S0RyrbukE8vsJU,"tourist_attraction, place_of_worship","This temple is located in Singaraja, located i...",user_4426,0.569762,Klenteng Ling Gwan Kiong,place_of_worship,4.6,142.0


In [2]:
# print(type(user_place_reviews))

In [3]:
from sklearn.preprocessing import MultiLabelBinarizer

# Turn the comma-separated type string of each review row into a list of labels.
user_place_reviews['types_x'] = user_place_reviews['types_x'].str.split(', ')

# One indicator column per distinct place type found in `types_x`.
mlb = MultiLabelBinarizer()
one_hot = mlb.fit_transform(user_place_reviews['types_x'])

# Give the indicator frame the same index as the review frame: the column-wise
# concat below aligns rows by index label, so a default 0..n-1 index would only
# line up when the review frame happens to carry one as well.
one_hot_df = pd.DataFrame(one_hot, columns=mlb.classes_, index=user_place_reviews.index)

user_place_reviews = pd.concat([user_place_reviews, one_hot_df], axis=1)

# Keep a single row per (user, place) pair.
user_place_reviews = user_place_reviews.drop_duplicates(subset=['user_id', 'id'])

# user_place_reviews = user_place_reviews.drop(columns=['types_x'])

user_place_reviews.head()

Unnamed: 0,id,types_x,review,user_id,sentiment-score,name,primary-type,rating,rating-count,airport,...,tourist_attraction,travel_agency,turkish_restaurant,university,vegan_restaurant,vegetarian_restaurant,vietnamese_restaurant,wedding_venue,wholesaler,zoo
0,ChIJYcGr7GSb0S0RckePBrCWikw,"[hotel, lodging]","It has quite small room, and the hallway is qu...",user_2573,0.50625,Singaraja Hotel (ex- POP! Hotel Hardys Singara...,hotel,4.1,2581.0,0,...,0,0,0,0,0,0,0,0,0,0
1,ChIJZbWX6Aia0S0R0tM3h1RZ1h8,"[indonesian_restaurant, restaurant, food]","Surprisingly, a really good warung that’s hidd...",user_2062,0.60625,Warung Bik Juk,indonesian_restaurant,4.5,648.0,0,...,0,0,0,0,0,0,0,0,0,0
2,ChIJYyHbhgia0S0RzdjNXLmcf54,"[tourist_attraction, restaurant, food]","Only had a fleeting visit here, came by coach,...",user_2348,0.491667,Harbour Tourist Area of Buleleng,tourist_attraction,4.3,2800.0,0,...,1,0,0,0,0,0,0,0,0,0
3,ChIJ6zf9LJCb0S0RFv3BdLl61ZY,"[coffee_shop, cafe, food, store]","One word, underrated! How come place like this...",user_1448,0.672338,Abuela Coffee,coffee_shop,4.9,164.0,0,...,0,0,0,0,0,0,0,0,0,0
4,ChIJxaITmQia0S0RyrbukE8vsJU,"[tourist_attraction, place_of_worship]","This temple is located in Singaraja, located i...",user_4426,0.569762,Klenteng Ling Gwan Kiong,place_of_worship,4.6,142.0,0,...,1,0,0,0,0,0,0,0,0,0


In [4]:
# Write the review table (one-hot type columns included) to CSV without the index column.
# Note: `types_x` holds Python lists at this point, so it is stored as the lists' text form.
user_place_reviews.to_csv('final-dataset/user_place_reviews.csv', index=False)


In [5]:
# Place-type indicator columns that make up the preference / feature vectors
place_types = [
    "car_rental",
    "art_gallery",
    "museum",
    "performing_arts_theater",
    "hiking_area",
    "national_park",
    "night_club",
    "park",
    "tourist_attraction",
    "zoo",
    "american_restaurant",
    "bar",
    "barbecue_restaurant",
    "cafe",
    "chinese_restaurant",
    "coffee_shop",
    "fast_food_restaurant",
    "french_restaurant",
    "greek_restaurant",
    "indian_restaurant",
    "indonesian_restaurant",
    "italian_restaurant",
    "japanese_restaurant",
    "korean_restaurant",
    "lebanese_restaurant",
    "mediterranean_restaurant",
    "mexican_restaurant",
    "middle_eastern_restaurant",
    "restaurant",
    "seafood_restaurant",
    "spanish_restaurant",
    "steak_house",
    "sushi_restaurant",
    "thai_restaurant",
    "turkish_restaurant",
    "vietnamese_restaurant",
    "cottage",
    "guest_house",
    "hostel",
    "hotel",
    "lodging",
    "motel",
    "private_guest_room",
    "resort_hotel"
]

# Count, per user, how many of their reviewed places carry each type
user_preferences = user_place_reviews.groupby('user_id')[place_types].sum().reset_index()

# Binarise the counts: 1 if the user reviewed at least one place of that type,
# otherwise 0. A vectorised comparison yields the 0/1 matrix directly; dividing
# each row by its total first would not change which entries are positive, it
# would only introduce 0/0 NaNs for users with no listed type, and the
# element-wise `applymap` call is deprecated by pandas.
user_preferences[place_types] = (user_preferences[place_types] > 0).astype(int)

user_preferences.head(1)


  user_preferences[place_types] = user_preferences[place_types].applymap(lambda x: 1 if x > 0 else 0)


Unnamed: 0,user_id,car_rental,art_gallery,museum,performing_arts_theater,hiking_area,national_park,night_club,park,tourist_attraction,...,turkish_restaurant,vietnamese_restaurant,cottage,guest_house,hostel,hotel,lodging,motel,private_guest_room,resort_hotel
0,user_1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,0,0,1


In [6]:
# user_preferences.to_csv('final-dataset/user_preferences.csv', index=False)

In [7]:
# # Convert place_types to csv
# import csv
# 
# with open("final-dataset/place_types.csv", mode='w', newline='') as file:
#     writer = csv.writer(file)
#     for item in place_types:
#         writer.writerow([item])

In [8]:


# Make sure the preference columns are 0/1 indicators before joining. A plain
# comparison marks exactly the positive entries, without the row-wise division
# and without the deprecated element-wise `applymap`.
user_preferences[place_types] = (user_preferences[place_types] > 0).astype(int)

# Merge data with user preferences to align place features with user preferences.
# The user's columns receive the `_user` suffix; the place columns keep their names.
merged_data = user_place_reviews.merge(user_preferences, on='user_id', suffixes=('', '_user'))

# Prepare user features and place features for each review
user_features = merged_data[[f'{ptype}_user' for ptype in place_types]].values
place_features = merged_data[place_types].values

# Normalize features. Each input gets its own scaler so that the statistics
# learned from the user features are not overwritten when the place features
# are fitted, and remain available for scaling user vectors later on.
user_scaler = StandardScaler()
place_scaler = StandardScaler()
user_features_scaled = user_scaler.fit_transform(user_features)
place_features_scaled = place_scaler.fit_transform(place_features)

# Later cells refer to `scaler`; it stays bound to the scaler fitted on the
# place features.
scaler = place_scaler

# Generate labels (assuming binary relevance for simplicity).
# NOTE(review): these labels are random placeholders and carry no signal about
# the user/place pairs; a model fitted on them cannot learn real preferences.
# The generator is seeded so that re-running the notebook gives the same labels.
label_rng = np.random.default_rng(42)
labels = label_rng.integers(0, 2, size=len(user_features))

# Split into train and test sets
user_train, user_test, place_train, place_test, y_train, y_test = train_test_split(
    user_features_scaled, place_features_scaled, labels, test_size=0.3, random_state=42
)

# Hyperparameters
embedding_dim = 50

# User model
user_input = Input(shape=(len(place_types),), name='user_input')
user_embedding = Dense(embedding_dim, activation='relu')(user_input)

# Place model
place_input = Input(shape=(len(place_types),), name='place_input')
place_embedding = Dense(embedding_dim, activation='relu')(place_input)

# Concatenate user and place embeddings
merged = Concatenate()([user_embedding, place_embedding])
dense_1 = Dense(128, activation='relu')(merged)
dense_2 = Dense(64, activation='relu')(dense_1)
output = Dense(1, activation='sigmoid')(dense_2)

# Create and compile the model
model = Model(inputs=[user_input, place_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# # Fit the model
# history = model.fit(
#     [user_train, place_train],
#     y_train,
#     epochs=15,
#     batch_size=32,
#     validation_data=([user_test, place_test], y_test)
# )

# Save the model
# model.save('user_pref_model.keras')


  user_preferences[place_types] = user_preferences[place_types].applymap(lambda x: 1 if x > 0 else 0)



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 44)]                 0         []                            
                                                                                                  
 place_input (InputLayer)    [(None, 44)]                 0         []                            
                                                                                                  
 dense (Dense)               (None, 50)                   2250      ['user_input[0][0]']          
                                                                                                  
 dense_1 (Dense)             (None, 50)                   2250      ['place_input[0][0]']         
                                                                                             

In [11]:
# Rebind the feature names to DataFrame slices of merged_data:
# the `<type>_user` columns for the user side, the plain type columns for the place side.
user_features = merged_data.loc[:, [ptype + '_user' for ptype in place_types]]
place_features = merged_data.loc[:, place_types]

In [12]:
# Export the user feature frame to CSV without the index column.
user_features.to_csv('final-dataset/user_features.csv', index=False)

In [13]:
# Export the place feature frame to CSV without the index column.
place_features.to_csv('final-dataset/place_features.csv', index=False)

In [14]:
# Export the joined review / user-preference table to CSV without the index column.
merged_data.to_csv('final-dataset/merged_data.csv', index=False)

In [None]:
def recommend_places(selected_types, user_id=None, top_n=20):
    """Recommend places of the selected types, scored by the preference model.

    Relies on notebook-level objects: ``user_preferences``,
    ``user_place_reviews``, ``place_types``, ``scaler`` and ``model``.

    Parameters
    ----------
    selected_types : str or list of str
        Place-type column name(s); a place qualifies when any of them is set.
    user_id : str, optional
        User whose stored preference vector is fed to the model. When omitted
        (or falsy), a random 0/1 preference vector is drawn instead.
    top_n : int, default 20
        Number of highest-scoring places to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``primary-type`` and ``rating`` of the kept places,
        ordered by ``rating`` (highest first), one row per place name. Empty
        when no place matches ``selected_types``.

    Raises
    ------
    ValueError
        If ``user_id`` is given but has no row in ``user_preferences``.
    """
    result_columns = ['name', 'primary-type', 'rating']

    # Accept a single type name as well as a list of names.
    if isinstance(selected_types, str):
        selected_types = [selected_types]
    selected_types = list(selected_types)

    # Get user preferences
    if user_id:
        user_pref = user_preferences.loc[
            user_preferences['user_id'] == user_id, place_types
        ].values[:1]
        if len(user_pref) == 0:
            raise ValueError(f"No stored preferences for user_id {user_id!r}")
    else:
        user_pref = np.random.randint(2, size=(1, len(place_types)))

    filtered_places = user_place_reviews[user_place_reviews[selected_types].any(axis=1)]

    # Collapse to one row per place id so that feature row i and place_ids[i]
    # describe the same place. De-duplicating the feature vectors and the ids
    # separately would break that pairing, because distinct places can share an
    # identical type vector.
    unique_places = filtered_places.drop_duplicates(subset='id')
    if unique_places.empty:
        return unique_places[result_columns]

    place_features = unique_places[place_types].values
    place_ids = unique_places['id'].values

    # NOTE(review): the same `scaler` is applied to both inputs. In the training
    # cell it is last fitted on the place features, so the user vector is scaled
    # with place statistics here -- confirm whether that is intended.
    place_features_scaled = scaler.transform(place_features)
    user_pref_scaled = scaler.transform(np.repeat(user_pref, len(place_features), axis=0))

    predictions = model.predict([user_pref_scaled, place_features_scaled])

    # Indices of the top_n highest scores, best first.
    top_indices = np.argsort(predictions[:, 0])[-top_n:][::-1]
    recommended_place_ids = place_ids[top_indices]

    recommended_places = unique_places[unique_places['id'].isin(recommended_place_ids)]
    sorted_recommendations = (
        recommended_places
        .sort_values(by='rating', ascending=False)
        .drop_duplicates(subset=['name'])
    )

    return sorted_recommendations[result_columns]

# Draw one user id and one review's type list at random to drive the demo call
random_user = user_place_reviews['user_id'].sample(n=1).iloc[0]
random_type = user_place_reviews['types_x'].sample(n=1).iloc[0]

# Fixed example inputs (defined here, but not passed to the call below)
selected_user = 'user_1'
selected_types = ['hotel', 'lodging']

recommendations = recommend_places(random_type, user_id=random_user, top_n=5)

separator = "---------------------------------"
print(f"Top Recommendations for {random_user} with types {random_type}:")
print(separator)
# One name / type / rating entry per recommended place, each closed by a separator line
for _, place in recommendations.iterrows():
    print(
        f"Name: {place['name']}",
        f"Type: {place['primary-type']}",
        f"Rating: {place['rating']}",
        separator,
        sep="\n",
    )


In [None]:
# Pull the preference row of one example user as a 2-D array over the place-type columns
user_id = 'user_1'

user_pref = user_preferences.loc[
    user_preferences['user_id'].eq(user_id), place_types
].to_numpy()

In [None]:
# Show the preference array looked up for the example user.
print(user_pref)

In [None]:
# model.save('test_pref.keras')

In [None]:
# model = tf.keras.models.load_model('test_pref.keras')