### Import library

In [101]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [102]:
df = pd.read_csv('Copyofmerged.csv')

In [103]:
print(df.columns)

Index(['like_id', 'user_id', 'cafe_id', 'Kpopers', 'JapanLovers',
       'AnimalLovers', 'Quite', 'MusicLovers', 'BookLovers', 'ArtLovers',
       'ViewsLovers', 'CoffeeLovers', 'NonCoffeeLovers', 'groupsComer',
       'outdoor', 'Indoor', 'RetroVintage', 'MinimalisCafe', 'IndustrialCafe',
       'ModernCafe', 'ArtCafe', 'PetCafe', 'BooksCafe', '24HoursCafe',
       'MeetingCafe', 'StudyCafe', 'goodViews', 'FamilyCafe', 'CountryFood',
       'Smoking', 'NonSmoking', 'Coffee', 'NonCoffee', 'GardenCafe', 'vote'],
      dtype='object')


In [106]:
print(df.head(5))

   like_id  user_id  cafe_id  Kpopers  JapanLovers  AnimalLovers  Quite  \
0        1      478       31        0            0             0      1   
1        2      775       54        1            0             0      1   
2        3      231       96        1            1             0      0   
3        4      447       33        1            1             1      0   
4        5      435       15        0            1             1      1   

   MusicLovers  BookLovers  ArtLovers  ...  StudyCafe  goodViews  FamilyCafe  \
0            0           0          0  ...          1          0           1   
1            1           0          0  ...          1          0           0   
2            0           1          1  ...          1          1           0   
3            0           0          1  ...          1          1           1   
4            0           1          1  ...          1          0           0   

   CountryFood  Smoking  NonSmoking  Coffee  NonCoffee  GardenCafe  

In [107]:
df_model = df.drop(columns=['like_id', 'user_id', 'cafe_id'])

In [108]:
print(df_model.columns)

Index(['Kpopers', 'JapanLovers', 'AnimalLovers', 'Quite', 'MusicLovers',
       'BookLovers', 'ArtLovers', 'ViewsLovers', 'CoffeeLovers',
       'NonCoffeeLovers', 'groupsComer', 'outdoor', 'Indoor', 'RetroVintage',
       'MinimalisCafe', 'IndustrialCafe', 'ModernCafe', 'ArtCafe', 'PetCafe',
       'BooksCafe', '24HoursCafe', 'MeetingCafe', 'StudyCafe', 'goodViews',
       'FamilyCafe', 'CountryFood', 'Smoking', 'NonSmoking', 'Coffee',
       'NonCoffee', 'GardenCafe', 'vote'],
      dtype='object')


In [114]:
print("Jumlah sampel dalam df_model:", len(df_model))

Jumlah sampel dalam df_model: 1000


### Split data and standarized

In [109]:
# Split the data into features (X) and target (y)
X = df_model.drop(columns=['vote'])  # Features
y = df_model['vote']  # Target
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [115]:
print("Jumlah sampel dalam X_train:", len(X_train))

Jumlah sampel dalam X_train: 800


In [117]:
print("Dimensi X_train:", X_train.shape)
print("Info X_train:")
print(X_train.info())

Dimensi X_train: (800, 31)
Info X_train:
<class 'pandas.core.frame.DataFrame'>
Index: 800 entries, 29 to 102
Data columns (total 31 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Kpopers          800 non-null    int64
 1   JapanLovers      800 non-null    int64
 2   AnimalLovers     800 non-null    int64
 3   Quite            800 non-null    int64
 4   MusicLovers      800 non-null    int64
 5   BookLovers       800 non-null    int64
 6   ArtLovers        800 non-null    int64
 7   ViewsLovers      800 non-null    int64
 8   CoffeeLovers     800 non-null    int64
 9   NonCoffeeLovers  800 non-null    int64
 10  groupsComer      800 non-null    int64
 11  outdoor          800 non-null    int64
 12  Indoor           800 non-null    int64
 13  RetroVintage     800 non-null    int64
 14  MinimalisCafe    800 non-null    int64
 15  IndustrialCafe   800 non-null    int64
 16  ModernCafe       800 non-null    int64
 17  ArtCafe          

In [110]:
type(X)
print(X.columns)
print(X[:5])

Index(['Kpopers', 'JapanLovers', 'AnimalLovers', 'Quite', 'MusicLovers',
       'BookLovers', 'ArtLovers', 'ViewsLovers', 'CoffeeLovers',
       'NonCoffeeLovers', 'groupsComer', 'outdoor', 'Indoor', 'RetroVintage',
       'MinimalisCafe', 'IndustrialCafe', 'ModernCafe', 'ArtCafe', 'PetCafe',
       'BooksCafe', '24HoursCafe', 'MeetingCafe', 'StudyCafe', 'goodViews',
       'FamilyCafe', 'CountryFood', 'Smoking', 'NonSmoking', 'Coffee',
       'NonCoffee', 'GardenCafe'],
      dtype='object')
   Kpopers  JapanLovers  AnimalLovers  Quite  MusicLovers  BookLovers  \
0        0            0             0      1            0           0   
1        1            0             0      1            1           0   
2        1            1             0      0            0           1   
3        1            1             1      0            0           0   
4        0            1             1      1            0           1   

   ArtLovers  ViewsLovers  CoffeeLovers  NonCoffeeLovers  ...  

In [111]:
type(y)
# print(y.columns)
print(y[:5])

0    0
1    0
2    1
3    1
4    0
Name: vote, dtype: int64


In [112]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [113]:
print("Dimensi X_train_scaled:", X_train_scaled.shape)
print("Dimensi X_test_scaled:", X_test_scaled.shape)

Dimensi X_train_scaled: (800, 31)
Dimensi X_test_scaled: (200, 31)


In [116]:
print("Mean setelah transformasi:", X_train_scaled.mean(axis=0))
print("Standar deviasi setelah transformasi:", X_train_scaled.std(axis=0))

Mean setelah transformasi: [-8.99280650e-17 -2.44249065e-17 -1.15463195e-16 -1.28785871e-16
 -3.55271368e-17  3.55271368e-17  9.54791801e-17 -7.10542736e-17
 -1.11022302e-16 -6.43929354e-17  1.02140518e-16 -2.22044605e-17
  6.88338275e-17  4.44089210e-17 -7.10542736e-17 -7.54951657e-17
  3.55271368e-17 -3.77475828e-17  2.22044605e-17  0.00000000e+00
 -4.44089210e-18  2.99760217e-17  1.11022302e-16  8.43769499e-17
 -9.10382880e-17 -1.33226763e-17  1.06581410e-16 -1.59872116e-16
 -7.77156117e-17  1.90958360e-16 -7.54951657e-17]
Standar deviasi setelah transformasi: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1.]


### Modelling, Evaluate, Save model

In [118]:
# Build a recommendation model (Neural Network)
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

In [119]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [120]:
# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=512, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x18cdde96350>

In [121]:
# Evaluate the model
y_pred = (model.predict(X_test_scaled) > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

Model Accuracy: 0.475


In [97]:
model.save("PPmodel.h5", save_format='h5')

  saving_api.save_model(


In [98]:
import joblib
joblib.dump(scaler, "scaler_PPmodel.pkl")

['scaler_PPmodel.pkl']

### Fuction

In [122]:
# def palateCheck(user_id_to_predict, cafe_id_to_predict):
#   user_cafe_data = df_model[(df['user_id'] == user_id_to_predict) & (df['cafe_id'] == cafe_id_to_predict)].drop(columns=['vote'])
#   user_cafe_data_scaled = scaler.transform(user_cafe_data)
#   prediction = model.predict(user_cafe_data_scaled)
#   return prediction
#   #return value of cocoklogi

In [126]:
def palateCheck(user_id_to_predict, cafe_id_to_predict):
    user_cafe_data = df[(df['user_id'] == user_id_to_predict) & (df['cafe_id'] == cafe_id_to_predict)].drop(columns=['vote'])

    # Cek apakah user_cafe_data tidak kosong
    if user_cafe_data.empty:
        raise ValueError("No data found for the given user_id and cafe_id")

    user_cafe_data_scaled = scaler.transform(user_cafe_data)
    prediction = model.predict(user_cafe_data_scaled)
    return prediction

In [82]:
def palateFilterSearch(Kpopers=0, JapanLovers=0, AnimalLovers=0, Quite=0, 
                       MusicLovers=0, BookLovers=0, ArtLovers=0, ViewsLovers=0, 
                       CoffeeLovers=0, NonCoffeeLovers=0, groupsComer=0):
    kafe_cocok = []
    for cafe in df_cafe:
        user_cafe_data = df_model[(df_model['Kpopers'] == Kpopers),
                                  (df_model['JapanLovers'] == JapanLovers),
                                  (df_model['AnimalLovers'] == AnimalLovers),
                                  (df_model['Quite'] == Quite),
                                  (df_model['MusicLovers'] == MusicLovers),
                                  (df_model['BookLovers'] == BookLovers),
                                  (df_model['ArtLovers'] == ArtLovers),
                                  (df_model['ViewsLovers'] == ViewsLovers),
                                  (df_model['CoffeeLovers'] == CoffeeLovers),
                                  (df_model['NonCoffeeLovers'] == NonCoffeeLovers),
                                  (df_model['groupsComer'] == groupsComer)]
        user_cafe_data_scaled = scaler.transform(user_cafe_data)
        prediction = model.predict(user_cafe_data_scaled)
        if prediction > 0.5:
            kafe_cocok.append(cafe)
    return kafe_cocok
#return array of cafe_ids that pass the cocoklogi thereshold

# def cari_kafe(jarak_x, kafe):
#     kafe_cocok = []
#     for i in range(len(kafe)):
#         jarak = get_jarak("lokasi_saya", kafe[i][2])
#         jarak = float(jarak.split()[0].replace(",", "."))
#         if jarak == jarak_x:
#             kafe_cocok.append(kafe[i][0])
#     kafe_cocok.sort()
#     return kafe_cocok

### Implementation

In [127]:
print(palateCheck(478,51))

ValueError: No data found for the given user_id and cafe_id

In [41]:
# Example: Making predictions for a specific user (replace 'user_id_to_predict' with the actual user ID)
user_id_to_predict = 478
user_data = df_model[df['user_id'] == user_id_to_predict].drop(columns=['vote'])
user_data_scaled = scaler.transform(user_data)
recommendation = (model.predict(user_data_scaled) > 0.5).astype(int)

# Display the recommendation for the user
print(f'Recommendation for User {user_id_to_predict}: {recommendation}')

Recommendation for User 478: [[1]
 [1]]


In [42]:
# Example: Making predictions for a specific user and cafe combination
user_id_to_predict = 478
cafe_id_to_predict = 'ChIJ0aVF46v2aS4Rnh8lZ3d5tUQ'  # Replace with the actual cafe ID
user_cafe_data = df_model[(df['user_id'] == user_id_to_predict) & (df['cafe_id'] == cafe_id_to_predict)].drop(columns=['vote'])
user_cafe_data_scaled = scaler.transform(user_cafe_data)
prediction = model.predict(user_cafe_data_scaled)

# Display the predicted likelihood of the user liking the cafe
print(f'Predicted Likelihood for User {user_id_to_predict} liking Cafe {cafe_id_to_predict}: {prediction}')

Predicted Likelihood for User 478 liking Cafe ChIJ0aVF46v2aS4Rnh8lZ3d5tUQ: [[0.9999821]]
