# Import Library

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import re
from os import getcwd

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cwd = getcwd() + "/drive/Shareddrives/CAPSTONE_ML/"
print(cwd)

/content/drive/Shareddrives/CAPSTONE_ML/


In [3]:
df_all = pd.read_excel(cwd + "df_all_new.xlsx")
df_all.head()

Unnamed: 0,Page_URL,Shop_name,Rating,Reviews,Kategori,Address,Phone_Number,Opening_Hours,Range_Harga,Latitude,Longitude,Eco_Friendly,Categorize_Weather
0,https://www.google.com/maps/place/Wedang+Uwuh+...,Wedang Uwuh Solo - Pak Aden,4.6,5,Wedang Uwuh,"Mutihan rt 04 rw 1, Surakarta, Sondakan, Kec. ...",82324006000,"{'Senin': ['08.00–20.00'], 'Selasa': ['08.00–2...",Rp 1–25 rb,-7.566731,110.716919,0,Dingin
1,https://www.google.com/maps/place/Rajanya+JAHE...,Rajanya JAHE REMPAH wedhang uwuh Sumber,4.6,32,Wedang Uwuh,"Jl. Letjen Suprapto No.76c, Sumber, Kec. Banja...",85641727445,"{'Minggu': ['Buka 24 jam'], 'Senin': ['Buka 24...",Rp 1–25 rb,-7.566731,110.716919,0,Dingin
2,https://www.google.com/maps/place/Wedang+uwuh+...,Wedang uwuh jelantik solo,5.0,1,Wedang Uwuh,"Jl. Sungai Musi No.1, Sangkrah, Kec. Ps. Kliwo...",81229070267,"{'Senin': ['Buka 24 jam'], 'Selasa': ['Buka 24...",Rp 1–25 rb,-7.576765,110.768229,0,Dingin
3,https://www.google.com/maps/place/Wedang+uwuh+...,Wedang uwuh mbok tuo,4.9,14,Wedang Uwuh,"Jl. Kutai Tim. 5B, RT.05/RW.08, Sumber, Kec. B...",81578614885,"{'Senin': ['07.00–19.00'], 'Selasa': ['07.00–1...",Rp 1–25 rb,-7.547894,110.730811,0,Dingin
4,https://www.google.com/maps/place/wedang+uwuh+...,wedang uwuh sya,4.7,3,Wedang Uwuh,"Jl. Reksoniten No.20, Gajahan, Kec. Ps. Kliwon...",89608149211,"{'Minggu': ['Buka 24 jam'], 'Senin': ['Buka 24...",Rp 1–25 rb,-7.547894,110.730811,0,Dingin


Fungsi untuk menghitung jarak menggunakan formula haversine

In [4]:
def calculate_haversine(lat1, lon1, lat2, lon2):
    earth_radius = 6371  # Radius bumi dalam kilometer
    delta_lat = np.radians(lat2 - lat1)
    delta_lon = np.radians(lon2 - lon1)
    a = np.sin(delta_lat / 2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(delta_lon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return earth_radius * c

Menghitung jarak tiap lokasi berdasarkan input latitude dan longitude pengguna

In [5]:
# Lokasi pengguna
current_lat = -7.5589608
current_lon = 110.856106

# Perhitungan jarak untuk setiap lokasi dalam dataset
df_all['computed_distance'] = df_all.apply(
    lambda row: calculate_haversine(current_lat, current_lon, row['Latitude'], row['Longitude']), axis=1
)

In [6]:
df_all['Categorize_Weather'].value_counts()

C = df_all['Rating'].mean()
M = df_all['Reviews'].quantile(0.5)
df_all['weighted_rating'] = ((df_all['Reviews'] / (df_all['Reviews'] + M)) * df_all['Rating']) + ((M / (df_all['Reviews'] + M)) * C)

In [7]:
df_all.dtypes

Unnamed: 0,0
Page_URL,object
Shop_name,object
Rating,float64
Reviews,int64
Kategori,object
Address,object
Phone_Number,object
Opening_Hours,object
Range_Harga,object
Latitude,float64


In [8]:
selected_df = df_all[['Categorize_Weather', 'weighted_rating', 'Kategori', 'Rating']]

# train, test = train_test_split(selected_df, test_size=0.2, random_state=42)
train, val = train_test_split(selected_df, test_size=0.2, random_state=42)

print(len(train), 'train examples')
print(len(val), 'validation examples')
# print(len(test), 'test examples')

485 train examples
122 validation examples


In [9]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()

    labels = dataframe.pop('Rating')

    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))

    if shuffle:
      ds = ds.shuffle(4)

    ds = ds.batch(batch_size)

    return ds

In [10]:
import tensorflow as tf
from tensorflow.keras.layers import Normalization, StringLookup, CategoryEncoding

# Data numerik
numeric_data = df_all['weighted_rating'].to_numpy()
normalizer = Normalization()
normalizer.adapt(numeric_data)

# Data kategorik: 'weather'
weather_class = ['Dingin', 'Dingin/Panas', 'Panas']
weather_lookup = StringLookup(vocabulary=weather_class, output_mode="one_hot")

# Data kategorik: 'category'
category_class = df_all['Kategori'].unique().tolist()
category_lookup = StringLookup(vocabulary=category_class, output_mode="one_hot")

# Contoh pipeline preprocessing
inputs = {
    "weighted_rating": tf.keras.Input(shape=(1,), name="weighted_rating"),
    "weather": tf.keras.Input(shape=(1,), name="weather", dtype=tf.string),
    "category": tf.keras.Input(shape=(1,), name="category", dtype=tf.string),
}

preprocessed = [
    normalizer(inputs["weighted_rating"]),
    weather_lookup(inputs["weather"]),
    category_lookup(inputs["category"]),
]

# Gabungkan semua preprocessing
concatenated = tf.keras.layers.Concatenate()(preprocessed)

# Model akhir
outputs = tf.keras.layers.Dense(10, activation="relu")(concatenated)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.summary()


In [12]:
feature_layer = tf.keras.layers.Concatenate()(preprocessed)

In [13]:
train_ds = df_to_dataset(train)
val_ds = df_to_dataset(val, shuffle=False)
# test_ds = df_to_dataset(test, shuffle=False)

In [18]:
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(feature_layer.shape[1],)),
    feature_layer,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mape'])



ValueError: Only instances of `keras.Layer` can be added to a Sequential model. Received: <KerasTensor shape=(None, 23), dtype=float32, sparse=False, name=keras_tensor_5> (of type <class 'keras.src.backend.common.keras_tensor.KerasTensor'>)

In [None]:
model.fit(train_ds,
          validation_data=val_ds,
          epochs=100)

ValueError: You must call `compile()` before using the model.

# Menggunakan input np hstack

In [None]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

df_all['Kategori_encoded'] = label_encoder.fit_transform(df_all['Kategori'])
category = tf.keras.utils.to_categorical(df_all['Kategori_encoded'])

df_all['Categorize_Weather_encoded'] = label_encoder.fit_transform(df_all['Categorize_Weather'])
weather = tf.keras.utils.to_categorical(df_all['Categorize_Weather_encoded'])

In [None]:
C = df_all['Rating'].mean()
M = df_all['Reviews'].quantile(0.5)
df_all['weighted_rating'] = ((df_all['Reviews'] / (df_all['Reviews'] + M)) * df_all['Rating']) + ((M / (df_all['Reviews'] + M)) * C)

# df_all['weighted_rating'] = (df_all['Rating'] * np.log(df_all['Reviews'] + 1))

In [None]:
feature_input = np.hstack([category, weather, np.expand_dims(df_all['weighted_rating'].values, axis=1)])

In [None]:
feature_tensor = tf.convert_to_tensor(feature_input, dtype=tf.float32)

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(feature_tensor.shape[1],)),
    # tf.keras.layers.Dense(512, activation='relu'),
    # tf.keras.layers.Dense(512, activation='relu'),
    # tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

target = df_all['Rating'].values

model.fit(feature_tensor, target, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 17.7552 - mae: 4.0329 - val_loss: 13.5118 - val_mae: 3.5730
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 12.0555 - mae: 3.3807 - val_loss: 8.7694 - val_mae: 2.9201
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 7.1605 - mae: 2.6267 - val_loss: 5.3036 - val_mae: 2.2828
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 3.8984 - mae: 1.9206 - val_loss: 3.1399 - val_mae: 1.7001
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2.1251 - mae: 1.3060 - val_loss: 2.1771 - val_mae: 1.2807
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.5277 - mae: 0.8870 - val_loss: 1.8192 - val_mae: 1.0133
Epoch 7/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - los

<keras.src.callbacks.history.History at 0x79abb5036c20>

In [None]:
test_loss, test_mae = model.evaluate(feature_tensor, target, verbose=0)
print(f"Mean Absolute Error pada data uji: {test_mae}")

Mean Absolute Error pada data uji: 0.6267900466918945


In [None]:
# Langkah 2: Prediksi menggunakan model yang sudah dilatih
predictions = model.predict(feature_tensor)  # Hasil prediksi berupa array

# Langkah 3: Tambahkan prediksi ke df_all sebagai kolom baru
df_all['toprecommendation'] = predictions

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [None]:
top_recommendations = df_all.sort_values(by='toprecommendation', ascending=False).head(607)
print(top_recommendations[['Shop_name', 'Kategori', 'Categorize_Weather', 'weighted_rating', 'Rating', 'Reviews', 'toprecommendation']])

                                  Shop_name          Kategori  \
121                     Soto Solo Mas Bedjo              Soto   
266            Warung Selat & Sop "LEZATIE"             Selat   
378                   Nasi Liwet Mbak Yanti        Sego Liwet   
369                  Nasi liwet mbak endang        Sego Liwet   
485                          Sate Pak Pledi       Sate Buntel   
..                                      ...               ...   
292              Aneka Sop Dan Selat J Nice             Selat   
576           Es Gempol Pleret Abang Muscle  Es Gempol Pleret   
35                    Angkringan Omah Semar       Wedang Uwuh   
573   Es Gempol Pleret & Es Dawet Mbak Lena  Es Gempol Pleret   
373  Nasi Liwet dan Ayam Geprek Mbak Laksmi        Sego Liwet   

    Categorize_Weather  weighted_rating  Rating  Reviews  toprecommendation  
121             Dingin         4.768433     4.9       85           4.756782  
266             Dingin         4.915769     5.0      173       

In [None]:
top_nasi_liwet = top_recommendations[top_recommendations['Kategori'] == 'Sego Liwet'].sort_values(by='toprecommendation', ascending=False).head(10)
print(top_nasi_liwet[['Shop_name', 'Kategori', 'Categorize_Weather', 'weighted_rating', 'Rating', 'Reviews', 'toprecommendation']])

# top_recommendations[top_recommendations['Kategori'] == 'Sego Liwet']

                              Shop_name    Kategori Categorize_Weather  \
378               Nasi Liwet Mbak Yanti  Sego Liwet             Dingin   
369              Nasi liwet mbak endang  Sego Liwet             Dingin   
363               Nasi Liwet Mbah Kisut  Sego Liwet             Dingin   
345                   Nasi Liwet Sukini  Sego Liwet             Dingin   
310  Nasi Liwet & Cabuk Rambak Bu Parmi  Sego Liwet             Dingin   
341  Nasi Liwet & Bubur Lemu Mbah Narti  Sego Liwet             Dingin   
326        Nasi Liwet Mbak Marni Gentan  Sego Liwet             Dingin   
331     Nasi Liwet MBAK GIYEM Solo Baru  Sego Liwet             Dingin   
325                 Nasi Liwet Bu Darmi  Sego Liwet             Dingin   
346      nasi liwet solo pagi mbak atin  Sego Liwet             Dingin   

     weighted_rating  Rating  Reviews  toprecommendation  
378         4.655013     4.9       35           4.748380  
369         4.641650     4.9       32           4.736915  
363     