In [32]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

In [33]:
# Load dataset
data = pd.read_csv('file_updated.csv')

# Mengonversi kolom 'Jawaban' dari string menjadi list
data['Jawaban'] = data['Jawaban'].apply(lambda x: eval(x))  # Mengubah string list menjadi list Python

# Preprocessing Produk
#label_encoder_produk = LabelEncoder()
# Initialize MultiLabelBinarizer for Produk
label_encoder_produk = MultiLabelBinarizer()

# Split the 'Produk' column by commas to create a list of products
data['Produk'] = data['Produk'].apply(lambda x: x.split(', '))

# Apply MultiLabelBinarizer to encode the 'Produk' column
produk_encoded = label_encoder_produk.fit_transform(data['Produk'])

print("Produk Desc")
print(produk_encoded.shape)  # Shape of the encoded matrix
print(produk_encoded[:2])  # Displaying first two rows of the encoded data

# Checking the labels used for encoding
print("Produk classes:", label_encoder_produk.classes_)


# Preprocessing Fitur
#label_encoder_fitur = LabelEncoder()
label_encoder_fitur = MultiLabelBinarizer()
#data['Fitur'] = label_encoder_fitur.fit_transform(data['Fitur'])
data['Fitur'] = data['Fitur'].apply(lambda x: x.split(', '))
fitur_encoded = label_encoder_fitur.fit_transform(data['Fitur'])

print("Fitur Desc")
#print(data['Fitur'].shape)
#print(data['Fitur'][:2])
print(fitur_encoded.shape)
print(fitur_encoded[:2])



# Preprocessing Bahan
bahan_binarizer = MultiLabelBinarizer()
data['Bahan'] = data['Bahan'].apply(lambda x: x.split(', '))  # Pastikan bahan berbentuk list
bahan_encoded = bahan_binarizer.fit_transform(data['Bahan'])

print("Bahan Desc")

print(bahan_encoded.shape)
print(bahan_encoded[:2])
print("Bahan yang terpisah:", bahan_binarizer.classes_)



Produk Desc
(2000, 10)
[[0 0 0 0 1 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 1 0]]
Produk classes: ['Booster' 'Calming Mask' 'Cleanser' 'Essence' 'Mask' 'Moisturizer'
 'Serum' 'Spot Treatment' 'Sunscreen' 'Toner']
Fitur Desc
(2000, 15)
[[0 0 0 0 1 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0]]
Bahan Desc
(2000, 12)
[[1 0 0 0 0 0 0 1 1 0 0 1]
 [1 0 0 0 1 0 0 0 1 0 0 1]]
Bahan yang terpisah: ['Allantoin' 'Aloe Vera' 'Bakuchiol' 'Ceramide' 'Cica' 'Glycerin' 'Glycol'
 'Niacinamide' 'Panthenol' 'Retinol' 'Salicylic Acid' 'Water']


In [34]:
# Mengubah kolom 'Jawaban' menjadi array numpy
X = np.array(data['Jawaban'].tolist())  # Mengubah list ke dalam array

# Output Labels
y_produk = produk_encoded
y_fitur = fitur_encoded
y_bahan = bahan_encoded

# Normalize Input
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_produk_train, y_produk_test, y_fitur_train, y_fitur_test, y_bahan_train, y_bahan_test = train_test_split(
    X, y_produk, y_fitur, y_bahan, test_size=0.2, random_state=42
)

In [35]:
print('Produk shape:', y_produk_train.shape)
print('Fitur shape:', y_fitur_train.shape)
print('Bahan shape:', y_bahan_train.shape)


Produk shape: (1600, 10)
Fitur shape: (1600, 15)
Bahan shape: (1600, 12)


In [39]:
# Define ANN model
input_layer = Input(shape=(14,))
# hidden_layer1 = Dense(2048, activation='relu')(input_layer)
# dropout1 = Dropout(0.3)(hidden_layer1)  # Dropout layer for regularization
# hidden_layer2 = Dense(1024, activation='relu')(dropout1)
# dropout2 = Dropout(0.5)(hidden_layer2)
# hidden_layer3 = Dense(512, activation='relu')(dropout2)

# # Output layer for Produk (jumlah kelas produk harus sesuai dengan target)
# output_produk = Dense(12, activation='softmax', name='Produk_Output')(hidden_layer3)

# # Output layer for Fitur (jumlah kelas fitur harus sesuai dengan target)
# output_fitur = Dense(15, activation='softmax', name='Fitur_Output')(hidden_layer3)

# # Output layer for Bahan (jumlah bahan yang terpisah, yang berupa binari, sesuai dengan target)
# output_bahan = Dense(10, activation='sigmoid', name='Bahan_Output')(hidden_layer3)

# Add hidden layers with BatchNormalization and L2 regularization
hidden_layer1 = Dense(1024, activation='relu', kernel_regularizer=l2(0.001))(input_layer)
batch_norm1 = BatchNormalization()(hidden_layer1)
dropout1 = Dropout(0.5)(batch_norm1)

hidden_layer2 = Dense(512, activation='relu', kernel_regularizer=l2(0.001))(dropout1)
batch_norm2 = BatchNormalization()(hidden_layer2)
dropout2 = Dropout(0.5)(batch_norm2)

hidden_layer3 = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(dropout2)
batch_norm3 = BatchNormalization()(hidden_layer3)

# Output layer for Produk
output_produk = Dense(12, activation='softmax', name='Produk_Output')(batch_norm3)

# Output layer for Fitur
output_fitur = Dense(15, activation='softmax', name='Fitur_Output')(batch_norm3)

# Output layer for Bahan
output_bahan = Dense(10, activation='sigmoid', name='Bahan_Output')(batch_norm3)



# Combine model
model = Model(inputs=input_layer, outputs=[output_produk, output_fitur, output_bahan])

# Compile model
optimizer = Adam(learning_rate=0.0007)

model.compile(optimizer=optimizer,
              loss={'Produk_Output': 'categorical_crossentropy',
                    'Fitur_Output': 'categorical_crossentropy',
                    'Bahan_Output': 'binary_crossentropy'},
              metrics={'Produk_Output': 'accuracy',
                       'Fitur_Output': 'accuracy',
                       'Bahan_Output': 'accuracy'})



# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train model
# history = model.fit(
#     X_train,
#     {'Produk_Output': y_produk_train, 'Fitur_Output': y_fitur_train, 'Bahan_Output': y_bahan_train},
#     epochs=50,
#     batch_size=16,
#     validation_split=0.1,
#     verbose=1
# )
# history = model.fit(
#     X_train,
#     {'Produk_Output': y_produk_train, 'Fitur_Output': y_fitur_train, 'Bahan_Output': y_bahan_train},
#     epochs=100,
#     batch_size=16,
#     validation_split=0.1,
#     verbose=1
# )
history = model.fit(
    X_train,
    {'Produk_Output': y_produk_train, 'Fitur_Output': y_fitur_train, 'Bahan_Output': y_bahan_train},
    epochs=50,
    batch_size=32,  # Increased batch size for better generalization
    validation_split=0.1,
    verbose=1,
    callbacks=[early_stopping]
)


# Evaluate model
# loss, produk_acc, fitur_acc, bahan_acc = model.evaluate(
#     X_test,
#     {'Produk_Output': y_produk_test, 'Fitur_Output': y_fitur_test, 'Bahan_Output': y_bahan_test},
#     verbose=0
# )

# print(f"Produk Accuracy: {produk_acc:.2f}")
# print(f"Fitur Accuracy: {fitur_acc:.2f}")
# print(f"Bahan Accuracy: {bahan_acc:.2f}")

# Evaluate model
results = model.evaluate(
    X_test,
    {'Produk_Output': y_produk_test, 'Fitur_Output': y_fitur_test, 'Bahan_Output': y_bahan_test},
    verbose=0
)

# Unpack the results and print
total_loss = results[0]
produk_loss = results[1]
fitur_loss = results[2]
bahan_loss = results[3]
produk_acc = results[4]
fitur_acc = results[5]
bahan_acc = results[6]

print(f"Total Loss: {total_loss:.2f}")
print(f"Produk Loss: {produk_loss:.2f}, Accuracy: {produk_acc:.2f}")
print(f"Fitur Loss: {fitur_loss:.2f}, Accuracy: {fitur_acc:.2f}")
print(f"Bahan Loss: {bahan_loss:.2f}, Accuracy: {bahan_acc:.2f}")






# Normalize X_test using the same scaler as training
X_test_normalized = scaler.transform(X_test)

# # Predict for X_test
# produk_pred, fitur_pred, bahan_pred = model.predict(X_test_normalized)


# Decode Produk predictions (single-label classification)
produk_pred = model.predict(X_test_normalized)[0]  # Get Produk predictions
predicted_produk_indices = np.argmax(produk_pred, axis=1)
predicted_produk = label_encoder_produk.inverse_transform(predicted_produk_indices)

# Decode Fitur predictions (multi-label classification)
fitur_pred = model.predict(X_test_normalized)[1]
predicted_fitur = label_encoder_fitur.inverse_transform((fitur_pred > 0.5).astype(int))

# Decode Bahan predictions (multi-label classification)
bahan_pred = model.predict(X_test_normalized)[2]
predicted_bahan = bahan_binarizer.inverse_transform((bahan_pred > 0.5).astype(int))

# Display predictions for the first few samples
for i in range(5):  # Displaying results for the first 5 test samples
    print(f"Sample {i + 1}:")
    print(f"Rekomendasi Produk: {predicted_produk[i]}")
    print(f"Rekomendasi Fitur: {', '.join(predicted_fitur[i])}")
    print(f"Rekomendasi Bahan: {', '.join(predicted_bahan[i])}")
    print("-" * 30)



Epoch 1/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - Bahan_Output_accuracy: 0.0977 - Bahan_Output_loss: 5.6295 - Fitur_Output_accuracy: 0.1144 - Fitur_Output_loss: 6.4093 - Produk_Output_accuracy: 0.0824 - Produk_Output_loss: 0.8608 - loss: 13.8055 - val_Bahan_Output_accuracy: 0.0688 - val_Bahan_Output_loss: 4.3399 - val_Fitur_Output_accuracy: 0.1688 - val_Fitur_Output_loss: 5.3129 - val_Produk_Output_accuracy: 0.0500 - val_Produk_Output_loss: 0.6575 - val_loss: 11.2068
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Bahan_Output_accuracy: 0.1502 - Bahan_Output_loss: 4.7584 - Fitur_Output_accuracy: 0.2739 - Fitur_Output_loss: 5.4809 - Produk_Output_accuracy: 0.0937 - Produk_Output_loss: 0.7641 - loss: 11.8985 - val_Bahan_Output_accuracy: 0.0437 - val_Bahan_Output_loss: 4.1006 - val_Fitur_Output_accuracy: 0.2375 - val_Fitur_Output_loss: 5.2947 - val_Produk_Output_accuracy: 0.1063 - val_Produk_Output_loss: 0.6388 - v

IndexError: tuple index out of range