In [2]:
!pip install -q keras-tuner

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/129.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models
import keras_tuner as kt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset stored under MyDrive
# can be read with an ordinary file path from this Colab runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
# Location of the review dataset on the mounted Drive.
file_path = '/content/drive/MyDrive/DL/DLWeek5/ReviewTokoBaju.csv'

# Read the CSV into a DataFrame, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer=file_path)
data.head(5)

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses


In [10]:
# Drop any rows with missing review text or target
data = data.dropna(subset=['Review Text', 'Recommended IND'])

# Use the 'Review Text' as the input and 'Recommended IND' as the target
X = data['Review Text']
y = data['Recommended IND']

num_words = 10000  # Vocabulary is limited to the top 10,000 words
maxlen = 500  # Every sequence is padded/truncated to this many tokens

# Split the RAW text first. Fitting the tokenizer before splitting would let
# test-set vocabulary and word frequencies leak into the training features.
# Same test_size/random_state as before, so the same rows land in each split.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training reviews only
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(X_train_text)

# Encode each split with the train-fitted tokenizer and pad to a fixed length
X_train = pad_sequences(tokenizer.texts_to_sequences(X_train_text), maxlen=maxlen)
X_test = pad_sequences(tokenizer.texts_to_sequences(X_test_text), maxlen=maxlen)

# Full-corpus encoding (train-only vocabulary), kept so any later cell that
# expects X_seq / X_pad still finds them. Do not train on X_pad directly.
X_seq = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=maxlen)

In [11]:
# Model-building function handed to the tuner (one call per trial)
def create_birnn_model(hp):
    """Build and compile a bidirectional-LSTM binary classifier.

    Args:
        hp: KerasTuner hyperparameter container. Three values are requested
            from it: ``embedding_dim`` (64 to 128, step 64), ``rnn_units``
            (64 to 256, step 64) and ``dropout_rate`` (0.2 to 0.3, step 0.1).

    Returns:
        A compiled Sequential model: Embedding -> Bidirectional(LSTM) ->
        Dense(1, sigmoid), using the Adam optimizer, binary cross-entropy
        loss and the accuracy metric.
    """
    # Hyperparameters are requested in a fixed order: embedding, units, dropout.
    emb_size = hp.Int('embedding_dim', min_value=64, max_value=128, step=64)
    lstm_width = hp.Int('rnn_units', min_value=64, max_value=256, step=64)
    drop_p = hp.Float('dropout_rate', min_value=0.2, max_value=0.3, step=0.1)

    # Assemble the network layer by layer.
    net = models.Sequential()
    net.add(layers.Embedding(input_dim=num_words, output_dim=emb_size, input_length=maxlen))
    net.add(layers.Bidirectional(layers.LSTM(lstm_width, dropout=drop_p)))
    net.add(layers.Dense(1, activation='sigmoid'))

    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return net

# Use Hyperband for hyperparameter tuning.
# Hyperband decides how many epochs each trial trains for (up to max_epochs).
# The whole bracket schedule is repeated `hyperband_iterations` times.
tuner = kt.Hyperband(
    create_birnn_model,
    objective='val_accuracy',
    max_epochs=10,
    hyperband_iterations=2,
    seed=42,  # fixed so the sampled configurations are repeatable across fresh runs
    directory='my_dir',
    project_name='review_toko_baju_tuning'
)



In [None]:
# Run hyperparameter search.
# No `epochs` argument: Hyperband sets the epoch count of every trial itself
# (from `max_epochs` and the bracket schedule), so a value passed here would
# be silently overridden.
tuner.search(X_train, y_train, validation_split=0.2)

# Get the best model found by the search
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model on the held-out test set.
# predict() yields one sigmoid probability per row with shape (n, 1);
# flatten to 1-D so it lines up with y_test in every sklearn metric.
y_pred_best = best_model.predict(X_test).ravel()
y_pred_binary_best = (y_pred_best > 0.5).astype(int)

accuracy_best = accuracy_score(y_test, y_pred_binary_best)
precision_best = precision_score(y_test, y_pred_binary_best)
recall_best = recall_score(y_test, y_pred_binary_best)
f1_best = f1_score(y_test, y_pred_binary_best)
# ROC AUC is computed from the raw probabilities, not the thresholded labels
roc_auc_best = roc_auc_score(y_test, y_pred_best)

print("\nBest Model Evaluation Metrics:")
print(f'Accuracy: {accuracy_best:.4f}')
print(f'Precision: {precision_best:.4f}')
print(f'Recall: {recall_best:.4f}')
print(f'F1 Score: {f1_best:.4f}')
print(f'ROC AUC: {roc_auc_best:.4f}')


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
128               |128               |embedding_dim
192               |192               |rnn_units
0.2               |0.2               |dropout_rate
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
