In [1]:
!pip install optuna optuna-integration

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting optuna-integration
  Downloading optuna_integration-4.1.0-py3-none-any.whl.metadata (12 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading optuna_integration-4.1.0-py3-none-any.whl (97 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.4/97.4 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m7.0 MB/s

In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import optuna
from optuna_integration import TFKerasPruningCallback
import string

In [3]:
tf.random.set_seed(200)
np.random.seed(200)

In [4]:
import os
print(os.getcwd())


/content


In [5]:
from google.colab import files
uploaded = files.upload()

Saving ArticlesMarch2018.csv to ArticlesMarch2018.csv


In [6]:
article_df = pd.read_csv("ArticlesMarch2018.csv")
all_headlines = [h for h in article_df.headline.values if h != "Unknown"]

In [7]:
def clean_text(txt):
    return "".join(v for v in txt.lower() if v not in string.punctuation)

corpus = [clean_text(x) for x in all_headlines]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

def create_sequences(tokenizer, corpus):
    input_sequences = []
    for line in corpus:
        token_list = tokenizer.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            n_gram_sequence = token_list[:i+1]
            input_sequences.append(n_gram_sequence)
    max_sequence_len = max([len(x) for x in input_sequences])
    input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
    predictors, label = input_sequences[:,:-1], input_sequences[:,-1]
    label = to_categorical(label, num_classes=total_words)
    return predictors, label, max_sequence_len

In [8]:
predictors, label, max_sequence_len = create_sequences(tokenizer, corpus)

def build_and_compile_model(input_len, total_words):
    global model
    model = Sequential([
        Embedding(total_words, 32, input_length=input_len),
        SimpleRNN(200, return_sequences=True),
        BatchNormalization(),
        Dropout(0.2),
        SimpleRNN(200),
        Dropout(0.2),
        Dense(total_words, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [9]:
def objective(trial):
    # Define the hyperparameters
    rnn_units = trial.suggest_categorical("rnn_units", [50, 100, 200])
    dropout_rate = trial.suggest_uniform("dropout_rate", 0.1, 0.5)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

    # Model building
    model = build_and_compile_model(max_sequence_len-1, total_words)

    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min', restore_best_weights=True),
        TFKerasPruningCallback(trial, "val_accuracy"),
        TensorBoard(log_dir='./logs')
    ]

    # Model training
    history = model.fit(predictors, label, epochs=25, batch_size=batch_size, validation_split=0.2, callbacks=callbacks, verbose=1)
    return history.history['val_accuracy'][-1]

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

best_rnn_units = study.best_params['rnn_units']
best_dropout_rate = study.best_params['dropout_rate']
best_batch_size = study.best_params['batch_size']

print(f"Best parameters - RNN Units: {best_rnn_units}, Dropout Rate: {best_dropout_rate}, Batch Size: {best_batch_size}")

[I 2024-12-16 04:03:01,910] A new study created in memory with name: no-name-7f6a8225-a8b0-426f-9505-2d8b851e2511
  dropout_rate = trial.suggest_uniform("dropout_rate", 0.1, 0.5)


Epoch 1/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 57ms/step - accuracy: 0.0250 - loss: 7.7557 - val_accuracy: 0.0174 - val_loss: 7.6603
Epoch 2/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0254 - loss: 6.8759 - val_accuracy: 0.0174 - val_loss: 8.0050
Epoch 3/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0280 - loss: 6.7893 - val_accuracy: 0.0335 - val_loss: 8.2032
Epoch 4/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0305 - loss: 6.7611 - val_accuracy: 0.0335 - val_loss: 8.4734
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


[I 2024-12-16 04:03:21,037] Trial 0 finished with value: 0.033477991819381714 and parameters: {'rnn_units': 100, 'dropout_rate': 0.48510847069432217, 'batch_size': 64}. Best is trial 0 with value: 0.033477991819381714.


Epoch 1/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 110ms/step - accuracy: 0.0225 - loss: 7.8368 - val_accuracy: 0.0335 - val_loss: 7.6529
Epoch 2/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0275 - loss: 7.0871 - val_accuracy: 0.0174 - val_loss: 7.9316
Epoch 3/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0327 - loss: 7.0034 - val_accuracy: 0.0254 - val_loss: 8.0528
Epoch 4/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0311 - loss: 6.8605 - val_accuracy: 0.0335 - val_loss: 8.4025
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


[I 2024-12-16 04:03:34,215] Trial 1 finished with value: 0.033477991819381714 and parameters: {'rnn_units': 50, 'dropout_rate': 0.4648901179213317, 'batch_size': 128}. Best is trial 0 with value: 0.033477991819381714.


Epoch 1/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 85ms/step - accuracy: 0.0239 - loss: 7.8282 - val_accuracy: 0.0273 - val_loss: 7.6190
Epoch 2/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.0257 - loss: 7.0459 - val_accuracy: 0.0174 - val_loss: 7.9106
Epoch 3/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0311 - loss: 6.9572 - val_accuracy: 0.0254 - val_loss: 7.9613
Epoch 4/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0293 - loss: 6.8487 - val_accuracy: 0.0174 - val_loss: 8.1551
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


[I 2024-12-16 04:03:49,608] Trial 2 finished with value: 0.01735895872116089 and parameters: {'rnn_units': 50, 'dropout_rate': 0.17818707651741184, 'batch_size': 128}. Best is trial 0 with value: 0.033477991819381714.


Epoch 1/25
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 37ms/step - accuracy: 0.0212 - loss: 7.7485 - val_accuracy: 0.0335 - val_loss: 7.7783
Epoch 2/25
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.0256 - loss: 6.8241 - val_accuracy: 6.1996e-04 - val_loss: 9.0235
Epoch 3/25
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.0268 - loss: 6.5680 - val_accuracy: 0.0000e+00 - val_loss: 9.5777
Epoch 4/25
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.0305 - loss: 6.5132 - val_accuracy: 0.0031 - val_loss: 9.0528
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


[I 2024-12-16 04:04:09,320] Trial 3 finished with value: 0.0030998140573501587 and parameters: {'rnn_units': 200, 'dropout_rate': 0.3317190643402939, 'batch_size': 32}. Best is trial 0 with value: 0.033477991819381714.


Epoch 1/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - accuracy: 0.0236 - loss: 7.7787 - val_accuracy: 0.0335 - val_loss: 7.6422
Epoch 2/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0283 - loss: 6.9123 - val_accuracy: 0.0335 - val_loss: 7.9565
Epoch 3/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0329 - loss: 6.7922 - val_accuracy: 0.0335 - val_loss: 8.3713
Epoch 4/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0271 - loss: 6.6243 - val_accuracy: 0.0099 - val_loss: 8.2120
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


[I 2024-12-16 04:04:21,909] Trial 4 finished with value: 0.009919404983520508 and parameters: {'rnn_units': 100, 'dropout_rate': 0.25961668185018205, 'batch_size': 64}. Best is trial 0 with value: 0.033477991819381714.


Epoch 1/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.0252 - loss: 7.7618

[I 2024-12-16 04:04:33,172] Trial 5 pruned. Trial was pruned at epoch 0.


Epoch 1/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.0210 - loss: 7.8145

[I 2024-12-16 04:04:43,154] Trial 6 pruned. Trial was pruned at epoch 0.


Epoch 1/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.0244 - loss: 7.7538

[I 2024-12-16 04:04:53,523] Trial 7 pruned. Trial was pruned at epoch 0.


Epoch 1/25
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0221 - loss: 7.7633

[I 2024-12-16 04:05:04,047] Trial 8 pruned. Trial was pruned at epoch 0.


Epoch 1/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 123ms/step - accuracy: 0.0196 - loss: 7.8510 - val_accuracy: 0.0335 - val_loss: 7.6275
Epoch 2/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.0280 - loss: 7.0156 - val_accuracy: 0.0335 - val_loss: 7.8538
Epoch 3/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0340 - loss: 6.8853 - val_accuracy: 0.0335 - val_loss: 8.2897
Epoch 4/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0300 - loss: 6.8987 - val_accuracy: 0.0174 - val_loss: 8.3751
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


[I 2024-12-16 04:05:19,311] Trial 9 finished with value: 0.01735895872116089 and parameters: {'rnn_units': 200, 'dropout_rate': 0.3856175034784649, 'batch_size': 128}. Best is trial 0 with value: 0.033477991819381714.


Best parameters - RNN Units: 100, Dropout Rate: 0.48510847069432217, Batch Size: 64


In [10]:
# prompt: plots model training hyper parameters results with optuna

import optuna.visualization as vis
# Plot the parallel coordinate plot
vis.plot_parallel_coordinate(study)

In [11]:
# Plot the correlation matrix between hyperparameters
vis.plot_slice(study)


In [12]:
# Plot the hyperparameter importances
vis.plot_param_importances(study)

In [13]:
# Visualize the optimization process
vis.plot_optimization_history(study)


In [14]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Predict the classes
y_pred = model.predict(predictors)
y_pred_classes = np.argmax(y_pred, axis=1)

# Compute the confusion matrix
cm = confusion_matrix(label.argmax(axis=1), y_pred_classes)

# Print the confusion matrix
print(cm)


[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
[[276   0   0 ...   0   0   0]
 [201   0   0 ...   0   0   0]
 [215   0   0 ...   0   0   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  1   0   0 ...   0   0   0]
 [  1   0   0 ...   0   0   0]]


In [15]:
best_trial = study.best_trial
print(f"Best trial number: {best_trial.number}")
print(f"Best parameters - RNN Units: {best_trial.params['rnn_units']}, Dropout Rate: {best_trial.params['dropout_rate']}, Batch Size: {best_trial.params['batch_size']}")
print(f"Best validation accuracy: {best_trial.value}")

Best trial number: 0
Best parameters - RNN Units: 100, Dropout Rate: 0.48510847069432217, Batch Size: 64
Best validation accuracy: 0.033477991819381714
