In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os

from llmize import OPRO
import llmize

2025-03-17 14:39:24.054654: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-17 14:39:24.062734: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742236764.073881 1679097 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742236764.077397 1679097 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-17 14:39:24.087837: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
def build_model(units, dropout, learning_rate):
    """Build and compile a single-hidden-layer classifier for 28x28 inputs.

    Architecture: Input(28, 28) -> Flatten -> Dense(units, relu)
    -> Dropout(dropout) -> Dense(10, softmax).

    Args:
        units: Number of neurons in the hidden Dense layer.
        dropout: Rate passed to the Dropout layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model using sparse categorical
        cross-entropy loss and reporting the ``accuracy`` metric.
    """
    model = keras.Sequential([
        # Declare the input shape with an explicit Input object rather than
        # passing `input_shape=` to the first layer; the latter makes Keras
        # emit a UserWarning every time a model is built.
        keras.Input(shape=(28, 28)),
        keras.layers.Flatten(),
        keras.layers.Dense(units, activation='relu'),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def objective_function(hps, x_train, y_train, x_test, y_test, epochs=5):
    """Train a fresh model with the given hyperparameters and return its accuracy.

    Args:
        hps: Sequence of exactly three values, ``(units, dropout, learning_rate)``.
        x_train, y_train: Training inputs and labels passed to ``model.fit``.
        x_test, y_test: Data used both as ``validation_data`` during fitting
            and for the final ``model.evaluate`` call.
        epochs: Number of training epochs (default 5).

    Returns:
        The accuracy reported by ``model.evaluate`` on ``(x_test, y_test)``.

    Raises:
        ValueError: If ``hps`` does not unpack into three values, or a value
            cannot be converted to the required numeric type.
    """
    units, dropout, learning_rate = hps

    # Candidates may come from an external proposer rather than from code in
    # this notebook, so normalise the types: Dense needs an integer unit count.
    units = int(units)
    dropout = float(dropout)
    learning_rate = float(learning_rate)

    # This function is called once per candidate, many times per session.
    # Clearing the Keras global state before building each model stops that
    # state from accumulating over repeated calls.
    keras.backend.clear_session()

    model = build_model(units, dropout, learning_rate)
    # The per-epoch validation results (fit's return value) are not used here;
    # only the final evaluate() score is returned.
    model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test), verbose=0)
    _, accuracy = model.evaluate(x_test, y_test, verbose=0)
    return accuracy



In [3]:
# Fetch the MNIST train/test split that ships with Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Divide the pixel values by 255 (one statement per split).
x_train = x_train / 255.0
x_test = x_test / 255.0

# Baseline run: hps are ordered [units, dropout, learning_rate].
accuracy = objective_function(
    hps=[16, 0.2, 0.001],
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
)

  super().__init__(**kwargs)
[0mI0000 00:00:1742236765.982626 1679097 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4480 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:02:00.0, compute capability: 7.5
I0000 00:00:1742236767.531213 1679394 service.cc:148] XLA service 0x7f03b80043f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742236767.531245 1679394 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1660 Ti, Compute Capability 7.5
2025-03-17 14:39:27.544907: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1742236767.590352 1679394 cuda_dnn.cc:529] Loaded cuDNN version 90800
I0000 00:00:1742236768.205893 1679394 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


In [4]:
# Show the accuracy value currently bound to `accuracy`
# (the return value of an objective_function call).
print(accuracy)

0.9408000111579895[0m
[0m

In [5]:
# Seed NumPy's global RNG so the same initial candidates are drawn on every run.
np.random.seed(42)

batch_size = 16

# Draw `batch_size` random candidates, each ordered [units, dropout, learning_rate].
# Values are converted to built-in int/float so the list prints as plain numbers
# regardless of how the installed NumPy version renders its scalar types.
random_hps = []
for _ in range(batch_size):
    # randint's upper bound is exclusive: units is an integer in [2, 127].
    units = int(np.random.randint(2, 128))
    # Dropout rate drawn from [0.1, 0.3], rounded to 2 decimals.
    dropout = float(round(np.random.uniform(0.1, 0.3), 2))
    # Learning rate drawn from [0.0001, 0.01], rounded to 4 decimals.
    learning_rate = float(round(np.random.uniform(0.0001, 0.01), 4))
    random_hps.append([units, dropout, learning_rate])


In [6]:
# Show the sampled candidates; each entry is [units, dropout, learning_rate].
print(random_hps)

[[104, 0.26, 0.0019], [73, 0.22, 0.0016], [84, 0.12, 0.0046], [118, 0.22, 0.0071], [23, 0.11, 0.0072], [31, 0.14, 0.0019], [22, 0.22, 0.0062], [126, 0.19, 0.003], [60, 0.18, 0.0006], [61, 0.17, 0.0046], [48, 0.22, 0.0039], [117, 0.22, 0.0006], [52, 0.24, 0.0046], [19, 0.29, 0.0097], [115, 0.18, 0.0003], [3, 0.24, 0.0045]][0m
[0m

In [7]:
# Score each sampled candidate with objective_function, printing every score
# and collecting them in the same order as `random_hps`.
accuracies = []
for hps in random_hps:
    accuracy = objective_function(hps, x_train, y_train, x_test, y_test)
    print(accuracy)
    accuracies.append(accuracy)


0.9768999814987183[0m
[0m0.9745000004768372[0m
[0m0.9672999978065491[0m
[0m0.9641000032424927[0m
[0m0.9473999738693237[0m
[0m0.9603000283241272[0m
[0m0.9369999766349792[0m
[0m0.9763000011444092[0m
[0m0.9700000286102295[0m
[0m0.9606999754905701[0m
[0m0.9678000211715698[0m
[0m0.9760000109672546[0m
[0m0.9635999798774719[0m
[0m0.9085999727249146[0m
[0m0.9692999720573425[0m
[0m0.503000020980835[0m
[0m

In [8]:
# Read the task description that is handed to the optimizer as `problem_text`.
with open("mnist_tf.txt", "r") as problem_file:
    problem_text = problem_file.read()


def obj_func(x):
    """Score one candidate `x` with objective_function on the loaded arrays."""
    return objective_function(x, x_train, y_train, x_test, y_test)


# Create the OPRO optimizer; the API key is read from the environment.
opro = OPRO(
    problem_text=problem_text,
    obj_func=obj_func,
    llm_model="gemini-2.0-flash",
    api_key=os.getenv("GEMINI_API_KEY"),
)

# Build a sample prompt from the initial candidates and their scores, request
# one sample response for it, and print both.
prompt = opro.get_sample_prompt(
    init_samples=random_hps,
    init_scores=accuracies,
    optimization_type="maximize",
)
response = opro.get_sample_response(prompt)

llmize.utils.pretty_print(prompt=prompt, response=response)

[0mPrompt:[0m
[0mTask: Optimize the hyperparameters for a feedforward neural network model to classify handwritten digits in the MNIST dataset.

Model Architecture:

Input Layer: 28x28 pixel images (flattened to 784 inputs)
Hidden Layer: A single dense layer with configurable number of units, activated by ReLU
Dropout Layer: Added for regularization to prevent overfitting
Output Layer: Dense layer with 10 units (representing digits 0-9), softmax activation for multi-class classification
Hyperparameters to Optimize:

units: The number of neurons in the hidden layer
dropout: The dropout rate (probability of randomly deactivating neurons during training)
learning_rate: The learning rate for the Adam optimizer
Training Setup:

Optimizer: Adam
Loss Function: Sparse categorical cross-entropy (ideal for multi-class classification tasks)
Metrics: Accuracy
Epochs: 5 epochs
Validation data used during training
Data Processing:

Dataset: MNIST
Pixel values normalized to range [0, 1] by dividin

In [9]:
from llmize.callbacks import EarlyStopping, AdaptTempOnPlateau, OptimalScoreStopping

# Early-stopping callback: monitors 'best_score' with a patience of 50.
earlystop_callback = EarlyStopping(
    monitor='best_score',
    min_delta=0.001,
    patience=50,
    verbose=1,
)

# Optimal-score callback: target score 0.990 with a tolerance of 0.005.
optimal_score_callback = OptimalScoreStopping(
    optimal_score=0.990,
    tolerance=0.005,
)

# Temperature-adaptation callback: monitors 'best_score' with a patience of 20,
# starting at temperature 1.0 with factor 1.1 and a maximum of 1.9.
adapt_temp_callback = AdaptTempOnPlateau(
    monitor='best_score',
    init_temperature=1.0,
    min_delta=0.001,
    patience=20,
    factor=1.1,
    max_temperature=1.9,
    verbose=1,
)

callbacks = [
    earlystop_callback,
    optimal_score_callback,
    adapt_temp_callback,
]

: 

In [10]:
# Run the maximization, starting from the random candidates and their scores,
# with the callbacks defined earlier attached.
results = opro.maximize(
    init_samples=random_hps,
    init_scores=accuracies,
    num_steps=250,
    batch_size=batch_size,
    callbacks=callbacks,
)


[37mRunning OPRO optimization with 250 steps and batch size 16...[0m
[0m[37mStep 0 - Best Initial Score: 0.977, Average Initial Score: 0.933[0m
[0m[37mStep 1 - Current Best Score: 0.977, Average Batch Score: 0.973 - Best Batch Score: 0.976[0m
[0m[37mNo improvement in best_score. Patience count: 1/50[0m
[0m[37mStep 2 - Current Best Score: 0.978, Average Batch Score: 0.976 - Best Batch Score: 0.978[0m
[0m[37mStep 3 - Current Best Score: 0.980, Average Batch Score: 0.977 - Best Batch Score: 0.980[0m
[0m[37mStep 4 - Current Best Score: 0.980, Average Batch Score: 0.977 - Best Batch Score: 0.980[0m
[0m[37mNo improvement in best_score. Patience count: 1/50[0m
[0m[37mStep 5 - Current Best Score: 0.980, Average Batch Score: 0.978 - Best Batch Score: 0.980[0m
[0m[37mNo improvement in best_score. Patience count: 2/50[0m
[0m[37mStep 6 - Current Best Score: 0.980, Average Batch Score: 0.977 - Best Batch Score: 0.980[0m
[0m[37mNo improvement in best_score. Patience