In [21]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os

from llmize import OPRO
import llmize

from dotenv import load_dotenv
load_dotenv()

True

In [22]:
def build_model(units, dropout, learning_rate):
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(units, activation='relu'),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def objective_function(hps, x_train, y_train, x_test, y_test, epochs=3):
    units, dropout, learning_rate = hps
    model = build_model(units, dropout, learning_rate)
    model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test), verbose=0)
    _, accuracy = model.evaluate(x_test, y_test, verbose=0)
    return accuracy



In [23]:
# Load dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


accuracy = objective_function(hps=[16, 0.2, 0.001], x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)

In [24]:
print(accuracy)

0.9284999966621399[0m
[0m

In [25]:
# Set random seed for reproducibility
np.random.seed(42)

batch_size = 8

# Generate batch_size of random hps
random_hps = []
for _ in range(batch_size):
    units = np.random.randint(2, 16)  
    dropout = round(np.random.uniform(0.3, 0.5), 2)  
    learning_rate = round(np.random.uniform(0.0001, 0.01), 4) 
    random_hps.append([units, dropout, learning_rate])


In [26]:
print(random_hps)

[[8, 0.46, 0.0019], [9, 0.42, 0.0016], [4, 0.32, 0.0046], [6, 0.42, 0.0071], [7, 0.31, 0.0072], [15, 0.34, 0.0019], [6, 0.42, 0.0062], [14, 0.39, 0.003]][0m
[0m

In [27]:
# Evaluate accuracy for each hps
accuracies = []
for hps in random_hps:
    accuracy = objective_function(hps=hps, x_train=x_train, y_train=y_train, 
                                x_test=x_test, y_test=y_test)
    print(accuracy)
    accuracies.append(accuracy)


0.8781999945640564[0m
[0m0.8810999989509583[0m
[0m0.6845999956130981[0m
[0m0.7671999931335449[0m
[0m0.8616999983787537[0m
[0m0.9182999730110168[0m
[0m0.748199999332428[0m
[0m0.9085000157356262[0m
[0m

In [28]:
with open("mnist_tf.txt", "r") as f:
    problem_text = f.read()

obj_func = lambda x: objective_function(x, x_train, y_train, x_test, y_test)


# Initialize the OPRO optimizer
opro = OPRO(problem_text=problem_text, obj_func=obj_func,
            llm_model="gemma-3-27b-it", api_key=os.getenv("GEMINI_API_KEY"))

prompt = opro.get_sample_prompt(init_samples=random_hps, init_scores=accuracies, optimization_type="maximize")
response = opro.get_sample_response(prompt)

llmize.utils.pretty_print(prompt=prompt, response=response)

[0mPrompt:[0m
[0mTask: Optimize the hyperparameters for a feedforward neural network model to classify handwritten digits in the MNIST dataset.

Model Architecture:

Input Layer: 28x28 pixel images (flattened to 784 inputs)
Hidden Layer: A single dense layer with configurable number of units, activated by ReLU
Dropout Layer: Added for regularization to prevent overfitting
Output Layer: Dense layer with 10 units (representing digits 0-9), softmax activation for multi-class classification

Hyperparameters to Optimize:

units: The number of neurons in the hidden layer
dropout: The dropout rate (probability of randomly deactivating neurons during training)
learning_rate: The learning rate for the Adam optimizer

Training Setup:

Optimizer: Adam
Loss Function: Sparse categorical cross-entropy (ideal for multi-class classification tasks)
Metrics: Accuracy
Epochs: 5 epochs
Validation data used during training

Data Processing:

Dataset: MNIST
Pixel values normalized to range [0, 1] by divi

In [29]:
from llmize.callbacks import EarlyStopping, AdaptTempOnPlateau, OptimalScoreStopping

# Define the early stopping callback
earlystop_callback = EarlyStopping(monitor='best_score', min_delta=0.001, patience=5, verbose=1)

# Define the optimal score stopping callback
optimal_score_callback = OptimalScoreStopping(optimal_score=0.990, tolerance=0.005)

# Define the temperature adaptation callback
adapt_temp_callback = AdaptTempOnPlateau(monitor='best_score', init_temperature=1.0, min_delta=0.001, patience=2, factor=1.1, max_temperature=1.9, verbose=1)

callbacks = [earlystop_callback, optimal_score_callback, adapt_temp_callback]

In [30]:
results = opro.maximize(init_samples=random_hps, init_scores=accuracies, num_steps=250, batch_size=batch_size, callbacks=callbacks)


[37mRunning OPRO optimization with 250 steps and batch size 8...[0m
[0m[37mStep 0 - Best Initial Score: 0.918, Average Initial Score: 0.831[0m
[0m[37mStep 1 - Current Best Score: 0.931, Average Batch Score: 0.908 - Best Batch Score: 0.931[0m
[0m[37mNo improvement in best_score. Patience count: 1/5[0m
[0m[37mStep 2 - Current Best Score: 0.932, Average Batch Score: 0.927 - Best Batch Score: 0.932[0m
[0m[37mStep 3 - Current Best Score: 0.945, Average Batch Score: 0.930 - Best Batch Score: 0.945[0m
[0m[37mStep 4 - Current Best Score: 0.955, Average Batch Score: 0.946 - Best Batch Score: 0.955[0m
[0m[37mStep 5 - Current Best Score: 0.955, Average Batch Score: 0.933 - Best Batch Score: 0.946[0m
[0m[37mNo improvement in best_score. Patience count: 1/5[0m
[0m[37mStep 6 - Current Best Score: 0.955, Average Batch Score: 0.928 - Best Batch Score: 0.945[0m
[0m[37mNo improvement in best_score. Patience count: 2/5[0m
[0m[37mNo improvement in best_score for 2 steps. 

In [33]:
from llmize.utils.plotting import plot_scores

plot_scores(results, max_steps=7)

TypeError: plot_scores() got an unexpected keyword argument 'max_steps'