In [1]:
import numpy as np


from llmize import OPRO
import llmize
import os

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
def lp_maximization_problem(x):
    """
    Linear Programming maximization problem with penalty constraints.
    
    Args:
        x (list or numpy array): Decision variables [x1, x2, x3]
    
    Returns:
        float: Penalized objective function value (to be maximized)
    """
    x1, x2, x3 = x
    # Objective Function (Maximize Z)
    Z = 3*x1 + 4*x2 + 6*x3  # No negation since we maximize directly

    # Define a large penalty for constraint violations
    penalty = 0
    large_penalty = -1e2  # Large NEGATIVE penalty (to push infeasible solutions down)

    # Constraints
    if 2*x1 + 3*x2 + x3 > 15:  # Resource 1 constraint
        penalty += large_penalty
    if x1 + 2*x2 + 3*x3 > 20:  # Resource 2 constraint
        penalty += large_penalty
    if 4*x1 + x2 + 2*x3 > 16:  # Resource 3 constraint
        penalty += large_penalty
    if x1 < 0 or x2 < 0 or x3 < 0:  # Non-negativity constraints
        penalty += large_penalty

    return Z + penalty  # Maximize Z while discouraging violations

In [3]:
# Generate random solutions (list of lists) and scores (list of floats)
# Generate initial random solutions
num_samples = 16  # Batch size
num_vars = 3  # Number of decision variables
#round to 2 decimal places
init_samples = [np.round(np.random.uniform(0, 10, num_vars), 2) for _ in range(num_samples)]

# Calculate scores for initial solutions, round to 2 decimal places
init_scores = [np.round(lp_maximization_problem(x), 2) for x in init_samples]

print(init_samples)
print(init_scores)

[array([4.58, 9.47, 9.6 ]), array([3.09, 6.15, 0.78]), array([5.18, 2.33, 8.76]), array([4.08, 9.35, 2.95]), array([2.7 , 8.16, 5.99]), array([1.16, 5.72, 5.99]), array([5.09, 5.8 , 2.63]), array([5.73, 7.77, 2.54]), array([9.32, 9.84, 8.23]), array([1.71, 1.69, 7.56]), array([0.25, 3.64, 4.87]), array([8.75, 7.06, 3.84]), array([4.99, 6.39, 9.01]), array([9.32, 8.57, 8.07]), array([5.16, 0.98, 1.25]), array([2.87, 0.47, 7.43])][0m
[0m[np.float64(-190.78), np.float64(-161.45), np.float64(-222.58), np.float64(-232.66), np.float64(-223.32), np.float64(-237.7), np.float64(-245.75), np.float64(-236.49), np.float64(-183.3), np.float64(-242.75), np.float64(-155.47), np.float64(-222.47), np.float64(-205.41), np.float64(-189.34), np.float64(-73.1), np.float64(-144.93)][0m
[0m

In [None]:
with open("lp_problem.txt", "r") as f:
    problem_text = f.read()

# Initialize the OPRO optimizer
opro = OPRO(problem_text=problem_text, obj_func=lp_maximization_problem,
            llm_model="gemma-3-27b-it", api_key=os.getenv("GEMINI_API_KEY"))

prompt = opro.get_sample_prompt(init_samples=init_samples, init_scores=init_scores, optimization_type="maximize")
response = opro.get_sample_response(prompt)

llmize.utils.pretty_print(prompt=prompt, response=response)

[0mPrompt:[0m
[0mProblem: Linear Programming Optimization
-----------------------------------------------------
Objective: Maximize the function
    Z = 3x1 + 4x2 + 6x3

Subject to constraints:
    2x1 + 3x2 + x3 ≤ 15
    x1 + 2x2 + 3x3 ≤ 20
    4x1 + x2 + 2x3 ≤ 16
    x1, x2, x3 ≥ 0

Below are some examples of solutions and their scores:

<sol> 4.58,9.47,9.6 <\sol>
score: -190.780

<sol> 3.09,6.15,0.78 <\sol>
score: -161.450

<sol> 5.18,2.33,8.76 <\sol>
score: -222.580

<sol> 4.08,9.35,2.95 <\sol>
score: -232.660

<sol> 2.7,8.16,5.99 <\sol>
score: -223.320

<sol> 1.16,5.72,5.99 <\sol>
score: -237.700

<sol> 5.09,5.8,2.63 <\sol>
score: -245.750

<sol> 5.73,7.77,2.54 <\sol>
score: -236.490

<sol> 9.32,9.84,8.23 <\sol>
score: -183.300

<sol> 1.71,1.69,7.56 <\sol>
score: -242.750

<sol> 0.25,3.64,4.87 <\sol>
score: -155.470

<sol> 8.75,7.06,3.84 <\sol>
score: -222.470

<sol> 4.99,6.39,9.01 <\sol>
score: -205.410

<sol> 9.32,8.57,8.07 <\sol>
score: -189.340

<sol> 5.16,0.98,1.25 <\sol>


In [5]:
from llmize.callbacks import EarlyStopping, AdaptTempOnPlateau, OptimalScoreStopping

# Define the early stopping callback
earlystop_callback = EarlyStopping(monitor='best_score', min_delta=0.001, patience=50, verbose=1)

# Define the optimal score stopping callback
optimal_score_callback = OptimalScoreStopping(optimal_score=41.08, tolerance=0.01)

# Define the temperature adaptation callback
adapt_temp_callback = AdaptTempOnPlateau(monitor='best_score', init_temperature=1.0, min_delta=0.001, patience=20, factor=1.1, max_temperature=1.9, verbose=1)

callbacks = [earlystop_callback, optimal_score_callback, adapt_temp_callback]

In [6]:
results = opro.maximize(init_samples=init_samples, init_scores=init_scores, num_steps=250, batch_size=16, callbacks=callbacks)


[37mRunning OPRO optimization with 250 steps and batch size 16...[0m
[0m[37mStep 0 - Best Initial Score: -73.100, Average Initial Score: -197.969[0m
[0m

KeyboardInterrupt: 