In [1]:
import numpy as np


from llmize import OPRO
import llmize
import os


In [2]:
def lp_maximization_problem(x):
    """
    Linear Programming maximization problem with penalty constraints.
    
    Args:
        x (list or numpy array): Decision variables [x1, x2, x3]
    
    Returns:
        float: Penalized objective function value (to be maximized)
    """
    x1, x2, x3 = x
    # Objective Function (Maximize Z)
    Z = 3*x1 + 4*x2 + 6*x3  # No negation since we maximize directly

    # Define a large penalty for constraint violations
    penalty = 0
    large_penalty = -1e2  # Large NEGATIVE penalty (to push infeasible solutions down)

    # Constraints
    if 2*x1 + 3*x2 + x3 > 15:  # Resource 1 constraint
        penalty += large_penalty
    if x1 + 2*x2 + 3*x3 > 20:  # Resource 2 constraint
        penalty += large_penalty
    if 4*x1 + x2 + 2*x3 > 16:  # Resource 3 constraint
        penalty += large_penalty
    if x1 < 0 or x2 < 0 or x3 < 0:  # Non-negativity constraints
        penalty += large_penalty

    return Z + penalty  # Maximize Z while discouraging violations

In [3]:
# Generate random solutions (list of lists) and scores (list of floats)
# Generate initial random solutions
num_samples = 16  # Batch size
num_vars = 3  # Number of decision variables
#round to 2 decimal places
init_samples = [np.round(np.random.uniform(0, 10, num_vars), 2) for _ in range(num_samples)]

# Calculate scores for initial solutions, round to 2 decimal places
init_scores = [np.round(lp_maximization_problem(x), 2) for x in init_samples]

print(init_samples)
print(init_scores)

[array([1.65, 3.37, 8.95]), array([0.33, 5.51, 8.7 ]), array([7.7 , 9.67, 1.07]), array([1.87, 7.17, 2.5 ]), array([1.87, 8.11, 5.54]), array([6.77, 4.58, 5.18]), array([9.35, 5.71, 1.53]), array([6.2 , 7.26, 6.22]), array([0.45, 6.73, 6.63]), array([5.82, 6.79, 4.88]), array([2.11, 9.39, 6.32]), array([3.82, 2.55, 4.51]), array([1.92, 1.  , 1.23]), array([4.98, 0.39, 7.48]), array([3.23, 0.1 , 0.12]), array([8.66, 4.32, 9.25])][0m
[0m[np.float64(-227.87), np.float64(-224.77), np.float64(-231.8), np.float64(-250.71), np.float64(-228.71), np.float64(-230.29), np.float64(-239.93), np.float64(-215.04), np.float64(-231.95), np.float64(-226.1), np.float64(-218.19), np.float64(-251.28), np.float64(17.14), np.float64(-238.62), np.float64(10.81), np.float64(-201.24)][0m
[0m

In [4]:
with open("lp_problem.txt", "r") as f:
    problem_text = f.read()

# Initialize the OPRO optimizer
opro = OPRO(problem_text=problem_text, obj_func=lp_maximization_problem,
            llm_model="gemini-2.0-flash", api_key=os.getenv("GEMINI_API_KEY"))

prompt = opro.get_sample_prompt(init_samples=init_samples, init_scores=init_scores, optimization_type="maximize")
response = opro.get_sample_response(prompt)

llmize.utils.pretty_print(prompt=prompt, response=response)

[0mPrompt:[0m
[0mProblem: Linear Programming Optimization
-----------------------------------------------------
Objective: Maximize the function
    Z = 3x1 + 4x2 + 6x3

Subject to constraints:
    2x1 + 3x2 + x3 ≤ 15
    x1 + 2x2 + 3x3 ≤ 20
    4x1 + x2 + 2x3 ≤ 16
    x1, x2, x3 ≥ 0

Below are some examples of solutions and their scores:

<sol> 1.65,3.37,8.95 <\sol>
score: -227.87

<sol> 0.33,5.51,8.7 <\sol>
score: -224.77

<sol> 7.7,9.67,1.07 <\sol>
score: -231.80

<sol> 1.87,7.17,2.5 <\sol>
score: -250.71

<sol> 1.87,8.11,5.54 <\sol>
score: -228.71

<sol> 6.77,4.58,5.18 <\sol>
score: -230.29

<sol> 9.35,5.71,1.53 <\sol>
score: -239.93

<sol> 6.2,7.26,6.22 <\sol>
score: -215.04

<sol> 0.45,6.73,6.63 <\sol>
score: -231.95

<sol> 5.82,6.79,4.88 <\sol>
score: -226.10

<sol> 2.11,9.39,6.32 <\sol>
score: -218.19

<sol> 3.82,2.55,4.51 <\sol>
score: -251.28

<sol> 1.92,1.0,1.23 <\sol>
score: 17.14

<sol> 4.98,0.39,7.48 <\sol>
score: -238.62

<sol> 3.23,0.1,0.12 <\sol>
score: 10.81

<sol>

In [5]:
from llmize.callbacks import EarlyStopping, AdaptTempOnPlateau, OptimalScoreStopping

# Define the early stopping callback
earlystop_callback = EarlyStopping(monitor='best_score', min_delta=0.001, patience=50, verbose=1)

# Define the optimal score stopping callback
optimal_score_callback = OptimalScoreStopping(optimal_score=41.08, tolerance=0.01)

# Define the temperature adaptation callback
adapt_temp_callback = AdaptTempOnPlateau(monitor='best_score', init_temperature=1.0, min_delta=0.001, patience=20, factor=1.1, max_temperature=1.9, verbose=1)

callbacks = [earlystop_callback, optimal_score_callback, adapt_temp_callback]

In [6]:
results = opro.maximize(init_samples=init_samples, init_scores=init_scores, num_steps=250, batch_size=16, callbacks=callbacks)


[37mRunning OPRO optimization with 250 steps and batch size 16...[0m
[0m[37mStep 0 - Best Initial Score: 17.14, Average Initial Score: -199.28[0m
[0m[37mStep 1 - Current Best Score: 17.14, Average Batch Score: -191.39 - Best Batch Score: -61.70[0m
[0m[37mNo improvement in best_score. Patience count: 1/50[0m
[0m[37mStep 2 - Current Best Score: 26.00, Average Batch Score: -65.49 - Best Batch Score: 26.00[0m
[0m[37mStep 3 - Current Best Score: 36.50, Average Batch Score: -31.81 - Best Batch Score: 36.50[0m
[0m[37mStep 4 - Current Best Score: 36.50, Average Batch Score: -192.88 - Best Batch Score: -59.60[0m
[0m[37mNo improvement in best_score. Patience count: 1/50[0m
[0m[37mStep 5 - Current Best Score: 40.00, Average Batch Score: -50.58 - Best Batch Score: 40.00[0m
[0m[37mStep 6 - Current Best Score: 40.00, Average Batch Score: -86.09 - Best Batch Score: 39.00[0m
[0m[37mNo improvement in best_score. Patience count: 1/50[0m
[0m[37mStep 7 - Current Best Scor