<center><h1>Model Performance Testing – Feasibility Analysis</h1></center>

In [None]:
import sys
import os
from dotenv import load_dotenv

import json
from pydantic import BaseModel
import pandas as pd
import matplotlib.pyplot as plt
from openai import OpenAI
from mistralai import Mistral

# Put the parent and grandparent directories on the import path, presumably so
# the project-local modules imported below resolve from the notebook's folder.
for relative_parent in ("..", "../.."):
    sys.path.append(os.path.abspath(relative_parent))

from utils.pricing_market_logic_multiproduct import (
    get_profits,
    get_monopoly_prices,
    get_quantities,
)
from utils.prompts import PP_P0, GENERAL_PROMPT
from utils.utils import has_converged_to_price

from helper_functions import create_output_paths, save_round_data, update_plot

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()


# Key handed to the Mistral client further down; os.getenv returns None when
# MISTRAL_API_KEY is not set.
API_KEY = os.getenv("MISTRAL_API_KEY")

# Parameters definition
---

In [None]:
# Scalar market parameters; ALPHA, MU, BETA and SIGMA follow Calvano et al. (2020b).
ALPHA = 1
MU = 0.25
BETA = 100
SIGMA = 0

# Per-firm scalars and market size.
C_i = 1
A_i = 2
A_0 = 0
N_FIRMS = 1
MG_C = 1.0

# Computed while ALPHA is still a scalar: ALPHA is rebound to a tuple just below.
WILLIWGNES_TO_PAY = 4.51 * ALPHA

# The market helpers are called with one entry per firm, so repeat each scalar
# N_FIRMS times.
A = (A_i,) * N_FIRMS
ALPHA = (ALPHA,) * N_FIRMS
C = (C_i,) * N_FIRMS
group_idxs = tuple(range(1, N_FIRMS + 1))

In [None]:
# Structured-output schema passed as `response_format` to the LM Studio
# (OpenAI-compatible) client; the testing loop reads these four keys back from
# the JSON reply.
class PricingSchema(BaseModel):
    # Free-text reasoning; the loop stores it as `observations`.
    observations_and_thoughts: str
    # Fed back into the next round's prompt as `previous_plans`.
    plans: str
    # Fed back into the next round's prompt as `previous_insights`.
    insights: str
    # Price chosen for the current round.
    price: float

In [None]:
# Keyword arguments shared by all three market helpers.
market_kwargs = dict(
    a0=A_0,
    a=A,
    mu=MU,
    alpha=ALPHA,
    multiplier=BETA,
    sigma=SIGMA,
    group_idxs=group_idxs,
)

# Benchmark values: monopoly prices, then the quantities and profits at those
# prices. They are passed to update_plot in the testing loops below.
p_m = get_monopoly_prices(c=C, **market_kwargs)
monopoly_price_tuple = tuple(p_m)
q_m = get_quantities(p=monopoly_price_tuple, **market_kwargs)
pi_m = get_profits(p=monopoly_price_tuple, c=C, **market_kwargs)

print(f"Monopoly prices: {p_m} | Quantities: {q_m} | Profits: {pi_m}")

# Models testing
---

## Models

![Models Tested](imgs/models_and_sizes.png)

## Testing

In [None]:
# Models to evaluate through LM Studio. Uncomment an entry to include it in the run.
MODEL_LIST = [  # "google/gemma-3-1b",
    #   "llama-3.2-1b-instruct",
    #   "deepseek-ai.deepseek-r1-distill-qwen-1.5b",
    #   "microsoft/phi-4-mini-reasoning",
    #   "google/gemma-3-4b",
    #   "mistralai_-_mistral-7b-instruct-v0.2",
    #   "mistralai/mistral-7b-instruct-v0.3",
    #   "deepseek/deepseek-r1-0528-qwen3-8b",
    #   "qwen/qwen3-8b",
    #   "deepseek-r1-distill-qwen-7b",
    #   "deepseek-ai.deepseek-r1-distill-llama-8b",
    #   "meta-llama-3.1-8b-instruct",
    "google/gemma-2-9b",
    #   "qwen/qwen2.5-vl-7b",
    #   "mistralai_-_mistral-nemo-instruct-2407",
    #   "google/gemma-3-12b",
    #   "microsoft/phi-4-reasoning-plus",
    #   "deepseek-ai.deepseek-r1-distill-qwen-14b",
    #   "mistralai/magistral-small",
    #   "deepseek-ai.deepseek-r1-distill-qwen-32b"
]

# Number of pricing rounds each local model plays.
N_LOCAL_ROUNDS = 10

# Connect to LM Studio. The endpoint is the same for every model, so the client
# is created once instead of once per model.
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

for MODEL_NAME in MODEL_LIST:
    paths = create_output_paths(MODEL_NAME)

    # Create figure and subplots
    fig, axs = plt.subplots(3, 1, figsize=(10, 8), sharex=True)
    plt.tight_layout()

    # try/finally so the figure is released even if a round raises outside the
    # guarded API call (e.g. in the market helpers or while saving/plotting).
    try:
        # State carried from one round to the next and injected into the prompt.
        plans, insights, market_data = (
            "No previous plans.",
            "No previous insights.",
            "No previous market data.",
        )
        price_history, quantity_history, profit_history, time_history = [], [], [], []

        for i in range(1, N_LOCAL_ROUNDS + 1):
            prompt = GENERAL_PROMPT.format(
                marginal_cost=MG_C,
                willigness_to_pay=WILLIWGNES_TO_PAY,
                previous_plans=plans,
                previous_insights=insights,
                market_data=market_data,
            )
            messages = [
                {"role": "system", "content": PP_P0},
                {"role": "user", "content": prompt},
            ]

            try:
                response = client.beta.chat.completions.parse(
                    model=MODEL_NAME, messages=messages, response_format=PricingSchema
                )
                result = json.loads(response.choices[0].message.content)
                # Read every field into temporaries first: a reply missing a key
                # must not leave the carried-over state half updated.
                round_insights = result["insights"]
                round_observations = result["observations_and_thoughts"]
                round_plans = result["plans"]
                round_price = float(result["price"])
            except Exception as e:
                # Best effort: report the failure and skip this round.
                print(f"[ERROR] Failed to get response at round {i}: {e}")
                continue

            insights, observations, plans, price = (
                round_insights,
                round_observations,
                round_plans,
                round_price,
            )

            # Single-firm market: prices are passed as a 1-tuple, matching how the
            # monopoly benchmark calls these helpers.
            quantity = get_quantities(
                p=(price,),
                a0=A_0,
                a=A,
                mu=MU,
                alpha=ALPHA,
                multiplier=BETA,
                sigma=SIGMA,
                group_idxs=group_idxs,
            )
            profit = get_profits(
                p=(price,),
                c=C,
                a0=A_0,
                a=A,
                mu=MU,
                alpha=ALPHA,
                multiplier=BETA,
                sigma=SIGMA,
                group_idxs=group_idxs,
            )

            price_history.append(price)
            quantity_history.append(quantity)
            profit_history.append(profit)
            time_history.append(i)

            market_data_result = f"""Round {i}:\n \t - My price: {price}\n \t - Quantity sold: {quantity[0]}\n \t - My profit earned: {profit[0]}\n"""
            # save_round_data returns the market data used in the next prompt.
            market_data = save_round_data(
                i, paths, insights, plans, observations, market_data_result
            )

            # Update plot
            update_plot(
                fig,
                axs,
                i,
                p_m,
                q_m,
                pi_m,
                price_history,
                quantity_history,
                profit_history,
                time_history,
                MODEL_NAME,
                paths["start_time"],
                paths["plot"],
            )
    finally:
        plt.close(fig)

## Results

In [None]:
# Manually recorded outcome of each local run, one record per model.
result_columns = (
    "model",
    "minutes_taken",
    "n_wrong_insights",
    "n_wrong_obs",
    "n_wrong_plans",
    "completed_10_rounds",
    "model_size_gb",
    "parameters",
)

# NOTE(review): -1 in the n_wrong_* fields looks like a "not evaluated" marker — confirm.
result_rows = [
    ("deepseek-ai.deepseek-r1-distill-qwen-32b", 19, 3, 1, 2, True, 16, "32B"),
    ("mistralai/magistral-small", 7, 0, 0, 0, True, 13.5, None),
    ("deepseek-ai.deepseek-r1-distill-qwen-14b", 3, 7, 5, 0, True, 9, "14B"),
    ("microsoft/phi-4-reasoning-plus", 5, 1, 1, 3, False, 8, None),
    ("google/gemma-3-12b", 6, 0, 0, 0, True, 8, None),
    ("mistralai_-_mistral-nemo-instruct-2407", 5, 0, 0, 0, True, 7, "12B"),
    ("qwen/qwen2.5-vl-7b", 3, 0, 0, 0, True, 6, "7B"),
    ("google/gemma-2-9b", 3, 0, 0, 0, True, 6, "9B"),
    ("meta-llama-3.1-8b-instruct", 3, 1, 0, 0, True, 5, "8B"),
    ("deepseek-ai.deepseek-r1-distill-llama-8b", 3, 0, 0, 0, True, 5, "8B"),
    ("deepseek-r1-distill-qwen-7b", 2, 0, 1, 1, True, 5, "7B"),
    ("qwen/qwen3-8b", 6, 0, 0, 0, False, 5, None),
    ("deepseek/deepseek-r1-0528-qwen3-8b", 3, 0, 0, 0, True, 5, None),
    ("mistralai/mistral-7b-instruct-v0.3", 3, 0, 0, 0, True, 4, "7B"),
    ("mistralai_-_mistral-7b-instruct-v0.2", 3, 5, 0, 8, True, 4, "7B"),
    ("google/gemma-3-4b", 3, 0, 0, 0, True, 3, None),
    ("microsoft/phi-4-mini-reasoning", 2, 0, 0, 2, False, 2, None),
    ("deepseek-ai.deepseek-r1-distill-qwen-1.5b", 0.5, -1, -1, -1, False, 2, "1.5B"),
    ("llama-3.2-1b-instruct", 0.5, 0, 0, 8, False, 1, "1B"),
    ("google/gemma-3-1b", 0.5, 1, 1, 4, True, 0.7, None),
]

# Transpose the records into a column-name -> list-of-values mapping.
all_runs_results = {
    column: list(values)
    for column, values in zip(result_columns, zip(*result_rows))
}

df = pd.DataFrame(all_runs_results)
df

# Mistral AI Inference - Performance test

Using the Mistral AI API reduces response time from 35 seconds per answer (running locally) to 5 seconds.

In [None]:
# The "-API" suffix only labels the outputs of this run; the Mistral endpoint is
# called with the bare model id.
MODEL_NAME = "magistral-small-2506-API"
MODEL_MISTRAL = MODEL_NAME.replace("-API", "")
print(f"Using model: {MODEL_MISTRAL}")

client = Mistral(api_key=API_KEY)

paths = create_output_paths(MODEL_NAME)

# Number of pricing rounds, and how many times one round's request is attempted
# before the round is given up.
N_API_ROUNDS = 300
MAX_ATTEMPTS = 3

# System messages are identical for every round, so they are built once. The
# whitespace inside the schema prompt is kept verbatim so the text sent to the
# model is unchanged.
SYSTEM_MESSAGES = [
    {"role": "system", "content": PP_P0},
    {
        "role": "system",
        "content": """Respond only with a JSON object with this schema:
                                    {
                                    "observations": string,
                                    "plans": string,
                                    "insights": string,
                                    "chosen_price": float
                                    }""",
    },
]

# Create figure and subplots
fig, axs = plt.subplots(3, 1, figsize=(10, 8), sharex=True)
plt.tight_layout()

# try/finally so the figure is released even if a round raises.
try:
    # State carried from one round to the next and injected into the prompt.
    plans, insights, market_data = (
        "No previous plans.",
        "No previous insights.",
        "No previous market data.",
    )
    price_history, quantity_history, profit_history, time_history = [], [], [], []

    for i in range(1, N_API_ROUNDS + 1):
        prompt = GENERAL_PROMPT.format(
            marginal_cost=MG_C,
            willigness_to_pay=WILLIWGNES_TO_PAY,
            previous_plans=plans,
            previous_insights=insights,
            market_data=market_data,
        )

        for _attempt in range(MAX_ATTEMPTS):
            try:
                chat_response = client.chat.complete(
                    model=MODEL_MISTRAL,
                    stream=False,
                    temperature=0.7,
                    response_format={"type": "json_object"},
                    messages=[*SYSTEM_MESSAGES, {"role": "user", "content": prompt}],
                )
                result = json.loads(chat_response.choices[0].message.content)
                # The reply is sometimes a one-element list wrapping the object.
                if isinstance(result, list):
                    result = result[0]

                # Validate the whole reply before touching loop state. JSON mode
                # does not enforce types, so the price is coerced here and a
                # malformed value triggers a retry.
                round_insights = result["insights"]
                round_observations = result["observations"]
                round_plans = result["plans"]
                round_price = float(result["chosen_price"])
                break
            except Exception as e:
                print(f"[ERROR] Failed to get response at round {i}: {e}")
        else:
            # Every attempt failed: skip the round rather than carrying on with
            # an undefined price (round 1) or the previous round's price.
            print(f"[ERROR] Failed to get response at round {i}")
            continue

        insights, observations, plans, price = (
            round_insights,
            round_observations,
            round_plans,
            round_price,
        )

        # Single-firm market: prices are passed as a 1-tuple, matching how the
        # monopoly benchmark calls these helpers.
        quantity = get_quantities(
            p=(price,),
            a0=A_0,
            a=A,
            mu=MU,
            alpha=ALPHA,
            multiplier=BETA,
            sigma=SIGMA,
            group_idxs=group_idxs,
        )
        profit = get_profits(
            p=(price,),
            c=C,
            a0=A_0,
            a=A,
            mu=MU,
            alpha=ALPHA,
            multiplier=BETA,
            sigma=SIGMA,
            group_idxs=group_idxs,
        )

        price_history.append(price)
        quantity_history.append(quantity)
        profit_history.append(profit)
        time_history.append(i)

        market_data_result = f"""Round {i}:\n \t - My price: {price}\n \t - Quantity sold: {quantity[0]}\n \t - My profit earned: {profit[0]}\n"""
        # save_round_data returns the market data used in the next prompt.
        market_data = save_round_data(
            i, paths, insights, plans, observations, market_data_result
        )

        # Update plot
        update_plot(
            fig,
            axs,
            i,
            p_m,
            q_m,
            pi_m,
            price_history,
            quantity_history,
            profit_history,
            time_history,
            MODEL_NAME,
            paths["start_time"],
            paths["plot"],
        )
finally:
    plt.close(fig)

In [None]:
# Check whether the prices chosen in the API run settled on the monopoly price
# (the first entry of p_m).
price_target = p_m[0]
converged = has_converged_to_price(price_history, price_target)

if not converged:
    print(f"Prices did NOT converge to {price_target}")
else:
    print(f"Prices converged to {price_target}")

# Results
---

The models that passed the criteria at this stage are:
- Deepseek R1 Distill QWEN 32B
- MistralAI - Magistral Small
- Google Gemma 2 9B
- Deepseek R1 Qwen3 8B



![Deepseek 32B](imgs/results_deepseek_r1_distill_qwen_32b.jpeg)

![Deepseek 8B](imgs/results_deepseek_r1_qwen3_8b.jpeg)

![Gemma 2 9B](imgs/results_gemma_2_9b.jpeg)

![Magistral Small](imgs/results_magistral_small.jpeg)

We will proceed with the MistralAI API to speed up experimentation, as latency is greatly reduced.

![Magistral Small API](imgs/results_magistral_small_API.jpeg)