## Analyze data for a Run

In [None]:
from evaluation_helpers import calculate_metrics, fetch_traces
from sms_classifier import Label

# Raw values of every Label member, in enum definition order; passed to
# calculate_metrics as the set of classes to score.
classes = [label.value for label in Label]

## Baseline

In [None]:
# Baseline run of qwen2.5:0.5b (run id: Run-qwen2.5:0.5b-1729425728).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_0_5 = fetch_traces(run_id="Run-qwen2.5:0.5b-1729425728")
metrics_qwen25_0_5 = calculate_metrics(traces_qwen25_0_5, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_0_5["macro"]

In [None]:
# Baseline run of qwen2.5:1.5b (run id: Run-qwen2.5:1.5b-1729430987).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_1_5 = fetch_traces(run_id="Run-qwen2.5:1.5b-1729430987")
metrics_qwen25_1_5 = calculate_metrics(traces_qwen25_1_5, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_1_5["macro"]

In [None]:
# Baseline run of qwen2.5:3b (run id: Run-qwen2.5:3b-1729431832).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_3 = fetch_traces(run_id="Run-qwen2.5:3b-1729431832")
metrics_qwen25_3 = calculate_metrics(traces_qwen25_3, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_3["macro"]

In [None]:
# Baseline run of qwen2.5:7b (run id: Run-qwen2.5:7b-1729432095).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_7 = fetch_traces(run_id="Run-qwen2.5:7b-1729432095")
metrics_qwen25_7 = calculate_metrics(traces_qwen25_7, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_7["macro"]

In [None]:
# Baseline run of gemma2:9b (run id: Run-gemma2:9b-1729433837).
# Fetch the run's traces and compute metrics over all label classes.
traces_gemma2_9 = fetch_traces(run_id="Run-gemma2:9b-1729433837")
metrics_gemma2_9 = calculate_metrics(traces_gemma2_9, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_gemma2_9["macro"]

In [None]:
# Baseline run of llama3.1:8b (run id: Run-llama3.1:8b-1729435232).
# Fetch the run's traces and compute metrics over all label classes.
traces_llama31_8 = fetch_traces(run_id="Run-llama3.1:8b-1729435232")
metrics_llama31_8 = calculate_metrics(traces_llama31_8, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_llama31_8["macro"]

In [None]:
# Baseline run of gpt-4o-ai-factory (run id: Run-gpt-4o-ai-factory-1729432239).
# Fetch the run's traces and compute metrics over all label classes.
traces_gpt_4 = fetch_traces(run_id="Run-gpt-4o-ai-factory-1729432239")
metrics_gpt_4 = calculate_metrics(traces_gpt_4, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_gpt_4["macro"]

In [None]:
# "macro" metrics of every baseline run, keyed by the model name shown in
# the chart legend. Insertion order is the order the bars are drawn in.
metrics_baseline = {
    "qwen2.5:0.5b": metrics_qwen25_0_5["macro"],
    "qwen2.5:1.5b": metrics_qwen25_1_5["macro"],
    "qwen2.5:3b": metrics_qwen25_3["macro"],
    "qwen2.5:7b": metrics_qwen25_7["macro"],
    "gemma2:9b": metrics_gemma2_9["macro"],
    "llama3.1:8b": metrics_llama31_8["macro"],
    "gpt-4o-ai-factory": metrics_gpt_4["macro"],
}

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def plot_metrics(metrics, labels, title):
    """Show a grouped bar chart of precision, recall and F1 per series.

    One group of bars is drawn per entry in ``labels``; within each group
    there is one bar per entry in ``metrics``.

    Args:
        metrics: Mapping of series name (used as the legend label) to a
            mapping that provides the "precision", "recall" and "f1" scores.
        labels: Tick labels for the score groups, in the order precision,
            recall, F1. Three values are plotted per series, so this is
            expected to have three entries.
        title: Title displayed above the chart.
    """
    x = np.arange(len(labels))  # left-most bar position of each group
    n_series = len(metrics)
    # Keep the historical 0.10 bar width, but shrink it when there are so
    # many series that a group would otherwise spill into the next one.
    width = min(0.10, 0.8 / max(n_series, 1))

    fig, ax = plt.subplots(figsize=(10, 5))
    for k, (name, scores) in enumerate(metrics.items()):
        ax.bar(
            x + width * k,
            [scores["precision"], scores["recall"], scores["f1"]],
            width,
            label=name,
        )

    # Bars span x .. x + width * (n_series - 1); put each tick under the
    # middle of its group instead of under the first bar.
    ax.set_xticks(x + width * max(n_series - 1, 0) / 2)
    ax.set_xticklabels(labels)
    ax.set_ylabel("Scores")
    ax.set_title(title)
    # Legend sits below the axes so it does not cover the bars.
    ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=4)
    ax.grid(True)
    plt.show()


# Chart the baseline "macro" scores of every model side by side.
plot_metrics(
    metrics=metrics_baseline,
    labels=["Precision", "Recall", "F1"],
    title="Scores by model before optimization",
)

## After running the optimization

In [None]:
# Optimized run of qwen2.5:7b (run id: Run-qwen2.5:7b-optim-1729433657).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_7_optim = fetch_traces(run_id="Run-qwen2.5:7b-optim-1729433657")
metrics_qwen25_7_optim = calculate_metrics(traces_qwen25_7_optim, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_7_optim["macro"]

In [None]:
# Optimized run of gemma2:9b (run id: Run-gemma2:9b-optim-1729434900).
# Fetch the run's traces and compute metrics over all label classes.
traces_gemma2_9_optim = fetch_traces(run_id="Run-gemma2:9b-optim-1729434900")
metrics_gemma2_9_optim = calculate_metrics(traces_gemma2_9_optim, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_gemma2_9_optim["macro"]

In [None]:
# Optimized run of llama3.1:8b (run id: Run-llama3.1:8b-optim-1729436001).
# Fetch the run's traces and compute metrics over all label classes.
traces_llama31_8_optim = fetch_traces(run_id="Run-llama3.1:8b-optim-1729436001")
metrics_llama31_8_optim = calculate_metrics(traces_llama31_8_optim, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_llama31_8_optim["macro"]

In [None]:
# Optimized run of qwen2.5:1.5b (run id: Run-qwen2.5:1.5b-optim-1729436454).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_1_5_optim = fetch_traces(run_id="Run-qwen2.5:1.5b-optim-1729436454")
metrics_qwen25_1_5_optim = calculate_metrics(traces_qwen25_1_5_optim, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_1_5_optim["macro"]

In [None]:
# Optimized run of qwen2.5:3b (run id: Run-qwen2.5:3b-optim-1729437009).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_3_optim = fetch_traces(run_id="Run-qwen2.5:3b-optim-1729437009")
metrics_qwen25_3_optim = calculate_metrics(traces_qwen25_3_optim, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_3_optim["macro"]

In [None]:
# Optimized run of qwen2.5:0.5b (run id: Run-qwen2.5:0.5b-optim-1729437842).
# Fetch the run's traces and compute metrics over all label classes.
traces_qwen25_0_5_optim = fetch_traces(run_id="Run-qwen2.5:0.5b-optim-1729437842")
metrics_qwen25_0_5_optim = calculate_metrics(traces_qwen25_0_5_optim, classes)
# Trailing expression so the notebook cell displays the "macro" entry
# (presumably macro-averaged scores -- confirm in evaluation_helpers).
metrics_qwen25_0_5_optim["macro"]

In [None]:
# "macro" metrics of every optimized run, keyed by the model name shown in
# the chart legend. Insertion order is the order the bars are drawn in.
metrics_optim = {
    "qwen2.5:0.5b": metrics_qwen25_0_5_optim["macro"],
    "qwen2.5:1.5b": metrics_qwen25_1_5_optim["macro"],
    "qwen2.5:3b": metrics_qwen25_3_optim["macro"],
    "qwen2.5:7b": metrics_qwen25_7_optim["macro"],
    "gemma2:9b": metrics_gemma2_9_optim["macro"],
    "llama3.1:8b": metrics_llama31_8_optim["macro"],
    # No optimized gpt-4o run is loaded in this notebook; its baseline
    # metrics are reused here as a reference point.
    "gpt-4o-ai-factory-baseline": metrics_gpt_4["macro"],
}

In [None]:
# Chart the post-optimization "macro" scores of every model side by side.
plot_metrics(
    metrics=metrics_optim,
    labels=["Precision", "Recall", "F1"],
    title="Scores by model after optimization",
)