In [146]:
import pandas as pd
import ast

# Load the CSV file into a DataFrame
file_path = 'combined_tinyml_benchmark_data.csv'
df = pd.read_csv(file_path)

# Function to clean tags
def clean_tags(tags_str):
    if pd.isna(tags_str) or tags_str == '[]':
        return []
    try:
        # Safely evaluate the string to a list
        tags_list = ast.literal_eval(tags_str)
        cleaned_list = []
        for tag in tags_list:
            if tag == 'benchmark':
                continue
            if ':latest' in tag:
                tag = tag.replace(':latest', '')
            if '_generator':
                tag=tag.replace('_sketch_generator','')
            if '_batch':
                tag=tag.replace('_batch','')
            cleaned_list.append(tag)
        return cleaned_list
    except (ValueError, SyntaxError):
        # Handle cases where the string might not be a valid list representation
        return []

# Apply the cleaning function to the 'tags' column
df['tags'] = df['tags'].apply(clean_tags)

df = df[~df['batch_id'].str.contains('_dp_', na=False)]
df = df[~df['batch_id'].str.contains('_sg_', na=False)]
df = df[~df['batch_id'].str.contains('_mc_', na=False)]
 
df["test_date"] = df["test_date"].str.replace("07.28","07.27")
df["test_date"] = df["test_date"].str.replace("07.29","07.28")
df["test_date"] = df["test_date"].str.replace("07.30_a","07.29")
df["test_date"] = df["test_date"].str.replace("07.30_b","07.30")
# Display the first few rows with the cleaned 'tags' column
print("DataFrame after cleaning 'tags' column:")
display(df[['name', 'tags']].head())

display(df['test_date'].unique())

DataFrame after cleaning 'tags' column:


Unnamed: 0,name,tags
0,e2fa_tpu_sketch_generator,"[codestral, tpu]"
1,4a9e_tpu_sketch_generator,"[codestral, tpu]"
2,d3b0_tpu_sketch_generator,"[codestral, tpu]"
3,8c83_tpu_sketch_generator,"[codestral, tpu]"
4,05c6_tpu_sketch_generator,"[codestral, tpu]"


array(['08.24', '05.21', '08.08', '08.09', '08.14', '08.26', '07.27',
       '08.15', '08.23', '08.16', '08.13', '07.30', '08.12', '08.07',
       '08.18', '08.20', '08.19', '08.17', '08.10', '08.04', '08.05',
       '08.25', '08.21', '07.29', '08.22', '08.11', '08.03', '07.28',
       '08.01', '08.06', '07.31', '05.19', '08.02'], dtype=object)

In [147]:
import pandas as pd

# üì• Load the CSV file (replace 'your_file.csv' with your actual file name)
# df = pd.read_csv(file_path)

# üß† Create the 'parameters_status' column
df['parameters_status'] = df['parameters'].apply(lambda x: 'P' if pd.notnull(x) and str(x).strip() != '' else 'NP')

split_vals = df['batch_id'].str.split('_')
part_2_3 = split_vals.str[2] + '_' + split_vals.str[3]

df['category'] = df['model_config'].astype(str) + '_' + part_2_3+'_'+ df['parameters_status'].astype(str)
df['category'] = df['category'].apply(lambda x:  x.replace('_batch','') if '_batch' in x else x)

# üóëÔ∏è Drop the 'prompt cost' column
df.drop(columns=['prompt_cost','completion_cost','total_cost','source_file','source_path','parameters', 'parameters_status', 'tags', 'name','trace_id', 'timestamp','prompt_tokens','completion_tokens'], inplace=True)

df = df.rename(columns={"model_config": "model"})



# üëÄ Preview the first 5 rows
print("First 5 rows of the dataset:")
display(df.sample(n=30))
# display(df.head(10))

First 5 rows of the dataset:


Unnamed: 0,num_run,batch_id,status,latency,total_tokens,generation_count,test_date,model,category
993,2,qwen2.5-coder:14b_3193_tpusg_batch,success,13.57,2609,1,8.04,qwen2.5-coder:14b,qwen2.5-coder:14b_tpusg_P
689,29,phi4_7854_psg_batch,success,17.34,2426,1,8.2,phi4,phi4_psg_NP
810,29,phi4_85a9_tpusg_batch,success,19.78,2735,1,7.3,phi4,phi4_tpusg_NP
722,16,phi4_7854_tpusg_batch,success,107.2,15157,5,8.18,phi4,phi4_tpusg_NP
688,29,phi4_7854_psg_batch,success,17.34,2426,1,8.18,phi4,phi4_psg_NP
675,22,phi4_7854_psg_batch,success,80.47,13625,5,8.2,phi4,phi4_psg_NP
1067,16,qwen2.5-coder:14b_33b8_tpusg_batch,success,19.9,2655,1,8.05,qwen2.5-coder:14b,qwen2.5-coder:14b_tpusg_P
1227,27,qwen2.5-coder:14b_c83f_psg_batch,success,24.46,5116,2,8.22,qwen2.5-coder:14b,qwen2.5-coder:14b_psg_NP
1606,18,qwen2.5-coder:32b_57d5_psg_batch,failure,126.41,14112,5,8.06,qwen2.5-coder:32b,qwen2.5-coder:32b_psg_P
1051,30,qwen2.5-coder:14b_33b8_psg_batch,failure,52.23,11828,5,8.05,qwen2.5-coder:14b,qwen2.5-coder:14b_psg_P


In [148]:
import numpy as np
import pandas as pd
# -- Helper functions --# 
# Helper enforcing value uniqueness
def assert_single_value(series):
    vals = series.dropna().unique()
    if len(vals) == 1:
        # normal case
        return vals[0]
    # return just the distinct values, sorted for consistency
    return ValueError(f"Expected single unique value, found: {vals}")
import uuid
import pandas as pd

# Assign new batch_ids where test_date varies within a batch_id
def split_batch_id_by_test_date(df: pd.DataFrame) -> pd.DataFrame:
    # Detect which batch_ids need splitting
    bad = df.groupby("batch_id")["test_date"].nunique()
    bad = bad[bad > 1].index
    if len(bad) == 0:
        return df  # nothing to change

    mask = df["batch_id"].isin(bad)
    # Split only those needing modification
    sub = df.loc[mask].copy()
    # Construct new batch_id for each (batch_id, test_date) branch
    sub["new_token"] = (
        sub.groupby(["batch_id", "test_date"]).ngroup()
        .map(lambda _: uuid.uuid4().hex[:4])
    )
    # Reassemble batch_id: prefix + new token + suffix
    parts = sub["batch_id"].str.split("_", n=2, expand=True)
    sub["batch_id"] = parts[0] + "_" + sub["new_token"] + "_" + parts[2]

    # Replace back into df
    df.loc[mask, "batch_id"] = sub["batch_id"]

    return df


# --- Safety: ensure required columns exist and are typed well
required = {"category", "status", "latency", "total_tokens", "generation_count"}
missing = required - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {missing}")

df = split_batch_id_by_test_date(df)
# Cast where reasonable
df["generation_count"] = pd.to_numeric(df["generation_count"], errors="coerce")
df["latency"] = pd.to_numeric(df["latency"], errors="coerce")
df["total_tokens"] = pd.to_numeric(df["total_tokens"], errors="coerce")

# --- Base grouped aggregations
# g = df.groupby("category", dropna=False)
g = df.groupby("batch_id", dropna=False)

base = g.agg(
    total_runs=("category", "size"),

    # total_tokens stats
    total_tokens_ave=("total_tokens", "mean"),   # 'ave' == average (same as mean)
    total_tokens_mean=("total_tokens", "mean"),
    total_tokens_median=("total_tokens", "median"),
    total_tokens_min=("total_tokens", "min"),
    total_tokens_max=("total_tokens", "max"),

    # latency stats
    latency_ave=("latency", "mean"),
    latency_mean=("latency", "mean"),
    latency_median=("latency", "median"),
    latency_min=("latency", "min"),
    latency_max=("latency", "max"),

    # status counts
    count_failures=("status", lambda s: (s == "failure").sum()),
    count_success=("status", lambda s: (s == "success").sum()),
    category=("category", assert_single_value),
    test_date=("test_date", assert_single_value),
)



# Traditional success rate
base["success_rate"] = (
    base["count_success"] / (base["count_success"] + base["count_failures"]).replace(0, np.nan)
) * 100
base["success_rate"] = base["success_rate"].fillna(0.0)

# --- Helper functions for the 4 weighted success rates
def _success_generation_counts(group):
    """Return the generation_count values for successful rows (as a 1D array)."""
    s = group.loc[group["status"] == "success", "generation_count"].dropna().to_numpy()
    return s

def _normalize(sum_scores, n_total):
    """Normalize by N_total and convert to percentage."""
    if n_total == 0:
        return 0.0
    return (sum_scores / n_total) * 100.0

def r_efficiency(group):
    # Sum over successes of (1 / g_i)
    gsucc = _success_generation_counts(group)
    return _normalize(np.sum(1.0 / gsucc) if gsucc.size else 0.0, len(group))

def r_exponential(group):
    # Sum over successes of exp(-0.5 * (g_i - 1))
    gsucc = _success_generation_counts(group)
    return _normalize(np.sum(np.exp(-0.5 * (gsucc - 1))) if gsucc.size else 0.0, len(group))

def r_linear(group):
    # Sum over successes of max(1 - 0.2*(g_i - 1), 0.1)
    gsucc = _success_generation_counts(group)
    if gsucc.size == 0:
        return 0.0
    scores = np.maximum(1.0 - 0.2 * (gsucc - 1), 0.1)
    return _normalize(np.sum(scores), len(group))

def r_robust(group):
    # Piecewise buckets per your spec
    gsucc = _success_generation_counts(group)
    if gsucc.size == 0:
        return 0.0
    scores = np.where(gsucc <= 2, 1.0,
             np.where(gsucc <= 4, 0.6,
             np.where(gsucc <= 6, 0.3, 0.1)))
    return _normalize(np.sum(scores), len(group))

# --- Compute the additional 4 metrics per type and combine
extra = g.apply(lambda grp: pd.Series({
    "R_efficiency": r_efficiency(grp),
    "R_exponential": r_exponential(grp),
    "R_linear":      r_linear(grp),
    "R_robust":      r_robust(grp),
}))

metrics_by_batch = base.join(extra)



metrics_by_batch['model']=metrics_by_batch["category"].str.split('_').str[0]
metrics_by_batch['processor']=metrics_by_batch["category"].str.split('_').str[1]
metrics_by_batch['parameters']=metrics_by_batch["category"].str.split('_').str[2]

# Optional: nicer ordering of columns
ordered_cols = [
    "model",
    "parameters",
    "processor", "category","total_runs",
    "count_failures","count_success","success_rate",
    "R_efficiency","R_exponential","R_linear","R_robust",
    "total_tokens_ave","total_tokens_mean","total_tokens_median","total_tokens_min","total_tokens_max",
    "latency_ave","latency_mean","latency_median","latency_min","latency_max","test_date"

]
metrics_by_batch = metrics_by_batch[ordered_cols]

metrics_by_batch.to_csv("metrics_by_batch.csv", index=True)

# Show the result
display(metrics_by_batch.round(3))

  extra = g.apply(lambda grp: pd.Series({


Unnamed: 0_level_0,model,parameters,processor,category,total_runs,count_failures,count_success,success_rate,R_efficiency,R_exponential,...,total_tokens_mean,total_tokens_median,total_tokens_min,total_tokens_max,latency_ave,latency_mean,latency_median,latency_min,latency_max,test_date
batch_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
codestral_34a5_tpusg_batch,codestral,P,tpusg,codestral_tpusg_P,30,27,3,10.000,2.611,2.421,...,13403.333,13441.5,9151,14988,95.992,95.992,96.655,48.59,129.67,08.24
codestral_366b_psg_batch,codestral,NP,psg,codestral_psg_NP,30,22,8,26.667,24.000,23.784,...,11268.967,13558.5,2645,15053,64.491,64.491,72.350,16.17,120.98,05.21
codestral_3adb_psg_batch,codestral,P,psg,codestral_psg_P,30,28,2,6.667,1.333,0.902,...,13345.700,13346.5,12551,14062,94.940,94.940,94.805,74.73,113.13,08.08
codestral_4b06_psg_batch,codestral,P,psg,codestral_psg_P,30,30,0,0.000,0.000,0.000,...,13853.367,13867.0,13007,14707,76.871,76.871,76.420,60.69,95.37,05.21
codestral_62f3_psg_batch,codestral,P,psg,codestral_psg_P,30,29,1,3.333,1.111,1.226,...,13399.167,13513.0,7931,14253,70.635,70.635,68.495,39.02,126.36,08.09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
qwen2.5-coder:32b_ae24_tpusg_batch,qwen2.5-coder:32b,NP,tpusg,qwen2.5-coder:32b_tpusg_NP,19,2,17,89.474,60.088,62.270,...,6734.632,5708.0,2587,15231,70.939,70.939,56.950,22.69,218.03,07.27
qwen2.5-coder:32b_c8a6_psg_batch,qwen2.5-coder:32b,P,psg,qwen2.5-coder:32b_psg_P,16,7,9,56.250,14.896,13.657,...,12492.000,13910.5,5099,14243,108.222,108.222,119.675,42.08,128.62,08.06
qwen2.5-coder:32b_c8a6_tpusg_batch,qwen2.5-coder:32b,P,tpusg,qwen2.5-coder:32b_tpusg_P,30,21,9,30.000,8.833,9.004,...,14032.900,15318.5,8976,16275,140.582,140.582,152.100,87.76,177.71,08.06
qwen2.5-coder:32b_ee11_psg_batch,qwen2.5-coder:32b,P,psg,qwen2.5-coder:32b_psg_P,30,20,10,33.333,7.278,5.579,...,13932.300,14001.0,8031,16079,127.461,127.461,123.945,68.78,180.49,08.02


In [None]:
print(list(metrics_by_batch.columns))
print(metrics_by_batch["test_date"].unique())

# Experiment: groupping by category 

df_test = metrics_by_batch.groupby(["processor","model","parameters"]).agg({"total_runs": "sum", "success_rate": "mean"}).reset_index()
print(df_test)


['model', 'parameters', 'processor', 'category', 'total_runs', 'count_failures', 'count_success', 'success_rate', 'R_efficiency', 'R_exponential', 'R_linear', 'R_robust', 'total_tokens_ave', 'total_tokens_mean', 'total_tokens_median', 'total_tokens_min', 'total_tokens_max', 'latency_ave', 'latency_mean', 'latency_median', 'latency_min', 'latency_max', 'test_date']
['08.24' '05.21' '08.08' '08.09' '08.14' '08.26' '07.27' '08.15' '08.23'
 '08.16' '08.13' '07.30' '08.12' '08.17' '08.19' '08.18' '08.20' '08.07'
 '08.10' '08.04' '08.05' '08.25' '08.21' '07.29' '08.22' '08.11' '08.03'
 '07.28' '08.01' '08.06' '07.31' '05.19' '08.02']
   processor              model parameters  total_runs  success_rate
0        psg          codestral         NP         102     47.738095
1        psg          codestral          P         120      7.500000
2        psg         gemma3:27b         NP          60     53.333333
3        psg         gemma3:27b          P          52     73.787879
4        psg       

In [150]:
def analyze_batch_level(df):
    # Total batches
    total_runs = df["total_runs"].sum()
    psg_runs = df[df["processor"] == "psg"]["total_runs"].sum()
    tpusg_runs = df[df["processor"] == "tpusg"]["total_runs"].sum()

    total_batches = len(df)
    psg_batches = (df["processor"] == "psg").sum()
    tpusg_batches = (df["processor"] == "tpusg").sum()

    models = sorted(df["model"].unique())
    params_counts = df["parameters"].value_counts()

    print(f"Total runs: {total_runs}: PSG/TPUSG runs: {psg_runs}/{tpusg_runs}")
    print(f"Total batches: {total_batches}: PSG/TPUSG runs: {psg_batches}/{tpusg_batches}")
    print(f"Models: {', '.join(models)}")
    print(f"Parameter conditions: P ({params_counts.get('P', 0)}) vs NP ({params_counts.get('NP', 0)})\n")

    # Convert parameters into logical form (True = P, False = NP) for clean indexing
    df = df.copy()
    df["param_bool"] = df["parameters"].map(lambda x: True if x == "P" else False)

    # Processor comparison matrix (mean success_rate per batch)
    pivot = (
        df.groupby(["model", "param_bool", "processor"])["success_rate"]
          .mean()
          .reset_index()
          .pivot(index=["model", "param_bool"], columns="processor", values="success_rate")
          .fillna(0)
          .rename_axis(["model_config", "parameters"])
    )

    print("üìà Complete Processor Comparison Matrix:")
    print("------------------------------------------------------------")
    display(pivot.round(1))
    print()

    print("üéØ PROCESSOR ADVANTAGE ANALYSIS:")
    print("------------------------------------------------------------")

    results = []
    for (model, param_bool), row in pivot.iterrows():
        param_label = "With params" if param_bool else "Without params"
        psg = row.get("psg", 0)
        tpusg = row.get("tpusg", 0)
        diff = tpusg - psg
        
        if abs(diff) < 1e-9:
            result = f"TIE (+0.0%)"
        elif diff > 0:
            result = f"TPUSG (+{diff:.1f}%)"
        else:
            result = f"PSG (‚àí{abs(diff):.1f}%)"

        print(f"{model} ({param_label}): PSG {psg:.1f}% vs TPUSG {tpusg:.1f}% ‚Üí {result}")
        results.append(result)

    total = len(results)
    psg_wins = sum("PSG" in r for r in results)
    tpusg_wins = sum("TPUSG" in r for r in results)
    ties = sum("TIE" in r for r in results)

    print("\nüìä SUMMARY:")
    print("------------------------------------------------------------")
    print(f"PSG wins: {psg_wins}/{total} configurations")
    print(f"TPUSG wins: {tpusg_wins}/{total} configurations")
    
    print(f"Ties: {ties}/{total} configurations")
analyze_batch_level(metrics_by_batch)

Total runs: 1774: PSG/TPUSG runs: 947/827
Total batches: 297: PSG/TPUSG runs: 149/148
Models: codestral, gemma3:27b, phi4, qwen2.5-coder:14b, qwen2.5-coder:32b
Parameter conditions: P (149) vs NP (148)

üìà Complete Processor Comparison Matrix:
------------------------------------------------------------


Unnamed: 0_level_0,processor,psg,tpusg
model_config,parameters,Unnamed: 2_level_1,Unnamed: 3_level_1
codestral,False,47.7,33.3
codestral,True,7.5,14.4
gemma3:27b,False,53.3,6.7
gemma3:27b,True,73.8,0.0
phi4,False,86.2,93.4
phi4,True,100.0,100.0
qwen2.5-coder:14b,False,52.2,93.8
qwen2.5-coder:14b,True,4.7,100.0
qwen2.5-coder:32b,False,57.8,96.5
qwen2.5-coder:32b,True,51.0,35.0



üéØ PROCESSOR ADVANTAGE ANALYSIS:
------------------------------------------------------------
codestral (Without params): PSG 47.7% vs TPUSG 33.3% ‚Üí PSG (‚àí14.4%)
codestral (With params): PSG 7.5% vs TPUSG 14.4% ‚Üí TPUSG (+6.9%)
gemma3:27b (Without params): PSG 53.3% vs TPUSG 6.7% ‚Üí PSG (‚àí46.7%)
gemma3:27b (With params): PSG 73.8% vs TPUSG 0.0% ‚Üí PSG (‚àí73.8%)
phi4 (Without params): PSG 86.2% vs TPUSG 93.4% ‚Üí TPUSG (+7.2%)
phi4 (With params): PSG 100.0% vs TPUSG 100.0% ‚Üí TIE (+0.0%)
qwen2.5-coder:14b (Without params): PSG 52.2% vs TPUSG 93.8% ‚Üí TPUSG (+41.6%)
qwen2.5-coder:14b (With params): PSG 4.7% vs TPUSG 100.0% ‚Üí TPUSG (+95.3%)
qwen2.5-coder:32b (Without params): PSG 57.8% vs TPUSG 96.5% ‚Üí TPUSG (+38.8%)
qwen2.5-coder:32b (With params): PSG 51.0% vs TPUSG 35.0% ‚Üí PSG (‚àí16.0%)

üìä SUMMARY:
------------------------------------------------------------
PSG wins: 4/10 configurations
TPUSG wins: 5/10 configurations
Ties: 1/10 configurations


### Older results

Total runs: 1,774: PSG/TPUSG runs: 947/827
Models : codestral, gemma3:27b, phi4, qwen2.5-coder:14b, qwen2.5-coder:32b
Parameter conditions: P (948) vs NP (826)

üìà Complete Processor Comparison Matrix:
------------------------------------------------------------
processor                       psg  tpusg
model             parameters              
codestral         False        47.7   33.3
                  True          7.5   14.4
gemma3:27b        False        53.3    6.7
                  True         73.8    0.0
phi4              False        86.2   93.4
                  True        100.0  100.0
qwen2.5-coder:14b False        52.2   93.8
                  True          4.7  100.0
qwen2.5-coder:32b False        57.8   96.5
                  True         51.0   35.0

üéØ PROCESSOR ADVANTAGE ANALYSIS:
------------------------------------------------------------
codestral (Without params): PSG 47.7% vs TPUSG 33.3% ‚Üí PSG (+-14.4%)
codestral (With params): PSG 7.5% vs TPUSG 14.4% ‚Üí TPUSG (++6.9%)
gemma3:27b (Without params): PSG 53.3% vs TPUSG 6.7% ‚Üí PSG (+-46.6%)
gemma3:27b (With params): PSG 73.8% vs TPUSG 0.0% ‚Üí PSG (+-73.8%)
phi4 (Without params): PSG 86.2% vs TPUSG 93.4% ‚Üí TPUSG (++7.2%)
phi4 (With params): PSG 100.0% vs TPUSG 100.0% ‚Üí TIE (++0.0%)
qwen2.5-coder:14b (Without params): PSG 52.2% vs TPUSG 93.8% ‚Üí TPUSG (++41.6%)
qwen2.5-coder:14b (With params): PSG 4.7% vs TPUSG 100.0% ‚Üí TPUSG (++95.3%)
qwen2.5-coder:32b (Without params): PSG 57.8% vs TPUSG 96.5% ‚Üí TPUSG (++38.7%)
qwen2.5-coder:32b (With params): PSG 51.0% vs TPUSG 35.0% ‚Üí PSG (+-16.0%)

üìä SUMMARY:
PSG wins: 4/10 configurations
TPUSG wins: 5/10 configurations
Ties: 1/10 configurations

üìã COMPLETE COMPARISON TABLE (Traditional + Weighted Success Rates):
----------------------------------------------------------------------------------------------------
                  category             model processor  parameters  success_rate  R_efficiency  R_exponential  R_linear  R_robust
         codestral_tpusg_P         codestral     tpusg        True        10.000         2.611          2.421     4.000     5.000
          codestral_psg_NP         codestral       psg       False        26.667        24.000         23.784    24.000    24.333
           codestral_psg_P         codestral       psg        True         6.667         1.333          0.902     1.333     2.000
           codestral_psg_P         codestral       psg        True         0.000         0.000          0.000     0.000     0.000
           codestral_psg_P         codestral       psg        True         3.333         1.111          1.226     2.000     2.000
          codestral_psg_NP         codestral       psg       False        16.667         7.222          7.273    10.000    11.333
        codestral_tpusg_NP         codestral     tpusg       False        33.333        14.389         15.519    20.667    24.667
          codestral_psg_NP         codestral       psg       False        80.952        45.238         51.312    64.762    75.238
          codestral_psg_NP         codestral       psg       False        66.667        30.714         34.830    46.667    55.714
        codestral_tpusg_NP         codestral     tpusg       False        33.333        11.500         11.182    16.667    20.333
           codestral_psg_P         codestral       psg        True        20.000         5.556          5.428     9.333    12.000
         codestral_tpusg_P         codestral     tpusg        True        26.667         7.611          7.587    12.667    15.000
         codestral_tpusg_P         codestral     tpusg        True         6.667         1.944          1.970     3.333     4.000
         gemma3:27b_psg_NP        gemma3:27b       psg       False        26.667        18.056         18.551    21.333    22.667
       gemma3:27b_tpusg_NP        gemma3:27b     tpusg       False         6.667         1.778          1.677     2.667     3.000
          gemma3:27b_psg_P        gemma3:27b       psg        True        56.667        12.333          9.425    15.333    23.000
        gemma3:27b_tpusg_P        gemma3:27b     tpusg        True         0.000         0.000          0.000     0.000     0.000
         gemma3:27b_psg_NP        gemma3:27b       psg       False        80.000        42.778         45.711    58.000    65.333
       gemma3:27b_tpusg_NP        gemma3:27b     tpusg       False         6.667         6.667          6.667     6.667     6.667
        gemma3:27b_tpusg_P        gemma3:27b     tpusg        True         0.000         0.000          0.000     0.000     0.000
          gemma3:27b_psg_P        gemma3:27b       psg        True        90.909        20.000         15.496    25.455    38.182
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        25.000         22.313    40.000    60.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        25.000         22.313    40.000    60.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False         0.000         0.000          0.000     0.000     0.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        20.000         13.534    20.000    30.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        25.000         22.313    40.000    60.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False         0.000         0.000          0.000     0.000     0.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        20.000         13.534    20.000    30.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
               phi4_psg_NP              phi4       psg       False       100.000        25.000         22.313    40.000    60.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False        60.000        31.000         30.480    36.667    41.333
             phi4_tpusg_NP              phi4     tpusg       False       100.000        79.722         82.704    90.000    96.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        20.000         13.534    20.000    30.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        50.000         60.653    80.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False         0.000         0.000          0.000     0.000     0.000
               phi4_psg_NP              phi4       psg       False       100.000        25.000         22.313    40.000    60.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        20.000         13.534    20.000    30.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        20.000         13.534    20.000    30.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        25.000         22.313    40.000    60.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        20.000         13.534    20.000    30.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
               phi4_psg_NP              phi4       psg       False         0.000         0.000          0.000     0.000     0.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False         0.000         0.000          0.000     0.000     0.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        20.000         13.534    20.000    30.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000        50.000         60.653    80.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
              phi4_tpusg_P              phi4     tpusg        True       100.000       100.000        100.000   100.000   100.000
               phi4_psg_NP              phi4       psg       False       100.000        33.333         36.788    60.000    60.000
                phi4_psg_P              phi4       psg        True       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
             phi4_tpusg_NP              phi4     tpusg       False       100.000       100.000        100.000   100.000   100.000
   qwen2.5-coder:14b_psg_P qwen2.5-coder:14b       psg        True         6.667         2.500          2.766     4.000     5.333
 qwen2.5-coder:14b_tpusg_P qwen2.5-coder:14b     tpusg        True       100.000       100.000        100.000   100.000   100.000
   qwen2.5-coder:14b_psg_P qwen2.5-coder:14b       psg        True         0.000         0.000          0.000     0.000     0.000
 qwen2.5-coder:14b_tpusg_P qwen2.5-coder:14b     tpusg        True       100.000       100.000        100.000   100.000   100.000
qwen2.5-coder:14b_tpusg_NP qwen2.5-coder:14b     tpusg       False       100.000       100.000        100.000   100.000   100.000
  qwen2.5-coder:14b_psg_NP qwen2.5-coder:14b       psg       False        53.333        29.722         32.339    40.667    45.333
qwen2.5-coder:14b_tpusg_NP qwen2.5-coder:14b     tpusg       False       100.000        72.333         76.369    86.000    93.667
qwen2.5-coder:14b_tpusg_NP qwen2.5-coder:14b     tpusg       False        81.818        70.455         71.179    74.545    78.182
  qwen2.5-coder:14b_psg_NP qwen2.5-coder:14b       psg       False        26.667        16.889         16.688    18.667    19.333
qwen2.5-coder:14b_tpusg_NP qwen2.5-coder:14b     tpusg       False        87.500        63.542         67.303    76.250    82.500
  qwen2.5-coder:14b_psg_NP qwen2.5-coder:14b       psg       False        76.667        38.167         41.569    54.000    62.333
qwen2.5-coder:14b_tpusg_NP qwen2.5-coder:14b     tpusg       False        93.333        73.056         74.962    82.000    85.333
   qwen2.5-coder:14b_psg_P qwen2.5-coder:14b       psg        True         7.407         1.667          1.328     2.222     3.333
qwen2.5-coder:14b_tpusg_NP qwen2.5-coder:14b     tpusg       False       100.000        76.667         79.234    87.000    94.000
  qwen2.5-coder:32b_psg_NP qwen2.5-coder:32b       psg       False        40.000        29.722         30.573    34.000    36.000
qwen2.5-coder:32b_tpusg_NP qwen2.5-coder:32b     tpusg       False       100.000        74.833         77.714    86.000    92.333
   qwen2.5-coder:32b_psg_P qwen2.5-coder:32b       psg        True        43.333         9.667          7.620    12.667    19.000
 qwen2.5-coder:32b_tpusg_P qwen2.5-coder:32b     tpusg        True        60.000        16.056         15.215    26.000    34.000
   qwen2.5-coder:32b_psg_P qwen2.5-coder:32b       psg        True        40.000        10.778          9.915    14.667    19.667
 qwen2.5-coder:32b_tpusg_P qwen2.5-coder:32b     tpusg        True        23.333         6.444          6.258    10.000    11.000
qwen2.5-coder:32b_tpusg_NP qwen2.5-coder:32b     tpusg       False       100.000        82.576         83.614    89.091    92.727
   qwen2.5-coder:32b_psg_P qwen2.5-coder:32b       psg        True        33.333         7.944          6.750    11.333    16.000
  qwen2.5-coder:32b_psg_NP qwen2.5-coder:32b       psg       False        60.000        41.333         41.424    46.000    48.667
qwen2.5-coder:32b_tpusg_NP qwen2.5-coder:32b     tpusg       False        96.667        73.611         77.543    86.000    94.000
   qwen2.5-coder:32b_psg_P qwen2.5-coder:32b       psg        True       100.000       100.000        100.000   100.000   100.000
  qwen2.5-coder:32b_psg_NP qwen2.5-coder:32b       psg       False        73.333        69.444         69.915    71.333    72.000
qwen2.5-coder:32b_tpusg_NP qwen2.5-coder:32b     tpusg       False        89.474        60.088         62.270    71.579    78.947
   qwen2.5-coder:32b_psg_P qwen2.5-coder:32b       psg        True        56.250        14.896         13.657    21.250    28.750
 qwen2.5-coder:32b_tpusg_P qwen2.5-coder:32b     tpusg        True        30.000         8.833          9.004    14.667    16.000
   qwen2.5-coder:32b_psg_P qwen2.5-coder:32b       psg        True        33.333         7.278          5.579     8.667    12.000
 qwen2.5-coder:32b_tpusg_P qwen2.5-coder:32b     tpusg        True        26.667         6.722          6.037    10.000    13.000