In [1]:
import pandas as pd
import os
import time
from dotenv import load_dotenv
from openai import OpenAI

# === Load API key ===
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# === Config ===
TASK_CSV = "task_list.csv"
TEMP = 0.5
OUTPUT_CSV = f"{TEMP}_rec_RQ1.csv"  

# === Load entire task list ===
df = pd.read_csv(TASK_CSV)
df = df.reset_index(drop=True)

# Prepare tasks and sources
tasks = df["description"].dropna().tolist()
sources = df["source"].tolist()

# === OpenAI Client ===
client = OpenAI(api_key=api_key)

# === Store all results ===
results = []

# === 5 Rounds of recommendations ===
for run_id in range(1, 6):
    print(f"\n Starting run {run_id}/5 with temperature={TEMP}...")

    for i, (task, source) in enumerate(zip(tasks, sources)):
        print(f" Run {run_id} | Task {i+1}/{len(tasks)} ({source.upper()}): {task}")

        # Prompt for GPT
        prompt = (
            "You are a Javascript backend developer.\n"
            "Recommend an npm library based on the given task.\n"
            "Return only the library name.\n\n"
            f"Task: {task}\n"
            "library:\n"
        )

        try:
            start_time = time.time()
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=TEMP,
                timeout=60,
            )
            duration = time.time() - start_time
            answer = response.choices[0].message.content.strip()
            print(f"  Recommended: {answer} (in {duration:.1f}s)")
        except Exception as e:
            answer = "Error"
            print(" GPT error:", e)

        # Append to results
        results.append({
            "Run": run_id,
            "Task_Index": i,
            "Source": source,
            "Description": task,
            "Temperature": TEMP,
            "Recommended_Library": answer
        })

        time.sleep(1)  # Optional: be kind to the API

# === Save all results ===
output_df = pd.DataFrame(results)
output_df.to_csv(OUTPUT_CSV, index=False)
print(f"\n All results saved to: {OUTPUT_CSV}")



 Starting run 1/5 with temperature=0.5...
 Run 1 | Task 1/1459 (TOP): Node.js body parsing middleware
  Recommended: body-parser (in 2.1s)
 Run 1 | Task 2/1459 (TOP): Add callbacks to requests in flight to avoid async duplication
  Recommended: p-queue (in 1.0s)
 Run 1 | Task 3/1459 (TOP): The ultimate javascript content-type utility.
  Recommended: content-type-utils (in 0.7s)
 Run 1 | Task 4/1459 (TOP): Media Type Database
  Recommended: mongoose (in 0.5s)
 Run 1 | Task 5/1459 (TOP): Regular expression for matching ANSI escape codes
  Recommended: ansi-regex (in 0.6s)
 Run 1 | Task 6/1459 (TOP): A JSON with CSS color names
  Recommended: color-name (in 1.2s)
 Run 1 | Task 7/1459 (TOP): A querystring parser and serializer with nesting support
  Recommended: qs (in 6.8s)
 Run 1 | Task 8/1459 (TOP): Safer Node.js Buffer API
  Recommended: safe-buffer (in 0.7s)
 Run 1 | Task 9/1459 (TOP): Port of the OpenBSD `bcrypt_pbkdf` function to pure Javascript
  Recommended: bcryptjs (in 0.5s)
 R

KeyboardInterrupt: 

Check matching

In [6]:
import pandas as pd

# === Config ===
RECOMMENDED_CSV = "0_rec_RQ1.csv"        # has Run, Recommended_Library, Source
DESCRIPTIONS_CSV = "descriptions_output.csv"  # has library, source, description
OUTPUT_CSV = "0_rec_RQ1_checked.csv"

# === Load data ===
rec_df = pd.read_csv(RECOMMENDED_CSV)
desc_df = pd.read_csv(DESCRIPTIONS_CSV)

# normalize text
rec_df['Recommended_Library'] = rec_df['Recommended_Library'].astype(str).str.strip().str.lower()
rec_df['Source'] = rec_df['Source'].astype(str).str.strip().str.lower()
desc_df['library'] = desc_df['library'].astype(str).str.strip().str.lower()
desc_df['source'] = desc_df['source'].astype(str).str.strip().str.lower()

# sets for fast lookup
exact_set = set(zip(desc_df['library'], desc_df['source']))
lib_set = set(desc_df['library'])

# classify each row
def classify(row):
    lib = row['Recommended_Library']
    src = row['Source']
    if (lib, src) in exact_set:
        return "Exact Match"
    elif lib in lib_set:
        return "In List"
    else:
        return "Not in List"

rec_df['Status'] = rec_df.apply(classify, axis=1)

# === summary per run + source ===
summary = rec_df.groupby(['Run','Source','Status']).size().reset_index(name='Count')

# pivot into columns
pivot = summary.pivot_table(index=['Run','Source'], columns='Status', values='Count', fill_value=0).reset_index()

# === average across runs ===
avg_summary = pivot.groupby('Source')[['Exact Match','In List','Not in List']].mean().reset_index()
avg_summary['Run'] = 'Average'

# combine
final = pd.concat([pivot, avg_summary], ignore_index=True)

# save
final.to_csv(OUTPUT_CSV, index=False)

print("‚úÖ Done! Results saved to", OUTPUT_CSV)
print(final)


‚úÖ Done! Results saved to 0_rec_RQ1_checked.csv
Status      Run  Source  Exact Match  In List  Not in List
0             1  bottom         43.0     25.0        391.0
1             1  middle        111.0     17.0        372.0
2             1     top        165.0      6.0        329.0
3             2  bottom         42.0     20.0        397.0
4             2  middle        113.0     16.0        371.0
5             2     top        169.0      6.0        325.0
6             3  bottom         42.0     23.0        394.0
7             3  middle        113.0     17.0        370.0
8             3     top        170.0      7.0        323.0
9             4  bottom         41.0     24.0        394.0
10            4  middle        113.0     19.0        368.0
11            4     top        171.0      6.0        323.0
12            5  bottom         41.0     25.0        393.0
13            5  middle        112.0     19.0        369.0
14            5     top        170.0      6.0        324.0
15     

Generate Description

In [None]:
import pandas as pd
import os
import time
from dotenv import load_dotenv
from openai import OpenAI

# === Config ===
INPUT_CSV = "lib_name.csv"
TEMP = 0.5
OUTPUT_CSV = f"{TEMP}_desc_RQ2.csv"

# === Load API key ===
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# === Load libraries ===
df = pd.read_csv(INPUT_CSV)

# Make sure lib_name column exists
if "lib_name" not in df.columns and "Recommended_Library" in df.columns:
    df["lib_name"] = df["Recommended_Library"]

libs = df["lib_name"].dropna().tolist()
sources = df["source"].tolist()

# === Store results ===
results = []

# === Run 5 rounds ===
for run_id in range(1, 6):
    print(f"\nüöÄ Starting round {run_id}/5 with temp={TEMP}...")

    for i, (lib, source) in enumerate(zip(libs, sources)):
        print(f"üîç Run {run_id} | {i+1}/{len(libs)}: {lib}")

        prompt = (
            "You are a JavaScript backend developer.\n"
            f"Provide a concise description for the JavaScript library named '{lib}'."
        )

        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=TEMP,
                timeout=20,
            )
            generated_desc = response.choices[0].message.content.strip()
            print(f"‚úÖ {lib}: {generated_desc}")

        except Exception as e:
            generated_desc = "Error"
            print(f"‚ùå Error generating description for {lib}: {e}")

        # Save row with run info
        results.append({
            "Run": run_id,
            "Lib_Name": lib,
            "Source": source,
            "Temperature": TEMP,
            "Generated_Description": generated_desc
        })

        time.sleep(1)  # Respect API rate limits

# === Save all results ===
output_df = pd.DataFrame(results)
output_df.to_csv(OUTPUT_CSV, index=False)
print(f"\nüìÅ Finished! Saved all 5 rounds to: {OUTPUT_CSV}")


In [1]:
import pandas as pd
import os
import time
from dotenv import load_dotenv
from openai import OpenAI

# === Config ===
INPUT_CSV = "lib_name.csv"
TEMP = 0.7
OUTPUT_CSV = f"{TEMP}_desc_RQ2.csv"

# === Load API key ===
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# === Load libraries ===
df = pd.read_csv(INPUT_CSV)

if "lib_name" not in df.columns and "Recommended_Library" in df.columns:
    df["lib_name"] = df["Recommended_Library"]

libs = df["lib_name"].dropna().tolist()
sources = df["source"].tolist()

results = []

for run_id in range(1, 6):
    print(f"\nüöÄ Starting round {run_id}/5 with temp={TEMP}...")

    for i, (lib, source) in enumerate(zip(libs, sources)):
        print(f"üîç Run {run_id} | {i+1}/{len(libs)}: {lib}")

        prompt = (
            "You are a JavaScript backend developer.\n"
            f"Provide a concise description for the JavaScript library named '{lib}'."
        )

        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=TEMP,
                timeout=20,
            )
            generated_desc = response.choices[0].message.content.strip()
        except Exception as e:
            print(f"‚ùå Error generating description for {lib}: {e}")
            generated_desc = "Error"

        results.append({
            "Run": run_id,
            "Lib_Name": lib,
            "Source": source,
            "Temperature": TEMP,
            "Generated_Description": generated_desc
        })

        time.sleep(1)  # rate-limit safety

    pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)
    print(f"üíæ Progress saved after round {run_id} -> {OUTPUT_CSV}")





üöÄ Starting round 1/5 with temp=0.7...
üîç Run 1 | 1/1500: body-parser
üîç Run 1 | 2/1500: inflight
üîç Run 1 | 3/1500: mime-types
üîç Run 1 | 4/1500: mime-db
üîç Run 1 | 5/1500: ansi-regex
üîç Run 1 | 6/1500: color-name
üîç Run 1 | 7/1500: qs
üîç Run 1 | 8/1500: safe-buffer
üîç Run 1 | 9/1500: node-bcrypt-pbkdf
üîç Run 1 | 10/1500: cliui
üîç Run 1 | 11/1500: array-flatten
üîç Run 1 | 12/1500: babel-loader
üîç Run 1 | 13/1500: aproba
üîç Run 1 | 14/1500: string-width
üîç Run 1 | 15/1500: execa
üîç Run 1 | 16/1500: normalize-path
üîç Run 1 | 17/1500: browserslist
üîç Run 1 | 18/1500: ansi-escapes
üîç Run 1 | 19/1500: wrap-ansi
üîç Run 1 | 20/1500: node-sshpk
üîç Run 1 | 21/1500: npm-run-path
üîç Run 1 | 22/1500: node-cross-spawn
üîç Run 1 | 23/1500: read-pkg-up
üîç Run 1 | 24/1500: bn.js
üîç Run 1 | 25/1500: define-properties
üîç Run 1 | 26/1500: escape-string-regexp
üîç Run 1 | 27/1500: is-buffer
üîç Run 1 | 28/1500: globby
üîç Run 1 | 29/1500: camelcase

Check Description

In [4]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# === CONFIG ===
GEN_FILE = "0.7_desc_RQ2.csv"          # has 5 rounds: Run, lib_name/Lib_Name, Description/Generated_Description, (optional) Source
REF_FILE = "descriptions_output.csv"   # has: library, source, description
OUTPUT_CSV = "0.7_RQ2_similarity.csv"  # ONE FILE: per-library average similarity across 5 runs

# === LOAD DATA ===
gen = pd.read_csv(GEN_FILE)
ref = pd.read_csv(REF_FILE)

# --- normalize column names in 'gen'
if "Lib_Name" in gen.columns:
    gen["lib_name"] = gen["Lib_Name"]
elif "lib_name" not in gen.columns:
    raise ValueError("Need a 'Lib_Name' or 'lib_name' column in the generated file.")

if "Generated_Description" in gen.columns:
    gen["gen_desc"] = gen["Generated_Description"]
elif "Description" in gen.columns:
    gen["gen_desc"] = gen["Description"]
else:
    raise ValueError("Need a 'Generated_Description' or 'Description' column in the generated file.")

if "Run" not in gen.columns:
    gen["Run"] = 1  # fallback if single run
if "Source" not in gen.columns and "source" in gen.columns:
    gen["Source"] = gen["source"]

# --- normalize join keys
gen["lib_norm"] = gen["lib_name"].astype(str).str.strip().str.lower()
ref["lib_norm"] = ref["library"].astype(str).str.strip().str.lower()

# keep only libs that exist in reference
merged = gen.merge(
    ref[["lib_norm", "library", "source", "description"]],
    on="lib_norm",
    how="inner"
)

# text cleanup
merged["gen_desc"] = merged["gen_desc"].astype(str).fillna("")
merged["description"] = merged["description"].astype(str).fillna("")

# keep a nice original-cased name & source to report later
# prefer name from reference if available, else from gen
merged["Lib_Name_Display"] = merged["library"].fillna(merged["lib_name"])
# choose a source column to report (prefer gen Source if present)
if "Source" in merged.columns:
    merged["Source_Display"] = merged["Source"].fillna(merged["source"])
else:
    merged["Source_Display"] = merged["source"]

# === MODEL (load once)
model = SentenceTransformer("all-MiniLM-L6-v2")

# pre-embed reference descriptions per lib (avoid recompute)
ref_map = (
    merged[["lib_norm", "description"]]
    .drop_duplicates()
    .set_index("lib_norm")["description"]
    .to_dict()
)
ref_vecs_map = {
    k: v for k, v in zip(ref_map.keys(), model.encode(list(ref_map.values()), convert_to_tensor=False))
}

# compute similarity for EVERY row (all runs), then aggregate per library
sims_all = []
for run_id, g in merged.groupby("Run", sort=True):
    libs = g["lib_norm"].tolist()
    new_texts = g["gen_desc"].tolist()

    new_vecs = model.encode(new_texts, convert_to_tensor=False)
    old_vecs = np.stack([ref_vecs_map[l] for l in libs], axis=0)

    sims = np.array([
        cosine_similarity([new_vecs[i]], [old_vecs[i]])[0][0]
        for i in range(len(new_vecs))
    ])

    temp = g.copy()
    temp["Cosine_Similarity"] = sims
    sims_all.append(temp)

all_rows = pd.concat(sims_all, ignore_index=True)

# === AGGREGATE: per-library average across runs ===
def first_nonnull(s):
    for x in s:
        if pd.notna(x):
            return x
    return np.nan

agg = (
    all_rows
    .groupby("lib_norm", as_index=False)
    .agg(
        Lib_Name=("Lib_Name_Display", first_nonnull),
        Source=("Source_Display", first_nonnull),
        N_Runs=("Cosine_Similarity", "count"),
        Mean_Cosine_Similarity=("Cosine_Similarity", "mean"),
        Std_Cosine_Similarity=("Cosine_Similarity", "std"),
        Median_Cosine_Similarity=("Cosine_Similarity", "median"),
    )
    .sort_values(["Mean_Cosine_Similarity", "N_Runs"], ascending=[False, False])
)

# optional: round for pretty output
for col in ["Mean_Cosine_Similarity", "Std_Cosine_Similarity", "Median_Cosine_Similarity"]:
    agg[col] = agg[col].astype(float).round(4)

# drop lib_norm helper and save ONE summary file
final_out = agg.drop(columns=["lib_norm"])
final_out.to_csv(OUTPUT_CSV, index=False)

print("‚úÖ Saved per-library averages across 5 rounds ->", OUTPUT_CSV)
print(final_out.head())


‚úÖ Saved per-library averages across 5 rounds -> 0.7_RQ2_similarity.csv
                   Lib_Name  Source  N_Runs  Mean_Cosine_Similarity  \
929                   konva  middle       5                  0.9219   
298         couchdb-fauxton  bottom       5                  0.9013   
1412           toml-require  middle       5                  0.8946   
1422           tweetnacl-js     top       5                  0.8899   
1075  node-get-all-js-files  bottom       5                  0.8885   

      Std_Cosine_Similarity  Median_Cosine_Similarity  
929                  0.0098                    0.9256  
298                  0.0063                    0.9016  
1412                 0.0227                    0.8983  
1422                 0.0359                    0.9017  
1075                 0.0117                    0.8870  


In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# ==== CONFIG =====
FILES = {
    0.2: "0.2_RQ2_similarity.csv",
    0.5: "0.5_RQ2_similarity.csv",
    0.7: "0.7_RQ2_similarity.csv",
}
VAL_COL = "Mean_Cosine_Similarity"  # change if you named it differently
SOURCE_COL = "Source"               # Top / Middle / Bottom
OUTDIR = Path("figs_rq2")
OUTDIR.mkdir(exist_ok=True)

# ==== LOAD & CONCAT ====
dfs = []
for t, fp in FILES.items():
    df = pd.read_csv(fp)
    if VAL_COL not in df.columns:
        raise ValueError(f"{fp} must contain column '{VAL_COL}'")
    if SOURCE_COL not in df.columns:
        # if missing, fill unknown
        df[SOURCE_COL] = "Unknown"
    df["Temperature"] = t
    # keep only needed columns
    df = df[[VAL_COL, SOURCE_COL, "Temperature"]].dropna()
    dfs.append(df)

all_df = pd.concat(dfs, ignore_index=True)

# ==== SUMMARY: overall by temp ====
def summary(df, label):
    s = df[VAL_COL].describe(percentiles=[0.25, 0.5, 0.75])
    print(f"\n--- {label} ---")
    print(f"N={len(df)} | mean={df[VAL_COL].mean():.4f} | std={df[VAL_COL].std():.4f} "
          f"| Q1={s['25%']:.4f} | median={s['50%']:.4f} | Q3={s['75%']:.4f} "
          f"| min={df[VAL_COL].min():.4f} | max={df[VAL_COL].max():.4f}")

for t, grp in all_df.groupby("Temperature"):
    summary(grp, f"Temperature {t}")

# ==== SUMMARY: by temp √ó tier ====
for t, grp in all_df.groupby("Temperature"):
    print(f"\n=== Per-tier @ Temperature {t} ===")
    for tier, g2 in grp.groupby(SOURCE_COL):
        summary(g2, f"{tier}")

# ==== 1) BOX PLOT with median & IQR annotations ====
temps = sorted(all_df["Temperature"].unique())
data_by_temp = [all_df[all_df["Temperature"] == t][VAL_COL].values for t in temps]

fig, ax = plt.subplots(figsize=(7,5), dpi=150)
bp = ax.boxplot(data_by_temp, patch_artist=False, labels=[str(t) for t in temps])
ax.set_title("Distribution of Cosine Similarity by Temperature (RQ2)")
ax.set_xlabel("Temperature")
ax.set_ylabel("Cosine Similarity")
ax.set_ylim(0, 1)

# annotate medians & IQR
for i, vals in enumerate(data_by_temp, start=1):
    if len(vals) == 0: continue
    q1, med, q3 = np.percentile(vals, [25, 50, 75])
    ax.text(i+0.1, med, f"med={med:.3f}", va="center", fontsize=9)
    ax.text(i+0.1, q3, f"Q3={q3:.3f}", va="bottom", fontsize=8)
    ax.text(i+0.1, q1, f"Q1={q1:.3f}", va="top", fontsize=8)

fig.tight_layout()
fig.savefig(OUTDIR / "rq2_boxplot_by_temp.png", bbox_inches="tight")
plt.close(fig)

# ==== 2) "VIOLIN"-LIKE DENSITY (matplotlib only) ====
# we'll draw simple mirrored density curves for each temp
from scipy.stats import gaussian_kde

fig, ax = plt.subplots(figsize=(7,5), dpi=150)
x = np.linspace(0, 1, 400)

for idx, t in enumerate(temps, start=1):
    vals = all_df[all_df["Temperature"] == t][VAL_COL].values
    if len(vals) < 2:
        continue
    kde = gaussian_kde(vals, bw_method="scott")
    y = kde(x)
    y = y / y.max() * 0.35  # normalize width

    ax.fill_betweenx(x, idx - y, idx + y, alpha=0.25)
    ax.plot([idx]*len(x), x, linewidth=0.8)
    ax.text(idx, 1.02, f"t={t}", ha="center")

ax.set_xlim(0.5, len(temps) + 0.5)
ax.set_ylim(0, 1)
ax.set_xticks([])
ax.set_title("Cosine Similarity Distributions (KDE, RQ2)")
ax.set_ylabel("Cosine Similarity")

fig.tight_layout()
fig.savefig(OUTDIR / "rq2_violin_like_by_temp.png", bbox_inches="tight")
plt.close(fig)

# ==== 3) TIER-WISE BOX PLOTS (Top/Middle/Bottom) ====
tiers = ["Top", "Middle", "Bottom"]
fig, axes = plt.subplots(1, len(tiers), figsize=(12,4), dpi=150, sharey=True)

for ax, tier in zip(axes, tiers):
    tier_df = all_df[all_df[SOURCE_COL].str.lower() == tier.lower()]
    if tier_df.empty:
        ax.set_title(f"{tier} (no data)")
        continue
    d = [tier_df[tier_df["Temperature"] == t][VAL_COL].values for t in temps]
    ax.boxplot(d, labels=[str(t) for t in temps])
    ax.set_title(f"{tier}")
    ax.set_xlabel("Temp")
    ax.set_ylim(0, 1)

axes[0].set_ylabel("Cosine Similarity")
fig.suptitle("Tier-wise Cosine Similarity by Temperature (RQ2)")
fig.tight_layout()
fig.savefig(OUTDIR / "rq2_boxplot_by_temp_tierwise.png", bbox_inches="tight")
plt.close(fig)

print(f"\n‚úÖ Saved figures to: {OUTDIR.resolve()}")



--- Temperature 0.2 ---
N=1500 | mean=0.5737 | std=0.1988 | Q1=0.4707 | median=0.6090 | Q3=0.7232 | min=-0.0860 | max=0.9189

--- Temperature 0.5 ---
N=1500 | mean=0.5725 | std=0.1985 | Q1=0.4683 | median=0.6073 | Q3=0.7206 | min=-0.0775 | max=0.9114

--- Temperature 0.7 ---
N=1500 | mean=0.5729 | std=0.1982 | Q1=0.4687 | median=0.6079 | Q3=0.7191 | min=-0.0728 | max=0.9219

=== Per-tier @ Temperature 0.2 ===

--- bottom ---
N=500 | mean=0.5327 | std=0.2214 | Q1=0.4063 | median=0.5692 | Q3=0.7050 | min=-0.0779 | max=0.9014

--- middle ---
N=500 | mean=0.6039 | std=0.1913 | Q1=0.4976 | median=0.6525 | Q3=0.7508 | min=-0.0860 | max=0.9189

--- top ---
N=500 | mean=0.5843 | std=0.1745 | Q1=0.4998 | median=0.6058 | Q3=0.7055 | min=-0.0527 | max=0.9089

=== Per-tier @ Temperature 0.5 ===

--- bottom ---
N=500 | mean=0.5325 | std=0.2207 | Q1=0.4073 | median=0.5719 | Q3=0.6977 | min=-0.0775 | max=0.9005

--- middle ---
N=500 | mean=0.6026 | std=0.1914 | Q1=0.4894 | median=0.6508 | Q3=0.7501 

  bp = ax.boxplot(data_by_temp, patch_artist=False, labels=[str(t) for t in temps])
  ax.boxplot(d, labels=[str(t) for t in temps])
  ax.boxplot(d, labels=[str(t) for t in temps])
  ax.boxplot(d, labels=[str(t) for t in temps])



‚úÖ Saved figures to: C:\Users\HP\Documents\GitHub\mahidol-Intern25\Praewa\Github Action Research\figs_rq2


In [7]:
import pandas as pd
import numpy as np

# === CONFIG ===
GEN_FILES = {
    0.2: "0.2_desc_RQ2.csv",
    0.5: "0.5_desc_RQ2.csv",
    0.7: "0.7_desc_RQ2.csv",
}
REF_FILE = "descriptions_output.csv"
OUTPUT_CSV = "RQ2_wordcount_diff_summary.csv"

# === LOAD REF ===
ref = pd.read_csv(REF_FILE)
ref["lib_norm"] = ref["library"].astype(str).str.strip().str.lower()
ref["ref_len"] = ref["description"].astype(str).str.split().apply(len)

summary_rows = []

for temp, gen_file in GEN_FILES.items():
    gen = pd.read_csv(gen_file)

    # normalize columns
    if "Lib_Name" in gen.columns:
        gen["lib_name"] = gen["Lib_Name"]
    if "Generated_Description" in gen.columns:
        gen["gen_desc"] = gen["Generated_Description"]
    elif "Description" in gen.columns:
        gen["gen_desc"] = gen["Description"]

    gen["lib_norm"] = gen["lib_name"].astype(str).str.strip().str.lower()

    # merge with ref descriptions
    merged = gen.merge(ref[["lib_norm", "ref_len"]], on="lib_norm", how="inner")
    merged["gen_len"] = merged["gen_desc"].astype(str).str.split().apply(len)
    merged["word_diff"] = (merged["gen_len"] - merged["ref_len"]).abs()

    # average per library across 5 runs
    lib_avg = merged.groupby("lib_norm")["word_diff"].mean().reset_index()

    # overall average across all libraries
    overall_avg = lib_avg["word_diff"].mean()

    summary_rows.append({
        "Temperature": temp,
        "Num_Libraries": lib_avg.shape[0],
        "Mean_Word_Diff": round(overall_avg, 2),
        "Std_Word_Diff": round(lib_avg["word_diff"].std(), 2)
    })

# save summary
summary_df = pd.DataFrame(summary_rows)
summary_df.to_csv(OUTPUT_CSV, index=False)

print(summary_df)


   Temperature  Num_Libraries  Mean_Word_Diff  Std_Word_Diff
0          0.2           1500           25.45           7.63
1          0.5           1500           25.79           6.95
2          0.7           1500           25.42           6.74
