In [None]:
# ======= simple_focused_inference.py =======
import re, json, numpy as np, pandas as pd, torch
from tqdm import tqdm
from transformers import pipeline, AutoTokenizer
from sklearn.metrics import r2_score, mean_squared_error
from scipy.stats import spearmanr

# ---- file paths ----
TRANSCRIPTS_PATH = "/home/labuser/research/VLM-Project/data/Transcripts_All.xlsx"
SCORES_PATH      = "/home/labuser/research/VLM-Project/data/All_Scores.xlsx"
OUT_PATH         = "/home/labuser/research/VLM-Project/data/focused_results.csv"

# ---- load data ----
# Transcripts first, then scores; both are read with read_excel's default options.
transcripts_df, scores_df = (
    pd.read_excel(workbook) for workbook in (TRANSCRIPTS_PATH, SCORES_PATH)
)

# ---- ensure expected columns ----
def norm_col(df: pd.DataFrame, want: str) -> None:
    """Ensure ``df`` has a column named exactly ``want``, renaming in place if needed.

    If ``want`` is already a column label, nothing is changed. Otherwise the
    first column whose label equals ``want`` after stripping surrounding
    whitespace and ignoring case is renamed to ``want``.

    Args:
        df: Frame whose columns are inspected and, if necessary, renamed in place.
        want: The exact column name the caller expects to find.

    Raises:
        AssertionError: If no column matches ``want``.
    """
    if want in df.columns:
        return
    target = want.lower()
    for col in df.columns:
        # Column labels are not guaranteed to be strings (e.g. numeric
        # headers), so coerce before normalising instead of calling
        # .strip() on the raw label.
        if str(col).strip().lower() == target:
            df.rename(columns={col: want}, inplace=True)
            return
    raise AssertionError(f"Missing '{want}' column.")
# Normalise the columns that the rest of the script looks up by exact name.
norm_col(transcripts_df, "Participant")
norm_col(transcripts_df, "Transcripts")
# scores_df is joined on "Participant" in the merge step further down; checking
# it here fails fast instead of after the whole inference loop has run.
norm_col(scores_df, "Participant")

# ---- load model ----
print("Loading model...")
device = 0 if torch.cuda.is_available() else -1
model_name = "microsoft/phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# `pad_token_id` exists as an attribute even when no pad token is configured
# (its value is then None), so a getattr() default would never be used.
# Fall back on the *value* instead.
pad_id = tokenizer.pad_token_id
if pad_id is None:
    pad_id = tokenizer.eos_token_id
generator = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,  # reuse the tokenizer loaded above instead of loading it twice
    device=device,
    torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
    max_new_tokens=16,
    # Greedy decoding: sampling-only settings (temperature, top_p) are left out
    # because they have no effect when do_sample=False.
    do_sample=False,
    pad_token_id=pad_id,
    return_full_text=False,
)
print(f"✅ Model loaded: {model_name}")

# ---- prompt for Focused ----
# Instruction template for the "Focused" rating. The scoring loop appends the
# transcript text followed by "\n\nScore:" to this string before generation.
FOCUSED_PROMPT = """You are an evaluator.

Give ONE overall score for the candidate’s **Focused** quality (0–9, decimals allowed).
Definition:
- Stays on-topic and directly answers questions.
- Clear, organized, and logical.
- Uses relevant details; avoids unrelated tangents.
- Ignore pleasantries or filler words.

0 = completely unfocused, 5 = somewhat focused, 9 = consistently direct and organized.
Return only a number between 0 and 9.

Transcript:
"""

# ---- helper to extract number ----
# Optional sign, optional integer part, optional decimal point, digits.
_NUMBER_RE = re.compile(r'[-+]?\d*\.?\d+')


def extract_score(text):
    """Return the first number in ``text`` that lies within [0, 9].

    ``text`` is converted with ``str()`` first, so non-string inputs
    (numbers, ``None``) are accepted. Numbers outside the 0-9 range are
    skipped and the scan continues with the next match.

    Args:
        text: Model output (or any object) to scan for a score.

    Returns:
        The first in-range value as a float, or ``None`` if there is none.
    """
    for token in _NUMBER_RE.findall(str(text)):
        try:
            value = float(token)
        except ValueError:
            # Defensive only: every regex match should already parse as a float.
            continue
        if 0.0 <= value <= 9.0:
            return value
    return None

# ---- scoring loop ----
# One generation call per transcript row; rows that cannot be scored keep a
# Predicted_Focused of None so they still appear in the output.
preds = []
for _, row in tqdm(transcripts_df.iterrows(), total=len(transcripts_df)):
    raw_transcript = row["Transcripts"]
    score = None
    # A missing cell arrives as NaN; str() would turn it into the literal text
    # "nan" and the model would be asked to grade that. Leave such rows (and
    # blank transcripts) unscored instead.
    if not pd.isna(raw_transcript) and str(raw_transcript).strip():
        prompt = FOCUSED_PROMPT + str(raw_transcript) + "\n\nScore:"
        out = generator(prompt)
        if isinstance(out, list) and out and "generated_text" in out[0]:
            txt = out[0]["generated_text"]
        else:
            # Unexpected output shape: fall back to its string form.
            txt = str(out)
        score = extract_score(txt)
    preds.append({"Participant": row["Participant"], "Predicted_Focused": score})

# Explicit columns keep the frame mergeable on "Participant" even with zero rows.
pred_df = pd.DataFrame(preds, columns=["Participant", "Predicted_Focused"])

# ---- merge + evaluate ----
# Left join keeps every prediction, including participants without a reference score.
final_df = pred_df.merge(scores_df, on="Participant", how="left")
if "Focused" in final_df.columns:
    y_true = pd.to_numeric(final_df["Focused"], errors="coerce")
    y_pred = pd.to_numeric(final_df["Predicted_Focused"], errors="coerce")
    # Only rows with both a reference score and a parsed prediction are evaluated.
    mask = y_true.notna() & y_pred.notna()
    n_valid = int(mask.sum())
    # R² is undefined for fewer than two samples, so require at least two.
    r2  = r2_score(y_true[mask], y_pred[mask]) if n_valid >= 2 else np.nan
    mse = mean_squared_error(y_true[mask], y_pred[mask]) if n_valid >= 1 else np.nan
    rho = spearmanr(y_true[mask], y_pred[mask])[0] if n_valid >= 3 else np.nan
    # Blank separator line, printed whether or not R² is available.
    print()
    for label, value in (("R²", r2), ("MSE", mse), ("Spearman ρ", rho)):
        print(f"{label}: n/a" if np.isnan(value) else f"{label}: {value:.4f}")
else:
    print("\n⚠️ 'Focused' column not found; only predictions generated.")

# ---- save ----
# Write the merged frame to OUT_PATH as CSV, omitting the row index.
final_df.to_csv(path_or_buf=OUT_PATH, index=False)
print(f"\n✅ Results saved to: {OUT_PATH}")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


[Info] GPU load failed, trying CPU. Reason: CUDA out of memory. Tried to allocate 188.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 32.12 MiB is free. Process 4588 has 14.71 GiB memory in use. Of the allocated memory 14.33 GiB is allocated by PyTorch, and 257.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]