In [None]:
import pandas as pd
from openai import OpenAI

# Chat client aimed at an OpenAI-compatible endpoint on the local machine
# (127.0.0.1, port 11434) instead of the hosted OpenAI API.
# NOTE(review): 'ollama' looks like a placeholder key for a local Ollama
# server, which presumably does not validate it -- confirm.
client = OpenAI(base_url='http://127.0.0.1:11434/v1', api_key='ollama')

# Load the linked X-ray report table from disk.
csv_file_path = "/home/intern08/Documents/chest_xray_project/archive/linked_xray_data(1).csv"
df = pd.read_csv(csv_file_path)

# Normalise both report-text columns the same way: missing values become "",
# every value is coerced to str, and surrounding whitespace is trimmed.
for text_col in ('findings', 'impression'):
    df[text_col] = df[text_col].fillna("").astype(str).str.strip()

# Keep a row when at least one of the two text fields has content
# (i.e. drop rows where both are empty).
has_findings = df['findings'] != ""
has_impression = df['impression'] != ""
df = df[has_findings | has_impression]


def _join_unique(values, sep=" "):
    """Join the distinct, non-empty entries of *values* with *sep*.

    Missing values are dropped, the remainder is converted to ``str``, and
    empty strings are removed so they cannot contribute stray separators
    (leading, trailing or doubled). First-seen order is preserved.

    Args:
        values: The values of one column for a single group.
        sep: Separator placed between the surviving entries.

    Returns:
        The joined string, or ``""`` when nothing survives the filtering.
    """
    cleaned = pd.Series(values).dropna().astype(str)
    cleaned = cleaned[cleaned != ""]
    return sep.join(cleaned.unique())


# Collapse all rows that share a uid into a single row.
# The text columns were filled with "" upstream, so dropna() alone would let
# empty strings through; _join_unique filters them out explicitly. For
# 'projection', missing values are dropped *before* the str conversion so the
# literal text "nan" never ends up in the comma-separated list.
df_grouped = (
    df.groupby('uid', as_index=False)
      .agg({
          'findings': _join_unique,
          'impression': _join_unique,
          'projection': lambda x: _join_unique(x, sep=","),
      })
)

# System instruction sent with every request: it restricts the model's reply
# to the three tags <think>, <cat> and <area> and lists the formatting rules
# for each. Written one prompt line per list entry; the entries are joined
# with newlines, and the final empty entry produces the trailing newline.
system_prompt = "\n".join([
    "You are a radiologist. OUTPUT ONLY these three tags with no extra text or metadata:",
    "<think>...</think>",
    "<cat>...</cat>",
    "<area>...</area>",
    "",
    "Requirements:",
    "- <think> must contain step by step key findings, each finding in separate line with number like: 1), 2)....",
    "- Each step should be 6-12 words, concise, action-oriented.",
    "- No preamble, no explanations, and do NOT emit tokens like <unused94>.",
    "- <cat>: one short disease category (1-3 words).",
    "- <area>: one or two anatomical areas.",
    "",
])

# One record per study the model answered; studies whose request raised are
# reported on stdout only and get no record here.
results = []

# The system turn is the same for every request, so it is built once.
system_message = {"role": "system", "content": [{"type": "text", "text": system_prompt}]}

# Only the first 30 grouped studies are sent to the model.
for _, study in df_grouped.head(30).iterrows():
    uid = study['uid']

    # Findings and impression are sent as one text blob wrapped in <dep> tags.
    report_text = " ".join((study['findings'], study['impression'])).strip()
    user_prompt = (
        f"Findings and Impression: <dep>{report_text}</dep>\n"
        "Produce the output exactly as required."
    )
    chat_messages = [
        system_message,
        {"role": "user", "content": [{"type": "text", "text": user_prompt}]},
    ]

    try:
        completion = client.chat.completions.create(
            model="symptoma/medgemma3:27b",
            messages=chat_messages,
            max_tokens=500,
            temperature=0.0,
        )

        # Only the first choice's text is kept.
        answer = completion.choices[0].message.content

        record = {
            "uid": uid,
            "projection": study['projection'],
            "cot_output": answer,
        }
        results.append(record)

        print(f"Processed UID: {uid}")

    except Exception as exc:
        # Best-effort batch: report the failure and move on to the next study.
        print(f"Error UID {uid}: {exc}")

# Destination CSV for the collected model outputs.
output_path = "/home/intern08/Documents/chest_xray_project/archive/cot_outputs.csv"

# Name the columns explicitly so the header row is written even when no study
# was processed successfully: an empty list would otherwise produce a frame
# with no columns and therefore a CSV without a header. For non-empty results
# the column order matches the key order of the collected records.
pd.DataFrame(results, columns=["uid", "projection", "cot_output"]).to_csv(output_path, index=False)

print(f" All results saved to {output_path}")
