In [1]:
!pip install google-genai





In [2]:
!pip install -U google-genai





In [37]:
import os
os.environ["GEMINI_API_KEY"] = "<api_key>"

In [38]:
from google import genai

client = genai.Client(api_key = "<api_key>")
models = client.models.list()
for m in models:
    print(m.name)
    print(m.display_name)

models/embedding-gecko-001
Embedding Gecko
models/gemini-2.5-flash
Gemini 2.5 Flash
models/gemini-2.5-pro
Gemini 2.5 Pro
models/gemini-2.0-flash-exp
Gemini 2.0 Flash Experimental
models/gemini-2.0-flash
Gemini 2.0 Flash
models/gemini-2.0-flash-001
Gemini 2.0 Flash 001
models/gemini-2.0-flash-exp-image-generation
Gemini 2.0 Flash (Image Generation) Experimental
models/gemini-2.0-flash-lite-001
Gemini 2.0 Flash-Lite 001
models/gemini-2.0-flash-lite
Gemini 2.0 Flash-Lite
models/gemini-2.0-flash-lite-preview-02-05
Gemini 2.0 Flash-Lite Preview 02-05
models/gemini-2.0-flash-lite-preview
Gemini 2.0 Flash-Lite Preview
models/gemini-exp-1206
Gemini Experimental 1206
models/gemini-2.5-flash-preview-tts
Gemini 2.5 Flash Preview TTS
models/gemini-2.5-pro-preview-tts
Gemini 2.5 Pro Preview TTS
models/gemma-3-1b-it
Gemma 3 1B
models/gemma-3-4b-it
Gemma 3 4B
models/gemma-3-12b-it
Gemma 3 12B
models/gemma-3-27b-it
Gemma 3 27B
models/gemma-3n-e4b-it
Gemma 3n E4B
models/gemma-3n-e2b-it
Gemma 3n E2B
mod

In [43]:

def refine_report_gemini(raw_report: str) -> str:
    """
    Refine a medical report using Gemini Pro (google-genai client)
    No Vertex AI, no Google Cloud billing.
    """

    client = genai.Client()   # Reads GEMINI_API_KEY automatically

    prompt = f"""
You are a clinical radiology report editor.

Rewrite the following garbled medical report into readable English:

"{raw_report}"

Rules:
- Fix grammar.
- Do NOT add new clinical findings.
- Only reorganize and clean what is already present.
- Return only the refined report.
"""

    # Correct usage for this version:
    response = client.models.generate_content(
        model="models/gemini-2.5-flash",   # or gemini-2.5-flash
        contents=prompt
    )

    return response.text.strip()


In [7]:
!pip install pandas





In [11]:
import pandas as pd

# read csv
df = pd.read_csv(r"D:\fyp_manish_shyam\results\raw_output_fixed_bug.csv")

# extract columns as lists
ground_truth_list = df["ground_truth"].tolist()
generated_list = df["generated_report"].tolist()

print(len(ground_truth_list), len(generated_list))


10487 10487


In [44]:
refined_reports = []
failed_indices = []

In [None]:
from tqdm import tqdm

for idx, gen in enumerate(
        tqdm(generated_list[:50], total=50, desc="Refining reports")
    ):
    try:
        refined = refine_report_gemini(gen)
    except Exception as e:
        print(f"[Error] Refinement failed at index {idx}: {e}")
        refined = gen
        failed_indices.append(idx)

    refined_reports.append(refined)


In [None]:
import json
import os

log_dir = r"D:\fyp_manish_shyam\logs"
os.makedirs(log_dir, exist_ok=True)

file_path = os.path.join(log_dir, "failed_indices.json")

with open(file_path, "w") as f:
    json.dump(failed_indices, f)

print("Failed indices saved to failed_indices.json")


In [47]:
!pip install openai

Collecting openai




  Downloading openai-2.13.0-py3-none-any.whl.metadata (29 kB)
Collecting jiter<1,>=0.10.0 (from openai)
  Downloading jiter-0.12.0-cp310-cp310-win_amd64.whl.metadata (5.3 kB)
Downloading openai-2.13.0-py3-none-any.whl (1.1 MB)
   ---------------------------------------- 0.0/1.1 MB ? eta -:--:--
   ------------------- -------------------- 0.5/1.1 MB 4.2 MB/s eta 0:00:01
   ---------------------------------------- 1.1/1.1 MB 6.4 MB/s  0:00:00
Downloading jiter-0.12.0-cp310-cp310-win_amd64.whl (204 kB)
Installing collected packages: jiter, openai

   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [

In [49]:
from openai import OpenAI
import time

client = OpenAI()   # reads OPENAI_API_KEY automatically


def refine_report_gpt_go(raw_report: str) -> str:
    response = client.chat.completions.create(
        model="gpt-5-mini",   # ChatGPT Go compatible & stable
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a clinical radiology report editor. "
                    "Your task is to improve clarity and grammar without altering medical meaning."
                )
            },
            {
                "role": "user",
                "content": f"""
Rewrite the following radiology report into clear, readable English.

Report:
\"\"\"{raw_report}\"\"\"

Rules:
- Do NOT add, remove, or infer clinical findings.
- Do NOT normalize terminology beyond what is already present.
- Preserve uncertainty, negations, and clinical intent exactly.
- Fix grammar, repetition, and word order only.
- Output ONLY the refined report text.
"""
            }
        ],
        temperature=0.2,   # very important for medical safety
        max_tokens=400
    )

    return response.choices[0].message.content.strip()


In [50]:
import time
from tqdm import tqdm

refined_reports = []
failed_indices = []

MAX_REPORTS = 50          # limit batch size
SLEEP_BETWEEN_CALLS = 1.5 # safe for ChatGPT Go
MAX_RETRIES = 3


def refine_with_retry(gen, idx):
    for attempt in range(MAX_RETRIES):
        try:
            return refine_report_gpt_go(gen)
        except Exception as e:
            print(f"[Retry {attempt+1}] Index {idx} failed: {e}")
            time.sleep(2 * (attempt + 1))  # simple backoff
    return gen


In [None]:

for idx in tqdm(range(min(MAX_REPORTS, len(generated_list))),
                desc="Refining reports",
                total=min(MAX_REPORTS, len(generated_list))):
    gen = generated_list[idx]
    refined = refine_with_retry(gen, idx)
    
    if refined == gen:
        failed_indices.append(idx)
        
    refined_reports.append(refined)
    time.sleep(SLEEP_BETWEEN_CALLS)
