In [1]:
! pip install -U transformers


Collecting transformers
  Downloading transformers-4.57.6-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.57.6-py3-none-any.whl (12.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m153.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.57.3
    Uninstalling transformers-4.57.3:
      Successfully uninstalled transformers-4.57.3
Successfully installed transformers-4.57.6


In [2]:
import torch
from transformers import pipeline
from PIL import Image
import pandas as pd
import time
import os

In [4]:
from google.colab import drive

In [5]:
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def load_medgemma_pipeline(model_name="google/medgemma-4b-it",device=None):
    if device is None:
        device = 0 if torch.cuda.is_available() else -1

    pipe = pipeline(
        "image-text-to-text",
        model=model_name,
        dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device=device
    )

    return pipe


In [7]:
path = '/content/drive/MyDrive/theMedGemma_heckaton/ecg_images'

In [9]:
def fill_ecg_summary_with_medgemma(
    core_df: pd.DataFrame,
    image_root: str = "",
    model_name: str = "google/medgemma-4b-it",
    batch_size: int = 1,
    autosave_every: int = 10,
    out_csv: str = "core_df_with_summaries.csv",
    max_new_tokens: int = 200,
    sleep_between: float = 0.5,
):
    """
    Fills core_df['ecg_summary'] using MedGemma.

    core_df: must contain columns ['image_path', 'ecg_id', 'ecg_summary']
    """

    df = core_df.copy()

    pipe = load_medgemma_pipeline(model_name=model_name)

    for idx, row in df.iterrows():

        # skip already processed
        if isinstance(row.get("ecg_summary", ""), str) and len(row["ecg_summary"]) > 10:
            continue

        image_path = row["image_path"]
        if image_root:
            image_path = os.path.join(image_root, image_path)

        if not os.path.exists(image_path):
            print(f"[WARN] Missing image: {image_path}")
            continue

        try:
            image = Image.open(image_path).convert("RGB")

            messages = [
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": "You are an expert cardiologist."}
                    ]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": (
                                "Analyze this 12-lead ECG.\n"
                                "Provide a concise clinical summary including:\n"
                                "- rhythm\n"
                                "- heart rate (approximate)\n"
                                "- ST-segment changes\n"
                                "- T-wave abnormalities\n"
                                "- conduction abnormalities\n"
                                "- other notable findings\n\n"
                                "Do not provide a diagnosis."
                            )
                        },
                        {
                            "type": "image",
                            "image": image
                        }
                    ]
                }
            ]

            output = pipe(
                text=messages,
                max_new_tokens=max_new_tokens
            )

            # MedGemma output format
            generated = output[0]["generated_text"][-1]["content"]

            df.at[idx, "ecg_summary"] = generated

            print(f"[OK] ECG {row.ecg_id} processed.")

        except Exception as e:
            print(f"[ERROR] ECG {row.ecg_id}: {e}")
            df.at[idx, "ecg_summary"] = ""

        # autosave checkpoint
        if (idx + 1) % autosave_every == 0:
            df.to_csv(out_csv, index=False)
            print(f"[SAVE] Progress saved to {out_csv}")

        # be gentle with memory / GPU
        time.sleep(sleep_between)

    # final save
    df.to_csv(out_csv, index=False)
    print(f"[DONE] All summaries saved to {out_csv}")

    return df

In [10]:
core_df = pd.read_csv('/content/drive/MyDrive/theMedGemma_heckaton/ecg_images/core_db.csv')

core_df = fill_ecg_summary_with_medgemma(
    core_df,
    image_root=path,             # jeśli image_path jest pełną ścieżką → ""
    model_name="google/medgemma-4b-it",
    autosave_every=10,
    out_csv="core_df_with_summaries.csv"
)

config.json:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/90.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.64G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Device set to use cuda:0

*   **Rhythm:** Sinus rhythm
*   **Heart Rate:** Approximately 72 bpm
*   **ST-segment changes:** No significant ST-segment elevation or depression is apparent.
*   **T-wave abnormalities:** No obvious T-wave inversions or significant abnormalities are noted.
*   **Conduction abnormalities:** No obvious conduction delays or blocks are seen.
*   **Other notable findings:** The ECG appears to be a standard 12-lead tracing.

**Disclaimer:** This analysis is for informational purposes only and should not be considered a substitute for a professional medical evaluation. A qualified healthcare professional should interpret the ECG in the context of the patient's clinical presentation and medical history.
' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.at[idx, "ecg_summary"] = generated


[OK] ECG 146 processed.
[OK] ECG 177 processed.
[OK] ECG 223 processed.
[OK] ECG 544 processed.
[OK] ECG 631 processed.
[OK] ECG 931 processed.
[OK] ECG 993 processed.
[OK] ECG 1061 processed.
[OK] ECG 1116 processed.
[OK] ECG 1124 processed.
[SAVE] Progress saved to core_df_with_summaries.csv


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[OK] ECG 1131 processed.
[OK] ECG 1380 processed.
[OK] ECG 1586 processed.
[OK] ECG 1600 processed.
[OK] ECG 1737 processed.
[OK] ECG 1796 processed.
[OK] ECG 1840 processed.
[OK] ECG 2049 processed.
[OK] ECG 2070 processed.
[OK] ECG 2100 processed.
[SAVE] Progress saved to core_df_with_summaries.csv
[OK] ECG 2118 processed.
[OK] ECG 2159 processed.
[OK] ECG 2276 processed.
[OK] ECG 2311 processed.
[OK] ECG 2337 processed.
[OK] ECG 2469 processed.
[OK] ECG 2548 processed.
[OK] ECG 2732 processed.
[OK] ECG 2843 processed.
[OK] ECG 2932 processed.
[SAVE] Progress saved to core_df_with_summaries.csv
[OK] ECG 2950 processed.
[OK] ECG 3005 processed.
[OK] ECG 3275 processed.
[OK] ECG 3285 processed.
[OK] ECG 3781 processed.
[OK] ECG 3785 processed.
[OK] ECG 3928 processed.
[OK] ECG 4056 processed.
[OK] ECG 4247 processed.
[OK] ECG 4396 processed.
[SAVE] Progress saved to core_df_with_summaries.csv
[OK] ECG 4427 processed.
[OK] ECG 4443 processed.
[OK] ECG 4498 processed.
[OK] ECG 4506 proce