In [None]:
# ==============================================================================
# SENTINELA FLUVIAL - MEDGEMMA 4B INFERENCE PIPELINE
# ==============================================================================
# Installing dependencies for 4-bit execution (memory efficient)
!pip install -q transformers

print("✅ All packages installed!")

✅ All packages installed!


In [2]:
import json
import os

from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Authenticate this session against the Hugging Face Hub.
# The access token is read from the Kaggle secret store under the label
# "HF_TOKEN", so it never appears in the notebook source.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")

# Log in with the retrieved token.
# NOTE(review): presumably required because the model repository is gated — confirm.
login(token=hf_token)

print("✅ Logged in to Hugging Face!")

✅ Logged in to Hugging Face!


In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hub identifier of the checkpoint used for every inference call in this notebook.
model_id = "google/medgemma-4b-it"
print(f"Loading model and tokenizer: {model_id}...")

# The tokenizer and the weights are fetched from the same repository.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Weights are loaded in bfloat16; device placement is delegated to
# the library through device_map="auto".
model_load_kwargs = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_kwargs)

print("✅ Model loaded successfully!")

In [None]:
# Dataset and Output paths
dataset_path = '/kaggle/input/datasets/richardsonsouza/dataset-prompts-v2/dataset_prompts.jsonl'
# Note: /kaggle/input is read-only. We save to /kaggle/working/ and then move if needed.
output_path = '/kaggle/working/result.json'

print(f"STARTING BATCH PROCESSING: {dataset_path}\n")

# Generation Configuration (sampling is enabled, so outputs vary between runs)
gen_config = {
    "max_new_tokens": 1024,
    "temperature": 0.2,
    "do_sample": True,
    "top_p": 0.95
}

# Test limit (Set to None to process the entire file)
LIMITE_TESTE = 6
contador = 0   # number of cases actually sent to the model
results = []   # one dict per processed case, written to output_path at the end


def parse_model_json(text):
    """Decode a model reply as JSON, tolerating a Markdown code fence around it.

    Args:
        text: Raw text generated by the model.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON even after an
            enclosing ``` fence has been removed.
    """
    candidate = text.strip()
    if candidate.startswith("```"):
        # Drop the whole opening fence line (``` or ```json) ...
        first_newline = candidate.find("\n")
        candidate = candidate[first_newline + 1:] if first_newline != -1 else candidate[3:]
        # ... and the closing fence, when present.
        candidate = candidate.rstrip()
        if candidate.endswith("```"):
            candidate = candidate[:-3]
    return json.loads(candidate)


def generate_response(prompt_text):
    """Run one single-turn chat prompt through the model and return only the new text.

    Uses the notebook-level `tokenizer`, `model` and `gen_config`.

    Args:
        prompt_text: Content of the user message.

    Returns:
        The decoded completion with special tokens removed (prompt tokens excluded).
    """
    chat = [
        {"role": "user", "content": prompt_text},
    ]
    formatted_prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # Text rendered by the chat template already carries the template's special
    # tokens, so the second tokenization pass must not add them again
    # (otherwise the sequence can start with a duplicated BOS token).
    # Inputs follow the model's own device instead of a hard-coded "cuda",
    # because placement was delegated to device_map="auto".
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_config)

    # Keep only the tokens produced after the prompt.
    generated_tokens = outputs[0][inputs.input_ids.shape[-1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Release the per-case tensors before asking CUDA to free cached blocks.
    del inputs, outputs
    torch.cuda.empty_cache()
    return response


# Becomes False only when the dataset itself cannot be opened; in every other
# case whatever has been accumulated in `results` is still written to disk.
dataset_found = True

try:
    with open(dataset_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            # 1. Safety stop for testing
            if LIMITE_TESTE and contador >= LIMITE_TESTE:
                print(f"Test limit reached ({LIMITE_TESTE} cases). Stopping...")
                break

            # 2. JSON Parsing (blank or malformed lines are skipped, not fatal)
            if not line.strip():
                continue
            try:
                case = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Skip: Invalid JSON on line {line_number}: {e}")
                continue
            if not isinstance(case, dict):
                print(f"Skip: Line {line_number} is not a JSON object")
                continue

            # 3. Metadata Extraction
            # `meta` is stored as-is; the display fields fall back to defaults
            # when `meta` is missing, null, or not an object.
            meta = case.get('meta', {})
            meta_fields = meta if isinstance(meta, dict) else {}
            municipio = meta_fields.get('municipio') or 'Unknown'
            competencia = meta_fields.get('competencia', 'N/A')
            estacao = meta_fields.get('estacao', 'N/A')

            # 4. Prompt Extraction
            prompt_text = case.get('prompt')

            if not prompt_text:
                print(f"Skip: Empty prompt for ID {case.get('id')}")
                continue

            # 5-7. Model Formatting, Inference and Decoding
            clean_response = generate_response(prompt_text)

            # 8. Report Display
            print(f"CASE {contador + 1}: {str(municipio).upper()} ({competencia}) | Season: {estacao}")
            print("-" * 80)
            print("MEDGEMMA RESPONSE (JSON):")
            print(clean_response)
            print("=" * 80 + "\n")

            # 9. Result Persistence and Formatting
            try:
                json_response = parse_model_json(clean_response)
                results.append({
                    "id": case.get('id'),
                    "meta": meta,
                    "response": json_response
                })
            except json.JSONDecodeError:
                print("Warning: Response is not valid JSON. Storing as raw string.")
                results.append({
                    "id": case.get('id'),
                    "meta": meta,
                    "response_raw": clean_response
                })

            # 10. Increment (memory cleanup happens inside generate_response)
            contador += 1

except FileNotFoundError:
    dataset_found = False
    print("Error: Dataset file not found. Check the path.")
except Exception as e:
    # A failure mid-run (e.g. during generation) must not discard the cases
    # already processed; they are saved below.
    print(f"Unexpected error: {e}")

# Saving consolidated results (also after a partial run)
if dataset_found:
    try:
        with open(output_path, 'w', encoding='utf-8') as out_f:
            json.dump(results, out_f, indent=4, ensure_ascii=False)
        print(f"✅ Successfully persisted {len(results)} results to {output_path}")
    except OSError as e:
        print(f"Error: Could not write results to {output_path}: {e}")

print("Processing completed.")