In [1]:
# ===============================================================
#  KAGGLE SETUP & IMPORTS
# ===============================================================
!pip install -q google-generativeai

import pandas as pd
import os
import time
from kaggle_secrets import UserSecretsClient # To get our API key securely

import google.generativeai as genai
# Import the specific types for safety settings
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# For our dynamic few-shot selection
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# --- Load API Key from Kaggle Secrets ---
# On any failure below, gemini_model is set to None so later cells can detect
# that the model is unavailable.
try:
    # Read the key stored under the secret name "GOOGLE_API_KEY" and pass it to the SDK.
    user_secrets = UserSecretsClient()
    secret_value = user_secrets.get_secret("GOOGLE_API_KEY")
    genai.configure(api_key=secret_value)
    print("Google API key loaded successfully.")

    # One entry per harm category, each with the BLOCK_NONE threshold.
    safety_settings_config = [
        {"category": harm_category, "threshold": HarmBlockThreshold.BLOCK_NONE}
        for harm_category in (
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        )
    ]

    # Model handle used for every generation request in this notebook.
    gemini_model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20')
except Exception as setup_error:
    print("Could not load Google API key or configure model. Make sure it's stored as a secret named 'GOOGLE_API_KEY'.")
    print(f"Error: {setup_error}")
    # Deliberately not re-raised: execution continues and the missing model is
    # signalled through gemini_model being None.
    gemini_model = None


# --- Load Data ---
# Folder under /kaggle/input/ that holds the competition CSV files; change it if the
# dataset is mounted under a different folder name.
DATA_PATH = "/kaggle/input/kenya-clinical-reasoning-challenge20250407/"

# Training rows, test rows, and the sample submission file.
train_df = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
test_df = pd.read_csv(os.path.join(DATA_PATH, 'test.csv'))
sample_submission_df = pd.read_csv(os.path.join(DATA_PATH, 'SampleSubmission.csv'))

# Quick sanity check on what was loaded.
print(f"Train data shape: {train_df.shape}")
print(f"Test data shape: {test_df.shape}")


[0m


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


  from .autonotebook import tqdm as notebook_tqdm


Google API key loaded successfully.


Train data shape: (400, 12)
Test data shape: (100, 7)


In [2]:
# ===============================================================
#  DYNAMIC FEW-SHOT SELECTOR (Our "Secret Sauce")
# ===============================================================
# We will use TF-IDF to find the most similar prompts in the training data
# to use as examples in our LLM prompts. This is much better than static examples.

# Fit a TF-IDF vectorizer on the training prompts (English stop words removed,
# at most 5000 features) and keep the resulting matrix so that query prompts can
# be compared against it later.
vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)
train_prompt_vectors = vectorizer.fit_transform(train_df['Prompt'])

def get_most_similar_examples(query_prompt: str, top_n: int = 5) -> pd.DataFrame:
    """Find the training rows whose prompts are most similar to ``query_prompt``.

    The query is projected with the module-level ``vectorizer`` and compared to
    ``train_prompt_vectors`` using cosine similarity.

    Args:
        query_prompt: Text to look up.
        top_n: Maximum number of rows to return. If it exceeds the number of
            training rows, all rows are returned. Values <= 0 yield an empty
            frame.

    Returns:
        Rows of ``train_df`` ordered from most to least similar.
    """
    # Guard: a slice like [-0:] is the same as [0:] and would select *every*
    # row, so non-positive requests are answered with an empty frame instead.
    if top_n <= 0:
        return train_df.iloc[0:0]

    query_vector = vectorizer.transform([query_prompt])
    # cosine_similarity returns one row per query; there is a single query here.
    similarities = cosine_similarity(query_vector, train_prompt_vectors)[0]

    # argsort is ascending: take the last top_n indices and reverse them so the
    # best match comes first.
    top_indices = np.argsort(similarities)[-top_n:][::-1]
    return train_df.iloc[top_indices]


In [3]:
# ===============================================================
#  SIMPLIFIED API CALL FUNCTION (Direct Inference)
# ===============================================================

def create_direct_response_prompt(prompt_text: str, top_n: int = 5) -> str:
    """Build the few-shot prompt used to generate a clinician response.

    The prompt consists of a fixed instruction header, up to ``top_n``
    example pairs (vignette -> clinician response) taken from the training
    rows most similar to ``prompt_text``, and finally the task vignette with
    an open "Clinician Response:" slot for the model to complete.

    Args:
        prompt_text: The vignette to answer.
        top_n: Number of similar training examples to include (default 5).

    Returns:
        The complete prompt string.
    """
    examples_df = get_most_similar_examples(prompt_text, top_n=top_n)

    # Collect the pieces and join once instead of repeatedly concatenating.
    sections = [
        "You are an expert Kenyan clinician providing practical guidance to a colleague. Your response MUST match the persona of the nurse in the prompt and the resources available at their facility. Structure your response with a summary, diagnosis, and a clear management plan. Emulate the style and reasoning from the examples below. Be concise and direct.\n\n--- EXAMPLES ---\n"
    ]

    for _, row in examples_df.iterrows():
        # Each example shows the direct Prompt -> Clinician mapping.
        sections.append(f"Vignette: \"{row['Prompt']}\"\n")
        sections.append(f"Clinician Response: {row['Clinician']}\n\n")

    sections.append("--- TASK ---\n")
    sections.append(f"Vignette: \"{prompt_text}\"\n")
    sections.append("Clinician Response:\n")
    return "".join(sections)

def generate_direct_response(prompt_text: str) -> str:
    """Generate a clinician response for one vignette with a single API call.

    Args:
        prompt_text: The vignette to answer.

    Returns:
        The stripped model text on success. If the model returns no parts, or
        the API call raises, a placeholder string starting with ``"Error:"``
        is returned instead of raising. Exceptions raised while building the
        prompt are not caught here and propagate to the caller.
    """
    full_prompt = create_direct_response_prompt(prompt_text)

    # Bound before the try so the error handler can tell whether the API call
    # produced a response object at all.
    response = None
    try:
        generation_config = genai.GenerationConfig(temperature=0.4, max_output_tokens=65536)
        response = gemini_model.generate_content(
            full_prompt,
            generation_config=generation_config,
            safety_settings=safety_settings_config,
        )

        if response.parts:
            return response.text.strip()

        # No content came back: report why, if the response says so.
        finish_reason = response.candidates[0].finish_reason if response.candidates else 'UNKNOWN'
        return f"Error: No text generated. Finish Reason: {finish_reason}"

    except Exception as e:
        print(f"API Error in response generation: {e}")
        if response is not None:
            # Best-effort diagnostics only; a failure here must not mask the
            # placeholder return below. Limited to Exception so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            try:
                print(response.prompt_feedback)
            except Exception:
                pass
        return "Error: Could not generate response."

In [4]:
# ===============================================================
#  MAIN EXECUTION SCRIPT
# ===============================================================
# Generate one response per test row, throttled to RPM requests per 60-second
# window, then write the results to submission.csv. Skipped entirely when the
# model could not be initialised.
if gemini_model:
    results = []
    # Master_Index values whose "response" is an "Error: ..." placeholder
    # returned by generate_direct_response rather than real model text.
    failed_ids = []
    print("Starting prediction process with robust 'Direct-Inference' strategy...")

    RPM = 10  # maximum number of requests issued per 60-second window
    requests_this_minute = 0
    start_time = time.time()
    total_rows = len(test_df)

    # enumerate supplies the progress counter so it does not depend on the
    # DataFrame index being the integers 0..n-1.
    for row_number, row in enumerate(test_df.itertuples(), start=1):
        print(f"Processing row {row_number}/{total_rows}: Master_Index = {row.Master_Index}")

        # Rate limiting: once RPM requests were made in the current window,
        # wait out the remainder of the minute and start a new window.
        if requests_this_minute >= RPM:
            elapsed_time = time.time() - start_time
            if elapsed_time < 60:
                sleep_time = 60.1 - elapsed_time
                print(f"  > Rate limit reached. Sleeping for {sleep_time:.2f} seconds...")
                time.sleep(sleep_time)
            start_time = time.time()
            requests_this_minute = 0

        prompt_text = row.Prompt

        # Exactly one API call per row.
        final_response = generate_direct_response(prompt_text)
        requests_this_minute += 1

        # Keep track of failures so they are not silently submitted.
        if final_response.startswith("Error:"):
            failed_ids.append(row.Master_Index)

        print(f"  > Generated Response: {repr(final_response[:70])}...")

        results.append({'Master_Index': row.Master_Index, 'Clinician': final_response})
        print("-" * 20)

    print("Prediction process finished.")
    if failed_ids:
        print(f"WARNING: {len(failed_ids)} of {total_rows} rows contain an error placeholder instead of a response: {failed_ids}")

    submission_df = pd.DataFrame(results)
    # Use the same column selection and order as the sample submission file.
    submission_df = submission_df[sample_submission_df.columns]
    submission_df.to_csv('submission.csv', index=False)

    print("Submission file 'submission.csv' has been created successfully!")
    print(submission_df.head())
else:
    print("Execution halted because Gemini model could not be initialized.")


Starting prediction process with robust 'Direct-Inference' strategy...
Processing row 1/100: Master_Index = ID_CUAOY


  > Generated Response: 'summary 24 yr old female with sharp right-sided nasal pain worsening o'...
--------------------
Processing row 2/100: Master_Index = ID_OGSAY


  > Generated Response: 'summary a 3 year old boy with a bean seed deep in the right nostril di'...
--------------------
Processing row 3/100: Master_Index = ID_TYHSA


  > Generated Response: 'summary 22 year old male presented with progressive lower limb weaknes'...
--------------------
Processing row 4/100: Master_Index = ID_CZXLD


  > Generated Response: 'summary a 6 year old girl presents with hx of sudden onset twitching f'...
--------------------
Processing row 5/100: Master_Index = ID_ZJQUQ


  > Generated Response: 'summary a 1 year old boy presented to mch clinic unvaccinated for meas'...
--------------------
Processing row 6/100: Master_Index = ID_HYSCV


  > Generated Response: 'summary a patient is brought to the ward for a second excision of kelo'...
--------------------
Processing row 7/100: Master_Index = ID_DXHPF


  > Generated Response: 'summary a 14 year old female presents with right lower limb pain for o'...
--------------------
Processing row 8/100: Master_Index = ID_GDFDN


  > Generated Response: 'summary a mother with premature rupture of membranes 7cm dilated with '...
--------------------
Processing row 9/100: Master_Index = ID_UFAFI


  > Generated Response: 'summary 23 year old lady 10 days postpartum brought to mch clinic with'...
--------------------
Processing row 10/100: Master_Index = ID_KMBGG


  > Generated Response: 'summary a 4 year old boy with gradual abdominal distention for 3 weeks'...
--------------------
Processing row 11/100: Master_Index = ID_GCHQJ


  > Generated Response: 'summary female patient with altered level of consciousness for one day'...
--------------------
Processing row 12/100: Master_Index = ID_FBVXH


  > Generated Response: 'summary 8 year old male easy fatigability weight loss drenching night '...
--------------------
Processing row 13/100: Master_Index = ID_GFQXW


  > Generated Response: 'summary a 10 day postnatal mother presents with painful swelling and f'...
--------------------
Processing row 14/100: Master_Index = ID_KQFSM


  > Generated Response: 'summary a 20-year-old at 6th anc visit asymptomatic a dual retest for '...
--------------------
Processing row 15/100: Master_Index = ID_OTEWX


  > Generated Response: 'summary an 85-year-old male patient with a history of recurrent heart '...
--------------------
Processing row 16/100: Master_Index = ID_KTTZQ


  > Generated Response: 'summary a 30 year old woman with per vaginal bleeding for one month on'...
--------------------
Processing row 17/100: Master_Index = ID_DFFBJ


  > Generated Response: 'summary 12 year old girl brought to emergency with difficulty in breat'...
--------------------
Processing row 18/100: Master_Index = ID_ZQLND


  > Generated Response: 'summary a 4th day post-delivery mother presenting with altered mental '...
--------------------
Processing row 19/100: Master_Index = ID_LXBDD


  > Generated Response: 'summary an 8 year old boy presents to the ed with a visible stick embe'...
--------------------
Processing row 20/100: Master_Index = ID_PWETS


  > Generated Response: 'summary a 7-week primigravida presents with per vaginal bleeding but n'...
--------------------
Processing row 21/100: Master_Index = ID_VJVBS


  > Generated Response: 'summary a 6 year old male with 1 month history of abdominal distention'...
--------------------
Processing row 22/100: Master_Index = ID_OZCVT


  > Generated Response: 'summary 40 year old lady with right lower leg swelling tenderness and '...
--------------------
Processing row 23/100: Master_Index = ID_HBKUL


  > Generated Response: 'summary a 32 year old male presents with acute onset difficulty in bre'...
--------------------
Processing row 24/100: Master_Index = ID_ZVYUH


  > Generated Response: 'summary a 46 year old female presented with 3 day history of radio-uln'...
--------------------
Processing row 25/100: Master_Index = ID_SHIKK


  > Generated Response: 'summary a 60 year old patient with generalized weakness muscle cramps '...
--------------------
Processing row 26/100: Master_Index = ID_BZMKN


  > Generated Response: 'summary a 27 year old male patient with tracheostomy secondary to atte'...
--------------------
Processing row 27/100: Master_Index = ID_GWZRC


  > Generated Response: 'summary a 30 year old medical student pricked herself with a needle fr'...
--------------------
Processing row 28/100: Master_Index = ID_BMKRM


  > Generated Response: 'summary a 49 year old male presents with abdominal swelling and not pa'...
--------------------
Processing row 29/100: Master_Index = ID_QDMBJ


  > Generated Response: 'summary 40 year old female had attempted jadelle removal only one rod '...
--------------------
Processing row 30/100: Master_Index = ID_LWSTJ


  > Generated Response: 'summary a 50 year old male presents with 4 days of fever chills fatigu'...
--------------------
Processing row 31/100: Master_Index = ID_APFPK


  > Generated Response: 'summary 6 year old boy presenting with vomiting and abdominal pain no '...
--------------------
Processing row 32/100: Master_Index = ID_NTHZR


  > Generated Response: 'summary a 15 year old male patient is brought in with altered mental s'...
--------------------
Processing row 33/100: Master_Index = ID_UAHZM


  > Generated Response: 'summary a 42 year old para 2 gravida 3 at 43 weeks gestation presentin'...
--------------------
Processing row 34/100: Master_Index = ID_UHJDN


  > Generated Response: 'summary a 1 year old abandoned baby brought by grandmother with unknow'...
--------------------
Processing row 35/100: Master_Index = ID_FNAFQ


  > Generated Response: 'summary a patient presents with acute onset vomiting blood and passing'...
--------------------
Processing row 36/100: Master_Index = ID_WRMNW


  > Generated Response: 'summary 22 year old male presents with traumatically amputated right h'...
--------------------
Processing row 37/100: Master_Index = ID_OMWIY


  > Generated Response: 'summary a 37 year old primigravida at 40 weeks gestation presenting wi'...
--------------------
Processing row 38/100: Master_Index = ID_ZNYCZ


  > Generated Response: 'summary a known hypertensive patient presents with a bp of 180 100 mmh'...
--------------------
Processing row 39/100: Master_Index = ID_NCNYB


  > Generated Response: 'summary a 47 year old man presents with a history of being kicked in t'...
--------------------
Processing row 40/100: Master_Index = ID_WKQEY


  > Generated Response: 'summary a 48 year old man is brought to the emergency department one h'...
--------------------
Processing row 41/100: Master_Index = ID_VLNLB


  > Generated Response: 'summary a 12 year old boy presents with polyphagia polydipsia polyuria'...
--------------------
Processing row 42/100: Master_Index = ID_DGSSB


  > Generated Response: 'summary a 6 year old male presents with history of bean inserted in ea'...
--------------------
Processing row 43/100: Master_Index = ID_JADVZ


  > Generated Response: 'summary a 65 year old male client was brought to the emergency unit wi'...
--------------------
Processing row 44/100: Master_Index = ID_ILPTC


  > Generated Response: 'summary 24 year old female presenting with severe epigastric pain vomi'...
--------------------
Processing row 45/100: Master_Index = ID_BGRFD


  > Generated Response: 'summary 60 year old male presenting with history of alopecia (hair los'...
--------------------
Processing row 46/100: Master_Index = ID_HDKYR


  > Generated Response: 'summary a 1 year old baby boy with adenoids causing nasal obstruction '...
--------------------
Processing row 47/100: Master_Index = ID_QAHFT


  > Generated Response: 'summary a 30 year old g5p4 at 34 weeks gestation presents for first an'...
--------------------
Processing row 48/100: Master_Index = ID_PESEJ


  > Generated Response: 'summary 80 year old male semi conscious bedridden for 1 month post spi'...
--------------------
Processing row 49/100: Master_Index = ID_OFDGY


  > Generated Response: 'summary 5 year old boy with swollen tender right wrist after a fall vi'...
--------------------
Processing row 50/100: Master_Index = ID_WGNEX


  > Generated Response: 'summary middle aged man presented with chest pain after being stabbed '...
--------------------
Processing row 51/100: Master_Index = ID_SIPNK


  > Generated Response: 'summary a 60 year old known diabetic with good adherence for 7 years p'...
--------------------
Processing row 52/100: Master_Index = ID_MXHUP


  > Generated Response: 'summary a 32 year old female presents with nausea general body weaknes'...
--------------------
Processing row 53/100: Master_Index = ID_KBQBD


  > Generated Response: 'summary 38 year old pregnant woman at 36 weeks gestation admitted with'...
--------------------
Processing row 54/100: Master_Index = ID_AYZDZ


  > Generated Response: 'summary 44 year old grand multipara in active labour on oxytocin infus'...
--------------------
Processing row 55/100: Master_Index = ID_GAWOX


  > Generated Response: 'summary a 1 year old female presents with 2 days confusion more than 1'...
--------------------
Processing row 56/100: Master_Index = ID_DCOHP


  > Generated Response: 'summary a 17-year-old female, HIV positive with an undetectable viral '...
--------------------
Processing row 57/100: Master_Index = ID_RWISH


  > Generated Response: 'summary patient presents with acute onset talkativeness and violent be'...
--------------------
Processing row 58/100: Master_Index = ID_CQUKB


  > Generated Response: 'summary 44 year old male brought to A&E with pain and difficulty in br'...
--------------------
Processing row 59/100: Master_Index = ID_FFWID


  > Generated Response: 'summary a 19 year old girl presents with epigastric pain typically at '...
--------------------
Processing row 60/100: Master_Index = ID_XQHOA


  > Generated Response: 'summary a nanny was brought in by her employer with abdominal cramping'...
--------------------
Processing row 61/100: Master_Index = ID_PJSQE


  > Generated Response: 'summary a 55 year old female admitted with a right tibia fibula fractu'...
--------------------
Processing row 62/100: Master_Index = ID_EFYMF


  > Generated Response: 'summary a 50 year old male presents with loss of consciousness for 3 h'...
--------------------
Processing row 63/100: Master_Index = ID_QMQHY


  > Generated Response: 'summary 24 year old female admitted with reduced urine output, general'...
--------------------
Processing row 64/100: Master_Index = ID_QSQZO


  > Generated Response: 'summary a newborn 8 hours old brought in with respiratory distress fou'...
--------------------
Processing row 65/100: Master_Index = ID_HLPRM


  > Generated Response: 'summary 2 weeks post-delivery, mother presents with an unhealed, foul-'...
--------------------
Processing row 66/100: Master_Index = ID_WOBZV


  > Generated Response: 'Summary: A 26-year-old primigravida at 38 weeks gestation presents wit'...
--------------------
Processing row 67/100: Master_Index = ID_QPFCH


  > Generated Response: 'summary a 23 year old male presents with sudden bilateral vision loss '...
--------------------
Processing row 68/100: Master_Index = ID_CEPRG


  > Generated Response: '**Summary:** A mother two days postpartum is withdrawn and not interes'...
--------------------
Processing row 69/100: Master_Index = ID_SKCAI


  > Generated Response: 'summary a 60 year old male brought to outpatient clinic with inability'...
--------------------
Processing row 70/100: Master_Index = ID_JENNS


  > Generated Response: 'summary a 25 year old male presented with abdominal pain inability to '...
--------------------
Processing row 71/100: Master_Index = ID_QSPEG


  > Generated Response: 'summary 1 year old girl presents with bean inserted in right nostril 2'...
--------------------
Processing row 72/100: Master_Index = ID_ZTHIV


  > Generated Response: 'summary 27 year old male with mandibular pain and difficulty eating sw'...
--------------------
Processing row 73/100: Master_Index = ID_HVSDS


  > Generated Response: 'summary a 2 month old baby born at home and unimmunized presents for r'...
--------------------
Processing row 74/100: Master_Index = ID_BFBYN


  > Generated Response: 'summary a 7 year old boy with cystic fibrosis admitted for respiratory'...
--------------------
Processing row 75/100: Master_Index = ID_EISIY


  > Generated Response: 'summary an 18 y o female presented with a history of low moods reduced'...
--------------------
Processing row 76/100: Master_Index = ID_GCTMK


  > Generated Response: 'summary a 39 year old male presents with cracked lips and eroded surro'...
--------------------
Processing row 77/100: Master_Index = ID_NIVMV


  > Generated Response: 'summary a 35 year old mother in active labour suddenly developed diffi'...
--------------------
Processing row 78/100: Master_Index = ID_WZHQK


  > Generated Response: 'summary a 23 year old para 1 0 gravida 2 at 34 weeks gestation present'...
--------------------
Processing row 79/100: Master_Index = ID_ZHSMO


  > Generated Response: 'summary 4-day old baby brought with irritability, jaundice, sunken eye'...
--------------------
Processing row 80/100: Master_Index = ID_RKTZL


  > Generated Response: 'summary a 2 week old male neonate presents with hotness of body and re'...
--------------------
Processing row 81/100: Master_Index = ID_PGEOO


  > Generated Response: 'summary a 60 year old male known case of diabetes presents with a sept'...
--------------------
Processing row 82/100: Master_Index = ID_VTRFF


  > Generated Response: 'summary 25 year old male with severe dysuria and purulent urethral dis'...
--------------------
Processing row 83/100: Master_Index = ID_CWEIL


  > Generated Response: 'summary 8 month old child brought to CWC with sudden onset of inabilit'...
--------------------
Processing row 84/100: Master_Index = ID_WLSXB


  > Generated Response: 'summary an 18yr old girl presents with difficulty in breathing restles'...
--------------------
Processing row 85/100: Master_Index = ID_VOVAB


  > Generated Response: '**Summary:**\n42 year old male presents with body swelling, anuria, poo'...
--------------------
Processing row 86/100: Master_Index = ID_XTMGG


  > Generated Response: 'summary 1-year-old child presenting with inability to hold neck (head '...
--------------------
Processing row 87/100: Master_Index = ID_ZHZRH


  > Generated Response: 'summary 10 year old boy with 3 day history of high fever fatigue muscl'...
--------------------
Processing row 88/100: Master_Index = ID_MVXJK


  > Generated Response: 'summary 56 year old male known hypertensive non-compliant with medicat'...
--------------------
Processing row 89/100: Master_Index = ID_OPCIZ


  > Generated Response: 'summary 5 year old boy ingested hydrogen peroxide 2 hours ago presenti'...
--------------------
Processing row 90/100: Master_Index = ID_LACGV


  > Generated Response: 'summary a 55 year old female presents with one week history of frequen'...
--------------------
Processing row 91/100: Master_Index = ID_SCOBO


  > Generated Response: 'summary 30 year old female presenting with joint pain in wrists and an'...
--------------------
Processing row 92/100: Master_Index = ID_WQREZ


  > Generated Response: 'summary an 8 year old female brought to emergency department with seve'...
--------------------
Processing row 93/100: Master_Index = ID_KYWMF


  > Generated Response: 'summary a 35 year old male presents with a deep cut wound on the leg f'...
--------------------
Processing row 94/100: Master_Index = ID_MURHB


  > Generated Response: 'summary a 17 year old girl was brought to the facility unresponsive af'...
--------------------
Processing row 95/100: Master_Index = ID_MLUOW


  > Generated Response: 'summary a 5 month old infant is brought to the emergency department le'...
--------------------
Processing row 96/100: Master_Index = ID_AYCAI


  > Generated Response: 'summary a 6 year old child with a painful, swollen, protruded, injecte'...
--------------------
Processing row 97/100: Master_Index = ID_CLEYN


  > Generated Response: 'summary a 49 year old female with known hypertension presents with pal'...
--------------------
Processing row 98/100: Master_Index = ID_BRIIW


  > Generated Response: 'summary 35 year old male with 1 year infertility reduced libido and oc'...
--------------------
Processing row 99/100: Master_Index = ID_SNZBL


  > Generated Response: 'summary 48 year old post thyroidectomy patient with dizziness loss of '...
--------------------
Processing row 100/100: Master_Index = ID_KXLGG


  > Generated Response: 'summary a 58 year old patient presents with fever and chills and gener'...
--------------------
Prediction process finished.


Submission file 'submission.csv' has been created successfully!


  Master_Index                                          Clinician
0     ID_CUAOY  summary 24 yr old female with sharp right-side...
1     ID_OGSAY  summary a 3 year old boy with a bean seed deep...
2     ID_TYHSA  summary 22 year old male presented with progre...
3     ID_CZXLD  summary a 6 year old girl presents with hx of ...
4     ID_ZJQUQ  summary a 1 year old boy presented to mch clin...
