# Disaster Retrieval and Classification

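**Purpose**: Minimal example that retrieves the disaster description whose embedding scores highest against a question, then classifies that description with an Ollama model. With `simulate = True` the notebook runs offline, without an Ollama server.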

In [ ]:
import pandas as pd
import numpy as np
import requests
from pydantic import BaseModel
from typing import Literal

# When True, embed() and classify() return locally computed stand-ins
# instead of sending requests to the Ollama server at localhost:11434.
simulate = True

In [ ]:
# Load the example disaster descriptions; later cells read the
# 'description_en' column of this frame.
df = pd.read_csv('disaster-examples.csv')
df.head()  # notebook preview of the first rows

In [ ]:
def embed(text):
    """Return an embedding vector for ``text`` as a NumPy array.

    When the module-level ``simulate`` flag is set, no network call is made:
    a 3-element pseudo-random vector with values in [0, 1) is derived from a
    SHA-256 digest of the text, so the same text maps to the same vector in
    every Python process. (The built-in ``hash()`` of a ``str`` is salted per
    process unless ``PYTHONHASHSEED`` is fixed, so seeding from it produced
    different "simulated" embeddings on every run.)

    Otherwise the text is posted to the local Ollama embeddings endpoint
    using the ``mxbai-embed-large`` model and the returned ``embedding``
    list is converted to an array.

    Args:
        text: The text to embed.

    Returns:
        A 1-D ``numpy.ndarray`` holding the embedding.

    Raises:
        requests.HTTPError: If the Ollama server answers with an error status.
    """
    if simulate:
        import hashlib  # local import: only the simulated path needs it

        digest = hashlib.sha256(str(text).encode('utf-8')).digest()
        seed = int.from_bytes(digest[:4], 'big')  # 32-bit seed, as before
        # A private Generator avoids reseeding NumPy's global random state.
        return np.random.default_rng(seed).random(3)
    payload = {'model': 'mxbai-embed-large', 'prompt': text}
    r = requests.post('http://localhost:11434/api/embeddings', json=payload)
    r.raise_for_status()
    return np.array(r.json()['embedding'])

In [ ]:
# Embed every description once up front and stack the vectors row-wise,
# in the same order as the rows of df['description_en'].
embeddings = np.vstack(list(map(embed, df['description_en'])))

In [ ]:
class DisasterLabel(BaseModel):
    """Schema for one classification result: a single ``type`` field limited to the known disaster labels."""
    # ``type`` is the JSON key shown in the prompt example and used by
    # classify()'s simulated results; any value outside this list fails validation.
    type: Literal['earthquake','flood','drought','wildfire','hurricane','other']

In [ ]:
def build_prompt(text):
    """Return the classification prompt for ``text``.

    The prompt asks for the disaster type and shows the expected JSON
    shape, followed by the description itself.
    """
    # Doubled braces are literal braces in a str.format template.
    template = (
        'Evaluate the disaster type of this description and respond with '
        'JSON like {{"type": "flood"}}: {}'
    )
    return template.format(text)

In [ ]:
def classify(text):
    """Classify a disaster description and return ``{'type': <label>}``.

    When the module-level ``simulate`` flag is set, the label is chosen by
    case-insensitive keyword matching and no network call is made.
    Otherwise the prompt from ``build_prompt`` is sent to the ``llama3``
    model on the local Ollama server and the reply is validated against
    ``DisasterLabel``.

    Args:
        text: The disaster description to classify.

    Returns:
        A dict with a single ``'type'`` key.

    Raises:
        requests.HTTPError: If the Ollama server answers with an error status.
        pydantic.ValidationError: If the model's reply is not valid JSON
            matching ``DisasterLabel``.
    """
    if simulate:
        lowered = text.lower()
        # Checked in order; the first keyword found decides the label.
        keyword_labels = (
            ('earthquake', 'earthquake'),
            ('flood', 'flood'),
            ('drought', 'drought'),
            ('fire', 'wildfire'),
            ('hurricane', 'hurricane'),
        )
        for keyword, label in keyword_labels:
            if keyword in lowered:
                return {'type': label}
        return {'type': 'other'}
    payload = {
        'model': 'llama3',
        'prompt': build_prompt(text),
        # /api/generate streams newline-delimited JSON objects by default,
        # which r.json() cannot parse; request one complete object instead.
        'stream': False,
        # Constrain the reply to JSON so it can be validated below.
        'format': 'json',
    }
    r = requests.post('http://localhost:11434/api/generate', json=payload)
    r.raise_for_status()
    return DisasterLabel.model_validate_json(r.json()['response']).model_dump()

In [ ]:
def ask(question):
    """Retrieve the best-matching description for ``question`` and classify it.

    The question is embedded with ``embed``, scored against every row of the
    precomputed ``embeddings`` matrix by dot product, and the highest-scoring
    description is passed to ``classify``.

    Args:
        question: The query text.

    Returns:
        A ``(context, label)`` tuple: the selected description and the
        result of ``classify`` for it.
    """
    q_vec = embed(question)
    scores = embeddings @ q_vec
    best_pos = int(scores.argmax())
    # argmax yields a row *position* in ``embeddings``; use positional
    # indexing so the lookup stays correct even if ``df`` does not have a
    # default 0..n-1 index (``df.loc`` would treat the value as a label).
    context = df['description_en'].iloc[best_pos]
    label = classify(context)
    return context, label

In [ ]:
# Run one sample question through ask() and show what came back.
context, label = ask('What happened last year?')
print(f'Nearest description: {context!s}')
print(f'Predicted label: {label!s}')