# Retrieval Augmented Generation (RAG)

**Purpose**: Run a simple semantic search over a tiny corpus, then feed the best-matching document to an LLM as context.

**Dependencies**: `requests`, `pandas`, `numpy`


In [ ]:
import numpy as np
import pandas as pd
import requests

# Offline switch: while True, embed() returns a random 3-element vector and
# answer_with_context() returns a canned reply, so no HTTP request is made.
simulate = True

In [ ]:
# Toy corpus: one short sentence per document.
docs = [
    'The earthquake caused major damage.',
    'Floods destroyed crops in the valley.',
    'Wildfires spread due to strong winds.',
]
# Single-column frame; the 'text' column is what later cells embed and search.
df = pd.DataFrame(docs, columns=['text'])
df

In [ ]:
def embed(text):
    """Turn ``text`` into an embedding vector.

    With the module-level ``simulate`` flag on, no request is sent and a random
    3-element vector is returned. Otherwise the text is posted to the local
    embeddings endpoint and the ``'embedding'`` field of the JSON reply is
    returned as a NumPy array.

    Raises:
        requests.HTTPError: If the endpoint replies with an error status.
    """
    if not simulate:
        response = requests.post(
            'http://localhost:11434/api/embeddings',
            json={'model': 'mxbai-embed-large', 'prompt': text},
        )
        response.raise_for_status()
        vector = response.json()['embedding']
        return np.array(vector)
    # Simulation path: stand-in vector, unrelated to the content of ``text``.
    return np.random.rand(3)

In [ ]:
# One embedding per document, stacked row-wise in the same order as df['text'].
doc_vectors = list(map(embed, df['text']))
embeddings = np.vstack(doc_vectors)
embeddings

In [ ]:
query = 'What disaster damaged crops?'
q_vec = embed(query)

# Rank by cosine similarity rather than the raw dot product, so a document
# whose embedding merely has a large norm does not win regardless of direction.
doc_norms = np.linalg.norm(embeddings, axis=1)
q_norm = np.linalg.norm(q_vec)
norm_products = doc_norms * q_norm
# Guard the division: an all-zero vector scores 0 instead of producing NaN.
safe_products = np.where(norm_products == 0, 1.0, norm_products)
scores = (embeddings @ q_vec) / safe_products

# argmax yields a row *position*, so select with .iloc; plain [] indexing is
# label-based and only agrees while df keeps its default 0..n-1 index.
idx = int(scores.argmax())
context = df['text'].iloc[idx]
context

In [ ]:
def answer_with_context(question, context):
    """Answer ``question`` using ``context`` as grounding text.

    Args:
        question: The user's question, placed after the context in the prompt.
        context: Retrieved passage placed at the top of the prompt.

    Returns:
        dict: In simulate mode, ``{'response': ...}`` holding a canned answer.
        Otherwise the JSON body returned by the generate endpoint, decoded
        into a dict.

    Raises:
        requests.HTTPError: If the endpoint replies with an error status.
    """
    if simulate:
        return {'response': f'{context} Therefore: floods damaged crops.'}
    # Explicit "\n" escapes: a single-quoted f-string cannot span physical lines.
    prompt = f'{context}\nQuestion: {question}\nAnswer:'
    # Ollama's generate endpoint streams newline-delimited JSON chunks by
    # default, which r.json() cannot parse; stream=False requests one object.
    payload = {'model': 'llama3', 'prompt': prompt, 'stream': False}
    r = requests.post('http://localhost:11434/api/generate', json=payload)
    r.raise_for_status()
    return r.json()

In [ ]:
# Generate the answer from the retrieved context and show only its text.
result = answer_with_context(question=query, context=context)
print(result['response'])