In [None]:
import ast
import os

import faiss
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer

from src.inference import init_model, generate_with_context
from src.retriever import build_faiss_index, retrieve_top_sections

In [None]:
# Load environment variables (including any defined in a local .env file) and
# read the Hugging Face token from them.
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail fast when the token is missing *or* blank: an entry such as `HF_TOKEN=`
# yields an empty string, which would otherwise slip past an `is None` check
# and only fail later inside init_model.
if not HF_TOKEN or not HF_TOKEN.strip():
    raise ValueError("HF_TOKEN not found in .env")

# Initialise the generation model from src.inference with the token.
init_model(HF_TOKEN)

In [None]:
# Load the processed sections together with their stored embeddings.
df = pd.read_csv("../data/processed/osh_sections_with_vectors.csv")


def _parse_embedding(value):
    """Return ``value`` as a Python object, parsing it only if it is a string.

    Embeddings read back from CSV arrive as text. ``ast.literal_eval`` accepts
    only Python literals, so unlike ``eval`` it cannot execute code embedded
    in the data file. Values that are not strings are returned unchanged,
    which keeps the conversion safe to apply when no parsing is needed.

    Raises:
        ValueError / SyntaxError: if a string cell is not a valid Python
            literal (propagated from ``ast.literal_eval``).
    """
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


# Convert string embeddings to lists if needed
df["vector_embedding"] = df["vector_embedding"].apply(_parse_embedding)

# Sentence embedding model used to encode queries at retrieval time.
# NOTE(review): presumably the same model that produced `vector_embedding` —
# confirm against the preprocessing step.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Build the FAISS index from the dataframe (see src.retriever).
index = build_faiss_index(df)

In [None]:
def answer_query(query, k=3, show_context=True, context_preview_chars=600):
    """Answer ``query`` using retrieved sections as generation context.

    The function retrieves sections with ``retrieve_top_sections`` (using the
    notebook-level ``embedder``, ``df`` and ``index``), joins them with a
    ``---`` separator, and passes the result to ``generate_with_context``.

    Args:
        query: The question to answer.
        k: Number of sections requested from the retriever.
        show_context: When true, print a preview of the context and the
            generated answer.
        context_preview_chars: Maximum number of context characters printed
            when ``show_context`` is true. Only the printed preview is
            truncated; the full context is always sent to the generator.

    Returns:
        Whatever ``generate_with_context`` returns for the query and context.
    """
    # Retrieve top sections.
    # NOTE(review): assumes the retriever returns an iterable of strings,
    # since the result is passed straight to str.join — confirm.
    top_sections = retrieve_top_sections(query, embedder, df, index, k)
    context = "\n\n---\n\n".join(top_sections)

    # Generate final answer
    answer = generate_with_context(query, context)

    if show_context:
        # Labels use real emoji; the previous literals were UTF-8 bytes
        # mis-decoded as cp1252 and rendered as garbage characters.
        print("📘 Context Used:\n", context[:context_preview_chars])
        print("\n🤖 Answer:\n", answer)

    return answer


In [None]:
# Example query. show_context defaults to True, so answer_query already prints
# the context preview and the answer; the trailing semicolon stops Jupyter from
# echoing the returned answer a second time as the cell's output value.
answer_query("What are the duties of an employer?");