In [6]:
import os
import pandas as pd
from langchain_community.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
import json, re

# ---------------- CONFIG ----------------
# All tunables for this cell live here; the steps below only read these names.
FACTS_CSV = "facts.csv"            # facts CSV; relative path, so it must be next to the notebook
DB_DIR = "vectorstore"             # directory the FAISS index is saved to and reloaded from
EMBED_MODEL = "nomic-embed-text"   # model name passed to OllamaEmbeddings
LLM_MODEL = "gemma2:2b"            # model name passed to Ollama for the fact check
TOP_K = 3                          # passed to the retriever as search_kwargs["k"]
# ----------------------------------------

# Make sure the directory that will hold the vector index exists.
os.makedirs(DB_DIR, exist_ok=True)

# Step 1: Read the facts CSV and print a short preview of it.
print(f"Checking CSV at: {FACTS_CSV}")
df = pd.read_csv(FACTS_CSV)
print("CSV Loaded Successfully!", df.head(), sep="\n")
print(f"Columns: {df.columns}")

# The first CSV column is handed to the loader as its source column.
SOURCE_COL = df.columns[0]
print(f"Using source_column: {SOURCE_COL}")

# Step 2: Build vectorstore
# Embedding client used to vectorise every chunk below.
embed = OllamaEmbeddings(model=EMBED_MODEL)

# Load the CSV through CSVLoader, with SOURCE_COL as its source_column.
loader = CSVLoader(
    file_path=FACTS_CSV,
    source_column=SOURCE_COL,
)
docs = loader.load()

# Split the loaded documents into overlapping chunks (size 300, overlap 50).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

# Index the chunks with FAISS and write the index to DB_DIR.
vectorstore = FAISS.from_documents(chunks, embed)
vectorstore.save_local(DB_DIR)

print("Vectorstore created successfully!")

# Step 3: Load DB & Create retriever
# A fresh embedding client is built so this step does not depend on Step 2's objects.
embed = OllamaEmbeddings(model=EMBED_MODEL)

# NOTE(security): load_local reads pickled data, which is why the explicit
# allow_dangerous_deserialization opt-in is required. Only point DB_DIR at an
# index written by this notebook, never at files from an untrusted source.
vectorstore = FAISS.load_local(
    DB_DIR,
    embed,
    allow_dangerous_deserialization=True,
)

# The retriever is configured with k=TOP_K via search_kwargs.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=TOP_K))

# Local Ollama model that will judge the claim.
llm = Ollama(model=LLM_MODEL)

# Step 4: Test Query
# The claim to fact-check; it is used both as the retrieval query and in the prompt.
query = "The Indian government has announced free electricity to all farmers starting July 2025."

# invoke() is the supported retriever entry point; get_relevant_documents()
# is deprecated and emits a LangChainDeprecationWarning on every call.
docs = retriever.invoke(query)
# Joining zero documents already yields "", so no separate empty check is needed.
context = "\n\n".join(d.page_content for d in docs)

# The prompt restricts the model to the retrieved evidence and asks for a
# single JSON object so that Step 5 can parse the answer.
prompt = (
    "You are a fact-checking assistant.\n\n"
    "Using ONLY the evidence below, evaluate the user's question and return EXACTLY one JSON object with keys: verdict, evidence, reasoning.\n\n"
    f"Evidence:\n{context}\n\n"
    f"Question: {query}\n\n"
    "Return only the JSON object."
)

resp = llm.invoke(prompt)
# Normalise the reply to text: use it directly when it is already a str,
# otherwise fall back to a `.content` attribute or the object's str() form.
raw = resp if isinstance(resp, str) else getattr(resp, "content", str(resp))

print("Raw output:\n", raw)

# Step 5: Parse JSON
# The model may wrap its answer in a ```json fence or add commentary around it.
# Try each "{" in turn and decode one complete JSON value from that position;
# raw_decode ignores whatever follows the value, so braces in trailing text
# cannot break parsing the way a greedy first-"{"-to-last-"}" match would.
decoder = json.JSONDecoder()
parsed = None        # always bound, so a failed run never leaves a stale value
parse_error = None   # last decode error seen, reported if nothing parses

for brace in re.finditer(r"\{", raw):
    try:
        parsed, _ = decoder.raw_decode(raw[brace.start():])
    except json.JSONDecodeError as e:
        parse_error = e
    else:
        break

if parsed is not None:
    print("\nParsed JSON:")
    print(json.dumps(parsed, indent=2))
elif parse_error is not None:
    print("Failed to parse JSON:", parse_error)
else:
    print("No JSON found in output.")


Checking CSV at: facts.csv
CSV Loaded Successfully!
                                                text      source        date
0  Government announces PM-KISAN scheme to provid...  pib.gov.in  2019-02-24
1  The Union Cabinet approves National Education ...  pib.gov.in  2020-07-29
2  The Ministry of Power launches the Saubhagya s...  pib.gov.in  2017-09-25
3  Digital India initiative expands broadband acc...  pib.gov.in  2021-03-15
4  Ayushman Bharat provides free health insurance...  pib.gov.in  2018-09-23
Columns: Index(['text', 'source', 'date'], dtype='object')
Using source_column: text
Vectorstore created successfully!


  llm = Ollama(model=LLM_MODEL)
  docs = retriever.get_relevant_documents(query)


Raw output:
 ```json
{
 "verdict": "FALSE",
 "evidence": "The provided text does not mention any announcement of free electricity for all farmers in India starting July 2025.",
 "reasoning": "The provided evidence consists of government announcements related to electrification, self-reliance, and financial support to farmers. None of these mentions the provision of free electricity for farmers." 
}
```

Parsed JSON:
{
  "verdict": "FALSE",
  "evidence": "The provided text does not mention any announcement of free electricity for all farmers in India starting July 2025.",
  "reasoning": "The provided evidence consists of government announcements related to electrification, self-reliance, and financial support to farmers. None of these mentions the provision of free electricity for farmers."
}
