In [1]:
import os
import json
from datetime import datetime

HISTORY_FILE = "chat-history.json"
LAST_UPDATE_FILE = "results/last_update.txt"
INDEX_PATH = "results/faiss_index"
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

def validate_chat_history_format():
    """
    Upgrade a legacy chat-history file to the timestamped timeline layout.

    The legacy layout is a bare JSON list of message dicts (the first item
    carries a "role" key). When HISTORY_FILE holds that layout it is rewritten
    as a one-entry timeline:
        [{"timestamp": <now, ISO-8601>, "conversation": <the messages>}]

    A missing file, a file that is not valid JSON, or a file in any other
    layout is left untouched. Returns None in every case.

    The rewrite goes through a temporary sibling file followed by os.replace,
    so a failure while dumping cannot truncate the existing history.
    """
    if not os.path.exists(HISTORY_FILE):
        return
    with open(HISTORY_FILE, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            return

    is_legacy = (
        isinstance(data, list)
        and bool(data)
        and isinstance(data[0], dict)
        and "role" in data[0]
    )
    if not is_legacy:
        return

    wrapped = [{"timestamp": datetime.now().isoformat(), "conversation": data}]
    tmp_file = HISTORY_FILE + ".tmp"
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(wrapped, f, indent=2, ensure_ascii=False)
        os.replace(tmp_file, HISTORY_FILE)
    finally:
        # After a successful replace the temp file is gone; this only fires
        # when the dump or the replace failed part-way.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

def fix_chat_history_format():
    """Thin alias: run validate_chat_history_format() and return nothing."""
    validate_chat_history_format()
    return None

In [2]:
import os
import json
from datetime import datetime
from pydantic import BaseModel

HISTORY_FILE = "chat-history.json"

def save_history(conversation: list[dict]):
    """
    Persist one conversation as a new timestamped entry in HISTORY_FILE.

    conversation: plain dicts, each holding "role" and "content" keys.
    Entries already on disk are kept. A history file that cannot be read,
    cannot be parsed, or does not contain a JSON list is treated as empty.
    Write failures are reported on stdout instead of being raised.
    """
    def _read_timeline() -> list:
        # Anything other than a readable JSON list counts as "no history yet".
        if not os.path.exists(HISTORY_FILE):
            return []
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as src:
                loaded = json.load(src)
        except (json.JSONDecodeError, OSError) as e:
            print(f"⚠️  Could not read/parse {HISTORY_FILE}: {e!r}. Starting fresh.")
            return []
        return loaded if isinstance(loaded, list) else []

    history = _read_timeline()
    history.append({
        "timestamp": datetime.now().isoformat(),
        "conversation": conversation,
    })

    # Dump to a sibling temp file, then swap it in with os.replace.
    scratch = HISTORY_FILE + ".tmp"
    try:
        with open(scratch, "w", encoding="utf-8") as dst:
            json.dump(history, dst, indent=2, ensure_ascii=False)
        os.replace(scratch, HISTORY_FILE)
    except Exception as e:
        print(f"❌ Failed writing history to {HISTORY_FILE}: {e!r}")
        # Best-effort removal of a leftover temp file.
        try:
            if os.path.exists(scratch):
                os.remove(scratch)
        except OSError:
            pass


In [3]:
from datasets import load_dataset
# Load the GSM8K "main" training split and pull out its first record.
dataset = load_dataset("gsm8k", "main", split="train")
first_element = next(iter(dataset))

# The record printed below is a dict with "question" and "answer" keys.
print(first_element)

{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?', 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}


In [4]:
import pandas as pd
import json
# Read the intents file explicitly as UTF-8 so parsing does not depend on the
# platform's default text encoding.
with open("intents.json", "r", encoding="utf-8") as f:
    intents_data = json.load(f)

# Flatten the "intents" list into a DataFrame (columns shown in the output
# below: tag, patterns, responses) and preview the first rows.
df = pd.json_normalize(intents_data["intents"])
print(df.head())

             tag                                           patterns  \
0    abstraction  [Explain data abstraction., What is data abstr...   
1          error  [What is a syntax error, Explain syntax error,...   
2  documentation  [Explain program documentation. Why is it impo...   
3        testing                        [What is software testing?]   
4  datastructure             [How do you explain a data structure?]   

                                           responses  
0  [Data abstraction is a technique used in compu...  
1  [A syntax error is an error in the structure o...  
2  [Program documentation is written information ...  
3  [Software testing is the process of evaluating...  
4  [A data structure is a way of organizing and s...  


In [5]:
from datasets import load_dataset

# Load the "sanitized" configuration of MBPP. No split is requested here;
# later cells index the same kind of object by split name.
ds = load_dataset("google-research-datasets/mbpp", "sanitized")

In [6]:
import os
import torch
import numpy as np

from datasets import load_dataset, concatenate_datasets
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    RagTokenizer,
    RagRetriever,
    RagSequenceForGeneration,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain import HuggingFacePipeline


In [7]:
# Central configuration constants for the notebook.
# ── Paths & names ─────────────────────────────────────────────────────────────
# NOTE(review): the index-building cell saves to the literal path
# "results/faiss_index", not to FAISS_INDEX_PATH — confirm which is intended.
OUTPUT_DIR         = "results/rag-llama"
FAISS_INDEX_PATH   = os.path.join(OUTPUT_DIR, "faiss_index")
DOCS_PATH          = os.path.join(OUTPUT_DIR, "docs.jsonl")

# ── Hugging Face models ───────────────────────────────────────────────────────
GEN_MODEL_NAME     = "meta-llama/Llama-3.1-8b"
EMBED_MODEL_NAME   = "sentence-transformers/all-MiniLM-L6-v2"

# ── Datasets ─────────────────────────────────────────────────────────────────
MBPP_ID            = "google-research-datasets/mbpp"
MBPP_CFG           = "sanitized"
GSM8K_ID           = "gsm8k"
GSM8K_SPLIT        = "train"

# ── RAG / Retrieval params ────────────────────────────────────────────────────
# Same values as the chunk_size / chunk_overlap literals passed to
# RecursiveCharacterTextSplitter elsewhere in the notebook.
CHUNK_SIZE         = 1000
CHUNK_OVERLAP      = 200

# ── LoRA fine-tuning (optional) ───────────────────────────────────────────────
LORA_R             = 16
LORA_ALPHA         = 32
LORA_DROPOUT       = 0.05

# ── Trainer hyperparameters (for fine-tuning generator) ──────────────────────
NUM_EPOCHS         = 3
TRAIN_BS           = 2
EVAL_BS            = 2
GRAD_ACCUM_STEPS   = 8
LEARNING_RATE      = 2e-4


In [8]:
from datasets import load_dataset, concatenate_datasets

# Cell 2: fix_chat_history_format()

def fix_chat_history_format():
    """
    Normalize the on-disk chat history, then announce that the step ran.

    Delegates to validate_chat_history_format(); meant to be called just
    before the history file is loaded (via datasets or similar).
    """
    validate_chat_history_format()
    notice = "Ran fix_chat_history_format()"
    print(notice)


# Normalize the history file first, so it is in the
# {"timestamp", "conversation"} layout that the mapping step below removes.
fix_chat_history_format()

# 1) Chat history: loaded with the generic "json" builder; the file is
#    registered under a single "train" split (there is no validation split).
raw_chat = load_dataset(
    "json",
    data_files={"train": "chat-history.json"}
)
def format_chat_batch(batch):
    """
    Collapse each conversation in a batch into one input/target text pair.

    batch["conversation"] holds one list of {"role", "content"} dicts per row.
    All user contents are joined with single spaces into the input, and all
    assistant contents into the target; other roles are ignored.
    Returns {"input_text": [...], "target_text": [...]}, one item per row.
    """
    def _joined(conversation, speaker):
        return " ".join(
            turn["content"] for turn in conversation if turn["role"] == speaker
        )

    pairs = [
        (_joined(conversation, "user"), _joined(conversation, "assistant"))
        for conversation in batch["conversation"]
    ]
    return {
        "input_text": [question for question, _ in pairs],
        "target_text": [reply for _, reply in pairs],
    }

# Map conversations to input_text/target_text and drop the two source columns,
# leaving only the columns produced by format_chat_batch.
chat_ds = raw_chat["train"].map(
    format_chat_batch,
    batched=True,
    remove_columns=["timestamp","conversation"]
)

# 2) intents.json, loaded the same way under a single "train" split
raw_intents = load_dataset(
    "json",
    data_files={"train": "intents.json"}
)
def format_intents_batch(batch):
    """
    Flatten intent definitions into (pattern, response) training pairs.

    batch["intents"] is expected to be a list whose items are themselves lists
    of intent dicts with "patterns" and "responses" keys. Every pattern is
    paired with the first response of its intent.

    Intents without patterns or without responses (missing, None or empty) are
    skipped, as are rows whose intent list is None, instead of raising.

    Returns {"input_text": [...], "target_text": [...]} with one item per
    pattern, so the output may be longer or shorter than the input batch.
    """
    inps, tgts = [], []
    for intents_list in batch["intents"]:
        for intent in intents_list or []:
            patterns = intent.get("patterns") or []
            responses = intent.get("responses") or []
            if not responses:
                # Nothing to teach for this intent; indexing [0] would raise.
                continue
            first_response = responses[0]
            for pat in patterns:
                inps.append(pat)
                tgts.append(first_response)
    return {"input_text": inps, "target_text": tgts}

# Expand intents into pattern/response rows; the original "intents" column is
# removed so only input_text/target_text remain.
intents_ds = raw_intents["train"].map(
    format_intents_batch,
    batched=True,
    remove_columns=["intents"]
)

# 3) MBPP "sanitized" (only its "validation" and "prompt" splits are used below)
mbpp = load_dataset("google-research-datasets/mbpp", "sanitized")
def format_mbpp_batch(batch):
    """
    Turn MBPP rows into prompt -> fenced-Python-code training pairs.

    Reads the parallel "prompt" and "code" columns of the batch. The prompt is
    used as the input as is; the code is wrapped in a ```python fence.
    """
    pairs = list(zip(batch["prompt"], batch["code"]))
    return {
        "input_text": [task for task, _ in pairs],
        "target_text": [f"```python\n{solution}\n```" for _, solution in pairs],
    }

# Format the "validation" and "prompt" splits separately (each drops its own
# original columns), then concatenate them into one dataset.
mbpp_ds = concatenate_datasets([
    mbpp["validation"].map(format_mbpp_batch, batched=True, remove_columns=mbpp["validation"].column_names),
    mbpp["prompt"].    map(format_mbpp_batch, batched=True, remove_columns=mbpp["prompt"].column_names),
])

# 4) GSM8K "main" configuration, train split
gsm = load_dataset("gsm8k", "main", split="train")
def format_gsm_batch(batch):
    """
    Prefix GSM8K questions and answers with their section labels.

    Each question becomes "Problem:\\n<question>" and each answer becomes
    "Answer:\\n<answer>"; the two columns are processed independently.
    """
    inputs = []
    for question in batch["question"]:
        inputs.append("Problem:\n" + question)

    targets = []
    for solution in batch["answer"]:
        targets.append("Answer:\n" + solution)

    return {"input_text": inputs, "target_text": targets}

# Replace the original GSM8K columns with input_text/target_text.
gsm_ds = gsm.map(
    format_gsm_batch,
    batched=True,
    remove_columns=gsm.column_names
)

# 5) Combine all four formatted sources (chat, intents, MBPP, GSM8K)
train_ds = concatenate_datasets([chat_ds, intents_ds, mbpp_ds, gsm_ds])
print("Total training examples:", len(train_ds))


Ran fix_chat_history_format()


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Total training examples: 7890


In [9]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 4.1 Concatenate input+target into a list of raw docs, one per training example
raw_texts = [
    ex["input_text"] + "\n\n" + ex["target_text"]
    for ex in train_ds
]
# One metadata dict per raw doc; every chunk cut from a doc carries its "source" id
metadatas = [
    {"source": f"doc-{i}"}
    for i in range(len(raw_texts))
]

# 4.2 Chunk long docs into CHUNK_SIZE-character windows with
#     CHUNK_OVERLAP-character overlap. No length_function is passed, so the
#     splitter measures characters, not tokens. The sizes come from the
#     configuration cell instead of being repeated here as literals.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

docs = []
for text, meta in zip(raw_texts, metadatas):
    for chunk in splitter.split_text(text):
        docs.append(Document(page_content=chunk, metadata=meta))

print(f"▶ Created {len(docs)} chunks from {len(raw_texts)} documents.")


▶ Created 8151 chunks from 7890 documents.


In [10]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 5.1 Initialize your embedding model
# (EMBED_MODEL_NAME is re-assigned here to the same value it has in earlier cells.)
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)

# 5.2 Create FAISS index from Document objects (embeds every chunk in `docs`)
vectorstore = FAISS.from_documents(docs, embedder)

# 5.3 (Optional) persist to disk for later reuse
# Later cells reload the index from this same "results/faiss_index" path.
INDEX_PATH = "results/faiss_index"
vectorstore.save_local(INDEX_PATH)
print(f"✔ FAISS index saved to '{INDEX_PATH}'.")


  embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)


✔ FAISS index saved to 'results/faiss_index'.


In [11]:
# Provide the Hugging Face token without storing it in the notebook.
# If HUGGINGFACE_HUB_TOKEN is already set in the environment it is left alone;
# otherwise the token is requested interactively (the input is not echoed).
# NOTE(security): never paste a literal token into this cell. Any token that
# was ever committed with the notebook should be treated as leaked and revoked.
import os
from getpass import getpass

if not os.environ.get("HUGGINGFACE_HUB_TOKEN"):
    os.environ["HUGGINGFACE_HUB_TOKEN"] = getpass("Hugging Face token: ")


In [12]:
# from langchain.prompts import PromptTemplate
#
# custom_prompt = PromptTemplate.from_template("""
# You are a calm and knowledgeable tutor. Your task is to assist students by carefully analyzing each problem and providing clear, thoughtful solutions. Use the provided context if it contains relevant information, and otherwise rely on your own reasoning.
#
# Always begin your answer with:
# "I am here to help with a solution for this problem:"
#
# Instructions:
# - Carefully read the entire question. Pay close attention to:
#   - Numbers, units, or constraints mentioned
#   - Special conditions or exceptions
#   - What exactly is being asked
# - First look in the Context.
#   - If you find the answer there, use it and briefly mention the source.
#   - If the Context does not contain the answer, compute or explain it yourself using logical steps.
# - Clearly show your step-by-step reasoning.
# - Conclude with a direct and final answer prefixed by: "Answer:"
# - Do not make up facts outside your reasoning.
# - Stay calm and explanatory, like a patient teacher.
#
# Context:
# {context}
#
# Question:
# {question}
#
# Answer:
# """)

from langchain.prompts import PromptTemplate


# Prompt used by the RetrievalQA "stuff" chain. It must keep exactly two
# placeholders: {context} (retrieved chunks) and {question} (the user query).
# The guardrail rules are stated once, the "I don't know" fallback is spelled
# out, and the response format lists all three problem types.
custom_prompt = PromptTemplate.from_template("""
You are a calm, patient, and highly knowledgeable tutor. Your job is to answer exactly one student question per invocation.

Rules:
- Never reprint the Context wholesale unless the student explicitly asks for it.
- Give exactly one answer; do not add extra questions or extra answers.
- **Do not invent or hallucinate.** If you are not certain of a fact and it is not in the Context, respond: "I don't know based on the information available."

Questions may be:

- **Math** (numeric, algebra, calculus)
- **Informatics** (algorithms, data structures, programming)
- **Pure Theory** (definitions, concepts)

**0. Context Check**
First, read the **Context** below.
- If it contains the exact answer (a worked math solution, a definition, or code snippet), use that verbatim and note “(taken from context)”.
- If not, proceed to the relevant step below.

**1. Math**
- If the question is *only* a basic arithmetic expression (`+`, `-`, `*`, `/`, parentheses), compute it directly and return.
- For all other math, pick the simplest method, show only necessary steps with clear reasoning.

**2. Informatics**
- Identify the concept or algorithm.
- Explain concisely, using code examples only when they clarify.
- Focus strictly on the question.

**3. Pure Theory**
- Provide a crisp authoritative definition.
- If the context has it, use it verbatim with “(taken from context)”.
- Otherwise, supply your own clear definition.

---

Respond in this format:

---

Problem Type: [Math, Informatics, or Pure Theory].

[Your detailed explanation or step-by-step solution]

Answer: <final result or conclusion>

---
Context:
{context}

Question:
{question}

Answer:
""")




In [13]:
import os
from transformers import pipeline, BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# ── 0) Grab your token from env ────────────────────────────────────────────────
# Fail fast with a clear message when the token is absent or empty.
# NOTE(review): hf_token is not referenced again in the visible part of the
# notebook (the LLM below talks to a local endpoint) — confirm it is still needed.
hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
if not hf_token:
    raise ValueError("Please set HUGGINGFACE_HUB_TOKEN in your environment before running this cell.")

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains import LLMChain

# ── 1) Reload FAISS index ──
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Same embedding model name that was used when the index was built.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# NOTE(security): allow_dangerous_deserialization=True opts in to loading a
# pickled payload; only do this for index files you created yourself.
vectorstore = FAISS.load_local(
    "results/faiss_index",
    embedder,
    allow_dangerous_deserialization=True
)
# Retriever configured with k=4 (presumably the number of chunks returned per query).
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# ── 2) Connect to local LM Studio API ──────────────────────────────────────────
# ChatOpenAI is pointed at a local OpenAI-compatible endpoint via openai_api_base.
llm = ChatOpenAI(
    model_name="meta-llama-3.1-8b-instruct",  # Just for tracking, not actually used to load model
    openai_api_key="lm-studio",               # Dummy API key as used in your chat() function
    openai_api_base="http://localhost:1234/v1", # Your LM Studio API endpoint
    temperature=0.7,
    max_tokens=512
)
# Chain to generate answers with a defined prompt
# NOTE(review): llm_chain is not passed to the RetrievalQA chain built later in
# this cell (that chain receives llm and custom_prompt directly) — confirm
# whether this object is still used anywhere.
llm_chain = LLMChain(
    llm=llm,
    prompt=custom_prompt
)


# # ── 3) Build & run RetrievalQA ─────────────────────────────────────────────────
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",       # or "map_reduce" / "refine"
#     retriever=retriever,
#     return_source_documents=True,
# ) -> pre-fabricated

# Build the RetrievalQA chain: the retriever supplies the chunks, custom_prompt
# is handed to the "stuff" chain, and the retrieved documents are returned
# alongside the answer (return_source_documents=True).
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",  # or "map_reduce", "refine" if you prefer
    chain_type_kwargs={"prompt": custom_prompt},
    return_source_documents=True,
)  # custom-prompt retrieval chain

# # ── 4) Test query ──────────────────────────────────────────────────────────────
# query = "How would you implement binary search in Python?"
# result = qa_chain(query)
# print("Answer:\n", result["result"])
# print("\nSources:")
# for doc in result["source_documents"]:
#     print("-", doc.metadata["source"])

# Test the chain with the correct input format
# test_query = {"context": "", "query": "What is Python?"}  # Ensure both keys are included
# try:
#     test_result = qa_chain(test_query)
#     print("✅ QA chain test successful!")
#     print(f"Result: {test_result['result'][:100]}...")
# except Exception as e:
#     print(f"❌ QA chain test failed with error: {e}")


  llm = ChatOpenAI(
  llm_chain = LLMChain(


In [14]:
# ── Cell: Load FAISS index & build retriever ────────────────────────────────────

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Recreate your embedder exactly as when you built the index
# (this cell repeats the reload already done in the previous cell and
# rebinds embedder / vectorstore / retriever to fresh objects).
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load your on-disk FAISS index (you trust its provenance)
# NOTE(security): allow_dangerous_deserialization=True opts in to loading a
# pickled payload; never point this at an index from an untrusted source.
vectorstore = FAISS.load_local(
    "results/faiss_index",
    embedder,
    allow_dangerous_deserialization=True
)

# Wrap as a retriever configured with k=4
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


In [15]:
import requests

# Local chat-completions endpoint, model id and HTTP headers used by chat().
# The bearer value is the same "lm-studio" placeholder key given to ChatOpenAI earlier.
URL = "http://localhost:1234/v1/chat/completions"
MODEL_ID = "meta-llama-3.1-8b-instruct"
HEADERS = {"Content-Type": "application/json", "Authorization": "Bearer lm-studio"}

def chat():
    """
    Run an interactive tutoring session against the local chat endpoint.

    Reads user turns from stdin until the user types "exit" (case-insensitive,
    surrounding whitespace ignored). After each turn the whole message list is
    posted to URL and the assistant's reply is printed and appended to the
    conversation.

    On exit the conversation is persisted with save_history(), provided at
    least one message beyond the system prompt exists, and only then is the
    "Conversation saved" notice printed.

    A failed request or a malformed response is reported, and the pending user
    turn is dropped so that the stored conversation never contains a question
    without an answer; the session then continues.
    """
    messages = [{"role": "system", "content": "You are a helpful programming tutor."}]
    while True:
        user_input = input("You: ")
        if user_input.strip().lower() == "exit":
            if len(messages) > 1:
                save_history(messages)
                print(f"Conversation saved to {HISTORY_FILE}")
            break

        messages.append({"role": "user", "content": user_input})
        payload = {"model": MODEL_ID, "messages": messages, "temperature": 0.7}
        try:
            response = requests.post(URL, headers=HEADERS, json=payload, timeout=1000)
            # Surface HTTP errors instead of failing later on a missing "choices" key.
            response.raise_for_status()
            answer = response.json()["choices"][0]["message"]["content"].strip()
        except (requests.RequestException, KeyError, IndexError, ValueError) as e:
            messages.pop()  # drop the unanswered user turn
            print(f"Request failed: {e!r}")
            continue

        messages.append({"role": "assistant", "content": answer})
        print(f"Tutor: {answer}")

In [16]:
from datasets import load_dataset, concatenate_datasets

# Rebuild of the training set (same steps as the earlier dataset cell).
# Normalize the history file before handing it to the "json" loader.
fix_chat_history_format()
raw_chat = load_dataset("json", data_files={"train": HISTORY_FILE})

def format_chat_batch(batch):
    """
    Reduce every conversation of the batch to one input/target text pair.

    Each item of batch["conversation"] is a list of {"role", "content"} dicts.
    User contents are space-joined into "input_text", assistant contents into
    "target_text"; messages with any other role are ignored.
    """
    inputs, targets = [], []
    for conversation in batch["conversation"]:
        spoken = {"user": [], "assistant": []}
        for message in conversation:
            bucket = spoken.get(message["role"])
            if bucket is not None:
                bucket.append(message["content"])
        inputs.append(" ".join(spoken["user"]))
        targets.append(" ".join(spoken["assistant"]))
    return {"input_text": inputs, "target_text": targets}

# Keep only input_text/target_text for the chat data.
chat_ds = raw_chat["train"].map(format_chat_batch, batched=True, remove_columns=["timestamp","conversation"])

# intents.json is loaded under a single "train" split.
raw_intents = load_dataset("json", data_files={"train": "intents.json"})
def format_intents_batch(batch):
    """
    Flatten intent definitions into (pattern, response) training pairs.

    batch["intents"] is expected to be a list whose items are themselves lists
    of intent dicts with "patterns" and "responses" keys. Every pattern is
    paired with the first response of its intent.

    Intents without patterns or without responses (missing, None or empty) are
    skipped, as are rows whose intent list is None, instead of raising.

    Returns {"input_text": [...], "target_text": [...]} with one item per
    pattern, so the output may be longer or shorter than the input batch.
    """
    inps, tgts = [], []
    for intents_list in batch["intents"]:
        for intent in intents_list or []:
            patterns = intent.get("patterns") or []
            responses = intent.get("responses") or []
            if not responses:
                # Nothing to teach for this intent; indexing [0] would raise.
                continue
            first_response = responses[0]
            for pat in patterns:
                inps.append(pat)
                tgts.append(first_response)
    return {"input_text": inps, "target_text": tgts}

# Expand intents into pattern/response rows and drop the source column.
intents_ds = raw_intents["train"].map(format_intents_batch, batched=True, remove_columns=["intents"])

# MBPP, "sanitized" configuration (indexed by split name below).
mbpp = load_dataset("google-research-datasets/mbpp", "sanitized")
def format_mbpp_batch(batch):
    """
    Build prompt -> fenced-code pairs from a batch of MBPP rows.

    The "prompt" column is passed through as the input; each entry of the
    "code" column is wrapped in a ```python fence to form the target.
    """
    tasks_and_solutions = list(zip(batch["prompt"], batch["code"]))
    inputs = [task for task, _ in tasks_and_solutions]
    targets = [f"```python\n{solution}\n```" for _, solution in tasks_and_solutions]
    return {"input_text": inputs, "target_text": targets}

# Only the "validation" and "prompt" splits are formatted and concatenated.
mbpp_ds = concatenate_datasets([
    mbpp["validation"].map(format_mbpp_batch, batched=True, remove_columns=mbpp["validation"].column_names),
    mbpp["prompt"].map(format_mbpp_batch, batched=True, remove_columns=mbpp["prompt"].column_names),
])

# GSM8K "main" configuration, train split.
gsm = load_dataset("gsm8k", "main", split="train")
def format_gsm_batch(batch):
    """
    Label GSM8K questions and answers for training.

    Questions are prefixed with "Problem:\\n" and answers with "Answer:\\n";
    the "question" and "answer" columns are handled independently.
    """
    labelled_questions = []
    for question in batch["question"]:
        labelled_questions.append("Problem:\n" + question)

    labelled_answers = []
    for solution in batch["answer"]:
        labelled_answers.append("Answer:\n" + solution)

    return {"input_text": labelled_questions, "target_text": labelled_answers}

# Replace the original GSM8K columns with input_text/target_text.
gsm_ds = gsm.map(format_gsm_batch, batched=True, remove_columns=gsm.column_names)

# Final training set: chat + intents + MBPP + GSM8K.
train_ds = concatenate_datasets([chat_ds, intents_ds, mbpp_ds, gsm_ds])
print("Total training examples:", len(train_ds))

Ran fix_chat_history_format()


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Total training examples: 7890


In [17]:
from typing import List, Dict
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def get_new_conversations() -> List[Dict]:
    """
    Return the history entries recorded after the last knowledge-base update.

    The cutoff is the stripped text of LAST_UPDATE_FILE when that file exists.
    Entries of HISTORY_FILE are kept when their "timestamp" string compares
    greater than the cutoff (entries without a timestamp compare as ""). With
    no cutoff, or an empty one, every entry is returned. A history file whose
    top-level JSON value is not a list is treated as a single entry. Returns
    [] when HISTORY_FILE does not exist.
    """
    cutoff = None
    if os.path.exists(LAST_UPDATE_FILE):
        with open(LAST_UPDATE_FILE, "r", encoding="utf-8") as marker:
            cutoff = marker.read().strip()

    if not os.path.exists(HISTORY_FILE):
        return []

    with open(HISTORY_FILE, "r", encoding="utf-8") as source:
        entries = json.load(source)
    if not isinstance(entries, list):
        entries = [entries]

    if not cutoff:
        return entries

    fresh = []
    for entry in entries:
        if entry.get("timestamp", "") > cutoff:
            fresh.append(entry)
    return fresh

def is_correction(conv: Dict) -> bool:
    """
    Tell whether a conversation contains a user correction of the assistant.

    A correction is a user message that directly follows an assistant message
    and whose lower-cased content contains one of the marker substrings
    "wrong", "incorrect", "mistake" or "no,". A conversation without a
    "conversation" key is treated as empty.
    """
    turns = conv.get("conversation", [])
    markers = ("wrong", "incorrect", "mistake", "no,")
    for previous, current in zip(turns, turns[1:]):
        if current["role"] != "user" or previous["role"] != "assistant":
            continue
        lowered = current["content"].lower()
        if any(marker in lowered for marker in markers):
            return True
    return False

def conversations_to_docs(convs: List[Dict]) -> List[Document]:
    """
    Render conversations as Question/Answer text and split them into Documents.

    System messages are left out. User messages are prefixed with "Question: ",
    every other role with "Answer: ", and each message is followed by a blank
    line. The text of one conversation is split with a 1000/200
    RecursiveCharacterTextSplitter; all chunks of a conversation receive the
    same metadata dict: its position in `convs` as "conversation-<idx>" and
    its "timestamp" ("" when absent).
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documents = []
    for position, conv in enumerate(convs):
        rendered_turns = []
        for turn in conv["conversation"]:
            if turn["role"] == "system":
                continue
            label = "Question: " if turn["role"] == "user" else "Answer: "
            rendered_turns.append(f"{label}{turn['content']}\n\n")
        transcript = "".join(rendered_turns)

        shared_meta = {
            "source": f"conversation-{position}",
            "timestamp": conv.get("timestamp", ""),
        }
        documents.extend(
            Document(page_content=piece, metadata=shared_meta)
            for piece in chunker.split_text(transcript)
        )
    return documents

def update_knowledge_base():
    """
    Add conversations recorded since the last update to the FAISS index.

    Steps:
      1. Collect new conversations (get_new_conversations) and report how many
         of them contain a user correction (is_correction).
      2. Convert them to Documents (conversations_to_docs).
      3. Load the index at INDEX_PATH and add the documents to it. If the
         index cannot be loaded, build a new one from the documents instead.
         In that case the documents are NOT added a second time, because
         from_documents already indexed them.
      4. Save the index and write the update cutoff to LAST_UPDATE_FILE.

    The cutoff is taken before the history is read, so a conversation saved
    while this function runs is picked up by the next update instead of being
    skipped. Prints progress messages; returns None.
    """
    # Captured first: everything saved from now on belongs to the next run.
    started_at = datetime.now().isoformat()

    new_convs = get_new_conversations()
    if not new_convs:
        print("No new conversations found.")
        return
    corrections = [c for c in new_convs if is_correction(c)]
    print(f"Found {len(new_convs)} convs, {len(corrections)} with corrections")
    docs = conversations_to_docs(new_convs)
    if not docs:
        print("No documents to add.")
        return

    embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
    try:
        # NOTE(security): this opts in to loading a pickled payload; INDEX_PATH
        # must only ever contain an index written by this notebook.
        vs = FAISS.load_local(INDEX_PATH, embedder, allow_dangerous_deserialization=True)
    except Exception as e:
        # Best-effort fallback kept from the original code: start a new index.
        print(f"Could not load index at {INDEX_PATH} ({e!r}); building a new one.")
        vs = FAISS.from_documents(docs, embedder)
    else:
        vs.add_documents(docs)
    vs.save_local(INDEX_PATH)

    # os.makedirs("") raises, so only create a directory when the path has one.
    marker_dir = os.path.dirname(LAST_UPDATE_FILE)
    if marker_dir:
        os.makedirs(marker_dir, exist_ok=True)
    with open(LAST_UPDATE_FILE, "w", encoding="utf-8") as f:
        f.write(started_at)
    print("Knowledge base updated")

In [18]:
# # ────────────────────────────────
# # 1) FastAPI & Required Imports
# # ────────────────────────────────
# from fastapi import FastAPI, HTTPException
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel
# from typing import List, Literal
# import re
# from langchain_experimental.tools.python.tool import PythonREPLTool
#
# # ────────────────────────────────
# # 2) Initialize Math Tool (Optional Shortcut for Arithmetic)
# # ────────────────────────────────
# # This tool allows your system to directly execute math expressions using Python
# python_tool = PythonREPLTool()
#
# # ────────────────────────────────
# # 3) FastAPI App Setup + CORS Config
# # ────────────────────────────────
# app = FastAPI()
#
# # This allows frontend (e.g., React at localhost:3000) to communicate with backend
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["http://localhost:3000"],
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )
#
# # ────────────────────────────────
# # 4) Data Models for Request and Response
# # ────────────────────────────────
#
# # A single message in the conversation
# class ChatMessage(BaseModel):
#     role: Literal["user", "assistant", "system"]  # Who sent the message
#     content: str
#
# # The client sends a message + full history
# class ChatRequest(BaseModel):
#     message: str
#     history: List[ChatMessage]
#
# # The server returns the latest answer + updated history
# class ChatResponse(BaseModel):
#     answer: str
#     history: List[ChatMessage]
#
# # ────────────────────────────────
# # 5) Utility: Detect simple arithmetic expressions like "17 * 19"
# # ────────────────────────────────
# ARITH_PATTERN = re.compile(r'^[\d\s\+\-\*\/\(\)]+$')  # Accepts + - * / and parentheses
# def is_arithmetic(q: str) -> bool:
#     return bool(ARITH_PATTERN.fullmatch(q.strip()))
#
# # ────────────────────────────────
# # 6) Main Endpoint: POST /rag_chat
# # ────────────────────────────────
# @app.post("/rag_chat", response_model=ChatResponse)
# async def rag_chat_endpoint(req: ChatRequest):
#     # A) Start from user-submitted chat history, or empty
#     messages = req.history or []
#
#     # B) Sanitize history:
#     # If any past assistant messages contain incorrect arithmetic, remove them
#     clean_hist: List[ChatMessage] = []
#     for m in messages:
#         if m.role == "assistant" and is_arithmetic(m.content):
#             correct = python_tool.run(m.content)
#             if m.content.strip() != correct.strip():
#                 # Replace with a warning message
#                 clean_hist.append(ChatMessage(
#                     role="system",
#                     content="⚠️ Removed previous unsupported arithmetic answer"
#                 ))
#                 continue
#         clean_hist.append(m)
#     messages = clean_hist  # Use the cleaned list going forward
#
#     # C) Append the new user message to the conversation
#     if not any(m.role == "user" and m.content == req.message for m in messages):
#         messages.append(ChatMessage(role="user", content=req.message))
#
#     try:
#         # D) Short-circuit: if the message is just math, compute it directly
#         if is_arithmetic(req.message):
#             answer = python_tool.run(req.message)
#
#             # Log answer into history and save
#             messages.append(ChatMessage(role="assistant", content=answer))
#             save_history([m.dict() for m in messages])  # Your implementation
#             return ChatResponse(answer=answer, history=messages)
#
#         # E) Fallback: normal LLM-powered RAG pipeline
#         result = qa_chain({"query": req.message})  # This uses your RetrievalQA
#         answer = result["result"]
#         docs = result["source_documents"]  # These are the context documents
#
#         # F) Filter for toxicity (banned keywords)
#         toxic = {
#             "kill", "hate", "stupid", "dumb", "racist", "sexist",
#             "violence", "bomb", "terror", "die", "suicide"
#         }
#         if any(word in answer.lower() for word in toxic):
#             answer = "⚠️ Response blocked due to potentially inappropriate content."
#
#         else:
#             # G) Hallucination filter:
#             # Check if answer actually shares tokens with source context
#             context_text = " ".join(doc.page_content.lower() for doc in docs)
#             shared = [word for word in answer.lower().split() if word in context_text]
#             if len(shared) < 5:  # Tune this threshold to your needs
#                 answer = "⚠️ I'm not confident this answer is grounded in the provided context."
#
#     except Exception as e:
#         # If anything breaks in the chain (e.g. LLM crashed), report it cleanly
#         raise HTTPException(status_code=500, detail=str(e))
#
#     # H) Append assistant's response, persist, and return everything
#     messages.append(ChatMessage(role="assistant", content=answer))
#     save_history([m.dict() for m in messages])  # Your implementation
#     return ChatResponse(answer=answer, history=messages)


In [19]:
# # Notebook cell: run the RAG FastAPI server in the background
# import threading
# import uvicorn
#
# def run_server():
#     uvicorn.run(
#         app,
#         host="0.0.0.0",
#         port=8000,
#         log_level="info",
#         reload=False,        # disable auto-reload in notebooks
#     )
#
# # Start Uvicorn in a daemon thread so the notebook stays interactive
# server_thread = threading.Thread(target=run_server, daemon=True)
# server_thread.start()
#
# print("🚀 RAG server is now running at http://localhost:8000")


In [1]:
# Cell 1: RAG‐only FastAPI app

import os, json, re
from datetime import datetime
from typing import List, Literal, Optional, Dict, Any

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from datasets import load_dataset
from evaluate import load as load_metric
from langchain_experimental.tools.python.tool import PythonREPLTool

# ── Chat‐history persistence (messages only) ──
HISTORY_FILE = "chat-history.json"
def save_history(conv: List[Dict[str,str]]):
    """
    Append one conversation to HISTORY_FILE as a timestamped entry.

    conv: list of message dicts (the endpoint passes role/content pairs).

    Existing entries are preserved. If the history file cannot be read or
    decoded, or its top-level JSON value is not a list, the timeline starts
    empty instead of failing on `.append`. The file is written through a
    temporary sibling and swapped in with os.replace; a leftover temporary
    file is removed when the write fails. Write errors propagate to the caller.
    """
    timeline = []
    if os.path.exists(HISTORY_FILE):
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"Could not read {HISTORY_FILE}: {e!r}. Starting a new timeline.")
        else:
            if isinstance(loaded, list):
                timeline = loaded
    timeline.append({"timestamp": datetime.now().isoformat(),"conversation":conv})
    tmp = HISTORY_FILE + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(timeline, f, ensure_ascii=False, indent=2)
        os.replace(tmp, HISTORY_FILE)
    finally:
        # Only present here when the dump or the replace failed.
        if os.path.exists(tmp):
            os.remove(tmp)

# ── Metrics & intent data loaders ──
# BLEU and ROUGE metric objects, loaded once when the cell runs.
bleu_metric  = load_metric("bleu")
rouge_metric = load_metric("rouge")
# intents_data is the list stored under the top-level "intents" key.
with open("intents.json","r",encoding="utf-8") as f:
    intents_data = json.load(f)["intents"]

# Python REPL tool; the endpoint below passes arithmetic-only strings to it.
python_tool = PythonREPLTool()
# Sources tried when the request asks for ground truth from "all" datasets.
ALL_DATASETS = ["gsm8k","mbpp","intents"]
# Characters allowed in an arithmetic-only message: ASCII digits, whitespace,
# + - * / and parentheses. ASCII digits only, because other Unicode digits
# accepted by \d are not valid in Python number literals.
ARITH = re.compile(r'^[0-9\s\+\-\*\/\(\)]+$')
def is_arithmetic(q: str) -> bool:
    """
    Return True when `q` is a plain arithmetic expression.

    Besides the character whitelist in ARITH, the expression must contain at
    least one digit (so "()" or "+" do not qualify) and must not contain the
    power operator "**". Exponentiation is not among the supported operations,
    and a power tower such as 9**9**9**9 would keep the interpreter busy
    indefinitely.
    """
    expr = q.strip()
    if not ARITH.fullmatch(expr):
        return False
    if "**" in expr:
        return False
    return any(ch in "0123456789" for ch in expr)

def find_ground_truth(source: str, q: str) -> Optional[str]:
    """
    Look up a reference answer for question `q` in one of the known sources.

    source: "gsm8k", "mbpp" or "intents"; any other value yields None.
    q:      the user question. Matching is a case-insensitive substring test
            of the stripped question against the dataset's question text.

    Returns the first matching reference (GSM8K answer, MBPP code, or the
    first response of the matching intent), or None when nothing matches, the
    question is blank, or the lookup fails. Lookup errors are printed, not raised.
    """
    ql = q.strip().lower()
    if not ql:
        # "" is a substring of every string and would match the first record.
        return None
    try:
        if source=="gsm8k":
            ds = load_dataset("gsm8k","main",split="train")
            for it in ds:
                if ql in it["question"].lower():
                    return it["answer"]
        elif source=="mbpp":
            ds = load_dataset("google-research-datasets/mbpp","sanitized",split="train")
            for it in ds:
                # The task description of the "sanitized" configuration is
                # read from "prompt" elsewhere in this notebook. "text" (the
                # key used here before) is kept as a fallback in case a
                # configuration provides it.
                task = it.get("prompt") or it.get("text") or ""
                if ql in task.lower():
                    return it["code"]
        elif source=="intents":
            for intent in intents_data:
                responses = intent.get("responses") or []
                if not responses:
                    continue
                if any(ql in p.lower() for p in intent.get("patterns") or []):
                    return responses[0]
    except Exception as e:
        print("⚠️ GT lookup error:", e)
    return None

# ── FastAPI wiring ──
app = FastAPI()
# CORS: only the origin http://localhost:3000 is allowed, with credentials and
# with every HTTP method and header permitted.
app.add_middleware(CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class ChatMessage(BaseModel):
    """One message of a conversation."""
    # Author of the message; restricted to these three literal values.
    role: Literal["user","assistant","system"]
    # Text of the message.
    content: str

class ChatRequest(BaseModel):
    """Request body of POST /rag_chat."""
    # The new user message to answer.
    message: str
    # Conversation so far, as sent by the client.
    history: List[ChatMessage]
    # Optional ground-truth source; "all" selects every entry of ALL_DATASETS.
    ground_truth_source: Optional[Literal["gsm8k","mbpp","intents","all"]] = None

class Metric(BaseModel):
    """Evaluation scores for one ground-truth dataset."""
    # Name of the dataset the scores refer to.
    dataset: str
    # NOTE(review): presumably the BLEU / ROUGE scores of the answer against
    # the ground truth; the code that fills these fields is not visible here.
    bleu: Optional[float]
    rouge: Optional[Dict[str,float]]

class ChatResponse(BaseModel):
    """Response body of POST /rag_chat."""
    # The assistant's answer to the request's message.
    answer: str
    # Conversation history returned to the client.
    history: List[ChatMessage]
    # Optional list of per-dataset metrics; defaults to None.
    metrics: Optional[List[Metric]] = None

# Words that cause an answer to be blocked when they occur as whole words.
_TOXIC_WORDS = frozenset({
    "kill", "hate", "stupid", "dumb", "racist", "sexist",
    "violence", "bomb", "terror", "die", "suicide"
})
# Upper bound on how many answer tokens must also occur in the retrieved
# context for the answer to count as grounded.
_MIN_GROUNDED_TOKENS = 5


def _word_tokens(text: str) -> List[str]:
    """Lower-case ``text`` and split it into word tokens (runs of ``\\w``)."""
    return re.findall(r"\w+", text.lower())


def _contains_toxic_word(text: str) -> bool:
    """Return True when any banned word occurs in ``text`` as a whole word."""
    return not _TOXIC_WORDS.isdisjoint(_word_tokens(text))


def _is_grounded(answer: str, docs) -> bool:
    """Return True when enough tokens of ``answer`` occur in the retrieved docs.

    "Enough" is ``_MIN_GROUNDED_TOKENS``, capped at the number of tokens in the
    answer so that a short answer fully covered by the context still passes.
    An answer with no word tokens is never considered grounded.
    """
    answer_tokens = _word_tokens(answer)
    if not answer_tokens:
        return False
    context_tokens = set()
    for doc in docs:
        context_tokens.update(_word_tokens(doc.page_content))
    shared = sum(1 for tok in answer_tokens if tok in context_tokens)
    return shared >= min(_MIN_GROUNDED_TOKENS, len(answer_tokens))


@app.post("/rag_chat", response_model=ChatResponse)
async def rag_chat_endpoint(req: ChatRequest):
    """Answer one chat turn through the RAG chain, with light post-filtering.

    Steps:
      1. Re-check arithmetic-looking assistant turns of the incoming history
         with the Python tool; mismatching ones are replaced by a system note.
      2. Append the new user message unless the client already sent it as the
         latest turn.
      3. A purely arithmetic message is evaluated by the Python tool and
         returned directly (no retrieval, no metrics).
      4. Otherwise query ``qa_chain`` and replace the answer with a warning
         when it contains a banned word or shares too few tokens with the
         retrieved documents.
      5. When ``ground_truth_source`` is set, score the final answer with
         BLEU/ROUGE against each requested dataset.
      6. Persist the conversation with ``save_history`` and return it.

    NOTE(review): the chain, dataset and tool calls below are synchronous and
    run inside this async handler — confirm that is acceptable for the
    expected load.
    """
    # 1) sanitize history
    msgs: List[ChatMessage] = []
    for m in req.history:
        if m.role=="assistant" and is_arithmetic(m.content):
            corr = python_tool.run(m.content)
            if m.content.strip() != corr.strip():
                msgs.append(ChatMessage(role="system",content="⚠️ Removed bad arithmetic"))
                continue
        msgs.append(m)

    # 2) append new user turn
    # Only the latest turn is compared: a question the user already asked
    # earlier in the conversation must still be recorded when asked again.
    already_latest = bool(msgs) and msgs[-1].role=="user" and msgs[-1].content==req.message
    if not already_latest:
        msgs.append(ChatMessage(role="user",content=req.message))

    # 3) math-only
    if is_arithmetic(req.message):
        # NOTE(review): this hands client-supplied text to the Python tool.
        # is_arithmetic limits it to digits, whitespace, + - * / and
        # parentheses, which still permits very expensive expressions such as
        # 9**9**9**9 — consider a bounded evaluator.
        ans = python_tool.run(req.message)
        msgs.append(ChatMessage(role="assistant",content=ans))
        save_history([m.dict() for m in msgs])
        return ChatResponse(answer=ans, history=msgs)

    # 4) retrieval-augmented answer
    result = qa_chain({"query": req.message})
    ans    = result["result"]
    docs   = result["source_documents"]

    # 4a) toxicity filter: whole-word match, so harmless words that merely
    #     contain a banned word ("skill", "whatever", "diet") are not blocked.
    if _contains_toxic_word(ans):
        ans = "⚠️ Response blocked due to potentially inappropriate content."
    # 4b) hallucination filter: the answer must overlap the retrieved context.
    elif not _is_grounded(ans, docs):
        ans = "⚠️ I'm not confident this answer is grounded in the provided context."

    # 5) metrics
    mets = []
    if req.ground_truth_source:
        to_do = ALL_DATASETS if req.ground_truth_source=="all" else [req.ground_truth_source]  # type: ignore
        for ds in to_do:
            gt = find_ground_truth(ds, req.message)
            b,r = None,None
            if gt:
                b = bleu_metric.compute(predictions=[ans],references=[[gt]])["bleu"]
                raw = rouge_metric.compute(predictions=[ans],references=[gt])
                # Plain numbers are kept as-is; any other value is indexed
                # with "recall" to obtain a single float per ROUGE variant.
                r = {k:(v if isinstance(v,(float,int)) else v["recall"]) for k,v in raw.items()}
            mets.append({"dataset":ds,"bleu":b,"rouge":r})

    # 6) persist and respond
    msgs.append(ChatMessage(role="assistant",content=ans))
    save_history([m.dict() for m in msgs])
    return ChatResponse(answer=ans, history=msgs, metrics=mets)


In [21]:

# ────────────────────────────────
# 7) TEST CALL (in-notebook) to see prints under this cell
# ────────────────────────────────
# client = TestClient(app)
# response = client.post(
#     "/rag_chat",
#     json={
#         "message": "What is computer architecture?",
#         "history": []
#     }
# )
# print("Response JSON:", response.json())


In [22]:
# # ────────────────────────────────
# # Cell: LLM-Judge with real-time feedback
# # ────────────────────────────────
# import os
# from fastapi import FastAPI, HTTPException
# from fastapi.middleware.cors import CORSMiddleware
# from fastapi.testclient import TestClient
# from pydantic import BaseModel
# from typing import List, Literal, Dict, Any, Optional
# from datasets import load_dataset
# from evaluate import load as load_metric
# from langchain_experimental.tools.python.tool import PythonREPLTool
# from langchain import OpenAI, LLMChain
# from langchain.prompts import PromptTemplate
# from dotenv import load_dotenv
#
# # 1) Ensure API key is set (or replace with your key string)
# load_dotenv(dotenv_path="llm.env")
#
# api_key = os.getenv("OPENAI_API_KEY")
# if not api_key:
#     raise ValueError(
#         "Missing OPENAI_API_KEY environment variable. "
#         "Please create a .env file with your key."
#     )
# # 2) Build the judge prompt & chain
# judge_prompt = PromptTemplate.from_template("""
# You are an expert evaluator. Given a Question and an Answer, produce:
# Verdict: Correct or Incorrect
# Score: a number between 0.0 and 1.0
# Comments: concise feedback.
#
# Question:
# {question}
#
# Answer:
# {answer}
# """)
# judge_llm = OpenAI(temperature=0, openai_api_key=api_key)
# judge_chain = LLMChain(llm=judge_llm, prompt=judge_prompt)
#
# # 3) (Re)define your FastAPI app and models as before
# app = FastAPI()
# app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
#
# class ChatMessage(BaseModel):
#     role: Literal["user","assistant","system"]
#     content: str
#
# class ChatRequest(BaseModel):
#     message: str
#     history: List[ChatMessage]
#
# class ChatResponse(BaseModel):
#     answer: str
#     history: List[ChatMessage]
#
# # (Include your is_arithmetic, find_ground_truth, metrics logic here…)
#
# @app.post("/rag_chat", response_model=ChatResponse)
# async def rag_chat_with_judge(req: ChatRequest):
#     # Call your original rag_chat logic (inline or imported)
#     # For brevity, assume rag_chat returns ChatResponse
#
#     resp: ChatResponse = await rag_chat_endpoint(req)
#
#     # 4) Run the judge immediately and print it
#     evaluation = judge_chain.run(question=req.message, answer=resp.answer)
#     print("\n🔍 LLM Judge Evaluation:\n" + evaluation)
#
#     return resp
#
# # 5) Test in-notebook to see prints under this cell
# client = TestClient(app)
# response = client.post("/rag_chat", json={"message":"2+4","history":[]})
# print("Response JSON:", response.json())


In [23]:
# Notebook cell: run the RAG FastAPI server in the background
import threading
import uvicorn

def run_server():
    """Serve ``app`` with Uvicorn on 0.0.0.0:8000 (thread target for this cell)."""
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "log_level": "info",
        "reload": False,  # disable auto-reload in notebooks
    }
    uvicorn.run(app, **server_options)

# Start Uvicorn in a daemon thread so the notebook stays interactive.
# Each execution of this cell creates and starts a new thread.
# NOTE(review): re-running the cell while an earlier server still holds port
# 8000 presumably fails to bind — confirm.
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

# Printed right after start() without waiting for the server, so the message
# can appear before the port is actually accepting connections.
print("🚀 RAG server is now running at http://localhost:8000")


🚀 RAG server is now running at http://localhost:8000


In [3]:
# Cell 2: LLM‐Judge FastAPI app

import os, logging
from typing import Literal, List
from fastapi import FastAPI, BackgroundTasks
from pydantic import BaseModel
from langchain import OpenAI, LLMChain
from langchain.prompts import PromptTemplate
from openai import RateLimitError

# Log to both the notebook output stream and the file judge.log.
logging.basicConfig(
  level=logging.INFO,
  handlers=[logging.StreamHandler(),logging.FileHandler("judge.log")],
)
logger = logging.getLogger()

# Fall back to a placeholder key so the judge chain can still be constructed,
# but say so explicitly: a silent fallback only surfaces later as an opaque
# authentication error inside run_judge.
if "OPENAI_API_KEY" not in os.environ:
    logger.warning(
        "OPENAI_API_KEY is not set; falling back to the placeholder 'sk-YOUR_KEY'. "
        "Judge calls are expected to fail until a real key is provided."
    )
    os.environ["OPENAI_API_KEY"] = "sk-YOUR_KEY"

# Prompt template for the judge; it is filled with the two variables
# {question} and {answer}.
judge_prompt = PromptTemplate.from_template("""
You are an expert evaluator...
Question:
{question}

Answer:
{answer}
""")
# Chain used by run_judge. temperature=0 is requested from the LLM, and the
# API key is read from the environment once, when this cell runs.
judge_chain = LLMChain(
  llm=OpenAI(temperature=0,openai_api_key=os.getenv("OPENAI_API_KEY")),
  prompt=judge_prompt
)

def run_judge(q: str, a: str) -> str:
    """Ask the judge chain to evaluate answer ``a`` to question ``q``.

    Exceptions raised by the chain are not propagated: a rate-limit error or
    any other failure is logged and turned into a short message string that
    is returned in place of the verdict.
    """
    try:
        verdict = judge_chain.run(question=q, answer=a)
        logger.info("Verdict:\n%s", verdict)
        return verdict
    except RateLimitError:
        # Rate limiting is expected now and then; report it as a skip.
        skipped = "Judge skipped – rate limit"
        logger.warning(skipped)
        return skipped
    except Exception as err:
        logger.error("Judge error: %s", err)
        return f"Judge error: {err}"



# Request body for POST /evaluate.
class EvaluateRequest(BaseModel):
    # The question that was asked.
    question: str
    # The answer to be judged.
    answer:   str

# Response body of POST /evaluate.
class EvaluateResponse(BaseModel):
    # Whatever run_judge returned: the judge's verdict text, or a
    # "Judge skipped"/"Judge error" message when the call failed.
    evaluation: str

@app.post("/evaluate",response_model=EvaluateResponse)
async def evaluate(req: EvaluateRequest):
    """Judge one question/answer pair and return the verdict text.

    ``run_judge`` never propagates chain errors, so the response always
    carries a string: either the verdict or a skip/error message.
    """
    import asyncio  # local import keeps this cell's import list untouched

    # run_judge calls the judge chain synchronously. Running it in a worker
    # thread keeps the event loop free, so other requests on this app (e.g.
    # /rag_chat) are not stalled while the judge call is in flight.
    verdict = await asyncio.to_thread(run_judge, req.question, req.answer)
    return EvaluateResponse(evaluation=verdict)


  llm=OpenAI(temperature=0,openai_api_key=os.getenv("OPENAI_API_KEY")),
  judge_chain = LLMChain(


In [4]:
# Cell 3: TestClient for both endpoints
from fastapi.testclient import TestClient

# Two in-process clients bound to the same app, one per endpoint.
client_chat  = TestClient(app)
client_eval  = TestClient(app)

# 1) Chat + metrics: ask one question and score it against every dataset.
chat_payload = {
  "message":"What is 2^10?",
  "history":[],
  "ground_truth_source":"all",
}
resp = client_chat.post("/rag_chat", json=chat_payload)
data = resp.json()
print("Chat Response:",data)

# 2) Judge: feed the chat answer back to /evaluate and show the verdict.
judge_payload = {
  "question":"What is 2^10?",
  "answer":data["answer"],
}
judge_resp = client_eval.post("/evaluate",json=judge_payload)
ver = judge_resp.json()["evaluation"]
print("Judge Verdict:\n",ver)


NameError: name 'qa_chain' is not defined