In [251]:
import os
import json
from datetime import datetime

HISTORY_FILE = "chat-history.json"

def save_history(messages):
    """
    Append a new timestamped conversation to chat-history.json.

    Existing content is normalised before appending:
      * a flat list of {"role", "content"} messages is wrapped into one
        timestamped conversation;
      * a single JSON object is kept as the first conversation;
      * a missing file, or a file that is not valid JSON, counts as empty.

    Args:
        messages: JSON-serialisable list of message dicts for this session.

    Raises:
        TypeError: if `messages` is not JSON-serialisable. The history file
            is left untouched in that case.
    """
    existing = []
    if os.path.exists(HISTORY_FILE):
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                data = []

        if isinstance(data, dict):
            # A lone conversation object: keep it instead of discarding it.
            existing = [data]
        elif isinstance(data, list) and data and isinstance(data[0], dict) and "role" in data[0]:
            # Flat list of {"role", "content"} messages: wrap it.
            existing = [{
                "timestamp": datetime.now().isoformat(),
                "conversation": data
            }]
        elif isinstance(data, list):
            existing = data

    # Append the new session
    entry = {
        "timestamp": datetime.now().isoformat(),
        "conversation": messages
    }
    existing.append(entry)

    # Serialise BEFORE opening the file for writing: open(..., "w") truncates
    # immediately, so a serialisation error would otherwise wipe the history.
    serialized = json.dumps(existing, indent=2, ensure_ascii=False)
    with open(HISTORY_FILE, "w", encoding="utf-8") as f:
        f.write(serialized)

    print(f"Appended chat entry at {entry['timestamp']}")


In [252]:
import os
import json
from datetime import datetime

def ensure_correct_format():
    """
    Rewrite chat-history.json in place if it holds a flat list of messages.

    A flat list (first element is a dict with a "role" key) is wrapped into a
    single timestamped conversation. The function does nothing when the file
    is missing, is not valid JSON, or is not a flat message list.
    """
    history_file = "chat-history.json"

    if not os.path.exists(history_file):
        return

    # Read current content; an unreadable file is left alone rather than
    # crashing the calling cell.
    with open(history_file, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            return

    # The isinstance(dict) check matters: `"role" in data[0]` would raise on a
    # number and do a substring match on a string.
    is_flat = (
        isinstance(data, list)
        and bool(data)
        and isinstance(data[0], dict)
        and "role" in data[0]
    )
    if not is_flat:
        return

    corrected = [{
        "timestamp": datetime.now().isoformat(),
        "conversation": data
    }]

    with open(history_file, "w", encoding="utf-8") as f:
        json.dump(corrected, f, indent=4, ensure_ascii=False)
    print("Fixed chat history format")

# Run this function periodically or before any operation that reads/writes to the file

# Exploring the Capabilities of LLM Models

In this notebook, I aim to evaluate and compare the capabilities of two large language models (LLMs):

1. **Codestral22B**
   A state-of-the-art model designed for advanced code generation and natural language understanding tasks.

2. **Llama 3.1-8B**
   A highly efficient and compact model optimized for general-purpose language tasks with an 8-billion parameter architecture.

The goal is to analyze their performance across various tasks, including but not limited to:

- Code generation and completion
- Natural language understanding
- Contextual reasoning
- Problem-solving capabilities

This comparison will help identify the strengths and weaknesses of each model and provide insights into their practical applications.

# AI-Powered Programming Tutor with RAG

This project focuses on building an AI-powered programming tutor designed to assist students in understanding code and solving problems. The tutor leverages **Retrieval-Augmented Generation (RAG)** to provide accurate and personalized explanations grounded in real university materials, such as:

- Past assignments
- Lecture notes
- Tutorials

The system integrates two large language models (LLMs), **Codestral22B** and **Llama 3.1-8B**, to evaluate their performance in generating solutions and explanations for programming-related queries. The goal is to determine which model provides better support for students in a university setting.

---

## Key Features

- **Personalized Explanations**: Tailored responses based on retrieved university materials.
- **Code Understanding**: Helps students debug and understand code snippets.
- **Problem Solving**: Provides step-by-step solutions to programming problems.
- **Model Comparison**: Evaluates the performance of Codestral22B and Llama 3.1-8B.

---



### In short, this notebook implements and tests Codestral22B and Llama 3.1-8B

In [253]:
# from datetime import datetime
# import json
# from datetime import datetime
# from pprint import pprint
# from os.path import exists
#
# import requests
#
# #API endpoint exposed in Lm studio
# url = "http://localhost:1234/v1/chat/completions"
#
# #model ID
# model_id = "meta-llama-3.1-8b-instruct"
#
# headers={
#     "Content-Type" : "application/json",
#     "Authorization" :"Bearer lm-studio" #Dummy API key
# }
#
# # messages: [ #keep conversation history
# #                 {"role":"user", #what you type, only sends current prompt
# #                  "content":user_input}
# #             ]
#
# #Keep the message history
#
# #History file path, to keep conversation
# history_file = "chat-history.json"
# def save_history(messages):
#     # Load existing history if the file exists
#     if exists(history_file):
#         with open(history_file, "r", encoding="utf-8") as f:
#             full_history = json.load(f)
#             if isinstance(full_history, list):
#                 pass
#             else:
#                 full_history = [full_history]
#     else:
#         full_history = []
#
#     # Add this session with timestamp
#     full_history.append({
#         "timestamp": datetime.now().isoformat(),
#         "conversation": messages
#     })
#
#     # Save the full conversation list
#     with open(history_file, "w", encoding="utf-8") as f:
#         json.dump(full_history, f, indent=4, ensure_ascii=False)
#
#
# #Prompt loop
# def chat():
#     print(" Talk to LLaMA 3.1 (type 'exit' to quit)\n")
#     messages = [
#     {"role": "system", #Sets the intial behavior, the text below
#      "content": "You are a helpful programming tutor."}
# ] #Messages reset each time
#
#     while True:
#         user_input = input(" You: ")
#         if user_input.lower() == "exit":
#             #save chat history
#             save_history(messages)
#             print(f"\n Conversation saved to {history_file}")
#             break
#
#         # Add user message
#         messages.append({"role": "user",
#                          "content": user_input})
#
#         payload = {
#             "model": model_id,#id of model
#             "messages": messages,#chat history to preserve context
#             "temperature": 0.7 #control creativiy
#         }
#         print("Your question is: ")
#         print(user_input)
#         print("\n")
#
#         try:#send request to lm api
#             response = requests.post(url, headers=headers, json=payload, timeout=60)
#
#             if response.status_code == 200:
#                 data = response.json()
#                 answer = data['choices'][0]['message']['content'].strip()
#
#                 # Add assistant message
#                 messages.append({"role": "assistant", "content": answer})
#
#                 print("\n LLaMA:", flush=True)
#                 print(answer, flush=True)
#                 print("-" * 60 + "\n")
#
#             else:
#                 print(f" Error {response.status_code}: {response.text}\n")
#
#         except requests.exceptions.RequestException as e:
#             print(" Connection error:", e)
#             break


In [254]:
# chat()

# Step 1: Load the Datasets
## 1.  OpenMathInstruct-1 (from Hugging Face)
- This dataset contains 1.8 million math problem-solution pairs, making it ideal for enhancing mathematical reasoning in LLMs.

In [255]:
# from datasets import load_dataset
# from IPython import get_ipython
# from IPython.display import display
#
# #Load training split
# dataset = load_dataset("nvidia/OpenMathInstruct-1", split="train")
#
# first_element = next(iter(dataset))
#
# print(first_element)

# Abandoned: this dataset is far too large to work with here.

# Small & Clean Math Datasets
## 1. GSM8K
- Size: ~8.5K problems

- Focus: Grade school math word problems

- Good for: step-by-step reasoning, small LLM finetuning

In [256]:
from datasets import load_dataset
# Load the "main" configuration of GSM8K, train split only.
dataset = load_dataset("gsm8k", "main", split="train")
# Take the first record by iterating the dataset once.
first_element = next(iter(dataset))

# Show one raw record so the field names are visible.
print(first_element)

{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?', 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}


# 2. Computer Science Theory QA Dataset (from Kaggle)
- This dataset offers a comprehensive collection of theoretical computer science questions, suitable for training chatbots and QA systems.

In [257]:
import pandas as pd
import json
# Normalise chat-history.json first (helper defined in an earlier cell).
ensure_correct_format()

# Read with an explicit encoding so the result does not depend on the
# platform's default locale.
with open("intents.json", "r", encoding="utf-8") as f:
    intents_data = json.load(f)

# Flatten the list stored under the "intents" key into a DataFrame.
df = pd.json_normalize(intents_data["intents"])
print(df.head())

             tag                                           patterns  \
0    abstraction  [Explain data abstraction., What is data abstr...   
1          error  [What is a syntax error, Explain syntax error,...   
2  documentation  [Explain program documentation. Why is it impo...   
3        testing                        [What is software testing?]   
4  datastructure             [How do you explain a data structure?]   

                                           responses  
0  [Data abstraction is a technique used in compu...  
1  [A syntax error is an error in the structure o...  
2  [Program documentation is written information ...  
3  [Software testing is the process of evaluating...  
4  [A data structure is a way of organizing and s...  


In [258]:
from datasets import load_dataset

# Load every split of the "sanitized" MBPP configuration.
# NOTE(review): `ds` does not appear to be used by later cells (MBPP is loaded
# again as `mbpp` when the training set is built) — confirm before removing.
ds = load_dataset("google-research-datasets/mbpp", "sanitized")

# 1. Install & Import Dependencies
We’ll need:

-  Transformers & Datasets

-  LangChain & an embedding backend (here HuggingFaceEmbeddings)

-  FAISS for the vector index

-  Accelerate + PEFT if you plan to fine-tune your generator

In [259]:
# !pip install \
#  transformers datasets faiss-cpu \
#  langchain sentence-transformers \
#  accelerate peft evaluate

In [260]:
#!pip install --upgrade peft

In [261]:
import os
import torch
import numpy as np

from datasets import load_dataset, concatenate_datasets
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    RagTokenizer,
    RagRetriever,
    RagSequenceForGeneration,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain import HuggingFacePipeline


# 2. Configuration
Centralize all paths, model names, and hyperparameters.

In [262]:
# --- Output locations --------------------------------------------------------
OUTPUT_DIR = "results/rag-llama"
FAISS_INDEX_PATH = os.path.join(OUTPUT_DIR, "faiss_index")
DOCS_PATH = os.path.join(OUTPUT_DIR, "docs.jsonl")

# --- Hugging Face model identifiers ------------------------------------------
GEN_MODEL_NAME = "meta-llama/Llama-3.1-8b"
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# --- Dataset identifiers -----------------------------------------------------
MBPP_ID = "google-research-datasets/mbpp"
MBPP_CFG = "sanitized"
GSM8K_ID = "gsm8k"
GSM8K_SPLIT = "train"

# --- Retrieval chunking ------------------------------------------------------
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# --- LoRA fine-tuning (optional) ---------------------------------------------
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05

# --- Trainer hyperparameters (generator fine-tuning) -------------------------
NUM_EPOCHS = 3
TRAIN_BS = 2
EVAL_BS = 2
GRAD_ACCUM_STEPS = 8
LEARNING_RATE = 2e-4


In [263]:
# Cell 1: validate_chat_history_format()

import os
import json
from datetime import datetime

HISTORY_FILE = "chat-history.json"

def validate_chat_history_format():
    """
    Make sure chat-history.json stores a list of timestamped conversations.

    When the file holds a flat list of messages (first element is a dict with
    a 'role' key), the list is wrapped into one timestamped conversation and
    written back. A missing file, invalid JSON, or any other layout is left
    untouched.
    """
    if not os.path.exists(HISTORY_FILE):
        return

    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as src:
            data = json.load(src)
    except json.JSONDecodeError:
        # Not valid JSON: nothing to repair here.
        return

    # Guard clauses: only a non-empty list whose head looks like a message
    # needs rewriting.
    if not isinstance(data, list) or not data:
        return
    head = data[0]
    if not isinstance(head, dict) or "role" not in head:
        return

    wrapped = [{
        "timestamp": datetime.now().isoformat(),
        "conversation": data
    }]
    with open(HISTORY_FILE, "w", encoding="utf-8") as dst:
        json.dump(wrapped, dst, indent=2, ensure_ascii=False)
    print("Fixed chat-history format: wrapped flat list into timestamped conversation.")


# 3. Load & Merge Datasets
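Load the local chat history (`chat-history.json`) and `intents.json`, plus MBPP "sanitized" (`validation` and `prompt` splits) and GSM8K `main` (`train` split). Each source is then standardized into `input_text` / `target_text` pairs and concatenated into one training set.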

In [264]:
from datasets import load_dataset, concatenate_datasets

# Cell 2: fix_chat_history_format()

def fix_chat_history_format():
    """
    Run validate_chat_history_format() and report that it ran.

    Meant to be called just before chat-history.json is loaded by a dataset
    loader.
    """
    validate_chat_history_format()
    print("Ran fix_chat_history_format()")


# Normalise chat-history.json before the JSON loader reads it.
fix_chat_history_format()

# 1) Chat history: the file is exposed as a single split named "train".
raw_chat = load_dataset(
    "json",
    data_files={"train": "chat-history.json"}
)
def format_chat_batch(batch):
    """
    Collapse each conversation in a batch into one input/target pair.

    For every conversation (a list of {role, content} dicts) all user turns
    are joined with single spaces into the input, and all assistant turns into
    the target. Turns with any other role are ignored.

    Returns:
        dict with "input_text" and "target_text" lists, one item per conversation.
    """
    def _joined(turns, role):
        # Space-join the content of every turn spoken by `role`, in order.
        return " ".join(turn["content"] for turn in turns if turn["role"] == role)

    conversations = batch["conversation"]
    return {
        "input_text": [_joined(conv, "user") for conv in conversations],
        "target_text": [_joined(conv, "assistant") for conv in conversations],
    }

# Replace the raw columns with the input_text / target_text pair produced by
# format_chat_batch.
chat_ds = raw_chat["train"].map(
    format_chat_batch,
    batched=True,
    remove_columns=["timestamp","conversation"]
)

# 2) intents.json, loaded the same way as a single "train" split.
raw_intents = load_dataset(
    "json",
    data_files={"train": "intents.json"}
)
def format_intents_batch(batch):
    """
    Expand a batch of intent lists into one example per pattern.

    Every pattern of an intent becomes an input, paired with that intent's
    first response as the target. Intents without any response are skipped,
    because there is nothing to learn from them; missing or null pattern and
    intent lists are treated as empty.

    Args:
        batch: mapping whose "intents" value is a list of lists of intent
            dicts with "patterns" and "responses" keys.

    Returns:
        dict with "input_text" and "target_text" lists of equal length.
    """
    inps, tgts = [], []
    for intents_list in batch["intents"]:
        for intent in intents_list or []:
            responses = intent.get("responses") or []
            if not responses:
                # Previously raised IndexError on responses[0].
                continue
            target = responses[0]
            for pat in intent.get("patterns") or []:
                inps.append(pat)
                tgts.append(target)
    return {"input_text": inps, "target_text": tgts}

# Expand the "intents" column into pattern/response examples.
intents_ds = raw_intents["train"].map(
    format_intents_batch,
    batched=True,
    remove_columns=["intents"]
)

# 3) MBPP "sanitized" configuration; only its "validation" and "prompt"
#    splits are used below.
mbpp = load_dataset("google-research-datasets/mbpp", "sanitized")
def format_mbpp_batch(batch):
    """
    Turn a batch of MBPP rows into prompt / fenced-code pairs.

    The "prompt" column becomes the input unchanged; the matching "code"
    value is wrapped in a ```python fenced block to form the target.
    """
    paired = list(zip(batch["prompt"], batch["code"]))
    return {
        "input_text": [prompt for prompt, _ in paired],
        "target_text": [f"```python\n{code}\n```" for _, code in paired],
    }

# Format the "validation" and "prompt" splits separately (dropping each
# split's original columns), then join them into one dataset.
mbpp_ds = concatenate_datasets([
    mbpp["validation"].map(format_mbpp_batch, batched=True, remove_columns=mbpp["validation"].column_names),
    mbpp["prompt"].    map(format_mbpp_batch, batched=True, remove_columns=mbpp["prompt"].column_names),
])

# 4) GSM8K "main" configuration, train split.
gsm = load_dataset("gsm8k", "main", split="train")
def format_gsm_batch(batch):
    """
    Prefix GSM8K questions and answers with section labels.

    Each question gets a leading "Problem:" line and each answer a leading
    "Answer:" line; the results are returned as input_text / target_text.
    """
    problems = []
    for question in batch["question"]:
        problems.append("Problem:\n" + question)

    answers = []
    for answer in batch["answer"]:
        answers.append("Answer:\n" + answer)

    return {"input_text": problems, "target_text": answers}

# Replace every original GSM8K column with the labelled input/target pair.
gsm_ds = gsm.map(
    format_gsm_batch,
    batched=True,
    remove_columns=gsm.column_names
)

# 5) Combine all four sources; each one now has only the input_text and
#    target_text columns.
train_ds = concatenate_datasets([chat_ds, intents_ds, mbpp_ds, gsm_ds])
print("Total training examples:", len(train_ds))


Ran fix_chat_history_format()


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Total training examples: 7871


# 4. Build & Chunk the Retrieval Corpus
We’ll treat each training example as a “document” by concatenating input_text + target_text and splitting into overlapping chunks.

In [265]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 4.1 Concatenate input+target into a list of raw docs
# 4.1 Concatenate input + target into one raw document per training example.
raw_texts = [
    ex["input_text"] + "\n\n" + ex["target_text"]
    for ex in train_ds
]
metadatas = [
    {"source": f"doc-{i}"}
    for i in range(len(raw_texts))
]

# 4.2 Chunk long docs. Sizes come from the configuration cell (CHUNK_SIZE /
#     CHUNK_OVERLAP). The splitter's default length function counts
#     characters, so these are character counts, not tokens.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

docs = []
for text, meta in zip(raw_texts, metadatas):
    for chunk in splitter.split_text(text):
        # Copy the metadata so chunks of the same document do not share one
        # mutable dict.
        docs.append(Document(page_content=chunk, metadata=dict(meta)))

print(f"▶ Created {len(docs)} chunks from {len(raw_texts)} documents.")


▶ Created 8121 chunks from 7871 documents.


# 5. Embed & Build a FAISS Vector Index
Use a Sentence-Transformer to embed each chunk, then store in FAISS for fast nearest-neighbour lookup.

In [266]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 5.1 Initialize the embedding model.
# Re-assigns EMBED_MODEL_NAME to the same value the configuration cell set.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)

# 5.2 Embed every chunk and build the FAISS index from the Document objects.
vectorstore = FAISS.from_documents(docs, embedder)

# 5.3 Persist to disk; later cells reload the index from this folder.
# NOTE(review): this path differs from FAISS_INDEX_PATH in the configuration
# cell ("results/rag-llama/faiss_index") — confirm which one is intended.
INDEX_PATH = "results/faiss_index"
vectorstore.save_local(INDEX_PATH)
print(f"✔ FAISS index saved to '{INDEX_PATH}'.")


✔ FAISS index saved to 'results/faiss_index'.


# 6. Wire Up a LangChain RetrievalQA Pipeline
We now plug your FAISS store and the Meta-Llama-3.1-8b generator into a single retrieval-augmented chain.

In [267]:
# Option B: supply the token at runtime instead of hardcoding it.
# SECURITY: an access token used to be written literally in this cell. Treat
# that token as leaked and revoke it in the Hugging Face account settings.
import os
from getpass import getpass

if not os.environ.get("HUGGINGFACE_HUB_TOKEN"):
    # getpass keeps the value out of the notebook source and the cell output.
    os.environ["HUGGINGFACE_HUB_TOKEN"] = getpass("Hugging Face token: ")


In [268]:
# ── Cell: Load FAISS index & build retriever ────────────────────────────────────

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Recreate the embedder with the same model name used when the index was built.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load the on-disk FAISS index. allow_dangerous_deserialization=True is an
# explicit opt-in to loading pickled data, so only use it on an index folder
# you created yourself.
vectorstore = FAISS.load_local(
    "results/faiss_index",
    embedder,
    allow_dangerous_deserialization=True
)

# Wrap as a retriever that returns 4 results per query (k=4).
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


In [269]:
import os
from transformers import pipeline, BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# ── 0) Hugging Face token (optional) ───────────────────────────────────────────
# Nothing in this cell uses the token: the LLM below is the local LM Studio
# endpoint. The cell therefore no longer refuses to run when the variable is
# unset; `hf_token` is still defined in case another cell reads it.
hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# ── 1) Reload FAISS index ──
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# allow_dangerous_deserialization=True opts in to loading pickled data; only
# load an index folder you created yourself.
vectorstore = FAISS.load_local(
    "results/faiss_index",
    embedder,
    allow_dangerous_deserialization=True
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# ── 2) Connect to local LM Studio API ──────────────────────────────────────────
llm = ChatOpenAI(
    model_name="meta-llama-3.1-8b-instruct",  # Just for tracking, not actually used to load model
    openai_api_key="lm-studio",               # Dummy API key as used in your chat() function
    openai_api_base="http://localhost:1234/v1", # Your LM Studio API endpoint
    temperature=0.7,
    max_tokens=512
)

# ── 3) Build RetrievalQA ───────────────────────────────────────────────────────
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",       # or "map_reduce" / "refine"
    retriever=retriever,
    return_source_documents=True,
)

# # ── 4) Test query ──────────────────────────────────────────────────────────────
# query = "How would you implement binary search in Python?"
# result = qa_chain(query)
# print("Answer:\n", result["result"])
# print("\nSources:")
# for doc in result["source_documents"]:
#     print("-", doc.metadata["source"])




In [270]:
# Cell: normalize_history.py

import os, json
from datetime import datetime

H = "chat-history.json"

# One-off normalisation: wrap a flat list of messages into a single
# timestamped conversation. Files are opened with `with` so the handles are
# closed (and the rewritten file flushed) deterministically.
if os.path.exists(H):
    with open(H, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            # Not valid JSON: leave the file alone instead of failing the cell.
            data = None

    # if flat list of messages, wrap it
    if isinstance(data, list) and data and isinstance(data[0], dict) and "role" in data[0]:
        wrapped = [{
            "timestamp": datetime.now().isoformat(),
            "conversation": data
        }]
        with open(H, "w", encoding="utf-8") as f:
            json.dump(wrapped, f, indent=2, ensure_ascii=False)
        print("Normalized existing chat-history.json")


In [271]:
from datetime import datetime
import json
from datetime import datetime
from pprint import pprint
from os.path import exists

import requests

# Chat-completions endpoint exposed by the local LM Studio server.
url = "http://localhost:1234/v1/chat/completions"

# Identifier of the model to request from that server.
model_id = "meta-llama-3.1-8b-instruct"

# Request headers; the bearer value is a placeholder, not a real key.
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer lm-studio",
}

# messages: [ #keep conversation history
#                 {"role":"user", #what you type, only sends current prompt
#                  "content":user_input}
#             ]

# Normalise the history file before any chat session writes to it.
ensure_correct_format()
# Path of the file that stores saved conversations.
# (save_history itself is defined in an earlier cell.)

import os
import json
from datetime import datetime

history_file = "chat-history.json"

# Short alias for the same path; re-assigns H to the same value.

import os, json
from datetime import datetime

H = "chat-history.json"



#Prompt loop
def chat():
    """
    Interactive prompt loop against the local LM Studio chat endpoint.

    Reads user input until 'exit' is typed or a request error occurs, sending
    the whole running message list with each request so the model keeps
    context. The conversation is saved exactly once when the loop ends, by
    whatever route. It used to be saved after every turn and again on exit,
    which stored each session as several overlapping entries.
    """
    print(" Talk to LLaMA 3.1 (type 'exit' to quit)\n")
    # Messages start fresh on every call; the system message sets the
    # initial behaviour.
    messages = [
        {"role": "system",
         "content": "You are a helpful programming tutor."}
    ]

    try:
        while True:
            user_input = input(" You: ")
            if user_input.lower() == "exit":
                break

            # Add user message
            messages.append({"role": "user",
                             "content": user_input})

            payload = {
                "model": model_id,      # id of the model
                "messages": messages,   # chat history, to preserve context
                "temperature": 0.7      # controls creativity
            }
            print("Your question is: ")
            print(user_input)
            print("\n")

            try:  # send the request to the LM Studio API
                response = requests.post(url, headers=headers, json=payload, timeout=1000)

                if response.status_code == 200:
                    data = response.json()
                    answer = data['choices'][0]['message']['content'].strip()

                    # Add assistant message
                    messages.append({"role": "assistant", "content": answer})

                    print("\n LLaMA:", flush=True)
                    print(answer, flush=True)
                    print("-" * 60 + "\n")

                else:
                    print(f" Error {response.status_code}: {response.text}\n")

            except requests.exceptions.RequestException as e:
                print(" Connection error:", e)
                break
    finally:
        # Single save for the whole session, also reached on a connection
        # error or an interrupt.
        save_history(messages)
        print(f"\n Conversation saved to {history_file}")


In [272]:
# chat()

In [273]:
# # Search for Pythagorean theorem related questions in GSM8K
# pythagorean_questions = []
#
# # Load the dataset if not already loaded
# gsm = load_dataset("gsm8k", "main", split="train")
# #
# # Search for relevant keywords
# keywords = ["pythagora", "pythagorean", "right triangle", "hypotenuse", "a^2 + b^2"]
#
# for i, example in enumerate(gsm):
#     question = example["question"].lower()
#     for keyword in keywords:
#         if keyword.lower() in question:
#             pythagorean_questions.append({
#                 "index": i,
#                 "question": example["question"],
#                 "answer": example["answer"]
#             })
#             break
#
# # Print the number of matching questions
# print(f"Found {len(pythagorean_questions)} questions related to the Pythagorean theorem")
#
# # Display the first few matches if any exist
# for i, q in enumerate(pythagorean_questions[:3]):
#     print(f"\nQuestion {i+1}:")
#     print(q["question"])
#     print("\nAnswer:")
#     print(q["answer"])

In [274]:
# import faiss
# import numpy as np
#
# # Load the FAISS index file
# index = faiss.read_index("results/faiss_index/index.faiss")
#
# # Print basic info
# print("Index type:", type(index).__name__)
# print("Dimension:", index.d)
# # print("Is trained:", index.is_trained)
# # print("Total vectors stored:", index.ntotal)
#
# # Example: retrieve all stored vectors (if they fit in memory)
# try:
#     xb = index.reconstruct_n(0, index.ntotal)  # returns all vectors
#     print("Sample vector (first one):", xb[0])
# except Exception as e:
#     print("Cannot reconstruct vectors:", e)


In [275]:
# Cell 4: update_knowledge_base()

import os
import json
from datetime import datetime
from typing import List, Dict
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

HISTORY_FILE = "chat-history.json"
LAST_UPDATE_FILE = "results/last_update.txt"
INDEX_PATH = "results/faiss_index"
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

def get_new_conversations() -> List[Dict]:
    """
    Return the timestamped conversations saved since the last update.

    The last-update marker is read from LAST_UPDATE_FILE, if present.
    Conversations whose "timestamp" string sorts after that marker are
    returned; with no marker, every conversation is returned.

    Only well-formed entries are returned: dicts carrying a list under
    "conversation". A missing or unparseable history file yields an empty
    list. This keeps legacy flat messages and corrupt files from crashing the
    downstream conversion step.
    """
    last_update = None
    if os.path.exists(LAST_UPDATE_FILE):
        with open(LAST_UPDATE_FILE, "r", encoding="utf-8") as f:
            last_update = f.read().strip()

    if not os.path.exists(HISTORY_FILE):
        return []

    with open(HISTORY_FILE, "r", encoding="utf-8") as f:
        try:
            history = json.load(f)
        except json.JSONDecodeError:
            return []
    if not isinstance(history, list):
        history = [history]

    conversations = [
        c for c in history
        if isinstance(c, dict) and isinstance(c.get("conversation"), list)
    ]

    if last_update:
        # ISO-8601 strings in the same format compare chronologically.
        return [
            c for c in conversations
            if str(c.get("timestamp") or "") > last_update
        ]
    return conversations

def is_correction(conv: Dict) -> bool:
    """
    Report whether a user message that directly follows an assistant message
    looks like a correction.

    A message counts as a correction when its lower-cased text contains
    "wrong", "incorrect" or "mistake", or contains "no," starting at a word
    boundary. The word boundary stops words that merely end in "no"
    ("piano,", "volcano,") from being flagged.

    Args:
        conv: conversation dict; its "conversation" value is the message list.

    Returns:
        True if any such user message is found, otherwise False.
    """
    import re

    msgs = conv.get("conversation", [])
    for previous, current in zip(msgs, msgs[1:]):
        if current.get("role") != "user" or previous.get("role") != "assistant":
            continue
        txt = str(current.get("content", "")).lower()
        if any(kw in txt for kw in ("wrong", "incorrect", "mistake")):
            return True
        if re.search(r"\bno,", txt):
            return True
    return False

def conversations_to_docs(conversations: List[Dict]) -> List[Document]:
    """
    Build chunked Document objects from timestamped conversations.

    Each conversation becomes a transcript in which user turns are prefixed
    with "Question: " and all other non-system turns with "Answer: ". The
    transcript is split with chunk size 1000 and overlap 200. Every chunk
    records the conversation's position in the input list and its timestamp
    as metadata.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documents = []
    for position, conv in enumerate(conversations):
        parts = []
        for msg in conv["conversation"]:
            if msg["role"] == "system":
                continue
            label = "Question: " if msg["role"] == "user" else "Answer: "
            parts.append(f"{label}{msg['content']}\n\n")
        transcript = "".join(parts)

        info = {"source": f"conversation-{position}", "timestamp": conv.get("timestamp", "")}
        documents.extend(
            Document(page_content=piece, metadata=info)
            for piece in chunker.split_text(transcript)
        )
    return documents

def update_knowledge_base():
    """
    Add conversations saved since the last update to the FAISS index.

    Steps: collect new conversations, convert them to chunked documents, add
    them to the index stored at INDEX_PATH (creating it if it does not exist),
    save the index, then record the update marker in LAST_UPDATE_FILE.

    Changes from the previous behaviour:
      * a missing index is created from the new documents once. It used to be
        created from them and then have the same documents added again;
      * a failure to load an existing index now propagates instead of being
        swallowed, so an unreadable index is never overwritten by one that
        holds only the new documents;
      * the marker is the newest processed timestamp rather than the current
        time, so a conversation saved while this function runs is picked up
        by the next update.
    """
    print("Updating knowledge base...")
    new_convs = get_new_conversations()
    if not new_convs:
        print("No new conversations found.")
        return

    corrections = [c for c in new_convs if is_correction(c)]
    print(f"Found {len(new_convs)} new convs, {len(corrections)} with corrections")

    docs = conversations_to_docs(new_convs)
    if not docs:
        print("No documents to add.")
        return

    embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
    # index.faiss is the file the saved index folder contains.
    index_file = os.path.join(INDEX_PATH, "index.faiss")
    if os.path.exists(index_file):
        # allow_dangerous_deserialization opts in to loading pickled data;
        # only load an index folder you created yourself.
        vs = FAISS.load_local(INDEX_PATH, embedder, allow_dangerous_deserialization=True)
        vs.add_documents(docs)
    else:
        vs = FAISS.from_documents(docs, embedder)

    vs.save_local(INDEX_PATH)

    newest = max((str(c.get("timestamp") or "") for c in new_convs), default="")
    marker = newest or datetime.now().isoformat()

    os.makedirs(os.path.dirname(LAST_UPDATE_FILE), exist_ok=True)
    with open(LAST_UPDATE_FILE, "w", encoding="utf-8") as f:
        f.write(marker)

    print("Knowledge base updated successfully")

update_knowledge_base()


Updating knowledge base...
No new conversations found.


In [276]:
def rag_chat():
    """
    Interactive chat that answers through the RAG chain instead of direct API calls.

    Each question is sent on its own to `qa_chain`; the running `messages`
    list is kept only so the session can be saved. The conversation is saved
    when the loop ends for any reason. An error from the chain used to end
    the session without saving anything. After saving, the knowledge base is
    updated with the new conversation.
    """
    print(" Talk to RAG-enhanced LLaMA 3.1 (type 'exit' to quit)\n")
    messages = [
        {"role": "system",
         "content": "You are a helpful programming tutor."}
    ]

    try:
        while True:
            user_input = input(" You: ")
            if user_input.lower() == "exit":
                break

            # Add user message to history
            messages.append({"role": "user", "content": user_input})

            print("Your question is: ")
            print(user_input)
            print("\n")

            try:
                # Use RAG chain instead of direct LM Studio API
                result = qa_chain(user_input)
                answer = result["result"]

                # Add assistant message to conversation history
                messages.append({"role": "assistant", "content": answer})

                print("\n RAG-LLaMA:", flush=True)
                print(answer, flush=True)
                print("-" * 60 + "\n")

            except Exception as e:
                print(f" Error: {str(e)}\n")
                break
    finally:
        # Persist whatever was exchanged, also after an error or interrupt.
        save_history(messages)
        print(f"\n Conversation saved to {history_file}")

    update_knowledge_base()

# Start the RAG-enhanced chat


In [277]:
# rag_chat()

In [278]:
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any, Literal
import uvicorn

# assume you’ve already done:
#   from your_langchain_setup import qa_chain, save_history, history_file

app = FastAPI()

# CORS: accept browser requests from the origin listed below, with
# credentials, for any method and any header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class ChatMessage(BaseModel):
    """One chat turn: who spoke (`role`) and what was said (`content`)."""
    # role is restricted to exactly these three literal values.
    role: Literal["user","assistant","system"]
    content: str

class ChatRequest(BaseModel):
    """Request body for /rag_chat: the new user message plus prior turns."""
    # The new user message to answer.
    message: str
    # Earlier turns sent by the client; may be an empty list.
    history: List[ChatMessage]

class ChatResponse(BaseModel):
    """Response body for /rag_chat: the answer and the updated turn list."""
    # Text produced by the RAG chain.
    answer: str
    # Turn list including the new user message and the answer.
    history: List[ChatMessage]

@app.post("/rag_chat", response_model=ChatResponse)
async def rag_chat_endpoint(req: ChatRequest):
    """
    Answer one user message through the RAG chain and persist the exchange.

    The incoming history is converted to plain dicts before use. It used to
    stay as ChatMessage objects mixed with dicts, which made `save_history`
    fail during JSON serialisation whenever a history was supplied.

    Raises:
        HTTPException: status 500 with the error text if the chain fails.
    """
    # NOTE(review): qa_chain is called synchronously inside an async handler,
    # which presumably blocks the event loop while the model generates — confirm.

    # 1) Rebuild the turn list as plain, JSON-serialisable dicts
    messages = [{"role": m.role, "content": m.content} for m in req.history]
    if not messages:
        messages = [
            {"role":"system","content":"You are a helpful programming tutor."}
        ]
    messages.append({"role":"user","content":req.message})

    # 2) Run RAG chain
    try:
        result: Dict[str, Any] = qa_chain({"query": req.message})
        answer = result["result"]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    messages.append({"role":"assistant","content":answer})

    # 3) Persist using the save_history helper
    save_history(messages)

    # 4) Return to frontend; the dicts are validated back into ChatMessage
    return ChatResponse(answer=answer, history=messages)

# Optional: to run standalone
# if __name__ == "__main__":
#     uvicorn.run(app, host="0.0.0.0", port=8000)


In [282]:
import threading
import uvicorn

def run_server():
    """Run the FastAPI app with uvicorn on port 8000 (blocks until it stops)."""
    # NOTE(review): host "0.0.0.0" listens on every network interface, not only
    # localhost as the message below suggests — confirm this exposure is wanted.
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Daemon thread: the server runs in the background and does not keep the
# process alive on its own.
thread = threading.Thread(target=run_server, daemon=True)
thread.start()
# Printed right after the thread starts, so the server may still be starting
# up at this point.
print("RAG server is now running on http://localhost:8000")


RAG server is now running on http://localhost:8000


INFO:     Started server process [3984]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
