# Introduction

This jupyter notebook helps you to build a RAG system from scratch.

I strongly recommend you to checkout the [README](./readme.md) section to gain a background about this topic before diving straight into the code.


# Setup dev env


## Python Virtual Environment

- [Check here](https://realpython.com/python-virtual-environments-a-primer/) why is a venv useful
- Run cell below to create a venv


In [None]:
# Create a Python virtual environment
#!python -m venv rag_venv

# Add the virtual environment folder to ".gitignore" file
#with open(".gitignore", "a") as f:
#    f.write("rag_venv/\n")


- Activate the virtual environment:
  - On Windows - `.\rag_venv\Scripts\activate`
  - On Mac - `source rag_venv/bin/activate`


## Install Packages


In [None]:
# Install all dependencies
!pip install -r requirements.txt


## load

The Groq API key is stored in environment variables using the [python-dotenv package](https://pypi.org/project/python-dotenv/)


In [None]:
from dotenv import load_dotenv
load_dotenv()
import os
#print(os.getenv('MY_VAR'))
#print(os.getenv('GROQ_API_KEY'))


# Ask Questions


In [31]:
import groq
from typing import List
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# Initialize the Groq client
client = groq.Groq()

# Initialize the SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as file:
        reader = PdfReader(file)
        text = ''
        for page in reader.pages:
            text += page.extract_text() + '\n'
    return text

# Function to create chunks
def create_chunks(text, chunk_size=500, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks

# Process PDF files and create chunks
pdf_directory = './input_files/'  # Update this to your PDF directory path
all_chunks = []
for filename in os.listdir(pdf_directory):
    if filename.endswith('.pdf'):
        pdf_path = os.path.join(pdf_directory, filename)
        text = extract_text_from_pdf(pdf_path)
        chunks = create_chunks(text)
        all_chunks.extend(chunks)

# Create embeddings
embeddings = model.encode(all_chunks)

# Create and populate the FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings.astype('float32'))

def retrieve_relevant_chunks(query, top_k=5):
    query_vector = model.encode([query])
    D, I = index.search(query_vector.astype('float32'), top_k)
    return [all_chunks[i] for i in I[0]]

def generate_response(query: str, relevant_chunks: List[str], model: str = "gemma2-9b-it") -> str: #llama-3.1-8b-instant, gemma2-9b-it
    # Prepare the prompt
    context = "\n".join(relevant_chunks)
    prompt = f"""Based on the following context, please answer the question. If the answer is not in the context, say "I don't have enough information to answer that question."

Context:
{context}

Question: {query}

Answer:"""

    # Generate the response using Groq's API
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that answers questions based on the given context."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        model=model,
        max_tokens=1,
        temperature=0.5,
        # Controls diversity via nucleus sampling: 0.5 means half of all
        # likelihood-weighted options are considered.
        top_p=1024,
        stop=None,
        #stream=True,
    )

    # Extract and return the generated response
    return chat_completion.choices[0].message.content.strip()

def rag_query(query: str, top_k: int = 5) -> str:
    relevant_chunks = retrieve_relevant_chunks(query, top_k)
    response = generate_response(query, relevant_chunks)
    return response

# Test the system
test_query = "Am I allowed to work from home?"
result = rag_query(test_query)
print(f"Query: {test_query}")
print(f"Response: {result}")

Query: Am I alloed to work from home?
Response: Mif


# Playground


In [34]:
import os
import json
from groq import Groq
from datetime import datetime

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": "Give me a funny one-liner.",
        }
    ],
    model="gemma2-9b-it",  # gemma2-9b-it
    temperature=1,
    max_tokens=1024,
    top_p=1,
    stream=False,
    stop=None
)

# Create a dictionary with the desired structure
response_dict = {
    "id": chat_completion.id,
    "object": "chat.completion",
    "created": int(datetime.now().timestamp()),
    "model": chat_completion.model,
    "system_fingerprint": chat_completion.system_fingerprint,  # This might be None
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": chat_completion.choices[0].message.content
            },
            "finish_reason": chat_completion.choices[0].finish_reason,
            "logprobs": None
        }
    ],
    "usage": {
        "prompt_tokens": chat_completion.usage.prompt_tokens,
        "completion_tokens": chat_completion.usage.completion_tokens,
        "total_tokens": chat_completion.usage.total_tokens,
        "prompt_time": round(chat_completion.usage.prompt_time, 3),
        "completion_time": round(chat_completion.usage.completion_time, 3),
        "total_time": round(chat_completion.usage.total_time, 3)
    }
}

# Print the formatted JSON response
print(json.dumps(response_dict, indent=2))

{
  "id": "chatcmpl-8daa49bf-3a07-42fb-ab0e-c954a10d0107",
  "object": "chat.completion",
  "created": 1722858859,
  "model": "gemma2-9b-it",
  "system_fingerprint": "fp_10c08bf97d",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "I'm reading a book about anti-gravity. It's impossible to put down!  \ud83d\udcda\ud83d\ude04  \n\n"
      },
      "finish_reason": "stop",
      "logprobs": null
    }
  ],
  "usage": {
    "prompt_tokens": 28,
    "completion_tokens": 27,
    "total_tokens": 55,
    "prompt_time": 0.003,
    "completion_time": 0.054,
    "total_time": 0.057
  }
}
