<a href="https://colab.research.google.com/github/muhammadalinoor-1982/Blog/blob/master/RAG_with_huggingface_Meta_Llama_3_8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Step-by-Step RAG Pipeline with Hugging Face Meta-Llama-3-8B**



## **1. Install Necessary Libraries**

In [1]:
!pip install faiss-cpu pymupdf

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting pymupdf
  Downloading pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m83.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m89.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf, faiss-cpu
Successfully installed faiss-cpu-1.13.2 pymupdf-1.26.7


## **2. Import Libraries and Set Up the Environment**

In [None]:
import os
import sqlite3
from contextlib import closing
from getpass import getpass

import faiss
import fitz  # PyMuPDF
import nltk
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM

# Sentence-tokenizer data used by nltk.sent_tokenize. Recent NLTK releases load
# the 'punkt_tab' resource instead of the pickled 'punkt' model, so fetch both
# to stay compatible with whichever NLTK version the runtime provides.
for punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(punkt_resource)

## **3. Database Operations**

In [None]:
def create_database(db_name="documents.db"):
    """Ensure the SQLite file `db_name` has a `chunks` table.

    The table has an auto-assigned integer primary key `id` and a text
    column `content`. Calling this on a database that already has the table
    is a no-op (``CREATE TABLE IF NOT EXISTS``); existing rows are kept.

    Args:
        db_name: Path of the SQLite database file.
    """
    # closing() guarantees the connection is released even if the statement
    # fails; the inner `with conn` commits on success and rolls back on error.
    with closing(sqlite3.connect(db_name)) as conn:
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS chunks
                      (id INTEGER PRIMARY KEY, content TEXT)''')
    print(f"Database '{db_name}' created with table 'chunks'.")

def insert_chunks(chunks, db_name="documents.db"):
    """Append text chunks as rows of the `chunks` table.

    Args:
        chunks: Iterable of strings; each becomes one row's `content`.
            Any iterable is accepted, including generators.
        db_name: Path of the SQLite database file. The `chunks` table must
            already exist.
    """
    # Materialise once so the row count is known even for one-shot iterables.
    rows = [(chunk,) for chunk in chunks]
    # closing() releases the connection on any outcome; `with conn` makes the
    # whole batch one transaction (commit on success, rollback on error).
    with closing(sqlite3.connect(db_name)) as conn:
        with conn:
            conn.executemany("INSERT INTO chunks (content) VALUES (?)", rows)
    print(f"Inserted {len(rows)} chunks into 'chunks' table.")

## **4. Text Extraction and Chunking**

In [None]:
def extract_and_chunk_text_from_pdf(pdf_path, chunk_size=200):
    """Extract a PDF's text and group its sentences into chunks.

    The text of every page is concatenated, split into sentences with
    ``nltk.sent_tokenize``, and consecutive runs of `chunk_size` sentences
    are joined with single spaces. The last chunk may be shorter.

    Args:
        pdf_path: Path of the PDF file to read.
        chunk_size: Number of sentences per chunk; must be at least 1.

    Returns:
        List of chunk strings (empty if no sentences were found).

    Raises:
        ValueError: If `chunk_size` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of sentences")

    # The context manager closes the document once the text has been read;
    # join() avoids re-copying the accumulated text for every page.
    with fitz.open(pdf_path) as document:
        text = "".join(page.get_text() for page in document)

    sentences = nltk.sent_tokenize(text)
    chunks = [' '.join(sentences[i:i + chunk_size]) for i in range(0, len(sentences), chunk_size)]
    print(f"Extracted and chunked text from {pdf_path}. Number of chunks: {len(chunks)}")
    return chunks

## **5. Embedding and Indexing**

In [None]:
# Sentence-embedding encoder. It is bound to dedicated names so that a later
# cell rebinding `tokenizer` / `model` to the generator LLM cannot change what
# embed_text() uses. AutoModel (the bare encoder) is required here because
# embed_text() reads `last_hidden_state`.
EMBED_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
EMBED_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
embed_tokenizer = AutoTokenizer.from_pretrained(EMBED_MODEL_NAME)
embed_model = AutoModel.from_pretrained(EMBED_MODEL_NAME).to(EMBED_DEVICE).eval()

# Kept for backward compatibility with code that refers to these names at this
# point in the notebook.
tokenizer = embed_tokenizer
model = embed_model

def embed_text(texts, batch_size=32):
    """Embed texts as mean-pooled token states of the embedding encoder.

    Args:
        texts: A string or an iterable of strings.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        float32 NumPy array of shape (number of texts, encoder hidden size).
        Inputs longer than the tokenizer's maximum length are truncated.
    """
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        return np.empty((0, embed_model.config.hidden_size), dtype=np.float32)

    pooled_batches = []
    for start in range(0, len(texts), batch_size):
        inputs = embed_tokenizer(
            texts[start:start + batch_size],
            return_tensors='pt',
            padding=True,
            truncation=True,
        ).to(EMBED_DEVICE)
        with torch.no_grad():
            token_states = embed_model(**inputs).last_hidden_state
        # Average over real tokens only. Without the mask, padded positions
        # enter the mean and a text's vector depends on its batch neighbours.
        mask = inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
        pooled = (token_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        pooled_batches.append(pooled.float().cpu().numpy())
    return np.concatenate(pooled_batches, axis=0)

# Exact L2 index; its dimension is taken from the encoder so the two always match.
index = faiss.IndexFlatL2(embed_model.config.hidden_size)

def load_chunks_and_index(db_name="documents.db"):
    """Load every stored chunk and rebuild the global FAISS index from them.

    The index is emptied first, so after this call index position ``i``
    corresponds to ``chunks[i]`` of the returned list, no matter how often
    the function has been called before.

    Args:
        db_name: Path of the SQLite database file holding the `chunks` table.

    Returns:
        List of chunk strings in ascending `id` order (possibly empty).
    """
    with closing(sqlite3.connect(db_name)) as conn:
        chunks = [row[0] for row in conn.execute("SELECT content FROM chunks ORDER BY id")]

    # Without the reset, repeated calls keep appending vectors and index ids
    # drift away from positions in `chunks`.
    index.reset()

    if chunks:
        embeddings = embed_text(chunks)
        index.add(embeddings)
        print(f"Loaded {len(chunks)} chunks and added to FAISS index.")
    else:
        print("No chunks loaded from the database.")

    return chunks

## **6. Retrieval and Ranking**

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def retrieve_and_rank(chunks, query, top_k=5):
    """Return the chunks nearest to `query`, most similar first.

    Candidates come from an L2 search of the global FAISS `index`; they are
    then re-embedded and ordered by cosine similarity to the query.

    Args:
        chunks: Chunk strings, positionally aligned with the vectors in `index`.
        query: Query text.
        top_k: Maximum number of chunks to return.

    Returns:
        List of at most `top_k` chunk strings, or an empty list when nothing
        can be retrieved.
    """
    if top_k < 1 or index.ntotal == 0:
        print("No chunks retrieved from the index.")
        return []

    query_embedding = embed_text([query])
    # Never ask for more neighbours than the index holds.
    _, indices = index.search(query_embedding, min(top_k, index.ntotal))

    if len(indices[0]) == 0:
        print("No chunks retrieved from the index.")
        return []

    # FAISS reports missing neighbours as -1. A bare `i < len(chunks)` test
    # would let -1 through and silently pick the last chunk.
    retrieved_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

    if not retrieved_chunks:
        print("No valid chunks retrieved after filtering.")
        return []

    chunk_embeddings = embed_text(retrieved_chunks)
    similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
    ranked_chunks = [retrieved_chunks[i] for i in np.argsort(similarities)[::-1]]

    return ranked_chunks

## **7. Generate Responses with Llama**

In [None]:
# Hugging Face access token for the gated Llama 3 weights. It is read from the
# HF_TOKEN environment variable, or prompted for without echo, so no secret is
# stored in the notebook. SECURITY: a token was previously hardcoded on this
# line; treat it as compromised and revoke it in the Hugging Face settings.
access_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
model_name = 'meta-llama/Meta-Llama-3-8B'  # Replace with the actual model name you are using
# `token=` replaces the deprecated `use_auth_token=` argument.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = LlamaForCausalLM.from_pretrained(model_name, token=access_token).to('cuda')

def generate_response(chunks, query, top_k=5, prompt="Answer the following question based on the provided context:"):
    """Answer `query` with the LLM, using retrieved chunks as context.

    The model input is the ranked chunks joined by spaces, then `prompt`,
    then `query`, each on its own line.

    Args:
        chunks: Chunk strings aligned with the FAISS index.
        query: The user's question.
        top_k: Number of chunks to retrieve as context.
        prompt: Instruction placed between the context and the question.

    Returns:
        The newly generated text only (the input is not echoed back),
        stripped of surrounding whitespace; or a fixed message when no
        chunks could be retrieved.
    """
    ranked_chunks = retrieve_and_rank(chunks, query, top_k)

    if not ranked_chunks:
        return "No relevant chunks found to generate a response."

    context = " ".join(ranked_chunks) + "\n" + prompt + "\n" + query

    # Put the inputs on whatever device the model lives on, and pass the
    # attention mask along with the ids (**inputs carries both).
    inputs = tokenizer(context, return_tensors='pt').to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150, pad_token_id=tokenizer.eos_token_id)

    # generate() returns prompt tokens followed by the continuation; drop the
    # prompt so the caller receives just the answer.
    prompt_length = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

## **8. RAG Pipeline Function**

In [None]:
def rag_pipeline(pdf_paths, query, top_k=5, chunk_size=200, prompt="Answer the following question based on the provided context:"):
    """Ingest the given PDFs and answer `query` from their content.

    Each call rebuilds the chunk store and the FAISS index from `pdf_paths`
    alone, so calling the pipeline repeatedly (for example once per query)
    does not pile up duplicate chunks.

    Args:
        pdf_paths: Iterable of PDF file paths to ingest.
        query: The user's question.
        top_k: Number of chunks to retrieve as context.
        chunk_size: Number of sentences per chunk.
        prompt: Instruction placed between the context and the question.

    Returns:
        The generated answer string.
    """
    create_database()
    # Start from an empty table in the default database that the helper
    # functions use; otherwise each call would insert the same PDFs again.
    with closing(sqlite3.connect("documents.db")) as conn:
        with conn:
            conn.execute("DELETE FROM chunks")

    for pdf_path in pdf_paths:
        insert_chunks(extract_and_chunk_text_from_pdf(pdf_path, chunk_size))

    # Drop vectors left over from earlier calls so index ids line up with
    # positions in the freshly loaded chunk list.
    index.reset()
    chunks = load_chunks_and_index()

    return generate_response(chunks, query, top_k, prompt)

## **9. Upload Multiple PDFs**

In [None]:
from google.colab import files

# Ask the user to pick files via the Colab upload dialog.
# NOTE(review): presumably `uploaded` maps each uploaded file name to its
# contents - confirm against the google.colab documentation.
uploaded = files.upload()

# The uploaded file names are the PDF paths used by the rest of the notebook.
pdf_paths = [file_name for file_name in uploaded.keys()]

## **10. Run the RAG Pipeline**

In [None]:
# Questions to put to the pipeline, one run per question.
queries = [
    "Configuration File Syntax in linux?",
    "network configuration utility (ncat)",
    "Basic requirements and setup for linux?",
    "Why Guest Security Matters in linux"
]

star_rule = '*' * 100
dash_rule = '-' * 100

for query in queries:
    response = rag_pipeline(pdf_paths, query)

    # Blank spacing, then the query framed by star rules.
    print('\n', '\n')
    print(star_rule)
    print('Query: ', query)
    print(star_rule)

    # The response, introduced by a dashed rule and closed by a star rule.
    print('\n')
    print(dash_rule)
    print('Response: ', response)
    print(star_rule)

## **11. Evaluate the Model**

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def evaluate_with_cosine_similarity(ground_truth, queries, pdf_paths, top_k=5, chunk_size=200, prompt="Answer the following question based on the provided context:"):
    """Score generated answers against reference answers by embedding similarity.

    The PDFs are ingested from scratch, then for each query an answer is
    generated from the retrieved context and compared with
    ``ground_truth[query]`` via cosine similarity of their embeddings.

    Args:
        ground_truth: Mapping from each query to its reference answer.
        queries: Iterable of query strings; each must be a key of `ground_truth`.
        pdf_paths: Iterable of PDF file paths to ingest.
        top_k: Number of chunks to retrieve as context.
        chunk_size: Number of sentences per chunk.
        prompt: Instruction placed between the context and the question.

    Returns:
        Dict mapping each query to ``{"response": str, "similarity": float}``.
        Queries with no retrievable context get a fixed message and 0.0.
    """
    create_database()
    # Start from an empty table in the default database used by the helpers,
    # so re-running the evaluation does not accumulate duplicate chunks.
    with closing(sqlite3.connect("documents.db")) as conn:
        with conn:
            conn.execute("DELETE FROM chunks")

    for pdf_path in pdf_paths:
        insert_chunks(extract_and_chunk_text_from_pdf(pdf_path, chunk_size))

    # Drop stale vectors so index ids line up with the loaded chunk list.
    index.reset()
    chunks = load_chunks_and_index()

    max_input_tokens = 512  # total budget for context + instruction + question
    results = {}

    for query in queries:
        ranked_chunks = retrieve_and_rank(chunks, query, top_k)

        if not ranked_chunks:
            results[query] = {"response": "No relevant chunks found to generate a response.", "similarity": 0.0}
            continue

        # The instruction and question sit at the END of the input, so plain
        # right-side truncation would cut them off. Tokenize them separately
        # and truncate only the context to the remaining budget.
        question_ids = tokenizer(
            "\n" + prompt + "\n" + query,
            add_special_tokens=False,
            return_tensors='pt',
        )['input_ids']
        context_budget = max(max_input_tokens - question_ids.shape[1], 1)
        context_ids = tokenizer(
            " ".join(ranked_chunks),
            truncation=True,
            max_length=context_budget,
            return_tensors='pt',
        )['input_ids']

        # Single unpadded sequence: no padding is requested and the mask is
        # all ones. Tensors go to the model's device.
        input_ids = torch.cat([context_ids, question_ids], dim=1).to(model.device)
        attention_mask = torch.ones_like(input_ids)

        outputs = model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)
        # Keep only the continuation; scoring the echoed prompt would measure
        # the retrieved context rather than the answer.
        response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True).strip()

        # Calculate cosine similarity with ground truth
        ground_truth_embedding = embed_text([ground_truth[query]])
        response_embedding = embed_text([response])
        similarity = float(cosine_similarity(ground_truth_embedding, response_embedding)[0][0])

        results[query] = {"response": response, "similarity": similarity}

    return results

# Example Usage:
# PDF files to evaluate against.
pdf_paths = [
    "Red_Hat_Enterprise_Linux-7-Migration_Planning_Guide-en-US.pdf",
    "Red_Hat_Enterprise_Linux-7-Virtualization_Getting_Started_Guide-en-US.pdf",
    "Red_Hat_Enterprise_Linux-7-Virtualization_Security_Guide-en-US.pdf",
]

# Evaluation questions, in the order they are scored.
queries = [
    "What is the syntax for configuration files in Linux?",
    "How to configure network utility (ncat) in Linux?",
    "Basic requirements and setup for linux?",
    "Why Guest Security Matters in linux",
]

# Reference answers, listed in the same order as `queries` and paired with
# them positionally to form the query -> answer mapping.
reference_answers = [
    "The syntax for configuration files in Linux typically involves directives, parameters, and comments.",
    "To configure ncat, you need to specify the target host, port, and any desired options such as verbosity or protocols.",
    "Setting up a Linux system involves several steps and requirements.",
    "Guest security is an important aspect of overall system security, especially in Linux environments that may be used by multiple users or that provide guest access. Here are several reasons why guest security matters in Linux.",
]
ground_truth = dict(zip(queries, reference_answers))

evaluation_results = evaluate_with_cosine_similarity(ground_truth, queries, pdf_paths)

# Print each query, its generated response and its similarity score, each
# framed by rules of asterisks. The values are printed directly: wrapping them
# in braces outside an f-string builds a one-element set and prints "{'...'}".
for query, result in evaluation_results.items():
    print('*'*100)
    print('Query: ', query, '\n')
    print('*'*100)

    print('*'*100)
    print('Response: ', result['response'], '\n')
    print('*'*100)

    print('*'*100)
    print('Similarity: ', result['similarity'], '\n')
    print('*'*100)