In [None]:
# Step 1: Install Required Libraries
# !pip install -qU transformers torch accelerate bitsandbytes
# !pip install -qU langchain langchain_community langchain_huggingface
# !pip install -qU sentence-transformers faiss-cpu

In [10]:
!!pip install pypdf


['Collecting pypdf',
 '  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)',
 'Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)',
 '\x1b[?25l   \x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.0/310.5 kB\x1b[0m \x1b[31m?\x1b[0m eta \x1b[36m-:--:--\x1b[0m',
 '\x1b[2K   \x1b[38;2;249;38;114m━━━\x1b[0m\x1b[38;2;249;38;114m╸\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m30.7/310.5 kB\x1b[0m \x1b[31m1.6 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K   \x1b[38;2;249;38;114m━━━━━━━━━━━\x1b[0m\x1b[38;2;249;38;114m╸\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m92.2/310.5 kB\x1b[0m \x1b[31m1.3 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K   \x1b[38;2;249;38;114m━━━━━━━━━━━━━━━━━━━━━━\x1b[0m\x1b[38;5;237m╺\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m174.1/310.5 kB\x1b[0m \x1b[31m1.7 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K   \x1b[38;2;249;38;114m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m\

In [1]:
import os
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pypdf

# Step 1: Walk through the knowledge directory
documents = []
for root, _, files in os.walk("./knowledge"):
    for file in files:
        file_path = os.path.join(root, file)
        if file.lower().endswith(".pdf"):
            print(f"Loading PDF file: {file_path}")
            pdf_loader = PyPDFLoader(file_path)
            documents.extend(pdf_loader.load())
        elif file.lower().endswith(".txt"):
            print(f"Loading TXT file: {file_path}")
            text_loader = TextLoader(file_path)
            documents.extend(text_loader.load())

print(f"Loaded {len(documents)} documents from knowledge base.")

# Step 2: Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks.")

# Step 3: Embed + store in FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(chunks, embedding_model)
print("Vector store created successfully.")


  from .autonotebook import tqdm as notebook_tqdm


Loading PDF file: ./knowledge/DasunPathirage.pdf
Loading TXT file: ./knowledge/cv.txt
Loaded 3 documents from knowledge base.
Split into 19 chunks.


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Vector store created successfully.


In [7]:

for doc in documents:
    print(f"Source: {doc.metadata['source']}, Length: {(doc.page_content)}")


Source: ./knowledge/DasunPathirage.pdf, Length: C++
Python
Verilog
+94714092010
dasunpathirage@gmail.com
www.linkedin.com/in/dasun-pathirage/
127/1A/7 Kadawatha, Sri Lanka
CONTACT PROFILE
SKILLS
EDUCATION
WORK EXPERIENCE
Programming Languages: Proficient in C/C++,
Python, Verilog/System Verilog,
HTML/CSS/JavaScript, Ladder Programming
Software Development: VSCode, Visual Studio,
Spring Boot, Atmel Studio, Vivado/HLS/SDK,
Android Studio, MPLAB, MATLAB, ARM
Development Studio
Technical Expertise: FPGA, Linux/MacOS, Git, Jira,
Confluence, Machine Learning, Analytical
Thinking, Problem Solving, Creativity/Leadership
MSc in Advanced Software EngineeringUniversity of Westminster
Reading
BSc (Hons) in Electronic &
Telecommunication EngineeringSri Lanka Technological Campus 
2016-2020
Senior Software Engineer
Sagence AI, California, US (Remote from Accelr Sri Lanka)
Member of the Analog Inference back-end software development team.
Responsible for the development an mapping tool capable of aut

In [4]:
!pip install huggingface_hub

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [2]:
from huggingface_hub import login
import os

# Replace "YOUR_HF_TOKEN" with your actual token
# login("YOUR_HF_TOKEN")
hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")

In [2]:
# Step 4: Phase 2 - Setting up the RAG Chain

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Hugging Face Hub Login (if not already done in terminal)
# from huggingface_hub import login
# login("YOUR_HF_TOKEN")

# Define the model ID for Llama 3.1 8B Instruct
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# model_id = "unsloth/llama-3.1-8b-bnb-4bit"

# Use quantization to reduce memory usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Create a text-generation pipeline
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    top_p=0.9,
    temperature=0.1, # Use low temperature for factual answers
)

# Wrap the pipeline in a LangChain object
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Define the RAG prompt template
prompt_template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant. Please answer the user's question based only on the following context. If the answer is not in the context, say you don't know. Do not use any prior knowledge.

CONTEXT:
{context}<|eot_id|><|start_header_id|>user<|end_header_id|>

QUESTION:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create the retriever from our vector store
retriever = vector_store.as_retriever(search_kwargs={"k": 3}) # Retrieve top 3 chunks

# Create the RAG chain using LangChain Expression Language (LCEL)
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
print("RAG chain created and ready to use.")

Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  3.54s/it]
Device set to use cuda:0


RAG chain created and ready to use.


In [3]:
def clean_answer(raw_output: str) -> str:
    if "assistant<|end_header_id|>" in raw_output:
        return raw_output.split("assistant<|end_header_id|>")[-1].strip()
    return raw_output.strip()

In [10]:
# Question 1: The answer is in the document.
question1 = "Who is Dasun?"
print(f"Question: {question1}")

# Invoke the chain
# answer1 = rag_chain.invoke(question1)
answer1 = clean_answer(rag_chain.invoke(question1))
print(f"Answer: {answer1}")

print("\n" + "="*50 + "\n")

# Question 2: The answer is NOT in the document.
question2 = "What Dasun Done in Sri Lanka Technological Campus?"
print(f"Question: {question2}")

# Invoke the chain
answer2 = clean_answer(rag_chain.invoke(question2))
print(f"Answer: {answer2}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Question: Who is Dasun?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: Dasun is Dasun Pathirage, an experienced Senior Software Engineer.


Question: What Dasun Done in Sri Lanka Technological Campus?
Answer: Dasun Pathirage was the Director of the LEO Club of SLTC (2017-2018) and a Student at BSc (Hons) in Electronic & Telecommunication Engineering at Sri Lanka Technological Campus from 2016-2020.


In [9]:
question2 = "ACCELR?"
docs2 = retriever.get_relevant_documents(question2)
context2 = "\n".join([doc.page_content for doc in docs2])
prompt_text2 = prompt.format(context=context2, question=question2)
answer2 = clean_answer(llm.invoke(prompt_text2))
print(f"Answer: {answer2}")


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: ACCELR is mentioned as the name of the company where Tharindu Dasun Pathirage worked as a Senior Software Engineer, and also as the name of a team (ACCELR team) that he mentored.
