### Imports

In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
from ctransformers import AutoModelForCausalLM

### Embedding Model

In [2]:
class LocalEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by a local SentenceTransformer.

    Both methods delegate to ``SentenceTransformer.encode`` and convert the
    returned array to plain Python lists via ``.tolist()``.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Load the sentence-transformers model.

        Args:
            model_name: Model identifier passed straight to
                ``SentenceTransformer``. Defaults to the model this notebook
                was written against, so ``LocalEmbeddings()`` is unchanged.
        """
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts; returns one vector per input text."""
        return self.model.encode(texts).tolist()

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string; returns a single vector."""
        return self.model.encode(text).tolist()

embeddings = LocalEmbeddings()



Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


### Load PDF

In [3]:
# Read the resume from disk; the length of the result is reported as the
# number of pages loaded.
loader = PyPDFLoader("data/Resume.pdf")
documents = loader.load()

page_count = len(documents)
print(f"Pages loaded: {page_count}")

Pages loaded: 2


### Chunking

In [9]:
# Split the loaded documents into overlapping character chunks for retrieval.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=220,
    chunk_overlap=40,
)
docs = splitter.split_documents(documents)

chunk_count = len(docs)
print(f"Total chunks: {chunk_count}")

Total chunks: 22


### Create Vector DB

In [10]:
# Embed every chunk with the local embedding model and index it in FAISS.
vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)
print("Vector DB ready")

Vector DB ready


### Load Local LLM

In [6]:
# Load the quantized Mistral instruct model through ctransformers.
# NOTE(review): confirm the model's context window is large enough for the
# retrieved context plus the generated tokens used later in the notebook.
llm_repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
llm_load_options = {
    "model_file": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    "model_type": "mistral",
}
llm = AutoModelForCausalLM.from_pretrained(llm_repo_id, **llm_load_options)

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

### Ask Questions

In [11]:
# Kept at column 0 on purpose: a triple-quoted string written inside the
# function body would carry the body's indentation into every prompt line,
# and the model tends to mirror that indentation in its answer.
_RECRUITER_PROMPT = """\
You are an experienced technical recruiter evaluating a candidate.

Your job:
- Explain the candidate professionally
- Summarize strengths clearly
- Never invent information
- If missing say: Not found in document

Context:
{context}

Question:
{query}

Answer:
"""


def ask(query, k=3, max_new_tokens=180, temperature=0.3):
    """Answer a question about the indexed document using retrieval + the local LLM.

    Args:
        query: Natural-language question.
        k: Number of chunks to retrieve from ``vectorstore`` as context.
        max_new_tokens: Generation length limit forwarded to ``llm``.
        temperature: Sampling temperature forwarded to ``llm``.

    Returns:
        Whatever ``llm(...)`` returns for the assembled prompt.
    """
    # Named `retrieved` rather than `docs` so it is not confused with the
    # notebook-level `docs` list of all chunks.
    retrieved = vectorstore.similarity_search(query, k=k)
    context = "\n".join(chunk.page_content for chunk in retrieved)

    # str.format does not re-parse substituted values, so braces inside the
    # retrieved context or the query are inserted verbatim.
    prompt = _RECRUITER_PROMPT.format(context=context, query=query)

    return llm(prompt, max_new_tokens=max_new_tokens, temperature=temperature)

In [15]:
# Example query against the indexed resume.
answer = ask("What skills does Noel have?")
print(answer)

1. Data Analysis: Ability to interpret and draw meaningful insights from data.
    2. Machine Learning: Proficient in designing, implementing, and evaluating machine learning models.
    3. Deep Learning: Skilled in building deep neural networks and applying them for complex problems.
    4. NLP: Experienced with Natural Language Processing techniques for text analysis and sentiment analysis.
    5. Python: Strong programming skills in Python language for data manipulation, cleaning, and analysis.
    6. SQL: Expertise in querying databases using Structured Query Language (SQL).
    7. Power BI & Tableau: Proficient in creating interactive visualizations and reports using these tools.
    8. Model Evaluation: Adept at assessing the performance of machine learning models and selecting appropriate ones for specific use cases.
    


In [14]:
# Components used:

# - Mistral-7B-Instruct-v0.2 (GGUF, Q4_K_M quantization) → LLM for answer generation

# - all-MiniLM-L6-v2 → embedding model for semantic search

# - FAISS → vector index for retrieval (RAG pipeline)