### Imports

In [21]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
import pypdf
import faiss

### Load LLM

In [4]:
# Hugging Face Hub id of the instruction-tuned checkpoint used for generation.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision (float16) and let `device_map="auto"`
# decide where they are placed.
# `dtype` replaces the deprecated `torch_dtype` keyword: the transformers
# release this notebook was run with warns about the old name when loading.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.float16,
    device_map="auto",
)

print("Mistral model loaded!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]



Mistral model loaded!


### Upload document

In [5]:
# Colab-only import: this cell will not run outside Google Colab.
from google.colab import files
# Prompt the user to pick a file. The chunking cell reads the first key of
# `uploaded` as the path of the PDF to load.
uploaded = files.upload()

Saving Resume.pdf to Resume.pdf


### Chunking

In [22]:
# Fail early with a readable message if the upload dialog was cancelled;
# otherwise the first-key lookup below would die with an opaque error.
if not uploaded:
    raise ValueError(
        "No file was uploaded; re-run the upload cell and choose a PDF."
    )

# Only the first uploaded file is processed.
file_path = next(iter(uploaded))

# Load the PDF into LangChain documents.
loader = PyPDFLoader(file_path)
documents = loader.load()

# Split into overlapping chunks so that each retrieved piece stays small
# while neighbouring chunks still share some context.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
docs = splitter.split_documents(documents)

### Create FAISS vector DB


In [23]:
class LocalEmbeddings(Embeddings):
    """LangChain `Embeddings` adapter backed by a local SentenceTransformer.

    Args:
        model_name: Name of the SentenceTransformer model to load. Defaults to
            "all-MiniLM-L6-v2", so `LocalEmbeddings()` behaves as before.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts; returns one vector (list of floats) per text."""
        # `.tolist()` converts the encoder's array output to plain Python lists.
        return self.model.encode(texts).tolist()

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string; returns one vector (list of floats)."""
        return self.model.encode(text).tolist()

# A PDF without extractable text (e.g. a scanned image) produces no chunks.
# Stop here with a clear message instead of letting index construction fail
# on an empty list.
if not docs:
    raise ValueError(
        "No text chunks were extracted from the PDF; cannot build the vector index."
    )

embeddings = LocalEmbeddings()

# Embed every chunk and store the vectors in a FAISS index.
vectorstore = FAISS.from_documents(docs, embeddings)

print("Vector database ready!")

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Vector database ready!


### Ask Questions

In [24]:
def ask(query, k=4, max_new_tokens=220):
    """Answer `query` about the uploaded resume using retrieved context.

    Retrieves the `k` chunks most similar to `query` from the notebook-level
    `vectorstore`, places them in an instruction prompt and generates a reply
    with the notebook-level `model` / `tokenizer`.

    Args:
        query: The user's question or instruction.
        k: Number of chunks to retrieve (default 4).
        max_new_tokens: Upper bound on the number of generated tokens
            (default 220).

    Returns:
        The generated answer as a stripped string, or the literal
        "Not found in document" when retrieval returns nothing.

    Note:
        Generation samples (`do_sample=True`), so repeated calls may return
        different text.
    """

    # ---- Retrieve relevant chunks ----
    # Named `retrieved` so it does not shadow the notebook-level `docs` list.
    retrieved = vectorstore.similarity_search(query, k=k)

    if not retrieved:
        return "Not found in document"

    context = "\n".join(chunk.page_content for chunk in retrieved)

    # ---- Prompt (instruction format) ----
    # No literal "<s>" here: the tokenizer prepends the BOS token itself, so
    # spelling it out in the text would put a second one in the input.
    prompt = f"""[INST]
You are an AI assistant summarizing a candidate resume.

Rules:
- Give a concise 4-5 sentence summary
- Focus on role, skills, education and projects
- Do not list everything
- Do not repeat information
- If missing say: Not found in document

Question:
{query}

Context:
{context}
[/INST]
"""

    # ---- Tokenize ----
    # Follow the device the model was actually placed on (`device_map="auto"`)
    # rather than assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # ---- Generate ----
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # ---- Decode only generated part ----
    # The returned sequence starts with the prompt tokens; slice them off by
    # length. Splitting the decoded text on "[/INST]" would break whenever the
    # document or the answer itself contains that marker.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = output[0][prompt_length:]

    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

In [25]:
# Run the whole pipeline once with a generic summarisation request.
summary = ask("Give a summary of this document")
print(summary)

This document summarizes the resume of a data science and analysis professional with experience in designing interactive dashboards using Tableau and Power BI for sales and financial analysis. They hold a diploma in Data Science and Artificial Intelligence from the Boston Institute of Analytics and Technology and a Bachelor's degree in Electronics & Communication from St. Joseph Engineering College with a strong CGPA of 8.19. Their technical skills include proficiency in Python, SQL, Pandas, NumPy, Scikit-Learn, and experience with deep learning, NLP, and data visualization tools. Notable projects include a Twitter sentiment analysis using a BiLSTM-based model and an insurance risk classification model using machine learning techniques.


In [26]:
# Colab (GPU)

# LLM: Mistral-7B-Instruct v0.2 (full HuggingFace Transformers model, GPU)

# Embedding model: all-MiniLM-L6-v2

# Vector DB: FAISS

In [27]:
# The Colab responses were better because Colab ran the unquantized half-precision (float16) Mistral model on a GPU,
# whereas locally I used a 4-bit quantized version for CPU efficiency, which reduces reasoning accuracy.