In [1]:
# รันใน Colab cell หนึ่งครั้ง
!pip install --quiet transformers accelerate sentencepiece huggingface_hub \
    sentence-transformers faiss-cpu PyPDF2


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# ถ้าคุณต้องโหลดโมเดล gated ให้รันอันนี้ก่อนและวาง token
from huggingface_hub import notebook_login
notebook_login()  # จะขึ้น prompt ให้วาง token (hf_xxx...)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os
from PyPDF2 import PdfReader

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# ----- LLM -----
LLM_ID = "google/gemma-2b-it"   # คุณมีสิทธิ์แล้ว ถ้าติดปัญหาให้ใช้ fallback below
FALLBACK_LLM = "google/flan-t5-small"

def load_llm(model_id):
    try:
        tok = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)
        m = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=True)
        if m.config.pad_token_id is None:
            m.config.pad_token_id = m.config.eos_token_id
            tok.pad_token = tok.eos_token
        m.to(device)
        print("Loaded LLM:", model_id)
        return tok, m
    except Exception as e:
        print("Could not load", model_id, "-> fallback. Error:", e)
        tok = AutoTokenizer.from_pretrained(FALLBACK_LLM)
        m = AutoModelForCausalLM.from_pretrained(FALLBACK_LLM)
        if m.config.pad_token_id is None:
            m.config.pad_token_id = m.config.eos_token_id
            tok.pad_token = tok.eos_token
        m.to(device)
        print("Loaded fallback LLM:", FALLBACK_LLM)
        return tok, m

llm_tokenizer, llm_model = load_llm(LLM_ID)

# ----- Embedding model -----
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # เบาและรวดเร็ว
print("Loaded embedding model: all-MiniLM-L6-v2")


Device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loaded LLM: google/gemma-2b-it


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loaded embedding model: all-MiniLM-L6-v2


In [9]:
# อ่าน PDF (คืนเป็น text) -- ถ้าเป็น .txt ใช้ fallback
def read_pdf(path):
    text = []
    reader = PdfReader(path)
    for p in reader.pages:
        try:
            text.append(p.extract_text() or "")
        except:
            text.append("")
    return "\n".join(text)

def read_text_file(path):
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        return f.read()

# simple text splitter: split by sentences/paragraphs into chunks of ~chunk_size tokens (approx by words)
import re
def text_to_chunks(text, chunk_size=300, overlap=80):
    # crude split by whitespace into token-ish words, then join
    words = re.split(r"\s+", text)
    chunks = []
    i = 0
    N = len(words)
    while i < N:
        j = min(i + chunk_size, N)
        chunk = " ".join(words[i:j])
        chunks.append(chunk)
        # move by chunk_size - overlap
        i += max(1, chunk_size - overlap)
    return chunks

# quick test
sample = "This is sentence one. " * 200
c = text_to_chunks(sample, chunk_size=50, overlap=10)
print("Created", len(c), "chunks; example len:", len(c[0].split()))


Created 21 chunks; example len: 50


In [11]:
# ถ้าใช้ Colab: อัปโหลดไฟล์ PDF/TXT
from google.colab import files
uploaded = files.upload()  # เลือกไฟล์ .pdf หรือ .txt

# หาชื่อไฟล์แรก
fn = list(uploaded.keys())[0]
print("Uploaded:", fn)

# อ่าน
if fn.lower().endswith(".pdf"):
    doc_text = read_pdf(fn)
else:
    doc_text = read_text_file(fn)

print("Document length (chars):", len(doc_text))
# สร้าง chunks
chunks = text_to_chunks(doc_text, chunk_size=300, overlap=80)
print("Chunks:", len(chunks))
print("Preview chunk 0:", chunks[0][:400])


Saving test_text.txt to test_text (2).txt
Uploaded: test_text (2).txt
Document length (chars): 625
Chunks: 1
Preview chunk 0: Hello, my name is Phoom. I am an AI and Data Science graduate with First Class Honors (GPA 3.77) and strong English communication skills (TOEIC 735). I am passionate about applying machine learning and statistical modeling to solve real-world problems and drive data-driven decision making. Skilled in Python, SQL, and modern AI/ML frameworks, with hands-on experience in data preprocessing, model de


In [12]:
# สร้าง embeddings (batch)
batch_size = 32
vecs = []
for i in range(0, len(chunks), batch_size):
    batch = chunks[i:i+batch_size]
    em = embed_model.encode(batch, convert_to_numpy=True, show_progress_bar=False)
    vecs.append(em)
vecs = np.vstack(vecs).astype("float32")
print("Embeddings shape:", vecs.shape)

# normalize for cosine similarity (FAISS inner product after normalization)
faiss.normalize_L2(vecs)

# สร้าง index (Inner Product on normalized vectors = cosine similarity)
d = vecs.shape[1]
index = faiss.IndexFlatIP(d)
index.add(vecs)
print("FAISS index size:", index.ntotal)

# เก็บ metadata เพื่อ map indices -> chunk text
import json
with open("chunks.json", "w", encoding="utf-8") as f:
    json.dump(chunks, f, ensure_ascii=False, indent=2)
# Save index to disk
faiss.write_index(index, "faiss_index.bin")
print("Saved chunks.json and faiss_index.bin")


Embeddings shape: (1, 384)
FAISS index size: 1
Saved chunks.json and faiss_index.bin


In [16]:
# โหลด index และ chunks หากจำเป็น
index = faiss.read_index("faiss_index.bin")
import json
with open("chunks.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)

def retrieve(query, top_k=3):
    ntotal = index.ntotal
    k = min(top_k, max(1, ntotal))
    qvec = embed_model.encode([query], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(qvec)
    D, I = index.search(qvec, k)
    results = []
    for idx, score in zip(I[0], D[0]):
        # FAISS may return -inf-like numbers or -3.4e38 for empty slots — skip them
        if idx < 0 or idx >= ntotal:
            continue
        results.append((chunks[idx], float(score)))
    return results

# try
# q = "How can one prevent overfitting?"
# res = retrieve(q, top_k=3)
# for i,(text,score) in enumerate(res):
#     print("----", i, "score", score)
#     print(text[:400].strip(), "\n")
res = retrieve("As an AI engineer How to write the resume", top_k=3)
print("Retrieved count:", len(res))
for i,(t,s) in enumerate(res):
    print(i,"score",s,"preview:", t[:200].replace("\n"," "))



Retrieved count: 1
0 score 0.33391129970550537 preview: Hello, my name is Phoom. I am an AI and Data Science graduate with First Class Honors (GPA 3.77) and strong English communication skills (TOEIC 735). I am passionate about applying machine learning an


In [20]:
def build_rag_prompt_fallback(question, retrieved, max_context_chars=2000):
    context = ""
    for chunk, score in retrieved:
        context += chunk.strip() + "\n\n---\n\n"
        if len(context) > max_context_chars:
            break
    prompt = f"""You are an HR manager. Use the following context to give advice.
If the answer can be derived from the context, cite the context.
If the context does not provide the answer, you may provide a concise answer using general HR knowledge but clearly state it is from general knowledge.

Context:
{context}

Question: {question}

Answer concisely, cite context when used, otherwise mention "based on general HR knowledge".
"""
    return prompt


# run RAG
question = "Who am I"
retrieved = retrieve(question, top_k=4)
prompt = build_rag_prompt(question, retrieved)
print("PROMPT preview (first 400 chars):\n", prompt[:400])

# generate with LLM
def generate_from_llm(prompt, max_new_tokens=200, temperature=0.2):
    inputs = llm_tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    out = llm_model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.95,
        eos_token_id=llm_model.config.eos_token_id,
        pad_token_id=llm_model.config.pad_token_id,
        no_repeat_ngram_size=3,
        early_stopping=True
    )
    text = llm_tokenizer.decode(out[0], skip_special_tokens=True)
    # optionally strip prompt
    generated = text[len(prompt):].strip() if text.startswith(prompt) else text
    return generated

ans = generate_from_llm(prompt, max_new_tokens=200, temperature=0.2)
print("=== Answer ===\n", ans)


PROMPT preview (first 400 chars):
 You are a Hr menager. Use the following context from documents to give an advice. If the answer is not in the context, say 'I don't know' or answer conservatively.

Context:
Hello, my name is Phoom. I am an AI and Data Science graduate with First Class Honors (GPA 3.77) and strong English communication skills (TOEIC 735). I am passionate about applying machine learning and statistical modeling to 
=== Answer ===
 Phoom is an AI graduate with strong academic performance and a passion for data science.
