### Setup

In [52]:
# ─── Silence all outputs ──────────────────────────────────────────────
import os, sys, warnings, logging

# 1) No Python tracebacks
sys.tracebacklimit = 0

# 2) Drop all stderr (hides C/C++ loader logs too)
sys.stderr = open(os.devnull, 'w')

# 3) Suppress all warnings
warnings.filterwarnings("ignore")

# 4) Suppress all Python logging
logging.disable(logging.CRITICAL)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [53]:
# !pip install pymupdf
# !pip install sentence-transformers
# !pip install tf-keras
# !pip install faiss-cpu
# !pip install "numpy<1.25" --force-reinstall
# !pip install pdfplumber transformers
# !pip install "transformers>=4.40" accelerate bitsandbytes

### Document Pre-processing & Chunk Generation

In [54]:
import re, pathlib, pdfplumber
from typing import List
from transformers import AutoTokenizer

# Line-level filters for boilerplate that should not reach the chunker:
# lines starting with the brand name, "Warranty"/"Copyright" lines, the
# table-of-contents title, and digit-only lines (presumably page numbers).
SKIP_PATTERNS = [
    r"^\s*Samsung.*",
    r"^\s*Warranty\b",
    r"^\s*Copyright\b",
    r"^\s*Table of contents\b",
    r"^\s*\d+\s*$",
]
# A single case-insensitive alternation, so each line needs only one match call.
_skip = re.compile("|".join(SKIP_PATTERNS), flags=re.IGNORECASE)

def extract_pdf_text(pdf_path: str) -> List[str]:
    """Return every text line of the PDF at ``pdf_path``, in page order.

    Pages for which ``extract_text()`` yields nothing contribute no lines.
    """
    with pdfplumber.open(pdf_path) as document:
        # Fall back to "" so a page without extractable text is simply skipped.
        page_texts = [page.extract_text() or "" for page in document.pages]

    collected: List[str] = []
    for text in page_texts:
        collected += text.splitlines()
    return collected

def clean_lines(raw_lines: List[str]) -> List[str]:
    """Strip each line, dropping blank lines and lines matched by ``_skip``."""
    kept: List[str] = []
    for line in raw_lines:
        stripped = line.strip()
        if not stripped:
            continue
        # The pattern is applied to the unstripped line; it allows for leading
        # whitespace on its own.
        if _skip.match(line):
            continue
        kept.append(stripped)
    return kept

def chunk_with_overlap(text_lines: List[str], tokenizer,
                       max_tokens: int = 350, overlap: int = 50) -> List[str]:
    """Pack lines into token chunks of at most ``max_tokens`` with a shared tail.

    Lines are tokenized one by one and appended to the current chunk. When the
    next line would not fit, the chunk is emitted and the following chunk is
    seeded with the last ``overlap`` tokens of the emitted one. Chunks break on
    line boundaries whenever possible; a chunk that would still exceed
    ``max_tokens`` (a single very long line, or overlap tail + next line) is
    split into sliding windows so the limit is a hard upper bound.

    Args:
        text_lines: Lines of text to pack, in reading order.
        tokenizer: Object exposing ``encode(text, add_special_tokens=False)``
            returning a sequence of token ids, and ``decode(ids)`` returning text.
        max_tokens: Maximum number of tokens per chunk (special tokens excluded).
        overlap: Number of trailing tokens repeated at the start of the next chunk.

    Returns:
        The decoded text of every chunk. The text is whatever
        ``tokenizer.decode`` produces, so it may not match the input verbatim.

    Raises:
        ValueError: If ``max_tokens`` is not positive, or ``overlap`` is negative
            or not smaller than ``max_tokens``.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")
    if overlap < 0 or overlap >= max_tokens:
        raise ValueError("overlap must satisfy 0 <= overlap < max_tokens")

    chunks, cur = [], []
    for ln in text_lines:
        tok = tokenizer.encode(ln, add_special_tokens=False)
        if cur and len(cur) + len(tok) > max_tokens:
            chunks.append(cur)
            # `cur[-0:]` would keep the whole list, hence the explicit branch.
            cur = cur[-overlap:] if overlap else []
        cur.extend(tok)
        # Enforce the hard limit: emit full windows, stepping so that
        # consecutive windows share exactly `overlap` tokens.
        while len(cur) > max_tokens:
            chunks.append(cur[:max_tokens])
            cur = cur[max_tokens - overlap:]
    if cur:
        chunks.append(cur)
    return [tokenizer.decode(toks) for toks in chunks]

In [55]:
# Placeholder path — point this at the manual PDF before running.
pdf_file   = "yourpath/data.pdf"
# Same model family as the embedder below, so token counts used for chunking
# line up with what the embedder tokenizes.
tokenizer  = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

raw  = extract_pdf_text(pdf_file)
clean = clean_lines(raw)
chunks = chunk_with_overlap(clean, tokenizer,
                            max_tokens=256, overlap=32)

# Fail fast with a readable message: without this guard an empty extraction
# surfaces as a bare IndexError on chunks[0].
if not chunks:
    raise ValueError(f"no text chunks produced from {pdf_file!r}; nothing to embed")

print(f"{len(chunks)=}  (예시) {chunks[0][:200]}...")

from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embeddings are L2-normalised here; downstream code relies on that.
embeddings = model.encode(chunks, show_progress_bar=True,
                          normalize_embeddings=True)

len(chunks)=249  (예시) user guide sm - s921w sm - s926w sm - s928w english ( ca ). 01 / 2024. rev. 1. 0 www. samsung. com / ca getting started 95 multi window ( using multiple apps at once ) 5 device layout and functions 98...


In [56]:
import faiss, numpy as np, pickle, pathlib

# Build an exact inner-product index over the chunk embeddings and persist both
# the index and the chunk texts to the working directory.
# FAISS is fed float32 here, so the embeddings are converted explicitly.
vecs = np.asarray(embeddings, dtype="float32")          # (n_chunks, dim)
dim  = vecs.shape[1]

# The embeddings were produced with normalize_embeddings=True, so the inner
# product of two vectors equals their cosine similarity.
index = faiss.IndexFlatIP(dim)                          # inner-product == cosine
index.add(vecs)                                         # insert all chunk vectors

# Row i of the index corresponds to chunks[i]; the two files must be kept together.
faiss.write_index(index, "samsung_manual.index")
with open("chunks.pkl", "wb") as f:
    pickle.dump(chunks, f)

print(f" {index.ntotal} chunks indexed")

 249 chunks indexed


In [57]:
def search_manual(query: str, k: int = 5, score_thres: float = 0.30):
    """Return up to ``k`` manual chunks most similar to ``query``.

    The query is embedded with the global ``model`` (normalised) and searched
    against the global FAISS ``index``.

    Args:
        query: Natural-language question.
        k: Maximum number of neighbours to request from the index.
        score_thres: Hits scoring below this value are discarded.

    Returns:
        A list of dicts with keys ``chunk_id`` (int), ``score`` (float) and
        ``text`` (the chunk from the global ``chunks`` list), best match first.
    """
    q_vec = model.encode([query], normalize_embeddings=True)
    D, I  = index.search(np.asarray(q_vec, dtype="float32"), k)
    hits  = []
    for score, idx in zip(D[0], I[0]):
        # FAISS fills missing results with label -1 when the index holds fewer
        # than k vectors; chunks[-1] would silently return the last chunk.
        if idx < 0 or score < score_thres:
            continue
        hits.append({"chunk_id": int(idx),
                     "score": float(score),
                     "text":  chunks[idx]})
    return hits

In [58]:
import subprocess, sys, importlib

def pip_install(pkg):
    """Install ``pkg`` with pip unless it is already available.

    ``pkg`` is a pip distribution name, which can differ from the importable
    module name (e.g. ``llama-cpp-python`` is imported as ``llama_cpp``). The
    check therefore looks at installed distribution metadata first and only
    then falls back to looking for an importable module of the same name.

    Raises:
        subprocess.CalledProcessError: If the pip invocation fails.
    """
    # Submodules are imported explicitly: `import importlib` alone does not
    # guarantee that `importlib.util` / `importlib.metadata` are loaded.
    import importlib.metadata
    import importlib.util

    try:
        importlib.metadata.distribution(pkg)
        return
    except importlib.metadata.PackageNotFoundError:
        pass

    try:
        if importlib.util.find_spec(pkg) is not None:
            return
    except (ImportError, ValueError):
        # Names that are not valid module paths simply count as "not importable".
        pass

    subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])

# Make sure the packages imported by the model download/load cell are present:
# huggingface_hub (hf_hub_download) and llama-cpp-python (imported as llama_cpp).
pip_install("huggingface_hub")
pip_install("llama-cpp-python")



### TinyLlama download & load

In [59]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hugging Face Hub repository and file of the quantised TinyLlama chat model.
repo_id   = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
file_name = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"    # 4-bit, super-light
# The returned value is used as a filesystem path to the downloaded model file.
local_path = hf_hub_download(repo_id, file_name)

# `llm` is the global model handle that ask_local_llm() calls.
# NOTE(review): n_ctx is presumably the context window in tokens — the prompt
# (instructions + retrieved chunks + question) plus the answer must fit in it.
# NOTE(review): n_threads is hard-coded; confirm it suits the host's core count.
llm = Llama(model_path=local_path,
            n_ctx=2048,
            n_threads=8,
            n_batch=512)


In [60]:
import pathlib, pickle, numpy as np, faiss, os

# Persist the retrieval artifacts (chunk texts, embedding matrix, FAISS index)
# into one directory so they can be reloaded without re-processing the PDF.
# "path" is a placeholder — set it to the desired artifact directory.
OUT = pathlib.Path("path")
OUT.mkdir(parents=True, exist_ok=True)

with open(OUT / "cleaned_paragraphs.pkl", "wb") as f:
    pickle.dump(chunks, f)               # ← chunks = list[str]

np.save(OUT / "embedding_matrix.npy", embeddings)   # embeddings = np.array

# faiss.write_index is given a plain string here, hence the str() conversion.
faiss.write_index(index, str(OUT / "faiss_index.faiss"))
print("✓ all artifacts saved to", OUT)

✓ all artifacts saved to /Users/namyoon/Desktop/UMN_MSBA/2025 Summer/NLP/Assignment2


In [61]:
import pickle, faiss, pathlib
# Reload the saved chunk texts and FAISS index.
# "path" is a placeholder — it must match the directory the artifacts were saved to.
DATA = pathlib.Path("path")
# Use a context manager so the file handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open(DATA / "cleaned_paragraphs.pkl", "rb") as f:
    texts = pickle.load(f)
index = faiss.read_index(str(DATA / "faiss_index.faiss"))

### Embedder + retrieval function

In [62]:
from sentence_transformers import SentenceTransformer
# Query encoder; must be the same model that produced the indexed embeddings.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# NOTE: this redefines search_manual from the earlier retrieval cell; from here
# on the notebook uses this version, which reads the reloaded `texts`/`index`.
def search_manual(query, k=3, score_thres=None):
    """Return up to ``k`` chunks from ``texts`` most similar to ``query``.

    Args:
        query: Natural-language question.
        k: Maximum number of neighbours to request from the index.
        score_thres: Optional minimum similarity; ``None`` (default) keeps
            every hit the index returns.

    Returns:
        A list of dicts with keys ``chunk_id`` (int) and ``text``, best match first.
    """
    qv = embedder.encode([query], normalize_embeddings=True).astype("float32")
    D, I = index.search(qv, k)
    hits = []
    for score, i in zip(D[0], I[0]):
        # FAISS pads with label -1 when the index holds fewer than k vectors;
        # texts[-1] would silently return the last chunk instead.
        if i < 0:
            continue
        if score_thres is not None and score < score_thres:
            continue
        hits.append({"chunk_id": int(i), "text": texts[i]})
    return hits

### Prompt builder

In [63]:
def build_prompt(query: str, retrieved):
    """Assemble the LLM prompt from the question and the retrieved chunks.

    Each item of ``retrieved`` must provide ``chunk_id`` and ``text``; the
    chunks are listed under a context heading, tagged with their id and
    separated by a ``---`` rule, followed by the question and an answer cue.
    """
    tagged_chunks = []
    for hit in retrieved:
        tagged_chunks.append(f"[#{hit['chunk_id']}] {hit['text']}")
    context = "\n\n---\n\n".join(tagged_chunks)

    return f"""You are a Samsung product-manual assistant.
Use ONLY the context to answer. If not answerable, say you don't know.
Show the chunk id you used at the end.

### Context
{context}

### Question
{query}

### Answer:
"""

### LLM ask function

In [64]:
def ask_local_llm(prompt: str, max_tokens: int = 128) -> str:
    """Run ``prompt`` through the global ``llm`` and return the stripped completion.

    Generation is capped at ``max_tokens`` and stops early if the model starts
    writing a new "### Question" section.
    """
    completion = llm(prompt, max_tokens=max_tokens, stop=["### Question"])
    first_choice = completion["choices"][0]
    return first_choice["text"].strip()

### Test / Streamlit hand-off

In [65]:
# End-to-end smoke test: retrieve the top chunks for one question, build the
# prompt from them, and print the local model's answer.
query   = "How do I enable power saving mode?"
hits    = search_manual(query, k=3)
prompt  = build_prompt(query, hits)

answer  = ask_local_llm(prompt)
print("\n🔹 Answer:\n", answer)


🔹 Answer:
 To enable power saving mode, follow these steps:

1. On the Settings screen, tap Battery.
2. Scroll down to Power Saving and tap Enable Power Saving.
3. Turn on Power Saving Mode to save battery power.
