In [5]:
import json                 # To read JSON data
from pathlib import Path    # For safe file path handling

# Path to the chunk file we saved earlier
chunks_path = Path("data/chunks/terminal_chunks.jsonl")

# This list will store all chunk dictionaries
chunks = []

# Open the JSONL file (one JSON object per line)
with chunks_path.open("r", encoding="utf-8") as f:
    for line in f:
        # Remove newline characters and extra spaces
        line = line.strip()
        
        # Skip empty lines
        if not line:
            continue
        
        # Convert JSON string â†’ Python dictionary
        chunks.append(json.loads(line))

# Print how many chunks were loaded
print("Chunks loaded:", len(chunks))

# Inspect the first chunk
chunks[0]


Chunks loaded: 2


{'id': 0,
 'text': 'Linux & Terminal Command Cheat Sheet 1. File & Directory Navigation pwd ls ls -l ls -a cd folder cd .. cd ../.. cd /path/to/dir cd ~ cd - 2. File & Directory Management touch file.txt mkdir folder mkdir -p a/b/c rm file rm -r folder rm -rf folder cp f1 f2 cp -r dir1 dir2 mv old new cat file.txt head file.txt tail file.txt tail -f file.txt 3. Editing Files nano file.txt vim file.txt code . 4. Permissions & Ownership chmod 755 file chmod +x script.sh chown user:group file sudo command 5. Search & Find find . -name "*.txt" grep "text" file grep -r "text" folder grep -i "text" file 6. Processes & System Monitoring top htop ps aux kill PID kill -9 PID df -h du -sh folder free -h 7. Networking ping google.com curl url wget url ifconfig / ip a ssh user@host scp file user@host:dest 8. Package Management (Ubuntu/WSL) sudo apt update sudo apt upgrade sudo apt install pkg sudo apt remove pkg 9. Git Commands git init git clone URL git status git add . git commit -m "msg" git pu

In [6]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Load a lightweight model (fast and accurate enough for student projects)
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def get_embedding_local(text: str):
    return embedder.encode([text], normalize_embeddings=True)[0]



In [7]:
texts = [ch["text"] for ch in chunks]
embeddings = embedder.encode(texts, normalize_embeddings=True)

print("Embeddings shape:", embeddings.shape)  


Embeddings shape: (2, 384)


In [8]:
dim = embeddings.shape[1]
index = faiss.IndexFlatIP(dim)  # cosine similarity if normalized
index.add(np.array(embeddings, dtype="float32"))

print("Index size:", index.ntotal)


Index size: 2


In [19]:
query = input('Enter your query :')
qvec = embedder.encode([query], normalize_embeddings=True)

D, I = index.search(np.array(qvec, dtype="float32"), k=5)

for idx in I[0]:
    print(chunks[idx]["source"], chunks[idx].get("word_range"), chunks[idx]["text"][:200])


terminal.pdf [250, 278] -t name . docker run name docker ps docker stop ID 14. System Information uname -a hostname whoami 15. WSL Commands wsl -l -v wsl --shutdown explorer.exe .
terminal.pdf [0, 278] Linux & Terminal Command Cheat Sheet 1. File & Directory Navigation pwd ls ls -l ls -a cd folder cd .. cd ../.. cd /path/to/dir cd ~ cd - 2. File & Directory Management touch file.txt mkdir folder mkd
terminal.pdf [250, 278] -t name . docker run name docker ps docker stop ID 14. System Information uname -a hostname whoami 15. WSL Commands wsl -l -v wsl --shutdown explorer.exe .
terminal.pdf [250, 278] -t name . docker run name docker ps docker stop ID 14. System Information uname -a hostname whoami 15. WSL Commands wsl -l -v wsl --shutdown explorer.exe .
terminal.pdf [250, 278] -t name . docker run name docker ps docker stop ID 14. System Information uname -a hostname whoami 15. WSL Commands wsl -l -v wsl --shutdown explorer.exe .
