In [None]:
!pip install langchain-community langchain-text-splitters langchain-ollama pypdf ollama langchain-huggingface sentence-transformers  --no-deps

Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-text-splitters
  Downloading langchain_text_splitters-1.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-ollama
  Downloading langchain_ollama-1.0.1-py3-none-any.whl.metadata (2.5 kB)
Collecting pypdf
  Downloading pypdf-6.6.2-py3-none-any.whl.metadata (7.1 kB)
Collecting ollama
  Downloading ollama-0.6.1-py3-none-any.whl.metadata (4.3 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-1.2.0-py3-none-any.whl.metadata (2.8 kB)
Downloading langchain_community-0.4.1-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m94.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_text_splitters-1.1.0-py3-none-any.whl (34 kB)
Downloading langchain_ollama-1.0.1-py3-none-any.whl (29 kB)
Downloading pypdf-6.6.2-py3-none-any.whl (329 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
from google.colab import drive

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Folder where Ollama keeps its models (a dedicated folder on Drive is recommended).
#    Here: MyDrive/RAG/ollama_models
my_model_folder = '/content/drive/MyDrive/RAG/ollama_models'

# Create the folder when it is missing; exist_ok makes re-running this cell safe
# without a separate check-then-create step.
os.makedirs(my_model_folder, exist_ok=True)

# 3. Key step: set the environment variable that tells Ollama where to find its models.
#    Processes started from this kernel afterwards inherit it.
os.environ['OLLAMA_MODELS'] = my_model_folder

print(f"Ollama 模型路徑已設定為: {os.environ['OLLAMA_MODELS']}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Ollama 模型路徑已設定為: /content/drive/MyDrive/RAG/ollama_models


In [None]:
import os
import subprocess
import time
from google.colab import drive


# --- 2. 修正並安裝 Ollama ---
print("正在安裝相依套件 zstd...")
!sudo apt-get install -y zstd  # <--- 新增這一行解決你的錯誤

print("正在安裝 Ollama...")
!curl -fsSL https://ollama.com/install.sh | sh

# --- 3. 啟動 Ollama 服務 ---
print("正在啟動 Ollama 服務...")
# 使用完整路徑以防萬一 (通常是 /usr/local/bin/ollama)
process = subprocess.Popen(["ollama", "serve"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# 等待服務啟動
print("等待服務啟動中 (約 10 秒)...")
time.sleep(10)

# --- 4. 測試連線 ---
try:
    # 檢查服務是否活著
    check = subprocess.run(["curl", "-s", "http://localhost:11434"], capture_output=True, text=True)
    if "Ollama is running" in check.stdout:
        print("✅ 成功：Ollama 服務已在背景執行！")
    else:
        print("⚠️ 警告：服務似乎未回應，請檢查日誌。")
except Exception as e:
    print(f"檢查連線時發生錯誤: {e}")

正在安裝相依套件 zstd...
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  zstd
0 upgraded, 1 newly installed, 0 to remove and 41 not upgraded.
Need to get 603 kB of archives.
After this operation, 1,695 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 zstd amd64 1.4.8+dfsg-3build1 [603 kB]
Fetched 603 kB in 1s (478 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package zstd.
(Reading database ... 121689 files and direct

In [None]:
# List the models that have already been downloaded
!ollama list

NAME                           ID              SIZE      MODIFIED      
qwen3:4b-instruct-2507-q8_0    aa7252f68dda    4.3 GB    3 seconds ago    
qwen3-embedding:0.6b           ac6da0dfba84    639 MB    3 minutes ago    


In [None]:
# Download the model with Ollama
model_name = "qwen3:4b-instruct-2507-q8_0"
!ollama pull {model_name}

[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25

In [None]:
import numpy as np
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama,OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# --- Run configuration ---
# PDF that gets loaded and split
path=r"/content/drive/MyDrive/RAG/RAG資料集/2401.15884v3.pdf"
# Splitter settings; the splitter measures length with len()
Chunk_size=512
Chunk_overlap=0
max_token=1000  # NOTE(review): not referenced by any cell visible here

# Use the GPU when one is present, otherwise fall back to the CPU instead of
# failing on runtimes without CUDA.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name="Qwen/Qwen3-Embedding-0.6B",
    model_kwargs={'device': embedding_device},
)
question="What the CRAG proposed to improve"
llm=ChatOllama(model="qwen3:4b-instruct-2507-q8_0",temperature=0)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

In [None]:
# Show running processes whose command line mentions "ollama"
!ps aux | grep ollama

root        1581 19.4  0.2 2440912 38848 ?       Sl   12:13   1:11 ollama serve
root        3256  0.0  0.0   7376  3520 ?        S    12:19   0:00 /bin/bash -c ps aux | grep ollama
root        3258  0.0  0.0   6484  2528 ?        S    12:19   0:00 grep ollama


In [None]:
# Smoke test: send a trivial prompt to the chat model and display the reply text
llm.invoke("say 1,2,3").content

'1, 2, 3! 😊'

In [None]:
def texts_into_documents(path,chunk_size,chunk_overlap):
    """Load a PDF and return its text as a list of chunk strings.

    Args:
        path: Location of the PDF handed to ``PyPDFLoader``.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between neighbouring chunks, measured with ``len``.

    Returns:
        A list holding the ``page_content`` of every chunk the splitter produced.
    """
    pages = PyPDFLoader(path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )

    # Keep only the raw text; the rest of each split document is dropped here.
    contents = []
    for piece in splitter.split_documents(pages):
        contents.append(piece.page_content)
    return contents



def generate_chunk_header(chunk):
    """Ask the module-level chat model ``llm`` for a title describing ``chunk``.

    Args:
        chunk: Text to be titled; it is substituted into the user message.

    Returns:
        The model's reply after ``StrOutputParser`` has processed it.
    """
    title_prompt = ChatPromptTemplate.from_messages([
        ("system", "Generate a concise and informative title for the given text."),
        ("user", "{chunk}"),
    ])
    title_chain = title_prompt | llm | StrOutputParser()
    return title_chain.invoke({"chunk": chunk})

def chunk_text_with_headers(documents):
    """Build one record per text chunk with a generated header and embeddings.

    Args:
        documents: Iterable of chunk texts.

    Returns:
        A list of dicts with the keys ``text``, ``text_embedding``, ``header``
        and ``header_embedding``, in the same order as ``documents``.
    """
    def build_record(text):
        # The header is generated first, then the text and header are embedded.
        title = generate_chunk_header(text)
        return {
            "text": text,
            "text_embedding": create_embeddings(text),
            "header": title,
            "header_embedding": create_embeddings(title),
        }

    return [build_record(text) for text in documents]

def create_embeddings(text):
    """Embed ``text`` with the module-level ``embedding_model``.

    Returns whatever ``embedding_model.embed_query`` yields for ``text``.
    """
    vector = embedding_model.embed_query(text)
    return vector




In [None]:
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Args:
        vec1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (|vec1| * |vec2|)``, or ``0.0`` when either vector
        has zero norm.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # Without this guard a zero vector gives 0/0 = NaN, which would then
        # poison any ranking built on the scores.
        return 0.0
    return np.dot(vec1, vec2) / norm_product


def semantic_search(query, chunks, k=3):
    """Rank chunks against a query and return the best ``k``.

    Each chunk's score is the mean of the cosine similarity between the query
    embedding and the chunk's ``text_embedding`` and the cosine similarity
    between the query embedding and its ``header_embedding``.

    Args:
        query: Query text; embedded with ``create_embeddings``.
        chunks: Iterable of dicts carrying ``text_embedding`` and
            ``header_embedding`` entries.
        k: Number of results to return. ``None`` returns every chunk; zero or
            a negative value returns an empty list.

    Returns:
        A list of ``(chunk, score)`` tuples sorted by descending score.
    """
    # A negative k would otherwise slice off the tail ([:k]) and return the
    # wrong number of results; nothing was requested, so skip the work.
    if k is not None and k <= 0:
        return []

    # Convert the query embedding once rather than twice per chunk.
    query_vector = np.array(create_embeddings(query))

    scored = []
    for chunk in chunks:
        text_score = cosine_similarity(query_vector, np.array(chunk["text_embedding"]))
        header_score = cosine_similarity(query_vector, np.array(chunk["header_embedding"]))
        scored.append((chunk, (text_score + header_score) / 2))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]

In [None]:
# Load the PDF and split it into text chunks
documents=texts_into_documents(path,Chunk_size,Chunk_overlap)

In [None]:
# Generate a header for every chunk and embed both the text and the header
chunks=chunk_text_with_headers(documents)

In [None]:
# Retrieve the best-scoring chunks for the question (semantic_search defaults to k=3)
top_k_dcos=semantic_search(question,chunks)

In [None]:
# Print each retrieved chunk with its generated header and similarity score
divider = "=" * 50
for chunk, score in top_k_dcos:
    preview = chunk['text'][:200]  # only the first 200 characters, to keep the output short
    print(f"Header: {chunk['header']}")
    print(f"Score: {score:.4f}")
    print(f"Context: {preview}")
    print(divider)



Header: Effectiveness and Flexibility of CRAG Across Different LLMs
Score: 0.6206
Context: effectiveness ofCRAG . Its versatility across a spec-
trum of tasks underscores its robust capabilities and
generalizability across diverse scenarios.
Third, the proposed method exhibited greater
flex
Header: Threefold Contributions to Robust and Self-Correcting RAG: First Attempt at Corrective Strategies with CRAG
Score: 0.5972
Context: In summary, our contributions in this paper are
three-fold: 1) This paper studies the scenarios
where the retriever returns inaccurate results and,
to the best of our knowledge, makes the first
attemp
Header: Title: Experiments on CRAG's Adaptability and Generalizability in RAG-Based Systems
Score: 0.5955
Context: regulated web pages like Wikipedia are preferred,
which can significantly help mitigate these issues.
Moreover, we utilize the URL links to navigate
web pages, transcribe their content, and employ the
