In [1]:
# 必要なライブラリのインポート
import os
import tempfile
from minirag import MiniRAG, QueryParam
from minirag.llm.hf import (
    hf_model_complete,
    hf_embed,
)
# from minirag.llm.openai import openrouter_openai_complete
from minirag.llm.openai import openai_complete_if_cache
from minirag.utils import EmbeddingFunc
from minirag.utils import (
    wrap_embedding_func_with_attrs,
    locate_json_string_body_from_string,
    safe_unicode_decode,
    logger,
)
from transformers import AutoModel, AutoTokenizer
import asyncio
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Never store API keys in the notebook itself. Keys already exported in the
# environment are used as-is; any missing key is requested interactively with
# getpass so the value is neither echoed nor saved in the cell source/output.
import getpass

for _key_name in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass.getpass(f"{_key_name}: ")
print("Openrouter APIキーが設定されました")

Openrouter APIキーが設定されました


In [None]:
# --- Embedding model ---------------------------------------------------------
# Previously tried: "sentence-transformers/all-MiniLM-L6-v2".
# Note from the author: MINI mode stopped producing answers after the switch,
# so accuracy may actually have dropped.
EMBEDDING_MODEL, EMBEDDING_DIM = "hotchpotch/static-embedding-japanese", 1024
TOKENIZER_MODEL = "hotchpotch/xlm-roberta-japanese-tokenizer"

# --- LLM ---------------------------------------------------------------------
# Alternatives tried: "Qwen/Qwen3-1.7B", "Qwen/Qwen3-4B",
# "jaeyong2/Qwen2.5-3B-Instruct-Ja-SFT".
LLM_MODEL = "deepseek/deepseek-chat-v3-0324:free"

# --- Working directory ---------------------------------------------------------
# Created up front; an already existing directory is not an error.
WORKING_DIR = "/tmp/minirag_demo"
os.makedirs(WORKING_DIR, exist_ok=True)
print("作業ディレクトリ: " + WORKING_DIR)

# Leftovers from the command-line version of this script (disabled):
# DATA_PATH = args.datapath
# QUERY_PATH = args.querypath
# OUTPUT_PATH = args.outputpath
# print("USING LLM:", LLM_MODEL)
# print("USING WORKING DIR:", WORKING_DIR)

作業ディレクトリ: /tmp/minirag_demo


In [None]:
from sentence_transformers import SentenceTransformer

# Sanity check of the original SentenceTransformer checkpoint: encode one query
# plus a few candidate sentences and print the query-to-candidate similarities.

# The Hugging Face token is taken from the environment instead of being
# hard-coded in the notebook. It is None when HF_TOKEN is not set
# (NOTE(review): presumably fine for a public checkpoint - confirm).
HF_TOKEN = os.environ.get("HF_TOKEN")
model = SentenceTransformer(EMBEDDING_MODEL, device="cpu", token=HF_TOKEN)

query = "美味しいラーメン屋に行きたい"
docs = [
    "素敵なカフェが近所にあるよ。落ち着いた雰囲気でゆっくりできるし、窓際の席からは公園の景色も見えるんだ。",
    "新鮮な魚介を提供する店です。地元の漁師から直接仕入れているので鮮度は抜群ですし、料理人の腕も確かです。",
    "あそこは行きにくいけど、隠れた豚骨の名店だよ。スープが最高だし、麺の硬さも好み。",
    "おすすめの中華そばの店を教えてあげる。とりわけチャーシューが手作りで柔らかくてジューシーなんだ。",
]

# Row 0 is the query; the remaining rows are the candidate sentences.
embeddings = model.encode([query] + docs)
print(embeddings.shape)
similarities = model.similarity(embeddings[0], embeddings[1:])
# Pair each score with its sentence directly instead of indexing back into docs.
for doc, score in zip(docs, similarities[0].tolist()):
    print(f"{score:.04f}: {doc}")

(5, 1024)
0.1040: 素敵なカフェが近所にあるよ。落ち着いた雰囲気でゆっくりできるし、窓際の席からは公園の景色も見えるんだ。
0.2521: 新鮮な魚介を提供する店です。地元の漁師から直接仕入れているので鮮度は抜群ですし、料理人の腕も確かです。
0.4835: あそこは行きにくいけど、隠れた豚骨の名店だよ。スープが最高だし、麺の硬さも好み。
0.3199: おすすめの中華そばの店を教えてあげる。とりわけチャーシューが手作りで柔らかくてジューシーなんだ。


## transformers の API で使えるように変換する

In [5]:
import torch
from torch import nn
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions


class StaticEmbeddingConfig(PretrainedConfig):
    """Configuration for ``StaticEmbeddingModel``.

    Stores ``vocab_size`` (default 32768) and ``hidden_size`` (default 1024),
    which the model uses as the number of rows and the width of its embedding
    table. ``pad_token_id`` (default 0) and any extra keyword arguments are
    forwarded to ``PretrainedConfig.__init__``.
    """
    # Identifier string for this configuration class.
    model_type = "static-embedding"

    def __init__(self, vocab_size: int = 32768, hidden_size: int = 1024, pad_token_id: int = 0, **kwargs):
        super().__init__(pad_token_id=pad_token_id, **kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size


class StaticEmbeddingModel(PreTrainedModel):
    """Embedding-table lookup followed by attention-masked mean pooling.

    The model consists of a single ``nn.Embedding`` whose size is taken from
    ``config.vocab_size`` / ``config.hidden_size``. ``forward`` returns the
    per-token embeddings and their masked mean as the sentence vector.
    """

    config_class = StaticEmbeddingConfig

    def __init__(self, config: StaticEmbeddingConfig):
        super().__init__(config)
        # nn.EmbeddingBag would also work, but padded fixed-length batches that
        # are averaged with an attention mask are easier to handle, hence
        # nn.Embedding.
        self.embedding = nn.Embedding(
            num_embeddings=config.vocab_size,
            embedding_dim=config.hidden_size,
            padding_idx=config.pad_token_id,
        )
        self.post_init()  # transformers weight-initialisation hook

    def forward(self, input_ids, attention_mask=None, **kwargs):
        """Embed a batch of token ids and mean-pool them per sequence.

        Args:
            input_ids: token ids, shape (batch, seq_len).
            attention_mask: optional mask, shape (batch, seq_len), where 0 marks
                padding. When omitted it is derived from
                ``input_ids != config.pad_token_id``.
            **kwargs: accepted and ignored.

        Returns:
            BaseModelOutputWithPoolingAndCrossAttentions with
            ``last_hidden_state`` set to the unmasked per-token embeddings
            (batch, seq_len, hidden) and ``pooler_output`` set to the masked
            mean over the sequence axis (batch, hidden).
        """
        if attention_mask is None:
            attention_mask = (input_ids != self.config.pad_token_id).int()

        token_embs = self.embedding(input_ids)                        # (B, L, H)

        # Cast the mask to the embedding dtype explicitly instead of relying on
        # implicit int -> float promotion further down.
        mask = attention_mask.unsqueeze(-1).to(token_embs.dtype)     # (B, L, 1)

        # Token counts are whole numbers, so clamping at 1 only affects rows that
        # are entirely padding (their masked sum is 0, so the result stays 0).
        # Unlike a tiny epsilon such as 1e-8, this bound remains valid for
        # low-precision dtypes and integer masks.
        lengths = mask.sum(dim=1).clamp(min=1.0)                      # (B, 1)
        sentence_emb = (token_embs * mask).sum(dim=1) / lengths       # (B, H)

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=token_embs,  # token-level embeddings, unmasked
            pooler_output=sentence_emb,    # sentence vector
            attentions=None,
            cross_attentions=None,
        )

In [6]:
"""
SentenceTransformer 版 (hotchpotch/static-embedding-japanese) から
StaticEmbeddingModel へ重みをコピーして保存するスクリプト
"""

# Source checkpoint (SentenceTransformer format) and output directory.
SRC = "hotchpotch/static-embedding-japanese"
DST = "./static-embedding-transformers"

# Step 1: load the SentenceTransformer model and take the weight matrix of the
# embedding layer held by its first module.
st_model = SentenceTransformer(SRC)
embedding_weight = st_model[0].embedding.weight.data

# Step 2: build a config whose sizes mirror the weight matrix, then the model.
# pad_token_id=0 follows the original author's note that the tokenizer's <pad>
# token has id 0.
vocab_size, hidden_size = embedding_weight.shape
config = StaticEmbeddingConfig(
    vocab_size=vocab_size,
    hidden_size=hidden_size,
    pad_token_id=0,
)
model = StaticEmbeddingModel(config)

# Step 3: copy the weights in place without recording gradients.
with torch.no_grad():
    model.embedding.weight.copy_(embedding_weight)

# Step 4: write the model out with save_pretrained.
# The tokenizer is intentionally not saved here (kept for reference):
# st_model.tokenizer.save_pretrained(DST)
model.save_pretrained(DST)

print(f"✅ 変換完了 — 保存先: {DST}")

✅ 変換完了 — 保存先: ./static-embedding-transformers


In [7]:
from transformers import AutoTokenizer

# Reload the converted checkpoint and compare two sentences to check that the
# conversion kept the embeddings intact.
MODEL_DIR = "./static-embedding-transformers"

# The tokenizer is loaded from TOKENIZER_MODEL rather than MODEL_DIR
# (alternative kept for reference: AutoTokenizer.from_pretrained(MODEL_DIR)).
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_MODEL)
model = StaticEmbeddingModel.from_pretrained(MODEL_DIR)

sentences = [
    "美味しいラーメン屋に行きたい",
    "あそこは行きにくいけど、隠れた豚骨の名店だよ。スープが最高だし、麺の硬さも好み。",
]

# add_special_tokens=False: per the original author, the source model uses no
# special tokens.
tokenize_options = dict(
    return_tensors="pt",
    padding=True,
    truncation=True,
    add_special_tokens=False,
)
inputs = tokenizer(sentences, **tokenize_options)

with torch.no_grad():
    outputs = model(**inputs)
vecs = outputs.pooler_output  # one sentence vector per input: (batch, hidden_size)

print(f"shape: {vecs.shape}")
similarity = torch.nn.functional.cosine_similarity(vecs[0], vecs[1], dim=0)
print(f"cosine: {similarity.item()}")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


shape: torch.Size([2, 1024])
cosine: 0.4834601879119873


In [9]:
async def openrouter_openai_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    api_key: str = None,
    **kwargs,
) -> str:
    """Send a completion request for ``LLM_MODEL`` to the OpenRouter endpoint.

    Thin wrapper around ``openai_complete_if_cache`` that pins the model name
    and the OpenRouter base URL.

    Args:
        prompt: user prompt, forwarded unchanged.
        system_prompt: optional system prompt, forwarded unchanged.
        history_messages: optional earlier messages; an empty list is sent when
            omitted.
        keyword_extraction: when true, the completion is passed through
            ``locate_json_string_body_from_string`` and that result is returned
            instead of the raw completion.
        api_key: forwarded unchanged (may be None).
        **kwargs: forwarded unchanged to ``openai_complete_if_cache``.

    Returns:
        The completion, or the output of
        ``locate_json_string_body_from_string`` when ``keyword_extraction`` is
        true.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if history_messages is None:
        history_messages = []

    # `keyword_extraction` is a named parameter, so it can never show up in
    # **kwargs. The previous `kwargs.pop("keyword_extraction", None)` therefore
    # always overwrote the caller's value with None and disabled the branch
    # below; the parameter is now used directly.
    result = await openai_complete_if_cache(
        LLM_MODEL,  # change accordingly
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
        **kwargs,
    )
    if keyword_extraction:  # TODO: use JSON API
        return locate_json_string_body_from_string(result)
    return result

In [10]:
!pwd

!ls

/app
MiniRAG_static-embedding-japanese.ipynb  setup.py
minirag					 static-embedding-transformers


In [11]:
# Build the MiniRAG instance used by the remaining cells.

def _embed_texts(texts):
    """Embed `texts` with hf_embed using the notebook-level tokenizer and model."""
    return hf_embed(
        texts,
        tokenizer=tokenizer,
        embed_model=model,
    )


embedding_func = EmbeddingFunc(
    embedding_dim=EMBEDDING_DIM,
    max_token_size=1000,
    func=_embed_texts,
)

rag = MiniRAG(
    working_dir=WORKING_DIR,
    # PostgreSQL-backed storage (disabled):
    # kv_storage="PGKVStorage",
    # vector_storage="PGVectorStorage",
    # graph_storage="PGGraphStorage",
    # Other LLM backends tried (disabled):
    # llm_model_func=hf_model_complete,
    # llm_model_func=gemini_2_5_flash_complete,
    llm_model_func=openrouter_openai_complete,
    llm_model_max_token_size=200,
    llm_model_name=LLM_MODEL,
    embedding_func=embedding_func,
)

print("MiniRAGが初期化されました！")

INFO:nano-vectordb:Load (31, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': '/tmp/minirag_demo/vdb_entities.json'} 31 data
INFO:nano-vectordb:Load (31, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': '/tmp/minirag_demo/vdb_entities_name.json'} 31 data
INFO:nano-vectordb:Load (39, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': '/tmp/minirag_demo/vdb_relationships.json'} 39 data
INFO:nano-vectordb:Load (4, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': '/tmp/minirag_demo/vdb_chunks.json'} 4 data
INFO:minirag:Loaded document status storage with 4 records


MiniRAGが初期化されました！


In [12]:
# Took about 12 minutes
import time
start_time = time.time()

# Sample text data: four short Japanese diary-style passages, inserted one by one below
sample_texts = [
    """
今日は素晴らしい一日でした。朝早く起きて、近所の公園を散歩しました。
桜の花が満開で、とても美しかったです。午後は友人と映画を見に行きました。
「君の名は。」という映画で、とても感動的でした。
夜は家族と一緒に夕食を取り、楽しい時間を過ごしました。
""",
    """
昨日は仕事で大きなプロジェクトが完了しました。
チーム全員で3ヶ月間取り組んできたAIシステムの開発が終わりました。
機械学習モデルの精度が95%を超え、クライアントからも高い評価をいただきました。
今夜はチームメンバーと祝賀会を開く予定です。
""",
    """
週末は料理に挑戦しました。初めてパスタを一から作ってみました。
小麦粉から麺を作るのは思っていたより難しかったですが、
最終的にはとても美味しいカルボナーラができました。
次回はリゾットに挑戦してみたいと思います。
""",
    """
読書が趣味で、最近は村上春樹の「ノルウェイの森」を読んでいます。
主人公の心情描写がとても繊細で、引き込まれます。
また、技術書も読んでおり、「深層学習」について学んでいます。
理論と実践のバランスが取れた良い本だと思います。
"""
]

# Insert the data
print("データを挿入中...")

async def insert_texts(rag_instance, texts):
    """Insert each text into `rag_instance`, one after another.

    Every text is stripped of surrounding whitespace and handed to
    `rag_instance.ainsert`; a progress line is printed before each insert and
    a completion message once all texts are done.
    """
    for position, raw_text in enumerate(texts, start=1):
        print(f"テキスト {position}/{len(texts)} を挿入中...")
        await rag_instance.ainsert(raw_text.strip())
    print("\nすべてのデータが挿入されました！")


# イベントループが既に実行中の場合
try:
    await insert_texts(rag, sample_texts)
except RuntimeError:
    # 新しいループで実行
    asyncio.run(insert_texts(rag, sample_texts))

end_time = time.time()
elapsed_time = end_time - start_time
print(f"処理時間: {elapsed_time:.4f}秒")

INFO:minirag:No new unique documents were found.
INFO:minirag:No documents to process
INFO:minirag:Performing entity extraction on newly processed chunks


データを挿入中...
テキスト 1/4 を挿入中...


INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠙ Processed 1 chunks, 5 entities(duplicated), 4 relations(duplicated)

INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠼ Processed 4 chunks, 31 entities(duplicated), 22 relations(duplicated)

INFO:minirag:Inserting 31 vectors to entities





Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 26.10batch/s]
INFO:minirag:Inserting 31 vectors to entities
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 103.74batch/s]
INFO:minirag:Inserting 31 vectors to entities_name
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 658.45batch/s]
INFO:minirag:Inserting 22 vectors to relationships
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 118.00batch/s]
INFO:minirag:Writing graph with 40 nodes, 48 edges
INFO:minirag:No new unique documents were found.
INFO:minirag:No documents to process
INFO:minirag:Performing entity extraction on newly processed chunks


テキスト 2/4 を挿入中...


INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠸ Processed 3 chunks, 19 entities(duplicated), 15 relations(duplicated)

INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠼ Processed 4 chunks, 27 entities(duplicated), 21 relations(duplicated)

INFO:minirag:Inserting 27 vectors to entities





Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 40.25batch/s]
INFO:minirag:Inserting 27 vectors to entities
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 128.83batch/s]
INFO:minirag:Inserting 27 vectors to entities_name
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 438.55batch/s]
INFO:minirag:Inserting 21 vectors to relationships
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 130.64batch/s]
INFO:minirag:Writing graph with 40 nodes, 55 edges
INFO:minirag:No new unique documents were found.
INFO:minirag:No documents to process
INFO:minirag:Performing entity extraction on newly processed chunks


テキスト 3/4 を挿入中...


INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠙ Processed 1 chunks, 2 entities(duplicated), 3 relations(duplicated)

INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠼ Processed 4 chunks, 21 entities(duplicated), 22 relations(duplicated)

INFO:minirag:Inserting 21 vectors to entities





Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 15.74batch/s]
INFO:minirag:Inserting 21 vectors to entities
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 97.41batch/s]
INFO:minirag:Inserting 21 vectors to entities_name
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 624.34batch/s]
INFO:minirag:Inserting 22 vectors to relationships
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00, 66.00batch/s]
INFO:minirag:Writing graph with 40 nodes, 56 edges
INFO:minirag:No new unique documents were found.
INFO:minirag:No documents to process
INFO:minirag:Performing entity extraction on newly processed chunks


テキスト 4/4 を挿入中...


INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.432895 seconds
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.857874 seconds
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit exceeded: free-models-per-min. ', 'code': 429, 'metadata': {'headers': {'X-RateLimit-Limit': '20', 'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '1752853680000'}, 'provider_name': None}}, 'user_id': 'user_2eRMIrkhoLsGuVqbOgFuCKdizrH'}

INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.437189 seconds


⠙ Processed 1 chunks, 6 entities(duplicated), 5 relations(duplicated)

INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 0.817176 seconds
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


⠹ Processed 2 chunks, 16 entities(duplicated), 12 relations(duplicated)

In [None]:
# 約3分かかった

# # サンプルクエリ
# queries = [
#     "映画について教えて",
#     "仕事のプロジェクトはどうでしたか？",
#     "料理で何を作りましたか？",
#     "読んでいる本について教えて",
#     "散歩について詳しく教えて"
# ]

# # 各モードでクエリを実行。この3つがある
# modes = ["naive", "mini", "light"]

# for query in queries:
#     print(f"\n{'='*50}")
#     print(f"クエリ: {query}")
#     print(f"{'='*50}")

#     for mode in modes:
#         print(f"\n--- {mode.upper()}モード ---")
#         try:
#             answer = rag.query(query, param=QueryParam(mode=mode))     # .replace("\n", "").replace("\r", "")
#             print(f"回答: {answer}")
#         except Exception as e:
#             print(f"エラー: {e}")


async def run_queries(rag_instance, queries, modes=("naive", "mini", "light")):
    """Run every query in every retrieval mode and print the answers.

    Args:
        rag_instance: object exposing an async ``aquery(query, param=...)``.
        queries: iterable of query strings.
        modes: retrieval modes to try for each query, in order. Defaults to the
            three modes used by this notebook ("naive", "mini", "light"); pass a
            shorter sequence to issue fewer LLM requests.

    Returns:
        None. Answers and errors are printed.
    """
    separator = "=" * 50

    for query in queries:
        print(f"\n{separator}")
        print(f"クエリ: {query}")
        print(separator)

        for mode in modes:
            print(f"\n--- {mode.upper()}モード ---")
            try:
                # Run the query asynchronously in the current mode.
                answer = await rag_instance.aquery(query, param=QueryParam(mode=mode))
            except Exception as e:
                # Best-effort demo loop: report the failure and move on to the
                # next mode instead of aborting the whole run.
                print(f"エラー: {e}")
            else:
                print(f"回答: {answer}")


start_time = time.time()

sample_queries = [
    "映画について教えて",
    "仕事のプロジェクトはどうでしたか？",
    "料理で何を作りましたか？",
    "読んでいる本について教えて",
    "散歩について詳しく教えて"
]

# イベントループが既に実行中の場合
try:
    await run_queries(rag, sample_queries)
except RuntimeError:
    # 新しいループで実行
    asyncio.run(run_queries(rag, sample_queries))


end_time = time.time()
elapsed_time = end_time - start_time
print(f"処理時間: {elapsed_time:.4f}秒")

INFO:minirag:Query: 映画について教えて, top_k: 60, cosine_better_than_threshold: 0.2
INFO:minirag:Truncate 1 to 1 chunks



クエリ: 映画について教えて

--- NAIVEモード ---


INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


In [None]:
1/0