In [None]:
#クエリに対する論文の検索・表示（gemini使用なし）

import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
import ipywidgets as widgets
from IPython.display import display, HTML

# == Parameters ==
INDEX_FILE = "faiss_index.bin"        # FAISS index file read below
METADATA_NPZ = "faiss_metadata.npz"   # archive holding "metadata_list" and "paragraphs"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
EMB_DIM = 384  # presumably the embedding dimension; not referenced in the code shown
TOP_K = 5  # how many top hits to display

# == Load the embedding model ==
model = SentenceTransformer(MODEL_NAME)

# == Load the FAISS index ==
index = faiss.read_index(INDEX_FILE)

# == Load metadata and paragraph texts ==
# NOTE(security): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code. Only load archives that you produced yourself.
# The context manager closes the underlying .npz file handle as soon as the
# arrays have been converted to plain Python lists (the original left it open).
with np.load(METADATA_NPZ, allow_pickle=True) as data:
    metadata_list = data["metadata_list"].tolist()  # np.array => Python list
    paragraphs = data["paragraphs"].tolist()        # paragraph texts, indexed like metadata_list

print("インデックスとメタデータを読み込みました。ベクトル数:", index.ntotal)

# == 検索関数 ==
def search_faiss(query, top_k=TOP_K):
    """Return the stored paragraphs most similar to ``query``.

    The query is embedded with the module-level ``model``, scaled to unit
    length and searched against the module-level FAISS ``index``.

    Args:
        query: Query text to embed.
        top_k: Maximum number of hits to request from the index.

    Returns:
        A list of dicts with the keys ``"score"`` (float), ``"text"`` (the
        entry of ``paragraphs`` at the hit position) and ``"metadata"`` (the
        entry of ``metadata_list`` at the same position), in the order FAISS
        returned them. The list may be shorter than ``top_k``.
    """
    # Embed the query and make sure it is float32, as FAISS expects.
    emb_query = model.encode([query], show_progress_bar=False)
    emb_query = emb_query.astype("float32")

    # Scale to a unit vector so that an inner-product search behaves like
    # cosine similarity (assumes the index was built with inner product on
    # normalised vectors -- TODO confirm against the indexing script).
    norm = np.sqrt((emb_query * emb_query).sum(axis=1, keepdims=True))
    norm[norm == 0] = 1.0  # avoid 0/0 -> NaN for a degenerate all-zero embedding
    emb_query = emb_query / norm

    # D and I both have shape (1, top_k): scores and row positions.
    D, I = index.search(emb_query, top_k)

    results = []
    for score, idx in zip(D[0], I[0]):
        # FAISS pads the result with -1 when fewer than top_k vectors are
        # available; indexing a Python list with -1 would silently return the
        # LAST paragraph as a bogus hit, so skip those slots.
        if idx < 0:
            continue
        idx = int(idx)
        # paragraphs and metadata_list are assumed to be stored in the same
        # order as the vectors in the index, so one position addresses both.
        results.append({
            "score": float(score),
            "text": paragraphs[idx],
            "metadata": metadata_list[idx],
        })

    return results

# == Build the GUI widgets ==
# Single-line text input for the search query.
query_box = widgets.Text(
    layout=widgets.Layout(width="400px"),
    description="Query:",
)
# Output widget used as the rendering target for messages and results.
output_area = widgets.Output()
# Button that starts a search when clicked.
search_button = widgets.Button(description="Search")

def on_search_clicked(b):
    """Run a search for the text in ``query_box`` and render the hits.

    All output is written into ``output_area``, which is cleared first.
    An empty query only shows a prompt asking the user to enter one.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    # Local import: the query, paragraph text and metadata are arbitrary text
    # and must be escaped before being embedded in HTML, otherwise characters
    # such as "<" or "&" break the rendering or inject markup.
    from html import escape

    with output_area:
        output_area.clear_output()
        query = query_box.value.strip()
        if not query:
            display(HTML("<b>クエリを入力してください。</b>"))
            return

        results = search_faiss(query, top_k=TOP_K)

        # Collect fragments and join once instead of repeated string +=.
        parts = [f"<h3>検索クエリ: {escape(query)}</h3>", "<ol>"]
        for r in results:
            meta = r["metadata"]
            parts.append(
                f"<li><b>Score:</b> {r['score']:.4f} <br>"
                f"<b>ID:</b> {escape(str(meta.get('id')))}<br>"
                f"<b>Page:</b> {escape(str(meta.get('page')))}<br>"
                f"<b>Source:</b> {escape(str(meta.get('source')))}<br>"
                f"<div style='margin-top:5px; border:1px solid #ccc; padding:5px;'>"
                f"{escape(str(r['text']))}</div></li><br>"
            )
        parts.append("</ol>")

        display(HTML("".join(parts)))

# Register on_search_clicked as the click handler of the search button.
search_button.on_click(on_search_clicked)

# == Render the UI ==
# Show the query box, the button and the output area in the notebook.
display(query_box, search_button, output_area)


In [None]:
#英語でクエリ入力・回答生成（gemini APIを使用）
import numpy as np
import faiss
import ipywidgets as widgets
from IPython.display import display, HTML
from sentence_transformers import SentenceTransformer
import google.generativeai as genai
from dotenv import load_dotenv
import os

load_dotenv()  # read variables from a .env file into the environment

API_KEY = os.getenv("GEMINI_API_KEY")  # API key taken from the environment (None if unset)

# === Settings ===
INDEX_FILE = 'faiss_index.bin'
METADATA_NPZ = 'faiss_metadata.npz'
MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
EMB_DIM = 384  # presumably the embedding dimension; not referenced in the code shown
TOP_K = 5      # number of paragraphs retrieved per query

# === Initialise Gemini ===
genai.configure(api_key=API_KEY)
# === Load model, index and data ===
print("Loading model...")
model = SentenceTransformer(MODEL_NAME)

print("Loading FAISS index...")
index = faiss.read_index(INDEX_FILE)

print("Loading metadata...")
# NOTE(security): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code. Only load archives that you produced yourself.
# The context manager closes the underlying .npz file handle once both arrays
# have been converted to Python lists (the original left it open).
with np.load(METADATA_NPZ, allow_pickle=True) as data:
    metadata_list = data['metadata_list'].tolist()
    paragraphs = data['paragraphs'].tolist()

# === 検索関数 ===
def search_faiss(query, k=TOP_K):
    """Return the stored paragraphs most similar to ``query``.

    Args:
        query: Query text; embedded with the module-level ``model``.
        k: Maximum number of hits to request from the module-level ``index``.

    Returns:
        A list of dicts with the keys ``'score'`` (float), ``'paragraph'``
        and ``'metadata'``, in the order FAISS returned them. The list may be
        shorter than ``k``.
    """
    q_vec = model.encode([query], convert_to_numpy=True).astype('float32')
    faiss.normalize_L2(q_vec)  # normalises q_vec in place
    D, I = index.search(q_vec, k)

    results = []
    for score, idx in zip(D[0], I[0]):
        # FAISS pads the result with -1 when fewer than k vectors are
        # available; a Python list indexed with -1 would silently yield the
        # LAST paragraph as a bogus hit, so skip those slots.
        if idx < 0:
            continue
        idx = int(idx)
        # paragraphs and metadata_list are assumed to share the index's row order.
        results.append({
            'score': float(score),
            'paragraph': paragraphs[idx],
            'metadata': metadata_list[idx],
        })
    return results

# === Geminiで回答生成（引用番号付き）===
def generate_answer_with_gemini(query, top_results):
    """Ask Gemini to answer ``query`` using the retrieved paragraphs as context.

    Args:
        query: The question to answer.
        top_results: Iterable of dicts that each carry a ``'paragraph'`` entry;
            the paragraphs are numbered from 1 in iteration order.

    Returns:
        The ``text`` attribute of the Gemini response.
    """
    # "[n] paragraph" entries, separated by blank lines.
    numbered_context = "\n\n".join(
        f"[{rank}] {hit['paragraph']}"
        for rank, hit in enumerate(top_results, 1)
    )

    prompt_pieces = (
        "Use the numbered context paragraphs below to answer the question concisely. ",
        "Reference the sources using [1], [2], etc., where appropriate.\n\n",
        f"{numbered_context}\n\n",
        f"Question: {query}\n\n",
        "Answer:",
    )
    full_prompt = "".join(prompt_pieces)

    gemini = genai.GenerativeModel('models/gemini-1.5-flash-latest')
    return gemini.generate_content(full_prompt).text

# === GUI widgets ===
# Single-line text input for the question.
query_box = widgets.Text(
    placeholder="Enter your question...",
    layout=widgets.Layout(width="500px"),
    description="Query:",
)
# Output widget used as the rendering target for progress text and results.
output_area = widgets.Output()
# Button that starts retrieval and answer generation when clicked.
search_button = widgets.Button(button_style="success", description="Search & Answer")

def on_search_clicked(b):
    """Retrieve context for the query, ask Gemini, and render answer and sources.

    All output is written into ``output_area``, which is cleared first.
    An empty query only shows a prompt asking the user to enter one.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    # Local import: the model answer and the metadata are arbitrary text and
    # must be escaped before being embedded in HTML, otherwise characters such
    # as "<" or "&" break the rendering or inject markup.
    from html import escape

    with output_area:
        output_area.clear_output()
        query = query_box.value.strip()
        if not query:
            display(HTML("<b style='color:red;'>Please enter a query.</b>"))
            return

        print("Searching documents...")
        top_results = search_faiss(query, k=TOP_K)

        print("Generating answer with Gemini...")
        answer = generate_answer_with_gemini(query, top_results)

        # Show the answer; pre-wrap keeps the line breaks of the generated
        # text, which plain HTML would otherwise collapse.
        display(HTML(
            "<h3>Gemini Answer</h3>"
            f"<p style='white-space: pre-wrap;'>{escape(str(answer))}</p>"
        ))

        # Show the sources in the same order as the [n] citation numbers.
        ref_items = [
            f"<li><b>{escape(str(r['metadata'].get('title')))}</b> "
            f"(page {escape(str(r['metadata'].get('page')))})<br>"
            f"<i>{escape(str(r['metadata'].get('source')))}</i></li>"
            for r in top_results
        ]
        display(HTML("<h4>References</h4><ol>" + "".join(ref_items) + "</ol>"))

# Register on_search_clicked as the click handler of the search button.
search_button.on_click(on_search_clicked)

# === Render the UI ===
# Stack the query box, the button and the output area vertically and show them.
display(widgets.VBox([query_box, search_button, output_area]))


In [None]:
#日本語対応（クエリの翻訳にもgeminiを使用）
import numpy as np
import faiss
import ipywidgets as widgets
from IPython.display import display, HTML
from sentence_transformers import SentenceTransformer
import google.generativeai as genai
from dotenv import load_dotenv
import os

# === Load environment variables ===
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")  # None if the variable is not set

# === Initialise Gemini ===
genai.configure(api_key=API_KEY)

# === Settings ===
INDEX_FILE = 'faiss_index.bin'
METADATA_NPZ = 'faiss_metadata.npz'
MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
EMB_DIM = 384  # presumably the embedding dimension; not referenced in the code shown
TOP_K = 5      # number of paragraphs retrieved per query

# === Load model, index and metadata ===
print("Loading model...")
model = SentenceTransformer(MODEL_NAME)

print("Loading FAISS index...")
index = faiss.read_index(INDEX_FILE)

print("Loading metadata...")
# NOTE(security): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code. Only load archives that you produced yourself.
# The context manager closes the underlying .npz file handle once both arrays
# have been converted to Python lists (the original left it open).
with np.load(METADATA_NPZ, allow_pickle=True) as data:
    metadata_list = data['metadata_list'].tolist()
    paragraphs = data['paragraphs'].tolist()

# === Geminiで日本語→英語 翻訳 ===
def translate_query_to_english(japanese_query):
    """Translate a Japanese query into English with Gemini.

    Args:
        japanese_query: Text to translate.

    Returns:
        The response text with leading and trailing whitespace removed.
    """
    request_text = f"次の日本語を正確な英語に翻訳してください：\n\n{japanese_query}"
    gemini = genai.GenerativeModel('models/gemini-1.5-flash-latest')
    reply = gemini.generate_content(request_text)
    translated = reply.text
    return translated.strip()

# === FAISS検索関数 ===
def search_faiss(query, k=TOP_K):
    """Return the stored paragraphs most similar to ``query``.

    Args:
        query: Query text; embedded with the module-level ``model``.
        k: Maximum number of hits to request from the module-level ``index``.

    Returns:
        A list of dicts with the keys ``'score'`` (float), ``'paragraph'``
        and ``'metadata'``, in the order FAISS returned them. The list may be
        shorter than ``k``.
    """
    q_vec = model.encode([query], convert_to_numpy=True).astype('float32')
    faiss.normalize_L2(q_vec)  # normalises q_vec in place
    D, I = index.search(q_vec, k)

    results = []
    for score, idx in zip(D[0], I[0]):
        # FAISS pads the result with -1 when fewer than k vectors are
        # available; a Python list indexed with -1 would silently yield the
        # LAST paragraph as a bogus hit, so skip those slots.
        if idx < 0:
            continue
        idx = int(idx)
        # paragraphs and metadata_list are assumed to share the index's row order.
        results.append({
            'score': float(score),
            'paragraph': paragraphs[idx],
            'metadata': metadata_list[idx],
        })
    return results

# === Geminiで回答生成（日本語出力・出典番号付き）===
def generate_answer_with_gemini(original_japanese_query, top_results):
    """Ask Gemini for a Japanese answer based on the retrieved paragraphs.

    Args:
        original_japanese_query: The question as originally entered; it is
            placed in the prompt unchanged.
        top_results: Iterable of dicts that each carry a ``'paragraph'`` entry;
            the paragraphs are numbered from 1 in iteration order.

    Returns:
        The ``text`` attribute of the Gemini response.
    """
    # "[n] paragraph" entries, separated by blank lines.
    numbered_context = "\n\n".join(
        f"[{rank}] {hit['paragraph']}"
        for rank, hit in enumerate(top_results, 1)
    )

    prompt_pieces = (
        "以下の番号付きの文脈を参考にして、質問に日本語で簡潔に答えてください。\n",
        "適切な箇所には [1], [2] などで出典を示してください。\n\n",
        f"{numbered_context}\n\n",
        f"質問: {original_japanese_query}\n\n",
        "回答:",
    )
    full_prompt = "".join(prompt_pieces)

    gemini = genai.GenerativeModel('models/gemini-1.5-flash-latest')
    return gemini.generate_content(full_prompt).text

# === Build the GUI widgets ===
# Single-line text input for the question.
query_box = widgets.Text(
    placeholder="日本語で質問を入力してください...",
    layout=widgets.Layout(width="500px"),
    description="質問:",
)
# Output widget used as the rendering target for progress text and results.
output_area = widgets.Output()
# Button that starts translation, retrieval and answer generation when clicked.
search_button = widgets.Button(button_style="success", description="検索 & 回答生成")

def on_search_clicked(b):
    """Translate the question, retrieve context, ask Gemini, and render the result.

    The question is translated to English for retrieval only; the original
    text is what gets passed to answer generation. All output is written into
    ``output_area``, which is cleared first. An empty question only shows a
    prompt asking the user to enter one.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    # Local import: the model answer and the metadata are arbitrary text and
    # must be escaped before being embedded in HTML, otherwise characters such
    # as "<" or "&" break the rendering or inject markup.
    from html import escape

    with output_area:
        output_area.clear_output()
        japanese_query = query_box.value.strip()
        if not japanese_query:
            display(HTML("<b style='color:red;'>質問を入力してください。</b>"))
            return

        print("英語に翻訳中...")
        english_query = translate_query_to_english(japanese_query)

        print("FAISSで検索中...")
        top_results = search_faiss(english_query, k=TOP_K)

        print("Geminiで回答生成中...")
        answer = generate_answer_with_gemini(japanese_query, top_results)

        # Show the answer; pre-wrap keeps the line breaks of the generated
        # text, which plain HTML would otherwise collapse.
        display(HTML(
            "<h3>Geminiの回答</h3>"
            f"<p style='white-space: pre-wrap;'>{escape(str(answer))}</p>"
        ))

        # Show the sources in the same order as the [n] citation numbers.
        ref_items = [
            f"<li><b>{escape(str(r['metadata'].get('title')))}</b> "
            f"(page {escape(str(r['metadata'].get('page')))})<br>"
            f"<i>{escape(str(r['metadata'].get('source')))}</i></li>"
            for r in top_results
        ]
        display(HTML("<h4>出典</h4><ol>" + "".join(ref_items) + "</ol>"))

# Register on_search_clicked as the click handler of the search button.
search_button.on_click(on_search_clicked)

# === Render the UI ===
# Stack the query box, the button and the output area vertically and show them.
display(widgets.VBox([query_box, search_button, output_area]))
