In [None]:
import numpy as np
from collections import defaultdict
from sentence_transformers import SentenceTransformer
from pyvis.network import Network
import os

# --- Load the embedding model and the precomputed data ---
# The model is used by build_query_network below to encode the query text.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can run
# arbitrary code; only load an .npz file that comes from a trusted source.
data = np.load("faiss_metadata.npz", allow_pickle=True)
# Metadata entries; the code below reads their "source" and "title" keys.
metadata_list = data["metadata_list"].tolist()
# Paragraph embeddings, paired positionally with metadata_list via zip().
# NOTE(review): presumably produced with the same model as above — confirm.
paragraph_embeddings = data["embeddings"]

# --- Compute one mean embedding per document ---
def compute_doc_embeddings(paragraph_embeddings, metadata_list):
    """Average paragraph embeddings into one normalized vector per document.

    Paragraphs are grouped by ``meta["source"]``. Each paragraph vector is
    L2-normalized, the normalized vectors are averaged, and the mean is
    L2-normalized again.

    Args:
        paragraph_embeddings: Iterable of 1-D vectors of equal length (for
            example a 2-D array with one row per paragraph).
        metadata_list: Iterable of dicts paired positionally with
            ``paragraph_embeddings``; each must have a ``"source"`` key.

    Returns:
        dict mapping each source to its float32 document vector. A document
        whose paragraphs are all zero vectors maps to a zero vector.
    """
    grouped = defaultdict(list)
    for emb, meta in zip(paragraph_embeddings, metadata_list):
        grouped[meta["source"]].append(emb)

    doc_embeddings = {}
    for source, vectors in grouped.items():
        vecs = np.vstack(vectors)
        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
        # A zero-length paragraph vector would turn the whole document into
        # NaN (0 / 0); dividing by 1 keeps it as a harmless zero vector.
        norms[norms == 0] = 1.0
        # Out-of-place division also works when the input has an integer dtype.
        mean_vec = (vecs / norms).mean(axis=0)
        mean_norm = np.linalg.norm(mean_vec)
        if mean_norm > 0:
            mean_vec = mean_vec / mean_norm
        doc_embeddings[source] = mean_vec.astype("float32")
    return doc_embeddings

# --- Build and visualize the document network for a query ---
def build_query_network(query, similarity_threshold=0.6, max_nodes=30, edge_threshold=0.7):
    """Render the documents most similar to ``query`` as a pyvis network.

    Document vectors are recomputed from the module-level
    ``paragraph_embeddings`` / ``metadata_list`` on every call, and the query
    is encoded with the module-level ``model``.

    Args:
        query: Text to encode and compare against every document vector.
        similarity_threshold: Documents whose dot-product similarity to the
            query is below this value are dropped.
        max_nodes: Maximum number of documents kept, best score first.
        edge_threshold: Two kept documents are linked when their dot-product
            similarity is strictly greater than this value.

    Returns:
        None. Writes ``query_network.html``; prints a notice and returns early
        when no document passes ``similarity_threshold``.
    """
    print(f"\n🔍 クエリ: '{query}'")
    print(f"📏 類似度しきい値: {similarity_threshold}, 表示ノード上限: {max_nodes}, エッジ閾値: {edge_threshold}")

    # Document vectors and the normalized query vector.
    doc_embeddings = compute_doc_embeddings(paragraph_embeddings, metadata_list)
    query_vec = model.encode([query], normalize_embeddings=True)[0]

    # Score every document, keep those at or above the threshold, then take
    # the best max_nodes by descending score.
    ranked = []
    for doc, emb in doc_embeddings.items():
        score = float(np.dot(query_vec, emb))
        if score >= similarity_threshold:
            ranked.append((doc, score))
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    ranked = ranked[:max_nodes]

    if not ranked:
        print("⚠️ 該当する文書が見つかりませんでした。条件をゆるめて再実行してください。")
        return

    selected_docs = dict(ranked)
    print(f"✅ 表示対象ノード数: {len(selected_docs)}")

    # --- Create the network ---
    net = Network(height="600px", width="100%", notebook=True)
    net.force_atlas_2based()

    # One node per kept document, labelled with the first title recorded for
    # that source (falling back to the source itself).
    for doc, sim in selected_docs.items():
        title = next((m["title"] for m in metadata_list if m["source"] == doc), doc)
        net.add_node(doc, label=title[:50], title=title, value=sim)

    # One edge per unordered pair of kept documents that are similar enough.
    node_ids = list(selected_docs)
    for idx, left in enumerate(node_ids):
        for right in node_ids[idx + 1:]:
            pair_sim = float(np.dot(doc_embeddings[left], doc_embeddings[right]))
            if pair_sim > edge_threshold:
                net.add_edge(left, right, value=pair_sim)

    # --- Write the HTML file ---
    output_path = "query_network.html"
    net.show(output_path)
    print(f"🌐 ネットワークを '{output_path}' に保存しました。ブラウザで開いて確認してください。")

# === Example run ===
build_query_network(
    query="filamin-A",  # any query string can be used here
    similarity_threshold=0.4,  # keep only papers whose similarity to the query is at least this
    max_nodes=40,              # show at most the top 40 (too many nodes makes the graph heavy)
    edge_threshold=0.7         # similarity above which two papers are connected by an edge
)



In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer
from pyvis.network import Network
import ipywidgets as widgets
from IPython.display import display, clear_output
from collections import defaultdict

# --- Load the embedding model and the precomputed data ---
# NOTE(review): `model` is not referenced again in the visible part of this cell.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can run
# arbitrary code; only load an .npz file that comes from a trusted source.
data = np.load("faiss_metadata.npz", allow_pickle=True)
# Metadata entries; the code below reads their "source" and "title" keys.
metadata_list = data["metadata_list"].tolist()
# Paragraph embeddings, paired positionally with metadata_list via zip().
paragraph_embeddings = data["embeddings"]

# --- Compute one mean embedding (and a title) per document ---
def compute_doc_embeddings(paragraph_embeddings, metadata_list):
    """Average paragraph embeddings into one normalized vector per document.

    Paragraphs are grouped by ``meta["source"]``. Each paragraph vector is
    L2-normalized, the normalized vectors are averaged, and the mean is
    L2-normalized again.

    Args:
        paragraph_embeddings: Iterable of 1-D vectors of equal length (for
            example a 2-D array with one row per paragraph).
        metadata_list: Iterable of dicts paired positionally with
            ``paragraph_embeddings``; each must have a ``"source"`` key and
            may have a ``"title"`` key.

    Returns:
        ``(doc_embeddings, doc_titles)``: two dicts keyed by source. The first
        holds float32 document vectors (a zero vector when every paragraph of
        the document is a zero vector); the second holds the first non-empty
        title seen for the source, or the source itself when there is none.
    """
    grouped = defaultdict(list)
    first_titles = {}
    for emb, meta in zip(paragraph_embeddings, metadata_list):
        source = meta["source"]
        grouped[source].append(emb)
        # Record the title during this single pass instead of rescanning
        # metadata_list once per document afterwards.
        title = meta.get("title")
        if title and source not in first_titles:
            first_titles[source] = title

    doc_embeddings = {}
    doc_titles = {}
    for source, vectors in grouped.items():
        vecs = np.vstack(vectors)
        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
        # A zero-length paragraph vector would turn the whole document into
        # NaN (0 / 0); dividing by 1 keeps it as a harmless zero vector.
        norms[norms == 0] = 1.0
        # Out-of-place division also works when the input has an integer dtype.
        mean_vec = (vecs / norms).mean(axis=0)
        mean_norm = np.linalg.norm(mean_vec)
        if mean_norm > 0:
            mean_vec = mean_vec / mean_norm
        doc_embeddings[source] = mean_vec.astype("float32")
        doc_titles[source] = first_titles.get(source, source)
    return doc_embeddings, doc_titles

# --- Preparation ---
doc_embeddings, doc_titles = compute_doc_embeddings(paragraph_embeddings, metadata_list)
# Reverse lookup title -> source. Documents that share a title collapse into
# one entry (the last one wins), so only one of them is selectable.
title_to_source = {title: source for source, title in doc_titles.items()}
# Alphabetically sorted titles, filtered by the search box handler.
all_titles = sorted(title_to_source.keys())

# --- Draw the network around one paper ---
def build_paper_network(center_source, top_k=20, edge_threshold=0.75):
    """Render the papers most similar to ``center_source`` as a pyvis network.

    Reads the module-level ``doc_embeddings`` and ``doc_titles``.

    Args:
        center_source: Key of the central paper in ``doc_embeddings``.
        top_k: Number of most similar other papers to show.
        edge_threshold: An edge is drawn between two shown papers (including
            the center) when their dot-product similarity is strictly greater
            than this value.

    Returns:
        None. Writes ``center_paper_network.html`` and prints a notice.
    """
    center_vec = doc_embeddings[center_source]

    # Every other paper scored against the center, best first, cut to top_k.
    neighbours = [
        (doc, float(np.dot(center_vec, vec)))
        for doc, vec in doc_embeddings.items()
        if doc != center_source
    ]
    neighbours.sort(key=lambda pair: pair[1], reverse=True)
    neighbours = neighbours[:top_k]

    net = Network(height="600px", width="100%", notebook=True)
    # Physics is switched off here; call net.force_atlas_2based() instead for a
    # spring-like layout.
    net.set_options('''
    {
    "physics": {
        "enabled": false
    }
    }
    ''')

    # The center paper is drawn in red with the maximum value.
    center_title = doc_titles.get(center_source, center_source)
    net.add_node(center_source, label=center_title[:50], title=center_title, value=1.0, color="red")

    # Neighbour nodes, each linked to the center when similar enough.
    for doc, score in neighbours:
        label_text = doc_titles.get(doc, doc)
        net.add_node(doc, label=label_text[:50], title=label_text, value=score)
        if score > edge_threshold:
            net.add_edge(center_source, doc, value=score)

    # Edges between each unordered pair of neighbours.
    neighbour_ids = [doc for doc, _ in neighbours]
    for idx, left in enumerate(neighbour_ids):
        for right in neighbour_ids[idx + 1:]:
            pair_sim = float(np.dot(doc_embeddings[left], doc_embeddings[right]))
            if pair_sim > edge_threshold:
                net.add_edge(left, right, value=pair_sim)

    net.show("center_paper_network.html")
    print("✅ ネットワークを 'center_paper_network.html' に保存しました。")

# --- UI widgets ---
# Free-text box whose value filters the title list.
search_box = widgets.Text(description="タイトル検索:", layout=widgets.Layout(width='500px'))
# Candidate titles; starts empty and is filled by on_search_change.
title_dropdown = widgets.Dropdown(options=[], description="候補:", layout=widgets.Layout(width='95%'))
# Triggers on_run_clicked for the selected title.
run_button = widgets.Button(description="ネットワーク表示")

# --- Title search handler ---
def on_search_change(change):
    """Refresh the dropdown with the titles that contain the search text.

    The match is a case-insensitive substring test on the stripped text of
    ``search_box``. A blank search empties the dropdown; a search without any
    match shows the single placeholder entry.

    Args:
        change: Change notification passed by the widget (unused).
    """
    needle = search_box.value.strip().lower()
    if needle:
        matches = [title for title in all_titles if needle in title.lower()]
        title_dropdown.options = matches or ["該当なし"]
    else:
        title_dropdown.options = []

# Call on_search_change for changes to the text box's 'value' trait.
search_box.observe(on_search_change, names='value')

# --- Run handler ---
def on_run_clicked(b):
    """Draw the network for the title currently selected in the dropdown.

    Prints a notice instead of drawing when there is no usable selection.

    Args:
        b: The clicked button (unused).
    """
    selected_title = title_dropdown.value
    # The dropdown value is None while it has no options (before any search or
    # after a blank one) and the placeholder when nothing matched; looking
    # either up directly would raise KeyError.
    if selected_title == "該当なし" or selected_title not in title_to_source:
        print("⚠️ 候補がありません。")
        return
    selected_source = title_to_source[selected_title]
    build_paper_network(center_source=selected_source)

# Run on_run_clicked when the button is pressed.
run_button.on_click(on_run_clicked)

# --- Display the widgets ---
display(search_box, title_dropdown, run_button)


In [None]:
# Show the graph inside Jupyter Notebook (ipycytoscape), with support for opening the Zotero link when a node is clicked
import numpy as np
from sentence_transformers import SentenceTransformer
from ipycytoscape import CytoscapeWidget, Node, Edge
from IPython.display import display
from collections import defaultdict
import os
import requests
from dotenv import load_dotenv
import webbrowser
import ipywidgets as widgets

# === User settings (tune the parameters here) ===
TOP_K = 20                      # number of nodes to show (papers most similar to the center paper)
EDGE_THRESHOLD = 0.75          # similarity above which an edge is drawn

# Initial COSE layout settings, passed to CytoscapeWidget.set_layout
COSE_LAYOUT_PARAMS = {
    "name": "cose",
    "animate": False
}

# --- Load environment variables ---
load_dotenv()
# Zotero user id and API key used for the Zotero web API request.
ZOTERO_USER_ID = os.getenv("ZOTERO_USER_ID")
ZOTERO_API_KEY = os.getenv("ZOTERO_API_KEY")
# Text removed from each source to obtain the PDF name; empty when unset.
PDF_FOLDER = os.getenv("PDF_FOLDER", "")

# --- Load the embedding model and the precomputed data ---
# NOTE(review): `model` is not referenced again in the visible part of this cell.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can run
# arbitrary code; only load an .npz file that comes from a trusted source.
data = np.load("faiss_metadata.npz", allow_pickle=True)
# Metadata entries; the code below reads their "source" and "title" keys.
metadata_list = data["metadata_list"].tolist()
# Paragraph embeddings, paired positionally with metadata_list via zip().
paragraph_embeddings = data["embeddings"]

# --- Compute one mean embedding (and a title) per document ---
def compute_doc_embeddings(paragraph_embeddings, metadata_list):
    """Average paragraph embeddings into one normalized vector per document.

    Paragraphs are grouped by ``meta["source"]``. Each paragraph vector is
    L2-normalized, the normalized vectors are averaged, and the mean is
    L2-normalized again.

    Args:
        paragraph_embeddings: Iterable of 1-D vectors of equal length (for
            example a 2-D array with one row per paragraph).
        metadata_list: Iterable of dicts paired positionally with
            ``paragraph_embeddings``; each must have a ``"source"`` key and
            may have a ``"title"`` key.

    Returns:
        ``(doc_embeddings, doc_titles)``: two dicts keyed by source. The first
        holds float32 document vectors (a zero vector when every paragraph of
        the document is a zero vector); the second holds the first non-empty
        title seen for the source, or the source itself when there is none.
    """
    grouped = defaultdict(list)
    first_titles = {}
    for emb, meta in zip(paragraph_embeddings, metadata_list):
        source = meta["source"]
        grouped[source].append(emb)
        # Record the title during this single pass instead of rescanning
        # metadata_list once per document afterwards.
        title = meta.get("title")
        if title and source not in first_titles:
            first_titles[source] = title

    doc_embeddings = {}
    doc_titles = {}
    for source, vectors in grouped.items():
        vecs = np.vstack(vectors)
        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
        # A zero-length paragraph vector would turn the whole document into
        # NaN (0 / 0); dividing by 1 keeps it as a harmless zero vector.
        norms[norms == 0] = 1.0
        # Out-of-place division also works when the input has an integer dtype.
        mean_vec = (vecs / norms).mean(axis=0)
        mean_norm = np.linalg.norm(mean_vec)
        if mean_norm > 0:
            mean_vec = mean_vec / mean_norm
        doc_embeddings[source] = mean_vec.astype("float32")
        doc_titles[source] = first_titles.get(source, source)
    return doc_embeddings, doc_titles

# --- Look up the Zotero select link and the parent item key ---
def get_zotero_link_and_key(source):
    """Find the Zotero item that owns the PDF behind ``source``.

    The PDF name is ``source`` with ``PDF_FOLDER`` removed and leading path
    separators stripped. The user's library is searched for attachment items
    and the first one whose title equals that name is used.

    Args:
        source: Document identifier taken from the metadata ``"source"``
            field (presumably the PDF path — confirm against the indexer).

    Returns:
        ``(link, parent_key)``. ``link`` is a ``zotero://select`` URI for the
        parent item, or for the attachment itself when it has no parent (in
        which case ``parent_key`` is None). ``(None, None)`` is returned when
        the request fails, the response is not HTTP 200, the body cannot be
        parsed, or no attachment matches.
    """
    pdf_name = source.replace(PDF_FOLDER, "").lstrip("/\\")
    url = f"https://api.zotero.org/users/{ZOTERO_USER_ID}/items"
    headers = {"Zotero-API-Key": ZOTERO_API_KEY}
    params = {"q": pdf_name, "itemType": "attachment", "format": "json"}
    try:
        # Without a timeout a stalled connection would block the caller forever.
        res = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as e:
        print("Zotero request error:", e)
        return None, None
    if res.status_code != 200:
        return None, None

    try:
        for item in res.json():
            item_data = item["data"]
            if item_data.get("title") != pdf_name:
                continue
            parent_key = item_data.get("parentItem")
            # A top-level attachment has no parent; select the attachment
            # itself rather than building a link that ends in "0_None".
            target_key = parent_key or item.get("key")
            if target_key:
                return f"zotero://select/items/0_{target_key}", parent_key
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # Malformed or unexpected JSON: keep the lookup best-effort.
        print("Zotero JSON error:", e)
    return None, None

# --- Build the network displayed inside Jupyter ---
def show_network_jupyter(center_source, doc_embeddings, doc_titles):
    """Display the papers closest to ``center_source`` as an ipycytoscape graph.

    Uses the module-level ``TOP_K``, ``EDGE_THRESHOLD`` and
    ``COSE_LAYOUT_PARAMS``. Clicking a node looks up its Zotero link and
    opens it with ``webbrowser``.

    Args:
        center_source: Key of the central paper in ``doc_embeddings``.
        doc_embeddings: Mapping from source to document vector.
        doc_titles: Mapping from source to title; a missing entry falls back
            to the source itself.
    """
    center_vec = doc_embeddings[center_source]

    # Every other paper scored against the center, best first, cut to TOP_K.
    neighbours = [
        (doc, float(np.dot(center_vec, vec)))
        for doc, vec in doc_embeddings.items()
        if doc != center_source
    ]
    neighbours.sort(key=lambda pair: pair[1], reverse=True)
    neighbours = neighbours[:TOP_K]

    # Default look for every node, then the override for the center node.
    node_style = {
        'selector': 'node',
        'style': {
            'label': 'data(label)',
            'background-color': '#0074D9',
            'width': '30',
            'height': '30'
        }
    }
    center_style = {
        'selector': '[group = "center"]',
        'style': {
            'background-color': 'red',
            'width': '40',
            'height': '40'
        }
    }

    cyto = CytoscapeWidget()
    cyto.set_style([node_style, center_style])
    cyto.set_layout(**COSE_LAYOUT_PARAMS)

    center_title = doc_titles.get(center_source, center_source)
    cyto.graph.add_node(Node(data={"id": center_source, "label": center_title[:50], "group": "center"}))

    # Neighbour nodes, each linked to the center when similar enough.
    for doc, score in neighbours:
        label_text = doc_titles.get(doc, doc)
        cyto.graph.add_node(Node(data={"id": doc, "label": label_text[:50]}))
        if score > EDGE_THRESHOLD:
            cyto.graph.add_edge(Edge(data={"source": center_source, "target": doc}))

    # Edges between each unordered pair of neighbours.
    neighbour_ids = [doc for doc, _ in neighbours]
    for idx, left in enumerate(neighbour_ids):
        for right in neighbour_ids[idx + 1:]:
            pair_sim = float(np.dot(doc_embeddings[left], doc_embeddings[right]))
            if pair_sim > EDGE_THRESHOLD:
                cyto.graph.add_edge(Edge(data={"source": left, "target": right}))

    def open_in_zotero(node):
        """Open the Zotero link of the clicked node, if one is found."""
        link, _ = get_zotero_link_and_key(node['data']['id'])
        if not link:
            print("Zoteroリンクが見つかりませんでした。")
            return
        print(f"Zoteroリンクを開きます: {link}")
        webbrowser.open(link)

    cyto.on("node", "click", open_in_zotero)

    display(cyto)

# --- Preparation ---
doc_embeddings, doc_titles = compute_doc_embeddings(paragraph_embeddings, metadata_list)
# Reverse lookup title -> source. Documents that share a title collapse into
# one entry (the last one wins), so only one of them is selectable.
title_to_source = {v: k for k, v in doc_titles.items()}
# Alphabetically sorted titles, filtered by the search box handler.
all_titles = sorted(title_to_source.keys())

# Search box, candidate dropdown (initially empty) and the run button.
search_box = widgets.Text(description="検索:", layout=widgets.Layout(width='400px'))
dropdown = widgets.Dropdown(options=[], description="候補:", layout=widgets.Layout(width='95%'))
run_button = widgets.Button(description="ネットワーク表示")

# Narrow the candidates with the search text
def on_search_change(change):
    """Refresh the dropdown with the titles that contain the search text.

    The match is a case-insensitive substring test on the stripped text of
    ``search_box``; blank text therefore matches every title. When nothing
    matches, the single placeholder entry is shown.

    Args:
        change: Change notification passed by the widget (unused).
    """
    needle = search_box.value.strip().lower()
    matches = [title for title in all_titles if needle in title.lower()]
    dropdown.options = matches or ["該当なし"]

# Call on_search_change for changes to the text box's 'value' trait.
search_box.observe(on_search_change, names='value')

# Show the network when the button is pressed
def on_run_clicked(b):
    """Display the network for the title currently selected in the dropdown.

    Prints a notice instead of drawing when there is no usable selection.

    Args:
        b: The clicked button (unused).
    """
    selected_title = dropdown.value
    # The dropdown value is None while it has no options (before any search)
    # and the placeholder when nothing matched; looking either up directly
    # would raise KeyError.
    if selected_title == "該当なし" or selected_title not in title_to_source:
        print("⚠️ 候補がありません。")
        return
    selected_source = title_to_source[selected_title]
    show_network_jupyter(selected_source, doc_embeddings, doc_titles)

# Run on_run_clicked when the button is pressed.
run_button.on_click(on_run_clicked)

# Display the widgets
display(search_box, dropdown, run_button)

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer
from ipycytoscape import CytoscapeWidget, Node, Edge
from IPython.display import display
from collections import defaultdict
import os
import requests
from dotenv import load_dotenv
import webbrowser
import ipywidgets as widgets

# === User settings (tune the parameters here) ===
TOP_K = 20                      # number of nodes to show (papers most similar to the center paper)
EDGE_THRESHOLD = 0.75          # similarity above which an edge is drawn

# Initial COSE layout settings, passed to CytoscapeWidget.set_layout
COSE_LAYOUT_PARAMS = {
    "name": "cose",
    "animate": False
}

# --- Load environment variables ---
load_dotenv()
# Zotero user id and API key used for the Zotero web API request.
ZOTERO_USER_ID = os.getenv("ZOTERO_USER_ID")
ZOTERO_API_KEY = os.getenv("ZOTERO_API_KEY")
# Text removed from each source to obtain the PDF name; empty when unset.
PDF_FOLDER = os.getenv("PDF_FOLDER", "")

# --- Load the embedding model and the precomputed data ---
# NOTE(review): `model` is not referenced again in the visible part of this cell.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can run
# arbitrary code; only load an .npz file that comes from a trusted source.
data = np.load("faiss_metadata.npz", allow_pickle=True)
# Metadata entries; the code below reads their "source" and "title" keys.
metadata_list = data["metadata_list"].tolist()
# Paragraph embeddings, paired positionally with metadata_list via zip().
paragraph_embeddings = data["embeddings"]

# --- Compute one mean embedding (and a title) per document ---
def compute_doc_embeddings(paragraph_embeddings, metadata_list):
    """Average paragraph embeddings into one normalized vector per document.

    Paragraphs are grouped by ``meta["source"]``. Each paragraph vector is
    L2-normalized, the normalized vectors are averaged, and the mean is
    L2-normalized again.

    Args:
        paragraph_embeddings: Iterable of 1-D vectors of equal length (for
            example a 2-D array with one row per paragraph).
        metadata_list: Iterable of dicts paired positionally with
            ``paragraph_embeddings``; each must have a ``"source"`` key and
            may have a ``"title"`` key.

    Returns:
        ``(doc_embeddings, doc_titles)``: two dicts keyed by source. The first
        holds float32 document vectors (a zero vector when every paragraph of
        the document is a zero vector); the second holds the first non-empty
        title seen for the source, or the source itself when there is none.
    """
    grouped = defaultdict(list)
    first_titles = {}
    for emb, meta in zip(paragraph_embeddings, metadata_list):
        source = meta["source"]
        grouped[source].append(emb)
        # Record the title during this single pass instead of rescanning
        # metadata_list once per document afterwards.
        title = meta.get("title")
        if title and source not in first_titles:
            first_titles[source] = title

    doc_embeddings = {}
    doc_titles = {}
    for source, vectors in grouped.items():
        vecs = np.vstack(vectors)
        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
        # A zero-length paragraph vector would turn the whole document into
        # NaN (0 / 0); dividing by 1 keeps it as a harmless zero vector.
        norms[norms == 0] = 1.0
        # Out-of-place division also works when the input has an integer dtype.
        mean_vec = (vecs / norms).mean(axis=0)
        mean_norm = np.linalg.norm(mean_vec)
        if mean_norm > 0:
            mean_vec = mean_vec / mean_norm
        doc_embeddings[source] = mean_vec.astype("float32")
        doc_titles[source] = first_titles.get(source, source)
    return doc_embeddings, doc_titles

# --- Look up the Zotero select link and the parent item key ---
def get_zotero_link_and_key(source):
    """Find the Zotero item that owns the PDF behind ``source``.

    The PDF name is ``source`` with ``PDF_FOLDER`` removed and leading path
    separators stripped. The user's library is searched for attachment items
    and the first one whose title equals that name is used.

    Args:
        source: Document identifier taken from the metadata ``"source"``
            field (presumably the PDF path — confirm against the indexer).

    Returns:
        ``(link, parent_key)``. ``link`` is a ``zotero://select`` URI for the
        parent item, or for the attachment itself when it has no parent (in
        which case ``parent_key`` is None). ``(None, None)`` is returned when
        the request fails, the response is not HTTP 200, the body cannot be
        parsed, or no attachment matches.
    """
    pdf_name = source.replace(PDF_FOLDER, "").lstrip("/\\")
    url = f"https://api.zotero.org/users/{ZOTERO_USER_ID}/items"
    headers = {"Zotero-API-Key": ZOTERO_API_KEY}
    params = {"q": pdf_name, "itemType": "attachment", "format": "json"}
    try:
        # Without a timeout a stalled connection would block the caller forever.
        res = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as e:
        print("Zotero request error:", e)
        return None, None
    if res.status_code != 200:
        return None, None

    try:
        for item in res.json():
            item_data = item["data"]
            if item_data.get("title") != pdf_name:
                continue
            parent_key = item_data.get("parentItem")
            # A top-level attachment has no parent; select the attachment
            # itself rather than building a link that ends in "0_None".
            target_key = parent_key or item.get("key")
            if target_key:
                return f"zotero://select/items/0_{target_key}", parent_key
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # Malformed or unexpected JSON: keep the lookup best-effort.
        print("Zotero JSON error:", e)
    return None, None

# --- Build the network displayed inside Jupyter ---
def show_network_jupyter(center_source, doc_embeddings, doc_titles):
    """Display the papers closest to ``center_source`` as an ipycytoscape graph.

    Uses the module-level ``TOP_K``, ``EDGE_THRESHOLD`` and
    ``COSE_LAYOUT_PARAMS``. Clicking a node looks up its Zotero link and
    opens it with ``webbrowser``.

    Args:
        center_source: Key of the central paper in ``doc_embeddings``.
        doc_embeddings: Mapping from source to document vector.
        doc_titles: Mapping from source to title; a missing entry falls back
            to the source itself.
    """
    center_vec = doc_embeddings[center_source]

    # Every other paper scored against the center, best first, cut to TOP_K.
    neighbours = [
        (doc, float(np.dot(center_vec, vec)))
        for doc, vec in doc_embeddings.items()
        if doc != center_source
    ]
    neighbours.sort(key=lambda pair: pair[1], reverse=True)
    neighbours = neighbours[:TOP_K]

    # Default look for every node: centered label on a white rounded box.
    node_style = {
        'selector': 'node',
        'style': {
            'label': 'data(label)',
            'text-valign': 'center',
            'text-halign': 'center',
            'text-background-color': 'white',
            'text-background-opacity': 1,
            'text-background-shape': 'roundrectangle',
            'text-border-color': '#999',
            'text-border-width': 1,
            'text-border-opacity': 0.5,
            'background-color': '#0074D9',
            'width': '30',
            'height': '30',
            'font-size': 10
        }
    }
    # Override for the center node: red and larger.
    center_style = {
        'selector': '[group = "center"]',
        'style': {
            'background-color': 'red',
            'width': '40',
            'height': '40'
        }
    }

    cyto = CytoscapeWidget()
    cyto.set_style([node_style, center_style])
    cyto.set_layout(**COSE_LAYOUT_PARAMS)

    center_title = doc_titles.get(center_source, center_source)
    cyto.graph.add_node(Node(data={"id": center_source, "label": center_title[:50], "group": "center"}))

    # Neighbour nodes, each linked to the center when similar enough.
    for doc, score in neighbours:
        label_text = doc_titles.get(doc, doc)
        cyto.graph.add_node(Node(data={"id": doc, "label": label_text[:50]}))
        if score > EDGE_THRESHOLD:
            cyto.graph.add_edge(Edge(data={"source": center_source, "target": doc}))

    # Edges between each unordered pair of neighbours.
    neighbour_ids = [doc for doc, _ in neighbours]
    for idx, left in enumerate(neighbour_ids):
        for right in neighbour_ids[idx + 1:]:
            pair_sim = float(np.dot(doc_embeddings[left], doc_embeddings[right]))
            if pair_sim > EDGE_THRESHOLD:
                cyto.graph.add_edge(Edge(data={"source": left, "target": right}))

    def open_in_zotero(node):
        """Open the Zotero link of the clicked node, if one is found."""
        link, _ = get_zotero_link_and_key(node['data']['id'])
        if not link:
            print("Zoteroリンクが見つかりませんでした。")
            return
        print(f"Zoteroリンクを開きます: {link}")
        webbrowser.open(link)

    cyto.on("node", "click", open_in_zotero)

    display(cyto)

# --- Preparation ---
doc_embeddings, doc_titles = compute_doc_embeddings(paragraph_embeddings, metadata_list)
# Reverse lookup title -> source. Documents that share a title collapse into
# one entry (the last one wins), so only one of them is selectable.
title_to_source = {v: k for k, v in doc_titles.items()}
# Alphabetically sorted titles, filtered by the search box handler.
all_titles = sorted(title_to_source.keys())

# Search box, candidate dropdown (initially empty) and the run button.
search_box = widgets.Text(description="検索:", layout=widgets.Layout(width='400px'))
dropdown = widgets.Dropdown(options=[], description="候補:", layout=widgets.Layout(width='95%'))
run_button = widgets.Button(description="ネットワーク表示")
# Output widget that on_run_clicked clears and writes into.
output_area = widgets.Output()

# Narrow the candidates with the search text
def on_search_change(change):
    """Refresh the dropdown with the titles that contain the search text.

    The match is a case-insensitive substring test on the stripped text of
    ``search_box``; blank text therefore matches every title. When nothing
    matches, the single placeholder entry is shown.

    Args:
        change: Change notification passed by the widget (unused).
    """
    needle = search_box.value.strip().lower()
    matches = [title for title in all_titles if needle in title.lower()]
    dropdown.options = matches or ["該当なし"]

# Call on_search_change for changes to the text box's 'value' trait.
search_box.observe(on_search_change, names='value')

# Show the network when the button is pressed
def on_run_clicked(b):
    """Display the network for the selected title inside ``output_area``.

    The output area is cleared first, so each click replaces the previous
    graph. Prints a notice instead of drawing when there is no usable
    selection.

    Args:
        b: The clicked button (unused).
    """
    selected_title = dropdown.value
    with output_area:
        output_area.clear_output()
        # The dropdown value is None while it has no options (before any
        # search) and the placeholder when nothing matched; looking either up
        # directly would raise KeyError.
        if selected_title == "該当なし" or selected_title not in title_to_source:
            print("⚠️ 候補がありません。")
            return
        selected_source = title_to_source[selected_title]
        show_network_jupyter(selected_source, doc_embeddings, doc_titles)

# Run on_run_clicked when the button is pressed.
run_button.on_click(on_run_clicked)

# Display the widgets together with the output area
display(search_box, dropdown, run_button, output_area)
