In [1]:
import matplotlib.pyplot as plt
from pyvis.network import Network
import networkx as nx
from pathlib import Path
import pandas as pd
import numpy as np

import nest_asyncio

from llama_index.core import Response
from llama_index.core.evaluation import (
    FaithfulnessEvaluator,
    DatasetGenerator,
    RelevancyEvaluator,
)

from app.notebooks.helpers.bot.kg_generation import create_kg_triplets
from notebooks.helpers.bot.bot import (
    get_chat_engine,
    load_llm,
    load_embedding_model,
    setup_index_and_storage,
    generate_pairings_documents,
    service,
)

# Apply nest_asyncio's patch before any async-driven library call is made.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and the llama_index helpers used below start their own — confirm.
nest_asyncio.apply()
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
def display_eval_df(query: str, response: Response, eval_result: Response) -> None:
    """Show a one-row, styled summary table for a single evaluation.

    Args:
        query: The question that was asked.
        response: The answer object; it is shown as ``str(response)``.
        eval_result: The evaluation outcome. Only its ``contexts`` and
            ``passing`` attributes are read.
            NOTE(review): annotated as ``Response``, but the attributes read
            here look like those of an evaluator result object — confirm the
            intended type and tighten the annotation.

    Returns:
        None. The styled table is rendered through the notebook's ``display``.
    """
    max_source_chars = 1000

    # ``contexts`` may be None or empty when no source text was attached;
    # fall back to an empty list so the join cannot raise a TypeError.
    source = ' '.join(eval_result.contexts or [])
    # Only mark the text as shortened when something was actually cut off.
    if len(source) > max_source_chars:
        source = source[:max_source_chars] + "..."

    eval_df = pd.DataFrame(
        {
            "Query": query,
            "Response": str(response),
            "Source": source,
            "Evaluation Result": eval_result.passing,
        },
        index=[0],
    )
    # Constrain the two long text columns so they wrap instead of stretching
    # the rendered table.
    styled = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"],
    )
    display(styled)



In [2]:
def parse_triplets(text):
    """Parse newline-separated ``head**relation**tail`` rows into 3-tuples.

    Each non-blank line of ``text`` is split on the literal ``**`` separator
    and its first three fields become one ``(head, relation, tail)`` tuple;
    any further fields on the line are ignored.

    Blank lines and lines with fewer than three fields are skipped. This keeps
    the final triplet when ``text`` does not end with a newline (slicing off
    the last row unconditionally would lose it) while still tolerating a
    trailing empty or incomplete row.

    Args:
        text: The raw triplet block, one triplet per line.

    Returns:
        A list of ``(head, relation, tail)`` tuples in input order; an empty
        list when ``text`` contains no complete triplet.
    """
    triplets = []
    for row in text.splitlines():
        if not row.strip():
            # Empty line, e.g. the remainder after a trailing newline.
            continue
        fields = row.split('**')
        if len(fields) < 3:
            # Incomplete row: not enough fields to form a triplet.
            continue
        triplets.append((fields[0], fields[1], fields[2]))
    return triplets

# Empty list; nothing in this cell appends to it.
kg_triplets = []

# Small sample of triplets, turned into a directed graph: one edge per
# (head, relation, tail) with the relation stored as the edge's "label".
KG = create_kg_triplets(sample_size=10, for_model=False)
G = nx.DiGraph()
for triplets_text in KG['triplets']:
    for head, relation, tail in parse_triplets(triplets_text):
        G.add_edge(head, tail, label=relation)

In [None]:
# Regenerate the triplets for a larger sample (for_model is left at the
# helper's default this time) and map every row through
# generate_pairings_documents.
# NOTE(review): this rebinds KG, replacing the 10-row sample created earlier.
KG = create_kg_triplets(sample_size=1000)
# Row-wise apply: one result object per KG row.
# NOTE(review): presumably llama_index Document objects — confirm in the helper.
kg_pairings = KG.apply(generate_pairings_documents, axis=1)

In [None]:
# Sanity check on the first generated pairing: its content as rendered with
# metadata_mode='llm', followed by its node info.
print(kg_pairings.iloc[0].get_content(metadata_mode='llm'))
print(kg_pairings.iloc[0].get_node_info())

In [None]:
# Load the language model through the project helper, selecting the 'ollama' option.
llm = load_llm('ollama')

In [None]:
# Load the "foodbert" embedding model and bundle it with the LLM into the
# service context that the index and evaluators below receive.
embed_model = load_embedding_model("foodbert")
service_context = service(llm=llm, embed_model=embed_model)

In [None]:
# Build the storage context and the knowledge-graph index from the pairings.
# NOTE(review): force=True presumably rebuilds even if a persisted index
# exists — confirm in setup_index_and_storage.
storage_context, kg_index = setup_index_and_storage(
        service=service_context,
        kg_pairings=kg_pairings,
        show_progress=False,
        force=True,
    )

In [None]:
# Create a chat engine on top of the knowledge-graph index.
# NOTE(review): "retriver_mode" looks like a misspelling of "retriever_mode";
# it must match the keyword that get_chat_engine actually declares — confirm
# against the helper before renaming.
chat_engine = get_chat_engine(
    kg_index,
    chat_mode="context",
    retriver_mode="hybrid",
    use_global_node_triplets=True,
    max_keywords_per_query=10,
    num_chunks_per_query=15,
    similarity_top_k=2,
    graph_store_query_depth=2,
)
# Reset the engine before the evaluation queries below are sent.
chat_engine.reset()

In [None]:
# Generate evaluation questions from the pairing documents with the same
# service context (LLM + embeddings) used for the index.
data_generator = DatasetGenerator.from_documents(
        kg_pairings, service_context=service_context
)
# Ask the generator for questions with num=3.
eval_questions = data_generator.generate_questions_from_nodes(num=3)

# Evaluator used below to judge the chat engine's responses.
evaluator = RelevancyEvaluator(service_context=service_context)

In [None]:
# Show the second generated question, the one evaluated in the next cells.
eval_questions[1]

In [None]:
# Ask the chat engine the second generated question and print its answer.
response = chat_engine.chat(eval_questions[1])
print(response)
# Have the evaluator judge that answer against the same question.
eval_result = evaluator.evaluate_response(
    query=eval_questions[1], response=response
)

In [None]:
# Print the evaluation's verdict, score, feedback and contexts, one per line.
for field_name in ("passing", "score", "feedback", "contexts"):
    print(getattr(eval_result, field_name))

In [None]:
# Render the query, response, source text and verdict as a styled one-row table.
display_eval_df(eval_questions[1], response, eval_result)

In [None]:
# Hand-written query sent to the same chat engine.
# NOTE(review): this rebinds `response`, replacing the evaluated answer above.
response = chat_engine.chat(
    "What are the primary grape varieties used in producing Bordeaux wines?",
)

print(response)

In [None]:
# Export the index's graph as a networkx graph, with limit=1000.
# NOTE(review): this rebinds G, replacing the DiGraph built from the 10-row
# sample; the visualisation cells below use whichever G was assigned last.
G = kg_index.get_networkx_graph(limit=1000)

In [4]:
# Options for the interactive pyvis canvas.
# (A dark background, bgcolor="#222222", is left disabled.)
network_options = dict(
    notebook=True,
    cdn_resources="remote",
    font_color="black",
    height="750px",
    width="100%",
    select_menu=True,
    filter_menu=True,
)
net = Network(**network_options)

# Expose only the physics controls, load the networkx graph, then write the
# visualisation to nx.html and show it.
net.show_buttons(filter_="physics")
net.from_nx(G)
net.show("nx.html")

nx.html


In [None]:
# Static drawing of the graph with a seeded spring layout, so node positions
# are repeatable across runs.
pos = nx.spring_layout(G, seed=42, k=1.5)
# Edge "label" attributes; currently unused because the edge-label drawing
# call below is commented out.
labels = nx.get_edge_attributes(G, 'label')
plt.figure(figsize=(20, 20))
# NOTE(review): with_labels is not passed, so font_size may have no visible
# effect here — confirm against nx.draw's defaults.
nx.draw(G, pos, font_size=8, node_size=200, node_color='lightblue', edge_color='gray', alpha=0.6)
# nx.draw_networkx_edge_labels(G, pos, font_size=3, label_pos=0.3, verticalalignment='baseline')
plt.title('Knowledge Graph')
plt.show()

In [None]:
# Dump every edge as a (source, target, attribute-dict) tuple.
# NOTE(review): this prints one line per edge, which can be very long for a
# graph exported with limit=1000.
for edge in G.edges(data=True):
    print(edge)

In [None]:
# Number of nodes in whichever graph G currently refers to.
print(G.number_of_nodes())