## Knowledge Graph Utilizing LLM Agent

### Setup and Configuration

In [None]:
# This cell will install all the necessary Python packages.
!pip install langchain langchain-deepseek networkx pyvis pydantic -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/756.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m450.6/756.0 kB[0m [31m13.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m57.3 MB/s[0m eta [36m0:00:00[0m
[?25h

### Imports and API Key Access

In [None]:
import os
from google.colab import userdata
from pydantic import BaseModel, Field
from typing import List

# LangChain Imports
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser

print("Libraries imported successfully.")

# Read the DeepSeek API key from Colab Secrets.
# `userdata.get` reports a missing secret (or a notebook that has not been granted
# access to it) by raising instead of returning an empty value, so both cases are
# caught here and we fall back to the DEEPSEEK_API_KEY environment variable.
try:
    DEEPSEEK_API_KEY = userdata.get('DEEPSEEK_API_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY')

if not DEEPSEEK_API_KEY:
    print("⚠️ DEEPSEEK_API_KEY not found. Please follow Step 2 to add it to Colab Secrets.")
else:
    print("✅ DeepSeek API Key loaded.")

Libraries imported successfully.
✅ DeepSeek API Key loaded.


### Define Your Output Structure (Pydantic)

In [None]:
from pydantic import BaseModel, Field
from typing import List, Literal

# Define the five traits as a type for the LLM to use
# (Literal limits `trait` to exactly these five strings.)
TraitType = Literal["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]

# One extracted observation: a person, one Big Five trait, the supporting text, and a signed score.
# NOTE(review): the class docstrings and Field descriptions below are presumably emitted into the
# JSON schema that the output parser puts into the prompt — treat their wording as prompt text
# and confirm before editing it.
class PersonalityTrait(BaseModel):
    """Represents an extracted personality trait for a specific person."""
    person: str = Field(description="The name of the person being described.")
    trait: TraitType = Field(description="The Big Five (OCEAN) personality trait identified.")
    evidence: str = Field(description="The exact quote or text evidence from the input that supports this trait.")
    # ge/le bound the accepted score to the closed interval [-1.0, 1.0]
    score: float = Field(description="A score from -1.0 (low) to 1.0 (high) for this trait, inferred from the text.", ge=-1.0, le=1.0)

# Top-level container: the object type handed to PydanticOutputParser in the chain setup.
class PersonalityAnalysis(BaseModel):
    """A structured list of all personality traits extracted from the text."""
    traits: List[PersonalityTrait]

### Set Up the LangChain Components

In [None]:
# 1. Output parser: validates the LLM reply against the PersonalityAnalysis model
personality_parser = PydanticOutputParser(pydantic_object=PersonalityAnalysis)

# 2. Prompt template. {format_instructions} is pre-filled from the parser below;
#    {text} is supplied on every invocation.
personality_template_string = """
You are an expert occupational psychologist. Your job is to read a text and infer the Big Five (OCEAN)
personality traits of the people mentioned.

For each person, identify any text that provides evidence for one of the five traits.
- Openness: imaginative, curious, likes variety
- Conscientiousness: organized, dependable, disciplined
- Extraversion: sociable, energetic, assertive
- Agreeableness: trusting, kind, cooperative
- Neuroticism: anxious, moody, emotionally unstable

Follow the output format instructions precisely. Only extract traits when clear evidence is present.

{format_instructions}

Here is the text you need to analyze:
---
{text}
---
"""

personality_prompt = ChatPromptTemplate.from_template(
    personality_template_string,
    partial_variables={"format_instructions": personality_parser.get_format_instructions()}
)

# 3. Make sure a chat model named `llm` exists.
#    No cell in this notebook defines `llm`, so on a fresh kernel the chain below would
#    fail with NameError. An already-defined `llm` is reused untouched; otherwise one is
#    created here.
#    NOTE(review): the model name and base URL assume DeepSeek's OpenAI-compatible
#    endpoint — confirm they match the model originally used for the recorded outputs.
try:
    llm
except NameError:
    llm = ChatOpenAI(
        model="deepseek-chat",
        api_key=DEEPSEEK_API_KEY,
        base_url="https://api.deepseek.com",
        temperature=0,  # keep extraction as repeatable as possible for evaluation
    )

# 4. Compose prompt -> model -> parser into a single runnable chain
personality_chain = personality_prompt | llm | personality_parser

print("✅ Personality extraction chain is ready!")

✅ Personality extraction chain is ready!


### Evaluation for Knowledge Graph

In [None]:
import numpy as np

def evaluate_kg(ground_truth_triples, generated_triples):
    """
    Calculates Precision, Recall, and F1-Score for a generated KG
    against a ground truth KG, and prints a detailed report.

    Triples are compared by exact equality after de-duplication. Each triple
    may be a tuple or any other sequence (e.g. a list parsed from JSON); it is
    converted to a tuple before comparison.

    Args:
    - ground_truth_triples (iterable of triples): The "correct" triples.
    - generated_triples (iterable of triples): The "predicted" triples from the LLM.

    Returns:
    - dict: A dictionary containing precision, recall, and f1_score.
      Each metric is 0.0 when its denominator would be zero.
    """

    def _ordered(triples):
        # Sort by the string form of each element so the printed report is
        # stable between runs (plain set iteration order is not).
        return sorted(triples, key=lambda triple: tuple(str(part) for part in triple))

    # 1. Normalise to sets of tuples: tuples are hashable, sets drop duplicates.
    gt_set = {tuple(triple) for triple in ground_truth_triples}
    gen_set = {tuple(triple) for triple in generated_triples}

    # 2. Partition into True Positives, False Positives, and False Negatives.
    tp_set = gen_set & gt_set   # generated and correct
    fp_set = gen_set - gt_set   # generated but not in the ground truth
    fn_set = gt_set - gen_set   # in the ground truth but not generated

    tp = len(tp_set)
    fp = len(fp_set)
    fn = len(fn_set)

    # 3. Metrics, guarding every division against a zero denominator.
    # Precision: share of generated triples that are correct.
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    # Recall: share of ground-truth triples that were found.
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    # F1: harmonic mean of precision and recall.
    if (precision + recall) == 0:
        f1_score = 0.0
    else:
        f1_score = 2 * (precision * recall) / (precision + recall)

    # --- Print a detailed report ---
    print("--- KG Evaluation Report ---")
    print(f"Total Ground Truth Triples: {len(gt_set)}")
    print(f"Total Generated Triples:    {len(gen_set)}")
    print("-" * 30)
    print(f"True Positives (TP):  {tp}")
    print(f"False Positives (FP): {fp}")
    print(f"False Negatives (FN): {fn}")
    print("-" * 30)

    print("\n✅ True Positives (Correctly found):")
    for triple in _ordered(tp_set):
        print(f"   {triple}")

    print("\n❌ False Positives (Incorrectly generated):")
    for triple in _ordered(fp_set):
        print(f"   {triple}")

    print("\n⚠️ False Negatives (Missed):")
    for triple in _ordered(fn_set):
        print(f"   {triple}")

    print("\n" + "=" * 30)
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1_score:.4f}")
    print("=" * 30)

    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score
    }

### Run the Extraction

In [None]:
# --- 1. Ground truth: the (Person, has_trait, Trait) facts a human reader expects ---
# Only the presence of a trait is recorded in the triple; the "(low)"/"(high)" notes
# record the direction suggested by the quoted evidence.
ground_truth_triples = [
    ("Alex", "has_trait", "Extraversion"),        # "prefers to work alone" (low)
    ("Alex", "has_trait", "Conscientiousness"),   # "meticulously organized" (high)
    ("Sarah", "has_trait", "Extraversion"),       # "energetic, loves brainstorming" (high)
    ("Sarah", "has_trait", "Conscientiousness"),  # "desk is a mess" (low)
    ("Sarah", "has_trait", "Agreeableness"),      # "wonderfully kind" (high)
]

print(f"Ground Truth loaded with {len(ground_truth_triples)} expected facts.")


# --- 2. Text handed to the extraction chain ---
personality_text = """
Alex is a brilliant programmer, but he's not a people-person. He prefers to work alone
and finds large meetings draining. His colleague, Sarah, is the complete opposite.
She organizes all the team events, is incredibly energetic, and loves brainstorming
in a group. Alex, on the other hand, is meticulously organized. His code is always
perfectly documented. Sarah's desk is a bit of a mess, and she sometimes forgets
to push her code, but she is also known for being wonderfully kind and always
the first to help a teammate who is stuck.
"""

print("Processing text with LLM...")
try:
    response = personality_chain.invoke({"text": personality_text})
    print(f"✅ LLM response received, found {len(response.traits)} traits.")

    # --- 3. Flatten each extracted trait into a (subject, predicate, object) tuple,
    #        the shape evaluate_kg compares against the ground truth ---
    generated_triples_for_eval = [
        (extracted.person, "has_trait", extracted.trait)
        for extracted in response.traits
    ]
    print("✅ Converted LLM response to simple triples for evaluation.")

    # --- 4. Score the generated triples against the ground truth ---
    print("\nRunning evaluation...")
    metrics = evaluate_kg(ground_truth_triples, generated_triples_for_eval)

except Exception as e:
    # Any failure in steps 2-4 lands here and is reported without a traceback.
    print("\n❌ An error occurred during the LLM chain invocation:")
    print(e)

Ground Truth loaded with 5 expected facts.
Processing text with LLM...
✅ LLM response received, found 5 traits.
✅ Converted LLM response to simple triples for evaluation.

Running evaluation...
--- KG Evaluation Report ---
Total Ground Truth Triples: 5
Total Generated Triples:    5
------------------------------
True Positives (TP):  5
False Positives (FP): 0
False Negatives (FN): 0
------------------------------

✅ True Positives (Correctly found):
   ('Alex', 'has_trait', 'Extraversion')
   ('Sarah', 'has_trait', 'Extraversion')
   ('Alex', 'has_trait', 'Conscientiousness')
   ('Sarah', 'has_trait', 'Conscientiousness')
   ('Sarah', 'has_trait', 'Agreeableness')

❌ False Positives (Incorrectly generated):

⚠️ False Negatives (Missed):

Precision: 1.0000
Recall:    1.0000
F1-Score:  1.0000


### Build the networkx Graph

In [None]:
import networkx as nx

# Directed graph in which every edge points from a person to one of their traits
G_personality = nx.DiGraph()

print("Building graph from 'response.traits'...")

for trait_item in response.traits:
    person_name = trait_item.person
    trait_name = trait_item.trait

    # People are drawn as blue dots, traits as green squares.
    # Adding a node that already exists simply refreshes its attributes.
    G_personality.add_node(
        person_name, label=person_name, title=f"Person: {person_name}",
        shape="dot", color="#3498db",
    )
    G_personality.add_node(
        trait_name, label=trait_name, title=f"Trait: {trait_name}",
        shape="square", color="#2ecc71",
    )

    # Gray edge: the score is the visible label, the evidence is the hover text
    G_personality.add_edge(
        person_name, trait_name,
        label=f"Score: {trait_item.score}",
        title=f"Evidence: {trait_item.evidence}",
        color="#95a5a6",
    )

print("✅ Personality NetworkX graph built successfully.")
print(f"Nodes: {G_personality.number_of_nodes()}")
print(f"Edges: {G_personality.number_of_edges()}")

Building graph from 'response.traits'...
✅ Personality NetworkX graph built successfully.
Nodes: 5
Edges: 5


### Visualize with pyvis in Colab

In [None]:
from pyvis.network import Network
from IPython.display import display, HTML

# Pyvis canvas configured for inline notebook display, with directed edges
net_personality = Network(
    notebook=True,
    width="100%",
    height="600px",
    cdn_resources='remote',
    directed=True,
)

# Copy nodes and edges (with their label/title/color/shape attributes) from NetworkX
net_personality.from_nx(G_personality)

# Physics layout tuning plus hover interaction
net_personality.set_options("""
var options = {
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -15000,
      "centralGravity": 0.2
    },
    "minVelocity": 0.75
  },
  "interaction": {
    "hover": true
  }
}
""")

# Write the interactive page to disk, then render that file in the output cell
net_personality.save_graph("personality_graph.html")

print(
    "✅ Interactive 'personality_graph.html' file generated.",
    "Displaying graph... (People are blue dots, traits are green squares)",
    "Hover over the edges to see the evidence!",
    sep="\n",
)

display(HTML("personality_graph.html"))

✅ Interactive 'personality_graph.html' file generated.
Displaying graph... (People are blue dots, traits are green squares)
Hover over the edges to see the evidence!


### Reusable Function for Generate and Display Graph

In [None]:
from pyvis.network import Network
from IPython.display import display, HTML
import networkx as nx
import traceback

def _build_personality_graph(traits):
    """Return a directed person -> trait NetworkX graph carrying Pyvis styling attributes."""
    graph = nx.DiGraph()
    for trait_item in traits:
        # People: blue dots. Traits: green squares.
        graph.add_node(
            trait_item.person, label=trait_item.person,
            title=f"Person: {trait_item.person}", shape="dot", color="#3498db"
        )
        graph.add_node(
            trait_item.trait, label=trait_item.trait,
            title=f"Trait: {trait_item.trait}", shape="square", color="#2ecc71"
        )
        # Edge label shows the score; the hover title shows the evidence.
        graph.add_edge(
            trait_item.person, trait_item.trait,
            label=f"Score: {trait_item.score}",
            title=f"Evidence: {trait_item.evidence}",
            color="#95a5a6"
        )
    return graph


def _show_personality_graph(graph, html_path="personality_graph.html"):
    """Render `graph` with Pyvis, save it to `html_path`, and display that file inline."""
    net = Network(notebook=True, width="100%", height="600px", cdn_resources='remote', directed=True)
    net.from_nx(graph)
    net.set_options("""
    var options = {
      "physics": {"barnesHut": {"gravitationalConstant": -15000, "centralGravity": 0.2}, "minVelocity": 0.75},
      "interaction": {"hover": true}
    }
    """)
    net.save_graph(html_path)
    print("✅ Interactive graph saved. Displaying now...")
    display(HTML(html_path))


def generate_and_display_personality_graph(text_input: str):
    """
    Takes a string of text, runs the full PERSONALITY KG extraction,
    saves the result globally, and visualizes it.

    Args:
    - text_input (str): The text to analyse.

    Side effects:
    - Sets the module-level `global_llm_response` to None at the start, then to
      the chain's response once extraction succeeds (also when no traits are found).
    - Prints progress and the extracted traits.
    - Writes 'personality_graph.html' and displays it.

    Returns:
    - None. Any exception is caught and its traceback is printed instead of raised.
    """
    global global_llm_response
    global_llm_response = None  # clear the previous result so a failed run is not mistaken for a fresh one

    print(f"Processing text: '{text_input[:100]}...'")

    try:
        # --- Step 1: Run the PERSONALITY extraction chain ---
        response = personality_chain.invoke({"text": text_input})
        global_llm_response = response

        if not response.traits:
            print("Extraction complete, but no personality traits were found.")
            return

        print(f"✅ Extracted {len(response.traits)} traits.")

        for trait in response.traits:
            print(f"\n  - Person:   {trait.person}")
            print(f"    Trait:    {trait.trait} (Score: {trait.score})")
            print(f"    Evidence: '{trait.evidence}'")

        # --- Step 2: Build the NetworkX graph ---
        G_personality = _build_personality_graph(response.traits)
        print(f"\n✅ NetworkX graph built (Nodes: {G_personality.number_of_nodes()}, Edges: {G_personality.number_of_edges()}).")

        # --- Step 3: Build and display the Pyvis graph ---
        _show_personality_graph(G_personality)

    except Exception:
        # Best-effort UI helper: report the full traceback rather than crash the widget callback.
        print("\n❌ An error occurred during the process:")
        print(traceback.format_exc())

# Run this cell once to define the updated function
print("✅ Reusable function 'generate_and_display_personality_graph' is defined and will save results.")

✅ Reusable function 'generate_and_display_personality_graph' is defined and will save results.


### Interactive Input Text Box

In [None]:
# Holds the most recent extraction result saved by
# generate_and_display_personality_graph; the Evaluation cell reads it.
# It is None until the UI has produced a response, and re-running this cell resets it to None.
global_llm_response = None

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Initial text shown in the box; also used to detect that nothing was entered
PLACEHOLDER_TEXT = 'Paste your scenario text here...'

# 1. Create the widgets
text_area = widgets.Textarea(
    value=PLACEHOLDER_TEXT,
    placeholder='Type or paste your text',
    description='Input Text:',
    layout={'width': '100%', 'height': '150px'}
)

submit_button = widgets.Button(
    description='Generate Personality Graph',
    button_style='success',
    icon='play'
)

# 2. Create an output area to display the graph
output_area = widgets.Output()

# 3. Define the function that runs when the button is clicked
def on_button_clicked(b):
    """Button callback: validate the text box content and run the extraction.

    Args:
    - b: The widget passed in by `on_click`; not used.

    All output (messages and the graph) is routed into `output_area`.
    """
    with output_area:
        # Clear any previous graph from the output area
        clear_output(wait=True)

        # Strip surrounding whitespace so whitespace-only input (or the placeholder
        # with stray spaces/newlines) is rejected instead of being sent to the LLM.
        text_input = text_area.value.strip()

        if not text_input or text_input == PLACEHOLDER_TEXT:
            print("Please enter some text into the box first.")
            return

        generate_and_display_personality_graph(text_input)

# 4. Link the button click to the function
submit_button.on_click(on_button_clicked)

# 5. Display the UI
print("Enter your text below and click 'Generate Personality Graph':")
display(text_area, submit_button, output_area)

Enter your text below and click 'Generate Personality Graph':


Textarea(value='Paste your scenario text here...', description='Input Text:', layout=Layout(height='150px', wi…

Button(button_style='success', description='Generate Personality Graph', icon='play', style=ButtonStyle())

Output()

### Evaluation

In [None]:
# --- 1. Ground truth for the Project Phoenix scenario ---
ground_truth_triples = [
    ("Alex", "has_trait", "Conscientiousness"),
    ("Alex", "has_trait", "Neuroticism"),
    ("Alex", "has_trait", "Openness"),
    ("Alex", "has_trait", "Extraversion"),
    ("Ben", "has_trait", "Conscientiousness"),
    ("Ben", "has_trait", "Extraversion"),
    ("Ben", "has_trait", "Openness"),
    ("Ben", "has_trait", "Agreeableness"),
    ("Chloe", "has_trait", "Conscientiousness"),
    ("Chloe", "has_trait", "Extraversion"),
    ("Chloe", "has_trait", "Openness"),
    ("Chloe", "has_trait", "Agreeableness"),
    ("Chloe", "has_trait", "Neuroticism"),
]

print(f"Ground Truth loaded with {len(ground_truth_triples)} expected facts.")

# --- 2. Evaluate only if the UI has stored a response ---
# globals().get(...) covers both "name never defined" and "defined but still None".
if globals().get('global_llm_response') is not None:
    print(f"✅ Found response from UI with {len(global_llm_response.traits)} traits. Converting for evaluation...")

    # --- 3. Flatten the response into (Person, "has_trait", Trait) tuples ---
    generated_triples_for_eval = [
        (extracted.person, "has_trait", extracted.trait)
        for extracted in global_llm_response.traits
    ]

    # --- 4. Compare against the ground truth with evaluate_kg ---
    print("Running evaluation...")
    metrics = evaluate_kg(ground_truth_triples, generated_triples_for_eval)
else:
    print("="*30)
    print("❌ Error: No LLM response found.")
    print("Please run the 'Generate Personality Graph' button in the cell above first.")

Ground Truth loaded with 13 expected facts.
✅ Found response from UI with 9 traits. Converting for evaluation...
Running evaluation...
--- KG Evaluation Report ---
Total Ground Truth Triples: 13
Total Generated Triples:    9
------------------------------
True Positives (TP):  9
False Positives (FP): 0
False Negatives (FN): 4
------------------------------

✅ True Positives (Correctly found):
   ('Ben', 'has_trait', 'Openness')
   ('Ben', 'has_trait', 'Agreeableness')
   ('Chloe', 'has_trait', 'Conscientiousness')
   ('Chloe', 'has_trait', 'Agreeableness')
   ('Ben', 'has_trait', 'Extraversion')
   ('Alex', 'has_trait', 'Openness')
   ('Alex', 'has_trait', 'Conscientiousness')
   ('Alex', 'has_trait', 'Neuroticism')
   ('Chloe', 'has_trait', 'Extraversion')

❌ False Positives (Incorrectly generated):

⚠️ False Negatives (Missed):
   ('Chloe', 'has_trait', 'Openness')
   ('Alex', 'has_trait', 'Extraversion')
   ('Chloe', 'has_trait', 'Neuroticism')
   ('Ben', 'has_trait', 'Conscientiousness')

## Generated Data
```
Scenario: The Project Phoenix Crisis Meeting
The project was two weeks behind schedule, and the team called an emergency meeting to figure out a new plan.

Alex (Project Manager): Alex arrived 10 minutes early, his original project plan already updated with red ink, highlighting every missed deadline. He sat quietly at the head of the table, nervously tapping his pen and clearly distressed, worrying that this failure would reflect badly on his career. When the team started brainstorming, he sighed, "I don't think we need 'creative' ideas right now. These new, unvetted tools are too risky. We just need to stick to the original plan and work harder." He listened intently to everyone but didn't say much else, just politely nodding, clearly preferring to be by himself to re-work the numbers.

Ben (Lead Developer): Ben rushed in five minutes late, spilling a bit of coffee on his shirt. "So sorry, everyone! My desk is an absolute black hole!" he laughed, his voice filling the room and charming everyone instantly. When Alex explained the problem, Ben was completely unfazed. "This is a fantastic opportunity!" he clapped, jumping up to the whiteboard. "I was just reading a wild article about a new generative AI framework. What if we scrap 80% of our current backend and rebuild it using that? It's creative, it's exciting!" He then noticed Alex tensing up and immediately softened his tone. "Or, just a thought. Alex, you look stressed. What can I do to help your part of the plan? I'm happy to help anyone who's stuck."

Chloe (Lead Designer): Chloe confidently took charge, standing up and moving to the whiteboard next to Ben. "Okay, let's stop the chaos," she said, her voice assertive. "Ben, no new frameworks. That's a distraction and totally impractical. And Alex, your original plan is what got us into this mess. It's too rigid." She didn't mince words and seemed completely calm, focused only on the solution. "I've already re-written the schedule. We'll work in focused 2-hour design sprints, no exceptions. I don't care if the solution is boring, I care that it works and ships on time. This is the only practical way forward." She ignored the team's groans and started writing the new tasks.
```

### Justification
The Project Phoenix Crisis Meeting scenario was intentionally designed to serve as synthetic data for evaluating the knowledge graph extraction system. The scenario simulates a realistic workplace crisis — a high-stress project meeting — which naturally reveals personality traits, emotional responses, and interpersonal dynamics among individuals.

## Download

In [None]:
from google.colab import _message
import nbformat

# Ask the Colab frontend for the current notebook as JSON.
# NOTE(review): `_message` is an underscore-prefixed (private) Colab module, so this
# call may change without notice — File → Download .ipynb is the manual fallback.
nb_data = _message.blocking_request('get_ipynb')

# Sometimes Colab wraps it like {'ipynb': {...}}, so unwrap if needed
if "ipynb" in nb_data:
    nb_data = nb_data["ipynb"]

# Double-check for essential keys
if "cells" not in nb_data:
    raise ValueError("❌ This Colab JSON doesn’t contain 'cells' — can’t export properly. Try File → Download .ipynb manually.")

# Remove widget metadata if present; tolerate a notebook dict that has no
# "metadata" key at all instead of failing with KeyError.
nb_data.get("metadata", {}).pop("widgets", None)

# Convert dict → NotebookNode
nb_node = nbformat.from_dict(nb_data)

# Save it as a Jupyter Notebook file. UTF-8 is set explicitly because the
# notebook contains non-ASCII characters (emoji in strings and outputs).
path = "/content/notebook.ipynb"
with open(path, "w", encoding="utf-8") as f:
    nbformat.write(nb_node, f)

print("✅ Notebook successfully cleaned and saved at:", path)


In [41]:
from google.colab import files
# Download /content/notebook.ipynb — the same path the export cell above writes to,
# so that cell has to run first.
files.download("/content/notebook.ipynb")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>