In [9]:
# Install the OpenAI and LangChain libraries
# - `openai`: Provides access to OpenAI's GPT models for tasks like text generation, embeddings, and completions.
# - `langchain`: A framework for building applications using large language models (LLMs).
#                Includes tools for chaining prompts, memory, and integrations like knowledge graphs.
!pip install -q openai langchain
# Attempt to install the LangChain Community library
# - `langchain-community`: This may refer to a community-supported version or extensions of LangChain.
#   Ensure this package exists and is maintained if errors occur during installation.
!pip install -q langchain-community

This script initializes the OpenAI API client and defines a function to interact with the GPT model. The get_chat_response function sends a user-provided text input to the GPT model (gpt-3.5-turbo) and returns the model's response.

In [10]:
import os
from openai import OpenAI

# Read the API key from the environment; secrets must never be hard-coded in a notebook.
# SECURITY: a literal key used to be committed on this line. Treat that key as leaked and revoke it.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed for the interactive fallback

    # Prompt without echoing, and keep the key in the process environment so that every
    # client created later in the notebook can pick it up.
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

# With no explicit api_key argument the client reads OPENAI_API_KEY from the environment.
client = OpenAI()

def get_chat_response(text, model="gpt-3.5-turbo"):
    """
    Send one user message to an OpenAI chat model and return the reply text.

    Parameters:
    - text: The user message to send.
    - model: Name of the chat model. Defaults to "gpt-3.5-turbo", the value that
      was previously hard-coded, so existing calls behave as before.

    Returns:
    - The message content of the first completion choice.
    """
    user_message = {"role": "user", "content": text}
    chat_completion = client.chat.completions.create(
        messages=[user_message],
        model=model,
    )
    # Only the first returned choice is used.
    return chat_completion.choices[0].message.content


In [None]:
import networkx as nx  # For creating and analyzing graphs/networks.

import matplotlib.pyplot as plt  # For data visualization and plotting.

import numpy as np  # For numerical operations and array handling.

import seaborn as sns

import pandas as pd

import random  # For generating random numbers.

from langchain.llms import OpenAI

from langchain.graphs.networkx_graph import NetworkxEntityGraph, KnowledgeTriple # Represents (subject, predicate, object) triples.

from scipy.spatial.distance import cosine  # For cosine similarity/distance between vectors.

from scipy.stats import wasserstein_distance  # For Wasserstein distance (probability distribution comparison).

from sklearn.metrics.pairwise import cosine_similarity

from sklearn.linear_model import LinearRegression, BayesianRidge  # Regression models.

from sklearn.datasets import fetch_20newsgroups  # Fetch the 20 Newsgroups text dataset.

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  # Model performance metrics.

import matplotlib.colors as mcolors  # For handling and customizing colors in visualizations.

import sklearn.metrics  # For evaluation metrics like accuracy, precision, recall, etc.

import matplotlib.colors as mcolors  # For handling color schemes in plots

import textwrap  # For wrapping text into fixed-width lines

from sklearn.metrics import roc_curve, auc

from langchain.chains import GraphQAChain  # For question answering over knowledge graphs.

# Prompt Engineering
from langchain.prompts import PromptTemplate  # To define templates for LLM prompts.


In [None]:
!pip install  rdflib  SPARQLWrapper

This script builds a knowledge graph from DBpedia. A SPARQL query retrieves up to ten software resources whose English label contains "cyber", "security", or "malware", and each result is turned into three triples — (concept, type, Software), (concept, label, …) and (concept, abstract, …) — representing entities (nodes) and their relationships (edges). The three triples that belong to one concept form one part ("Part 1", "Part 2", …). The triples are printed, saved to `knowledge_graph_output.txt`, and later added to the graph index, which allows for efficient querying and analysis.

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Client for DBpedia's public SPARQL endpoint
sparql = SPARQLWrapper("https://dbpedia.org/sparql")

# Up to 10 dbo:Software resources whose English label mentions "cyber",
# "security" or "malware", each with its English label and abstract.
query = """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?concept ?label ?abstract
WHERE {
  ?concept rdf:type dbo:Software .
  ?concept rdfs:label ?label .
  ?concept dbo:abstract ?abstract .
  FILTER (LANG(?label) = 'en' && LANG(?abstract) = 'en')
  FILTER (CONTAINS(LCASE(?label), "cyber") || CONTAINS(LCASE(?label), "security") || CONTAINS(LCASE(?label), "malware"))
}
LIMIT 10
"""

# Send the query and decode the JSON response into Python structures
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# `kg` is a flat list of (subject, predicate, object) triples;
# `portion_indices` records which slice of `kg` each part occupies.
kg = []
portion_indices = {}
portion_counter = 1  # 1-based number of the part being built
triple_index = 0  # position in `kg` where the next part starts

print("\nStructured Knowledge Graph:\n")

for result in results["results"]["bindings"]:
    # The entity name is the last path segment of the resource URI
    concept = result["concept"]["value"].rsplit("/", 1)[-1]
    label = result["label"]["value"]
    abstract = result["abstract"]["value"]  # kept in full, never truncated

    # Each part contributes exactly three consecutive triples
    start_index = triple_index
    part_name = f"Part {portion_counter}"
    portion_indices[part_name] = range(start_index, start_index + 3)

    # Human-readable dump of this part
    print(f"\n# {part_name}")
    print(f"({concept}) → (type) → (Software)")
    print(f"({concept}) → (label) → ({label})")
    print(f"({concept}) → (abstract) →")
    print(abstract)
    print("-" * 80)

    # Append the part's triples in a fixed order: type, label, abstract
    kg.extend(
        [
            (concept, "type", "Software"),
            (concept, "label", label),
            (concept, "abstract", abstract),
        ]
    )

    triple_index += 3
    portion_counter += 1

# Summary of the index range covered by each part
print("\nPortion Indices:\n")
for part, index_range in portion_indices.items():
    print(f"{part}: {index_range}")

# Persist the triples, one per paragraph, to a UTF-8 text file
with open("knowledge_graph_output.txt", "w", encoding="utf-8") as f:
    f.writelines(f"( {subj} , {pred} , {obj})\n\n" for subj, pred, obj in kg)

print("\nFinal Knowledge Graph saved as 'knowledge_graph_output.txt'.")

# Echo the complete triple list
print("\nFinal Knowledge Graph List:\n")
for triple in kg:
    subj, pred, obj = triple
    print(f"( {subj} , {pred} ,  {obj} )")

In [None]:
# Subjects and objects both count as nodes; the set removes repeats such as the shared "Software" object.
unique_nodes = {endpoint for subj, _pred, obj in kg for endpoint in (subj, obj)}
print("Original KG node count:", len(unique_nodes))

In [None]:
# Map each part name to the indices of its triples in `kg`.
# Every concept contributes three consecutive triples (type, label, abstract), so the mapping
# is derived from the length of `kg`. Hard-coding ten parts produced out-of-range indices
# whenever the SPARQL query returned fewer than ten rows (LIMIT 10 is only an upper bound).
TRIPLES_PER_PART = 3
part_indices = {
    f"Part {part_no}": range((part_no - 1) * TRIPLES_PER_PART, part_no * TRIPLES_PER_PART)
    for part_no in range(1, len(kg) // TRIPLES_PER_PART + 1)
}
part_names = list(part_indices.keys())

# Instantiate the graph used by GraphQAChain
graph = NetworkxEntityGraph()

# Load every knowledge triple into the graph
for (node1, relation, node2) in kg:
    graph.add_triple(KnowledgeTriple(node1, relation, node2))

Visualizes the knowledge graph as a directed graph using NetworkX and Matplotlib. Nodes represent entities, and edges depict relationships with labels for clarity. The layout uses spring positioning with increased spacing for readability. Custom node colors and labeled edges enhance the visualization, displayed without axes.

In [None]:
def wrap_text(text, max_words=8):
    """Return `text` unchanged, or the placeholder "Explanation" if it has more than `max_words` words.

    Despite the name, no line wrapping is performed: text that is too long
    (e.g. a full abstract used as a graph label) is replaced by the placeholder.
    """
    word_count = len(text.split())
    if word_count > max_words:
        return "Explanation"
    return text

def visualize_graph_with_chains(kg, part_indices):
    """
    Draw the knowledge graph twice, side by side: once plain and once colored by chain membership.

    Parameters:
        kg (list of tuples): Knowledge graph triples (node1, relation, node2).
        part_indices (dict): Chain name -> iterable of indices into `kg` that belong to that chain.
    Returns:
        None. The figure is shown and the node-to-chain mapping is printed.
    """
    # Directed graph; the relation is stored as the edge attribute 'label'
    G = nx.DiGraph()
    for source, relation, target in kg:
        G.add_edge(source, target, label=relation)

    # Fixed seed so both panels (and repeated runs) share the same layout
    pos = nx.spring_layout(G, k=8, iterations=100, seed=0)

    # One color per chain, sampled along a five-stop colormap in dictionary order
    chain_cmap = mcolors.LinearSegmentedColormap.from_list(
        'chain_colors', ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854']
    )
    chain_norm = mcolors.Normalize(vmin=0, vmax=len(part_indices) - 1)
    chain_color_map = {
        chain_name: chain_cmap(chain_norm(position))
        for position, chain_name in enumerate(part_indices)
    }

    # Single pass over the chains. A node or edge that appears in several chains
    # ends up with the color of the LAST chain that contains it.
    node_chain_map = {}
    node_color_lookup = {}
    edge_color_lookup = {}
    for chain_name, indices in part_indices.items():
        chain_color = chain_color_map[chain_name]
        for idx in indices:
            source, _, target = kg[idx]
            for endpoint in (source, target):
                node_chain_map[endpoint] = chain_name
                node_color_lookup[endpoint] = chain_color
            edge_color_lookup[(source, target)] = chain_color

    # Colors in drawing order; anything outside every chain keeps the default color
    node_colors = [node_color_lookup.get(node, 'lightblue') for node in G.nodes()]
    edge_colors = [edge_color_lookup.get(edge, 'gray') for edge in G.edges()]

    # Long node/edge labels are replaced by a short placeholder
    wrapped_labels = {node: wrap_text(node) for node in G.nodes()}
    wrapped_edge_labels = {
        edge: wrap_text(label)
        for edge, label in nx.get_edge_attributes(G, 'label').items()
    }

    fig, axs = plt.subplots(1, 2, figsize=(20, 8), dpi=600)
    plain_ax, chain_ax = axs

    # Left panel: uniform colors
    nx.draw_networkx_nodes(G, pos, node_color='lightblue', node_size=1200, ax=plain_ax)
    nx.draw_networkx_edges(G, pos, edge_color='gray', width=1.2, ax=plain_ax)

    # Right panel: chain colors
    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=1200, ax=chain_ax, edgecolors='black')
    nx.draw_networkx_edges(G, pos, edge_color=edge_colors, width=1.5, ax=chain_ax)

    # Labels, titles and hidden axes are identical for both panels
    panels = (
        (plain_ax, "Original Knowledge Graph"),
        (chain_ax, "Graph Highlighted by Chain Membership"),
    )
    for ax, title in panels:
        nx.draw_networkx_labels(G, pos, labels=wrapped_labels, font_size=6, ax=ax)
        nx.draw_networkx_edge_labels(G, pos, edge_labels=wrapped_edge_labels, font_size=6, ax=ax)
        ax.set_title(title, fontsize=10)
        ax.axis('off')

    # Legend: one marker per chain
    handles = [
        plt.Line2D([0], [0], marker='o', color=color, markersize=10, linestyle='', label=chain_name)
        for chain_name, color in chain_color_map.items()
    ]
    chain_ax.legend(handles=handles, title="Chains", loc='upper right', fontsize=8)

    plt.show()

    # Textual summary of the chain each node was assigned to
    print("\n--- Node Chain Mapping ---")
    for node, chain in node_chain_map.items():
        print(f"Node '{node}' belongs to chain '{chain}'.")


In [None]:
# Plain vs. chain-colored view of the full knowledge graph
visualize_graph_with_chains(kg=kg, part_indices=part_indices)

Defines a function to perturb the knowledge graph by selectively removing triples belonging to specified parts. This allows testing the impact of missing information on downstream tasks or analysis. The function filters out triples associated with the indices of the parts to be removed and returns the modified knowledge graph.

In [None]:
def perturb_kg_by_removing_parts(kg, parts_to_remove, part_index_map=None):
    """
    Perturbs the knowledge graph by removing the triples of the specified parts.

    Parameters:
    - kg: The full knowledge graph triples list
    - parts_to_remove: List of part names to remove
    - part_index_map: Optional mapping of part name -> iterable of indices into `kg`.
      When omitted, the notebook-level `part_indices` mapping is used, which is what
      this function always did before the parameter existed.

    Returns:
    - perturbed_kg: A new list with the triples of the specified parts left out
      (the input list is not modified)

    Raises:
    - KeyError: if a name in `parts_to_remove` is missing from the mapping
    """
    if part_index_map is None:
        # Fall back to the global mapping defined earlier in the notebook
        part_index_map = part_indices

    # Collect every triple index that belongs to a removed part
    indices_to_remove = set()
    for part in parts_to_remove:
        indices_to_remove.update(part_index_map[part])

    # Keep the remaining triples in their original order
    return [triple for i, triple in enumerate(kg) if i not in indices_to_remove]

This function computes the embedding for a given text using a specified model. It processes the text by removing newline characters and queries the OpenAI embeddings API to generate a vector representation, useful for similarity comparisons and downstream tasks.

In [None]:
import random
import json
import unicodedata
import re
# Embeddings already computed, keyed by normalized text, so a repeated text is not sent to the API again.
embedding_cache = {}
# Model name passed to the OpenAI embeddings endpoint by get_embedding.
EMBEDDING_MODEL = "text-embedding-3-small"
def normalize_text(text):
    """
    Canonicalize text so that equivalent strings map to the same form.

    Steps, in order: newlines become spaces and both ends are stripped, the
    string is NFKC-normalized, every run of whitespace collapses to a single
    space, and the result is lower-cased.
    """
    single_line = text.replace("\n", " ").strip()
    canonical = unicodedata.normalize("NFKC", single_line)
    collapsed = re.sub(r"\s+", " ", canonical)
    return collapsed.lower()

def get_embedding(text):
    """
    Return the embedding of `text`, memoized in `embedding_cache`.

    The text is normalized first; the normalized form is both the cache key
    and the string sent to the embeddings endpoint.
    """
    cache_key = normalize_text(text)
    try:
        # Cache hit: no API call is made
        return embedding_cache[cache_key]
    except KeyError:
        pass
    response = client.embeddings.create(input=[cache_key], model=EMBEDDING_MODEL)
    vector = response.data[0].embedding
    embedding_cache[cache_key] = vector
    return vector

Defines a function to query a GraphQAChain with a question and temperature setting, returning the answer and its embedding. The function initializes the chain with a specified graph and temperature, processes the question, and computes the embedding for the returned answer, facilitating downstream analysis or comparison

In [None]:
def get_answer_and_embedding(question: str, temp: float, graph):
    """
    Sends a question to a GraphQAChain built on `graph` and returns the answer text
    together with the embedding of that text.

    Args:
        question (str): The question to ask the chain.
        temp (float): The temperature setting for the OpenAI model.
        graph: The graph object for the GraphQAChain.

    Returns:
        Tuple[str, list]: The answer as a string and its embedding as a list.
    """
    # `OpenAI` here is the LangChain LLM wrapper: the later `from langchain.llms import OpenAI`
    # shadows the client class imported from the `openai` package.
    llm = OpenAI(temperature=temp)
    chain = GraphQAChain.from_llm(llm, graph=graph, verbose=False)

    # Run the question through the chain and normalize the result to a string
    original_answer_str = str(chain.run(question))

    # Embed the same string that is returned, so the text and its embedding always
    # correspond (get_embedding calls string methods on its argument).
    original_answer_embedding = get_embedding(original_answer_str)

    return original_answer_str, original_answer_embedding


This function visualizes the explainability of a knowledge graph by displaying the original graph and an enhanced graph with nodes and edges colored based on their importance coefficients. It leverages a directed graph structure, wraps node labels for readability, adjusts node sizes based on connectivity, and applies a custom colormap to represent the significance of graph components. The visualization is presented in a two-panel layout, highlighting both the original structure and the explainability features derived from Simple SMILE GraphRAG analysis. A color bar provides a reference for importance coefficients

In [None]:
def plot_knowledge_graph_explainability(kg, part_indices, coeff):
    """
    Draw the knowledge graph next to a copy colored by per-part importance coefficients.

    Parameters:
        kg (list): Knowledge graph triplets (node1, relation, node2).
        part_indices (dict): Mapping of part names of the form "Part N" to the indices
            of that part's triples in `kg`. N (1-based) selects `coeff[N - 1]`.
        coeff (list): Importance coefficients, one per part.

    Side effects:
        Saves the figure to 'knowledge_graph_explainability_improved.png' and shows it.
    """
    # Directed graph; the relation is stored as the edge attribute 'label'
    G = nx.DiGraph()
    for node1, relation, node2 in kg:
        G.add_edge(node1, node2, label=relation)

    # Generate positions for the graph with increased spacing
    pos = nx.spring_layout(G, k=8, iterations=100, seed=0)

    # Diverging color map over [-1, 1]: blue for negative, gray at zero, red for positive
    cmap = mcolors.LinearSegmentedColormap.from_list('red_blue', ['blue', '#d3d3d3', 'red'])
    norm = mcolors.Normalize(vmin=-1, vmax=1)

    # Node size grows with degree (number of connections)
    node_sizes = [1500 + 100 * G.degree(node) for node in G.nodes()]

    # Resolve the color of every node and edge from the part its triple belongs to.
    # Edges are matched by their (source, target) pair. The previous code compared the
    # edge's position in G.edges() with kg triple indices, which only coincide while no
    # two triples collapse into the same edge. `setdefault` keeps the FIRST part that
    # contains a node/edge, the same precedence the original loops had.
    node_color_lookup = {}
    edge_color_lookup = {}
    for part_name, indices in part_indices.items():
        part_idx = int(part_name.split()[-1]) - 1  # "Part 3" -> 2
        color = cmap(norm(coeff[part_idx]))
        for idx in indices:
            node1, _, node2 = kg[idx]
            node_color_lookup.setdefault(node1, color)
            node_color_lookup.setdefault(node2, color)
            edge_color_lookup.setdefault((node1, node2), color)

    # Colors in drawing order, with defaults for anything outside every part
    node_colors = [node_color_lookup.get(node, '#8da0cb') for node in G.nodes()]
    edge_colors = [edge_color_lookup.get(edge, 'gray') for edge in G.edges()]

    # Long labels are replaced by a short placeholder
    wrapped_labels = {node: wrap_text(node) for node in G.nodes()}
    edge_labels = nx.get_edge_attributes(G, 'label')
    wrapped_edge_labels = {edge: wrap_text(label) for edge, label in edge_labels.items()}

    # Right panel is wider to leave room for the color bar
    fig, axs = plt.subplots(1, 2, figsize=(24, 10), dpi=600, gridspec_kw={'width_ratios': [1, 1.3]})

    # Left: Original Knowledge Graph
    nx.draw_networkx_nodes(G, pos, node_color='#d3d3d3', node_size=node_sizes, ax=axs[0])
    nx.draw_networkx_edges(G, pos, edge_color='gray', width=1.5, ax=axs[0])
    nx.draw_networkx_labels(G, pos, labels=wrapped_labels, font_size=8, ax=axs[0])
    nx.draw_networkx_edge_labels(G, pos, edge_labels=wrapped_edge_labels, font_size=8, ax=axs[0])
    axs[0].set_title("Original Knowledge Graph", fontsize=12)
    axs[0].axis('off')

    # Right: SMILE Explainability with Node and Edge Colors
    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes, ax=axs[1])
    nx.draw_networkx_edges(G, pos, edge_color=edge_colors, width=1.8, ax=axs[1])
    nx.draw_networkx_labels(G, pos, labels=wrapped_labels, font_size=8, ax=axs[1])
    nx.draw_networkx_edge_labels(G, pos, edge_labels=wrapped_edge_labels, font_size=8, ax=axs[1])
    axs[1].set_title("Simple SMILE GraphRAG Explainability", fontsize=12)
    axs[1].axis('off')

    # Color bar relating colors to coefficient values
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(sm, ax=axs[1], label='Importance Coefficients')

    # Save the figure
    plt.savefig('knowledge_graph_explainability_improved.png', bbox_inches='tight')

    # Display the plot
    plt.show()


Defines the question posed to the GraphQAChain. Here, the question asks what `Network_Security_Services` is — an entity expected to be among the software resources retrieved from DBpedia into the knowledge graph.

In [None]:
question = "what is Network_Security_Services?"
# Portion 3: presumably the KG part expected to answer this question; verify against the query output.

This snippet sets the temperature parameter to 0 for deterministic response generation and queries the GraphQAChain with the question. The function get_answer_and_embedding returns the original answer as a string along with its embedding. The answer is then printed for review.


In [None]:
# Temperature 0 keeps generation as deterministic as the model allows
temp = 0
llm = OpenAI(temperature=temp)  # LLM handle; the call below builds its own instance
original_answer_str, original_answer_embedding = get_answer_and_embedding(
    question=question, temp=temp, graph=graph
)
print(original_answer_str)

In [None]:
# Baseline perturbation vector (all parts present): one entry per KG part, 1 = part kept.
# Sized from `part_names` rather than a literal 10, so it always matches the number of
# parts that were actually built.
original = np.ones((1, len(part_names)), dtype=int)  # shape (1, n_parts)

This function calculates the importance coefficients for perturbations on a knowledge graph while providing
detailed logging at each iteration. It removes random parts of the knowledge graph, generates perturbed
responses, computes similarities to the original answer, and fits a linear regression model to calculate
coefficients. The function includes:

- Temperature (`temp`) parameter to adjust the behavior of the GraphQAChain.
- Iterative logs showing the removed parts, perturbed responses, and calculated similarities.
- A summary of all similarities and weights after processing.
The coefficients provide insights into the contribution of each part of the knowledge graph to response fidelity.

In [None]:
def calculate_coefficients_print_Temerature(temp, original, kg, part_names, question, original_answer_embedding, original_answer_str, n_perturbations=20, kernel_width=0.25):
    """
    Estimate the contribution of each knowledge-graph part to the answer.

    Random subsets of parts are removed from the KG, the question is re-asked on each
    perturbed graph, the Wasserstein distance between the perturbed and original answer
    embeddings is measured, and a weighted linear regression is fitted from the
    presence/absence vectors to the scaled inverse distances.

    Parameters:
    - temp: Temperature for the LLM
    - original: Baseline vector with one entry per part (1 = present). It is used when it
      covers every part; otherwise an all-ones vector of length len(part_names) is used.
    - kg: Knowledge graph (list of triples)
    - part_names: List of part names in the KG
    - question: Question for GraphQAChain
    - original_answer_embedding: Embedding of the original answer
    - original_answer_str: Original answer text (only printed)
    - n_perturbations: Number of perturbed graphs to evaluate (default 20, the previous fixed value)
    - kernel_width: Width of the exponential kernel that turns distances into sample
      weights (default 0.25, the previous fixed value)

    Returns:
    - coeff: Coefficients from linear regression, one per entry of the baseline vector

    Raises:
    - ValueError: if `part_names` is empty or `n_perturbations` is less than 1
    """
    n_parts = len(part_names)
    if n_parts == 0:
        raise ValueError("part_names must contain at least one part")
    if n_perturbations < 1:
        raise ValueError("n_perturbations must be at least 1")

    # Honor the caller's baseline instead of silently overwriting it with a hard-coded
    # 10-element vector; rebuild it only when it cannot index every part.
    original = np.asarray(original).reshape(1, -1)
    if original.shape[1] < n_parts:
        original = np.ones((1, n_parts), dtype=int)

    similarities_wd = []
    perturbations_vect2 = []
    epsilon = 1e-6  # keeps 1 / distance finite when a perturbed answer equals the original

    # `OpenAI` is the LangChain LLM wrapper here. It does not depend on the perturbation,
    # so one instance is shared by all iterations.
    llm = OpenAI(temperature=temp)

    for i in range(n_perturbations):
        # Choose between 1 and all parts to drop, and zero their entries in the vector
        perturbation_vector = original.copy().flatten()
        num_parts_to_remove = random.randint(1, n_parts)
        parts_to_remove_indices = random.sample(range(n_parts), num_parts_to_remove)

        for part_idx in parts_to_remove_indices:
            perturbation_vector[part_idx] = 0

        perturbations_vect2.append(perturbation_vector)
        parts_to_remove = [part_names[idx] for idx in parts_to_remove_indices]

        # Build a graph that lacks the removed parts
        perturbed_kg = perturb_kg_by_removing_parts(kg, parts_to_remove)

        graph_temp = NetworkxEntityGraph()
        for (node1, relation, node2) in perturbed_kg:
            graph_temp.add_triple(KnowledgeTriple(node1, relation, node2))

        # Ask the same question on the perturbed graph
        chain = GraphQAChain.from_llm(llm, graph=graph_temp, verbose=False)
        temp_response = chain.run(question)
        temp_response_embedding = get_embedding(temp_response)

        # Wasserstein distance between the two embeddings' value distributions
        similarity_wd = wasserstein_distance(original_answer_embedding, temp_response_embedding)
        similarities_wd.append(similarity_wd)

        # Print progress for each iteration
        print(f"Iteration {i + 1}")
        print(f"Parts removed: {parts_to_remove}")
        print(f"original_answer response: {original_answer_str}")
        print(f"Perturbed response: {temp_response}")
        print(f"Distancee with original answer: {similarity_wd}\n")

    # Sample weights: perturbations closer (cosine) to the unperturbed vector count more
    perturbations_vect2 = np.array(perturbations_vect2)
    distances = sklearn.metrics.pairwise_distances(perturbations_vect2, original, metric='cosine').ravel()
    weights = np.sqrt(np.exp(-(distances**2) / kernel_width**2))

    # Print all similarities and weights
    print(f"similarities_wd: {similarities_wd}")
    print(f"Weights: {weights}")

    # Turn distances into similarities: small distance -> large value
    inverse_similarities_wd = [1.0 / (dist + epsilon) for dist in similarities_wd]

    # Min-max scale the inverse distances to [0, 1]
    min_value = min(inverse_similarities_wd)
    max_value = max(inverse_similarities_wd)
    if min_value == max_value:
        print("Warning: min_value and max_value are equal. Avoiding division by zero.")
        scaled_inverse_similarities_wd = [1.0 for _ in inverse_similarities_wd]  # Assign a constant
    else:
        scaled_inverse_similarities_wd = [
            (value - min_value) / (max_value - min_value) for value in inverse_similarities_wd
        ]

    # Weighted linear regression: part presence -> scaled inverse Wasserstein distance
    simpler_model = LinearRegression()
    simpler_model.fit(X=perturbations_vect2, y=scaled_inverse_similarities_wd, sample_weight=weights)
    coeff = simpler_model.coef_
    return coeff


# Accuracy: How well the explanation matches the model's actual decision-making process.

This function computes and visualizes the Receiver Operating Characteristic (ROC) curve for binary classification models.
It calculates the False Positive Rate (FPR), True Positive Rate (TPR), and Area Under the Curve (AUC) to evaluate the
discriminative power of the model. The ROC curve is plotted along with a diagonal line indicating random guessing,
providing an intuitive visual representation of model performance.

In [None]:
def plot_roc_curve(y_true, y_scores):
    """
    Compute the ROC curve and its AUC for binary labels, print the AUC, and plot the curve.

    Parameters:
    - y_true: Ground truth binary labels (array-like).
    - y_scores: Scores to rank by (array-like); higher means more likely positive.

    Returns:
    - roc_auc: The computed Area Under the Curve (AUC) value.
    """
    # Operating points of the curve; the thresholds themselves are not needed
    fpr, tpr, _thresholds = roc_curve(y_true, y_scores)
    roc_auc = auc(fpr, tpr)

    print(f"AUC: {roc_auc:.3f}")

    # Draw on an explicit Axes instead of the implicit pyplot state
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance-level diagonal
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.legend(loc="lower right")
    plt.show()

    return roc_auc


## Accuracy: Evaluate how well the explanation for "what is Network_Security_Services?" (Temp 0) aligns with the ground truth using similarity metrics. We have  original_answer_embedding and original_answer_str calculated before.

In [None]:
question = "what is Network_Security_Services?"
# Portion 3: presumably the KG part expected to answer this question; verify against the query output.

This script runs the `calculate_coefficients_print_Temerature` function with a temperature (`temp`) of 0,
which ensures deterministic outputs from the GraphQAChain. It processes the original knowledge graph,
perturbs it, and calculates coefficients through a linear regression model. The function provides detailed
iteration-wise logs, including removed parts, perturbed responses, similarities, and weights, offering a
comprehensive understanding of the model's behavior and the impact of perturbations on the response fidelity.

In [None]:
# Reference answer and embedding at temperature 0
temp = 0
original_answer_str, original_answer_embedding = get_answer_and_embedding(
    question=question, temp=temp, graph=graph
)
print(original_answer_str)

In [None]:
# Perturbation study at temperature 0; arguments are passed in signature order
temp = 0
coeff = calculate_coefficients_print_Temerature(
    temp,
    original,
    kg,
    part_names,
    question,
    original_answer_embedding,
    original_answer_str,
)

In [None]:
print(coeff)
# Copy of the coefficients rounded to three decimals, shown as the cell output
formatted_coeff = np.around(coeff, decimals=3)
formatted_coeff


This script uses the `plot_roc_curve` function to compute and visualize the ROC curve for a binary classification problem.
The `y_true` array contains the ground truth labels, while `y_scores` holds the predicted probabilities or scores
(e.g., coefficients). The function calculates the AUC value and plots the ROC curve, providing a quantitative
and visual assessment of model performance. The computed AUC value is also printed for reference.

In [None]:
# Ground truth over the ten parts: only index 2 (Part 3) is labeled positive
y_true = np.zeros(10, dtype=int)
y_true[2] = 1
# The rounded regression coefficients serve as the ranking scores
y_scores = formatted_coeff

# Compute and plot the ROC curve, then report the AUC
roc_auc_value = plot_roc_curve(y_true=y_true, y_scores=y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

This code defines a set of importance coefficients (`coeff`) representing the contribution of each part
of the knowledge graph to response fidelity. Using the `plot_knowledge_graph_explainability` function,
the graph is visualized with nodes and edges color-coded based on these coefficients. The visualization
highlights the most and least influential components, aiding in the interpretability of the knowledge graph.


In [None]:
# Call the function
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "what is Network_Security_Services?" (Temp 1) by comparing it to the ground truth, accounting for potential variability due to higher randomness.

In [None]:
question = "what is Network_Security_Services?"
# Portion 3: presumably the KG part expected to answer this question; verify against the query output.

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(original_answer_str)

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 2 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[2] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# Draw the knowledge graph for the coefficients computed earlier in this section.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is the type of  Scooby-Doo_and_the_Cyber_Chase_(video_game)?" (Temp 0) by comparing it with the verified ground truth for alignment and correctness.

In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is the type of  Scooby-Doo_and_the_Cyber_Chase_(video_game)?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(original_answer_str)

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 0 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[0] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 1., -0.,  0., -0.,  0.,  0.,  0., -0., -0.,  0.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is the type of  Scooby-Doo_and_the_Cyber_Chase_(video_game)?" (Temp 1) by comparing it with the ground truth, considering increased variability due to higher randomness.

In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question =  "What is the type of  Scooby-Doo_and_the_Cyber_Chase_(video_game)?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 0 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[0] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.154, -0.286,  0.253,  0.293, -0.007,  0.187,  0.121, -0.24 ,
        0.086, -0.422])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "IS Mirai from the Japanese word?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "IS Mirai from the Japanese word?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 1 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[1] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([-0.686,  0.119, -0.289,  0.235,  0.877, -0.096,  0.116,  0.117, 0.217, -0.826])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "IS Mirai from the Japanese word?" (Temp 1) by comparing it with the ground truth, accounting for variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
# Kept character-for-character equal to the Temp 0 run's question so that only the
# temperature differs between the two runs (no leading whitespace in the prompt).
question = "IS Mirai from the Japanese word?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 1 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[1] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff= np.array([ 0.239,  0.175,  0.002, -0.071,  0.135,  0.177, -0.038, -0.011,
        0.314, -0.215])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is MS Antivirus?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is MS Antivirus?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(original_answer_str)

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 3 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[3] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")


In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.047, -0.389, -0.825,  0.848,  0.169,  0.559, -0.401,  0.639,
        0.165, -0.074])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is MS Antivirus?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is MS Antivirus?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 3 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[3] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.099,  0.359, -0.139, -0.405, -0.031,  0.409,  0.016, -0.133,
       -0.015,  0.032])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is CyberCIEGE?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberCIEGE?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(original_answer_str)

In [None]:
# Same value the previous cell already assigned; repeated here.
temp= 0
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All other inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 4 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[4] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([-0.,  0.,  0.,  0.,  1.,  0., -0., -0.,  0., -0.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is CyberCIEGE?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
# Kept character-for-character equal to the Temp 0 run's question so that only the
# temperature differs between the two runs (no leading whitespace in the prompt).
question = "What is CyberCIEGE?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 4 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[4] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.232, -0.024,  0.023, -0.175,  0.024, -0.188, -0.051, -0.314,
       -0.011, -0.163])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is CyberExtension?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberExtension?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(original_answer_str)

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 5 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[5] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0., -0.,  0., -0., -0.,  1., -0.,  0., -0.,  0.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is CyberExtension?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberExtension?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 5 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[5] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.132,  0.344, -0.497, -0.605, -0.371, -0.067,  0.009,  0.081,
        0.207,  0.244])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is CyberGladiators?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberGladiators?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 6 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[6] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0., -0.,  0., -0., -0.,  0.,  1., -0., -0., -0.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is CyberGladiators?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberGladiators?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 6 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[6] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.326,  0.148, -0.284,  0.386,  0.115, -0.338,  0.612, -0.397,
        0.299, -0.037])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is CyberLink_MediaShow?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question ="What is CyberLink_MediaShow?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print("Original answer:", original_answer_str)

In [None]:
# Same value the previous cell already assigned; repeated here.
temp= 0
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All other inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 7 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[7] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([-0., -0.,  0.,  0.,  0., -0., -0.,  1., -0., -0.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is CyberLink_MediaShow?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberLink_MediaShow?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Same value the previous cell already assigned; repeated here.
temp= 1
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All other inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 7 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[7] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([-0.082, -0.044, -0.065,  0.097,  0.05 ,  0.164, -0.195,  0.071,
       -0.097, -0.186])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is CyberQuery?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberQuery?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Same value the previous cell already assigned; repeated here.
temp= 0
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All other inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)

formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 8 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[8] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.,  0., -0., -0., -0.,  0., -0.,  0.,  1.,  0.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is CyberQuery?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberQuery?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Same value the previous cell already assigned; repeated here.
temp= 1
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All other inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output

In [None]:
# One-hot ground truth over 10 entries: index 8 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[8] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([-0.19 , -0.323,  0.029,  0.07 ,  0.156,  0.175, -0.374, -0.211,
        0.461,  0.235])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Assess the explanation for "What is CyberStrike?" (Temp 1) by comparing it with the ground truth, considering variability due to higher randomness.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberStrike?"

In [None]:
# Temperature setting for this run (section header: Temp 1); reused by the coefficient cell.
temp = 1
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 9 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[9] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0.369, -0.342,  0.157, -0.186,  0.283,  0.405, -0.057,  0.321,
       -0.41 ,  0.189])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)

## Accuracy: Evaluate the explanation for "What is CyberStrike?" (Temp 0) by comparing it to the ground truth for precision and alignment.


In [None]:
# Question for this experiment; read by get_answer_and_embedding and the coefficient cell below.
question = "What is CyberStrike?"

In [None]:
# Temperature setting for this run (section header: Temp 0); reused by the coefficient cell.
temp = 0
# Baseline answer text and its embedding for `question`; both feed the coefficient computation.
original_answer_str, original_answer_embedding = get_answer_and_embedding(question, temp, graph)
print(f"Original answer: {original_answer_str}")

In [None]:
# Same value the previous cell already assigned; repeated here.
temp= 0
# Importance coefficients for the current question/temp
# (presumably one per entry in part_names — TODO confirm in calculate_coefficients_print_Temerature).
# All other inputs are notebook globals set by earlier cells.
coeff = calculate_coefficients_print_Temerature(
    temp= temp,
    original=original,
    kg=kg,
    part_names=part_names,
    question=question,
    original_answer_embedding=original_answer_embedding,
    original_answer_str=original_answer_str,
)

In [None]:
# Show the raw coefficients, then keep a copy rounded to 3 decimals;
# formatted_coeff is what the ROC cell uses as y_scores.
print(coeff)
# Format the coefficients to a specific number format (e.g., rounding to 3 decimal places)
formatted_coeff = np.round(coeff, 3)  # Rounds to 3 decimal places
formatted_coeff  # last expression: displayed as the cell output


In [None]:
# One-hot ground truth over 10 entries: index 9 carries the positive label (1), the rest are 0.
y_true = np.zeros(10, dtype=int)
y_true[9] = 1
# The rounded coefficients serve as the ranking scores.
y_scores = formatted_coeff

# plot_roc_curve is expected to draw the ROC curve and hand back the AUC, printed here.
roc_auc_value = plot_roc_curve(y_true, y_scores)
print(f"Computed AUC: {roc_auc_value:.3f}")

In [None]:
# NOTE(review): this literal replaces the `coeff` computed earlier in this section, so the
# figure shows these fixed numbers (presumably pasted from a previous run — confirm intent).
coeff=np.array([ 0., -0., -0.,  0., -0., -0., -0., -0.,  0.,  1.])
# Draw the knowledge graph for the coefficients above.
plot_knowledge_graph_explainability(kg, part_indices, coeff)