Required libraries (in addition to `networkx`, which the code below also imports):

```pip install pyvis```

```pip install plotly```

```pip install nbformat```

In [57]:
# Reading JSON files
import json
def read_json(file_path):
    """Parse a UTF-8 encoded JSON file and return its decoded content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever Python object ``json.load`` produces for the file content.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

In [58]:
import networkx as nx
# Building knowledge graph
def _as_label_list(value):
    """Normalise a 'causes'/'solutions' field into a list of usable node labels."""
    if value is None:
        return []
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character.
        return [value]
    return [item for item in value if item is not None]


def build_knowledge_graph(llm_output, complete=True):
    """Build an undirected graph linking each problem to its causes and solutions.

    Every document is read with ``.get``: ``'problem'`` supplies the problem
    node, ``'causes'`` and ``'solutions'`` supply the labels of the nodes
    attached to it. Each document is assumed to describe a single problem.

    Args:
        llm_output: Iterable of dict-like documents.
        complete: Not used by this function; kept so existing calls that pass
            it keep working.

    Returns:
        An ``nx.Graph`` whose nodes carry ``type`` in
        ``{'problem', 'cause', 'solution'}`` and whose edges carry ``type`` in
        ``{'caused_by', 'solvable_as'}``.

    Notes:
        Documents without a problem are skipped, because ``None`` cannot be
        used as a node. Missing or ``null`` cause/solution fields count as
        empty, and ``None`` entries inside them are ignored.
        Adding a label that already exists overwrites its ``type`` (and, for
        an existing edge, the edge ``type``), so the last occurrence wins.
    """
    G = nx.Graph()

    for doc in llm_output:
        problem = doc.get('problem')
        if problem is None:
            # Nothing to attach causes/solutions to.
            continue

        G.add_node(problem, type="problem")

        # Cause nodes, linked to the problem by 'caused_by' edges
        for cause in _as_label_list(doc.get('causes')):
            G.add_node(cause, type='cause')
            G.add_edge(problem, cause, type='caused_by')

        # Solution nodes, linked to the problem by 'solvable_as' edges
        for solution in _as_label_list(doc.get('solutions')):
            G.add_node(solution, type='solution')
            G.add_edge(problem, solution, type='solvable_as')

    return G

In [59]:
def colour_graph(G):
    """Attach a ``color`` attribute to nodes and edges according to their ``type``.

    Nodes: ``problem`` -> red, ``cause`` -> yellow, ``solution`` -> green.
    Edges: ``caused_by`` -> orange, ``solvable_as`` -> lightblue.
    Nodes and edges whose ``type`` is missing or not listed above are left
    untouched instead of raising ``KeyError``.

    Args:
        G: Graph exposing the networkx-style ``nodes``/``edges`` accessors.

    Returns:
        The same graph object ``G``; it is modified in place, not copied.
    """
    node_colours = {'problem': 'red', 'cause': 'yellow', 'solution': 'green'}
    edge_colours = {'caused_by': 'orange', 'solvable_as': 'lightblue'}

    for node, data in G.nodes(data=True):
        colour = node_colours.get(data.get('type'))
        if colour is not None:
            G.nodes[node]['color'] = colour

    for u, v, data in G.edges(data=True):
        colour = edge_colours.get(data.get('type'))
        if colour is not None:
            G[u][v]['color'] = colour

    return G

In [60]:
from pyvis.network import Network
# Showing the graph
def visualize_graph(G, output_file):
    """Render ``G`` with pyvis and save the result to ``output_file``.

    Args:
        G: networkx graph to display; it is handed to ``Network.from_nx``.
        output_file: Path of the HTML file to write. Its parent folder is
            created when it does not exist yet.

    Returns:
        None.
    """
    import os

    # Writing into a folder that does not exist would fail, e.g. for
    # './subgraphs/...' on a fresh checkout.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # notebook / cdn_resources settings kept exactly as before;
    # see the pyvis documentation for their meaning.
    net = Network(notebook=True, cdn_resources='in_line')
    net.from_nx(G)
    net.show(output_file)

In [61]:
# Build, colour, display and export the complete knowledge graph.
# Input file with the documents to turn into a graph
file = "graph_data.json"
# Output path of the HTML view of the graph
knowledge_graph_path = "r_iphone.html"
# Output path of the GEXF file (Gephi format)
output_gexf_path = 'r_iphone.gexf'
llm_output = read_json(file)

# Build the graph, then colour it. colour_graph returns the graph it was given,
# so G and G_col refer to the same object and G also carries the colours.
G = build_knowledge_graph(llm_output)
G_col = colour_graph(G)
# Show the knowledge graph
visualize_graph(G_col, knowledge_graph_path) # This saves the graph as HTML

# Export the graph for Gephi
nx.write_gexf(G, output_gexf_path)
print(f'Knowledge graph stored in: {knowledge_graph_path}')
print(f'Knowledge Graph in GEXF format stored in {output_gexf_path}')

r_iphone.html
Knowledge graph stored in: r_iphone.html
Knowledge Graph in GEXF format stored in r_iphone.gexf


#### Subgraph extraction

In [62]:
import heapq
# This is a function to retrieve all the subgraphs characterized by having highly discussed problems
def extract_high_subgraphs(G, top_n=10):
    """Return one subgraph per problem node with the highest edge counts.

    A problem node is any node whose ``type`` attribute equals ``'problem'``;
    nodes without a ``type`` are ignored. For each problem node the number of
    incident edges is counted, the ``top_n`` largest counts are collected, and
    every problem node whose count is one of those values is kept.

    Args:
        G: Graph exposing networkx-style ``nodes``, ``edges``, ``neighbors``
            and ``subgraph``.
        top_n: How many of the largest edge counts to consider (default 10).

    Returns:
        A list of ``G.subgraph(...)`` results, each made of one selected
        problem node and its direct neighbours. The list follows the order in
        which the problem nodes are listed by ``G.nodes``; it is not sorted by
        edge count. It is empty when the graph has no problem node.

    Notes:
        Selection is by count value, so problem nodes tied on a selected
        count are all kept and more than ``top_n`` subgraphs can be returned.
    """
    # Nodes of type PROBLEM
    problem_nodes = [
        node for node, data in G.nodes(data=True) if data.get('type') == 'problem'
    ]

    # Number of incident edges of each PROBLEM node
    edge_counts = {node: len(list(G.edges(node))) for node in problem_nodes}

    # Distinct edge-count values that rank among the top_n
    top_counts = set(heapq.nlargest(top_n, edge_counts.values()))

    # One subgraph (neighbours + the problem itself) per selected node
    return [
        G.subgraph(list(G.neighbors(node)) + [node])
        for node, count in edge_counts.items()
        if count in top_counts
    ]

In [63]:
# SUBGRAPHS
# Render one HTML view per highly discussed problem of the graph G built above.
import os

# Make sure the output folder exists before any file is written into it
os.makedirs("./subgraphs", exist_ok=True)

# Subgraph extraction (the JSON file is not read again: G already holds its content)
H_arr = extract_high_subgraphs(G)
count = 0  # stays 0 when no subgraph was extracted
for count, H in enumerate(H_arr, start=1):
    # NOTE: this reuses (and overwrites) the name holding the full-graph HTML path
    knowledge_graph_path = f"./subgraphs/view_subgraph_{count}.html"
    H_col = colour_graph(H)
    # Showing the knowledge graph
    visualize_graph(H_col, knowledge_graph_path)  # This saves the graph in HTML

    print(f'Knowledge graph stored in: {knowledge_graph_path}')



./subgraphs/view_subgraph_1.html
Knowledge graph stored in: ./subgraphs/view_subgraph_1.html
./subgraphs/view_subgraph_2.html
Knowledge graph stored in: ./subgraphs/view_subgraph_2.html
./subgraphs/view_subgraph_3.html
Knowledge graph stored in: ./subgraphs/view_subgraph_3.html
./subgraphs/view_subgraph_4.html
Knowledge graph stored in: ./subgraphs/view_subgraph_4.html
./subgraphs/view_subgraph_5.html
Knowledge graph stored in: ./subgraphs/view_subgraph_5.html
./subgraphs/view_subgraph_6.html
Knowledge graph stored in: ./subgraphs/view_subgraph_6.html
./subgraphs/view_subgraph_7.html
Knowledge graph stored in: ./subgraphs/view_subgraph_7.html
./subgraphs/view_subgraph_8.html
Knowledge graph stored in: ./subgraphs/view_subgraph_8.html
./subgraphs/view_subgraph_9.html
Knowledge graph stored in: ./subgraphs/view_subgraph_9.html
./subgraphs/view_subgraph_10.html
Knowledge graph stored in: ./subgraphs/view_subgraph_10.html
