In [5]:
import os
import pandas as pd
from pyvis.network import Network
import networkx as nx

# Build an interactive PyVis visualisation of the entity/relationship graph
# stored in the Parquet artifacts of one run, then export both tables to Excel.

# Directory containing the Parquet files for the run to visualise.
data_dir = r"C:\Users\omar2\ragtest\output\20240824-111436\artifacts"  # Replace with the actual timestamp

# Load the entity and relationship data
entities_file = os.path.join(data_dir, 'create_final_entities.parquet')
relationships_file = os.path.join(data_dir, 'create_final_relationships.parquet')

entities_df = pd.read_parquet(entities_file)
relationships_df = pd.read_parquet(relationships_file)

# Create a NetworkX graph
G = nx.Graph()

# Add nodes (entities) to the graph. Nodes are keyed by entity id; the name is
# the visible label and the description becomes the hover tooltip.
for entity_id, entity_name, entity_description in zip(
    entities_df['id'], entities_df['name'], entities_df['description']
):
    G.add_node(entity_id, label=entity_name, title=entity_description)

# Relationship endpoints must use the same keys as the nodes above, otherwise
# add_edge() silently creates new unlabeled nodes and the entity nodes stay
# disconnected.
# NOTE(review): GraphRAG relationship tables appear to store entity *names* in
# 'source'/'target' -- confirm against the Parquet schema. Both forms are
# handled here; if several entities share a name, the last one listed wins.
entity_ids = set(entities_df['id'])
id_by_name = dict(zip(entities_df['name'], entities_df['id']))


def _resolve_endpoint(endpoint):
    """Return the node key for a relationship endpoint.

    An endpoint that is already an entity id is returned as is; an entity
    name is translated to its id; anything else is returned unchanged so the
    edge is still added.
    """
    if endpoint in entity_ids:
        return endpoint
    return id_by_name.get(endpoint, endpoint)


# Add edges (relationships) to the graph
for source, target, relation_description in zip(
    relationships_df['source'],
    relationships_df['target'],
    relationships_df['description'],
):
    G.add_edge(
        _resolve_endpoint(source),
        _resolve_endpoint(target),
        title=relation_description,
    )

# Create a PyVis network from the NetworkX graph
net = Network(notebook=True, width="100%", height="700px", bgcolor="#222222", font_color="white")
net.from_nx(G)

# Customize the appearance of the graph
net.toggle_physics(True)
net.show_buttons(filter_=['physics'])

# Save the graph as an HTML file in the current directory
output_file = 'graphrag_network.html'
net.show(output_file)

print(f"Graph visualization saved as {output_file}")

# Save entities to an Excel file
entities_excel_output_file = os.path.join(data_dir, 'entities.xlsx')
entities_df.to_excel(entities_excel_output_file, index=False)
print(f"Entities saved to {entities_excel_output_file}")

# Save relationships to a separate Excel file
relationships_excel_output_file = os.path.join(data_dir, 'relationships.xlsx')
relationships_df.to_excel(relationships_excel_output_file, index=False)
print(f"Relationships saved to {relationships_excel_output_file}")


graphrag_network.html
Graph visualization saved as graphrag_network.html
Entities saved to C:\Users\omar2\ragtest\output\20240824-111436\artifacts\entities.xlsx
Relationships saved to C:\Users\omar2\ragtest\output\20240824-111436\artifacts\relationships.xlsx
