In [8]:
import os
import pandas as pd
from pyvis.network import Network
import networkx as nx

# Step 1: Convert Parquet files to Excel
def convert_parquet_to_excel(parquet_path, excel_path):
    """Copy the table stored in a Parquet file into an Excel workbook.

    Args:
        parquet_path: Location of the Parquet file, passed to ``pd.read_parquet``.
        excel_path: Destination passed to ``DataFrame.to_excel``.

    Returns:
        None. The workbook is written with ``index=False``, so the DataFrame
        index is not emitted as a column.
    """
    pd.read_parquet(parquet_path).to_excel(excel_path, index=False)

# Locations of the Parquet inputs and of the Excel workbooks derived from them.
# Both sets of files live in the same artifacts directory.
data_dir = r"C:\Users\omar2\ragtest\output\20240907-222016\artifacts"  # Update your directory path

entities_parquet, relationships_parquet = (
    os.path.join(data_dir, filename)
    for filename in ('create_final_entities.parquet', 'create_final_relationships.parquet')
)
entities_excel, relationships_excel = (
    os.path.join(data_dir, filename)
    for filename in ('entities.xlsx', 'relationships.xlsx')
)

# Write one workbook per Parquet input (entities first, then relationships).
convert_parquet_to_excel(parquet_path=entities_parquet, excel_path=entities_excel)
convert_parquet_to_excel(parquet_path=relationships_parquet, excel_path=relationships_excel)

# Step 2: Create Graph from Excel files
# Reads the entity and relationship workbooks (`entities_excel`,
# `relationships_excel`, defined earlier in this cell), builds a directed
# NetworkX graph from them, and renders it to an HTML file with PyVis.
output_directory = r'C:\Users\omar2\OneDrive\Desktop\9-5-24\output'  # Output directory for HTML graph files
os.makedirs(output_directory, exist_ok=True)


def _require_columns(frame, required, sheet_label):
    """Raise ValueError naming every column in `required` that `frame` lacks."""
    missing = [col for col in required if col not in frame.columns]
    if missing:
        raise ValueError(
            f"{sheet_label} file is missing required column(s): {', '.join(missing)}"
        )


try:
    # Load both sheets. Blank cells are replaced with '' so the str()
    # conversions below never produce the literal text "nan".
    nodes_df = pd.read_excel(entities_excel).fillna('')
    edges_df = pd.read_excel(relationships_excel).fillna('')

    # Fail early with a readable message instead of a bare KeyError such as 'id'.
    _require_columns(nodes_df, ('id', 'name'), 'entities')
    _require_columns(edges_df, ('source', 'target'), 'relationships')

    # Create a directed NetworkX graph
    G = nx.DiGraph()  # Use DiGraph for directed graph

    # Define a professional color palette using subtle tones
    node_color = {
        "Person": "#4B8BBE",           # Soft Blue
        "Occupation": "#D4A5A5",       # Light Pink
        "Business": "#F28C28",         # Orange
        "Group": "#8E44AD",            # Medium Purple
        "Device": "#073B4C",           # Very Dark Blue
        "Content": "#06D6A0",          # Soft Green
        "Platform": "#EF476F",         # Soft Red
        "Contact": "#3498DB",          # Light Blue
        "Document": "#A3A847",         # Olive Green
        "SocialAccount": "#1F618D",    # Darker Blue
        "Email": "#D35400",            # Dark Orange
        "Company": "#F4D03F",          # Yellow
        "Website": "#16A085",          # Teal
        "PhoneNumber": "#C0392B",      # Brick Red
        "Service": "#FFD166",          # Soft Yellow
        "Event": "#118AB2",            # Darker Blue
        "Organization": "#FFB6C1",     # Light Pink (New)
        "System": "#9370DB",           # Medium Purple (New)
        "Campaign": "#FF6347",         # Tomato Red (New)
        "IPAddress": "#8B0000",        # Dark Red (New)
    }
    # Case-insensitive view of the palette, so a type written as "PERSON" or
    # "person" gets the same color as "Person". Exact matches are unaffected.
    color_by_type = {type_name.lower(): hex_code for type_name, hex_code in node_color.items()}

    # Add nodes from the nodes DataFrame, remembering which id each name maps to
    # so that relationship endpoints can be resolved below.
    id_by_name = {}
    for row in nodes_df.to_dict('records'):
        node_id = str(row['id'])  # Ensure id is string type
        label = str(row['name'])
        description = str(row.get('description', ''))
        node_type = str(row.get('type', ''))

        color = color_by_type.get(node_type.strip().lower(), '#BDBDBD')  # Default to a subtle gray
        G.add_node(node_id, label=label, title=description, color=color, size=15)
        id_by_name.setdefault(label, node_id)  # first entity wins on duplicate names

    entity_ids = set(G.nodes)

    # Add edges from the edges DataFrame with arrows
    skipped_edges = 0
    for row in edges_df.to_dict('records'):
        source = str(row['source'])
        target = str(row['target'])

        # A blank endpoint would otherwise create a phantom node with an empty id.
        if not source.strip() or not target.strip():
            skipped_edges += 1
            continue

        # add_edge() silently creates any endpoint that is not already a node.
        # NOTE(review): the relationships sheet may identify endpoints by entity
        # name rather than entity id (presumably the case for these artifacts;
        # confirm). Endpoints are matched against ids first and names second, so
        # edges attach to the styled entity nodes in either case. Endpoints that
        # match neither are still added as plain nodes, as before.
        if source not in entity_ids:
            source = id_by_name.get(source, source)
        if target not in entity_ids:
            target = id_by_name.get(target, target)

        relationship = str(row.get('description', ''))
        weight = row.get('weight', 1)  # Default weight if the column is absent
        if weight == '':
            weight = 1  # blank cell (turned into '' by fillna above) is also "missing"

        G.add_edge(source, target, label=relationship, title=relationship, weight=weight)

    if skipped_edges:
        print(f"Skipped {skipped_edges} relationship row(s) with a blank source or target")

    # Create a PyVis network from the NetworkX graph
    net = Network(notebook=False, width="100%", height="100vh", bgcolor="#FFFFFF", font_color="#333333", directed=True)
    net.from_nx(G)

    # Enable arrows and customize edges
    for edge in net.edges:
        edge['arrows'] = 'to'  # Add arrows pointing towards the target nodes
        edge['color'] = '#9E9E9E'  # Light gray for edges to keep focus on nodes
        edge['width'] = 1.5  # Set a uniform but subtle width for edges

    # Set physics settings for better layout
    net.set_options("""
    var options = {
      "nodes": {
        "borderWidth": 2,
        "shape": "dot",
        "font": {
          "size": 12,
          "face": "arial",
          "color": "#333333"
        },
        "shadow": {
          "enabled": true,
          "color": "rgba(0,0,0,0.2)",
          "size": 10
        }
      },
      "edges": {
        "arrows": {
          "to": {
            "enabled": true,
            "scaleFactor": 0.5
          }
        },
        "color": {
          "inherit": false,
          "opacity": 0.7
        },
        "smooth": {
          "enabled": true,
          "type": "dynamic"
        },
        "shadow": {
          "enabled": true,
          "color": "rgba(0,0,0,0.2)",
          "size": 5
        }
      },
      "physics": {
        "enabled": true,
        "barnesHut": {
          "gravitationalConstant": -30000,
          "centralGravity": 0.3,
          "springLength": 150,
          "springConstant": 0.05,
          "damping": 0.09
        },
        "minVelocity": 0.75
      }
    }
    """)

    # Define the output HTML file path
    graph_file_path = os.path.join(output_directory, "enhanced_professional_network_graph.html")

    # Save the graph as an HTML file
    net.save_graph(graph_file_path)

    print(f"Enhanced and professional graph saved as {graph_file_path}")

except Exception as e:
    # Report the failure without aborting the notebook run.
    print(f"Error processing files: {e}")
else:
    # Only claim success when nothing above raised; previously this message was
    # printed even right after an error had been reported.
    print("Conversion completed. Enhanced HTML graph file is saved in the output directory.")


Enhanced and professional graph saved as C:\Users\omar2\OneDrive\Desktop\9-5-24\output\enhanced_professional_network_graph.html
Conversion completed. Enhanced HTML graph file is saved in the output directory.
