# Explore Entities from Azure Tenant Scan

This notebook queries the Neo4j database to show all entities (resources) discovered during the Azure tenant scan.

## Prerequisites
- Neo4j must be running (default: bolt://localhost:7687)
- You must have completed an `azure-tenant-grapher scan`

In [1]:
# Import required libraries
from neo4j import GraphDatabase
import os
import pandas as pd
from IPython.display import display, Markdown

In [2]:
# Connect to Neo4j
import os

uri = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
neo4j_user = os.getenv('NEO4J_USER', 'neo4j')
neo4j_password = os.getenv('NEO4J_PASSWORD', 'your-password-here')

auth = (neo4j_user, neo4j_password)

driver = GraphDatabase.driver(uri, auth=auth)
print("✅ Connected to Neo4j")

✅ Connected to Neo4j


## 1. Get All Entity Types (Node Labels)

In [3]:
with driver.session() as session:
    # Get all node labels (entity types)
    result = session.run('CALL db.labels()')
    labels = [record[0] for record in result]
    
    display(Markdown('### 📊 Entity Types Discovered in Your Tenant'))
    display(Markdown('---'))
    
    entity_summary = []
    total_entities = 0
    
    for label in sorted(labels):
        count_result = session.run(f'MATCH (n:`{label}`) RETURN count(n) as count')
        count = count_result.single()['count']
        total_entities += count
        entity_summary.append({'Entity Type': label, 'Count': count})
    
    # Display as DataFrame
    df = pd.DataFrame(entity_summary)
    display(df)
    
    display(Markdown(f'\n### 📈 **Total Entities: {total_entities}**'))

### 📊 Entity Types Discovered in Your Tenant

---

Unnamed: 0,Entity Type,Count
0,DNSZone,11
1,IdentityGroup,12
2,Original,3459
3,PrivateEndpoint,74
4,Region,16
5,Resource,6496
6,ResourceGroup,232
7,RoleAssignment,1042
8,RoleDefinition,79
9,ServicePrincipal,231



### 📈 **Total Entities: 12124**

## 2. Explore Entity Details with Examples

In [None]:
with driver.session() as session:
    labels = session.run('CALL db.labels()')
    labels = [record[0] for record in labels]
    
    for label in sorted(labels):
        count_result = session.run(f'MATCH (n:`{label}`) RETURN count(n) as count')
        count = count_result.single()['count']
        
        display(Markdown(f'### 📦 {label}: {count} entities'))
        
        # Get sample entities (up to 3)
        sample_query = f'MATCH (n:`{label}`) RETURN n LIMIT 3'
        sample_result = session.run(sample_query)
        sample_nodes = [record['n'] for record in sample_result]
        
        if sample_nodes:
            # Convert nodes to dictionaries for DataFrame
            rows = []
            for node in sample_nodes:
                props = dict(node.items())
                rows.append(props)
            
            # Display as DataFrame
            if rows:
                df_sample = pd.DataFrame(rows)
                # Limit column width for readability
                with pd.option_context('display.max_colwidth', 60):
                    display(df_sample)
            
            # Show connected nodes for the first example
            first_node = sample_nodes[0]
            props = dict(first_node.items())
            
            print(f'\nExample connections for first entity:')
            if 'name' in props:
                print(f'   Entity: {props["name"]}')
            elif 'id' in props:
                example_id = props['id'][:80] + '...' if len(props['id']) > 80 else props['id']
                print(f'   Entity ID: {example_id}')
            
            # Show connected nodes - query based on what identifier exists
            if 'id' in props and props['id']:
                # Use id if available
                rel_query = f"""
                MATCH (n:`{label}` {{id: $identifier}})-[r]->(m)
                RETURN type(r) as rel_type, labels(m) as target_labels, count(*) as count
                ORDER BY count DESC
                LIMIT 5
                """
                identifier = props['id']
            elif 'name' in props and props['name']:
                # Fall back to name
                rel_query = f"""
                MATCH (n:`{label}` {{name: $identifier}})-[r]->(m)
                RETURN type(r) as rel_type, labels(m) as target_labels, count(*) as count
                ORDER BY count DESC
                LIMIT 5
                """
                identifier = props['name']
            else:
                identifier = None
            
            if identifier:
                rel_result = session.run(rel_query, identifier=identifier)
                rel_records = list(rel_result)
                
                if rel_records:
                    print(f'   Connected nodes:')
                    for rel_rec in rel_records:
                        rel_type = rel_rec['rel_type']
                        target_labels = ', '.join(rel_rec['target_labels'])
                        rel_count = rel_rec['count']
                        print(f'      -{rel_type}-> ({target_labels}) : {rel_count}')
                else:
                    print(f'   No outgoing connections')
            
            print()

## 3. Get All Relationship Types

In [None]:
with driver.session() as session:
    rel_result = session.run('CALL db.relationshipTypes()')
    rel_types = [record[0] for record in rel_result]
    
    display(Markdown('### 🔗 Relationship Types'))
    display(Markdown('---'))
    
    rel_summary = []
    for rel_type in sorted(rel_types):
        rel_count = session.run(f'MATCH ()-[r:`{rel_type}`]->() RETURN count(r) as count')
        count = rel_count.single()['count']
        rel_summary.append({'Relationship Type': rel_type, 'Count': count})
    
    df_rels = pd.DataFrame(rel_summary)
    display(df_rels)
    
    # Show examples of each relationship type
    display(Markdown('\n### 📋 Relationship Examples'))
    
    for rel_type in sorted(rel_types):
        # Get 2 example relationships of this type
        example_query = f"""
        MATCH (source)-[r:`{rel_type}`]->(target)
        RETURN labels(source) as source_labels, 
               source.name as source_name,
               source.id as source_id,
               labels(target) as target_labels,
               target.name as target_name,
               target.id as target_id
        LIMIT 2
        """
        examples = session.run(example_query)
        example_records = list(examples)
        
        if example_records:
            print(f'\n{rel_type}:')
            for i, ex in enumerate(example_records, 1):
                source_label = ', '.join(ex['source_labels'])
                target_label = ', '.join(ex['target_labels'])
                
                # Use name if available, otherwise truncated id
                source_display = ex['source_name'] if ex['source_name'] else (ex['source_id'][:50] + '...' if ex['source_id'] and len(ex['source_id']) > 50 else ex['source_id'])
                target_display = ex['target_name'] if ex['target_name'] else (ex['target_id'][:50] + '...' if ex['target_id'] and len(ex['target_id']) > 50 else ex['target_id'])
                
                print(f'  {i}. ({source_label}:{source_display}) -[{rel_type}]-> ({target_label}:{target_display})')

## 4. Query Specific Resource Types

Let's explore some common Azure resource types:

In [6]:
# Query all Virtual Machines
with driver.session() as session:
    query = """
    MATCH (vm:Resource)
    WHERE vm.type = 'Microsoft.Compute/virtualMachines'
    RETURN vm.name AS Name, vm.location AS Location, vm.id AS ID
    LIMIT 10
    """
    result = session.run(query)
    records = [dict(record) for record in result]
    
    if records:
        display(Markdown('### 🖥️ Virtual Machines'))
        df_vms = pd.DataFrame(records)
        display(df_vms)
    else:
        print("No Virtual Machines found")

### 🖥️ Virtual Machines

Unnamed: 0,Name,Location,ID
0,Server01,westus,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
1,Server01,westus,vm-8aadb006d380a78b
2,cseifert-windows-vm,westus2,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
3,cseifert-windows-vm,westus2,vm-e7d8f11a336c37df
4,andyye-windows-server-vm,southcentralus,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
5,andyye-windows-server-vm,southcentralus,vm-b9a98c6bab229822
6,c2server,westus2,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
7,c2server,westus2,vm-f25a4e14d87390d9
8,klakkaraju-abvm,westus2,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
9,klakkaraju-abvm,westus2,vm-db34e1f997632955


In [7]:
# Query all Storage Accounts
with driver.session() as session:
    query = """
    MATCH (storage:Resource)
    WHERE storage.type = 'Microsoft.Storage/storageAccounts'
    RETURN storage.name AS Name, storage.location AS Location, storage.id AS ID
    LIMIT 10
    """
    result = session.run(query)
    records = [dict(record) for record in result]
    
    if records:
        display(Markdown('### 💾 Storage Accounts'))
        df_storage = pd.DataFrame(records)
        display(df_storage)
    else:
        print("No Storage Accounts found")

### 💾 Storage Accounts

Unnamed: 0,Name,Location,ID
0,cm160224hpcp4rein6,northcentralus,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
1,cm160224hpcp4rein6,northcentralus,storage-aecb8be3c7ff8984
2,tmp160224v7qxvc2ghd,northcentralus,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
3,tmp160224v7qxvc2ghd,northcentralus,storage-ba40f51d891b352a
4,simulandapia5ea,eastus,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
5,simulandapia5ea,eastus,storage-0c849600bc31b6ae
6,stadaptaieas670410800455,eastus2,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
7,stadaptaieas670410800455,eastus2,storage-83e6a8fbe0cee2ab
8,seccorestorage,eastus,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
9,seccorestorage,eastus,storage-5e6c99e130a06b0d


In [8]:
# Query all Resource Groups
with driver.session() as session:
    query = """
    MATCH (rg:ResourceGroup)
    RETURN rg.name AS Name, rg.location AS Location, rg.id AS ID
    LIMIT 10
    """
    result = session.run(query)
    records = [dict(record) for record in result]
    
    if records:
        display(Markdown('### 📁 Resource Groups'))
        df_rgs = pd.DataFrame(records)
        display(df_rgs)
    else:
        print("No Resource Groups found")

### 📁 Resource Groups

Unnamed: 0,Name,Location,ID
0,ARTBAS-160224hpcp4rein6,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
1,ARTBAS-TmpUpload-160224v7qxvc2ghd,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
2,adx,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
3,simuland-api,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
4,TheContinentalHotels,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
5,rg-adapt-ai,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
6,LogAnalyticsDefaultResources,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
7,security_research_readers,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
8,mordor,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...
9,Research1,,/subscriptions/9b00bc5e-9abc-45de-9958-02a9d92...


## 5. Explore Resource Relationships

Let's see how resources are connected:

In [9]:
# Example: Find all resources in a specific resource group
with driver.session() as session:
    query = """
    MATCH (rg:ResourceGroup)-[:CONTAINS]->(resource)
    RETURN rg.name AS ResourceGroup, 
           count(resource) AS ResourceCount
    ORDER BY ResourceCount DESC
    LIMIT 10
    """
    result = session.run(query)
    records = [dict(record) for record in result]
    
    if records:
        display(Markdown('### 📊 Resources per Resource Group'))
        df_rg_counts = pd.DataFrame(records)
        display(df_rg_counts)
    else:
        print("No resource group relationships found")

### 📊 Resources per Resource Group

Unnamed: 0,ResourceGroup,ResourceCount
0,rysweet-linux-vm-pool,168
1,SimuLand,161
2,MAIDAP,104
3,rg-adapt-ai,96
4,TheContinentalHotels,88
5,sparta_attackbot,69
6,ai-soc-analyst-rg,62
7,ai-soc-abhiram6-rg,53
8,ARTBAS-190724pleef40zad,49
9,ARTBAS-090824g961kjf0od,49


## 6. Custom Query

Write your own Cypher query to explore the data:

In [10]:
# Custom Cypher query
custom_query = """
MATCH (n)
RETURN labels(n) AS Labels, count(n) AS Count
ORDER BY Count DESC
LIMIT 20
"""

with driver.session() as session:
    result = session.run(custom_query)
    records = [dict(record) for record in result]
    
    display(Markdown('### 🔍 Custom Query Results'))
    df_custom = pd.DataFrame(records)
    display(df_custom)

### 🔍 Custom Query Results

Unnamed: 0,Labels,Count
0,"[Resource, Original]",3459
1,[Resource],3037
2,[RoleAssignment],1042
3,[Tag],356
4,[ResourceGroup],232
5,[ServicePrincipal],231
6,[User],114
7,[RoleDefinition],79
8,[PrivateEndpoint],74
9,[Region],16


## 8. Close Connection

In [11]:
# Import additional libraries for graph visualization
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict

print("✅ Graph libraries imported")

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Step 1: Query ALL relationships with full context (labels + resource types)
with driver.session() as session:
    query = """
    MATCH (source)-[r]->(target)
    RETURN labels(source) as source_labels,
           source.type as source_type,
           type(r) as rel_type,
           labels(target) as target_labels,
           target.type as target_type
    """
    result = session.run(query)
    all_relationships = []
    
    for record in result:
        all_relationships.append({
            'source_labels': record['source_labels'],
            'source_type': record['source_type'],
            'rel_type': record['rel_type'],
            'target_labels': record['target_labels'],
            'target_type': record['target_type']
        })
    
    print(f"✅ Loaded {len(all_relationships)} relationships from graph")
    print("   Now aggregating by resource types...")

In [None]:
# Step 2: Aggregate relationships by standardized resource types
def get_resource_type_name(labels, azure_type):
    """
    Determine standardized resource type name from labels and Azure type.
    Priority:
    1. If it's a Resource node with Azure type, extract the resource type (e.g., virtualMachines)
    2. Otherwise use the primary label (e.g., ResourceGroup, Subscription, Tag)
    """
    if not labels:
        return "Unknown"
    
    # Check if it's a Resource node
    if 'Resource' in labels and azure_type:
        # Extract resource type from Azure resource type
        # e.g., "Microsoft.Compute/virtualMachines" -> "virtualMachines"
        parts = azure_type.split('/')
        if len(parts) >= 2:
            return parts[-1]  # Last part is the resource type
        return parts[0]
    
    # For non-Resource nodes, use the most specific label
    # Filter out generic labels like 'Original'
    filtered_labels = [l for l in labels if l not in ['Original', 'Resource']]
    if filtered_labels:
        return filtered_labels[0]
    
    return labels[0] if labels else "Unknown"

# Aggregate relationships
relationship_counts = defaultdict(lambda: defaultdict(int))

for rel in all_relationships:
    source_type_name = get_resource_type_name(rel['source_labels'], rel['source_type'])
    target_type_name = get_resource_type_name(rel['target_labels'], rel['target_type'])
    rel_type = rel['rel_type']
    
    # Create aggregation key: (source_type, rel_type, target_type)
    key = (source_type_name, rel_type, target_type_name)
    relationship_counts[key][rel_type] += 1

# Convert to list for sorting and display
aggregated_relationships = []
for (source_type, rel_type, target_type), counts in relationship_counts.items():
    frequency = counts[rel_type]
    aggregated_relationships.append({
        'source_type': source_type,
        'rel_type': rel_type,
        'target_type': target_type,
        'frequency': frequency
    })

# Sort by frequency
aggregated_relationships.sort(key=lambda x: x['frequency'], reverse=True)

display(Markdown('### 🔗 Top 100 Aggregated Relationship Patterns'))
print(f"Total unique patterns: {len(aggregated_relationships)}\n")
print("Top 100 by frequency:")
for i, rel in enumerate(aggregated_relationships[:100], 1):
    same_type = " [SAME TYPE]" if rel['source_type'] == rel['target_type'] else ""
    print(f"{i:3d}. {rel['source_type']:35s} -[{rel['rel_type']:20s}]-> {rel['target_type']:35s} : {rel['frequency']:5d}{same_type}")

In [None]:
# Step 3: Build NetworkX graph from aggregated data
G = nx.MultiDiGraph()

# Collect all unique resource types and their frequencies
resource_type_counts = defaultdict(int)

# Count occurrences of each resource type from relationships
for rel in aggregated_relationships:
    resource_type_counts[rel['source_type']] += rel['frequency']
    resource_type_counts[rel['target_type']] += rel['frequency']

# Add nodes for all resource types
for resource_type, count in resource_type_counts.items():
    G.add_node(resource_type, count=count)

# Add edges for relationships
edge_counts = defaultdict(int)
for rel in aggregated_relationships:
    source = rel['source_type']
    target = rel['target_type']
    rel_type = rel['rel_type']
    frequency = rel['frequency']
    
    # Add edge
    G.add_edge(source, target, relationship=rel_type, frequency=frequency)
    
    # Track aggregated edge counts (for visualization)
    edge_key = (source, target)
    edge_counts[edge_key] += frequency

print(f"✅ Graph constructed from aggregated data:")
print(f"   - Nodes (resource types): {G.number_of_nodes()}")
print(f"   - Edges (relationship patterns): {G.number_of_edges()}")
print(f"   - Unique source->target patterns: {len(edge_counts)}")
print(f"\nTop 20 resource types by connection frequency:")
for i, (rtype, count) in enumerate(sorted(resource_type_counts.items(), key=lambda x: x[1], reverse=True)[:20], 1):
    print(f"  {i:2d}. {rtype:40s} : {count:6d}")

In [None]:
# Step 4: Visualize the aggregated graph with architectural pattern overlay

# First, define architectural patterns (will be used for overlay)
architectural_patterns = {
    "Web Application": {
        "resources": ["sites", "serverFarms", "storageAccounts", "components"],
        "description": "App Service web application with storage and monitoring"
    },
    "Virtual Machine Workload": {
        "resources": ["virtualMachines", "disks", "networkInterfaces", "virtualNetworks", "networkSecurityGroups"],
        "description": "IaaS VM with networking and storage"
    },
    "Container Platform": {
        "resources": ["managedClusters", "containerRegistries", "virtualNetworks", "loadBalancers"],
        "description": "AKS or container-based platform"
    },
    "Data Platform": {
        "resources": ["servers", "databases", "storageAccounts", "privateEndpoints"],
        "description": "Database with secure connectivity and storage"
    },
    "Serverless Application": {
        "resources": ["sites", "storageAccounts", "components", "vaults"],
        "description": "Function App with storage, monitoring, and secrets"
    },
    "Data Analytics": {
        "resources": ["clusters", "workspaces", "storageAccounts", "namespaces"],
        "description": "Analytics platform with data ingestion and storage"
    },
    "Secure Workload": {
        "resources": ["vaults", "privateEndpoints", "privateDnsZones", "networkInterfaces"],
        "description": "Resources with private networking and Key Vault"
    },
    "Managed Identity Pattern": {
        "resources": ["userAssignedIdentities", "sites", "managedClusters", "virtualMachines"],
        "description": "Resources using managed identities for authentication"
    },
    "Monitoring & Observability": {
        "resources": ["components", "workspaces", "dataCollectionRules", "smartDetectorAlertRules"],
        "description": "Application Insights and Log Analytics monitoring"
    },
    "Network Security": {
        "resources": ["networkSecurityGroups", "virtualNetworks", "subnets", "bastionHosts"],
        "description": "Network isolation and secure access"
    }
}

# Detect which patterns exist in the environment
pattern_matches = {}
for pattern_name, pattern_info in architectural_patterns.items():
    pattern_resources = set(pattern_info['resources'])
    existing_resources = set(G.nodes())
    matched_resources = pattern_resources.intersection(existing_resources)

    if len(matched_resources) >= 2:
        connection_count = 0
        pattern_edges = []

        for source in matched_resources:
            for target in matched_resources:
                if source != target and G.has_edge(source, target):
                    edges = G.get_edge_data(source, target)
                    for key, data in edges.items():
                        connection_count += data.get('frequency', 1)
                        pattern_edges.append((source, data['relationship'], target))

        pattern_matches[pattern_name] = {
            "matched_resources": list(matched_resources),
            "missing_resources": list(pattern_resources - matched_resources),
            "connection_count": connection_count,
            "pattern_edges": pattern_edges[:5],
            "completeness": len(matched_resources) / len(pattern_resources) * 100
        }

print(f"📐 Detected {len(pattern_matches)} architectural patterns")

# Filter to show only the most significant nodes
top_n_nodes = 30
top_nodes = sorted(resource_type_counts.items(), key=lambda x: x[1], reverse=True)[:top_n_nodes]
top_node_names = [name for name, _ in top_nodes]

# Create subgraph with only top nodes
G_filtered = G.subgraph(top_node_names).copy()

# Compute layout
pos = nx.spring_layout(G_filtered, k=3, iterations=50, seed=42)

# Assign patterns to nodes
node_pattern_map = {}
node_colors = []

for node in G_filtered.nodes():
    node_patterns = []
    for pattern_name, match in pattern_matches.items():
        if node in match['matched_resources']:
            node_patterns.append(pattern_name)
    node_pattern_map[node] = node_patterns

    if node_patterns:
        best_pattern = max(node_patterns, key=lambda p: pattern_matches[p]['completeness'])
        pattern_index = list(pattern_matches.keys()).index(best_pattern)
        node_colors.append(pattern_index)
    else:
        node_colors.append(-1)

# Calculate sizes and edge properties
node_sizes = [G_filtered.nodes[node]['count'] / 4 for node in G_filtered.nodes()]

# Separate pattern vs cross-pattern edges
pattern_edges = []
cross_pattern_edges = []
pattern_edge_widths = []
cross_pattern_edge_widths = []
pattern_edge_colors = []

for u, v, data in G_filtered.edges(data=True):
    freq = edge_counts.get((u, v), 0)
    edge_width = max(1, freq / 50)

    u_patterns = set(node_pattern_map.get(u, []))
    v_patterns = set(node_pattern_map.get(v, []))
    shared_patterns = u_patterns.intersection(v_patterns)

    if shared_patterns:
        pattern_edges.append((u, v))
        pattern_edge_widths.append(edge_width * 2.5)
        shared_pattern = list(shared_patterns)[0]
        pattern_index = list(pattern_matches.keys()).index(shared_pattern)
        pattern_edge_colors.append(pattern_index)
    else:
        cross_pattern_edges.append((u, v))
        cross_pattern_edge_widths.append(edge_width * 0.4)

# Create visualization
from matplotlib.patches import Polygon
from scipy.spatial import ConvexHull
import numpy as np

fig, ax = plt.subplots(1, 1, figsize=(28, 24))

# Draw pattern boundaries (using patches, zorder works here)
cmap = plt.cm.tab10
pattern_legend = []

for pattern_idx, (pattern_name, match) in enumerate(pattern_matches.items()):
    pattern_nodes = [n for n in match['matched_resources'] if n in G_filtered.nodes()]

    if len(pattern_nodes) >= 3:
        try:
            points = np.array([[pos[node][0], pos[node][1]] for node in pattern_nodes])
            center = points.mean(axis=0)
            points_expanded = center + (points - center) * 1.15
            hull = ConvexHull(points_expanded)
            hull_points = points_expanded[hull.vertices]

            color = cmap(pattern_idx / len(pattern_matches))
            polygon = Polygon(hull_points, facecolor=color, alpha=0.08,
                            edgecolor=color, linewidth=3, linestyle='--', zorder=1)
            ax.add_patch(polygon)

            from matplotlib.patches import Patch
            pattern_legend.append(
                Patch(facecolor=color, edgecolor=color,
                      label=f"{pattern_name} ({match['completeness']:.0f}%)", alpha=0.5)
            )
        except:
            pass

# Draw cross-pattern edges first (gray background) - no zorder parameter
if cross_pattern_edges:
    nx.draw_networkx_edges(G_filtered, pos, edgelist=cross_pattern_edges,
                           width=cross_pattern_edge_widths, alpha=0.15,
                           edge_color='gray', arrows=True, arrowsize=10,
                           arrowstyle='->', connectionstyle='arc3,rad=0.05',
                           ax=ax)

# Draw pattern edges (colored by pattern) - no zorder parameter
if pattern_edges:
    for idx, (u, v) in enumerate(pattern_edges):
        edge_color = cmap(pattern_edge_colors[idx] / len(pattern_matches))
        nx.draw_networkx_edges(G_filtered, pos, edgelist=[(u, v)],
                               width=pattern_edge_widths[idx], alpha=0.6,
                               edge_color=[edge_color], arrows=True, arrowsize=15,
                               arrowstyle='->', connectionstyle='arc3,rad=0.1',
                               ax=ax)

# Draw nodes (zorder works here)
nx.draw_networkx_nodes(G_filtered, pos, node_size=node_sizes,
                       node_color=node_colors, cmap=cmap, vmin=-1,
                       vmax=len(pattern_matches)-1, alpha=0.95,
                       edgecolors='black', linewidths=2.5, ax=ax)

# Draw labels (no zorder parameter)
nx.draw_networkx_labels(G_filtered, pos, font_size=10,
                        font_weight='bold', ax=ax)

# Add legend
if pattern_legend:
    ax.legend(handles=pattern_legend, loc='upper left', fontsize=10,
              framealpha=0.95, title="Architectural Patterns", title_fontsize=12)

ax.set_title(f"Azure Resource Graph with Architectural Pattern Overlay (Top {top_n_nodes})\n" +
             "Dashed boundaries = Pattern groupings | Thick colored edges = Intra-pattern | " +
             "Thin gray = Cross-pattern\n" +
             "Node color = Pattern | Node size = Connection frequency",
             fontsize=16, fontweight='bold', pad=25)
ax.axis('off')
plt.tight_layout()
plt.show()

print(f"\n📊 Visualization:")
print(f"   - Resource types: {len(G_filtered.nodes())}")
print(f"   - Intra-pattern edges: {len(pattern_edges)} (colored)")
print(f"   - Cross-pattern edges: {len(cross_pattern_edges)} (gray)")
print(f"   - Patterns detected: {len(pattern_matches)}")

In [None]:
# Individual Pattern Visualizations
# Show the complete graph with one pattern highlighted at a time

import math

# Calculate grid layout for subplots
n_patterns = len(pattern_matches)
n_cols = 3
n_rows = math.ceil(n_patterns / n_cols)

# Create figure with subplots
fig, axes = plt.subplots(n_rows, n_cols, figsize=(36, 12 * n_rows))
axes_flat = axes.flatten() if n_patterns > 1 else [axes]

# Use the same layout for all visualizations
pos_consistent = nx.spring_layout(G_filtered, k=3, iterations=50, seed=42)

# For each pattern, create a focused visualization
for pattern_idx, (pattern_name, match) in enumerate(pattern_matches.items()):
    ax = axes_flat[pattern_idx]

    # Get nodes in this pattern
    pattern_nodes = [n for n in match['matched_resources'] if n in G_filtered.nodes()]

    # Categorize all nodes: in-pattern, out-pattern
    node_colors_focus = []
    node_sizes_focus = []

    for node in G_filtered.nodes():
        size = G_filtered.nodes[node]['count'] / 4

        if node in pattern_nodes:
            # Node belongs to this pattern - highlight it
            node_colors_focus.append('orange')
            node_sizes_focus.append(size * 1.5)  # Make it bigger
        else:
            # Node doesn't belong to this pattern - dim it
            node_colors_focus.append('lightgray')
            node_sizes_focus.append(size * 0.7)

    # Categorize edges: intra-pattern (within this pattern), related (touching this pattern), other
    intra_pattern_edges = []
    related_edges = []
    other_edges = []

    intra_widths = []
    related_widths = []
    other_widths = []

    for u, v, data in G_filtered.edges(data=True):
        freq = edge_counts.get((u, v), 0)
        edge_width = max(1, freq / 50)

        u_in_pattern = u in pattern_nodes
        v_in_pattern = v in pattern_nodes

        if u_in_pattern and v_in_pattern:
            # Both nodes in pattern - intra-pattern edge
            intra_pattern_edges.append((u, v))
            intra_widths.append(edge_width * 3)
        elif u_in_pattern or v_in_pattern:
            # One node in pattern - related edge
            related_edges.append((u, v))
            related_widths.append(edge_width * 1.5)
        else:
            # Neither node in pattern - background edge
            other_edges.append((u, v))
            other_widths.append(edge_width * 0.3)

    # Draw pattern boundary if possible
    if len(pattern_nodes) >= 3:
        try:
            from matplotlib.patches import Polygon
            from scipy.spatial import ConvexHull
            import numpy as np

            points = np.array([[pos_consistent[node][0], pos_consistent[node][1]] for node in pattern_nodes])
            center = points.mean(axis=0)
            points_expanded = center + (points - center) * 1.2
            hull = ConvexHull(points_expanded)
            hull_points = points_expanded[hull.vertices]

            polygon = Polygon(hull_points, facecolor='orange', alpha=0.15,
                            edgecolor='orange', linewidth=4, linestyle='--')
            ax.add_patch(polygon)
        except:
            pass

    # Draw edges in layers (background to foreground)
    # 1. Other edges (very faint)
    if other_edges:
        nx.draw_networkx_edges(G_filtered, pos_consistent, edgelist=other_edges,
                               width=other_widths, alpha=0.05, edge_color='gray',
                               arrows=True, arrowsize=8, arrowstyle='->',
                               connectionstyle='arc3,rad=0.05', ax=ax)

    # 2. Related edges (connecting to pattern)
    if related_edges:
        nx.draw_networkx_edges(G_filtered, pos_consistent, edgelist=related_edges,
                               width=related_widths, alpha=0.4, edge_color='steelblue',
                               arrows=True, arrowsize=12, arrowstyle='->',
                               connectionstyle='arc3,rad=0.08', ax=ax)

    # 3. Intra-pattern edges (within pattern - highlighted)
    if intra_pattern_edges:
        nx.draw_networkx_edges(G_filtered, pos_consistent, edgelist=intra_pattern_edges,
                               width=intra_widths, alpha=0.8, edge_color='darkorange',
                               arrows=True, arrowsize=16, arrowstyle='->',
                               connectionstyle='arc3,rad=0.1', ax=ax)

    # Draw nodes
    nx.draw_networkx_nodes(G_filtered, pos_consistent,
                           node_size=node_sizes_focus,
                           node_color=node_colors_focus,
                           alpha=0.9,
                           edgecolors='black',
                           linewidths=2,
                           ax=ax)

    # Draw labels (only for pattern nodes and directly connected nodes)
    nodes_to_label = set(pattern_nodes)
    for u, v in related_edges:
        nodes_to_label.add(u)
        nodes_to_label.add(v)

    labels_subset = {node: node for node in nodes_to_label if node in G_filtered.nodes()}
    nx.draw_networkx_labels(G_filtered, pos_consistent,
                            labels=labels_subset,
                            font_size=9,
                            font_weight='bold',
                            ax=ax)

    # Title with pattern info
    ax.set_title(f"{pattern_name}\n" +
                 f"Completeness: {match['completeness']:.0f}% | " +
                 f"Resources: {len(pattern_nodes)} | " +
                 f"Intra-pattern edges: {len(intra_pattern_edges)} | " +
                 f"Cross-pattern edges: {len(related_edges)}",
                 fontsize=12, fontweight='bold', pad=10)
    ax.axis('off')

# Hide extra subplots if any
for idx in range(n_patterns, len(axes_flat)):
    axes_flat[idx].axis('off')

plt.suptitle("Individual Architectural Pattern Views\n" +
             "Orange nodes/edges = Pattern resources | Blue edges = Cross-pattern connections | Gray = Other resources",
             fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout()
plt.show()

print(f"\n📊 Individual Pattern Visualizations:")
print(f"   Created {n_patterns} pattern-focused views")
print(f"\nPattern Summary:")
for pattern_name, match in sorted(pattern_matches.items(), key=lambda x: x[1]['completeness'], reverse=True):
    pattern_nodes_viz = [n for n in match['matched_resources'] if n in G_filtered.nodes()]
    print(f"   • {pattern_name}: {len(pattern_nodes_viz)} resources visible in top {top_n_nodes}")

In [None]:
# Step 5: Export aggregated graph data to JSON
import json

graph_export = {
    "nodes": [
        {
            "id": node,
            "label": node,
            "count": G.nodes[node]['count']
        }
        for node in G.nodes()
    ],
    "edges": [
        {
            "source": u,
            "target": v,
            "relationship": data['relationship'],
            "frequency": data['frequency']
        }
        for u, v, data in G.edges(data=True)
    ],
    "summary": {
        "total_nodes": G.number_of_nodes(),
        "total_edges": G.number_of_edges(),
        "top_resource_types": [
            {"type": name, "connection_count": count} 
            for name, count in top_nodes[:20]
        ],
        "aggregation_method": "By Azure resource type and node labels",
        "source_relationships": len(all_relationships)
    }
}

# Save to file
output_file = "/tmp/azure_resource_graph_aggregated.json"
with open(output_file, 'w') as f:
    json.dump(graph_export, f, indent=2)

print(f"✅ Aggregated graph data exported to: {output_file}")
print(f"   - {len(graph_export['nodes'])} unique resource types")
print(f"   - {len(graph_export['edges'])} unique relationship patterns")
print(f"   - Aggregated from {len(all_relationships):,} individual relationships")
print(f"\nYou can use this JSON file with visualization tools like D3.js, Cytoscape, or Gephi")

In [None]:
# Close the driver connection
driver.close()
print("✅ Connection closed")