In [3]:
import json
import networkx as nx

# The full AWS Knowledge Graph data provided in the Canvas, stored as a raw string.
# In a production environment, you would typically read this from a separate .json file.
AWS_KG_JSON_DATA = """
{
  "groundstation": "diagrams.aws.satellite.GroundStation",
  "satellite communication": "diagrams.aws.satellite.GroundStation",
  "space data": "diagrams.aws.satellite.GroundStation",
  "telemetry": "diagrams.aws.satellite.GroundStation",
  "aws.satellite.groundstation": "diagrams.aws.satellite.GroundStation",
  "satellite": "diagrams.aws.satellite.Satellite",
  "space": "diagrams.aws.satellite.Satellite",
  "orbit": "diagrams.aws.satellite.Satellite",
  "adconnector": "diagrams.aws.security.AdConnector",
  "active directory connector": "diagrams.aws.security.AdConnector",
  "on-premises directory": "diagrams.aws.security.AdConnector",
  "artifact": "diagrams.aws.security.Artifact",
  "compliance reports": "diagrams.aws.security.Artifact",
  "security documents": "diagrams.aws.security.Artifact",
  "certificateauthority": "diagrams.aws.security.CertificateAuthority",
  "private ca": "diagrams.aws.security.CertificateAuthority",
  "issue certificates": "diagrams.aws.security.CertificateAuthority",
  "certificatemanager": "diagrams.aws.security.CertificateManager",
  "acm": "diagrams.aws.security.CertificateManager",
  "ssl/tls": "diagrams.aws.security.CertificateManager",
  "manage certificates": "diagrams.aws.security.CertificateManager",
  "clouddirectory": "diagrams.aws.security.CloudDirectory",
  "hierarchy directory": "diagrams.aws.security.CloudDirectory",
  "policy store": "diagrams.aws.security.CloudDirectory",
  "cloudhsm": "diagrams.aws.security.Cloudhsm",
  "hsm": "diagrams.aws.security.Cloudhsm",
  "fips 140-2": "diagrams.aws.security.Cloudhsm",
  "cryptographic keys": "diagrams.aws.security.Cloudhsm",
  "cognito": "diagrams.aws.security.Cognito",
  "user pool": "diagrams.aws.security.Cognito",
  "identity pool": "diagrams.aws.security.Cognito",
  "sign up": "diagrams.aws.security.Cognito",
  "federation": "diagrams.aws.security.Cognito",
  "detective": "diagrams.aws.security.Detective",
  "security investigation": "diagrams.aws.security.Detective",
  "root cause analysis": "diagrams.aws.security.Detective",
  "directoryservice": "diagrams.aws.security.DirectoryService",
  "ds": "diagrams.aws.security.DirectoryService",
  "managed active directory": "diagrams.aws.security.DirectoryService",
  "ldap": "diagrams.aws.security.DirectoryService",
  "firewallmanager": "diagrams.aws.security.FirewallManager",
  "fms": "diagrams.aws.security.FirewallManager",
  "central firewall": "diagrams.aws.security.FirewallManager",
  "security policy management": "diagrams.aws.security.FirewallManager",
  "guardduty": "diagrams.aws.security.Guardduty",
  "threat detection": "diagrams.aws.security.Guardduty",
  "malicious activity": "diagrams.aws.security.Guardduty",
  "intrusion detection": "diagrams.aws.security.Guardduty",
  "iamaccessanalyzer": "diagrams.aws.security.IdentityAndAccessManagementIamAccessAnalyzer",
  "access analyzer": "diagrams.aws.security.IdentityAndAccessManagementIamAccessAnalyzer",
  "public access validation": "diagrams.aws.security.IdentityAndAccessManagementIamAccessAnalyzer",
  "identityandaccessmanagementiamaddon": "diagrams.aws.security.IdentityAndAccessManagementIamAddOn",
  "identityandaccessmanagementiamawsstsalternate": "diagrams.aws.security.IdentityAndAccessManagementIamAWSStsAlternate",
  "identityandaccessmanagementiamawssts": "diagrams.aws.security.IdentityAndAccessManagementIamAWSSts",
  "iamawssts": "diagrams.aws.security.IdentityAndAccessManagementIamAWSSts",
  "security token service": "diagrams.aws.security.IdentityAndAccessManagementIamAWSSts",
  "temporary credentials": "diagrams.aws.security.IdentityAndAccessManagementIamAWSSts",
  "identityandaccessmanagementiamdataencryptionkey": "diagrams.aws.security.IdentityAndAccessManagementIamDataEncryptionKey",
  "dek": "diagrams.aws.security.IdentityAndAccessManagementIamDataEncryptionKey",
  "identityandaccessmanagementiamencrypteddata": "diagrams.aws.security.IdentityAndAccessManagementIamEncryptedData",
  "encrypted data": "diagrams.aws.security.IdentityAndAccessManagementIamEncryptedData",
  "identityandaccessmanagementiamlongtermsecuritycredential": "diagrams.aws.security.IdentityAndAccessManagementIamLongTermSecurityCredential",
  "long term credentials": "diagrams.aws.security.IdentityAndAccessManagementIamLongTermSecurityCredential",
  "identityandaccessmanagementiammfatoken": "diagrams.aws.security.IdentityAndAccessManagementIamMfaToken",
  "mfa": "diagrams.aws.security.IdentityAndAccessManagementIamMfaToken",
  "identityandaccessmanagementiampermissions": "diagrams.aws.security.IdentityAndAccessManagementIamPermissions",
  "iampermissions": "diagrams.aws.security.IdentityAndAccessManagementIamPermissions",
  "access policy": "diagrams.aws.security.IdentityAndAccessManagementIamPermissions",
  "identityandaccessmanagementiamrole": "diagrams.aws.security.IdentityAndAccessManagementIamRole",
  "iamrole": "diagrams.aws.security.IdentityAndAccessManagementIamRole",
  "assume role": "diagrams.aws.security.IdentityAndAccessManagementIamRole",
  "identityandaccessmanagementiamtemporarysecuritycredential": "diagrams.aws.security.IdentityAndAccessManagementIamTemporarySecurityCredential",
  "temporary security credentials": "diagrams.aws.security.IdentityAndAccessManagementIamTemporarySecurityCredential",
  "identityandaccessmanagementiam": "diagrams.aws.security.IdentityAndAccessManagementIam",
  "iam": "diagrams.aws.security.IdentityAndAccessManagementIam",
  "authentication": "diagrams.aws.security.IdentityAndAccessManagementIam",
  "authorization": "diagrams.aws.security.IdentityAndAccessManagementIam",
  "access control": "diagrams.aws.security.IdentityAndAccessManagementIam",
  "inspectoragent": "diagrams.aws.security.InspectorAgent",
  "inspector": "diagrams.aws.security.Inspector",
  "vulnerability scanning": "diagrams.aws.security.Inspector",
  "application security analysis": "diagrams.aws.security.Inspector",
  "keymanagementservice": "diagrams.aws.security.KeyManagementService",
  "kms": "diagrams.aws.security.KeyManagementService",
  "encryption keys": "diagrams.aws.security.KeyManagementService",
  "cmk": "diagrams.aws.security.KeyManagementService",
  "macie": "diagrams.aws.security.Macie",
  "sensitive data discovery": "diagrams.aws.security.Macie",
  "pii detection": "diagrams.aws.security.Macie",
  "managedmicrosoftad": "diagrams.aws.security.ManagedMicrosoftAd",
  "microsoft ad": "diagrams.aws.security.ManagedMicrosoftAd",
  "managed ldap": "diagrams.aws.security.ManagedMicrosoftAd",
  "resourceaccessmanager": "diagrams.aws.security.ResourceAccessManager",
  "ram": "diagrams.aws.security.ResourceAccessManager",
  "share resources": "diagrams.aws.security.ResourceAccessManager",
  "multi-account sharing": "diagrams.aws.security.ResourceAccessManager",
  "secretsmanager": "diagrams.aws.security.SecretsManager",
  "database credentials": "diagrams.aws.security.SecretsManager",
  "api keys": "diagrams.aws.security.SecretsManager",
  "secret rotation": "diagrams.aws.security.SecretsManager",
  "securityhubfinding": "diagrams.aws.security.SecurityHubFinding",
  "securityhub": "diagrams.aws.security.SecurityHub",
  "centralized findings": "diagrams.aws.security.SecurityHub",
  "security compliance": "diagrams.aws.security.SecurityHub",
  "securityidentityandcompliance": "diagrams.aws.security.SecurityIdentityAndCompliance",
  "securitylake": "diagrams.aws.security.SecurityLake",
  "centralized security data": "diagrams.aws.security.SecurityLake",
  "open cyber security schema framework": "diagrams.aws.security.SecurityLake",
  "shieldadvanced": "diagrams.aws.security.ShieldAdvanced",
  "shield": "diagrams.aws.security.Shield",
  "ddos protection": "diagrams.aws.security.Shield",
  "mitigate attack": "diagrams.aws.security.Shield",
  "simplead": "diagrams.aws.security.SimpleAd",
  "linux active directory": "diagrams.aws.security.SimpleAd",
  "singlesignon": "diagrams.aws.security.SingleSignOn",
  "sso": "diagrams.aws.security.SingleSignOn",
  "user access portal": "diagrams.aws.security.SingleSignOn",
  "waffilteringrule": "diagrams.aws.security.WAFFilteringRule",
  "waf": "diagrams.aws.security.WAF",
  "web application firewall": "diagrams.aws.security.WAF",
  "block sql injection": "diagrams.aws.security.WAF",
  "cross site scripting": "diagrams.aws.security.WAF",
  "backup": "diagrams.aws.storage.Backup",
  "central backup": "diagrams.aws.storage.Backup",
  "data retention": "diagrams.aws.storage.Backup",
  "cloudenduredisasterrecovery": "diagrams.aws.storage.CloudendureDisasterRecovery",
  "cdr": "diagrams.aws.storage.CloudendureDisasterRecovery",
  "disaster recovery": "diagrams.aws.storage.CloudendureDisasterRecovery",
  "minimal downtime": "diagrams.aws.storage.CloudendureDisasterRecovery",
  "efsinfrequentaccessprimarybg": "diagrams.aws.storage.EFSInfrequentaccessPrimaryBg",
  "efsstandardprimarybg": "diagrams.aws.storage.EFSStandardPrimaryBg",
  "elasticblockstoreebssnapshot": "diagrams.aws.storage.ElasticBlockStoreEBSSnapshot",
  "ebs snapshot": "diagrams.aws.storage.ElasticBlockStoreEBSSnapshot",
  "elasticblockstoreebsvolume": "diagrams.aws.storage.ElasticBlockStoreEBSVolume",
  "ebs volume": "diagrams.aws.storage.ElasticBlockStoreEBSVolume",
  "elasticblockstoreebs": "diagrams.aws.storage.ElasticBlockStoreEBS",
  "ebs": "diagrams.aws.storage.ElasticBlockStoreEBS",
  "block storage": "diagrams.aws.storage.ElasticBlockStoreEBS",
  "ec2 volume": "diagrams.aws.storage.ElasticBlockStoreEBS",
  "elasticfilesystemefsfilesystem": "diagrams.aws.storage.ElasticFileSystemEFSFileSystem",
  "elasticfilesystemefs": "diagrams.aws.storage.ElasticFileSystemEFS",
  "efs": "diagrams.aws.storage.ElasticFileSystemEFS",
  "file storage": "diagrams.aws.storage.ElasticFileSystemEFS",
  "nfs": "diagrams.aws.storage.ElasticFileSystemEFS",
  "shared file system": "diagrams.aws.storage.ElasticFileSystemEFS",
  "fsxforlustre": "diagrams.aws.storage.FsxForLustre",
  "lustre file system": "diagrams.aws.storage.FsxForLustre",
  "high performance compute storage": "diagrams.aws.storage.FsxForLustre",
  "fsxforwindowsfileserver": "diagrams.aws.storage.FsxForWindowsFileServer",
  "windows file server": "diagrams.aws.storage.FsxForWindowsFileServer",
  "smb storage": "diagrams.aws.storage.FsxForWindowsFileServer",
  "fsx": "diagrams.aws.storage.Fsx",
  "managed third-party file system": "diagrams.aws.storage.Fsx",
  "multiplevolumesresource": "diagrams.aws.storage.MultipleVolumesResource",
  "s3accesspoints": "diagrams.aws.storage.S3AccessPoints",
  "s3 glacier archive": "diagrams.aws.storage.S3GlacierArchive",
  "s3 glacier vault": "diagrams.aws.storage.S3GlacierVault",
  "s3glacier": "diagrams.aws.storage.S3Glacier",
  "archive storage": "diagrams.aws.storage.S3Glacier",
  "deep archive": "diagrams.aws.storage.S3Glacier",
  "s3objectlambdaaccesspoints": "diagrams.aws.storage.S3ObjectLambdaAccessPoints",
  "s3 object processing": "diagrams.aws.storage.S3ObjectLambdaAccessPoints",
  "similestorageservices3bucketwithobjects": "diagrams.aws.storage.SimpleStorageServiceS3BucketWithObjects",
  "similestorageservices3bucket": "diagrams.aws.storage.SimpleStorageServiceS3Bucket",
  "similestorageservices3object": "diagrams.aws.storage.SimpleStorageServiceS3Object",
  "similestorageservices3": "diagrams.aws.storage.SimpleStorageServiceS3",
  "s3": "diagrams.aws.storage.SimpleStorageServiceS3",
  "object storage": "diagrams.aws.storage.SimpleStorageServiceS3",
  "data lake": "diagrams.aws.storage.SimpleStorageServiceS3",
  "static website hosting": "diagrams.aws.storage.SimpleStorageServiceS3",
  "snowfamilysnowballimportexport": "diagrams.aws.storage.SnowFamilySnowballImportExport",
  "snowballedge": "diagrams.aws.storage.SnowballEdge",
  "snowballdiagrams": "diagrams.aws.storage.Snowball",
  "snowball": "diagrams.aws.storage.Snowball",
  "data transfer appliance": "diagrams.aws.storage.Snowball",
  "migrate large data": "diagrams.aws.storage.Snowball",
  "snowmobile": "diagrams.aws.storage.Snowmobile",
  "petabyte scale migration": "diagrams.aws.storage.Snowmobile",
  "storagegatewaycachedvolume": "diagrams.aws.storage.StorageGatewayCachedVolume",
  "cached volume gateway": "diagrams.aws.storage.StorageGatewayCachedVolume",
  "storagegatewaynoncachedvolume": "diagrams.aws.storage.StorageGatewayNonCachedVolume",
  "non-cached volume gateway": "diagrams.aws.storage.StorageGatewayNonCachedVolumediagrams.aws.storage.StorageGatewayNonCachedVolume",
  "storagegatewayvirtualtapelibrary": "diagrams.aws.storage.StorageGatewayVirtualTapeLibrary",
  "vtl": "diagrams.aws.storage.StorageGatewayVirtualTapeLibrary",
  "storagegateway": "diagrams.aws.storage.StorageGateway",
  "hybrid storage": "diagrams.aws.storage.StorageGateway",
  "on-premises gateway": "diagrams.aws.storage.StorageGateway",
  "storagediagrams": "diagrams.aws.storage.Storage",
  "storage": "diagrams.aws.storage.Storage"
}
"""

def create_aws_knowledge_graph(json_data_string: str) -> nx.DiGraph:
    """
    Parses a JSON string containing AWS keyword mappings and converts it into
    a NetworkX Directed Graph (DiGraph).

    The graph structure is:
    - Nodes: Keywords (e.g., 'serverless') and AWS Service Endpoints (e.g., 'diagrams.aws.compute.Lambda').
    - Edges: Directed edges from a Keyword Node to its corresponding Service Endpoint Node.
    - Edge Attributes: The edge is labeled with the keyword itself (stored in the 'keyword' attribute).

    Args:
        json_data_string: A string containing the knowledge graph data in JSON dictionary format.

    Returns:
        A networkx.DiGraph object representing the AWS knowledge graph.
    """
    # 1. Parse the JSON string into a Python dictionary
    try:
        knowledge_data = json.loads(json_data_string)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON data: {e}")
        return nx.DiGraph()

    # 2. Initialize the graph
    G = nx.DiGraph()

    # 3. Iterate through the dictionary and add nodes and edges
    for keyword, service_endpoint in knowledge_data.items():
        # Ensure both nodes exist
        G.add_node(keyword, type='keyword')
        G.add_node(service_endpoint, type='service')

        # Add a directed edge from the keyword to the service endpoint.
        # The edge attribute 'keyword' helps identify the nature of the link.
        G.add_edge(keyword, service_endpoint, keyword=keyword)

    return G


# Create the graph from the constant data string
aws_kg = create_aws_knowledge_graph(AWS_KG_JSON_DATA)

# Output some basic information about the generated graph
print("--- AWS Knowledge Graph Statistics ---")
print(f"Total Nodes: {aws_kg.number_of_nodes()}")
print(f"Total Edges: {aws_kg.number_of_edges()}")

# Find a specific service endpoint (a target node)
guardduty_service = "diagrams.aws.security.Guardduty"

# Find all keywords (source nodes) linked to GuardDuty
guardduty_keywords = list(aws_kg.predecessors(guardduty_service))
print(f"\nKeywords linked to GuardDuty ({guardduty_service}):")
print(", ".join(guardduty_keywords))

# Example of a Keyword node and its successor (the service it points to)
example_keyword = "storage"
try:
    service = next(iter(aws_kg.successors(example_keyword)))
    print(f"\nKeyword '{example_keyword}' maps to service: {service}")
except StopIteration:
    print(f"\nKeyword '{example_keyword}' has no mapping.")

# You can now use this 'aws_kg' graph object for advanced graph analysis,
# pathfinding, or visualization with libraries like Matplotlib or Gephi.

--- AWS Knowledge Graph Statistics ---
Total Nodes: 263
Total Edges: 189

Keywords linked to GuardDuty (diagrams.aws.security.Guardduty):
guardduty, threat detection, malicious activity, intrusion detection

Keyword 'storage' maps to service: diagrams.aws.storage.Storage


In [6]:
import json
import networkx as nx
import os
from typing import Dict, Any, Union

# Colab-specific import. This must be available in the execution environment.
try:
    from google.colab import files
except ImportError:
    # If not running in Colab, 'files' will not be defined.
    pass

def create_aws_knowledge_graph_from_file(file_path: str) -> Union[nx.DiGraph, None]:
    """
    Reads a JSON dictionary from a specified file path, where keywords map to
    AWS service endpoints, and converts it into a NetworkX Directed Graph (DiGraph).

    The graph structure links a Keyword Node to its corresponding Service Endpoint Node.

    Args:
        file_path: The path to the text file containing the JSON knowledge graph data.

    Returns:
        A networkx.DiGraph object representing the AWS knowledge graph, or None if an error occurs.
    """
    if not os.path.exists(file_path):
        print(f"Error: File not found at path: {file_path}")
        return None

    # 1. Read the JSON string from the file
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            json_data_string = f.read()
    except IOError as e:
        print(f"Error reading file {file_path}: {e}")
        return None

    # 2. Parse the JSON string into a Python dictionary
    try:
        knowledge_data: Dict[str, str] = json.loads(json_data_string)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON data from file {file_path}: {e}")
        return None

    # 3. Initialize the graph
    G = nx.DiGraph()

    # 4. Iterate through the dictionary and add nodes and edges
    for keyword, service_endpoint in knowledge_data.items():
        # Add the nodes and assign them types for easy identification
        G.add_node(keyword, type='keyword')
        G.add_node(service_endpoint, type='service')

        # Add a directed edge from the keyword (source) to the service (target)
        G.add_edge(keyword, service_endpoint, relationship='maps_to')

    return G

# Example Usage (Tailored for Google Colab file upload):
if __name__ == '__main__':
    try:
        # 1. Trigger the Colab file upload dialog
        print("Please upload your knowledge graph data file now (JSON format text file).")
        uploaded = files.upload()

        if not uploaded:
            print("\nNo file was uploaded. Aborting graph creation.")
        else:
            # 2. Get the filename from the uploaded dictionary keys (Colab returns a dict)
            DATA_FILE = list(uploaded.keys())[0]

            print(f"Successfully uploaded '{DATA_FILE}'. Attempting to load knowledge graph...")

            # 3. Load the graph using the new file name
            aws_kg = create_aws_knowledge_graph_from_file(DATA_FILE)

            if aws_kg:
                print("\n--- AWS Knowledge Graph Loaded Successfully ---")
                print(f"Total Nodes: {aws_kg.number_of_nodes()}")
                print(f"Total Edges: {aws_kg.number_of_edges()}")
                print("\nFirst 5 keywords (nodes):")
                print(list(aws_kg.nodes)[:5])
            else:
                print("\nFailed to load the knowledge graph. Please ensure the uploaded file contains valid JSON.")

    except NameError:
        print("Error: 'google.colab.files' is not available. This code must be run in a Google Colab environment.")
    except Exception as e:
        print(f"An unexpected error occurred during file processing: {e}")

Please upload your knowledge graph data file now (JSON format text file).


Saving New Text Document.txt to New Text Document.txt
Successfully uploaded 'New Text Document.txt'. Attempting to load knowledge graph...

--- AWS Knowledge Graph Loaded Successfully ---
Total Nodes: 1385
Total Edges: 892

First 5 keywords (nodes):
['analytics', 'diagrams.aws.analytics.Analytics', 'amazon opensearch service', 'diagrams.aws.analytics.AmazonOpensearchService', 'opensearch']


1. Finding the AWS Service for a specific Keyword (Successors)
Since the graph edges run from the keyword to the service, you use the successors() method to find the destination node (the service endpoint).

Goal: Find the AWS service that the keyword 'ddos protection' maps to.

In [8]:
# Assuming 'aws_kg' is the loaded NetworkX graph object

keyword_query = 'ddos protection'
try:
    # Use next(iter(...)) because we expect only one service per keyword
    service_endpoint = next(aws_kg.successors(keyword_query))

    # The endpoint is a string like 'diagrams.aws.security.Shield'
    print(f"'{keyword_query}' maps to Service: {service_endpoint}")

except StopIteration:
    print(f"Keyword '{keyword_query}' not found in the graph.")

'ddos protection' maps to Service: diagrams.aws.security.Shield


2. Finding all Keywords for a specific AWS Service (Predecessors)
To find all the synonyms or related keywords that point to a service, you use the predecessors() method.

Goal: Find all keywords that map to the Shield service endpoint.

In [9]:
# Assuming 'aws_kg' is the loaded NetworkX graph object

service_query = 'diagrams.aws.security.Shield'
# predecessors() returns a generator, convert to a list for viewing
related_keywords = list(aws_kg.predecessors(service_query))

print(f"Service {service_query} is mapped by the following keywords:")
for keyword in related_keywords:
    print(f"- {keyword}")

Service diagrams.aws.security.Shield is mapped by the following keywords:
- shield
- ddos protection
- mitigate attack


3. Inspecting Node and Edge Attributes
You can also retrieve the metadata stored on the nodes (type) and edges (relationship).

Node Attributes:

In [10]:
# Assuming 'aws_kg' is the loaded NetworkX graph object

keyword = 'sso'
service = 'diagrams.aws.security.SingleSignOn'

print(f"Node '{keyword}' attributes: {aws_kg.nodes[keyword]}")
print(f"Node '{service}' attributes: {aws_kg.nodes[service]}")

# Output will be:
# Node 'sso' attributes: {'type': 'keyword'}
# Node 'diagrams.aws.security.SingleSignOn' attributes: {'type': 'service'}

Node 'sso' attributes: {'type': 'keyword'}
Node 'diagrams.aws.security.SingleSignOn' attributes: {'type': 'service'}


Edge Attributes:

In [11]:
# Assuming 'aws_kg' is the loaded NetworkX graph object

keyword = 'sso'
service = 'diagrams.aws.security.SingleSignOn'

# Access the edge attributes directly using the two nodes
edge_data = aws_kg.get_edge_data(keyword, service)

print(f"Edge data from '{keyword}' to '{service}': {edge_data}")

# Output will be:
# Edge data from 'sso' to 'diagrams.aws.security.SingleSignOn': {'relationship': 'maps_to'}

Edge data from 'sso' to 'diagrams.aws.security.SingleSignOn': {'relationship': 'maps_to'}
