In [None]:
%load_ext ext.stackql

In [None]:
%run includes/google-audit-setup.ipynb

In [None]:
## notebook variables
# Organization ID. It is interpolated into the "organizations/<org_id>" resource
# name that the resource discovery cell uses as the root of its recursive walk.
# NOTE(review): "12345" looks like a placeholder - confirm/replace with the real
# organization ID before running the audit.
org_id = "12345"

In [None]:
#
# discover folders and projects
#
def get_projects_query(entity_id):
    """Return the StackQL query listing the projects whose parent is *entity_id*.

    The query selects ``displayName``, ``name``, ``parent`` and ``projectId``
    from ``google.cloudresourcemanager.projects``.

    *entity_id* is interpolated verbatim (no escaping) into the ``WHERE``
    clause; presumably it is a resource name such as ``organizations/<id>``
    or ``folders/<id>`` - confirm against the caller.
    """
    query_lines = (
        "",
        "    SELECT displayName, name, parent, projectId ",
        "    FROM google.cloudresourcemanager.projects ",
        f"    WHERE parent = '{entity_id}';",
        "    ",
    )
    return "\n".join(query_lines)

def get_folders_query(entity_id):
    """Return the StackQL query listing the folders whose parent is *entity_id*.

    The query selects ``displayName``, ``name`` and ``parent`` from
    ``google.cloudresourcemanager.folders``.

    *entity_id* is interpolated verbatim (no escaping) into the ``WHERE``
    clause; presumably it is a resource name such as ``organizations/<id>``
    or ``folders/<id>`` - confirm against the caller.
    """
    query_lines = (
        "",
        "    SELECT displayName, name, parent ",
        "    FROM google.cloudresourcemanager.folders ",
        f"    WHERE parent = '{entity_id}';",
        "    ",
    )
    return "\n".join(query_lines)

start_time = time.time()

# Walk the resource hierarchy starting at the organization root; the two query
# builders tell the helper how to list the projects and folders under a parent.
all_resources = get_resources_recursive(f"organizations/{org_id}", get_projects_query, get_folders_query)

# Convert the results to a dataframe and keep only rows that carry no error.
# The 'error' column only exists when at least one row has an 'error' key, so
# the filter is applied conditionally instead of raising KeyError on a clean run.
resources_df = pd.DataFrame(all_resources)
if 'error' in resources_df.columns:
    resources_df = resources_df.loc[resources_df['error'].isna()].drop(columns='error')

# Create root node and build the tree
root = Node("organization", opened=False, icon='building')
build_tree_node(resources_df, "organization", root)

# Wrap the root in the tree widget displayed at the end of the cell
tree = Tree(nodes=[root])

# Calculate metrics (elapsed time covers discovery and tree construction)
elapsed_time = (time.time() - start_time) * 1000
num_folders = resources_df.query("resType == 'folder'").shape[0]
num_projects = resources_df.query("resType == 'project'").shape[0]

# Project IDs (rows without a projectId are dropped); `projects_df` and
# `projects` are reused by the cells that follow.
projects_df = resources_df.query("resType == 'project'")['projectId'].dropna().to_frame()
projects = projects_df['projectId'].tolist()

print(f"Total elapsed time: {elapsed_time:.2f} ms")
cards_data = [("Number of Projects", num_projects), ("Number of Folders", num_folders)]
display_cards(cards_data)

# Last expression of the cell: renders the tree widget as the cell output
tree

In [None]:
#
# get project iam bindings
#
from io import StringIO

# One IAM policy query per discovered project; the project ID is also projected
# as a literal column so each binding row can be traced back to its project.
queries = [
    f"""
    SELECT role, condition, members, '{project}' as project
    FROM google.cloudresourcemanager.projects_iam_policies
    WHERE projectsId = '{project}'
    """
    for project in projects
]

res = stackql.executeQueriesAsync(queries)

# Round-trip through JSON so pandas applies its read_json type inference.
# The JSON text is wrapped in StringIO because passing a literal JSON string to
# pd.read_json is deprecated (FutureWarning since pandas 2.1).
raw_bindings_df = pd.read_json(StringIO(json.dumps(res)))


def _split_member(frame):
    """Split each 'member' value once on its first ':'."""
    return frame['member'].str.split(':', n=1)


# Expand 'members' into one row per 'member', then split each member on its
# first ':' into a type part and an email part.
bindings_df = (
    explode_json_list_col(raw_bindings_df, 'members', 'member')
    .assign(**{
        'member_type': lambda x: _split_member(x).str[0],
        'member_email': lambda x: _split_member(x).str[1]
    })
    .drop('member', axis=1)
)

bindings_df

In [None]:
#
# get zones and regions
#
# One zone-listing query per discovered project. SPLIT_PART keeps the 9th
# '/'-separated segment of the zone's region value (presumably the short region
# name at the end of the region URL - confirm against the API response).
queries = [
    f"""
    SELECT name, SPLIT_PART(region, '/', 9) as region
    FROM google.compute.zones
    WHERE project = '{project}'
    """
    for project in projects
]

# Run the queries one at a time and collect every row that parses as JSON.
all_results = []
for query in queries:
    res = stackql.execute(query)
    try:
        parsed_result = json.loads(res)
    except json.JSONDecodeError:
        # Best effort: report the unparseable response and continue with the
        # remaining projects.
        print(f"Failed to parse result from query: {query}")
        print(f"Raw result: {res}")
    else:
        all_results.extend(parsed_result)

# Drop rows that carry an error. The 'error' column only exists when at least
# one row has an 'error' key, so filter conditionally rather than raising
# KeyError when every query succeeded (or when there were no projects at all).
zones_df = pd.DataFrame(all_results)
if 'error' in zones_df.columns:
    zones_df = zones_df.loc[zones_df['error'].isnull()].drop('error', axis=1)
# Pin the two selected columns so the frame has them even when no zone rows
# were returned; the derived frames and lists below then come out empty.
zones_df = zones_df.reindex(columns=['name', 'region']).drop_duplicates()

regions_df = pd.DataFrame(zones_df['region'].unique(), columns=['region'])
# For every combination of project and region (cross join via a constant key)
projects_regions_df = projects_df.assign(key=1).merge(regions_df.assign(key=1), on='key').drop('key', axis=1)
# For every combination of project and zone
projects_zones_df = projects_df.assign(key=1).merge(zones_df.assign(key=1), on='key').drop(['key', 'region'], axis=1)

# python list variables
regions = regions_df['region'].tolist()
zones = zones_df['name'].tolist()
projects_regions_list = projects_regions_df.to_dict(orient='records')
projects_zones_list = projects_zones_df.to_dict(orient='records')
