In [None]:
# Load the IPython extension module `ext.stackql` into this kernel.
# NOTE(review): none of the helper functions defined in the cells below call into
# the extension directly — confirm which later cells depend on it.
%load_ext ext.stackql

In [None]:
## imports and object instantiation
import json, time, itertools, sys, threading, psycopg2
from concurrent.futures import ThreadPoolExecutor, as_completed
import ipywidgets as widgets
import pandas as pd
from IPython.display import clear_output, display, Markdown, HTML
from ipytree import Tree, Node
from psycopg2.extras import RealDictCursor
from psycopg2 import ProgrammingError

# Single module-level connection used by run_stackql_query and run_stackql_queries.
# It is also reached from ThreadPoolExecutor worker threads (get_resources_recursive
# submits query_and_format, which calls run_stackql_query), so every thread shares it.
# NOTE(review): presumably a StackQL server speaking the PostgreSQL wire protocol on
# localhost:5444 — confirm it is running before executing this cell.
conn = psycopg2.connect("dbname=stackql user=stackql host=localhost port=5444")

In [None]:
# Text box in which the user types the GCP organization ID.
# Note that `org_id` is bound to the widget object itself, not to the typed string.
# NOTE(review): get_all_resources formats its `org_id` argument with "%s", so callers
# presumably need to pass the widget's text value (org_id.value) — confirm at the call site.
org_id = widgets.Text(
    placeholder='12345',
    description='GCP Org ID',
    disabled=False
)
display(org_id)

In [None]:
# pandas display options applied kernel-wide for every DataFrame rendered below.
# The row limit is deliberately left at the pandas default (the call is commented out).
# pd.set_option('display.max_rows', None)
# None for max_columns / width: per the pandas options docs this lifts the column cap
# and lets pandas choose the output width — NOTE(review): confirm for the pandas version in use.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# Cap the rendered width of a single cell's content at 100 characters.
pd.set_option('display.max_colwidth', 100)

In [None]:
## functions

def display_cards(cards_data):
    """Render a sequence of (title, value) pairs as inline HTML "cards".

    Each pair becomes one fixed-width box showing the title above the value in
    large red text. Both items are interpolated into the markup as-is (no HTML
    escaping is applied). All cards are emitted through a single display() call.
    """
    rendered_cards = [
        f"""
        <div style="
            border: 1px solid #e3e3e3;
            border-radius: 4px;
            padding: 20px;
            display: inline-block;
            margin: 5px;
            text-align: center;
            width: 150px;
            background-color: #f7f7f7;">
            <h4 style="margin: 5px 0;">{title}</h4>
            <span style="font-size: 30px; font-weight: bold; color: red;">{value}</span>
        </div>
        """
        for title, value in cards_data
    ]

    display(HTML(''.join(rendered_cards)))

def get_icon(resType):
    """Return the icon name to use for a tree node of the given resource type.

    The "project" type maps to the 'codepen' icon; every other value is passed
    through unchanged and used directly as the icon name.
    """
    return 'codepen' if resType == "project" else resType

def print_overwrite(message):
    """Replace the current cell output with a single printed message.

    Calls clear_output(wait=True) and then prints `message`, so repeated calls
    show only the latest status line instead of an ever-growing log.
    """
    # Clear first, then print: the new message is the only thing left visible.
    clear_output(wait=True)
    print(message)

def build_tree_node(df, parent_name, parent_node=None, _ancestors=frozenset()):
    """Recursively attach the descendants of `parent_name` to `parent_node`.

    Reads the 'parentDisplayName', 'displayName' and 'resType' columns of `df`.
    Every row whose 'parentDisplayName' equals `parent_name` becomes a collapsed
    Node (icon chosen by get_icon) added under `parent_node`, and the function
    then recurses using that row's 'displayName' as the next parent name.

    Args:
        df: DataFrame of resources with the columns listed above.
        parent_name: display name whose direct children should be attached.
        parent_node: node that receives the children; when None the child
            nodes are still created but attached to nothing.
        _ancestors: internal bookkeeping (display names on the path from the
            root down to the current parent); callers should not pass it.

    Returns:
        None. The tree is built by mutating `parent_node`.

    Note:
        Rows are linked by display name, not by a unique ID, so resources that
        share a display name also share the same set of children.
    """
    # An empty frame (possibly without any columns) has nothing to attach.
    if df.empty:
        return

    # Names on the current root-to-parent path. A child whose display name is
    # already on this path would select the same rows again and recurse without
    # end (e.g. a folder "dev" nested inside a folder "dev").
    lineage = _ancestors | {parent_name}

    children = df[df['parentDisplayName'] == parent_name]

    for _, child in children.iterrows():
        child_name = child['displayName']
        child_node = Node(child_name, opened=False, icon=get_icon(child['resType']))
        if parent_node is not None:
            parent_node.add_node(child_node)
        if child_name in lineage:
            # Still show the node, but do not descend into a name-based cycle.
            continue
        build_tree_node(df, child_name, child_node, lineage)

def explode_json_list_col(input_df, col_to_explode, exploded_col):
    """Explode a column of JSON-encoded lists into one row per list element.

    String cells in `col_to_explode` are parsed with json.loads; non-string
    cells are used as they are. The column is then exploded so each list
    element gets its own row (other columns are repeated, index labels are
    kept). What happens next depends on the first non-null element:

    * dict elements: one column per key is added, named
      ``f"{exploded_col}_{key}"``, and `col_to_explode` is dropped. Keys are
      collected from every dict element (first-seen order), so keys that only
      appear in later elements are not lost. Non-dict cells yield None.
    * anything else: `col_to_explode` is renamed to `exploded_col`.

    If the column has no rows, or only null elements after exploding, the
    exploded frame is returned with the original column name.

    Args:
        input_df: source DataFrame; it is not modified.
        col_to_explode: name of the column holding lists or JSON list strings.
        exploded_col: output column name, or prefix for per-key columns.

    Returns:
        A new DataFrame with the exploded rows.

    Raises:
        json.JSONDecodeError: if a string cell is not valid JSON.
    """
    # Work on a copy so parsing does not overwrite the caller's column.
    working_df = input_df.copy()
    working_df[col_to_explode] = working_df[col_to_explode].apply(
        lambda cell: json.loads(cell) if isinstance(cell, str) else cell
    )

    exploded_df = working_df.explode(col_to_explode)

    # Nothing to inspect when the frame has no rows.
    if exploded_df[col_to_explode].empty:
        return exploded_df

    elements = exploded_df[col_to_explode].dropna()
    if elements.empty:
        return exploded_df

    if isinstance(elements.iloc[0], dict):
        # Union of keys across all dict elements, preserving first-seen order.
        keys = list(dict.fromkeys(
            key for element in elements if isinstance(element, dict) for key in element
        ))
        for key in keys:
            exploded_df[f"{exploded_col}_{key}"] = exploded_df[col_to_explode].apply(
                lambda element, key=key: element.get(key) if isinstance(element, dict) else None
            )
        return exploded_df.drop(columns=[col_to_explode])

    return exploded_df.rename(columns={col_to_explode: exploded_col})

In [None]:
def run_stackql_query(query, debug=False):
    """Execute one query on the shared connection and return its rows as a DataFrame.

    Rows are fetched through a RealDictCursor, so each row maps column name to
    value. This is best-effort: any exception raised while executing or
    fetching is swallowed and an empty DataFrame is returned instead; the
    error text is printed only when `debug` is true.
    """
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query)
            return pd.DataFrame(cursor.fetchall())
    except Exception as err:
        if debug:
            print(f"Error executing query: {str(err)}")
    # Reached only when the query failed.
    return pd.DataFrame()

In [None]:
def run_stackql_queries(queries, debug=False):
    """Run several queries on one cursor and return all their rows as one DataFrame.

    Queries are executed in order on the shared connection. A query whose
    fetch raises ProgrammingError is treated as having produced no rows and
    skipped; an exception from execute() itself is not caught. If the combined
    result has an 'error' column, only rows where it is null are kept and the
    column is dropped. A one-line summary (row count and elapsed whole
    seconds) is printed before returning.

    Args:
        queries: iterable of query strings.
        debug: when true, print each query before executing it.

    Returns:
        DataFrame of the collected rows (original row positions are kept as
        the index after filtering).
    """
    started_at = time.time()
    collected_rows = []

    with conn.cursor(cursor_factory=RealDictCursor) as cursor:
        for statement in queries:
            if debug:
                print(f"Executing: {statement}...")
            cursor.execute(statement)

            try:
                fetched = cursor.fetchall()
            except ProgrammingError:
                # This statement produced nothing to fetch; go on to the next.
                continue

            if fetched:
                collected_rows.extend(fetched)

    result_df = pd.DataFrame(collected_rows)

    # Keep only rows that did not report an error, then hide the column.
    if 'error' in result_df.columns:
        succeeded = result_df['error'].isnull()
        result_df = result_df[succeeded].drop(columns=['error'])

    row_count = result_df.shape[0]
    elapsed_seconds = round(time.time() - started_at)
    print(f"Found {row_count} rows in {elapsed_seconds} seconds")

    return result_df

In [None]:
def query_and_format(entity_id, query_fn, parent_display_name, res_type):
    """Run the query built for `entity_id` and tag every returned row.

    `query_fn(entity_id)` produces the query text, which is executed with
    run_stackql_query. The result gets two extra columns holding the same
    value on every row: 'parentDisplayName' and 'resType'. The columns are
    added even when the query returned no rows.
    """
    result = run_stackql_query(query_fn(entity_id))
    for column, label in (("parentDisplayName", parent_display_name), ("resType", res_type)):
        result[column] = label
    return result

def get_resources_recursive(entity_id, get_projects_query_fn, get_folders_query_fn, parent_display_name='organization'):
    """Collect the projects and folders under `entity_id`, descending into every folder.

    The project and folder queries for `entity_id` are submitted to a thread
    pool together. Each folder row that has a 'name' entry is then processed
    by a recursive call (also submitted to the pool) with the folder's 'name'
    as the entity and its 'displayName' as the parent label. Progress is
    reported through print_overwrite.

    Returns:
        One DataFrame: this level's projects, then its folders, then the
        results for the sub-folders in the order they finish.
    """
    collected = []  # frames gathered at this level, concatenated once at the end

    with ThreadPoolExecutor() as pool:
        pending_projects = pool.submit(query_and_format, entity_id, get_projects_query_fn, parent_display_name, "project")
        pending_folders = pool.submit(query_and_format, entity_id, get_folders_query_fn, parent_display_name, "folder")

        projects_df = pending_projects.result()
        print_overwrite(f"Found {len(projects_df)} projects in {entity_id}")
        collected.append(projects_df)

        folders_df = pending_folders.result()
        print_overwrite(f"Found {len(folders_df)} folders in {entity_id}")
        collected.append(folders_df)

        # Fan out: one recursive task per folder found at this level.
        child_tasks = []
        for _, folder_row in folders_df.iterrows():
            if 'name' in folder_row:
                child_tasks.append(
                    pool.submit(
                        get_resources_recursive,
                        folder_row['name'],
                        get_projects_query_fn,
                        get_folders_query_fn,
                        folder_row['displayName'],
                    )
                )

        collected.extend(finished.result() for finished in as_completed(child_tasks))

    return pd.concat(collected, ignore_index=True)

def get_all_resources(get_projects_query, get_folders_query, org_id):
    """Discover every folder and project under an organization and summarize them.

    Walks the hierarchy starting at ``organizations/<org_id>`` with
    get_resources_recursive, builds an ipytree Tree rooted at an "organization"
    node, prints the elapsed time, and shows project/folder counts as cards.

    Args:
        get_projects_query: callable passed through to get_resources_recursive,
            where it is invoked with an entity ID to build the projects query.
        get_folders_query: same, for the folders query.
        org_id: value formatted into "organizations/%s".
            NOTE(review): the notebook also binds `org_id` to a Text widget at
            module level; presumably callers pass its text value — confirm.

    Returns:
        (resources_df, projects, tree): the combined resource DataFrame, the
        list of non-null 'projectId' values from rows with resType 'project'
        (empty when there are none), and the Tree widget. The tree is returned,
        not displayed; the caller decides where to show it.
    """
    start_time = time.time()

    # Start with the root organization to get all resources
    resources_df = get_resources_recursive("organizations/%s" % (org_id), get_projects_query, get_folders_query)

    # Create root node and build the tree
    root = Node("organization", opened=False, icon='building')
    build_tree_node(resources_df, "organization", root)
    tree = Tree(nodes=[root])

    # Calculate metrics
    elapsed_time = round(time.time() - start_time)
    num_folders = resources_df.query("resType == 'folder'").shape[0]
    project_rows = resources_df.query("resType == 'project'")
    num_projects = project_rows.shape[0]

    # When no project rows came back at all, the 'projectId' column does not
    # exist in the combined frame; treat that as "no projects", not an error.
    if 'projectId' in project_rows.columns:
        projects = project_rows['projectId'].dropna().tolist()
    else:
        projects = []

    print(f"Total elapsed time: {elapsed_time} seconds")
    cards_data = [("Number of Projects", num_projects), ("Number of Folders", num_folders)]
    display_cards(cards_data)

    return resources_df, projects, tree