In [None]:
from code_data_science import (
    data_table as dt,
    unique_dictionaries as ud,
    tree_data_grid,
)

# Secrets-in-use report recipe: load the sample CSV of secret references.
# NOTE(review): assumes dt.read_csv returns a pandas DataFrame -- confirm.
df = dt.read_csv("../samples/github_secrets_in_use.csv")

# The "match" column holds the full line of code, but only the secret's name
# is needed. The pattern anchors on the literal "~~>secrets." prefix and
# captures the word characters that follow it, so "specific_match" holds the
# bare name without the prefix; rows where the pattern does not match get NaN.
# expand=False makes str.extract return a Series for the single capture group
# instead of a one-column DataFrame.
# NOTE(review): `\w*` also accepts an empty name (e.g. "~~>secrets." followed
# by punctuation), which yields "" rather than NaN -- confirm that is intended.
df["specific_match"] = df["match"].str.extract(r"~~>secrets\.(\w*)", expand=False)

# Count how many rows reference each secret name overall. groupby leaves out
# NaN keys by default, so rows without an extracted name are not counted.
total_grouped = df.groupby("specific_match").size().reset_index(name="total_count")

# Initialize unique dictionary tree
tree = ud.UniqueDictionaries()

# Add one top-level entry per secret name with its overall count.
# itertuples(index=False, name=None) yields plain (specific_match, total_count)
# tuples in column order, avoiding the per-row Series that iterrows builds.
for specific_match_value, total_count in total_grouped.itertuples(
    index=False, name=None
):
    tree.add({"path": specific_match_value, "count": total_count})

# Count occurrences of each secret name per repository.
repo_grouped = (
    df.groupby(["specific_match", "repositoryPath"])
    .size()
    .reset_index(name="repo_count")
)

# Add one entry per (secret name, repository) pair. Both levels are joined
# into a single string with the ":::" separator; the paths are split on that
# same separator later in this cell.
# itertuples(index=False, name=None) yields plain tuples in column order
# (specific_match, repositoryPath, repo_count) without building a Series per
# row the way iterrows does.
for specific_match_value, repository_path, repo_count in repo_grouped.itertuples(
    index=False, name=None
):
    tree.add(
        {"path": f"{specific_match_value}:::{repository_path}", "count": repo_count}
    )

# Turn each ":::"-joined path into a list of path segments. New dictionaries
# are built instead of assigning into the ones returned by tree.to_list(), so
# whatever to_list() hands back is left unmodified and this step can be
# re-run without hitting an already-split (list) path.
# NOTE(review): assumes every item is a mapping whose "path" is a string --
# confirm against UniqueDictionaries.to_list().
tree_data = [
    {**item, "path": item["path"].split(":::")} for item in tree.to_list()
]

In [None]:
# Extra grid column: the "count" field of each entry, labelled "Occurrences".
occurrence_columns = [
    {"field": "count", "headerName": "Occurrences", "minWidth": 200},
]

# Show the entries in the tree grid. Arguments stay positional.
# NOTE(review): "Secrets" is presumably the heading for the path column --
# confirm against tree_data_grid.display.
tree_data_grid.display(tree_data, "Secrets", occurrence_columns)