# TODO
See CSV Tables
https://docutils.sourceforge.io/docs/ref/rst/directives.html#csv-table


In [6]:
from msticpy.data import QueryProvider
import pandas as pd


providers = {
    "MSSentinel": "Microsoft Sentinel",
    "M365D": "Microsoft 365 Defender",
    "Kusto": "Kusto/Azure Data Explorer",
    "SecurityGraph": "Microsoft Graph",
    "Splunk": "Splunk",
    "ResourceGraph": "Azure Resource Graph",
    "Sumologic": "Sumologic",
    "LocalData": "Local Data",
}

prov_list = QueryProvider.list_data_environments()

print("Generating documentation for for the following providers")
print(", ".join(list(providers)))
print("Skipping the following providers")
print(", ".join(list(set(prov_list) - set(providers))))
env_q_stores = {prov: QueryProvider(prov) for prov in providers}


query_series = []
for env, env_queries in env_q_stores.items():
    query_names = env_queries.list_queries()
    for query_name in query_names:
        q_group, q_name = query_name.split(".")
        query_path = [env, q_group, q_name]
        q_obj = getattr(getattr(env_queries, q_group), q_name)
        qry = env_queries.query_store.get_query(q_group + "." + q_name)
        if "table" in qry.default_params:
            q_table = qry.default_params["table"].get("default", "na").split()[0]
        elif "table" in qry.required_params:
            q_table = qry.required_params["table"].get("default", "na").split()[0]
        else:
            q_table = "-"
        q_dict = {
            "Environment": env,
            "QueryGroup": q_group,
            "Query": q_name,
            "Description": qry.description,
            "Req-Params": ", ".join(
                [
                    f"{param} ({p_data.get('type')})"
                    for param, p_data in qry.required_params.items()
                ]
            ),
            # "OtherParams": ", ".join([f"{param}" for param in qry.default_params]),
            "Table": q_table,
        }
        query_series.append(pd.Series(q_dict))

query_df = pd.DataFrame(query_series)

# env_queries.

from tabulate import tabulate

for name, grp in query_df.groupby("Environment"):
    friendly_name = providers.get(name, name)
    print(f"Queries for {friendly_name}")
    print("-" * len(f"Queries for {friendly_name}"))
    print(f"\nData Environment identifier: {name}")
    print()
    print(
        tabulate(
            grp.drop(columns="Environment"),
            headers="keys",
            showindex="never",
            tablefmt="rst",
        )
    )
    print("\n")

file_name = "source/data_acquisition/DataQueries.rst"
with open(file_name, "w") as rst_file:
    rst_file.write("Data Queries Reference\n")
    rst_file.write("=" * len("Data Queries Reference") + "\n\n")

    group_dict = {name: grp for name, group in query_df.groupby("Environment")}
    for name, friendly_name in env_q_stores.items():
        grp = group_dict[name]
        rst_file.write(f"Queries for {friendly_name}")
        rst_file.write("-" * len(f"Queries for {friendly_name}"))
        rst_file.write(f"\nData Environment identifier: {name}")
        tbl_txt = tabulate(
            grp.drop(columns="Environment"),
            headers="keys",
            showindex="never",
            tablefmt="rst",
        )
        rst_file.write(tbl_txt)
        rst_file.write("\n\n")


Generating documentation for for the following providers
MSSentinel, M365D, Kusto, SecurityGraph, Splunk, ResourceGraph, Sumologic, LocalData
Skipping the following providers
AzureSentinel, MDE, Mordor, LogAnalytics, AzureSecurityCenter, MDATP
Queries for Local Data
----------------------

QueryGroup       Query                             Description                             ReqdParams    Table
Azure            list_all_signins_geo              List all Azure AD logon events                        -
Network          list_azure_network_flows_by_host  List Azure Network flows by host name                 -
Network          list_azure_network_flows_by_ip    List Azure Network flows by IP address                -
SecurityAlert    list_alerts                       Retrieves list of alerts                              -
WindowsSecurity  get_process_tree                  Get process tree for a process                        -
WindowsSecurity  list_host_events                  List events 

In [10]:
query_df.groupby("Environment").asdict()

AttributeError: 'DataFrameGroupBy' object has no attribute 'asdict'