In [None]:
from datetime import timedelta
import math
import json
import time

import awswrangler
import boto3
from matplotlib import cycler
from matplotlib.ticker import PercentFormatter
import matplotlib.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas

In [None]:
# Configure me!
# Named AWS profile handed to boto3.session.Session(profile_name=...).
AWS_PROFILE = ""
# Athena table that holds the CloudTrail events; interpolated into the FROM
# clause of every query in this notebook.
CLOUDTRAIL_ATHENA_TABLE_NAME = ""
# Athena workgroup, database and data catalog passed to
# awswrangler.athena.read_sql_query by execute_query().
CLOUDTRAIL_ATHENA_WORKGROUP = ""
CLOUDTRAIL_ATHENA_DATABASE = ""
CLOUDTRAIL_ATHENA_CATALOG = ""
# Lookback window: the queries filter `eventdate` from (1 + DAYS) days before
# the current date up to 1 day before it, both ends inclusive (the
# per-account/region query runs up to the current date instead).
DAYS = 1

In [None]:
# A blank AWS_PROFILE is mapped to None so boto3 uses its default credential
# resolution instead of looking up a profile whose name is the empty string.
session = boto3.session.Session(profile_name=AWS_PROFILE or None)


def execute_query(query):
    """Run a SQL query on Athena and return its result set.

    The query runs against the configured CloudTrail database, catalog and
    workgroup using the notebook-wide boto3 session.

    Args:
        query: SQL text to execute.

    Returns:
        Whatever ``awswrangler.athena.read_sql_query`` returns for the query;
        the rest of the notebook uses it as a pandas DataFrame.
    """
    return awswrangler.athena.read_sql_query(
        query,
        database=CLOUDTRAIL_ATHENA_DATABASE,
        data_source=CLOUDTRAIL_ATHENA_CATALOG,
        workgroup=CLOUDTRAIL_ATHENA_WORKGROUP,
        # Allow results of an identical query from the last hour to be reused.
        athena_cache_settings={"max_cache_seconds": 60 * 60},
        boto3_session=session,
        ctas_approach=False,
    )

In [None]:
def init_style():
    """Apply the notebook's dark colour theme as the active matplotlib style."""
    bg = "#24283b"
    fg = "#c0caf5"
    muted = "#565f89"
    palette = [
        "#7aa2f7",  # blue
        "#ff9e64",  # orange
        "#9ece6a",  # green
        "#f7768e",  # red
        "#9d7cd8",  # purple
        "#bb9af7",  # magenta
        "#565f89",  # comment
        "#e0af68",  # yellow
        "#7dcfff",  # cyan
    ]

    # rc entries that take the foreground colour.
    fg_keys = (
        "lines.color",
        "patch.edgecolor",
        "text.color",
        "axes.edgecolor",
        "axes.labelcolor",
        "xtick.color",
        "ytick.color",
        "boxplot.boxprops.color",
        "boxplot.capprops.color",
        "boxplot.flierprops.color",
        "boxplot.flierprops.markeredgecolor",
        "boxplot.whiskerprops.color",
    )
    # rc entries that take the background colour.
    bg_keys = (
        "axes.facecolor",
        "figure.facecolor",
        "figure.edgecolor",
        "savefig.facecolor",
        "savefig.edgecolor",
    )

    rc = {key: fg for key in fg_keys}
    rc.update({key: bg for key in bg_keys})
    rc["legend.framealpha"] = 0
    rc["grid.color"] = muted
    rc["axes.prop_cycle"] = cycler(color=palette)

    plt.style.use(rc)


init_style()

In [None]:
def draw_latency(data, label_fn=None, title="CloudTrail delay statistics"):
    """Plot one cumulative delivery-delay histogram per row of ``data``.

    Panels are laid out two per figure row. Each panel shows the cumulative
    percentage of events against the delay, the share of events at or below
    each reference delay (1 min, 5 min, 10 min, 1 hour), and a small table of
    summary statistics.

    Args:
        data: pandas DataFrame with one row per panel. Each row must provide
            ``s3_write_delay_seconds_histogram`` (text that becomes a JSON list
            of ``[bin, count]`` pairs once braces are swapped for brackets),
            ``s3_write_delay_seconds_count_lt_<seconds>`` for 60, 300, 600 and
            3600, and ``s3_write_delay_seconds_<stat>`` for min, avg, p50, p95,
            p99 and max.
        label_fn: optional callable that receives a row and returns the panel
            title. Panels are untitled when it is omitted.
        title: figure-level title.

    Returns:
        The matplotlib figure holding all panels.

    Raises:
        ValueError: if ``data`` has no rows.
    """
    if len(data) == 0:
        raise ValueError("draw_latency requires at least one row of data")

    ticks = pandas.DataFrame(data={"seconds": [60, 300, 600, 3600, 86400], "labels": ["1 min", "5 min", "10 min", "1 hour", "24 hours"]})

    max_cols = 2
    n_rows = 1 + ((len(data) - 1) // max_cols)
    n_cols = min(len(data), max_cols)

    fig, axs = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows), squeeze=False)

    fig.supylabel("Cumulative events delivered")
    fig.supxlabel("CloudTrail delay delivering to S3")
    fig.suptitle(title)

    def interval_string(seconds):
        """Render a duration in seconds as a timedelta string, rounded to whole seconds."""
        return str(timedelta(seconds=round(seconds)))

    for (_, row), ax in zip(data.iterrows(), axs.flat):
        # The histogram arrives as text made of {bin, count} pairs; swapping
        # the braces for brackets makes it parseable as JSON.
        hist_bins = json.loads(row["s3_write_delay_seconds_histogram"].replace('{', '[').replace('}', ']'))
        bins = np.array([each[0] for each in hist_bins])
        counts = np.array([each[1] for each in hist_bins])

        # Go up to 24 hours, we don't control the buckets
        if bins[-1] < 24 * 60 * 60:
            bins = np.append(bins, [24 * 60 * 60])
            counts = np.append(counts, [0])

        # Plot cumulative, with weights expressed as percentages of the total
        total = counts.sum()
        counts = 100 * counts / total

        ax.hist(bins, bins, weights=counts, cumulative=True)
        if label_fn:
            ax.set_title(label_fn(row))

        # Show linear scale up to 600 seconds, then log
        ax.set_xscale("symlog", linthresh=600)

        ax.set_xticks(ticks["seconds"], ticks["labels"])
        ax.set_xticks([], [], minor=True)
        ax.yaxis.set_major_formatter(PercentFormatter())

        for tick in ticks["seconds"]:
            ax.axvline(x=tick, linestyle="dashed", alpha=0.3)

        for tick in ticks["seconds"][:-1]:
            lt_ratio = row[f"s3_write_delay_seconds_count_lt_{tick}"] / total
            # Just below 100% the default two decimals would round up to
            # "100.00%", so fall back to the longer default percent format.
            if 1 > lt_ratio > 0.99:
                label = f"{lt_ratio:%}"
            else:
                label = f"{100 * lt_ratio:.2f}%"

            # Attach the label to this panel explicitly; pyplot's implicit
            # "current axes" is not the panel being drawn in a multi-panel grid.
            ax.text(tick, 0.95, label, transform=transforms.offset_copy(ax.get_xaxis_transform(), fig, x=0.05))
            # Once everything has been delivered, later labels add nothing.
            if lt_ratio == 1:
                break

        ax.set_xmargin(0)
        ax.set_ymargin(.1)

        table_cells = [
            [stat, interval_string(row[f"s3_write_delay_seconds_{stat}"])]
            for stat in ("min", "avg", "p50", "p95", "p99", "max")
        ]

        table = ax.table(cellText=table_cells, bbox=[0.6, 0.05, 0.35, 0.35], colWidths=[0.4, 0.6], zorder=1, edges="open")
        table.auto_set_font_size(False)
        for cell in table.get_celld().values():
            cell.set_text_props(color="white", alpha=0.6, fontweight="bold", fontsize="medium")

    # An odd number of rows leaves one grid cell without data; hide it rather
    # than showing an empty set of axes.
    for unused_ax in axs.ravel()[len(data):]:
        unused_ax.set_visible(False)

    return fig

In [None]:
def draw_events(data, title, xlabel, ylabel):
    """Draw one scatter panel per group of ``data``.

    Panels are laid out two per figure row, each with a fixed x range of
    0-300 and y range of 0-360 and ticks every 60 units.

    Args:
        data: sized iterable of ``(group, rows)`` pairs, such as a pandas
            GroupBy restricted to two columns. The first column of ``rows`` is
            plotted on x and the second on y. A truthy ``group`` is used as the
            panel title.
        title: figure-level title.
        xlabel: shared x-axis label.
        ylabel: shared y-axis label.

    Returns:
        The matplotlib figure holding all panels.

    Raises:
        ValueError: if ``data`` contains no groups.
    """
    if len(data) == 0:
        raise ValueError("draw_events requires at least one group of data")

    ticks = [0, 60, 120, 180, 240, 300]

    max_cols = 2
    n_rows = 1 + ((len(data) - 1) // max_cols)
    n_cols = min(len(data), max_cols)

    fig, axs = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows), squeeze=False)

    fig.supxlabel(xlabel)
    fig.supylabel(ylabel)
    fig.suptitle(title)

    for (group, rows), ax in zip(data, axs.flat):
        ax.scatter(rows.iloc[:, 0], rows.iloc[:, 1], s=0.1)

        ax.set_xticks(ticks)
        ax.set_xticks([], [], minor=True)
        ax.set_yticks(ticks)
        ax.set_yticks([], [], minor=True)
        if group:
            ax.set_title(group)
        ax.set_xlim(0, 300)
        ax.set_ylim(0, 360)

        ax.set_xmargin(0)
        ax.set_ymargin(0)

    # An odd number of groups leaves one grid cell without data; hide it
    # rather than showing an empty set of axes.
    for unused_ax in axs.ravel()[len(data):]:
        unused_ax.set_visible(False)

    return fig

In [None]:
# Delay statistics over every API-call event in the query window.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
data_all_events = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
)
select
    count(1) as "count",
    min(s3_write_delay_seconds) as "s3_write_delay_seconds_min",
    avg(s3_write_delay_seconds) as "s3_write_delay_seconds_avg",
    map_entries(numeric_histogram(100, s3_write_delay_seconds)) as "s3_write_delay_seconds_histogram",
    avg("$file_size") as size,
    approx_percentile(s3_write_delay_seconds, 0.5) as "s3_write_delay_seconds_p50",
    approx_percentile(s3_write_delay_seconds, 0.95) as "s3_write_delay_seconds_p95",
    approx_percentile(s3_write_delay_seconds, 0.99) as "s3_write_delay_seconds_p99",
    approx_percentile(s3_write_delay_seconds, 0.999) as "s3_write_delay_seconds_p999",
    approx_percentile(s3_write_delay_seconds, 0.9999) as "s3_write_delay_seconds_p9999",
    max(s3_write_delay_seconds) as "s3_write_delay_seconds_max",
    count(1) filter (where s3_write_delay_seconds <= 60) as "s3_write_delay_seconds_count_lt_60",
    count(1) filter (where s3_write_delay_seconds <= 300) as "s3_write_delay_seconds_count_lt_300",
    count(1) filter (where s3_write_delay_seconds <= 600) as "s3_write_delay_seconds_count_lt_600",
    count(1) filter (where s3_write_delay_seconds <= 3600) as "s3_write_delay_seconds_count_lt_3600",
    count(1) filter (where s3_write_delay_seconds <= 86400) as "s3_write_delay_seconds_count_lt_86400"
from
    events
where
    eventdate between
            format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
        and format_datetime(current_date - interval '1' day, 'YYYY/MM/dd')
""")

In [None]:
# Render the all-events aggregate and write the figure to disk.
all_events_fig = draw_latency(data_all_events)
all_events_fig.savefig("all_events.png")

In [None]:
# Delay statistics for S3 events, split by the managementevent flag.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
data_by_managementevent = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
)
select
    managementevent,
    count(1) as "count",
    min(s3_write_delay_seconds) as "s3_write_delay_seconds_min",
    avg(s3_write_delay_seconds) as "s3_write_delay_seconds_avg",
    map_entries(numeric_histogram(100, s3_write_delay_seconds)) as "s3_write_delay_seconds_histogram",
    avg("$file_size") as size,
    approx_percentile(s3_write_delay_seconds, 0.5) as "s3_write_delay_seconds_p50",
    approx_percentile(s3_write_delay_seconds, 0.95) as "s3_write_delay_seconds_p95",
    approx_percentile(s3_write_delay_seconds, 0.99) as "s3_write_delay_seconds_p99",
    approx_percentile(s3_write_delay_seconds, 0.999) as "s3_write_delay_seconds_p999",
    approx_percentile(s3_write_delay_seconds, 0.9999) as "s3_write_delay_seconds_p9999",
    max(s3_write_delay_seconds) as "s3_write_delay_seconds_max",
    count(1) filter (where s3_write_delay_seconds <= 60) as "s3_write_delay_seconds_count_lt_60",
    count(1) filter (where s3_write_delay_seconds <= 300) as "s3_write_delay_seconds_count_lt_300",
    count(1) filter (where s3_write_delay_seconds <= 600) as "s3_write_delay_seconds_count_lt_600",
    count(1) filter (where s3_write_delay_seconds <= 3600) as "s3_write_delay_seconds_count_lt_3600",
    count(1) filter (where s3_write_delay_seconds <= 86400) as "s3_write_delay_seconds_count_lt_86400"
from
    events
where
    eventdate between
            format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
        and format_datetime(current_date - interval '1' day, 'YYYY/MM/dd')
    and eventsource in (
        's3.amazonaws.com'
    )
group by
    managementevent
""")

In [None]:
def _management_label(row):
    """Panel title that tells management events apart from data events."""
    if row["managementevent"]:
        return "Management events"
    return "Data events"


# One panel per managementevent value, written to disk.
s3_events_fig = draw_latency(
    data_by_managementevent,
    label_fn=_management_label,
    title="CloudTrail delay statistics (S3 events)",
)
s3_events_fig.savefig("s3_events_by_management.png")

In [None]:
# Delay statistics per event source, for a fixed set of four sources.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
data_by_eventsource = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
)
select
    eventsource,
    count(1) as "count",
    min(s3_write_delay_seconds) as "s3_write_delay_seconds_min",
    avg(s3_write_delay_seconds) as "s3_write_delay_seconds_avg",
    map_entries(numeric_histogram(100, s3_write_delay_seconds)) as "s3_write_delay_seconds_histogram",
    avg("$file_size") as size,
    approx_percentile(s3_write_delay_seconds, 0.5) as "s3_write_delay_seconds_p50",
    approx_percentile(s3_write_delay_seconds, 0.95) as "s3_write_delay_seconds_p95",
    approx_percentile(s3_write_delay_seconds, 0.99) as "s3_write_delay_seconds_p99",
    approx_percentile(s3_write_delay_seconds, 0.999) as "s3_write_delay_seconds_p999",
    approx_percentile(s3_write_delay_seconds, 0.9999) as "s3_write_delay_seconds_p9999",
    max(s3_write_delay_seconds) as "s3_write_delay_seconds_max",
    count(1) filter (where s3_write_delay_seconds <= 60) as "s3_write_delay_seconds_count_lt_60",
    count(1) filter (where s3_write_delay_seconds <= 300) as "s3_write_delay_seconds_count_lt_300",
    count(1) filter (where s3_write_delay_seconds <= 600) as "s3_write_delay_seconds_count_lt_600",
    count(1) filter (where s3_write_delay_seconds <= 3600) as "s3_write_delay_seconds_count_lt_3600",
    count(1) filter (where s3_write_delay_seconds <= 86400) as "s3_write_delay_seconds_count_lt_86400"
from
    events
where
    eventdate between
            format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
        and format_datetime(current_date - interval '1' day, 'YYYY/MM/dd')
    and eventsource in (
        's3.amazonaws.com',
        'sts.amazonaws.com',
        'ssm.amazonaws.com',
        'ec2.amazonaws.com'
    )
group by
    eventsource
""")

In [None]:
def _eventsource_label(row):
    """Panel title: the row's event source."""
    return row["eventsource"]


# One panel per event source, written to disk.
events_by_source_fig = draw_latency(
    data_by_eventsource,
    label_fn=_eventsource_label,
    title="CloudTrail delay statistics by event source",
)
events_by_source_fig.savefig("all_events_by_source.png")

In [None]:
# Delay statistics per event name, for a fixed set of four event names.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
data_by_eventname = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
)
select
    eventname,
    count(1) as "count",
    min(s3_write_delay_seconds) as "s3_write_delay_seconds_min",
    avg(s3_write_delay_seconds) as "s3_write_delay_seconds_avg",
    map_entries(numeric_histogram(100, s3_write_delay_seconds)) as "s3_write_delay_seconds_histogram",
    avg("$file_size") as size,
    approx_percentile(s3_write_delay_seconds, 0.5) as "s3_write_delay_seconds_p50",
    approx_percentile(s3_write_delay_seconds, 0.95) as "s3_write_delay_seconds_p95",
    approx_percentile(s3_write_delay_seconds, 0.99) as "s3_write_delay_seconds_p99",
    approx_percentile(s3_write_delay_seconds, 0.999) as "s3_write_delay_seconds_p999",
    approx_percentile(s3_write_delay_seconds, 0.9999) as "s3_write_delay_seconds_p9999",
    max(s3_write_delay_seconds) as "s3_write_delay_seconds_max",
    count(1) filter (where s3_write_delay_seconds <= 60) as "s3_write_delay_seconds_count_lt_60",
    count(1) filter (where s3_write_delay_seconds <= 300) as "s3_write_delay_seconds_count_lt_300",
    count(1) filter (where s3_write_delay_seconds <= 600) as "s3_write_delay_seconds_count_lt_600",
    count(1) filter (where s3_write_delay_seconds <= 3600) as "s3_write_delay_seconds_count_lt_3600",
    count(1) filter (where s3_write_delay_seconds <= 86400) as "s3_write_delay_seconds_count_lt_86400"
from
    events
where
    eventdate between
            format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
        and format_datetime(current_date - interval '1' day, 'YYYY/MM/dd')
    and eventname in ('AssumeRole', 'DescribeNetworkInterfaces', 'GetObject', 'PutObject')
group by
    eventname
""")

In [None]:
def _eventname_label(row):
    """Panel title: the row's event name."""
    return row["eventname"]


# One panel per event name, in alphabetical order, written to disk.
events_by_name_sorted = data_by_eventname.sort_values("eventname")
events_by_name_fig = draw_latency(
    events_by_name_sorted,
    label_fn=_eventname_label,
    title="CloudTrail delay statistics by event name",
)
events_by_name_fig.savefig("all_events_by_name.png")

In [None]:
# Random sample of at most 10000 individual events per event name, for four
# selected event names.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
events_sample = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
),
indexed_events_by_source as (
    select
        *,
        row_number() over (partition by eventname order by random()) as index
    from
        events
    where
        eventdate between
                format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
            and format_datetime(current_date - interval '1' day, 'YYYY/MM/dd')
)
select
    eventsource,
    eventname,
    eventcategory,
    readonly,
    parsed_eventtime,
    s3_write_delay_seconds
from
    indexed_events_by_source
where
    index <= 10000
    and eventname in ('AssumeRole', 'DescribeNetworkInterfaces', 'GetObject', 'PutObject')
""")

In [None]:
def _seconds_into_five_minute_window(t):
    """Offset of a timestamp within its five-minute window, in seconds."""
    return (t.minute * 60 + t.second) % 300


# Fold every event time onto a single five-minute window for the x axis.
events_sample["eventtime_mod_5_min"] = events_sample["parsed_eventtime"].map(_seconds_into_five_minute_window)

# One scatter panel per event name: position in the window against delay.
sample_columns = ["eventtime_mod_5_min", "s3_write_delay_seconds"]
sample_by_eventname = events_sample.groupby("eventname", sort=True)[sample_columns]

sample_events_fig = draw_events(
    sample_by_eventname,
    title="CloudTrail delay by event name",
    xlabel="Event time (mod 5 minutes)",
    ylabel="CloudTrail delay (seconds)",
)
sample_events_fig.savefig("sample_events_by_name.png")

In [None]:
# Delay statistics for STS events, per account and region.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
# NOTE(review): unlike the other queries in this notebook, the upper date bound
# here is `interval '0' day` (the current date) rather than `interval '1' day`.
# Confirm that including the current day is intended.
data_by_accountregion = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
)
select
    account,
    region,
    count(1) as "count",
    min(s3_write_delay_seconds) as "s3_write_delay_seconds_min",
    avg(s3_write_delay_seconds) as "s3_write_delay_seconds_avg",
    map_entries(numeric_histogram(100, s3_write_delay_seconds)) as "s3_write_delay_seconds_histogram",
    avg("$file_size") as size,
    approx_percentile(s3_write_delay_seconds, 0.5) as "s3_write_delay_seconds_p50",
    approx_percentile(s3_write_delay_seconds, 0.95) as "s3_write_delay_seconds_p95",
    approx_percentile(s3_write_delay_seconds, 0.99) as "s3_write_delay_seconds_p99",
    approx_percentile(s3_write_delay_seconds, 0.999) as "s3_write_delay_seconds_p999",
    approx_percentile(s3_write_delay_seconds, 0.9999) as "s3_write_delay_seconds_p9999",
    max(s3_write_delay_seconds) as "s3_write_delay_seconds_max",
    count(1) filter (where s3_write_delay_seconds <= 60) as "s3_write_delay_seconds_count_lt_60",
    count(1) filter (where s3_write_delay_seconds <= 300) as "s3_write_delay_seconds_count_lt_300",
    count(1) filter (where s3_write_delay_seconds <= 600) as "s3_write_delay_seconds_count_lt_600",
    count(1) filter (where s3_write_delay_seconds <= 3600) as "s3_write_delay_seconds_count_lt_3600",
    count(1) filter (where s3_write_delay_seconds <= 86400) as "s3_write_delay_seconds_count_lt_86400"
from
    events
where
    eventdate between
            format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
        and format_datetime(current_date - interval '0' day, 'YYYY/MM/dd')
    and eventsource = 'sts.amazonaws.com'
group by
    account, region
""")

In [None]:
# Rank account/region pairs by event count, then keep only the regions of the
# account that owns the single busiest pair.
ordered_data_by_accountregion = data_by_accountregion.sort_values("count", ascending=False)
busy_account = ordered_data_by_accountregion.iloc[0]["account"]
busy_account_regions = ordered_data_by_accountregion[ordered_data_by_accountregion["account"] == busy_account]
busy_region = busy_account_regions.iloc[0]["region"]

# Compare that account's busiest (first) and quietest (last) regions. When the
# account appears in only one region both positions are the same row, so plot
# it once instead of drawing two identical panels.
region_positions = sorted({0, len(busy_account_regions) - 1})

draw_latency(
    busy_account_regions.iloc[region_positions],
    label_fn=lambda row: "busy region" if row["region"] == busy_region else "quiet region",
    title="CloudTrail delay statistics by region (single account)",
).savefig("busy_region_quiet_region.png")

In [None]:
# Random sample of at most 1000 events per account and region, with each
# event's offset from the first event in its file and the number of events
# sharing its account, region and path timestamp.
# The regex is written as \\d{{8}}T\\d{{4}} because this is an f-string: a bare
# {8} would be interpolated to the text "8", and \d is not a valid string escape.
# path_timestamp feeds the events_in_window partition below, so the pattern
# must reach Athena intact.
events_sample_delay = execute_query(f"""
with events as (
    select
        *,
        "$path",
        parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as parsed_eventtime,
        "$file_modified_time",
        "$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ') as s3_write_delay_interval,
        to_milliseconds("$file_modified_time" - parse_datetime(eventtime, 'YYYY-MM-dd''T''HH:mm:ssZ')) / 1000 as s3_write_delay_seconds,
        regexp_extract("$path", '\\d{{8}}T\\d{{4}}') as path_timestamp,
       "$file_size"
    from
        "{CLOUDTRAIL_ATHENA_TABLE_NAME}"
    where
        addendum is null
    and eventtype = 'AwsApiCall'
),
indexed_events_by_source as (
    select
        *,
        row_number() over (partition by account, region order by random()) as index,
        to_milliseconds(parsed_eventtime - first_value(parsed_eventtime) over (partition by "$path" order by parsed_eventtime asc)) / 1000 as seconds_after_first,
        count() over (partition by account, region, path_timestamp) as events_in_window
    from
        events
    where
        eventdate between
                format_datetime(current_date - interval '{1 + DAYS}' day, 'YYYY/MM/dd')
            and format_datetime(current_date - interval '1' day, 'YYYY/MM/dd')
)
select
    eventsource,
    eventname,
    eventcategory,
    readonly,
    parsed_eventtime,
    s3_write_delay_seconds,
    seconds_after_first,
    events_in_window
from
    indexed_events_by_source
where
    index <= 1000
""")

In [None]:
# Keep only events whose offset within the file and whose delay are both under
# ten minutes.
events_within_ten_minutes = events_sample_delay.query("seconds_after_first < 600 and s3_write_delay_seconds < 600")

# Group every row under the single key False so draw_events receives exactly
# one group; the falsy key also means the panel gets no title.
delay_columns = ["seconds_after_first", "s3_write_delay_seconds"]
single_group = events_within_ten_minutes.groupby(lambda a: False)[delay_columns]

events_sample_delay_fig = draw_events(
    single_group,
    title="CloudTrail delay by time after first event in file",
    xlabel="Time after first event in file (seconds)",
    ylabel="CloudTrail delay (seconds)",
)
events_sample_delay_fig.savefig("events_sample_delay.png")