# Bot Statistics

Analysis of OpenStreetMap contributions from automated bots.

In [1]:
import duckdb
import util

# One-time setup from the project-local `util` module; what it configures is
# defined there and not visible in this notebook.
util.init()

## Monthly Bot Activity Trends

Monthly counts of bot edits and bot contributors, together with their share (in percent) of all edits and all contributors in the same month.

In [2]:
# Monthly bot statistics: bot edits / bot contributors per month, and their
# percentage of all edits / all contributors in that month.
#
# The query aggregates the changeset parquet files twice (once over all rows,
# once restricted to `bot = true`) and LEFT JOINs the bot figures onto the
# totals, so months without any bot rows still appear with 0 values.
monthly_bot_query = """
WITH monthly_total AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as total_contributors,
        CAST(SUM(edit_count) as BIGINT) as total_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    GROUP BY year, month
),
monthly_bot AS (
    SELECT 
        year,
        month,
        CONCAT(year, '-', LPAD(CAST(month as VARCHAR), 2, '0')) as months,
        COUNT(DISTINCT user_name) as bot_contributors,
        CAST(SUM(edit_count) as BIGINT) as bot_edits
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE bot = true
    GROUP BY year, month
)
SELECT 
    mt.months,
    mt.year,
    mt.month,
    COALESCE(mb.bot_contributors, 0) as "Bot Contributors",
    COALESCE(mb.bot_edits, 0) as "Bot Edits",
    ROUND((COALESCE(mb.bot_contributors, 0) * 100.0) / mt.total_contributors, 2) as "Percent Bot Contributors",
    ROUND((COALESCE(mb.bot_edits, 0) * 100.0) / mt.total_edits, 2) as "Percent Bot Edits",
FROM monthly_total mt
LEFT JOIN monthly_bot mb ON mt.year = mb.year AND mt.month = mb.month
ORDER BY mt.year, mt.month
"""
df = duckdb.sql(monthly_bot_query).df()

# Extra keyword arguments for the percentage figures only; the absolute-count
# figures pass nothing extra so FigureConfig keeps its own default there.
percent_hover = {"y_unit_hover_template": "%"}

# One entry per figure: (title, label, y column, extra FigureConfig kwargs).
# All figures share the "months" column as their x axis.
figure_specs = [
    ("Monthly Bot Edits", "Total Edits", "Bot Edits", {}),
    ("Percentage of Bot Edits", "Percent Edits", "Percent Bot Edits", percent_hover),
    ("Monthly Bot Contributors", "Total Contributors", "Bot Contributors", {}),
    ("Percentage of Bot Contributors", "Percent Contributors", "Percent Bot Contributors", percent_hover),
]

util.show_figure(
    [
        util.FigureConfig(
            title=title,
            label=label,
            x_col="months",
            y_col=y_col,
            **extra_kwargs,
            query_or_df=df,
        )
        for title, label, y_col, extra_kwargs in figure_specs
    ]
)

## Yearly Bot Statistics by Editing Software

Interactive tables showing yearly edits and contributors for the top 100 bot editing software, once over all years and once restricted to 2021 onward.

In [3]:
# Yearly statistics per bot editing software (`created_by`), restricted to rows
# with `bot = true` and a non-null `created_by`.
#
# CTEs in the query:
#   user_first_year - first year each (user_name, created_by) pair appears
#   software_totals - per-software edit and distinct-contributor totals, both
#                     all-time and for year >= 2021
#   yearly_metrics  - per (year, software): edits, distinct contributors, and
#                     contributors whose first year with that software is this year
# The final SELECT attaches the per-software totals to every yearly row.
query = """
WITH user_first_year AS (
    SELECT 
        user_name,
        created_by,
        MIN(year) as first_year
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE created_by IS NOT NULL AND bot = true
    GROUP BY user_name, created_by
),
software_totals AS (
    SELECT
        created_by as "Editing Software",
        CAST(SUM(edit_count) as BIGINT) as total_edits_all_time,
        CAST(SUM(CASE WHEN year >= 2021 THEN edit_count ELSE 0 END) as BIGINT) as total_edits_2021_now,
        CAST(COUNT(DISTINCT user_name) as BIGINT) as total_contributors_all_time,
        CAST(COUNT(DISTINCT CASE WHEN year >= 2021 THEN user_name END) as BIGINT) as total_contributors_2021_now
    FROM '../changeset_data/year=*/month=*/*.parquet'
    WHERE created_by IS NOT NULL AND bot = true
    GROUP BY created_by
),
yearly_metrics AS (
    SELECT
        d.year,
        d.created_by as "Editing Software",
        CAST(SUM(d.edit_count) as BIGINT) as "Edits",
        CAST(COUNT(DISTINCT d.user_name) as BIGINT) as "Contributors",
        CAST(COUNT(DISTINCT CASE WHEN ufy.first_year = d.year THEN d.user_name END) as BIGINT) as "New Contributors"
    FROM '../changeset_data/year=*/month=*/*.parquet' d
    LEFT JOIN user_first_year ufy 
        ON d.user_name = ufy.user_name AND d.created_by = ufy.created_by
    WHERE d.created_by IS NOT NULL AND d.bot = true
    GROUP BY d.year, d.created_by
)
SELECT 
    ym.year,
    ym."Editing Software",
    ym."Edits",
    ym."New Contributors",
    ym."Contributors",
    st.total_edits_all_time as "Total Edits",
    st.total_edits_2021_now as "Total Edits (2021 - Now)",
    st.total_contributors_all_time as "Total Contributors",
    st.total_contributors_2021_now as "Total Contributors (2021 - Now)"
FROM yearly_metrics ym
JOIN software_totals st
    ON ym."Editing Software" = st."Editing Software"
ORDER BY year DESC, "Edits" DESC
"""

df = duckdb.sql(query).df()

# Every yearly row of one software repeats the same total, so `.first()`
# recovers one total per software; `nlargest(100)` then keeps the top 100.
by_software = df.groupby("Editing Software")
top_edits = by_software["Total Edits"].first().nlargest(100)
top_edits_2021_now = by_software["Total Edits (2021 - Now)"].first().nlargest(100)
top_contributors = by_software["Total Contributors"].first().nlargest(100)
top_contributors_2021_now = by_software["Total Contributors (2021 - Now)"].first().nlargest(100)

software_names = df["Editing Software"]
from_2021_on = df["year"] >= 2021

# One entry per table: (title, boolean row mask, value column, total column).
# The "2021 - Now" tables additionally drop all rows from earlier years.
table_specs = [
    (
        "Top 100 Bot Editing Software by Edits",
        software_names.isin(top_edits.index),
        "Edits",
        "Total Edits",
    ),
    (
        "Top 100 Bot Editing Software by Edits 2021 - Now",
        (software_names.isin(top_edits_2021_now.index)) & (from_2021_on),
        "Edits",
        "Total Edits (2021 - Now)",
    ),
    (
        "Top 100 Bot Editing Software by Contributors",
        software_names.isin(top_contributors.index),
        "Contributors",
        "Total Contributors",
    ),
    (
        "Top 100 Bot Editing Software by Contributors 2021 - Now",
        (software_names.isin(top_contributors_2021_now.index)) & (from_2021_on),
        "Contributors",
        "Total Contributors (2021 - Now)",
    ),
]

table_configs = [
    util.TableConfig(
        title=title,
        query_or_df=df[row_mask],
        x_axis_col="year",
        y_axis_col="Editing Software",
        value_col=value_col,
        center_columns=["Rank", "Editing Software"],
        sum_col=total_col,
    )
    for title, row_mask, value_col, total_col in table_specs
]

util.show_tables(table_configs)

Rank,Editing Software,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total Edits
1,osmtools,0,0,488405,1199363,1241094,6820108,15029385,2655466,3271936,7558536,6454470,8151944,12399003,29128241,2154313,1546179,98098443
2,Redaction bot,0,0,21303266,284119,102887,81142,195753,278968,323820,308288,3375623,33835,8164,521134,1471,0,26818470
3,autoAWS,0,0,0,0,0,0,0,0,2674425,76127,178939,133574,2635334,87343,66735,69521,5921998
4,JOSM,3943,59844,602095,997656,13511,3350,0,134189,120598,218,52828,3267,2158485,285,46430,20103,4216802
5,Roy,0,0,1216065,2940171,0,0,0,0,0,0,0,0,0,0,0,0,4156236
6,FindvejBot,3600197,358760,104606,175,0,0,0,0,0,0,0,0,0,0,0,0,4063738
7,osmapi,0,0,0,0,71233,0,556109,3,15143,53062,14674,281782,107001,501978,789476,187369,2577830
8,upload.py,2045740,0,267,75897,3793,2717,0,0,0,0,0,0,0,0,0,0,2128414
9,osmapis,0,0,127237,343781,327188,137592,307640,95443,64731,0,0,0,0,0,0,0,1403612
10,bash script,0,0,0,0,0,0,0,0,0,124,7548,1237987,1,0,80,0,1245740

Rank,Editing Software,2021,2022,2023,2024,2025,Total Edits (2021 - Now)
1,osmtools,8151944,12399003,29128241,2154313,1546179,53379680
2,autoAWS,133574,2635334,87343,66735,69521,2992507
3,JOSM,3267,2158485,285,46430,20103,2228570
4,osmapi,281782,107001,501978,789476,187369,1867606
5,bash script,1237987,1,0,80,0,1238068
6,Redaction bot,33835,8164,521134,1471,0,564604
7,https_all_the_things,10430,62062,365444,76678,28265,542879
8,gtfs2osm v1.0,0,0,0,0,74878,74878
9,gtfs2osm v0.9,0,0,0,31176,475,31651
10,MoveSourceKeyToChangeset,0,0,0,8359,0,8359

Rank,Editing Software,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total Contributors
1,rocketdata.io,0,0,0,0,0,0,0,0,234,0,0,0,0,0,0,0,234
2,osmtools,0,0,4,2,20,25,34,35,25,23,17,17,20,20,14,9,115
3,JOSM,4,5,11,12,3,2,0,6,5,2,4,3,3,2,4,1,46
4,osmapi,0,0,0,0,2,0,1,1,1,2,4,3,6,5,4,5,22
5,Redaction bot,0,0,2,1,1,1,1,1,2,2,1,1,1,1,1,0,5
6,bash script,0,0,0,0,0,0,0,0,0,1,1,1,1,1,2,0,5
7,upload.py,1,0,1,2,1,1,0,0,0,0,0,0,1,0,0,0,4
8,PythonOsmApi,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,3
9,https_all_the_things,0,0,0,0,0,0,0,0,0,2,1,1,1,1,2,1,3
10,FindvejBot,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,2

Rank,Editing Software,2021,2022,2023,2024,2025,Total Contributors (2021 - Now)
1,osmtools,17,20,20,14,9,38
2,osmapi,3,6,5,4,5,16
3,JOSM,3,3,2,4,1,11
4,bash script,1,1,1,2,0,5
5,gtfs2osm v1.0,0,0,0,0,2,2
6,osmuser63783_script 0.1,0,0,2,0,0,2
7,https_all_the_things,1,1,1,2,1,2
8,FFMChecker,0,0,0,1,0,1
9,Everydoorbot,0,0,0,1,1,1
10,BagBot,0,0,1,0,0,1


## Geographic Distribution of Bot Edits

World map showing the total number of edits made by bots across different geographical locations.

In [4]:
# Geographic distribution of bot edits: sum `edit_count` over all bot rows that
# share the same (mid_pos_x, mid_pos_y) pair, skipping rows where either
# coordinate is NULL. The result columns are renamed to x / y / z for plotting.
geo_bot_edits_query = """
SELECT
    mid_pos_x as x,
    mid_pos_y as y,
    SUM(edit_count) as z
FROM '../changeset_data/year=*/month=*/*.parquet'
WHERE mid_pos_x IS NOT NULL AND mid_pos_y IS NOT NULL AND bot = true
GROUP BY mid_pos_x, mid_pos_y
"""
df = duckdb.sql(geo_bot_edits_query).df()

# Single map-type figure; z carries the summed edit count for each position.
bot_edit_map = util.FigureConfig(
    title="Geographic Distribution of Bot Edits",
    x_col="x",
    y_col="y",
    z_col="z",
    query_or_df=df,
    plot_type="map",
)

util.show_figure([bot_edit_map])