# Editing Software

In [32]:
import duckdb
import pandas as pd

# Styling shared by both plot axes: black ticks/axis line and a dark-gray grid/zero line.
_axis_style = {
    "tickcolor": "black",
    "linecolor": "black",
    "showgrid": True,
    "gridcolor": "darkgray",
    "zerolinecolor": "darkgray",
}

# Layout settings kept in one dict so figures in this notebook can share a single look.
default_layout = {
    "margin": {"l": 55, "r": 55, "b": 55, "t": 55},
    "font": {"family": "Times", "size": 15},
    "title_x": 0.5,
    "paper_bgcolor": "#f5f2f0",
    "plot_bgcolor": "#f5f2f0",
    "xaxis": {**_axis_style},
    # The y axis additionally sets rangemode="tozero".
    "yaxis": {**_axis_style, "rangemode": "tozero"},
}

# Show up to 100 rows when a DataFrame is displayed.
pd.set_option("display.max_rows", 100)

## Top 10 Editing Software by Yearly Edit Count

In [36]:
# Section 1, "10 most used editors"
# Yearly edit counts for the 10 editors (`created_by`) with the largest all-time edit totals.
#   software_yearly_edits: edits summed per (year, created_by), ignoring rows without an editor.
#   software_total_edits:  the 10 editors with the highest sum over all years.
sql_query = """
WITH software_yearly_edits AS (
	SELECT
		year,
		created_by,
		CAST(SUM(edit_count) as BIGINT) as edits
	FROM '../data_enriched/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY year, created_by
),
software_total_edits AS (
	SELECT
		created_by,
		CAST(SUM(edits) as BIGINT) as total_edits
	FROM software_yearly_edits
	GROUP BY created_by
	ORDER BY total_edits DESC
	LIMIT 10
)
SELECT
	year,
	created_by,
	edits
FROM software_yearly_edits
WHERE created_by IN (SELECT created_by FROM software_total_edits)
ORDER BY year, created_by
"""
df = duckdb.sql(sql_query).df()

# Reshape to one row per editor and one column per year.
# - The query already groups by (year, created_by), so every cell receives exactly one
#   value; aggfunc="sum" carries it through unchanged instead of averaging it (the
#   pivot_table default), and fill_value=0 marks years in which an editor has no edits.
# - The dtype is pinned to int64 explicitly: a plain `int` can resolve to a 32-bit integer
#   (Windows with NumPy < 2), which the multi-billion "Total" values would overflow.
pivot_table = df.pivot_table(
    index="created_by",
    columns="year",
    values="edits",
    aggfunc="sum",
    fill_value=0,
).astype("int64")

# The column-axis name is rendered in the table's header corner; it is used here as the
# caption for the editor names in the index, whose own name is cleared.
pivot_table.columns.name = "Editor"
pivot_table.index.name = None

# Append the all-years total per editor and list the most used editors first.
pivot_table["Total"] = pivot_table.sum(axis=1)
pivot_table = pivot_table.sort_values("Total", ascending=False)
pivot_table

Editor,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total
JOSM,125617731,494460089,539135966,542128543,500031429,666714750,642725919,625443152,664171922,794010429,790721973,996077692,917485702,758566474,756759812,767135437,325128093,10906315113
iD,0,0,0,0,34430862,109275830,142202485,189396418,279162048,321342148,359664267,459865024,480370002,416745824,427963559,428165596,191281367,3839865430
Potlatch,44030413,64726838,100956564,147706379,133843837,78092007,60204602,42988829,33377795,25182374,20637960,17371354,5083644,2678828,2574667,2766754,977964,783200809
Rapid,0,0,0,0,0,0,0,0,0,0,11678103,51870526,63919308,63564994,63683830,52467053,20156724,327340538
osmtools,150412240,8994328,981921,1229689,2734257,1499370,6826400,15035558,2655505,3271989,7558536,6454470,8152531,12399003,29128241,2154313,578476,260066827
bulk_upload.py,64669041,24649625,1442184,759090,185033,8089,1061001,1113638,2398,1036,26933301,263564,464798,0,0,4341,0,121557139
StreetComplete,0,0,0,0,0,0,0,85,1256558,1701133,1983723,3934118,12439388,12904369,13630403,12782820,4729050,65361647
Merkaartor,5378663,10887187,10250026,9702870,4380406,3791724,3696590,2522894,1735350,2138115,2316287,1745000,334114,491775,123828,96656,150822,59742307
upload.py,31167015,10166002,2402891,421238,776590,422731,488194,4827,38997,3021,4195,1853644,4378760,822536,968637,2536270,699623,57155171
Vespucci,2087,22298,49455,165722,277566,804178,885787,1055056,1559094,2073139,2296702,3560018,3517790,3864897,4998545,9855980,5241579,40229893


In [35]:
# Section 1, "100 most used editors"
# Yearly edit counts for the 100 editors (`created_by`) with the largest all-time edit totals.
#   software_yearly_edits: edits summed per (year, created_by), ignoring rows without an editor.
#   software_total_edits:  the 100 editors with the highest sum over all years.
sql_query = """
WITH software_yearly_edits AS (
	SELECT
		year,
		created_by,
		CAST(SUM(edit_count) as BIGINT) as edits
	FROM '../data_enriched/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY year, created_by
),
software_total_edits AS (
	SELECT
		created_by,
		CAST(SUM(edits) as BIGINT) as total_edits
	FROM software_yearly_edits
	GROUP BY created_by
	ORDER BY total_edits DESC
	LIMIT 100
)
SELECT
	year,
	created_by,
	edits
FROM software_yearly_edits
WHERE created_by IN (SELECT created_by FROM software_total_edits)
ORDER BY year, created_by
"""
df = duckdb.sql(sql_query).df()

# Reshape to one row per editor and one column per year.
# - The query already groups by (year, created_by), so every cell receives exactly one
#   value; aggfunc="sum" carries it through unchanged instead of averaging it (the
#   pivot_table default), and fill_value=0 marks years in which an editor has no edits.
# - The dtype is pinned to int64 explicitly: a plain `int` can resolve to a 32-bit integer
#   (Windows with NumPy < 2), which multi-billion "Total" values would overflow.
pivot_table = df.pivot_table(
    index="created_by",
    columns="year",
    values="edits",
    aggfunc="sum",
    fill_value=0,
).astype("int64")

# The column-axis name is rendered in the table's header corner; it is used here as the
# caption for the editor names in the index, whose own name is cleared.
pivot_table.columns.name = "Editor"
pivot_table.index.name = None

# Append the all-years total per editor and list the most used editors first.
pivot_table["Total"] = pivot_table.sum(axis=1)
pivot_table = pivot_table.sort_values("Total", ascending=False)
pivot_table

Editor,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total
JOSM,125617731,494460089,539135966,542128543,500031429,666714750,642725919,625443152,664171922,794010429,790721973,996077692,917485702,758566474,756759812,767135437,325128093,10906315113
iD,0,0,0,0,34430862,109275830,142202485,189396418,279162048,321342148,359664267,459865024,480370002,416745824,427963559,428165596,191281367,3839865430
Potlatch,44030413,64726838,100956564,147706379,133843837,78092007,60204602,42988829,33377795,25182374,20637960,17371354,5083644,2678828,2574667,2766754,977964,783200809
Rapid,0,0,0,0,0,0,0,0,0,0,11678103,51870526,63919308,63564994,63683830,52467053,20156724,327340538
osmtools,150412240,8994328,981921,1229689,2734257,1499370,6826400,15035558,2655505,3271989,7558536,6454470,8152531,12399003,29128241,2154313,578476,260066827
bulk_upload.py,64669041,24649625,1442184,759090,185033,8089,1061001,1113638,2398,1036,26933301,263564,464798,0,0,4341,0,121557139
StreetComplete,0,0,0,0,0,0,0,85,1256558,1701133,1983723,3934118,12439388,12904369,13630403,12782820,4729050,65361647
Merkaartor,5378663,10887187,10250026,9702870,4380406,3791724,3696590,2522894,1735350,2138115,2316287,1745000,334114,491775,123828,96656,150822,59742307
upload.py,31167015,10166002,2402891,421238,776590,422731,488194,4827,38997,3021,4195,1853644,4378760,822536,968637,2536270,699623,57155171
Vespucci,2087,22298,49455,165722,277566,804178,885787,1055056,1559094,2073139,2296702,3560018,3517790,3864897,4998545,9855980,5241579,40229893


## Top 100 Editing Software by Yearly Edit Count

In [37]:
# Yearly edit counts for the 100 editors (`created_by`) with the largest all-time edit totals.
#   software_yearly_edits: edits summed per (year, created_by), ignoring rows without an editor.
#   software_total_edits:  the 100 editors with the highest sum over all years.
# NOTE(review): this query is identical to the "100 most used editors" query earlier in the
# notebook; consider extracting a shared helper parameterised by the LIMIT value.
sql_query = """
WITH software_yearly_edits AS (
	SELECT
		year,
		created_by,
		CAST(SUM(edit_count) as BIGINT) as edits
	FROM '../data_enriched/year=*/month=*/*.parquet'
	WHERE created_by IS NOT NULL
	GROUP BY year, created_by
),
software_total_edits AS (
	SELECT
		created_by,
		CAST(SUM(edits) as BIGINT) as total_edits
	FROM software_yearly_edits
	GROUP BY created_by
	ORDER BY total_edits DESC
	LIMIT 100
)
SELECT
	year,
	created_by,
	edits
FROM software_yearly_edits
WHERE created_by IN (SELECT created_by FROM software_total_edits)
ORDER BY year, created_by
"""
df = duckdb.sql(sql_query).df()

# Reshape to one row per editor and one column per year.
# - The query already groups by (year, created_by), so every cell receives exactly one
#   value; aggfunc="sum" carries it through unchanged instead of averaging it (the
#   pivot_table default), and fill_value=0 marks years in which an editor has no edits.
# - The dtype is pinned to int64 explicitly: a plain `int` can resolve to a 32-bit integer
#   (Windows with NumPy < 2), which multi-billion "Total" values would overflow.
pivot_table = df.pivot_table(
    index="created_by",
    columns="year",
    values="edits",
    aggfunc="sum",
    fill_value=0,
).astype("int64")

# The column-axis name is rendered in the table's header corner; it is used here as the
# caption for the editor names in the index, whose own name is cleared.
pivot_table.columns.name = "Editor"
pivot_table.index.name = None

# Append the all-years total per editor and list the most used editors first.
pivot_table["Total"] = pivot_table.sum(axis=1)
pivot_table = pivot_table.sort_values("Total", ascending=False)
pivot_table

Editor,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,Total
JOSM,125617731,494460089,539135966,542128543,500031429,666714750,642725919,625443152,664171922,794010429,790721973,996077692,917485702,758566474,756759812,767135437,325128093,10906315113
iD,0,0,0,0,34430862,109275830,142202485,189396418,279162048,321342148,359664267,459865024,480370002,416745824,427963559,428165596,191281367,3839865430
Potlatch,44030413,64726838,100956564,147706379,133843837,78092007,60204602,42988829,33377795,25182374,20637960,17371354,5083644,2678828,2574667,2766754,977964,783200809
Rapid,0,0,0,0,0,0,0,0,0,0,11678103,51870526,63919308,63564994,63683830,52467053,20156724,327340538
osmtools,150412240,8994328,981921,1229689,2734257,1499370,6826400,15035558,2655505,3271989,7558536,6454470,8152531,12399003,29128241,2154313,578476,260066827
bulk_upload.py,64669041,24649625,1442184,759090,185033,8089,1061001,1113638,2398,1036,26933301,263564,464798,0,0,4341,0,121557139
StreetComplete,0,0,0,0,0,0,0,85,1256558,1701133,1983723,3934118,12439388,12904369,13630403,12782820,4729050,65361647
Merkaartor,5378663,10887187,10250026,9702870,4380406,3791724,3696590,2522894,1735350,2138115,2316287,1745000,334114,491775,123828,96656,150822,59742307
upload.py,31167015,10166002,2402891,421238,776590,422731,488194,4827,38997,3021,4195,1853644,4378760,822536,968637,2536270,699623,57155171
Vespucci,2087,22298,49455,165722,277566,804178,885787,1055056,1559094,2073139,2296702,3560018,3517790,3864897,4998545,9855980,5241579,40229893
