# Percent of Prize Money Won Per Ranking Band

For each year, the total amount of prize money is summed separately for the top 750 singles players and the top 250 doubles players. Then, for each ranking band of 50 players (i.e., 1-50, 51-100, etc.), the band's prize money is summed and divided by that year's total to find the fraction of prize money earned by that ranking band. The resulting distribution shows how prize money is spread across the rankings.

In [93]:
import pandas as pd
import datetime

In [94]:
# Inclusive range of seasons to analyse: from START_YEAR through the
# calendar year before the one in which the notebook is run.
START_YEAR = 2009
END_YEAR = datetime.date.today().year - 1

# Lowest ranking (inclusive) considered for each draw type.
MAX_SINGLES, MAX_DOUBLES = 750, 250

In [95]:
def _sum_top_prize_money(players, max_rank):
    """Return the summed ``prize_money`` of ranks 1..max_rank (inclusive).

    Exactly one player is counted per ranking: when several rows share a
    ranking, only the first row in file order is used. Rankings with no
    row contribute nothing to the total.
    """
    # Build a ranking -> prize money lookup in a single pass instead of
    # scanning the whole DataFrame once for every rank.
    first_per_rank = players.drop_duplicates(subset="ranking", keep="first")
    prize_by_rank = dict(
        zip(first_per_rank["ranking"], first_per_rank["prize_money"])
    )
    # Accumulate in ascending rank order, starting from 0.
    return sum(prize_by_rank.get(rank, 0) for rank in range(1, max_rank + 1))


# Total prize money per year, keyed by year, for each draw type.
singles_total_prize_money = {}
doubles_total_prize_money = {}

for year in range(START_YEAR, END_YEAR + 1):
    singles = pd.read_csv(f"../data/players/clean/singles/{year}.gz")
    singles_total_prize_money[year] = _sum_top_prize_money(singles, MAX_SINGLES)

    doubles = pd.read_csv(f"../data/players/clean/doubles/{year}.gz")
    doubles_total_prize_money[year] = _sum_top_prize_money(doubles, MAX_DOUBLES)

In [96]:
def _band_rows(players, year, draw_type, max_rank, year_total, band_size=50):
    """Build one result row per complete ranking band for a single year.

    Args:
        players: DataFrame with ``ranking`` and ``prize_money`` columns.
        year: Year the rows are labelled with.
        draw_type: Value stored under the ``"type"`` key
            (``"singles"`` or ``"doubles"``).
        max_rank: Highest ranking considered (inclusive). Ranks beyond the
            last complete band are ignored.
        year_total: Total prize money for this year and draw type; each
            band's prize money is divided by it.
        band_size: Number of consecutive rankings per band.

    Returns:
        A list of dicts with the keys ``year``, ``type``, ``ranking_band``,
        ``prize_money`` and ``percent_prize_money`` (a fraction of
        ``year_total``, not a value scaled to 0-100).
    """
    # One player per ranking: if several rows share a ranking only the first
    # row in file order is counted. Building the lookup once avoids scanning
    # the whole DataFrame for every single rank.
    first_per_rank = players.drop_duplicates(subset="ranking", keep="first")
    prize_by_rank = dict(
        zip(first_per_rank["ranking"], first_per_rank["prize_money"])
    )

    rows = []
    for band_end in range(band_size, max_rank + 1, band_size):
        band_start = band_end - band_size + 1
        # Rankings with no row contribute nothing to the band.
        band_prize_money = sum(
            prize_by_rank.get(rank, 0)
            for rank in range(band_start, band_end + 1)
        )
        rows.append({
            "year": year,
            "type": draw_type,
            "ranking_band": f"{band_start} - {band_end}",
            "prize_money": band_prize_money,
            "percent_prize_money": band_prize_money / year_total,
        })
    return rows


# For every year, the singles bands come first, followed by the doubles bands.
data = []
for year in range(START_YEAR, END_YEAR + 1):
    singles = pd.read_csv(f"../data/players/clean/singles/{year}.gz")
    data.extend(
        _band_rows(
            singles, year, "singles", MAX_SINGLES,
            singles_total_prize_money[year],
        )
    )

    doubles = pd.read_csv(f"../data/players/clean/doubles/{year}.gz")
    data.extend(
        _band_rows(
            doubles, year, "doubles", MAX_DOUBLES,
            doubles_total_prize_money[year],
        )
    )

res = pd.DataFrame(data)

In [97]:
# Split `res` by ranking band: each dict maps a band label such as "1 - 50"
# to the rows of `res` for that band and draw type.
is_singles = res["type"] == "singles"
singles_prize_money_bands = {
    label: res[is_singles & (res["ranking_band"] == label)]
    for label in (f"{low} - {low + 49}" for low in range(1, MAX_SINGLES + 1, 50))
}

is_doubles = res["type"] == "doubles"
doubles_prize_money_bands = {
    label: res[is_doubles & (res["ranking_band"] == label)]
    for label in (f"{low} - {low + 49}" for low in range(1, MAX_DOUBLES + 1, 50))
}

In [98]:
# Print the rows of every singles ranking band, one DataFrame per band.
for band_rows in singles_prize_money_bands.values():
    print(band_rows)

     year     type ranking_band   prize_money  percent_prize_money
0    2009  singles       1 - 50  9.801710e+07             0.664013
20   2010  singles       1 - 50  9.592749e+07             0.667812
40   2011  singles       1 - 50  1.026904e+08             0.675601
60   2012  singles       1 - 50  1.018718e+08             0.680690
80   2013  singles       1 - 50  1.169570e+08             0.682208
100  2014  singles       1 - 50  1.245026e+08             0.679812
120  2015  singles       1 - 50  1.327623e+08             0.696707
140  2016  singles       1 - 50  1.364686e+08             0.685017
160  2017  singles       1 - 50  1.433242e+08             0.659839
180  2018  singles       1 - 50  1.549889e+08             0.662777
200  2019  singles       1 - 50  1.575849e+08             0.653292
220  2020  singles       1 - 50  7.288362e+07             0.596342
240  2021  singles       1 - 50  1.015849e+08             0.563202
260  2022  singles       1 - 50  1.362050e+08             0.61

In [99]:
# Print the rows of every doubles ranking band, one DataFrame per band.
for band_rows in doubles_prize_money_bands.values():
    print(band_rows)

     year     type ranking_band  prize_money  percent_prize_money
15   2009  doubles       1 - 50  18478370.54             0.651378
35   2010  doubles       1 - 50  16727686.56             0.634989
55   2011  doubles       1 - 50  17455627.20             0.634962
75   2012  doubles       1 - 50  20716152.60             0.702744
95   2013  doubles       1 - 50  19507640.10             0.667720
115  2014  doubles       1 - 50  19962046.72             0.664386
135  2015  doubles       1 - 50  24819417.60             0.728427
155  2016  doubles       1 - 50  24425455.32             0.700726
175  2017  doubles       1 - 50  26429572.11             0.692629
195  2018  doubles       1 - 50  26935132.40             0.679238
215  2019  doubles       1 - 50  27806864.47             0.682899
235  2020  doubles       1 - 50  12665079.52             0.638495
255  2021  doubles       1 - 50  18651636.90             0.634805
275  2022  doubles       1 - 50  24361752.10             0.685552
295  2023 

In [100]:
# Split `res` by year: each dict maps a year to the rows of `res` for that
# year and draw type (one row per ranking band).
years = range(START_YEAR, END_YEAR + 1)

singles_rows = res["type"] == "singles"
singles_prize_money_years = {
    season: res[singles_rows & (res["year"] == season)] for season in years
}

doubles_rows = res["type"] == "doubles"
doubles_prize_money_years = {
    season: res[doubles_rows & (res["year"] == season)] for season in years
}

In [101]:
# Print the singles band breakdown for every year, one DataFrame per year.
for year_rows in singles_prize_money_years.values():
    print(year_rows)

    year     type ranking_band  prize_money  percent_prize_money
0   2009  singles       1 - 50  98017096.08             0.664013
1   2009  singles     51 - 100  19538346.58             0.132362
2   2009  singles    101 - 150  11365235.54             0.076993
3   2009  singles    151 - 200   5370628.86             0.036383
4   2009  singles    201 - 250   3400908.52             0.023039
5   2009  singles    251 - 300   2388280.96             0.016179
6   2009  singles    301 - 350   1612583.24             0.010924
7   2009  singles    351 - 400   1609778.74             0.010905
8   2009  singles    401 - 450    969412.70             0.006567
9   2009  singles    451 - 500    884351.86             0.005991
10  2009  singles    501 - 550    640087.72             0.004336
11  2009  singles    551 - 600    670347.92             0.004541
12  2009  singles    601 - 650    443671.90             0.003006
13  2009  singles    651 - 700    401520.62             0.002720
14  2009  singles    701 

In [102]:
# Print the doubles band breakdown for every year, one DataFrame per year.
for year_rows in doubles_prize_money_years.values():
    print(year_rows)

    year     type ranking_band  prize_money  percent_prize_money
15  2009  doubles       1 - 50  18478370.54             0.651378
16  2009  doubles     51 - 100   4890617.74             0.172398
17  2009  doubles    101 - 150   2580365.78             0.090960
18  2009  doubles    151 - 200   1467197.96             0.051720
19  2009  doubles    201 - 250    951559.04             0.033543
    year     type ranking_band  prize_money  percent_prize_money
35  2010  doubles       1 - 50  16727686.56             0.634989
36  2010  doubles     51 - 100   5001694.08             0.189866
37  2010  doubles    101 - 150   2317362.24             0.087968
38  2010  doubles    151 - 200   1328556.36             0.050432
39  2010  doubles    201 - 250    967977.54             0.036745
    year     type ranking_band  prize_money  percent_prize_money
55  2011  doubles       1 - 50  17455627.20             0.634962
56  2011  doubles     51 - 100   4972888.96             0.180893
57  2011  doubles    101 