## Crossfit stuff

In [1]:
import json
import requests
import pandas as pd
import numpy as np
from io import StringIO
from bs4 import BeautifulSoup
import time, datetime

## See the Markdown files for the necessary info

In [2]:
year = 2024
base_url = f'https://c3po.crossfit.com/api/competitions/v2/competitions/games/{year}/leaderboards'
r = requests.get(f'{base_url}')
data = r.json()

## Seeing that you may not know how much data is returned, I oblige you to copy the base url and paste it into your browser.

### Really examine the contents of that json blob before you pick the key you want to access.

In [3]:
print(list(data.keys()))

['version', 'dataType', 'query', 'sort', 'pagination', 'competition', 'ordinals', 'leaderboardRows']


In [4]:
page_count = data['pagination']
comp_info = data['competition']
leaderboard_list = data['leaderboardRows'] # This is a list of dicts
print(f"Leaderboard Length and number of entrants: {len(leaderboard_list)}")

# leaderboard_list is a list of dictionaries
# Pick an index and see what the keys are. Follow the keys in your browser on Pretty-print
leaderboard_list[0].keys()

Leaderboard Length and number of entrants: 39


dict_keys(['overallRank', 'overallScore', 'nextStage', 'ui', 'entrant', 'scores'])

In [5]:
comp_year = comp_info['year']
entrant_info = leaderboard_list[0].get('entrant')
entrant_info


{'competitorId': '900251',
 'competitorName': 'James Sprague',
 'firstName': 'James',
 'lastName': 'Sprague',
 'status': 'ACT',
 'postCompStatus': '',
 'gender': 'M',
 'profilePicS3key': '98726-P900251_9-184.jpg',
 'countryOfOriginCode': 'US',
 'countryOfOriginName': 'United States',
 'countryShortCode': '',
 'regionId': '34',
 'regionName': 'North America West',
 'divisionId': '1',
 'affiliateId': '967',
 'affiliateName': 'CrossFit Spokane Valley',
 'age': '22',
 'height': '74 in',
 'weight': '210 lb',
 'teamCaptain': '0'}

In [6]:
entrants_info = [entry.get('entrant') for entry in leaderboard_list]
entrants_info

[{'competitorId': '900251',
  'competitorName': 'James Sprague',
  'firstName': 'James',
  'lastName': 'Sprague',
  'status': 'ACT',
  'postCompStatus': '',
  'gender': 'M',
  'profilePicS3key': '98726-P900251_9-184.jpg',
  'countryOfOriginCode': 'US',
  'countryOfOriginName': 'United States',
  'countryShortCode': '',
  'regionId': '34',
  'regionName': 'North America West',
  'divisionId': '1',
  'affiliateId': '967',
  'affiliateName': 'CrossFit Spokane Valley',
  'age': '22',
  'height': '74 in',
  'weight': '210 lb',
  'teamCaptain': '0'},
 {'competitorId': '671093',
  'competitorName': 'Dallin Pepper',
  'firstName': 'Dallin',
  'lastName': 'Pepper',
  'status': 'ACT',
  'postCompStatus': '',
  'gender': 'M',
  'profilePicS3key': 'd5ba0-P671093_6-184.jpg',
  'countryOfOriginCode': 'US',
  'countryOfOriginName': 'United States',
  'countryShortCode': '',
  'regionId': '35',
  'regionName': 'North America East',
  'divisionId': '1',
  'affiliateId': '23233',
  'affiliateName': 'Cro

### While the dictionaries are in order of rank, It's wise to grab the `overallRank` values for each leader board row ie {`overallRank` : "1"} for first place

In [7]:
# Use list comprehension to access the rank values and return a list
ranking_df = pd.DataFrame.from_dict({'Rank':[entry.get('overallRank') for entry in leaderboard_list]})
ranking_df.head()

Unnamed: 0,Rank
0,1
1,2
2,3
3,4
4,5


In [8]:
# Create a dataframe for the list of entrants dictionaries
# We will combine the rank and the entrant info in the next step
entrants_df = pd.DataFrame(entrants_info)
entrants_df.head()

Unnamed: 0,competitorId,competitorName,firstName,lastName,status,postCompStatus,gender,profilePicS3key,countryOfOriginCode,countryOfOriginName,countryShortCode,regionId,regionName,divisionId,affiliateId,affiliateName,age,height,weight,teamCaptain
0,900251,James Sprague,James,Sprague,ACT,,M,98726-P900251_9-184.jpg,US,United States,,34,North America West,1,967.0,CrossFit Spokane Valley,22,74 in,210 lb,0
1,671093,Dallin Pepper,Dallin,Pepper,ACT,,M,d5ba0-P671093_6-184.jpg,US,United States,,35,North America East,1,23233.0,CrossFit Tailwinds,22,72 in,215 lb,0
2,107101,Brent Fikowski,Brent,Fikowski,ACT,,M,9c4bb-P107101_11-184.jpg,CA,Canada,,34,North America West,1,,,33,74 in,220 lb,0
3,1020449,Jayson Hopper,Jayson,Hopper,ACT,,M,da4cc-P1020449_8-184.jpg,US,United States,,35,North America East,1,15471.0,CrossFit Crash,26,73 in,220 lb,0
4,158264,Patrick Vellner,Patrick,Vellner,ACT,,M,d471c-P158264_7-184.jpg,CA,Canada,,34,North America West,1,1918.0,CrossFit Nanaimo,34,71 in,195 lb,0


In [9]:
leaderboard_2024 = pd.concat([ranking_df, entrants_df], axis=1)
leaderboard_2024.head()

Unnamed: 0,Rank,competitorId,competitorName,firstName,lastName,status,postCompStatus,gender,profilePicS3key,countryOfOriginCode,...,countryShortCode,regionId,regionName,divisionId,affiliateId,affiliateName,age,height,weight,teamCaptain
0,1,900251,James Sprague,James,Sprague,ACT,,M,98726-P900251_9-184.jpg,US,...,,34,North America West,1,967.0,CrossFit Spokane Valley,22,74 in,210 lb,0
1,2,671093,Dallin Pepper,Dallin,Pepper,ACT,,M,d5ba0-P671093_6-184.jpg,US,...,,35,North America East,1,23233.0,CrossFit Tailwinds,22,72 in,215 lb,0
2,3,107101,Brent Fikowski,Brent,Fikowski,ACT,,M,9c4bb-P107101_11-184.jpg,CA,...,,34,North America West,1,,,33,74 in,220 lb,0
3,4,1020449,Jayson Hopper,Jayson,Hopper,ACT,,M,da4cc-P1020449_8-184.jpg,US,...,,35,North America East,1,15471.0,CrossFit Crash,26,73 in,220 lb,0
4,5,158264,Patrick Vellner,Patrick,Vellner,ACT,,M,d471c-P158264_7-184.jpg,CA,...,,34,North America West,1,1918.0,CrossFit Nanaimo,34,71 in,195 lb,0


In [10]:
# Check the dtype of each column
# None of them are properly formatted yet but have no fear. We only need the first 3 columns
leaderboard_2024.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 21 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Rank                 39 non-null     object
 1   competitorId         39 non-null     object
 2   competitorName       39 non-null     object
 3   firstName            39 non-null     object
 4   lastName             39 non-null     object
 5   status               39 non-null     object
 6   postCompStatus       39 non-null     object
 7   gender               39 non-null     object
 8   profilePicS3key      39 non-null     object
 9   countryOfOriginCode  39 non-null     object
 10  countryOfOriginName  39 non-null     object
 11  countryShortCode     39 non-null     object
 12  regionId             39 non-null     object
 13  regionName           39 non-null     object
 14  divisionId           39 non-null     object
 15  affiliateId          39 non-null     object
 16  affiliateN

In [11]:
# Ther are better ways to name this but I like descriptive names
rank_id_name_df = leaderboard_2024.iloc[:,:3]
rank_id_name_df.head()

Unnamed: 0,Rank,competitorId,competitorName
0,1,900251,James Sprague
1,2,671093,Dallin Pepper
2,3,107101,Brent Fikowski
3,4,1020449,Jayson Hopper
4,5,158264,Patrick Vellner


In [12]:
rank_id_name_df.iloc[0]

Rank                          1
competitorId             900251
competitorName    James Sprague
Name: 0, dtype: object

## Eventually I'll explain how to scrape the data but today is not that day.

But..... thought process behind the functions below.

Yes I don't need the options to take a list of ids or names but the option is available

### 1. Unnecessary elif and else in scrape_athlete_data. `rank_id_name_df` should never be empty but you never know.


def scrape_athlete_data(rank_id_name_df=None, ids=None, names=None):
    all_athlete_data = []
```python
    if rank_id_name_df is not None:  # Use DataFrame if provided
        for index, row in rank_id_name_df.iterrows():
            rank = row.iloc[0]
            competitor_id = row.iloc[1] # Changed for iloc to work correctly
            athlete_name = row.iloc[2]   # Changed for iloc to work correctly
            all_athlete_data.extend(scrape_individual_athlete(rank, competitor_id, athlete_name))
    elif ids is not None and names is not None:  # Use lists if provided
        for competitor_id, athlete_name in zip(ids, names):
            all_athlete_data.extend(scrape_individual_athlete(rank, competitor_id, athlete_name))
    else:
        return pd.DataFrame()  # Return empty DataFrame if no input is provided
```
    if all_athlete_data:
        all_column_headers = ["podium_place", "id", "name"] + column_headers # add name to the column headers
        df = pd.DataFrame(all_athlete_data, columns=all_column_headers)
        return df
    else:
        return pd.DataFrame()  # Return empty DataFrame if no data was scraped

```python
def scrape_individual_athlete(rank, competitor_id, athlete_name):
    athlete_data = []
    url = f"https://games.crossfit.com/athlete/{competitor_id}"
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        benchmark_stats_table = soup.find('div', id='benchmarkStats')

        if benchmark_stats_table:
            rows = benchmark_stats_table.find_all("tr")
            global column_headers # use a global variable so it is accessible outside of the function
            column_headers = [row.find_all("th")[0].text.strip() for row in rows if row.find_all("th")]
            column_data = [row.find_all("td")[0].text.strip() for row in rows if row.find_all("td")]

            if column_headers and column_data:
                athlete_data_with_id_name = [rank,competitor_id, athlete_name] + column_data
                athlete_data.append(athlete_data_with_id_name)
        else:
            print(f"No benchmarkStats table found for athlete {athlete_name} : {competitor_id}.")
    else:
        print(f"Request failed for athlete {competitor_id} with status code: {response.status_code}")
    return athlete_data

In [13]:
def scrape_individual_athlete(rank, competitor_id, athlete_name):
    athlete_data = []
    url = f"https://games.crossfit.com/athlete/{competitor_id}"
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        benchmark_stats_table = soup.find('div', id='benchmarkStats')

        if benchmark_stats_table:
            rows = benchmark_stats_table.find_all("tr")
            column_headers = [row.find_all("th")[0].text.strip() for row in rows if row.find_all("th")]
            column_data = [row.find_all("td")[0].text.strip() for row in rows if row.find_all("td")]

            if column_headers and column_data:
                athlete_data_with_id_name = [rank,competitor_id, athlete_name] + column_data
                athlete_data.append(athlete_data_with_id_name)
            return athlete_data, column_headers
        else:
            print(f"No benchmarkStats table found for athlete {athlete_name} : {competitor_id}.")
            return [], []  # Return empty lists for both data and headers
    else:
        print(f"Request failed for athlete {competitor_id} with status code: {response.status_code}")
        return [], []  # Return empty lists for both data and headers
    


def scrape_athlete_data(rank_id_name_df):
    all_athlete_data = []
    all_column_headers = ["podium_place", "id", "name"]
    
    for index, row in rank_id_name_df.iterrows():
        rank = row.iloc[0]
        competitor_id = row.iloc[1] 
        athlete_name = row.iloc[2] 
        # all_athlete_data.extend(scrape_individual_athlete(rank, competitor_id, athlete_name))
        athlete_data, column_headers = scrape_individual_athlete(rank, competitor_id, athlete_name)
        
        if athlete_data: # Check if data was scraped for this athlete
            all_athlete_data.extend(athlete_data)
            # print(athlete_data[3:])
            if not all_column_headers[3:]: # Get column headers only once
                all_column_headers.extend(column_headers) # Assuming the first athlete has all the headers
                # all_column_headers.extend(col) # Assuming the first athlete has all the headers
                print(all_column_headers)

    if all_athlete_data:
        df = pd.DataFrame(all_athlete_data, columns=all_column_headers)
        return df
    else:
        return pd.DataFrame()  # Return empty DataFrame if no data was scraped


pd.set_option('future.no_silent_downcasting', True)
df = scrape_athlete_data(rank_id_name_df=rank_id_name_df).replace('--',np.nan)
if not df.empty:
  print(df.head())
else:
  print("No athlete data could be scraped")

['podium_place', 'id', 'name', 'Back Squat', 'Chad1000x', 'Clean and Jerk', 'Deadlift', 'Fight Gone Bad', 'Filthy 50', 'Fran', 'Grace', 'Helen', 'L1 Benchmark', 'Max Pull-ups', 'Murph', 'Run 5k', 'Snatch', 'Sprint 400m']
No benchmarkStats table found for athlete Chris Ibarra : 2082854.
No benchmarkStats table found for athlete Calum Clements : 771441.
  podium_place       id             name Back Squat Chad1000x Clean and Jerk  \
0            1   900251    James Sprague     465 lb     43:50         355 lb   
1            2   671093    Dallin Pepper     425 lb       NaN         360 lb   
2            3   107101   Brent Fikowski     435 lb     47:36         355 lb   
3            4  1020449    Jayson Hopper     475 lb       NaN         390 lb   
4            5   158264  Patrick Vellner     455 lb       NaN         355 lb   

  Deadlift Fight Gone Bad Filthy 50  Fran Grace Helen L1 Benchmark  \
0   550 lb            520     17:30  2:10  1:05  7:02          NaN   
1   550 lb            403

In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   podium_place    37 non-null     object
 1   id              37 non-null     object
 2   name            37 non-null     object
 3   Back Squat      29 non-null     object
 4   Chad1000x       3 non-null      object
 5   Clean and Jerk  29 non-null     object
 6   Deadlift        29 non-null     object
 7   Fight Gone Bad  10 non-null     object
 8   Filthy 50       7 non-null      object
 9   Fran            20 non-null     object
 10  Grace           21 non-null     object
 11  Helen           11 non-null     object
 12  L1 Benchmark    0 non-null      object
 13  Max Pull-ups    18 non-null     object
 14  Murph           1 non-null      object
 15  Run 5k          16 non-null     object
 16  Snatch          29 non-null     object
 17  Sprint 400m     12 non-null     object
dtypes: object(18

In [20]:
import os
path = os.getcwd()+f"{comp_year}_athlete_leaderboard_benchmarkstats.csv"
df.to_csv(path,index=False)

In [21]:
unclean_data = pd.read_csv(path)
unclean_data.info()
unclean_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   podium_place    37 non-null     int64  
 1   id              37 non-null     int64  
 2   name            37 non-null     object 
 3   Back Squat      29 non-null     object 
 4   Chad1000x       3 non-null      object 
 5   Clean and Jerk  29 non-null     object 
 6   Deadlift        29 non-null     object 
 7   Fight Gone Bad  10 non-null     float64
 8   Filthy 50       7 non-null      object 
 9   Fran            20 non-null     object 
 10  Grace           21 non-null     object 
 11  Helen           11 non-null     object 
 12  L1 Benchmark    0 non-null      float64
 13  Max Pull-ups    18 non-null     float64
 14  Murph           1 non-null      object 
 15  Run 5k          16 non-null     object 
 16  Snatch          29 non-null     object 
 17  Sprint 400m     12 non-null     objec

Unnamed: 0,podium_place,id,name,Back Squat,Chad1000x,Clean and Jerk,Deadlift,Fight Gone Bad,Filthy 50,Fran,Grace,Helen,L1 Benchmark,Max Pull-ups,Murph,Run 5k,Snatch,Sprint 400m
0,1,900251,James Sprague,465 lb,43:50,355 lb,550 lb,520.0,17:30,2:10,1:05,7:02,,72.0,,18:00,285 lb,0:59
1,2,671093,Dallin Pepper,425 lb,,360 lb,550 lb,403.0,,2:07,1:58,,,50.0,,,300 lb,
2,3,107101,Brent Fikowski,435 lb,47:36,355 lb,515 lb,,,,1:14,7:08,,,43:39,19:33,305 lb,
3,4,1020449,Jayson Hopper,475 lb,,390 lb,550 lb,408.0,,2:15,2:00,,,,,19:00,300 lb,0:52
4,5,158264,Patrick Vellner,455 lb,,355 lb,595 lb,389.0,20:16,2:14,1:17,9:46,,64.0,,18:50,290 lb,1:06


In [22]:
# First clean the data of the missing values '--' and convert weight (lb/kg) to floats
pd.set_option('future.no_silent_downcasting', True)
cleaned_data = unclean_data.map(lambda x: 
    np.nan if x == '--' else ( 
        float(x.replace(' lb', '')) if isinstance(x, str) and x.endswith('lb') else (
            float(x.replace(' kg', '')) * 2.20 if isinstance(x, str) and x.endswith('kg') else (
                int(x) if isinstance(x,str) and x.isdigit() else x
            )
        )
    )
)
cleaned_data.info()
cleaned_data.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   podium_place    37 non-null     int64  
 1   id              37 non-null     int64  
 2   name            37 non-null     object 
 3   Back Squat      29 non-null     float64
 4   Chad1000x       3 non-null      object 
 5   Clean and Jerk  29 non-null     float64
 6   Deadlift        29 non-null     float64
 7   Fight Gone Bad  10 non-null     float64
 8   Filthy 50       7 non-null      object 
 9   Fran            20 non-null     object 
 10  Grace           21 non-null     object 
 11  Helen           11 non-null     object 
 12  L1 Benchmark    0 non-null      float64
 13  Max Pull-ups    18 non-null     float64
 14  Murph           1 non-null      object 
 15  Run 5k          16 non-null     object 
 16  Snatch          29 non-null     float64
 17  Sprint 400m     12 non-null     objec

Unnamed: 0,podium_place,id,name,Back Squat,Chad1000x,Clean and Jerk,Deadlift,Fight Gone Bad,Filthy 50,Fran,Grace,Helen,L1 Benchmark,Max Pull-ups,Murph,Run 5k,Snatch,Sprint 400m
0,1,900251,James Sprague,465.0,43:50,355.0,550.0,520.0,17:30,2:10,1:05,7:02,,72.0,,18:00,285.0,0:59
1,2,671093,Dallin Pepper,425.0,,360.0,550.0,403.0,,2:07,1:58,,,50.0,,,300.0,
2,3,107101,Brent Fikowski,435.0,47:36,355.0,515.0,,,,1:14,7:08,,,43:39,19:33,305.0,
3,4,1020449,Jayson Hopper,475.0,,390.0,550.0,408.0,,2:15,2:00,,,,,19:00,300.0,0:52
4,5,158264,Patrick Vellner,455.0,,355.0,595.0,389.0,20:16,2:14,1:17,9:46,,64.0,,18:50,290.0,1:06
5,6,74320,Samuel Kwant,460.0,,345.0,530.0,,,2:04,1:47,,,74.0,,18:05,285.0,
6,7,40955,Ricky Garard,451.0,,363.0,528.0,490.0,14:50,2:05,1:10,6:45,,75.0,,17:32,297.0,0:54
7,8,811708,Justin Medeiros,,,,,,,,,,,,,,,
8,9,975774,Roman Khrennikov,473.0,,370.0,572.0,,,2:00,1:10,,,75.0,,,280.0,
9,10,947291,Austin Hatfield,535.0,,375.0,540.0,430.0,,1:50,1:07,,,82.0,,17:52,305.0,0:48


In [23]:
import re
fully_cleaned_data = cleaned_data.map(lambda x: 
                        ((time.strptime(x,'%M:%S').tm_min * 60) 
                            + time.strptime(x,'%M:%S').tm_sec
                        ) 
                        if isinstance(x, str) and bool(re.match(r'^\d{1,2}:\d{2}$', x)) else x
                        )

In [24]:
fully_cleaned_data.info()
fully_cleaned_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   podium_place    37 non-null     int64  
 1   id              37 non-null     int64  
 2   name            37 non-null     object 
 3   Back Squat      29 non-null     float64
 4   Chad1000x       3 non-null      float64
 5   Clean and Jerk  29 non-null     float64
 6   Deadlift        29 non-null     float64
 7   Fight Gone Bad  10 non-null     float64
 8   Filthy 50       7 non-null      float64
 9   Fran            20 non-null     float64
 10  Grace           21 non-null     float64
 11  Helen           11 non-null     float64
 12  L1 Benchmark    0 non-null      float64
 13  Max Pull-ups    18 non-null     float64
 14  Murph           1 non-null      float64
 15  Run 5k          16 non-null     float64
 16  Snatch          29 non-null     float64
 17  Sprint 400m     12 non-null     float

Unnamed: 0,podium_place,id,name,Back Squat,Chad1000x,Clean and Jerk,Deadlift,Fight Gone Bad,Filthy 50,Fran,Grace,Helen,L1 Benchmark,Max Pull-ups,Murph,Run 5k,Snatch,Sprint 400m
0,1,900251,James Sprague,465.0,2630.0,355.0,550.0,520.0,1050.0,130.0,65.0,422.0,,72.0,,1080.0,285.0,59.0
1,2,671093,Dallin Pepper,425.0,,360.0,550.0,403.0,,127.0,118.0,,,50.0,,,300.0,
2,3,107101,Brent Fikowski,435.0,2856.0,355.0,515.0,,,,74.0,428.0,,,2619.0,1173.0,305.0,
3,4,1020449,Jayson Hopper,475.0,,390.0,550.0,408.0,,135.0,120.0,,,,,1140.0,300.0,52.0
4,5,158264,Patrick Vellner,455.0,,355.0,595.0,389.0,1216.0,134.0,77.0,586.0,,64.0,,1130.0,290.0,66.0


In [25]:
fully_cleaned_data.describe()

Unnamed: 0,podium_place,id,Back Squat,Chad1000x,Clean and Jerk,Deadlift,Fight Gone Bad,Filthy 50,Fran,Grace,Helen,L1 Benchmark,Max Pull-ups,Murph,Run 5k,Snatch,Sprint 400m
count,37.0,37.0,29.0,3.0,29.0,29.0,10.0,7.0,20.0,21.0,11.0,0.0,18.0,1.0,16.0,29.0,12.0
mean,20.0,767905.3,462.427586,2788.666667,359.275862,541.482759,445.7,984.142857,130.25,87.857143,445.727273,,66.222222,2619.0,1129.0625,290.172414,55.666667
std,11.669047,556115.3,32.870557,137.932351,17.494359,27.70588,63.326403,174.152642,11.313127,23.776639,50.323138,,11.874617,,48.686026,12.26292,4.696872
min,1.0,1690.0,396.0,2630.0,316.8,485.0,337.0,795.0,110.0,65.0,394.0,,50.0,2619.0,1052.0,264.0,48.0
25%,10.0,308651.0,435.0,2743.0,352.0,528.0,404.25,854.5,122.75,70.0,425.0,,55.5,2619.0,1082.25,280.0,53.5
50%,21.0,740370.0,463.0,2856.0,360.0,539.0,438.5,917.0,127.5,77.0,438.0,,66.5,2619.0,1141.0,290.0,54.5
75%,30.0,1020449.0,475.0,2868.0,374.0,555.0,505.75,1126.0,136.75,107.0,451.0,,75.0,2619.0,1168.5,300.0,59.0
max,38.0,2064040.0,535.0,2880.0,395.0,600.0,522.0,1216.0,156.0,146.0,586.0,,85.0,2619.0,1200.0,310.0,66.0


In [26]:
path = os.getcwd()+f"cleaned_{comp_year}_athlete_leaderboard_benchmarkstats.csv"
fully_cleaned_data.to_csv(path,index=False)