# Imports

In [None]:
import pandas as pd
import requests
import nbimporter
from nbimporter import NotebookLoader

# Load the `scraping_data_functions` notebook as a module so the functions it
# defines can be called as `module.<name>` in the cells below.
# NOTE(review): '.' is presumably the directory nbimporter searches for the
# notebook file — confirm against the nbimporter documentation.
loader = NotebookLoader('.')
module = loader.load_module('scraping_data_functions')

# Data Scraping

### 2021

##### Open

###### Athletes information

In [None]:
# Query 1 - 2021 Open Not Scores
# Calls scraping_data_open_and_games (from the notebook loaded as `module`),
# passing '2021' and 'open' and leaving the `scores` keyword at its default.
# NOTE(review): presumably returns a DataFrame of athlete information — the
# helper is defined outside this notebook, confirm there.
df_2021_open_not_scores = module.scraping_data_open_and_games('2021', 'open')

# Bare expression as the last statement so the notebook displays the result.
df_2021_open_not_scores

###### Athletes scores

In [None]:
# Query 2 - 2021 Open Scores
# Same call as Query 1 ('2021', 'open') but with scores=True.
# NOTE(review): presumably switches the helper to return per-workout scores
# instead of athlete information — confirm in scraping_data_functions.
df_2021_open_scores = module.scraping_data_open_and_games('2021', 'open', scores=True)

# Bare expression as the last statement so the notebook displays the result.
df_2021_open_scores

##### Quarterfinals

###### Athletes information

In [None]:
# Query 3 - 2021 Quarterfinals Not Scores
# Walk every page of the 2021 individual Quarterfinals leaderboard for each
# division and collect one record per leaderboard row: the row's `entrant`
# fields plus its overall rank and overall score.
base_url = "https://c3po.crossfit.com/api/leaderboards/v2/competitions/quarterfinalsindividual/2021/leaderboards"

# Seconds to wait for the server on each request. Without a timeout a stalled
# connection would block the notebook indefinitely.
request_timeout = 30

data_list = []
genders = [1, 2]  # sent as the API's `division` query parameter


def fetch_leaderboard_json(url):
    """Download `url` and return its decoded JSON body.

    Raises if the request fails or times out, if the server answers with an
    HTTP error status, or if the body is not valid JSON.
    """
    reply = requests.get(url, timeout=request_timeout)
    reply.raise_for_status()
    return reply.json()


def build_athlete_record(row):
    """Return a copy of the row's entrant fields plus overallRank/overallScore.

    The input row is left untouched. Raises KeyError if `entrant`,
    `overallRank` or `overallScore` is missing from the row.
    """
    return {
        **row['entrant'],
        'overallRank': row['overallRank'],
        'overallScore': row['overallScore'],
    }


# Iterate over each gender
for gender in genders:
    # The first page is needed to learn how many pages exist
    url = f"{base_url}?division={gender}&page=1"
    try:
        first_page = fetch_leaderboard_json(url)
        total_pages = first_page['pagination']['totalPages']
        page_numbers = range(1, total_pages + 1)
    except Exception as e:
        # Best effort: report the failure and move on to the next gender
        print(f"Error occurred while fetching total_pages for gender={gender}: {e}")
        continue

    for page in page_numbers:
        url = f"{base_url}?division={gender}&page={page}"
        try:
            # Page 1 is already in memory; do not download it a second time
            response = first_page if page == 1 else fetch_leaderboard_json(url)
            for row in response['leaderboardRows']:
                data_list.append(build_athlete_record(row))
        except Exception as e:
            # Best effort: report the failing page and keep going with the rest
            print(f"Error occurred while fetching data for gender={gender}, page={page}: {e}")

# Create a DataFrame from the collected data_list
df_2021_quarterfinals_not_scores = pd.DataFrame(data_list)
# `df` is kept as an alias because this cell originally stored its result there
df = df_2021_quarterfinals_not_scores

df_2021_quarterfinals_not_scores

###### Athletes scores

In [None]:
# Query 4 - 2021 Quarterfinals Scores
# Walk every page of the 2021 individual Quarterfinals leaderboard for each
# division and collect one record per entry in a row's `scores` list, tagged
# with the competitorId of the athlete it belongs to.
base_url = "https://c3po.crossfit.com/api/leaderboards/v2/competitions/quarterfinalsindividual/2021/leaderboards"

# Seconds to wait for the server on each request. Without a timeout a stalled
# connection would block the notebook indefinitely.
request_timeout = 30

data_list = []
genders = [1, 2]  # sent as the API's `division` query parameter


def fetch_leaderboard_json(url):
    """Download `url` and return its decoded JSON body.

    Raises if the request fails or times out, if the server answers with an
    HTTP error status, or if the body is not valid JSON.
    """
    reply = requests.get(url, timeout=request_timeout)
    reply.raise_for_status()
    return reply.json()


def extract_score_records(row):
    """Return one record per entry of row['scores'], tagged with competitorId.

    Each record is a copy of the score entry, so the input row is left
    untouched. A row contributes exactly as many records as it has scores;
    rows with fewer scores than others are handled without error. Raises
    KeyError if `entrant`, `competitorId` or `scores` is missing.
    """
    competitor_id = row['entrant']['competitorId']
    return [{**score, 'competitorId': competitor_id} for score in row['scores']]


# Iterate over each gender
for gender in genders:
    # The first page is needed to learn how many pages exist
    url = f"{base_url}?division={gender}&page=1"
    try:
        first_page = fetch_leaderboard_json(url)
        total_pages = first_page['pagination']['totalPages']
        page_numbers = range(1, total_pages + 1)
    except Exception as e:
        # Best effort: report the failure and move on to the next gender
        print(f"Error occurred while fetching total_pages for gender={gender}: {e}")
        continue

    for page in page_numbers:
        url = f"{base_url}?division={gender}&page={page}"
        try:
            # Page 1 is already in memory; do not download it a second time
            response = first_page if page == 1 else fetch_leaderboard_json(url)
            for row in response['leaderboardRows']:
                # Iterate the row's own scores rather than a page-wide maximum
                # count, so a shorter row cannot raise IndexError and make the
                # rest of the page get skipped.
                data_list.extend(extract_score_records(row))
        except Exception as e:
            # Best effort: report the failing page and keep going with the rest
            print(f"Error occurred while fetching data for gender={gender}, page={page}: {e}")

# Create a DataFrame from the collected data_list
df_2021_quarterfinals_scores = pd.DataFrame(data_list)
# `df` is kept as an alias because this cell originally stored its result there
df = df_2021_quarterfinals_scores

df_2021_quarterfinals_scores

##### Semifinals

###### Athletes information

In [None]:
# Query 5 - 2021 Semifinals Not Scores
# Walk every page of each 2021 Semifinal leaderboard for each division and
# collect one record per leaderboard row: the row's `entrant` fields plus its
# overall rank and overall score.
base_url = "https://c3po.crossfit.com/api/leaderboards/v2/competitions/semifinals/2021/leaderboards"

# Seconds to wait for the server on each request. Without a timeout a stalled
# connection would block the notebook indefinitely.
request_timeout = 30

data_list = []
# Values sent as the API's `semifinal` query parameter.
# NOTE(review): 186 is absent from this list — presumably intentional, confirm.
semifinals = [176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 187]
genders = [1, 2]  # sent as the API's `division` query parameter


def fetch_leaderboard_json(url):
    """Download `url` and return its decoded JSON body.

    Raises if the request fails or times out, if the server answers with an
    HTTP error status, or if the body is not valid JSON.
    """
    reply = requests.get(url, timeout=request_timeout)
    reply.raise_for_status()
    return reply.json()


def build_athlete_record(row):
    """Return a copy of the row's entrant fields plus overallRank/overallScore.

    The input row is left untouched. Raises KeyError if `entrant`,
    `overallRank` or `overallScore` is missing from the row.
    """
    return {
        **row['entrant'],
        'overallRank': row['overallRank'],
        'overallScore': row['overallScore'],
    }


# Iterate over each semifinal
for semifinal in semifinals:
    # Iterate over each gender
    for gender in genders:
        # The first page is needed to learn how many pages exist
        url = f"{base_url}?semifinal={semifinal}&division={gender}&page=1"
        try:
            first_page = fetch_leaderboard_json(url)
            total_pages = first_page['pagination']['totalPages']
            page_numbers = range(1, total_pages + 1)
        except Exception as e:
            # Best effort: report the failure and move on to the next combination
            print(f"Error occurred while fetching total_pages for semifinal={semifinal}, gender={gender}: {e}")
            continue

        for page in page_numbers:
            url = f"{base_url}?semifinal={semifinal}&division={gender}&page={page}"
            try:
                # Page 1 is already in memory; do not download it a second time
                response = first_page if page == 1 else fetch_leaderboard_json(url)
                for row in response['leaderboardRows']:
                    data_list.append(build_athlete_record(row))
            except Exception as e:
                # Best effort: report the failing page and keep going with the rest
                print(f"Error occurred while fetching data for semifinal={semifinal}, gender={gender}, page={page}: {e}")

# Create a DataFrame from the collected data_list
df_2021_semifinals_not_scores = pd.DataFrame(data_list)
# `df` is kept as an alias because this cell originally stored its result there
df = df_2021_semifinals_not_scores

df_2021_semifinals_not_scores

###### Athletes scores

In [None]:
# Query 6 - 2021 Semifinals Scores
# Walk every page of each 2021 Semifinal leaderboard for each division and
# collect one record per entry in a row's `scores` list, tagged with the
# competitorId of the athlete it belongs to.
base_url = "https://c3po.crossfit.com/api/leaderboards/v2/competitions/semifinals/2021/leaderboards"

# Seconds to wait for the server on each request. Without a timeout a stalled
# connection would block the notebook indefinitely.
request_timeout = 30

data_list = []
# Values sent as the API's `semifinal` query parameter.
# NOTE(review): 186 is absent from this list — presumably intentional, confirm.
semifinals = [176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 187]
genders = [1, 2]  # sent as the API's `division` query parameter


def fetch_leaderboard_json(url):
    """Download `url` and return its decoded JSON body.

    Raises if the request fails or times out, if the server answers with an
    HTTP error status, or if the body is not valid JSON.
    """
    reply = requests.get(url, timeout=request_timeout)
    reply.raise_for_status()
    return reply.json()


def extract_score_records(row):
    """Return one record per entry of row['scores'], tagged with competitorId.

    Each record is a copy of the score entry, so the input row is left
    untouched. A row contributes exactly as many records as it has scores;
    rows with fewer scores than others are handled without error. Raises
    KeyError if `entrant`, `competitorId` or `scores` is missing.
    """
    competitor_id = row['entrant']['competitorId']
    return [{**score, 'competitorId': competitor_id} for score in row['scores']]


# Iterate over each semifinal
for semifinal in semifinals:
    # Iterate over each gender
    for gender in genders:
        # The first page is needed to learn how many pages exist
        url = f"{base_url}?semifinal={semifinal}&division={gender}&page=1"
        try:
            first_page = fetch_leaderboard_json(url)
            total_pages = first_page['pagination']['totalPages']
            page_numbers = range(1, total_pages + 1)
        except Exception as e:
            # Best effort: report the failure and move on to the next combination
            print(f"Error occurred while fetching total_pages for semifinal={semifinal}, gender={gender}: {e}")
            continue

        for page in page_numbers:
            url = f"{base_url}?semifinal={semifinal}&division={gender}&page={page}"
            try:
                # Page 1 is already in memory; do not download it a second time
                response = first_page if page == 1 else fetch_leaderboard_json(url)
                for row in response['leaderboardRows']:
                    # Iterate the row's own scores rather than a page-wide
                    # maximum count, so a shorter row cannot raise IndexError
                    # and make the rest of the page get skipped.
                    data_list.extend(extract_score_records(row))
            except Exception as e:
                # Best effort: report the failing page and keep going with the rest
                print(f"Error occurred while fetching data for semifinal={semifinal}, gender={gender}, page={page}: {e}")

# Create a DataFrame from the collected data_list
df_2021_semifinals_scores = pd.DataFrame(data_list)
# `df` is kept as an alias because this cell originally stored its result there
df = df_2021_semifinals_scores

df_2021_semifinals_scores

##### Games

###### Athletes information

In [None]:
# Query 7 - 2021 Games Not Scores
# Calls scraping_data_open_and_games (from the notebook loaded as `module`),
# passing '2021' and 'games' and leaving the `scores` keyword at its default.
# NOTE(review): presumably returns a DataFrame of athlete information — the
# helper is defined outside this notebook, confirm there.
df_2021_games_not_scores = module.scraping_data_open_and_games('2021', 'games')

# Bare expression as the last statement so the notebook displays the result.
df_2021_games_not_scores

###### Athletes scores

In [None]:
# Query 8 - 2021 Games Scores
# Same call as Query 7 ('2021', 'games') but with scores=True.
# NOTE(review): presumably switches the helper to return per-workout scores
# instead of athlete information — confirm in scraping_data_functions.
df_2021_games_scores = module.scraping_data_open_and_games('2021', 'games', scores=True)

# Bare expression as the last statement so the notebook displays the result.
df_2021_games_scores