<a href="https://colab.research.google.com/github/lweislo/UCI_data/blob/main/UCI_new_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

THIS GRABS RESULTS FROM THE NEW UCI WEBSITE'S API
* Click the 'play' button below
* In another browser window, navigate to the main UCI race page that lists the event 'stages' and copy its URL
* Example: https://www.uci.org/competition-details/2021/ROA/63844
* FIRST: Paste the URL in the top box, click "Get available stages". A list will display below the boxes.
* NEXT: Select the stage or race you want to get results for.
* Results will be downloaded as a tab-delimited text file (uci_results.txt)
* If you want to get another stage or GC results, no need to hit play again - just select it on the dropdown list.

NOTE: stage-race classifications are written out in the order the UCI returns them, not in our preferred order.




In [None]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import json
import re
from bs4 import BeautifulSoup
from google.colab import files
from google.colab import files
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import TwoByTwoLayout

# Write the collected result tables to a text file and hand it to the browser
def output_file(header_list, table_list):
    """Write each results table under its heading, then download the file.

    Args:
        header_list: Headings, one per table, in the same order as
            ``table_list``.
        table_list: pandas DataFrames to write. Each is written as
            tab-separated rows without a header row or index column.

    The output goes to ``uci_results.txt`` in the working directory and is
    then passed to ``files.download``.
    """
    outfile = 'uci_results.txt'
    # Explicit UTF-8 so non-ASCII rider names do not depend on the platform's
    # default file encoding.
    with open(outfile, 'w', encoding='utf-8') as file:
        for header, table in zip(header_list, table_list):
            print(f'Outputting results for {header}')
            try:
                file.write(f'\n{header}\n')
                file.write(table.to_csv(header=False, index=False, sep='\t'))
            except Exception as err:
                # Best effort: one bad table must not stop the others from
                # being written, but the failure should be visible.
                print(f'Could not write results for {header}: {err}')
    files.download(outfile)
def format_results(url):
    """Fetch a results JSON document and return its rows as a DataFrame.

    Args:
        url: Address of the results endpoint to request.

    Returns:
        A pandas DataFrame built from the ``results`` entry of the JSON
        response, or ``None`` when that entry is empty or null.

    Raises:
        KeyError: If the response has no ``results`` entry.
        requests.RequestException: On network failure or timeout.
    """
    # A timeout keeps the notebook from hanging forever if the server stalls.
    response = requests.get(url, timeout=30)
    results = response.json()['results']
    if not results:
        return None
    return pd.DataFrame(results)
def format_table(df):
    """Build the output table from a raw results DataFrame.

    Args:
        df: DataFrame with ``rank``, ``result``, ``firstname``, ``lastname``
            and ``nationality`` columns, and optionally ``team``.

    Returns:
        A new DataFrame with columns Rank, Rider, Country, Team (only when
        ``df`` has a ``team`` column) and Result, in that order.
    """
    columns = {
        # Rows without a rank show their result text in the Rank column.
        'Rank': df['rank'].fillna(df['result']),
        # Only the last name is title-cased; the first name is used as given.
        'Rider': df['firstname'] + ' ' + df['lastname'].str.title(),
        'Country': df['nationality'],
    }
    if 'team' in df.columns:
        columns['Team'] = df['team']
    columns['Result'] = df['result']
    return pd.DataFrame(columns)
def find_stages(raw_url):
    """Read the stage and classification listing embedded in a race page.

    The page is expected to contain a ``div`` whose ``data-component`` is
    ``CompetitionDetailsModule`` and whose ``data-props`` attribute holds JSON
    with a ``results.accordion`` list.

    Args:
        raw_url: Address of the race page to download.

    Returns:
        A tuple ``(event_codes, titles, stage_results, stage_list)``:
        ``stage_list`` has one label per stage; the other three lists are
        parallel, with one entry per classification (its event code, its
        title, and the label of the stage it belongs to). All four lists are
        returned, possibly empty, even when the page cannot be read, so
        callers can always unpack the result.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    event_codes = []
    titles = []
    stage_results = []
    stage_list = []
    found = (event_codes, titles, stage_results, stage_list)

    # A timeout keeps the notebook from hanging forever if the server stalls.
    page = requests.get(raw_url, timeout=30)
    if page.status_code != 200:
        print(f"Could not load the race page (HTTP status {page.status_code})")
        return found

    soup = BeautifulSoup(page.content, "html5lib")
    links = soup.find('div', {'data-component': 'CompetitionDetailsModule'})
    if not links:
        print("Error finding results. UCI may have changed their HTML tags")
        return found

    try:
        raw_urls = json.loads(links['data-props'])
        stages = raw_urls['results']['accordion']
        for stage in stages:
            label = stage['label']
            stage_list.append(label)
            for result in stage['results']:
                # Read both fields before appending so the three parallel
                # lists cannot drift out of step if a key is missing.
                code = result['eventCode']
                title = result['title']
                stage_results.append(label)
                event_codes.append(code)
                titles.append(title)
    except (TypeError, KeyError, ValueError):
        # KeyError: an expected attribute/key is missing; ValueError: the
        # embedded JSON did not parse; TypeError: unexpected structure.
        print("Problem getting results, UCI may have changed their API")
    return found
def scrape_api(stage, codes, headers, discipline):
    """Download every classification for one stage and write them out.

    Args:
        stage: Label of the selected stage; used only in the progress message.
        codes: Event codes, one per classification.
        headers: Classification titles, parallel to ``codes``.
        discipline: Discipline code placed in the request's query string.

    Each code is fetched with ``format_results``. Non-empty results are
    reformatted with ``format_table``; if reformatting fails, the raw table
    is kept instead. The collected tables are passed to ``output_file``.
    """
    header_list = []
    table_list = []
    print(f"Getting results for {stage}")
    for code, title in zip(codes, headers):
        url = f"https://www.uci.org/api/calendar/results/{code}?discipline={discipline}"
        print(f'Getting results for {url}')
        try:
            df = format_results(url)
        except Exception as err:
            # Best effort: report the failure and carry on with the remaining
            # classifications instead of hiding it.
            print(f"Problem getting results for {title}: {err}")
            continue

        # format_results signals "nothing there" with None.
        if df is None or len(df) == 0:
            print(f"No data found for {title}")
            continue

        print(f"{len(df)} rows of data found for {title}")
        header_list.append(title)
        try:
            table_list.append(format_table(df))
        except Exception:
            # The expected columns were not all present; output the raw table
            # rather than dropping the classification.
            table_list.append(df)
    output_file(header_list, table_list)

def on_button_clicked_1(b):
    """Handle a click on the "Get available stages" button.

    Reads the URL from the ``race_url`` text box, checks that it looks like a
    UCI event URL for a supported discipline, looks up the stages with
    ``find_stages`` and displays a dropdown of them wired to
    ``dropdown_stage_eventhandler``.

    Args:
        b: The button widget that was clicked.

    Returns:
        The tuple ``(event_codes, titles, stage_results, stage_list)``. The
        lists are empty when the URL was rejected or the lookup failed.
    """
    # Start with empty lists so the return below is valid on every path.
    event_codes, titles, stage_results, stage_list = [], [], [], []
    stages_url = race_url.value.strip()
    try:
        if b.description == 'Get available stages':
            if 'uci.org' not in stages_url:
                print("Please enter a valid UCI URL eg. https://www.uci.org/competition-details/2021/ROA/63803")
            else:
                print("Getting available stages")
                disciplines = ['ROA', 'MTB', 'CRO']
                # The discipline is the second-to-last path segment; ignore a
                # trailing slash and tolerate a URL with too few segments.
                segments = stages_url.rstrip('/').split('/')
                discipline = segments[-2] if len(segments) >= 2 else ''
                if discipline not in disciplines:
                    print("That is not a valid event URL")
                else:
                    event_codes, titles, stage_results, stage_list = find_stages(stages_url)
                    # values_plus_select inserts the placeholder into
                    # stage_list itself, so the returned list includes it.
                    dropdown_stage = widgets.Dropdown(options=values_plus_select(stage_list))
                    display(dropdown_stage)
                    dropdown_stage.observe(dropdown_stage_eventhandler, names='value')
    except TypeError:
        print("We had a problem with that URL")
    return event_codes, titles, stage_results, stage_list

# Placeholder entry shown first in the stage/event dropdown
ALL = 'Select stage'
def values_plus_select(stage_list):
    """Put the placeholder option at the front of ``stage_list``.

    The list is modified in place and the same list object is returned.
    """
    stage_list[:0] = [ALL]
    return stage_list

# Handle a change in the selection
def dropdown_stage_eventhandler(change):
    """Fetch and download results for the stage picked in the dropdown.

    Args:
        change: The change notification from the dropdown; ``change.new`` is
            the newly selected option.

    Nothing happens when the placeholder option is selected. Otherwise the
    race page named in the ``race_url`` text box is read again with
    ``find_stages`` and the classifications of the selected stage are passed
    to ``scrape_api``.
    """
    if change.new == ALL:
        return

    stage = change.new  # The stage or event selected
    stages_url = race_url.value.strip()  # The URL entered in box
    # The discipline is the second-to-last path segment; ignore a trailing
    # slash and reject a URL with too few segments instead of crashing.
    segments = stages_url.rstrip('/').split('/')
    if len(segments) < 2:
        print("That is not a valid event URL")
        return
    discipline = segments[-2]

    # Get the list of stages/codes to make the URLs etc
    found = find_stages(stages_url)
    if found is None:
        print("We had a problem with that URL")
        return
    event_codes, titles, stage_results, _ = found

    # Keep only the classifications of the selected stage. The labels are
    # compared for equality: a substring test would make "Stage 10" also
    # match the entries of "Stage 1".
    selected = [
        (code, title)
        for code, title, label in zip(event_codes, titles, stage_results)
        if label == stage
    ]
    codes = [code for code, _ in selected]  # Event codes for the stage selected
    headers = [title for _, title in selected]  # Classification names for the stage selected

    # Get the actual results and download
    scrape_api(stage, codes, headers, discipline)


# Text box the user pastes the race URL into
race_url = widgets.Text(
    value='',
    placeholder='',
    description='Enter UCI URL for race here:',
    disabled=False,
)

# Button that looks up the stages for the URL in the text box
get_stages = widgets.Button(description="Get available stages")
get_stages.on_click(on_button_clicked_1)

# Kept as the last expression of the cell so the notebook renders the layout.
TwoByTwoLayout(top_left=race_url, top_right=get_stages, merge=False)
# @title <-- Click the play button to the left to load program.

TwoByTwoLayout(children=(Text(value='', description='Enter UCI URL for race here:', layout=Layout(grid_area='t…

Getting available stages


Dropdown(options=('Select stage', 'Men Elite', 'Men Junior', 'Women Elite', 'Women Junior'), value='Select sta…

Getting results for Women Elite
Getting results for https://www.uci.org/api/calendar/results/D2EV226511?discipline=CRO
22 rows of data found for General Classification
Outputting results for General Classification


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Getting results for Men Elite
Getting results for https://www.uci.org/api/calendar/results/D2EV226509?discipline=CRO
44 rows of data found for General Classification
Outputting results for General Classification


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Getting results for Men Junior
Getting results for https://www.uci.org/api/calendar/results/D2EV226510?discipline=CRO
13 rows of data found for General Classification
Outputting results for General Classification


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Getting results for Women Junior
Getting results for https://www.uci.org/api/calendar/results/D2EV226512?discipline=CRO
6 rows of data found for General Classification
Outputting results for General Classification


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>