In [None]:
import os
import re
from io import StringIO

import numpy as np
import pandas as pd
from selenium.webdriver.common.by import By

In [None]:
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

In [None]:
# absolute path of the current working directory; the CSV inputs are read from here
folderpath = os.path.abspath(os.curdir)

In [None]:
# initiate Chrome driver
# '--headless' runs Chrome without opening a visible browser window
# NOTE(review): nothing visible in this notebook calls driver.quit() — confirm
# the browser session is closed somewhere once scraping is finished
opts = Options()
opts.add_argument('--headless')
driver = Chrome(options = opts)

In [None]:
# event id to scrape; used as the `zid` query parameter of the zwiftpower.com events page
race_id = 935293  # alternatives tried: 935289, or prompt with get_race_ids()

In [None]:
def html_to_df(driver):
    """Parse the results table on the driver's current page into a DataFrame.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session whose ``page_source`` is parsed.

    Returns
    -------
    pandas.DataFrame
        The first table pandas reads from the element with id
        ``table_event_results_final``.

    Raises
    ------
    ValueError
        If the page contains no element with that id, or pandas cannot find
        a table inside it.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.find(id='table_event_results_final')

    # fail with a clear message instead of handing the text 'None' to pandas
    if table is None:
        raise ValueError(
            "results table 'table_event_results_final' not found on the current page"
        )

    # wrap the markup in a file-like object: passing literal HTML strings to
    # read_html is deprecated in recent pandas versions
    return pd.read_html(StringIO(str(table)))[0]

In [None]:
def import_race_data(driver, race_id):
    """Scrape every page of the results table for one event.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session used to load and page through the results.
    race_id : int or str
        Event id placed in the ``zid`` query parameter. Numeric strings
        (such as those returned by ``get_race_ids``) are accepted.

    Returns
    -------
    pandas.DataFrame
        Rows of all result pages in page order, with a fresh 0..n-1 index.
    """
    # int() lets ids typed in as text be used with the %d placeholder
    url = 'https://zwiftpower.com/events.php?zid=%d' % int(race_id)

    # open page
    driver.get(url)

    # collect one frame per page and concatenate once at the end
    pages = []
    while True:
        pages.append(html_to_df(driver))

        # an enabled "next" button means there is another page of results;
        # a disabled or missing button means this was the last page
        next_buttons = driver.find_elements(
            By.CSS_SELECTOR, '.paginate_button.next:not(.disabled)'
        )
        if not next_buttons:
            break

        next_buttons[0].find_element(By.LINK_TEXT, 'Next').click()

    # ignore_index avoids repeated row labels (each page starts again at 0)
    return pd.concat(pages, ignore_index=True)

In [None]:
# *** IMPORT DATA ***

In [None]:
# load the BCSE team list; its 'team_tag' column is used below to identify riders
filepath = "{}/bcse_teams.csv".format(folderpath)
teams = pd.read_csv(filepath)

In [None]:
# load the points table; it is joined to the riders by row position further down
filepath = "{}/points.csv".format(folderpath)
pts = pd.read_csv(filepath)

In [None]:
# import race data
# scrapes all result pages for race_id through the Chrome driver created above
df = import_race_data(driver, race_id)

In [None]:
# *** CLEAN UP DATA ***

In [None]:
# remove the '#' and 'Gain' columns, which are not used below
df = df.drop(columns=['#', 'Gain'])

In [None]:
# rename columns
# 'Unnamed: 0' becomes 'Cat', which is filtered on as the race category below;
# 'Avg'/'Max' become 'HR_Avg'/'HR_Max' (presumably heart rate — TODO confirm)
df = df.rename(columns={'Unnamed: 0': 'Cat', 'Avg':'HR_Avg', 'Max':'HR_Max'})

In [None]:
# clean up name column
# strip surrounding whitespace so the endswith() team-tag check below sees the real end of the name
df['Rider'] = df['Rider'].str.strip()

In [None]:
# filter out non-bcse
# a rider counts as BCSE when the name ends with one of the team tags;
# the mask is computed once, and na=False treats a missing name as non-BCSE
# instead of producing NaN in the mask (which .loc cannot index with)
is_bcse = df['Rider'].str.endswith(tuple(teams['team_tag']), na=False)
data = df.loc[is_bcse].copy()
nb = df.loc[~is_bcse].copy()

In [None]:
# now extract team name
# each tag is escaped so regex metacharacters in a tag (e.g. square brackets)
# match literally, and the pattern is anchored at the end of the name to agree
# with the endswith() filter that built `data`
searchstr = '|'.join(re.escape(tag) for tag in teams['team_tag'])
data['Team'] = data['Rider'].str.extract("(" + searchstr + ")$", expand=False)

In [None]:
# get rider names
# split each rider string on whitespace into one column per token
x = data['Rider'].str.split(expand=True)

In [None]:
# first and second whitespace-separated tokens of the rider string
# NOTE(review): for single-word or multi-part names the second token may be the
# team tag or a middle name rather than the surname — confirm this is acceptable
data['forename'] = x[0]
data['surname'] = x[1]

In [None]:
# one frame per race category; reset_index() moves the old row labels into an
# 'index' column and gives each frame a fresh 0..n-1 index
a, b, c, d = (
    data.loc[data['Cat'] == label].copy().reset_index()
    for label in ('A', 'B', 'C', 'D')
)

In [None]:
# category labels, in the same order as the per-category frames [a, b, c, d];
# each label is also the name of that category's column in the points table
cats = ['A','B','C','D']

In [None]:
# allocate points to the riders of each category by finishing order
dfs = [a,b,c,d]
for i, (cat_df, col) in enumerate(zip(dfs, cats)):
    # the loop variable is deliberately not called `df`, so the notebook-level
    # `df` holding the scraped race results is not overwritten

    # 1-based position within the category; relies on the 0..n-1 index
    # produced by reset_index() when the category frames were built
    cat_df['position'] = cat_df.index+1

    # left join on the row index so riders placed beyond the end of the points
    # table are kept instead of being silently dropped
    # (assumes row 0 of pts holds the points for 1st place — TODO confirm)
    scored = pd.merge(cat_df, pts[[col]], how='left', left_index=True, right_index=True)

    # rename points column
    scored = scored.rename(columns={col: 'Points'})

    # riders with no entry in the points table score nothing
    scored['Points'] = scored['Points'].fillna(0)

    # drop unnecessary index column
    dfs[i] = scored.drop('index', axis=1)

In [None]:
# rebind the category names to the scored frames
a, b, c, d = dfs

In [None]:
# # add new 'B' cat scoring in
# if len(b) > 55:
#     # placeholder
    
#     # increment => 50/len(b)
    
#     # b.loc[6:,'points']

In [None]:
# check for any missed BCSEs
#nb.loc[nb['Name'].str.contains('BCSE')]

In [None]:
# show BCSEs without tag in name
#data.loc[~data['Name'].str.contains('BCSE')]

In [None]:
# create main dataset again
# stack the four per-category frames back into a single frame
total = pd.concat([a,b,c,d])

In [None]:
# keep an independent copy of the current totals under the name `men`
men = total.copy()

In [None]:
# append the current totals to the saved `men` frame
# NOTE(review): when the cells are run top to bottom, `men` is a copy of `total`,
# so this concatenates the frame with itself and every row (and its points) is
# counted twice — presumably `men` should hold an earlier race's results; confirm
total = pd.concat([men, total])

In [173]:
# calculate team points
# sum of points for each (category, team) pair
# NOTE(review): this rebinds `df`, which earlier held the scraped race results
df = total.groupby(['Cat','Team'])['Points'].sum()

In [None]:
# overall team ranking: points summed per team across categories, highest first
total.groupby(['Team'])['Points'].sum().sort_values(ascending=False)

In [168]:
def get_race_ids():
    """Prompt for a comma-separated list of race ids.

    Returns
    -------
    list of str
        One entry per id typed in, with surrounding whitespace removed.
        Blank entries (empty input, doubled or trailing commas) are dropped.
    """
    response = input("Please enter race ids (separated by commas)")

    # strip each piece so "1, 2" yields '2' rather than ' 2', and skip empties
    pieces = (piece.strip() for piece in response.split(','))
    race_ids = [piece for piece in pieces if piece]

    return race_ids

In [None]:
# scratch cell: the two race ids referenced for `race_id` above (evaluates to a tuple; nothing is assigned)
935293, 935289