# eWRC API

Simple Python API for eWRC results.

Initially built for pulling results at the end of a rally.

What do we need to do to make it work live too, eg to force refesh on certain stages?


We can tunnel into class and champtionship for stage results, entry list, shakedown and final results.

In [1]:
#%pip install --upgrade beautifulsoup4

In [1]:
import pandas as pd
import re
from dakar_utils import getTime

In [2]:
import requests
import lxml.html as LH
from bs4 import BeautifulSoup
from bs4.element import NavigableString

from parse import parse

## Generic Utilities

Utility functions.

In [3]:
def soupify(url):
    """Load HTML from URL and parse it into a BeautifulSoup object.
    
    :param url: The URL of an HTML page we want to scrape.
    :type url: string, required
    :return: A soup representation of an HTML page.
    :rtype: bs4.BeautifulSoup object
    """
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'lxml') # Parse the HTML as a string
    
    # Remove occasional tags that might appear
    # https://stackoverflow.com/a/40760750/454773
    unwanted = soup.find(id="donate-main")
    if unwanted:
        unwanted.extract()
    
    return soup

In [4]:
def no_children(node):
    """Extract just the text and no child nodes from a soup node."""
    #https://stackoverflow.com/a/31909680/454773
    text = ''.join([t for t in node.contents if type(t) == NavigableString])
    return text

In [5]:
def dfify(table):
    
    # TO DO - TEST - ADD SPACE FOR <br/> - CHECK - MAYBE BROKEN ?
    #table = BeautifulSoup(str(table).replace("<br/>", " "))
    table = str(table).replace("<br/>", " ")
    df = pd.read_html('<html><body>{}</body></html>'.format(table))[0]
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')
    return df

In [6]:
import unicodedata

In [7]:
def cleanString(s):
    """Clean a string:
     - convert it to a normalized NFKD string;
     - strip string of whitespace;
     - replace multiple space elements with a single space element.
    """
    s = unicodedata.normalize("NFKD", str(s))
    #replace multiple whitespace elements with single space
    s = ' '.join(s.strip().split())
    
    return s

In [8]:
def urljoin(*args, trailing=True):
    """
    Joins given arguments into an url. Trailing but not leading slashes are
    stripped for each argument.
    """
    simple = "/".join(map(lambda x: str(x).strip('/'), args))
    if trailing:
        return f'{simple}/'
    else:
        return simple

In [9]:
base_url = 'https://www.ewrc-results.com'

In [10]:
import urllib

def urlBuilder(stub, params):
    """Build a valid URL."""
    return f'{urljoin(base_url, stub, trailing=True)}?{urllib.parse.urlencode(params)}'

## Timing Utilities

In [11]:
def diffgapsplitter(col):
    """Take a dataframe column containing Gap and Diff elements as a single string
    and split them into separate columns.
    """
    #Normalise
    col = col.fillna('+0+0')
    #Remove leading +
    col = col.str.strip('+')
    #Split...
    col = col.str.split('+',expand=True)
    #Rename columns
    col = col.rename(columns={0:'Gap', 1:'Diff'})
    #Convert to numerics
    col['Gap'] = col['Gap'].apply(getTime)#.astype(float)
    col['Diff'] = col['Diff'].apply(getTime)
    return col

In [12]:
import pandas as pd
_test_df = diffgapsplitter( pd.DataFrame({'test': [None, '+1.0+2.7'] })['test'])
_test_df_expected = pd.DataFrame({'Gap': [0.0, 1.0], 'Diff': [0.0, 2.7] })
pd.testing.assert_frame_equal(_test_df, _test_df_expected)

## Scraping Functions

In [13]:
import urllib.parse

def search_events(query):
    """Search ewrc-results events for particular term."""
    if len(query) < 3:
        return {}
    search_url = f'{base_url}/search_event/?find_event={urllib.parse.quote(query)}'
    soup = soupify(search_url)
    links = soup.find_all("div", {'class':'search-event-event'})[0].find('table').find_all('a')
    rally_links = {}
    for a in links:
        if 'href' in a.attrs:
                rally_links[a['href'].strip('/').split('/')[-1]] = a.text.strip()
    return rally_links

In [14]:
#url='https://www.ewrc-results.com/results/54762-corbeau-seats-rally-tendring-clacton-2019/'
rally_stub = '54762-corbeau-seats-rally-tendring-clacton-2019'
rally_stub='61961-mgj-engineering-brands-hatch-winter-stages-2020'
rally_stub='59972-rallye-automobile-de-monte-carlo-2020'
rally_stub='62413-rallye-de-ourense-2020/'
#rally_stub='41079-rallye-automobile-de-monte-carlo-2021'
homepage_url=f'{base_url}/results/{rally_stub}/'


In [15]:
def _get_stages_from_homepage(soup):
    """Retrieve the stage list and keys from the results page."""
    stages = []
    for li in soup.find('div', {'class': 'rzlist50'}).find_all('li'):
        if li.has_attr('class'):
            stages.append((li['class'],'',li['class']))
        else:
            a = li.find('a')
            if not a.has_attr('class'):
                stages.append( (a.text, a['href'], a['title']) )
    return stages

def _get_stages_from_homepage(soup):
    """Retrieve the stage list and keys from the results page."""
    stages = []
    #print(soup.select('body > main > div:nth-child(6)')[0])
    #for li in soup.select('body > main > div:nth-child(6)')[0].find_all('div'):
    for li in soup.find_all('div', {'class': 'd-flex flex-wrap justify-content-center'})[0].find_all('div'):
        if li.has_attr('class'):
            stages.append((li['class'],'',li['class']))
        else:
            #print(li)
            a = li.find('a')
            stages.append( (a.text, a['href'], a['title']) )
    return stages


In [237]:
#There may be diffferent results categories, eg classes, championships
# These are keyed by an extra parameter on the end of the stub

def _get_categories_from_homepage(soup):
    """Retrieve categories."""
    _categories = []
    #categories = soup.find_all('div', {'class': 'rzlist40'})
    categories = soup.find_all('div', {'class': 'd-flex flex-wrap justify-content-center fs-091'})
    if categories:
        for category in categories:
            items = []
            for a in category.find_all('a'):
                if a.text:
                    items.append( (a.text, a['href']) )
            _categories.append(items)
    return _categories

def get_categories_from_homepage(stub):
    homepage_url=f'{base_url}/results/{stub}/'
    soup = soupify(homepage_url)
    return _get_categories_from_homepage(soup)

In [236]:
def homepage_quick_scrape(stub, params=None, path=None):
    """Do a quick utility scrape of the homepage."""
    params = '' if not params else urllib.parse.urlencode(params)
    homepage_url=f'{base_url}/results/{stub}/?{params}&{path}'
    print(homepage_url)
    soup = soupify(homepage_url)
    stages = _get_stages_from_homepage(soup)
    categories = _get_categories_from_homepage(soup)
    return stages, categories

In [225]:
Xstages, Xcategories = homepage_quick_scrape(rally_stub)
Xstages, Xcategories

https://www.ewrc-results.com/results/59972-rallye-automobile-de-monte-carlo-2020/?&None


([('1',
   '/results/59972-rallye-automobile-de-monte-carlo-2020/?s=241659',
   'SS1 Malijai - Puimichel - 20:38'),
  ('2',
   '/results/59972-rallye-automobile-de-monte-carlo-2020/?s=241660',
   'SS2 Bayons - Bréziers - 22:26'),
  (['service', 'text-muted'], '', ['service', 'text-muted']),
  (['leg-hidden'], '', ['leg-hidden']),
  (['service', 'text-muted'], '', ['service', 'text-muted']),
  ('3',
   '/results/59972-rallye-automobile-de-monte-carlo-2020/?s=241661',
   'SS3 Curbans - Venterol 1 - 08:36'),
  ('4',
   '/results/59972-rallye-automobile-de-monte-carlo-2020/?s=241662',
   'SS4 St-Clément-sur-Durance - Freissinières 1 - 09:56'),
  ('5',
   '/results/59972-rallye-automobile-de-monte-carlo-2020/?s=241663',
   'SS5 Avançon - Notre-Dame-du-Laus 1 - 11:21'),
  (['service', 'text-muted'], '', ['service', 'text-muted']),
  ('6',
   '/results/59972-rallye-automobile-de-monte-carlo-2020/?s=241664',
   'SS6 Curbans - Venterol 2 - 13:54'),
  ('7',
   '/results/59972-rallye-automobile-d

In [19]:
def get_stage_result_links(stub, params=None):
    #If navigation remains constant, items are in third list
    params = '' if not params else urllib.parse.urlencode(params)
    rally_stage_results_url= f'{base_url}/results/{stub}/?{params}'
    
    links={}
    soup = soupify(rally_stage_results_url)
    stages = soup.find('div', {'class': 'd-flex flex-wrap justify-content-center'}).find_all('div')
    for li in stages:
        #if 'class' in li.attrs:
        #    print(li['class'])
        #A class is set for service but not other things
        if 'class' not in li.attrs:
            a = li.find('a')
            if 'href' in a.attrs:
                #links.append(a['href'])
                links[f'SS{a.text}'] = a['href']
                
    return links

In [20]:
tmp = get_stage_result_links(rally_stub)
tmp

{'SS1': '/results/62413-rallye-de-ourense-2020/?s=292260',
 'SS2': '/results/62413-rallye-de-ourense-2020/?s=292261',
 'SS3': '/results/62413-rallye-de-ourense-2020/?s=292262',
 'SS4': '/results/62413-rallye-de-ourense-2020/?s=292263',
 'SS5': '/results/62413-rallye-de-ourense-2020/?s=292264',
 'SS6': '/results/62413-rallye-de-ourense-2020/?s=292265',
 'SS7': '/results/62413-rallye-de-ourense-2020/?s=292266',
 'SS8': '/results/62413-rallye-de-ourense-2020/?s=292267'}

In [21]:
stage_result_cols = ['Pos', 'CarNum', 'Desc', 'Class', 'Time', 'GapDiff', 'Speedkm', 'Stage',
       'StageName', 'StageDist', 'Gap', 'Diff', 'Speed', 'Dist', 'entryId',
       'model', 'navigator', 'PosNum']

In [22]:
stage_overall_cols = ['PosChange', 'CarNum', 'Desc', 'Class', 'Time', 'GapDiff', 'Speedkm',
       'Stage', 'StageName', 'StageDist', 'Pos', 'Change', 'Gap', 'Diff',
       'Speed', 'Dist']

In [23]:
retirement_cols = ['CarNum', 'driverNav', 'Model', 'Status']
retirement_extra_cols = ['Driver', 'CoDriver', 'Stage']

In [24]:
penalty_cols = ['CarNum', 'driverNav', 'Model', 'PenReason']
penalty_extra_cols = ['Driver', 'CoDriver', 'Stage', 'Time','Reason']

In [25]:
from numpy import nan

In [26]:
from parse import parse   

In [27]:
details = 'SS16 La Cabanette - Col de Braus 2 [Power Stage] - 13.36 km - 26. 1. 12:18'
#details = 'SS6 Curbans - Venterol 2 - 20.02 km - 24. 1. 13:54'
pattern = 'SS{stage} {name} - {dist:f} km - {datetime}'
parse(pattern, details)

<Result () {'stage': '16', 'name': 'La Cabanette - Col de Braus 2 [Power Stage]', 'dist': 13.36, 'datetime': '26. 1. 12:18'}>

In [58]:
def get_stage_results(stub, params=None):
    params = '' if not params else urllib.parse.urlencode(params)
    _url = f'{base_url.rstrip("/")}/{stub}/?{params}'
    print(_url)
    soup = soupify(_url)

    #details = soup.find('h4').text
    details = soup.find('h5', {'class':'mt-2'}).text

    pattern = 'SS{stage} {name} - {dist:f} km - {datetime}'
    parse_result = parse(pattern, details)
    if parse_result is None:
        pattern = 'SS{stage} - {dist:f} km'
        parse_result = parse(pattern, details)
    #print(details, parse_result)
    stage_num = f"SS{parse_result['stage']}"
  
    if 'name' in parse_result:
        stage_name = parse_result['name']
    else:
         stage_name = stage_num
    
    stage_dist =  parse_result['dist']
    if 'datetime' in parse_result:
        stage_datetime = parse_result['datetime']
    else:
        stage_datetime = None
    
    tables = soup.find_all('table')
    
    stage_result = tables[0]
    
    stage_overall = tables[1]
    
    result_cols = ['Pos','CarNum','Desc','Class', 'Time','GapDiff', 'Speedkm']
    overall_cols = ['PosChange', 'CarNum', 'Desc','Class', 'Time', 'GapDiff', 'Speedkm' ]
    stage_retirement_cols = ['CarNum', 'driverNav', 'Model', 'Status', 'Driver', 'CoDriver', 'Stage']
    stage_penalty_cols = ['CarNum', 'driverNav', 'Model', 'PenReason', 'Driver', 'CoDriver', 'Stage', 'Time', 'Reason']
    
    # Stage Result
    df_stage_result = dfify(stage_result)
    
    # Handle cancelled stage
    cancelled = (df_stage_result.iat[0,0] == 'Stage cancelled')
    if cancelled:
        return pd.DataFrame(columns=result_cols), pd.DataFrame(columns=overall_cols), pd.DataFrame(columns=stage_retirement_cols), pd.DataFrame(columns=stage_penalty_cols)
    
    print(df_stage_result.columns, result_cols)
    display(df_stage_result)
    df_stage_result.columns = result_cols
    
    df_stage_result['Stage'] = stage_num
    df_stage_result['StageName'] = stage_name
    df_stage_result['StageDist'] = stage_dist
    
    df_stage_result['GapDiff'].fillna('+0+0').str.strip('+').str.split('+',expand=True).rename(columns={0:'Gap', 1:'Diff'})
    df_stage_result[['Gap','Diff']] = diffgapsplitter(df_stage_result['GapDiff'])
    df_stage_result[['Speed','Dist']] = df_stage_result['Speedkm'].str.extract(r'(?P<Speed>[^.]*\.[\d])(?P<Dist>.*)')
    
    rows=[]
    # Separate out the elements from the driver column
    for d in stage_result.findAll("td", {"class": "position-relative"}):
        entryId = d.find('a')['href']
        #print(str(d)) #This gives us the raw HTML in the soup element
        driverNav = d.find('a').text.split('-')
        model=d.find('a').nextSibling.nextSibling
        rows.append( {'entryId':entryId,
                       'model':model,
                      'driver':cleanString(driverNav[0]),
                      'navigator':cleanString(driverNav[1])}) 

    df_stage_result[['driver','entryId','model','navigator']] = pd.DataFrame(rows)
    #Should we cast the Pos to a numeric too? Set = to na then ffill down?
    df_stage_result['PosNum'] = df_stage_result['Pos'].replace('=',nan).astype(float).fillna(method='ffill').astype(int)
    df_stage_result.set_index('driver',drop=True, inplace=True)
    
    # Stage Overall
    df_stage_overall = dfify(stage_overall)

    # Reduced cols if the stage is cancelled
    # THis is a crude hack; should really detect properly
    _cancelled_hack = len(df_stage_overall.columns) != len(overall_cols)
    df_stage_overall.columns = overall_cols[:len(df_stage_overall.columns)]
    df_stage_overall['Stage'] = stage_num
    df_stage_overall['StageName'] = stage_name
    df_stage_overall['StageDist'] = stage_dist
    
    df_stage_overall[['Pos','Change']] = df_stage_overall['PosChange'].astype(str).str.extract(r'(?P<Pos>[\d]*)\.\s?(?P<Change>.*)?')
    if not _cancelled_hack:
        df_stage_overall['GapDiff'].fillna('+0+0').str.strip('+').str.split('+',expand=True).rename(columns={0:'Gap', 1:'Diff'})
        df_stage_overall[['Gap','Diff']] = diffgapsplitter(df_stage_overall['GapDiff'])
        df_stage_overall[['Speed','Dist']] = df_stage_overall['Speedkm'].str.extract(r'(?P<Speed>[^.]*\.[\d])(?P<Dist>.*)')

    
    #  TO DO - classes  - at the moment, only use first class?
    df_stage_result["Class"] = df_stage_result["Class"].apply(lambda x: str(x).strip().split()[0])
    df_stage_overall["Class"] = df_stage_overall["Class"].apply(lambda x: str(x).strip().split()[0])
    
    # Retirements
    df_stage_retirements = pd.DataFrame(columns=retirement_cols+retirement_extra_cols)
    # get tag and then next sibling
    _retirementsHeader = soup.find('h6', text=re.compile('Retirement'))
    if _retirementsHeader:
        print('retirement')
        retired = _retirementsHeader.find_next_sibling()#find('table',{'class':'table-retired'})
        if retired:
            df_stage_retirements = dfify(retired)
            df_stage_retirements.columns = retirement_cols
            df_stage_retirements[['Driver','CoDriver']] = df_stage_retirements['driverNav'].str.extract(r'(?P<Driver>.*)\s+-\s+(?P<CoDriver>.*)')
            df_stage_retirements['Stage'] = stage_num

    # Penalties
    df_stage_penalties = pd.DataFrame(columns=penalty_cols+penalty_extra_cols)
    _penaltiesHeader = soup.find('h6', text=re.compile('Penalty'))
    if _penaltiesHeader:
        penalty = _penaltiesHeader.find_next_sibling() # ('table',{'class':'table-retired'})
        if penalty:
            df_stage_penalties = dfify(penalty)
            df_stage_penalties.columns = penalty_cols
            df_stage_penalties[['Driver','CoDriver']] = df_stage_penalties['driverNav'].str.extract(r'(?P<Driver>.*)\s+-\s+(?P<CoDriver>.*)')
            df_stage_penalties[['Time','Reason']] = df_stage_penalties['PenReason'].str.extract(r'(?P<Time>[^\s]*)\s+(?P<Reason>.*)')
            df_stage_penalties['Stage'] = stage_num

    return df_stage_result, df_stage_overall, df_stage_retirements, df_stage_penalties

In [64]:
partial_stub = '/results/54762-corbeau-seats-rally-tendring-clacton-2019/'
partial_stub='/results/42870-rallye-automobile-de-monte-carlo-2018/'
partial_stub='/results/61961-mgj-engineering-brands-hatch-winter-stages-2020/'
partial_stub='/results/59972-rallye-automobile-de-monte-carlo-2020/'
partial_stub='/results/61089-rally-islas-canarias-2020/'
#stub = tmp['SS3']
stage_result, stage_overall, stage_retirements, stage_penalties = get_stage_results(partial_stub)

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020//?
Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64') ['Pos', 'CarNum', 'Desc', 'Class', 'Time', 'GapDiff', 'Speedkm']


Unnamed: 0,0,1,2,3,4,5,6
0,1.,#19,López Pepe - Rozada Borja Citroën C3 R5,RC2 1,6:34.5,,108.1
1,2.,#15,Pernía E. - González Delgado E. Hyundai i20 R5,RC2 1,6:38.9,+4.4 +4.4,106.9 0.37
2,3.,#22,Suárez A. - Iglesias Pin A. Škoda Fabia Rally2...,RC2 1,6:40.0,+5.5 +1.1,106.7 0.46
3,4.,#23,Solans Nil - Moreno Xavi Škoda Fabia Rally2 evo,RC2 1,6:40.8,+6.3 +0.8,106.4 0.53
4,5.,#7,Mikkelsen Andreas - Jæger Anders Škoda Fabia R...,RC2,6:42.5,+8.0 +1.7,106.0 0.68
...,...,...,...,...,...,...,...
66,67.,#93,Robledano Javier - Suárez Suárez R. Opel Adam S,4 C4,8:38.3,+2:03.8 +4.2,82.3 10.5
67,68.,#95,Suárez L. - Rodríguez Santiago L. Peugeot 208 ...,4 C4,8:45.6,+2:11.1 +7.3,81.2 11.1
68,69.,#98,Martínez del Río D. - González Diz D. Fiat Sei...,3,8:50.5,+2:16.0 +4.9,80.4 11.5
69,70.,#35,Bassas Mas A. - Coronado Jiménez A. Peugeot 20...,RC4 3,9:02.5,+2:28.0 +12.0,78.6 12.5


retirement


In [65]:
stage_result.head()

Unnamed: 0_level_0,Pos,CarNum,Desc,Class,Time,GapDiff,Speedkm,Stage,StageName,StageDist,Gap,Diff,Speed,Dist,entryId,model,navigator,PosNum
driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
/entryinfo/61089-rally-islas-canarias-2020/2848176/,1.0,#19,López Pepe - Rozada Borja Citroën C3 R5,RC2,6:34.5,,108.1,SS17,Arucas 2 (TC+),11.85,0.0,0.0,108.1,,[Citroën C3 R5],López Pepe,Rozada Borja,1
/entryinfo/61089-rally-islas-canarias-2020/2848193/,2.0,#15,Pernía E. - González Delgado E. Hyundai i20 R5,RC2,6:38.9,+4.4 +4.4,106.9 0.37,SS17,Arucas 2 (TC+),11.85,4.4,4.4,106.9,0.37,[Hyundai i20 R5],Pernía E.,González Delgado E.,2
/entryinfo/61089-rally-islas-canarias-2020/2848178/,3.0,#22,Suárez A. - Iglesias Pin A. Škoda Fabia Rally2...,RC2,6:40.0,+5.5 +1.1,106.7 0.46,SS17,Arucas 2 (TC+),11.85,5.5,1.1,106.7,0.46,[Škoda Fabia Rally2 evo],Suárez A.,Iglesias Pin A.,3
/entryinfo/61089-rally-islas-canarias-2020/2848499/,4.0,#23,Solans Nil - Moreno Xavi Škoda Fabia Rally2 evo,RC2,6:40.8,+6.3 +0.8,106.4 0.53,SS17,Arucas 2 (TC+),11.85,6.3,0.8,106.4,0.53,[Škoda Fabia Rally2 evo],Solans Nil,Moreno Xavi,4
/entryinfo/61089-rally-islas-canarias-2020/2848194/,5.0,#7,Mikkelsen Andreas - Jæger Anders Škoda Fabia R...,RC2,6:42.5,+8.0 +1.7,106.0 0.68,SS17,Arucas 2 (TC+),11.85,8.0,1.7,106.0,0.68,[Škoda Fabia Rally2 evo],Mikkelsen Andreas,Jæger Anders,5


In [61]:
stage_overall

Unnamed: 0,PosChange,CarNum,Desc,Class,Time,GapDiff,Speedkm,Stage,StageName,StageDist,Pos,Change,Gap,Diff,Speed,Dist
0,1.,#11,Neuville Thierry - Gilsoul Nicolas Hyundai i20...,RC1,3:10:57.6,,95.6,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,1,,0.0,0.0,95.6,
1,2. +1,#17,Ogier Sébastien - Ingrassia Julien Toyota Yari...,RC1,3:11:10.2,+12.6 +12.6,95.5 0.04,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,2,+1,12.6,12.6,95.5,0.04
2,3. −1,#33,Evans Elfyn - Martin Scott Toyota Yaris WRC,RC1,3:11:11.9,+14.3 +1.7,95.5 0.05,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,3,−1,14.3,1.7,95.5,0.05
3,4.,#4,Lappi Esapekka - Ferm Janne Ford Fiesta WRC,RC1,3:14:06.6,+3:09.0 +2:54.7,94.1 0.62,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,4,,189.0,174.7,94.1,0.62
4,5.,#69,Rovanperä Kalle - Halttunen Jonne Toyota Yaris...,RC1,3:15:14.8,+4:17.2 +1:08.2,93.5 0.85,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,5,,257.2,68.2,93.5,0.85
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,69.,#84,Cartagena Julien - Aubery Anthony Peugeot 208 R2,RC4,4:51:30.0 2:00,+1:40:32.4 +31.2,63.1 19.8,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,69,,6032.4,31.2,63.1,19.8
69,70.,#92,Jerusalmi Jean-Marc - Luthen Guillaume Renault...,RC5,4:51:52.4 0:30,+1:40:54.8 +22.4,62.7 19.9,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,70,,6054.8,22.4,62.7,19.9
70,71.,#25,Nobre Paulo - Morales Gabriel Škoda Fabia R5 [SR],RC2,4:54:40.7,+1:43:43.1 +2:48.3,62.0 20.5,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,71,,6223.1,168.3,62.0,20.5
71,72.,#59,Dizier Grégory - Dizier Sophy Renault Clio RS ...,RC3,5:03:38.8 0:10,+1:52:41.2 +8:58.1,60.2 22.2,SS16,La Cabanette - Col de Braus 2 [Power Stage],13.36,72,,6761.2,538.1,60.2,22.2


In [62]:
stage_retirements

Unnamed: 0,CarNum,driverNav,Model,Status,Driver,CoDriver,Stage


In [63]:
stage_penalties

Unnamed: 0,CarNum,driverNav,Model,PenReason,Driver,CoDriver,Time,Reason,Stage
0,#21,Gryazin Nikolay - Fedorov Yaroslav,Hyundai i20 R5,0:20 - Late (2 min) at TC,Gryazin Nikolay,Fedorov Yaroslav,0:20,- Late (2 min) at TC,SS16
1,#85,Cartier Victor - Margaillan Mathilde,Ford Fiesta R2T,0:20 - Late (2 min) at TC,Cartier Victor,Margaillan Mathilde,0:20,- Late (2 min) at TC,SS16
2,#70,Ribaudo Tony - Degrange Julien,Peugeot 208 R2,0:10 - Late (1 min) at TC,Ribaudo Tony,Degrange Julien,0:10,- Late (1 min) at TC,SS16


In [34]:
def check_time_str(txt):
    """Clean a time string."""
    
    # Quick fix to cope with strings of the form:
    # '11:58.0 <a name="" title="Notional time"><span class="c-blue">[N]</span></a>'
    # This should be a proper validator.
    txt = txt.strip()
    if txt:
        txt = txt.split()[0]
    return txt

In [35]:
# TO DO

def get_stage_times(stub, dropnarow=True):
    url=f'https://www.ewrc-results.com/times/{stub}/'
    #print(url)
    soup = soupify(url)
    
    groups = soup.find_all('div',{'class':'times-driver'})
    # Each list member in groups is the data for one driver
    #The rows are essentially grouped in twos after the header row
    #cols = [c.text for c in times[0].findAll('div')]
    
    #groupsize=2
    #groups = [times[i:i+groupsize] for i in range(1, len(times), groupsize)]
    
    NAME_SUBGROUP = 0
    TIME_SUBGROUP = 1
    
    carNumMatch = lambda txt: re.search('#(?P<carNum>[\d]*)', cleanString(txt))
    carModelMatch = lambda txt:  re.search('</a>\s*(?P<carModel>.*)</div>', cleanString(txt))
    
    #pattern = '''<div class="times-one-time">{stagetime}<br/><span class="times-after">{overalltime}</span><br/>{pos}</div>'''
    
    t=[]
    i=0

    penaltypattern='class="r7_bold_red">{penalty}</span>'
    timepattern1 = '<div class="times-one-time font-weight-bold p-1 fs-091">{stagetime}'
    timepattern2 = '<span class="fs-09 text-muted">{cumtime}</span>'
    for gg in groups:
        g = gg.findChildren(recursive=False)
        #print('g0',g[0],'\nand g1\n', g[1], '\nxx\n')
        # g now has two divs
        i=i+1
        driverNav_el = g[NAME_SUBGROUP].find('a')
        driverNav = driverNav_el.text
        driver,navigator = driverNav.split(' - ')
        entryId = driverNav_el['href']
        retired = '<span class="r8_bold_red">R</span>' in str(g[NAME_SUBGROUP])
        carNum = carNumMatch(g[NAME_SUBGROUP]).group('carNum')
        
        #carModel = carModelMatch(g[NAME_SUBGROUP]).group('carModel')
        _carModel = g[NAME_SUBGROUP].find('div',{'class':'times-car mx-1'})
        carModel = _carModel.text.strip() if _carModel else ''
        
        #TO DO - may be None?
        try:
            classification = g[NAME_SUBGROUP].select('div:first-child div:first-child')[0].text
            classification = int(classification.strip().strip('.')[0]) if classification else nan
            #classification = pd.to_numeric(g[NAME_SUBGROUP].find('span').text.replace('R','').strip('').strip('.'))
        except:
            classification = ''
        
        stagetimes = []
        overalltimes = []
        penalties=[]
        positions = []
        for stage in g[TIME_SUBGROUP].findChildren(recursive=False)[:-1]:
            txt = cleanString(stage)
            if 'cancelled' in txt:
                stagetimes.append(nan)
                overalltimes.append(nan)
                positions.append(nan)
                penalties.append(nan)
            else:
                stagetimes_data = [t.strip() for t in txt.split('<br/>')]
                #print(txt, stagetimes_data,'\n')
                #stagetimes_data = parse(timepattern, txt )
                if stagetimes_data:
                    #stagetimes.append(check_time_str(stagetimes_data['stagetime']))
                    #overalltimes.append(check_time_str(stagetimes_data['overalltime']))
                    if len(stagetimes_data)<2:
                        if stagetimes_data[0]=='<div class="times-one-time font-weight-bold p-1 fs-091"><span class="font-weight-bold text-danger">R</span></div>':
                            # retired BUT may return in following stages
                            # example https://www.ewrc-results.com/times/61089-rally-islas-canarias-2020/?s=265920
                            stagetimes.append(nan)
                            overalltimes.append(nan)
                            positions.append(nan)
                    else:
                        # We may now be running as retired
                        if 'R</span>' in stagetimes_data[2]:
                            # running as retired
                            stagetimes.append(check_time_str(parse(timepattern1,stagetimes_data[0] )['stagetime']))
                            overalltimes.append(nan)
                            positions.append(nan)
                        else:
                            stagetimes.append(check_time_str(parse(timepattern1,stagetimes_data[0] )['stagetime']))
                            overalltimes.append(check_time_str(parse(timepattern2,stagetimes_data[1] )['cumtime']))
                            # There may be up/down arrow before position
                            positions.append(int(stagetimes_data[2].split('.')[0].split('>')[-1].strip()))
                    if len(stagetimes_data)>3:
                        penalties.append(stagetimes_data[3])
                    else:
                        penalties.append('')
                # TO DO - how do we account for cancelled stages?
                # If we add in blanks we get blanks for un-run stages too?
                #else:
                #    stagetimes.append('')
                #    overalltimes.append('')
                 #   positions.append('')
                #    penalties.append('')

        t.append({'entryId': entryId,
                  'driverNav': driverNav,
                  'driver': driver.strip(),
                  'navigator': navigator.strip(),
                  'carNum': carNum,
                  'carModel': carModel,
                  'retired': retired,
                  'Pos': classification,
                  'stagetimes': stagetimes,
                  'overalltimes': overalltimes,
                  'positions': positions,
                  'penalties': penalties})

    df_allInOne = pd.DataFrame(t).set_index(['entryId'])
    
    df_overall = pd.DataFrame(df_allInOne['overalltimes'].tolist(), index= df_allInOne.index)
    df_overall.columns = range(1, df_overall.shape[1]+1)
    
    df_overall_pos = pd.DataFrame(df_allInOne['positions'].tolist(), index= df_allInOne.index)
    df_overall_pos.columns = range(1, df_overall_pos.shape[1]+1)

    df_stages = pd.DataFrame(df_allInOne['stagetimes'].tolist(), index= df_allInOne.index)
    df_stages.columns = range(1, df_stages.shape[1]+1)
    
    df_stages_pos = df_stages.reset_index().drop_duplicates(subset='entryId').set_index('entryId').rank(method='min')
    df_stages_pos.columns = range(1, df_stages_pos.shape[1]+1)

    xcols = df_overall.columns

    for ss in xcols:
        df_overall[ss] = df_overall[ss].apply(getTime)
        df_stages[ss] = df_stages[ss].apply(getTime)

    # TO DO
    #We shouldn't really have to do this - why are there duplicates?
    #We seem to be appending rows over and over for each stage?
    df_allInOne = df_allInOne.reset_index().drop_duplicates(subset='entryId').set_index('entryId')
    df_overall = df_overall.reset_index().drop_duplicates(subset='entryId').set_index('entryId')
    df_stages = df_stages.reset_index().drop_duplicates(subset='entryId').set_index('entryId')
    df_overall_pos = df_overall_pos.reset_index().drop_duplicates(subset='entryId').set_index('entryId')
    
    if dropnarow:
        # BUT, we don't want to drop a cancelled stage?
        df_allInOne = df_allInOne.dropna(how='all', axis=1)
        df_overall = df_overall.dropna(how='all', axis=1)
        df_stages = df_stages.dropna(how='all', axis=1)
        df_overall_pos = df_overall_pos.dropna(how='all', axis=1)
        df_stages_pos = df_stages_pos.dropna(how='all', axis=1)
        
    return df_allInOne, df_overall, df_stages, df_overall_pos, df_stages_pos

In [36]:
url='https://www.ewrc-results.com/times/54762-corbeau-seats-rally-tendring-clacton-2019/'
#url='https://www.ewrc-results.com/times/42870-rallye-automobile-de-monte-carlo-2018/'

In [37]:
rally_stub = '42870-rallye-automobile-de-monte-carlo-2018'
rally_stub='61961-mgj-engineering-brands-hatch-winter-stages-2020'
rally_stub='59972-rallye-automobile-de-monte-carlo-2020'
rally_stub='60140-rally-sweden-2020'
rally_stub='61089-rally-islas-canarias-2020' #breaks

df_allInOne, df_overall, df_stages, \
    df_overall_pos, df_stages_pos = get_stage_times(rally_stub)

In [38]:
display(df_allInOne.head(2))
display(df_overall.head(2))
display(df_stages.head(2))
display(df_overall_pos.head(2))
display(df_stages_pos.head(2))

Unnamed: 0_level_0,driverNav,driver,navigator,carNum,carModel,retired,Pos,stagetimes,overalltimes,positions,penalties
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,Fourmaux Adrien - Jamoul Renaud,Fourmaux Adrien,Jamoul Renaud,20,Ford Fiesta Rally2,False,1,"[8:29.8, 8:49.1, 7:14.8, 9:22.0, 7:59.8, 8:10....","[8:29.8, 17:18.9, 24:33.7, 33:55.7, 41:55.5, 5...","[3, 8, 4, 3, 3, 3, 3, 3, 2, 1, 2, 1, 1, 1, 1, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848173/,Bonato Yoann - Boulloud Benjamin,Bonato Yoann,Boulloud Benjamin,18,Citroën C3 R5,False,2,"[8:37.6, 8:46.5, 7:26.1, 9:26.6, 7:55.2, 8:04....","[8:37.6, 17:24.1, 24:50.2, 34:16.8, 42:12.0, 5...","[10, 9, 9, 8, 6, 6, 4, 4, 4, 4, 4, 2, 2, 2, 2,...","[, , , , , , , , , , , , , , , , ]"


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,509.8,1038.9,1473.7,2035.7,2515.5,3005.9,3421.8,3952.0,4026.3,4594.6,5098.9,5581.9,6014.4,6580.5,7047.1,7527.2,7941.2
/entryinfo/61089-rally-islas-canarias-2020/2848173/,517.6,1044.1,1490.2,2056.8,2532.0,3016.2,3438.2,3977.6,4058.0,4625.0,5131.5,5614.1,6047.8,6617.7,7079.7,7556.2,7966.2


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,509.8,529.1,434.8,562.0,479.8,490.4,415.9,530.2,74.3,568.3,504.3,483.0,432.5,566.1,466.6,480.1,414.0
/entryinfo/61089-rally-islas-canarias-2020/2848173/,517.6,526.5,446.1,566.6,475.2,484.2,422.0,539.4,80.4,567.0,506.5,482.6,433.7,569.9,462.0,476.5,410.0


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,3.0,8.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0
/entryinfo/61089-rally-islas-canarias-2020/2848173/,10.0,9.0,9.0,8.0,6.0,6.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,25.0,39.0,6.0,57.0,16.0,31.0,6.0,51.0,6.0,54.0,27.0,12.0,8.0,46.0,14.0,10.0,15.0
/entryinfo/61089-rally-islas-canarias-2020/2848173/,32.0,36.0,9.0,62.0,12.0,24.0,9.0,55.0,15.0,53.0,30.0,11.0,10.0,51.0,9.0,8.0,11.0


## Final Results

Get overall rankings.

In [39]:
final_path = 'https://www.ewrc-results.com/final/{stub}/?{params}'

In [40]:
def get_final(stub, params=None):
    params = '' if not params else urllib.parse.urlencode(params)
    print(final_path.format(stub=stub, params=params))
    soup = soupify(final_path.format(stub=stub, params=params))
    html_table = soup.find('table', {'class': 'results'})
    
    # There are actually several tables contained in one
    # A rowspan straddles them
    
    tables = []
    table = []
    entryIds = []
    for row in html_table.find_all('tr'):
        row_data = []
        for cell in row:
            # New table
            if cell.text.startswith('Retirements'):
                tables.append(table)
                table = []
            else:
                row_data.append(cell.text.strip())
            if len(tables) == 0:
                tmp = cell.find('a', {'title': 'Entry info and stats'})
                if tmp:
                    entryIds.append(tmp['href'])
        table.append(row_data)
    
    # Process results table
    #tables = LH.fromstring(html).xpath('//table')
    #df_rally_overall = pd.read_html('<html><body>{}</body></html>'.format(tables[0]))[0]
    #df_rally_overall['badge'] = [img.find('img')['src'] for img in tables[0].findAll("td", {"class": "final-results-icon"}) ]
    #df_rally_overall.dropna(how='all', axis=1, inplace=True)
    #print(df_rally_overall[2])
    df_rally_overall = pd.DataFrame(tables[0]).replace('', nan).dropna(how='all', axis=1)
    df_rally_overall.columns=['Pos','CarNum','driverNav','Model',
                              'Reg', 'Class', 'Time','GapDiff', 'Speedkm']

    display(df_rally_overall)
    #Get the entry ID - use this as the unique key
    #in column 3, <a title='Entry info and stats'>
    df_rally_overall['entryId']= entryIds # [a['href'] for a in tables[0].findAll("a", {"title": "Entry info and stats"}) ]
    df_rally_overall.set_index('entryId', inplace=True)

    df_rally_overall[['Driver','CoDriver']] = df_rally_overall['driverNav'].str.extract(r'(?P<Driver>.*)\s+-\s+(?P<CoDriver>.*)')

    df_rally_overall['Historic']= df_rally_overall['Class'].str.contains('Historic')
    df_rally_overall['Class']= df_rally_overall['Class'].str.replace('Historic','')

    df_rally_overall['Pos'] = df_rally_overall['Pos'].astype(str).str.extract(r'(.*)\.')
    df_rally_overall['Pos'] = df_rally_overall['Pos'].astype(int)

    #df_rally_overall[['Model','Registration']]=df_rally_overall['ModelReg'].str.extract(r'(?P<Model>.*) \((?P<Registration>.*)\)')

    df_rally_overall["Class Rank"] = df_rally_overall.groupby("Class")["Pos"].rank(method='min')

    
    # Process retirements table
    if len(tables) > 1:
        pass
        #pd.DataFrame(tables[1])
    
    
    return df_rally_overall
    


In [41]:
rally_stub='59972-rallye-automobile-de-monte-carlo-2020'
get_final(rally_stub, {'sct': 1682})

https://www.ewrc-results.com/final/59972-rallye-automobile-de-monte-carlo-2020/?sct=1682


Unnamed: 0,Pos,CarNum,driverNav,Model,Reg,Class,Time,GapDiff,Speedkm
0,1.0,#20,Østberg Mads - Eriksen Torstein,Citroën C3 R5PH Sport,EZ-026-FT,RC2,3:25:19.4,,88.9
1,2.0,#23,Fourmaux Adrien - Jamoul Renaud,Ford Fiesta Rally2M-Sport Ford WRT,PX19 CUA,RC2,3:28:50.8,+3:31.4 +3:31.4,87.4 0.69
2,3.0,#21,Gryazin Nikolay - Fedorov Yaroslav,Hyundai i20 R5Hyundai Motorsport N,ALZ R 581,RC2,3:31:05.00:20,+5:45.6 +2:14.2,86.5 1.14
3,4.0,#24,Yates Rhys - Morgan James,Ford Fiesta Rally2M-Sport Ford WRT,PX69 EHB,RC2,3:38:22.4,+13:03.0 +7:17.4,83.6 2.57


Unnamed: 0_level_0,Pos,CarNum,driverNav,Model,Reg,Class,Time,GapDiff,Speedkm,Driver,CoDriver,Historic,Class Rank
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
/entryinfo/59972-rallye-automobile-de-monte-carlo-2020/2494756/,1,#20,Østberg Mads - Eriksen Torstein,Citroën C3 R5PH Sport,EZ-026-FT,RC2,3:25:19.4,,88.9,Østberg Mads,Eriksen Torstein,False,1.0
/entryinfo/59972-rallye-automobile-de-monte-carlo-2020/2495205/,2,#23,Fourmaux Adrien - Jamoul Renaud,Ford Fiesta Rally2M-Sport Ford WRT,PX19 CUA,RC2,3:28:50.8,+3:31.4 +3:31.4,87.4 0.69,Fourmaux Adrien,Jamoul Renaud,False,2.0
/entryinfo/59972-rallye-automobile-de-monte-carlo-2020/2479220/,3,#21,Gryazin Nikolay - Fedorov Yaroslav,Hyundai i20 R5Hyundai Motorsport N,ALZ R 581,RC2,3:31:05.00:20,+5:45.6 +2:14.2,86.5 1.14,Gryazin Nikolay,Fedorov Yaroslav,False,3.0
/entryinfo/59972-rallye-automobile-de-monte-carlo-2020/2498360/,4,#24,Yates Rhys - Morgan James,Ford Fiesta Rally2M-Sport Ford WRT,PX69 EHB,RC2,3:38:22.4,+13:03.0 +7:17.4,83.6 2.57,Yates Rhys,Morgan James,False,4.0


## Itinerary

In [42]:
itinerary_path = 'https://www.ewrc-results.com/timetable/{stub}/'

In [123]:

def get_itinerary(stub, params=None):
    """Scrape intinerary page."""
    soup = soupify(itinerary_path.format(stub=stub))
    
    # We now need to find this explicitly by text
    #event_dist = soup.find('td',text='Event total').parent.find_all('td')[-1].text
    event_dist = soup.find('div', text=re.compile('Event total.*')).text
    
    # This is no longer a table... need to scrape divs...
    # Maybe: find divs, iterate until we hit next table etc?
    itinerary_rows = soup.find('div', {'class':'harm-main'}).find_all('div', {'class':'harm'})
    row_items = []
    for row in itinerary_rows:
        items = []
        #each row contains 6 divs
        for i in row.find_all('div'):
            items.append(i.text)
        row_items.append(items)
    #print(pd.DataFrame(row_items))
    # 
    itinerary_df = pd.DataFrame(row_items)
    itinerary_df.columns = ['Stage','Name', 'distance', 'Date', 'Time', 'Other']
    itinerary_df['Date'] = itinerary_df['Date'].replace('', nan).ffill()
    
    itinerary_df['Leg'] = [nan if 'leg' not in str(x) else str(x).replace('. leg','') for x in itinerary_df['Stage']]
    itinerary_df['Leg'] = itinerary_df['Leg'].fillna(method='ffill')
    itinerary_df['Date'] = itinerary_df['Date'].fillna(method='ffill')
    
    #What if we have no stage name?
    itinerary_df['Name'].fillna('', inplace=True)
    itinerary_df['Cancelled'] = itinerary_df['Name'].str.contains('(?i)cancelled') #(?i) ignore case
    itinerary_leg_totals = itinerary_df[itinerary_df['Name'].str.contains("Leg total")][['Leg', 'distance']].reset_index(drop=True)
    
    # The full itinerary includes shakedown, service etc
    full_itinerary_df = itinerary_df[~itinerary_df['Name'].str.contains(". leg")]
    full_itinerary_df = full_itinerary_df[~full_itinerary_df['Date'].str.contains(" km")]
    full_itinerary_df = full_itinerary_df.fillna(method='bfill', axis=1)
    #Legs may not be identified but we may want to identify services
    full_itinerary_df['Service'] = [n.startswith('Flexi') or n.startswith('Service') for n in full_itinerary_df['Name']]
    #full_itinerary_df['Service'] = [ 'Service' in i for i in full_itinerary_df['distance'] ]
    full_itinerary_df['Service_Num'] = full_itinerary_df['Service'].cumsum()
    full_itinerary_df.reset_index(drop=True, inplace=True)
    # TO DO - handle this better
    full_itinerary_df['Leg'] = full_itinerary_df['Leg'].fillna(0)

    itinerary_df = full_itinerary_df[~full_itinerary_df['Service']].reset_index(drop=True)
    itinerary_df = full_itinerary_df[full_itinerary_df['Stage'].str.startswith('SS')].reset_index(drop=True)
    itinerary_df['Section'] = itinerary_df['Service_Num'].rank(method='dense')
    itinerary_df.drop(columns=['Service', 'Service_Num'], inplace=True)
    
    itinerary_df[['Distance', 'Distance_unit']] = itinerary_df['distance'].str.extract(r'(?P<Distance>[^\s]*)\s+(?P<Distance_unit>.*)?')
    itinerary_df['Distance'] = itinerary_df['Distance'].astype(float)

    itinerary_df.set_index('Stage', inplace=True)

    return event_dist, itinerary_leg_totals, itinerary_df, full_itinerary_df

In [44]:
stub='54762-corbeau-seats-rally-tendring-clacton-2019'
stub='42870-rallye-automobile-de-monte-carlo-2018'
#stub='61961-mgj-engineering-brands-hatch-winter-stages-2020'
stub='59972-rallye-automobile-de-monte-carlo-2020'
event_dist, itinerary_leg_totals, itinerary_df, full_itinerary_df = get_itinerary(stub)

In [45]:
print(event_dist)
display(itinerary_leg_totals)
display(itinerary_df)
display(full_itinerary_df)

# TO DO - are we missing legs?

Event total 304.28 km


Unnamed: 0,Leg,distance


Unnamed: 0_level_0,Name,distance,Date,Time,Other,Leg,Section,Distance,Distance_unit
Stage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SS1,Malijai - Puimichel,17.47 km,23. 1.,20:38,,0,1.0,17.47,km
SS2,Bayons - Bréziers,25.49 km,23. 1.,22:26,,0,1.0,25.49,km
SS3,Curbans - Venterol 1,20.02 km,24. 1.,08:36,,0,2.0,20.02,km
SS4,St-Clément-sur-Durance - Freissinières 1,20.68 km,24. 1.,09:56,,0,2.0,20.68,km
SS5,Avançon - Notre-Dame-du-Laus 1,20.59 km,24. 1.,11:21,,0,2.0,20.59,km
SS6,Curbans - Venterol 2,20.02 km,24. 1.,13:54,,0,3.0,20.02,km
SS7,St-Clément-sur-Durance - Freissinières 2,20.68 km,24. 1.,15:14,,0,3.0,20.68,km
SS8,Avançon - Notre-Dame-du-Laus 2,20.59 km,24. 1.,16:39,,0,3.0,20.59,km
SS9,St-Léger-les-Mélèzes - La Bâtie-Neuve 1,16.87 km,25. 1.,09:38,,0,4.0,16.87,km
SS10,La Bréole - Selonnet 1,20.73 km,25. 1.,10:56,,0,4.0,20.73,km


Unnamed: 0,Stage,Name,distance,Date,Time,Other,Leg,Service,Service_Num
0,,Shakedown (Gap),3.35 km,22. 1.,16:01,,0,False,0
1,SS1,Malijai - Puimichel,17.47 km,23. 1.,20:38,,0,False,0
2,SS2,Bayons - Bréziers,25.49 km,23. 1.,22:26,,0,False,0
3,,Flexi Service A - Gap - 45+3 min,,23. 1.,23:36,,0,True,1
4,,Service B - Gap - 15+3 min,,24. 1.,07:45,,0,True,2
5,SS3,Curbans - Venterol 1,20.02 km,24. 1.,08:36,,0,False,2
6,SS4,St-Clément-sur-Durance - Freissinières 1,20.68 km,24. 1.,09:56,,0,False,2
7,SS5,Avançon - Notre-Dame-du-Laus 1,20.59 km,24. 1.,11:21,,0,False,2
8,,Service C - Gap - 40+3 min,,24. 1.,12:38,,0,True,3
9,SS6,Curbans - Venterol 2,20.02 km,24. 1.,13:54,,0,False,3


## Entry List

Get the entry list.

In [173]:
entrylist_path = "https://www.ewrc-results.com/entries/{stub}/?{params}"
EWRC_URL_BASE = "https://www.ewrc-results.com{path}"

In [152]:
def _get_entry_list_details(soup):
    """Scrape the details of an entry list page."""
    base_cols = ['CarNum', 'DriverName','CoDriverName','Team','Car','Class', 'Category', 'Type']
    
    entrylist_table = soup.find('div',{'class':'mt-1'})
    if entrylist_table:
        entrylist_table = entrylist_table.find('table', {'class': 'results'}).find_all('tr')
    else:
        return pd.DataFrame(columns=base_cols+['carNum'])
    
    # TO DO parse into the structure of the table by iterating each row
    table_rows = []
    for row in entrylist_table:
        items = []
        items.append(row.find('td', {'class':'text-left'}).text) # Car number
        items.append(row.find('a', {'title': 'Show driver profile'}).text)
        _codriver = row.find('a', {'title': 'Show codriver profile'})
        if _codriver:
            items.append(_codriver.text)
        items.append(row.find('td', {'class':'lh-130'}).text) #car
        items.append(row.find('td', {'class':'lh-130'}).text) #team
        items.append(row.find('td', {'class':'fs-091'}).text) #class
        items.append(row.find('td', {'class':'startlist-m'}).text) #category
        items.append(row.find('td', {'class':'entry-sct'}).text) #type
        table_rows.append(items)
    df_entrylist = pd.DataFrame(table_rows)
    for i in range(len(df_entrylist.columns) - len(base_cols)):
        base_cols.append(f'Meta_{i}')
    df_entrylist.columns = base_cols
    df_entrylist['carNum'] = df_entrylist['CarNum'].str.extract(r'#(.*)')
    return df_entrylist

def get_entry_list(stub, params=None):
    """Get a single entry list."""
    params = urllib.parse.urlencode(params) if params else ''
    entrylist_url = entrylist_path.format(stub=stub, params=params)
    soup = soupify(entrylist_url)
    if not soup:
        return pd.DataFrame()

    df_entrylist = _get_entry_list_details(soup)
    # TEST CLASS CHECK TO DO
    #df_entrylist["Class"] = df_entrylist["Class"].apply(lambda x: str(x).strip().split()[0])
    return df_entrylist.dropna(subset=['carNum'])


In [153]:
get_entry_list(stub)
#get_entry_list('66881-aci-rally-monza-2020/') #Breaks
#61089-rally-islas-canarias-2020  breaks
get_entry_list('61089-rally-islas-canarias-2020/')

Unnamed: 0,CarNum,DriverName,CoDriverName,Team,Car,Class,Category,Type,carNum
1,#1,Lukyanuk Alexey,Arnautov Alexey,Citroën C3 R5Saintéloc Junior Team,Citroën C3 R5Saintéloc Junior Team,RC2,,ERC,1
2,#2,Solberg Oliver,Johnston Aaron,Volkswagen Polo GTI R5,Volkswagen Polo GTI R5,RC2,,ERCERC 1 Junior,2
3,#3,Munster Grégoire,Louka Louis,Hyundai i20 R5,Hyundai i20 R5,RC2,,ERCERC 1 Junior,3
4,#4,Llarena Efrén,Fernández Sara,Citroën C3 R5Rally Team Spain,Citroën C3 R5Rally Team Spain,RC21,,ERCSpainERC 1 Junior,4
5,#5,Breen Craig,Nagle Paul,Hyundai i20 R5Team MRF Tyres,Hyundai i20 R5Team MRF Tyres,RC2,,ERC,5
...,...,...,...,...,...,...,...,...,...
89,#93,Robledano Javier,Suárez Suárez Raquel,Opel Adam SC.D. Faroga,Opel Adam SC.D. Faroga,4C4,,SpainCanary Islands,93
90,#94,Rodríguez Henríquez Jesús Manuel,Perestelo Concepción Francisco,Opel Adam SC.D. Faroga,Opel Adam SC.D. Faroga,4C4,,SpainCanary Islands,94
91,#95,Suárez Domingo,Rodríguez Santiago Lidia,Peugeot 208 1.2 Pure TechEscudería Maspalomas,Peugeot 208 1.2 Pure TechEscudería Maspalomas,4C4,,SpainCanary Islands,95
92,#97,Calvar Gonzalez José,Costas Rodríguez Eva,Seat Marbella GLEscudería Surco,Seat Marbella GLEscudería Surco,3H,,Spain,97


In [179]:

soup = soupify('https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?leg=1')
_get_entry_list_details(soup)

Unnamed: 0,CarNum,DriverName,CoDriverName,Team,Car,Class,Category,Type,carNum


In [211]:
YEAR=2021

def _get_starting_order(soup, year=YEAR):
    """Scrape starting order table."""
    _table = soup.find('table',{'class': 'results'})
    if not _table:
        return pd.DataFrame()

    _df = dfify(_table)
    _df.columns = ['startNum', 'carNum_str', 'driverNav', 'car', 'class', 'date_str']
    _df['carNum'] = _df['carNum_str'].str.strip('#').astype(int)
    _df['startNum'] = _df['startNum'].astype(int)
    _df['startTime'] = _df['date_str'] + f' {year}'
    _df['startTime'] = pd.to_datetime(_df['startTime'], format=f'%d. %m. %H:%M %Y')
    return _df

def get_start_lists(stub, params=None):
    """Get start lists for each leg."""
    
    def _get_soup(stub, params):
        entrylist_url = entrylist_path.format(stub=stub, params=params)
        print(entrylist_url)
        soup = soupify(entrylist_url)
        return soup
  
    params = urllib.parse.urlencode(params) if params else ''
    soup = _get_soup(stub, params=params)

    if not soup:
        return{}
    
    year = soup.find('h3').text.strip().split()[-1]
    try:
        year = int(year)
    except:
        year = YEAR

    leg_entry_lists = {}
    _legs = soup.find('div', {'class': 'mx-auto'}).find_all('a')
    
    # The start list for a leg disregards class, etc. All entries are listed
    for _leg in _legs:
        leg_stub = _leg['href']
        #params['leg'] = urllib.parse.parse_qs(urllib.parse.urlparse(leg_stub).query)['leg'][0]
        leg_soup = _get_soup(leg_stub.split('/')[2], leg_stub.split('?')[1])
        _leg_entry_list = _get_starting_order(leg_soup, year=year)
        leg_entry_lists[leg_stub.split('=')[-1]] = {'stub': leg_stub,
                                                    'label': _leg.text,
                                                    'startlist_df': _leg_entry_list}
    return leg_entry_lists

In [212]:
get_start_lists('61089-rally-islas-canarias-2020/')

https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020//?
https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?leg=1
https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?leg=2


{'1': {'stub': '/entries/61089-rally-islas-canarias-2020/?leg=1',
  'label': 'Leg 1 - 27. 11.',
  'startlist_df':     startNum carNum_str                                          driverNav  \
  0          1         #7                   Andreas Mikkelsen - Anders Jæger   
  1          2        #20                    Adrien Fourmaux - Renaud Jamoul   
  2          3        #10           Niki Mayr-Melnhof - Leopold Welsersheimb   
  3          4        #21                    Luis Monzón - José Carlos Déniz   
  4          5         #2                    Oliver Solberg - Aaron Johnston   
  ..       ...        ...                                                ...   
  87        88        #93            Javier Robledano - Raquel Suárez Suárez   
  88        89        #94  Jesús Manuel Rodríguez Henríquez - Francisco P...   
  89        90        #95          Domingo Suárez - Lidia Rodríguez Santiago   
  90        91        #97        José Calvar Gonzalez - Eva Costas Rodríguez   
  91    

TO DO 
For the ewrc class, we need to be able to call a single function that:
- returns the entry list;
- returns the start list dict
Then in the ewrc class, imporve cacheing by:
- initialising the entry list once
- if live, check each start list and if one doesn't yet exist, update it.

## Rebasers

Utils for rebasing

In [49]:
def _rebaseTimes(times, bib=None, basetimes=None):
    ''' Rebase times relative to specified driver. '''
    # Sometimes this errors depending on driver - check why...
    #Should we rebase against entryId, so need to lool that up. In which case, leave index as entryId
    if (bib is None and basetimes is None): return times
    #bibid = codes[codes['Code']==bib].index.tolist()[0]
    if bib is not None:
        return times - times.loc[bib]
    if times is not None and basetimes is not None:
        return times - basetimes
    return times

## `EWRC` Class
Create a class to that can be used to gran all the results for a particular rally, as required.

We fudge the definition of class functions so that we can separately define and functions in a standalione way. This is probably *not good practice*...!

In [50]:
# TO DO
# At the moment, we handle cacheing in the class
# Instead, cache using requests and always make the call from the class

In [257]:
class EWRC:
    """Class for eWRC data for a particular rally."""

    def __init__(self, stub='', base='https://www.ewrc-results.com', path='', live=False, stages=None):
        _path = stub.split('?')
        self.base_url = base
        self.path = _path[1] if len(_path)==2 else path
        self.stub = [s for s in _path[0].split('/') if s][-1]
        self.live = live
        self.stages = [] if stages is None else stages
        self.stage_result_links = None
        
        self.start_lists = {}
        self.raw_base_stages = []
        self.raw_base_categories = []
        self.base_stages = []
        self.base_classes = []
        self.base_categories = []
        self.base_championships = []
        self.rally_championship = ''
        self.rally_class = ''
        try:
            self.raw_base_stages, self.raw_base_categories = homepage_quick_scrape(stub)
            self.set_base_stages()
            self.set_base_classes()
            self.set_base_championships()
        except:
            pass
        
        self.df_rally_overall = None
        
        self.df_allInOne = None #we don't actually use this?
        self.df_overall = None
        self.df_stages = None
        self.df_overall_pos = None
        self.df_stages_pos = None
        
        self.df_overall_rebased_to_leader = None
        self.df_stages_rebased_to_overall_leader = None
        self.df_stages_rebased_to_stage_winner = None
        
        self.event_dist = None
        self.df_itinerary_leg_totals = None
        self.df_itinerary = None
        self.df_full_itinerary = None

        self.stage_distances_all = None
        self.stage_distances = None
        
        self.df_entry_list = None
        self.rally_classes = None
        
        self.entryFromCar = None
        self.carFromEntry = None
        self.driverNumFromEntry = None
 
        self.df_stage_result = pd.DataFrame(columns=stage_result_cols)
        self.df_stage_overall = pd.DataFrame(columns=stage_overall_cols)
        self.df_stage_retirements = pd.DataFrame(columns=retirement_cols+retirement_extra_cols)
        self.df_stage_penalties = pd.DataFrame(columns=penalty_cols+penalty_extra_cols)

    def generate_url(self, params):
        """
        Generate a URL for a particular selection.
        """
        # There seems to be a snafu in the arg name used on different paths for the class/championship.
        # This means we need to craft params by hand depneding on the page we want to load.
        return f'{urljoin(self.base_url, self.stub, trailing=True)}?{urllib.parse.urlencode(params)}'
        
    def set_base_stages(self):
        """Report base stages."""
        
        # TO DO  - what do "base stages" mean?
        self.base_stages = {}
        for s in self.raw_base_stages:
            if s[1]:
                self.base_stages[s[2].split()[0]] = {'name': ' '.join(s[2].split()[1:]).split('-')[0].strip(), 'link':s[1]} 
        return self.base_stages
    
    def set_base_classes(self):
        """Retrieve base classes."""
        # TO DO  - via get_categories_from_homepage(self.stub)[1] ?
        i = len(self.raw_base_categories)
        self.base_classes = {}
        for c in self.raw_base_categories[i-1]:
            if '&' in c[1]:
                self.base_classes[c[0]] = c[1].split('&')[-1]
            else:
                self.base_classes[c[0]] = ''
        return self.base_classes
    
    def set_base_categories(self):
        """Retrieve base categories."""
        self.base_categories = {c:u.split('&')[-1] for (c, u) in get_categories_from_homepage(self.stub)[0]}
        for k in self.base_categories:
            if '?' in self.base_categories[k]:
                self.base_categories[k] = ''
        return self.base_categories
    
    def set_base_championships(self):
        """Retrieve base championships."""
        # TO DO - this is broken; the new scraper can access each as a list
        self.base_championships = {}
        if len(self.raw_base_categories) == 2:
            for c in self.raw_base_categories[0]:
                if '&' in c[1]:
                    self.base_championships[c[0]] = c[1].split('&')[-1]
                else:
                    self.base_championships[c[0]] = ''
        else:
            self.base_championships = {'All':''}
        return self.base_championships

    def annotateEntryWithEventDriverId(self):
        """Add class entry ID to df_entry_list."""
        if hasattr(self, 'entryFromCar'):
            self.get_stage_times()
            
        self.df_entry_list['driverEntry']  = self.df_entry_list['carNum'].map(self.entryFromCar)
        self.df_entry_list['driverCar'] = self.df_entry_list['carNum'] + ': '+self.df_entry_list['DriverName']
        self.driverNumFromEntry = self.df_entry_list[['driverCar','driverEntry']].set_index('driverEntry').to_dict()['driverCar']
        
    def df_inclass_cars(self, _df, rally_class='all', typ='entryId'):
        """Get cars in particular class."""
        if rally_class != 'all':
            _df = _df[_df.index.isin(self.carsInClass(rally_class, typ=typ))]
        return _df

    def carsInClass(self, qclass, typ='carNum'):
        #Can't we also pass a dict of key/vals to the widget?
        #Omit car 0
        df_entry_list = self.get_entry_list()
        if not qclass:
            return []
        if qclass.lower()=='all':
            return df_entry_list[df_entry_list['CarNum']!='#0']['carNum'].dropna().to_list()
        _cars = df_entry_list[(df_entry_list['CarNum']!='#0') & (df_entry_list['Class']==qclass)]['carNum'].to_list()
        if typ=='entryId':
            _cars = [self.entryFromCar[c] for c in self.entryFromCar if c in _cars]
        return _cars

    def stages_class_winners(self, rally_class='all'):
        """Return stage winners for a specified class."""
        _class_stage_winners = self.df_inclass_cars(self.df_stages_pos,
                                                    rally_class=rally_class).idxmin()
        return _class_stage_winners
    
    def get_class_rebased_times(self, rally_class='all', typ='stagewinner'):
        """
        Get times rebased relative to class.
        Rebaser can be either class stage winner or class stage overall.
        """
        # TO DO  - not yet implemented for class overall
        self.get_stage_times()
        _stage_times = self.df_inclass_cars(self.df_stages, rally_class=rally_class)
        df_stages_rebased_to_stage_winner = _stage_times.apply(_rebaseTimes,
                                                               basetimes=_stage_times.min(), axis=1)
        return df_stages_rebased_to_stage_winner

    def set_rebased_times(self):
        if self.df_stages_rebased_to_overall_leader is None \
                or self.df_stages_rebased_to_stage_winner is None \
                or self.df_stages_rebased_to_stage_winner is None:
            #print('setting rebased times...')
            self.get_stage_times()
            leaderStagetimes = self.df_stages.iloc[0]
            self.df_stages_rebased_to_overall_leader = self.df_stages.apply(_rebaseTimes,
                                                                            basetimes=leaderStagetimes, axis=1)
            #print(self.df_stages.columns, self.df_stages_rebased_to_overall_leader.columns)
            #Now rebase to the stage winner
            self.df_stages_rebased_to_stage_winner = self.df_stages_rebased_to_overall_leader.apply(_rebaseTimes, basetimes=self.df_stages_rebased_to_overall_leader.min(), axis=1)

            leaderTimes = self.df_overall.min()
            self.df_overall_rebased_to_leader = self.df_overall.apply(_rebaseTimes,
                                                                      basetimes=leaderTimes, axis=1)

    def _set_car_entry_lookups(self, df, force=False):
        """Look-up dicts between car number and entry."""
        if force or not self.carFromEntry or not self.entryFromCar:
            _carFromEntry = df['carNum'].to_dict()
            if not self.carFromEntry:
                self.carFromEntry = {}
            self.carFromEntry = {**self.carFromEntry, **_carFromEntry}
            if not self.entryFromCar:
                self.entryFromCar = {}
            _entryFromCar = {v:k for (k, v) in self.carFromEntry.items()}
            self.entryFromCar = {**self.entryFromCar, **_entryFromCar}
    
    def get_final(self):
        if self.df_rally_overall is None:
            self.df_rally_overall = get_final(self.stub, params={'sct':self.rally_championship, 'ct':self.rally_class})
            self._set_car_entry_lookups(self.df_rally_overall)
        return self.df_rally_overall
        
    def get_stage_times(self):
        if self.live or self.df_overall is None or self.df_stages is None or self.df_overall_pos is None:
            self.df_allInOne, self.df_overall, self.df_stages, \
                self.df_overall_pos, self.df_stages_pos = get_stage_times(self.stub)
            self._set_car_entry_lookups(self.df_allInOne)
        return self.df_allInOne, self.df_overall, self.df_stages, self.df_overall_pos
    
    def get_itinerary(self):
        if self.live or self.event_dist is None or self.df_itinerary_leg_totals is None \
            or self.df_itinerary is None or self.df_full_itinerary is None:
                self.event_dist, self.df_itinerary_leg_totals, \
                    self.df_itinerary, self.df_full_itinerary_df = get_itinerary(self.stub)

        _stage_distances = self.df_itinerary['Distance'][~self.df_itinerary['Time'].str.contains('cancelled')]
        # Stage distances do not identify cancelled stages
        # The following is the correct stage index
        #_stage_distances.index = [int(i.lstrip('SS')) for i in _stage_distances.index]
        #As a hack, to cope with cancelled stages, reindex
        _stage_distances.reset_index(drop=True, inplace=True)
        _stage_distances.index += 1 
        self.stage_distances = _stage_distances
        self.stage_distances_all = self.df_itinerary['Distance']

        return self.event_dist, self.df_itinerary_leg_totals, \
                self.df_itinerary, self.df_full_itinerary_df

    def get_entry_list(self):
        if self.df_entry_list is None:
            #self.df_entry_list = get_entry_list(self.stub, self.path)
            self.df_entry_list = get_entry_list(self.stub, params={'sct':self.rally_championship, 'cat':self.rally_class})
        #A list of classes could be useful, so grab it while we can
        self.rally_classes = self.df_entry_list['Class'].dropna().unique()
        
        self.annotateEntryWithEventDriverId()
        return self.df_entry_list
    
    def get_start_lists(self):
        """Get start lists."""
        # Start lists are updated through the rally so hard to cache if live.
        if self.live or not self.start_lists:
            self.start_lists = get_start_lists(self.stub)
        return self.start_lists
    
    def get_stage_result_links(self):
        if self.stage_result_links is None:
            self.stage_result_links = get_stage_result_links(self.stub, params={'sct':self.rally_championship, 'ct':self.rally_class})
        return self.stage_result_links
    
    def get_stage_results(self, stage=None):
        #for now, just return what we have with stage as None
        if stage is None:
            return self.df_stage_result, self.df_stage_overall, \
                    self.df_stage_retirements, self.df_stage_penalties
        # Could maybe change that to get everything?
        stages = stage if isinstance(stage,list) else [stage]
        if stages:
            links = self.get_stage_result_links()
            #print(links)
            if 'all' in stages:
                #print('all')
                stages = [k for k in links.keys() if 'leg' not in k]
            elif 'final' in stages or 'last' in stages:
                stages = [k for k in links.keys() if 'leg' not in k][-1]
            for stage in stages:
                if self.live or (stage not in self.df_stage_result['Stage'].unique() and stage in links):
                    df_stage_result, df_stage_overall, df_stage_retirements, \
                        df_stage_penalties = get_stage_results(links[stage])
                    self.df_stage_result = self.df_stage_result.append(df_stage_result, sort=False).reset_index(drop=True)
                    self.df_stage_overall = self.df_stage_overall.append(df_stage_overall, sort=False).reset_index(drop=True)
                    self.df_stage_retirements = self.df_stage_retirements.append(df_stage_retirements, sort=False).reset_index(drop=True)
                    self.df_stage_penalties = self.df_stage_penalties.append(df_stage_penalties, sort=False).reset_index(drop=True)
        
        if stages:
            return self.df_stage_result[self.df_stage_result['Stage'].isin(stages)], \
                    self.df_stage_overall[self.df_stage_overall['Stage'].isin(stages)], \
                    self.df_stage_retirements[self.df_stage_retirements['Stage'].isin(stages)], \
                    self.df_stage_penalties[self.df_stage_penalties['Stage'].isin(stages)]
        
        self._set_car_entry_lookups(self.df_stage_result)
        
        return self.df_stage_result, self.df_stage_overall, \
                self.df_stage_retirements, self.df_stage_penalties

In [258]:
print(rally_stub+'/') #60140-rally-sweden-2020/
print('66881-aci-rally-monza-2020/'+'/') # stages 10 and 12 cancelled TO TEST BREAKS
# Also breaks '61089-rally-islas-canarias-2020/
ewrc=EWRC('61089-rally-islas-canarias-2020')

59972-rallye-automobile-de-monte-carlo-2020/
66881-aci-rally-monza-2020//
https://www.ewrc-results.com/results/61089-rally-islas-canarias-2020/?&None


In [259]:
ewrc.set_base_categories(), ewrc.set_base_classes()

({'All': '',
  'ERC': 'sct=6',
  'ERC 2': 'sct=563',
  'ERC 3': 'sct=564',
  'ERC 1 Junior': 'sct=1493',
  'ERC 3 Junior': 'sct=1492',
  'ERC Ladies': 'sct=772',
  'Spain': 'sct=27',
  'Spain Superchampionship': 'sct=1512',
  'Abarth Rally Cup': 'sct=1497',
  'Canary Islands': 'sct=89'},
 {'All': '',
  '1': 'ct=211',
  '2': 'ct=212',
  '3': 'ct=146',
  '4': 'ct=145',
  'H': 'ct=73',
  'C1': 'ct=58',
  'C2': 'ct=99',
  'C3': 'ct=25',
  'C4': 'ct=97',
  'RC2': 'ct=1018',
  'RC3': 'ct=1019',
  'RC4': 'ct=1020',
  'RC5': 'ct=1035',
  'RGT': 'ct=599'})

In [244]:
ewrc.get_start_lists()

https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?
https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?leg=1
https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?leg=2


{'1': {'stub': '/entries/61089-rally-islas-canarias-2020/?leg=1',
  'label': 'Leg 1 - 27. 11.',
  'startlist_df':     startNum carNum_str                                          driverNav  \
  0          1         #7                   Andreas Mikkelsen - Anders Jæger   
  1          2        #20                    Adrien Fourmaux - Renaud Jamoul   
  2          3        #10           Niki Mayr-Melnhof - Leopold Welsersheimb   
  3          4        #21                    Luis Monzón - José Carlos Déniz   
  4          5         #2                    Oliver Solberg - Aaron Johnston   
  ..       ...        ...                                                ...   
  87        88        #93            Javier Robledano - Raquel Suárez Suárez   
  88        89        #94  Jesús Manuel Rodríguez Henríquez - Francisco P...   
  89        90        #95          Domingo Suárez - Lidia Rodríguez Santiago   
  90        91        #97        José Calvar Gonzalez - Eva Costas Rodríguez   
  91    

In [114]:
ewrc.base_stages
# Expect {'SS2': {'name': 'Hof', 'link': '/results/60140-rally-sweden-2020/?s=250433'},
# 'SS3': {'name': 'Finnskogen 1',
#  'link': '/results/60140-rally-sweden-2020/?s=250434'}, etc

{'SS1': {'name': 'Valsequillo 1',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265881'},
 'SS2': {'name': 'San Mateo',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265882'},
 'SS3': {'name': 'Artenara 1',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265883'},
 'SS4': {'name': 'Tejeda 1',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265884'},
 'SS5': {'name': 'Valsequillo 2',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265893'},
 'SS6': {'name': 'San Mateo',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265894'},
 'SS7': {'name': 'Artenara 2',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265895'},
 'SS8': {'name': 'Tejeda 2',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265896'},
 'SS9': {'name': 'Las Palmas de Gran Canaria',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265897'},
 'SS10': {'name': 'Valleseco',
  'link': '/results/61089-rally-islas-canarias-2020/?s=265901'},
 'SS11': {'name': 'Gálda

In [115]:
#sct=10
#ewrc.rally_championship = 10
ewrc.get_stage_times()
ewrc.df_allInOne

Unnamed: 0_level_0,driverNav,driver,navigator,carNum,carModel,retired,Pos,stagetimes,overalltimes,positions,penalties
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,Fourmaux Adrien - Jamoul Renaud,Fourmaux Adrien,Jamoul Renaud,20,Ford Fiesta Rally2,False,1,"[8:29.8, 8:49.1, 7:14.8, 9:22.0, 7:59.8, 8:10....","[8:29.8, 17:18.9, 24:33.7, 33:55.7, 41:55.5, 5...","[3, 8, 4, 3, 3, 3, 3, 3, 2, 1, 2, 1, 1, 1, 1, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848173/,Bonato Yoann - Boulloud Benjamin,Bonato Yoann,Boulloud Benjamin,18,Citroën C3 R5,False,2,"[8:37.6, 8:46.5, 7:26.1, 9:26.6, 7:55.2, 8:04....","[8:37.6, 17:24.1, 24:50.2, 34:16.8, 42:12.0, 5...","[10, 9, 9, 8, 6, 6, 4, 4, 4, 4, 4, 2, 2, 2, 2,...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848183/,Ares Iván - Vázquez Liste David,Ares Iván,Vázquez Liste David,9,Hyundai i20 R5,False,3,"[8:29.3, 8:37.9, 7:13.2, 9:17.6, 7:59.0, 8:08....","[8:29.3, 17:07.2, 24:20.4, 33:38.0, 41:37.0, 4...","[2, 3, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 3, 3, 3, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848196/,Solberg Oliver - Johnston Aaron,Solberg Oliver,Johnston Aaron,2,Volkswagen Polo GTI R5,False,4,"[8:31.7, 8:57.9, 7:33.6, 9:39.4, 8:05.0, 8:14....","[8:31.7, 17:29.6, 25:03.2, 34:42.6, 42:47.6, 5...","[6, 13, 12, 13, 12, 10, 10, 7, 7, 7, 6, 5, 4, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848178/,Suárez José Antonio - Iglesias Pin Alberto,Suárez José Antonio,Iglesias Pin Alberto,22,Škoda Fabia Rally2 evo,False,5,"[8:47.8, 8:40.1, 7:36.9, 9:31.1, 7:51.5, 8:02....","[8:47.8, 17:27.9, 25:04.8, 34:35.9, 42:27.4, 5...","[19, 12, 13, 12, 9, 8, 5, 5, 5, 5, 5, 4, 5, 5,...","[, , , , , , , , , , , , , , , , ]"
...,...,...,...,...,...,...,...,...,...,...,...
/entryinfo/61089-rally-islas-canarias-2020/2848189/,Guerra Rodríguez Kevin - Cambeiro Pérez Aitor,Guerra Rodríguez Kevin,Cambeiro Pérez Aitor,72,,False,,"[8:58.8, 9:08.3]","[8:58.8, 18:07.1]","[30, 32]","[, ]"
/entryinfo/61089-rally-islas-canarias-2020/2850269/,Climent Nelson - Vega Agustín,Climent Nelson,Vega Agustín,86,,False,,"[10:47.7, 11:10.1, nan, 14:00.6, 11:35.7, 10:4...","[10:47.7, 21:57.8, nan, nan, nan, nan, nan, na...","[84, 81, nan, nan, nan, nan, nan, nan, nan, nan]","[, , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2851807/,Robledano Javier - Suárez Suárez Raquel,Robledano Javier,Suárez Suárez Raquel,93,,False,,"[13:09.8, 13:32.1, nan, 14:57.6, 11:59.5, 10:5...","[13:09.8, 26:41.9, nan, nan, nan, nan, nan, na...","[90, 89, nan, nan, nan, nan, nan, nan, nan, nan]","[, , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2852018/,Sánchez Garrido David - Monzón Cruz Juan,Sánchez Garrido David,Monzón Cruz Juan,68,,False,,"[9:12.2, 9:39.2]","[9:12.2, 18:51.4]","[42, 49]","[, ]"


In [116]:
'''
{'All': '',
 'RC1': 'ct=1017',
 'RC2': 'ct=1018',
 'RC4': 'ct=1020',
 'NAT4': 'ct=4742'}
 '''

ewrc.base_classes

{'All': '',
 '1': 'ct=211',
 '2': 'ct=212',
 '3': 'ct=146',
 '4': 'ct=145',
 'H': 'ct=73',
 'C1': 'ct=58',
 'C2': 'ct=99',
 'C3': 'ct=25',
 'C4': 'ct=97',
 'RC2': 'ct=1018',
 'RC3': 'ct=1019',
 'RC4': 'ct=1020',
 'RC5': 'ct=1035',
 'RGT': 'ct=599'}

In [117]:
'''
{'All': '',
 'WRC 2': 'sct=1682',
 'JWRC': 'sct=10',
 'WRC 3': 'sct=1681',
 'M': 'group=M'}
'''
ewrc.base_championships

{'All': '',
 'ERC': 'sct=6',
 'ERC 2': 'sct=563',
 'ERC 3': 'sct=564',
 'ERC 1 Junior': 'sct=1493',
 'ERC 3 Junior': 'sct=1492',
 'ERC Ladies': 'sct=772',
 'Spain': 'sct=27',
 'Spain Superchampionship': 'sct=1512',
 'Abarth Rally Cup': 'sct=1497',
 'Canary Islands': 'sct=89'}

In [118]:
#df_stage_result, df_stage_overall, df_stage_retirements, df_stage_penalties

# FOr some reason ths was giving the penalties table?
ewrc.get_stage_results('SS3')

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020/?s=265883/?
Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64') ['Pos', 'CarNum', 'Desc', 'Class', 'Time', 'GapDiff', 'Speedkm']


Unnamed: 0,0,1,2,3,4,5,6
0,1.,#9,Ares Iván - Vázquez Liste David Hyundai i20 R5,RC2 1,7:13.2,,97.6
1,2.,#23,Solans Nil - Moreno Xavi Škoda Fabia Rally2 evo,RC2 1,7:13.7,+0.5 +0.5,97.5 0.04
2,3.,#20,Fourmaux Adrien - Jamoul Renaud Ford Fiesta Ra...,RC2,7:14.8,+1.6 +1.1,97.3 0.14
3,4.,#25,Lemes Yeray - Peñate Rogelio Hyundai i20 R5,RC2 1 C1,7:20.1,+6.9 +5.3,96.1 0.59
4,5.,#50,Amourette Marc - Gauduin Anaïs Citroën C3 R5,RC2,7:20.2,+7.0 +0.1,96.1 0.60
...,...,...,...,...,...,...,...
83,84.,#81,Martín Rivero Miguel - Granado Gil J. BMW 325i...,C1,9:50.3,+2:37.1 +2.5,71.7 13.4
84,85.,#74,Quintana D. - Álvarez Barreto D. Ford Fiesta N5,1 C1,10:17.6,+3:04.4 +27.3,68.5 15.7
85,86.,#84,Medina Santana F. - Fleitas Díaz F. BMW M3 E46,C1,10:19.2,+3:06.0 +1.6,68.3 15.8
86,87.,#73,Cachón Alejandro - Jandrín Ford Fiesta N5,1,10:46.0,+3:32.8 +26.8,65.5 18.1


retirement


(    Pos CarNum                                               Desc Class  \
 0    1.     #9     Ares Iván - Vázquez Liste David Hyundai i20 R5   RC2   
 1    2.    #23    Solans Nil - Moreno Xavi Škoda Fabia Rally2 evo   RC2   
 2    3.    #20  Fourmaux Adrien - Jamoul Renaud Ford Fiesta Ra...   RC2   
 3    4.    #25        Lemes Yeray - Peñate Rogelio Hyundai i20 R5   RC2   
 4    5.    #50       Amourette Marc - Gauduin Anaïs Citroën C3 R5   RC2   
 ..  ...    ...                                                ...   ...   
 83  84.    #81  Martín Rivero Miguel - Granado Gil J. BMW 325i...    C1   
 84  85.    #74    Quintana D. - Álvarez Barreto D. Ford Fiesta N5     1   
 85  86.    #84     Medina Santana F. - Fleitas Díaz F. BMW M3 E46    C1   
 86  87.    #73          Cachón Alejandro - Jandrín Ford Fiesta N5     1   
 87  88.    #19       López Pepe - Rozada Borja Citroën C3 R5 [SR]   RC2   
 
        Time           GapDiff    Speedkm Stage   StageName  StageDist    Gap  \
 0   

In [126]:
not pd.isnull(ewrc.df_itinerary)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [120]:
ewrc.get_stage_results('SS2')[0]

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020/?s=265882/?
Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64') ['Pos', 'CarNum', 'Desc', 'Class', 'Time', 'GapDiff', 'Speedkm']


Unnamed: 0,0,1,2,3,4,5,6
0,1.,#23,Solans Nil - Moreno Xavi Škoda Fabia Rally2 evo,RC2 1,8:30.1,,84.8
1,2.,#25,Lemes Yeray - Peñate Rogelio Hyundai i20 R5,RC2 1 C1,8:33.1,+3.0 +3.0,84.3 0.25
2,3.,#14,Griebel Marijan - Winklhofer Pirmin Citroën C3 R5,RC2,8:34.0,+3.9 +0.9,84.1 0.32
3,4.,#9,Ares Iván - Vázquez Liste David Hyundai i20 R5,RC2 1,8:37.9,+7.8 +3.9,83.5 0.65
4,5.,#6,Cais Erik - Žáková Jindřiška Ford Fiesta Rally2,RC2,8:39.7,+9.6 +1.8,83.2 0.80
...,...,...,...,...,...,...,...
87,88.,#90,Morales Rivero Christian - Quintero M. Opel Ad...,4 C4,11:58.9,+3:28.8 +17.9,60.1 17.4
88,89.,#92,García Pérez D. - Quintana González D. Opel Ad...,4 C4,12:22.2,+3:52.1 +23.3,58.3 19.3
89,90.,#93,Robledano Javier - Suárez Suárez R. Opel Adam S,4 C4,13:32.1,+5:02.0 +1:09.9,53.2 25.2
90,91.,#19,López Pepe - Rozada Borja Citroën C3 R5 [SR],RC2 1,18:30.1,+10:00.0 +4:58.0,38.9 50.0


Unnamed: 0,Pos,CarNum,Desc,Class,Time,GapDiff,Speedkm,Stage,StageName,StageDist,Gap,Diff,Speed,Dist,entryId,model,navigator,PosNum
88,1.,#23,Solans Nil - Moreno Xavi Škoda Fabia Rally2 evo,RC2,8:30.1,,84.8,SS2,San Mateo - Disa 1,12.01,0.0,0.0,84.8,,[Škoda Fabia Rally2 evo],Solans Nil,Moreno Xavi,1
89,2.,#25,Lemes Yeray - Peñate Rogelio Hyundai i20 R5,RC2,8:33.1,+3.0 +3.0,84.3 0.25,SS2,San Mateo - Disa 1,12.01,3.0,3.0,84.3,0.25,[Hyundai i20 R5],Lemes Yeray,Peñate Rogelio,2
90,3.,#14,Griebel Marijan - Winklhofer Pirmin Citroën C3 R5,RC2,8:34.0,+3.9 +0.9,84.1 0.32,SS2,San Mateo - Disa 1,12.01,3.9,0.9,84.1,0.32,[Citroën C3 R5],Griebel Marijan,Winklhofer Pirmin,3
91,4.,#9,Ares Iván - Vázquez Liste David Hyundai i20 R5,RC2,8:37.9,+7.8 +3.9,83.5 0.65,SS2,San Mateo - Disa 1,12.01,7.8,3.9,83.5,0.65,[Hyundai i20 R5],Ares Iván,Vázquez Liste David,4
92,5.,#6,Cais Erik - Žáková Jindřiška Ford Fiesta Rally2,RC2,8:39.7,+9.6 +1.8,83.2 0.80,SS2,San Mateo - Disa 1,12.01,9.6,1.8,83.2,0.80,[Ford Fiesta Rally2],Cais Erik,Žáková Jindřiška,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,88.,#90,Morales Rivero Christian - Quintero M. Opel Ad...,4,11:58.9,+3:28.8 +17.9,60.1 17.4,SS2,San Mateo - Disa 1,12.01,208.8,17.9,60.1,17.4,[Opel Adam S],Morales Rivero Christian,Quintero M.,88
176,89.,#92,García Pérez D. - Quintana González D. Opel Ad...,4,12:22.2,+3:52.1 +23.3,58.3 19.3,SS2,San Mateo - Disa 1,12.01,232.1,23.3,58.3,19.3,[Opel Adam S],García Pérez D.,Quintana González D.,89
177,90.,#93,Robledano Javier - Suárez Suárez R. Opel Adam S,4,13:32.1,+5:02.0 +1:09.9,53.2 25.2,SS2,San Mateo - Disa 1,12.01,302.0,69.9,53.2,25.2,[Opel Adam S],Robledano Javier,Suárez Suárez R.,90
178,91.,#19,López Pepe - Rozada Borja Citroën C3 R5 [SR],RC2,18:30.1,+10:00.0 +4:58.0,38.9 50.0,SS2,San Mateo - Disa 1,12.01,600.0,298.0,38.9,50.0,[Citroën C3 R5],López Pepe,Rozada Borja,91


In [100]:
ewrc.get_stage_results('SS11')#[0] # Get an error on cancelled stage - rogue results?

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020/?s=265907&sct=10/?


IndexError: list index out of range

In [101]:
ewrc.set_rebased_times()
display(ewrc.df_stages_rebased_to_overall_leader)

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
/entryinfo/61089-rally-islas-canarias-2020/2848173/,7.8,-2.6,11.3,4.6,-4.6,-6.2,6.1,9.2,6.1,-1.3,2.2,-0.4,1.2,3.8,-4.6,-3.6,-4.0
/entryinfo/61089-rally-islas-canarias-2020/2848183/,-0.5,-11.2,-1.6,-4.4,-0.8,-2.1,0.8,3.8,8.5,8.7,6.5,25.4,27.8,-3.0,-6.4,1.6,2.5
/entryinfo/61089-rally-islas-canarias-2020/2848196/,1.9,8.8,18.8,17.4,5.2,3.7,12.2,5.3,9.3,1.9,11.0,4.1,-3.5,0.2,-6.0,-3.9,-9.0
/entryinfo/61089-rally-islas-canarias-2020/2848178/,18.0,-9.0,22.1,9.1,-8.3,-7.6,7.3,23.4,5.6,0.2,15.6,16.6,13.7,1.8,2.9,1.7,-14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
/entryinfo/61089-rally-islas-canarias-2020/2848189/,29.0,19.2,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2850269/,137.9,141.0,,278.6,215.9,153.7,124.3,208.5,511.5,17.3,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2851807/,280.0,283.0,,335.6,239.7,164.7,128.2,197.1,519.5,17.5,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2852018/,42.4,50.1,,,,,,,,,,,,,,,


In [102]:
ewrc.get_stage_results('SS1')#[2]

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020/?s=265881&sct=10/?


IndexError: list index out of range

In [103]:
ewrc.get_stage_result_links()

{'SS1': '/results/61089-rally-islas-canarias-2020/?s=265881&sct=10',
 'SS2': '/results/61089-rally-islas-canarias-2020/?s=265882&sct=10',
 'SS3': '/results/61089-rally-islas-canarias-2020/?s=265883&sct=10',
 'SS4': '/results/61089-rally-islas-canarias-2020/?s=265884&sct=10',
 'SS5': '/results/61089-rally-islas-canarias-2020/?s=265893&sct=10',
 'SS6': '/results/61089-rally-islas-canarias-2020/?s=265894&sct=10',
 'SS7': '/results/61089-rally-islas-canarias-2020/?s=265895&sct=10',
 'SS8': '/results/61089-rally-islas-canarias-2020/?s=265896&sct=10',
 'SS9': '/results/61089-rally-islas-canarias-2020/?s=265897&sct=10',
 'SS10': '/results/61089-rally-islas-canarias-2020/?s=265901&sct=10',
 'SS11': '/results/61089-rally-islas-canarias-2020/?s=265907&sct=10',
 'SS12': '/results/61089-rally-islas-canarias-2020/?s=265917&sct=10',
 'SS13': '/results/61089-rally-islas-canarias-2020/?s=265918&sct=10',
 'SS14': '/results/61089-rally-islas-canarias-2020/?s=265919&sct=10',
 'SS15': '/results/61089-rall

In [104]:
ewrc.stub

'61089-rally-islas-canarias-2020'

In [105]:
ewrc.get_stage_results('SS1')[3]

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020/?s=265881&sct=10/?


IndexError: list index out of range

In [106]:
ewrc.get_stage_results('SS1')[3]

https://www.ewrc-results.com//results/61089-rally-islas-canarias-2020/?s=265881&sct=10/?


IndexError: list index out of range

In [107]:
ewrc.get_stage_times()[0]

Unnamed: 0_level_0,driverNav,driver,navigator,carNum,carModel,retired,Pos,stagetimes,overalltimes,positions,penalties
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,Fourmaux Adrien - Jamoul Renaud,Fourmaux Adrien,Jamoul Renaud,20,Ford Fiesta Rally2,False,1,"[8:29.8, 8:49.1, 7:14.8, 9:22.0, 7:59.8, 8:10....","[8:29.8, 17:18.9, 24:33.7, 33:55.7, 41:55.5, 5...","[3, 8, 4, 3, 3, 3, 3, 3, 2, 1, 2, 1, 1, 1, 1, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848173/,Bonato Yoann - Boulloud Benjamin,Bonato Yoann,Boulloud Benjamin,18,Citroën C3 R5,False,2,"[8:37.6, 8:46.5, 7:26.1, 9:26.6, 7:55.2, 8:04....","[8:37.6, 17:24.1, 24:50.2, 34:16.8, 42:12.0, 5...","[10, 9, 9, 8, 6, 6, 4, 4, 4, 4, 4, 2, 2, 2, 2,...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848183/,Ares Iván - Vázquez Liste David,Ares Iván,Vázquez Liste David,9,Hyundai i20 R5,False,3,"[8:29.3, 8:37.9, 7:13.2, 9:17.6, 7:59.0, 8:08....","[8:29.3, 17:07.2, 24:20.4, 33:38.0, 41:37.0, 4...","[2, 3, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 3, 3, 3, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848196/,Solberg Oliver - Johnston Aaron,Solberg Oliver,Johnston Aaron,2,Volkswagen Polo GTI R5,False,4,"[8:31.7, 8:57.9, 7:33.6, 9:39.4, 8:05.0, 8:14....","[8:31.7, 17:29.6, 25:03.2, 34:42.6, 42:47.6, 5...","[6, 13, 12, 13, 12, 10, 10, 7, 7, 7, 6, 5, 4, ...","[, , , , , , , , , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2848178/,Suárez José Antonio - Iglesias Pin Alberto,Suárez José Antonio,Iglesias Pin Alberto,22,Škoda Fabia Rally2 evo,False,5,"[8:47.8, 8:40.1, 7:36.9, 9:31.1, 7:51.5, 8:02....","[8:47.8, 17:27.9, 25:04.8, 34:35.9, 42:27.4, 5...","[19, 12, 13, 12, 9, 8, 5, 5, 5, 5, 5, 4, 5, 5,...","[, , , , , , , , , , , , , , , , ]"
...,...,...,...,...,...,...,...,...,...,...,...
/entryinfo/61089-rally-islas-canarias-2020/2848189/,Guerra Rodríguez Kevin - Cambeiro Pérez Aitor,Guerra Rodríguez Kevin,Cambeiro Pérez Aitor,72,,False,,"[8:58.8, 9:08.3]","[8:58.8, 18:07.1]","[30, 32]","[, ]"
/entryinfo/61089-rally-islas-canarias-2020/2850269/,Climent Nelson - Vega Agustín,Climent Nelson,Vega Agustín,86,,False,,"[10:47.7, 11:10.1, nan, 14:00.6, 11:35.7, 10:4...","[10:47.7, 21:57.8, nan, nan, nan, nan, nan, na...","[84, 81, nan, nan, nan, nan, nan, nan, nan, nan]","[, , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2851807/,Robledano Javier - Suárez Suárez Raquel,Robledano Javier,Suárez Suárez Raquel,93,,False,,"[13:09.8, 13:32.1, nan, 14:57.6, 11:59.5, 10:5...","[13:09.8, 26:41.9, nan, nan, nan, nan, nan, na...","[90, 89, nan, nan, nan, nan, nan, nan, nan, nan]","[, , , , , , , , , ]"
/entryinfo/61089-rally-islas-canarias-2020/2852018/,Sánchez Garrido David - Monzón Cruz Juan,Sánchez Garrido David,Monzón Cruz Juan,68,,False,,"[9:12.2, 9:39.2]","[9:12.2, 18:51.4]","[42, 49]","[, ]"


In [108]:
ewrc.get_stage_times()[1]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,509.8,1038.9,1473.7,2035.7,2515.5,3005.9,3421.8,3952.0,4026.3,4594.6,5098.9,5581.9,6014.4,6580.5,7047.1,7527.2,7941.2
/entryinfo/61089-rally-islas-canarias-2020/2848173/,517.6,1044.1,1490.2,2056.8,2532.0,3016.2,3438.2,3977.6,4058.0,4625.0,5131.5,5614.1,6047.8,6617.7,7079.7,7556.2,7966.2
/entryinfo/61089-rally-islas-canarias-2020/2848183/,509.3,1027.2,1460.4,2018.0,2497.0,2985.3,3402.0,3936.0,4018.8,4595.8,5106.6,5615.0,6075.3,6638.4,7098.6,7580.3,7996.8
/entryinfo/61089-rally-islas-canarias-2020/2848196/,511.7,1049.6,1503.2,2082.6,2567.6,3061.7,3489.8,4025.3,4108.9,4679.1,5194.4,5681.5,6110.5,6676.8,7137.4,7613.6,8018.6
/entryinfo/61089-rally-islas-canarias-2020/2848178/,527.8,1047.9,1504.8,2075.9,2547.4,3030.2,3453.4,4007.0,4086.9,4655.4,5175.3,5674.9,6121.1,6689.0,7158.5,7640.3,8040.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
/entryinfo/61089-rally-islas-canarias-2020/2848189/,538.8,1087.1,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2850269/,647.7,1317.8,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2851807/,789.8,1601.9,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2852018/,552.2,1131.4,,,,,,,,,,,,,,,


In [109]:
ewrc.get_stage_times()[2]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,509.8,529.1,434.8,562.0,479.8,490.4,415.9,530.2,74.3,568.3,504.3,483.0,432.5,566.1,466.6,480.1,414.0
/entryinfo/61089-rally-islas-canarias-2020/2848173/,517.6,526.5,446.1,566.6,475.2,484.2,422.0,539.4,80.4,567.0,506.5,482.6,433.7,569.9,462.0,476.5,410.0
/entryinfo/61089-rally-islas-canarias-2020/2848183/,509.3,517.9,433.2,557.6,479.0,488.3,416.7,534.0,82.8,577.0,510.8,508.4,460.3,563.1,460.2,481.7,416.5
/entryinfo/61089-rally-islas-canarias-2020/2848196/,511.7,537.9,453.6,579.4,485.0,494.1,428.1,535.5,83.6,570.2,515.3,487.1,429.0,566.3,460.6,476.2,405.0
/entryinfo/61089-rally-islas-canarias-2020/2848178/,527.8,520.1,456.9,571.1,471.5,482.8,423.2,553.6,79.9,568.5,519.9,499.6,446.2,567.9,469.5,481.8,400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
/entryinfo/61089-rally-islas-canarias-2020/2848189/,538.8,548.3,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2850269/,647.7,670.1,,840.6,695.7,644.1,540.2,738.7,585.8,585.6,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2851807/,789.8,812.1,,897.6,719.5,655.1,544.1,727.3,593.8,585.8,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2852018/,552.2,579.2,,,,,,,,,,,,,,,


In [110]:
ewrc.get_stage_times()[3]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
entryId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
/entryinfo/61089-rally-islas-canarias-2020/2850324/,3.0,8.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0
/entryinfo/61089-rally-islas-canarias-2020/2848173/,10.0,9.0,9.0,8.0,6.0,6.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0
/entryinfo/61089-rally-islas-canarias-2020/2848183/,2.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
/entryinfo/61089-rally-islas-canarias-2020/2848196/,6.0,13.0,12.0,13.0,12.0,10.0,10.0,7.0,7.0,7.0,6.0,5.0,4.0,4.0,4.0,4.0,4.0
/entryinfo/61089-rally-islas-canarias-2020/2848178/,19.0,12.0,13.0,12.0,9.0,8.0,5.0,5.0,5.0,5.0,5.0,4.0,5.0,5.0,5.0,5.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
/entryinfo/61089-rally-islas-canarias-2020/2848189/,30.0,32.0,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2850269/,84.0,81.0,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2851807/,90.0,89.0,,,,,,,,,,,,,,,
/entryinfo/61089-rally-islas-canarias-2020/2852018/,42.0,49.0,,,,,,,,,,,,,,,


In [112]:
ewrc.get_entry_list()

https://www.ewrc-results.com/entries/61089-rally-islas-canarias-2020/?sct=10&cat=


Unnamed: 0,CarNum,DriverName,CoDriverName,Team,Car,Class,Category,Type,carNum,driverEntry,driverCar


In [86]:
ewrc=EWRC('54464-corsica-linea-tour-de-corse-2019')

https://www.ewrc-results.com/results/54464-corsica-linea-tour-de-corse-2019/?&None


In [87]:
ewrc.get_entry_list()

https://www.ewrc-results.com/entries/54464-corsica-linea-tour-de-corse-2019/?sct=&cat=


Unnamed: 0,CarNum,DriverName,CoDriverName,Team,Car,Class,Category,Type,carNum,driverEntry,driverCar
1,#1,Ogier Sébastien,Ingrassia Julien,Citroën C3 WRCCitroën Total WRT,Citroën C3 WRCCitroën Total WRT,RC1,M,WRC,1,/entryinfo/54464-corsica-linea-tour-de-corse-2...,1: Ogier Sébastien
2,#3,Suninen Teemu,Salminen Marko,Ford Fiesta WRCM-Sport Ford WRT,Ford Fiesta WRCM-Sport Ford WRT,RC1,M,WRC,3,/entryinfo/54464-corsica-linea-tour-de-corse-2...,3: Suninen Teemu
3,#4,Lappi Esapekka,Ferm Janne,Citroën C3 WRCCitroën Total WRT,Citroën C3 WRCCitroën Total WRT,RC1,M,WRC,4,/entryinfo/54464-corsica-linea-tour-de-corse-2...,4: Lappi Esapekka
4,#5,Meeke Kris,Marshall Sebastian,Toyota Yaris WRCToyota Gazoo Racing WRT,Toyota Yaris WRCToyota Gazoo Racing WRT,RC1,M,WRC,5,/entryinfo/54464-corsica-linea-tour-de-corse-2...,5: Meeke Kris
5,#6,Sordo Dani,del Barrio Carlos,Hyundai i20 Coupe WRCHyundai Shell Mobis WRT,Hyundai i20 Coupe WRCHyundai Shell Mobis WRT,RC1,M,WRC,6,/entryinfo/54464-corsica-linea-tour-de-corse-2...,6: Sordo Dani
...,...,...,...,...,...,...,...,...,...,...,...
91,#106,Lemaire Martin,Barboni Philippe,Citroën C2 R2 Max,Citroën C2 R2 Max,RC4,,WRC,106,/entryinfo/54464-corsica-linea-tour-de-corse-2...,106: Lemaire Martin
92,#107,Bracconi Emmanuel,Saoletti Florian,Renault Twingo RS R1,Renault Twingo RS R1,RC5,,WRC,107,/entryinfo/54464-corsica-linea-tour-de-corse-2...,107: Bracconi Emmanuel
93,#108,d'Ulivo Nicolas,Paolini Angélique,Renault Twingo RS R1,Renault Twingo RS R1,RC5,,WRC,108,/entryinfo/54464-corsica-linea-tour-de-corse-2...,108: d'Ulivo Nicolas
94,#109,Dutreuil Guy,Mallon Laurent,Citroën DS3 R1,Citroën DS3 R1,RC5,,WRC,109,/entryinfo/54464-corsica-linea-tour-de-corse-2...,109: Dutreuil Guy


In [88]:
ewrc.get_stage_results('SS2')[0]

https://www.ewrc-results.com//results/54464-corsica-linea-tour-de-corse-2019/?s=218771/?
Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64') ['Pos', 'CarNum', 'Desc', 'Class', 'Time', 'GapDiff', 'Speedkm']


Unnamed: 0,0,1,2,3,4,5,6
0,1.,#5,Meeke Kris - Marshall Sebastian Toyota Yaris WRC,RC1 M,14:23.6,,108.1
1,2.,#8,Tänak Ott - Järveoja Martin Toyota Yaris WRC,RC1 M,14:25.0,+1.4 +1.4,108.0 0.05
2,3.,#6,Sordo Dani - del Barrio Carlos Hyundai i20 Cou...,RC1 M,14:26.5,+2.9 +1.5,107.8 0.11
3,4.,#11,Neuville Thierry - Gilsoul Nicolas Hyundai i20...,RC1 M,14:27.7,+4.1 +1.2,107.6 0.16
4,5.,#33,Evans Elfyn - Martin Scott Ford Fiesta WRC,RC1 M,14:28.8,+5.2 +1.1,107.5 0.20
...,...,...,...,...,...,...,...
86,87.,#109,Dutreuil Guy - Mallon Laurent Citroën DS3 R1,RC5,20:01.3,+5:37.7 +6.1,77.7 13.0
87,88.,#61,Sassi Alberto - Cangini Fabio Abarth 124 Rally...,RGT,21:27.1,+7:03.5 +1:25.8,72.6 16.3
88,89.,#84,Fraymouth François - Felicelli Romain Peugeot ...,RC4,22:18.8,+7:55.2 +51.7,69.8 18.3
89,90.,#43,Simonetti Robert - Simonetti Célia Citroën DS3...,RC1,23:42.0,+9:18.4 +1:23.2,65.7 21.5


retirement


Unnamed: 0,Pos,CarNum,Desc,Class,Time,GapDiff,Speedkm,Stage,StageName,StageDist,Gap,Diff,Speed,Dist,entryId,model,navigator,PosNum
0,1.,#5,Meeke Kris - Marshall Sebastian Toyota Yaris WRC,RC1,14:23.6,,108.1,SS2,Valinco 1,25.94,0.0,0.0,108.1,,[Toyota Yaris WRC],Meeke Kris,Marshall Sebastian,1
1,2.,#8,Tänak Ott - Järveoja Martin Toyota Yaris WRC,RC1,14:25.0,+1.4 +1.4,108.0 0.05,SS2,Valinco 1,25.94,1.4,1.4,108.0,0.05,[Toyota Yaris WRC],Tänak Ott,Järveoja Martin,2
2,3.,#6,Sordo Dani - del Barrio Carlos Hyundai i20 Cou...,RC1,14:26.5,+2.9 +1.5,107.8 0.11,SS2,Valinco 1,25.94,2.9,1.5,107.8,0.11,[Hyundai i20 Coupe WRC],Sordo Dani,del Barrio Carlos,3
3,4.,#11,Neuville Thierry - Gilsoul Nicolas Hyundai i20...,RC1,14:27.7,+4.1 +1.2,107.6 0.16,SS2,Valinco 1,25.94,4.1,1.2,107.6,0.16,[Hyundai i20 Coupe WRC],Neuville Thierry,Gilsoul Nicolas,4
4,5.,#33,Evans Elfyn - Martin Scott Ford Fiesta WRC,RC1,14:28.8,+5.2 +1.1,107.5 0.20,SS2,Valinco 1,25.94,5.2,1.1,107.5,0.20,[Ford Fiesta WRC],Evans Elfyn,Martin Scott,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,87.,#109,Dutreuil Guy - Mallon Laurent Citroën DS3 R1,RC5,20:01.3,+5:37.7 +6.1,77.7 13.0,SS2,Valinco 1,25.94,337.7,6.1,77.7,13.0,[Citroën DS3 R1],Dutreuil Guy,Mallon Laurent,87
87,88.,#61,Sassi Alberto - Cangini Fabio Abarth 124 Rally...,RGT,21:27.1,+7:03.5 +1:25.8,72.6 16.3,SS2,Valinco 1,25.94,423.5,85.8,72.6,16.3,[Abarth 124 Rally RGT],Sassi Alberto,Cangini Fabio,88
88,89.,#84,Fraymouth François - Felicelli Romain Peugeot ...,RC4,22:18.8,+7:55.2 +51.7,69.8 18.3,SS2,Valinco 1,25.94,475.2,51.7,69.8,18.3,[Peugeot 208 R2],Fraymouth François,Felicelli Romain,89
89,90.,#43,Simonetti Robert - Simonetti Célia Citroën DS3...,RC1,23:42.0,+9:18.4 +1:23.2,65.7 21.5,SS2,Valinco 1,25.94,558.4,83.2,65.7,21.5,[Citroën DS3 WRC],Simonetti Robert,Simonetti Célia,90


In [89]:
ewrc.get_stage_results('SS2')[1]

Unnamed: 0,PosChange,CarNum,Desc,Class,Time,GapDiff,Speedkm,Stage,StageName,StageDist,Pos,Change,Gap,Diff,Speed,Dist
0,1. +3,#8,Tänak Ott - Järveoja Martin Toyota Yaris WRC,RC1,24:48.8,,105.3,SS2,Valinco 1,25.94,1,+3,0.0,0.0,105.3,
1,2. −1,#33,Evans Elfyn - Martin Scott Ford Fiesta WRC,RC1,24:49.4,+0.6 +0.6,105.2 0.01,SS2,Valinco 1,25.94,2,−1,0.6,0.6,105.2,0.01
2,3.,#6,Sordo Dani - del Barrio Carlos Hyundai i20 Cou...,RC1,24:50.2,+1.4 +0.8,105.2 0.03,SS2,Valinco 1,25.94,3,,1.4,0.8,105.2,0.03
3,4. −2,#11,Neuville Thierry - Gilsoul Nicolas Hyundai i20...,RC1,24:51.2,+2.4 +1.0,105.1 0.06,SS2,Valinco 1,25.94,4,−2,2.4,1.0,105.1,0.06
4,5. +2,#4,Lappi Esapekka - Ferm Janne Citroën C3 WRC,RC1,24:58.2,+9.4 +7.0,104.6 0.22,SS2,Valinco 1,25.94,5,+2,9.4,7.0,104.6,0.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,87. −47,#84,Fraymouth François - Felicelli Romain Peugeot ...,RC4,34:24.0,+9:35.2 +8.3,75.9 13.2,SS2,Valinco 1,25.94,87,−47,575.2,8.3,75.9,13.2
87,88. +1,#109,Dutreuil Guy - Mallon Laurent Citroën DS3 R1,RC5,34:31.8,+9:43.0 +7.8,75.7 13.4,SS2,Valinco 1,25.94,88,+1,583.0,7.8,75.7,13.4
88,89. +2,#64,Kemp Pierre - Kemp Jean Marc Renault Clio R3,RC3,34:51.2 0:20,+10:02.4 +19.4,75.7 13.8,SS2,Valinco 1,25.94,89,+2,602.4,19.4,75.7,13.8
89,90. +3,#43,Simonetti Robert - Simonetti Célia Citroën DS3...,RC1,42:28.1,+17:39.3 +7:36.9,61.5 24.3,SS2,Valinco 1,25.94,90,+3,1059.3,456.9,61.5,24.3


In [90]:
ewrc.get_stage_results('SS2')[2]

Unnamed: 0,CarNum,driverNav,Model,Status,Driver,CoDriver,Stage
0,#77,Johnston Sean - Kihurani Alexander,Ford Fiesta R2T19,Accident,Johnston Sean,Kihurani Alexander,SS2
1,#102,Donati Xavier - Ciavaldini France,Peugeot 208 R2,Mechanical,Donati Xavier,Ciavaldini France,SS2


In [91]:
ewrc.get_stage_results('SS2')[3]

Unnamed: 0,CarNum,driverNav,Model,PenReason,Driver,CoDriver,Stage,Time,Reason
0,#52,Göttig Stefan - Solbach-Schmidt N.,Škoda Fabia R5,0:10 - Late (1 min) at TC,Göttig Stefan,Solbach-Schmidt N.,SS2,0:10,- Late (1 min) at TC
