# Functions for grabbing data from WRC API

This package contains a range of functions for grabbing and parsing live timing results data from the WRC website via a simple JSON API that is used to generate the official WRC live timing results web pages.


TO DO - consider a scraper class with a requests session embedded in it.

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
%load_ext pycodestyle_magic
%flake8_on --ignore D100

In [161]:
import requests
import warnings
import json
import pandas as pd
from pandas.io.json import json_normalize

In [162]:
# Cache HTTP responses in a local SQLite store named 'wrc_cache' while
# working in the notebook.
# NOTE(review): expire_after is presumably in seconds (30000 s is roughly
# 8.3 hours) - confirm against the installed requests_cache version.
import requests_cache
requests_cache.install_cache('wrc_cache',
                             backend='sqlite',
                             expire_after=30000)

In [163]:
# TO DO
# There is also an "activeSeasonId": 19 field in the response.
# Is there something we can get there?

In [164]:
from WRCUtils2020 import _isnull, _notnull, _checkattr, _jsInt, listify

In [165]:
# Endpoint that the getItinerary, getStartlist and getCars commands are
# posted to.
# Is this URL constant or picked up relative to each rally?
URL = 'https://www.wrc.com/ajax.php?contelPageId=176146'

In [166]:
def _getresponse(_url, args, ss={'conn': None}, secondtry=False):
    """Get response from a post request."""
    r = None
    if ss['conn'] is None or secondtry:
        try:
            ss['conn'] = requests.Session()
            ss['conn'].get('https://www.wrc.com')
        except:
            return None
        
    try:
        r = ss['conn'].post(_url, data = json.dumps(args))
    except: #requests.exceptions.ConnectionError:
        if not secondtry:
            #If there's an error, try once again
            try:
                _getresponse(_url, args, secondtry = True)
            except:
                return None
        else:
            return None
            
    return r

1:33: B006 Do not use mutable data structures for argument defaults.  They are created during function definition time. All calls to the function reuse this one instance of that data structure, persisting changes between them.
8:9: E722 do not use bare 'except'


Error in callback <bound method VarWatcher.auto_run_flake8 of <pycodestyle_magic.VarWatcher object at 0x109efa8d0>> (for post_run_cell):


ValueError: invalid literal for int() with base 10: '`, it also catches unexpected events like memory errors, interrupts, system exit, and so on.  Prefer `except Exception'

In [167]:
def _get_and_handle_response(_url, args, func, nargs=1,
                             raw=False, renamecols=None,
                             extracols=None, dropcols=None):
    """
    Make request to WRC API.

    Return a raw string or parse the response
    with a provided parser function.

    Args:
        _url: URL the request is posted to.
        args: request payload, handed to ``_getresponse``.
        func: parser called with the response object; if it is not
            callable the raw response text is returned instead.
        nargs: number of items the parser returns; also the length of
            the all-None tuple returned for an empty response.
        raw: if True, return the response text without parsing.
        renamecols: optional ``{old: new}`` column renames.
        extracols: optional ``{name: value}`` columns to add.
        dropcols: optional column name(s) to drop if present.

    Returns:
        The response text (or None when no response was obtained) in raw
        mode; otherwise the parser's result with each dataframe tidied in
        place, or a tuple of ``nargs`` Nones for a null response.
    """
    def _add_cols(frame, cols):
        """Add extra columns to a dataframe, warning about overwrites."""
        if _isnull(frame):
            return

        dupes = set(frame.columns).intersection(cols.keys())
        if dupes:
            warnings.warn("Trying to add pre-existing cols: "
                          f"{', '.join(dupes)}")
        for k in cols:
            frame[k] = cols[k]

    def _tidy(frame):
        """Rename, extend and prune a single parsed table in place."""
        if not isinstance(frame, pd.DataFrame):
            # Parsers may hand back None for a table that is missing from
            # the response; there is nothing to tidy in that case.
            return
        if renamecols:
            # rename() silently skips keys that are not present.
            frame.rename(columns=renamecols, inplace=True)
        if extracols:
            _add_cols(frame, extracols)
        if dropcols:
            frame.drop(columns=dropcols, inplace=True, errors='ignore')

    r = _getresponse(_url, args)
    if raw or not callable(func):
        return None if r is None else r.text

    # Make sure we return the desired number of None items
    # in a tuple as a null response
    if r is None or not r or not r.text or r.text == 'null':
        return (None,) * nargs

    response = func(r)

    if isinstance(response, pd.DataFrame):
        _tidy(response)
    elif isinstance(response, (tuple, list)):
        # Slicing tolerates a parser that returns fewer items than nargs.
        for frame in response[:nargs]:
            _tidy(frame)

    return response

1:1: E302 expected 2 blank lines, found 0
18:80: E501 line too long (81 > 79 characters)


In [168]:
# Default endpoint used by getActiveRally.
ACTIVE_RALLY_URL = 'https://www.wrc.com/ajax.php?contelPageId=171091'

In [169]:
def _parseActiveRally(r):
    """Parse active rally response."""
    event = json_normalize(r.json()).drop(columns='eventDays')
    days = json_normalize(r.json(),
                          'eventDays').drop(columns='spottChannel.assets')
    channels = json_normalize(r.json(),
                              ['eventDays', 'spottChannel', 'assets'])
    return (event, days, channels)


def getActiveRally(_url=None, raw=False, func=_parseActiveRally):
    """Get active rally details.

    Posts the getActiveRally command to ``_url`` (ACTIVE_RALLY_URL when
    none is given) and hands the response to ``func``, dropping the
    'winner.driverImageFormats' column from the parsed tables.
    """
    return _get_and_handle_response(
        _url or ACTIVE_RALLY_URL,
        {"command": "getActiveRally", "context": None},
        func,
        nargs=3,
        raw=raw,
        dropcols='winner.driverImageFormats')


1:6: N802 function name '_parseActiveRally' should be lowercase
11:6: N802 function name 'getActiveRally' should be lowercase


In [170]:
# Fetch the active rally and preview each of the three tables returned,
# then list the columns of the event table.
event, days, channels = getActiveRally()  # Also works with passing URL
display(event.head())
display(days.head())
display(channels.head())
display(event.columns)

Unnamed: 0,id,name,externalIdRally,externalIdEvent,timezone,active,countdown,jwrc,winner,images.format16x9.320x180,...,status.id,status.name,pageInfo.id,pageInfo.title,pageInfo.feTitle,pageInfo.url,pageResult.id,pageResult.title,pageResult.feTitle,pageResult.url
0,102,Rally Sweden,154,125,1,True,True,True,,https://www.wrc.com/images/redaktion/Web-2020/...,...,21,Live Event,1914,Rally Sweden,Rally Sweden,/en/championship/calendar/wrc/rally-sweden/ove...,90029,Rally Sweden,Rally Sweden,/en/results/rally-results/rally-sweden/results/


Unnamed: 0,id,eventDay,spottChannel.id,spottChannel.displayName
0,363,2020-02-13,2,WRC 2020
1,368,2020-02-14,2,WRC 2020
2,378,2020-02-15,2,WRC 2020
3,381,2020-02-16,2,WRC 2020


Unnamed: 0,id,start,startUnix,end,endUnix,duration,alternative.title,alternative.description,alternative.image.480x270,alternative.image.thumbnail,...,content.image.400x225,content.image.800x450,content.image.thumbnail,content.dateTime.date,content.dateTime.timezone_type,content.dateTime.timezone,content.payment.id,content.payment.name,content.status.id,content.status.name
0,352,2020-02-13T18:30:00+00:00,1581618600,2020-02-13T18:55:00+00:00,1581620100,0,Good Evening Rally Fans - Karlstad,,https://ott.wrc.com/image/480/270/5e42945ab466...,https://ott.wrc.com/image/thumbnail/5e42945ab4...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-02-13 18:30:00.000000,1,+00:00,3,Pay,4,Delivered
1,355,2020-02-13T18:55:00+00:00,1581620100,2020-02-13T19:00:00+00:00,1581620400,0,Break,,https://ott.wrc.com/image/480/270/5e42957b8157...,https://ott.wrc.com/image/thumbnail/5e42957b81...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-02-13 18:30:00.000000,1,+00:00,3,Pay,4,Delivered
2,358,2020-02-13T19:00:00+00:00,1581620400,2020-02-13T20:00:00+00:00,1581624000,0,Rally Sweden Opening Karlstad,,https://ott.wrc.com/image/480/270/5e4295f67777...,https://ott.wrc.com/image/thumbnail/5e4295f677...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-02-13 18:30:00.000000,1,+00:00,3,Pay,4,Delivered
3,361,2020-02-14T05:45:00+00:00,1581659100,2020-02-14T06:30:00+00:00,1581661800,0,Good Morning Rally Fans - Service A Torsby,,https://ott.wrc.com/image/480/270/5e42963b034e...,https://ott.wrc.com/image/thumbnail/5e42963b03...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-02-14 05:45:00.000000,1,+00:00,3,Pay,4,Delivered
4,363,2020-02-14T06:30:00+00:00,1581661800,2020-02-14T07:30:00+00:00,1581665400,0,Break,,https://ott.wrc.com/image/480/270/5e42998c42e9...,https://ott.wrc.com/image/thumbnail/5e42998c42...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-02-14 05:45:00.000000,1,+00:00,3,Pay,4,Delivered


Index(['id', 'name', 'externalIdRally', 'externalIdEvent', 'timezone',
       'active', 'countdown', 'jwrc', 'winner', 'images.format16x9.320x180',
       'images.format16x9.160x90', 'images.format16x9.path', 'season.id',
       'season.year', 'season.externalId', 'season.active', 'rally.id',
       'rally.name', 'rally.nation.id', 'rally.nation.name',
       'rally.nation.isoNumCode', 'rally.nation.alpha2', 'rally.nation.alpha3',
       'rally.nation.ioc', 'status.id', 'status.name', 'pageInfo.id',
       'pageInfo.title', 'pageInfo.feTitle', 'pageInfo.url', 'pageResult.id',
       'pageResult.title', 'pageResult.feTitle', 'pageResult.url'],
      dtype='object')

In [171]:
# Show every field of the first event row as a plain dict.
event.loc[0].to_dict()

{'id': 102,
 'name': 'Rally Sweden',
 'externalIdRally': 154,
 'externalIdEvent': 125,
 'timezone': '1',
 'active': True,
 'countdown': True,
 'jwrc': True,
 'winner': None,
 'images.format16x9.320x180': 'https://www.wrc.com/images/redaktion/Web-2020/Web-Content/Events/logos/2020/Sweden_Sweden-generic-logo_1_5ae8e_fz_320x180.png',
 'images.format16x9.160x90': 'https://www.wrc.com/images/redaktion/Web-2020/Web-Content/Events/logos/2020/Sweden_Sweden-generic-logo_1_2d81b_fz_160x90.png',
 'images.format16x9.path': '/fileadmin/redaktion/Web-2020/Web-Content/Events/logos/2020/Sweden_Sweden-generic-logo_1.png',
 'season.id': 19,
 'season.year': 2020,
 'season.externalId': 6,
 'season.active': True,
 'rally.id': 4,
 'rally.name': 'Rally Sweden',
 'rally.nation.id': 953,
 'rally.nation.name': 'Sweden',
 'rally.nation.isoNumCode': 752,
 'rally.nation.alpha2': 'SE',
 'rally.nation.alpha3': 'SWE',
 'rally.nation.ioc': 'SWE',
 'status.id': 21,
 'status.name': 'Live Event',
 'pageInfo.id': 1914,
 '

In [172]:
# Raw https://webappsdata.wrc.com/srv API?
# Need to create separate package to query that API
# Season info
# _url = 'https://webappsdata.wrc.com/srv/wrc/json/api/wrcsrv/byType?t=%22Season%22&maxdepth=1' 
# r = s.get(_url)
# json_normalize(r.json())

4:80: E501 line too long (95 > 79 characters)
4:96: W291 trailing whitespace


In [173]:
# Endpoint used by getActiveSeasonEvents.
CURRENT_SEASON_URL = 'https://www.wrc.com/ajax.php?contelPageId=181782'

In [174]:
def _parseActiveSeasonEvents(r):
    """Parse current season events response."""
    current_season_events = json_normalize(r.json(),
                                           ['rallyEvents', 'items'],
                                           meta='seasonYear').drop(columns='eventDays')
    eventdays = json_normalize(r.json(),
                               ['rallyEvents', 'items',
                                'eventDays']).drop(columns='spottChannel.assets')
    eventchannel = json_normalize(r.json(),
                                  ['rallyEvents', 'items', 'eventDays',
                                   'spottChannel', 'assets'])
    return (current_season_events, eventdays, eventchannel)


# TO DO - can we get events for other seasons?
def getActiveSeasonEvents(raw=False, func=_parseActiveSeasonEvents):
    """Get events for current season.

    Posts the getActiveSeason command to CURRENT_SEASON_URL and hands
    the response to ``func``.
    """
    # A second URL seems to give the same data:
    # https://www.wrc.com/ajax.php?contelPageId=183400
    payload = {"command": "getActiveSeason", "context": None}

    # Not currently applied: dropcols='winner.driverImageFormats'
    return _get_and_handle_response(CURRENT_SEASON_URL, payload, func,
                                    nargs=3, raw=raw)


1:6: N802 function name '_parseActiveSeasonEvents' should be lowercase
5:80: E501 line too long (87 > 79 characters)
8:80: E501 line too long (81 > 79 characters)
16:6: N802 function name 'getActiveSeasonEvents' should be lowercase


In [175]:
# Fetch the current season's events and preview the three tables
# returned, then list the columns of the events and channel tables.
current_season_events, eventdays, eventchannel = getActiveSeasonEvents()
display(current_season_events.head())
display(eventdays.head())
display(eventchannel.head())
display(current_season_events.columns)
eventchannel.columns

Unnamed: 0,id,name,externalIdRally,externalIdEvent,timezone,active,countdown,jwrc,images.format16x9.320x180,images.format16x9.160x90,...,winner.birthDate,winner.birthPlace,winner.debutDate,winner.debutPlace,winner.website,winner.driverImageFormats,winner.externalId,winner.page,winner,seasonYear
0,100,Rallye Monte Carlo,153,124,1,False,False,False,https://www.wrc.com/images/redaktion/Web-2020/...,https://www.wrc.com/images/redaktion/Web-2020/...,...,1988-06-16,Belgium,2009-01-12,Rally de Portugal,https://www.thierryneuville.com/,"[{'id': 6, 'title': 'Format 16:9', 'imageForma...",762.0,,,2020
1,102,Rally Sweden,154,125,1,True,True,True,https://www.wrc.com/images/redaktion/Web-2020/...,https://www.wrc.com/images/redaktion/Web-2020/...,...,,,,,,,,,,2020
2,107,Rally Guanajuato Mexico,155,126,-6,False,False,False,https://www.wrc.com/images/redaktion/Web-2020/...,https://www.wrc.com/images/redaktion/Web-2020/...,...,,,,,,,,,,2020
3,114,Rally Argentina,156,127,-3,False,False,False,https://www.wrc.com/images/redaktion/Web-2020/...,https://www.wrc.com/images/redaktion/Web-2020/...,...,,,,,,,,,,2020
4,116,Rally de Portugal,157,128,1,False,False,False,https://www.wrc.com/images/redaktion/Web-2020/...,https://www.wrc.com/images/redaktion/Web-2020/...,...,,,,,,,,,,2020


Unnamed: 0,id,eventDay,spottChannel.id,spottChannel.displayName
0,334,2020-01-23,2,WRC 2020
1,341,2020-01-24,2,WRC 2020
2,344,2020-01-25,2,WRC 2020
3,355,2020-01-26,2,WRC 2020
4,363,2020-02-13,2,WRC 2020


Unnamed: 0,id,start,startUnix,end,endUnix,duration,alternative.title,alternative.description,alternative.image.480x270,alternative.image.thumbnail,...,content.image.400x225,content.image.800x450,content.image.thumbnail,content.dateTime.date,content.dateTime.timezone_type,content.dateTime.timezone,content.payment.id,content.payment.name,content.status.id,content.status.name
0,64,2020-01-23T18:00:00+00:00,1579802400,2020-01-23T18:30:00+00:00,1579804200,0,Good Evening Rally Fans - Service Gap,,https://ott.wrc.com/image/480/270/5e2618150992...,https://ott.wrc.com/image/thumbnail/5e26181509...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-01-23 18:00:00.000000,1,+00:00,3,Pay,4,Delivered
1,65,2020-01-23T18:30:00+00:00,1579804200,2020-01-23T19:15:00+00:00,1579806900,0,eSPORTS (TV Live),,https://ott.wrc.com/image/480/270/5e1e100fce9e...,https://ott.wrc.com/image/thumbnail/5e1e100fce...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-01-23 18:00:00.000000,1,+00:00,3,Pay,4,Delivered
2,239,2020-01-23T19:15:00+00:00,1579806900,2020-01-23T19:30:00+00:00,1579807800,0,Break,,https://ott.wrc.com/image/480/270/5e25b39e1be1...,https://ott.wrc.com/image/thumbnail/5e25b39e1b...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-01-23 18:00:00.000000,1,+00:00,3,Pay,4,Delivered
3,68,2020-01-23T19:30:00+00:00,1579807800,2020-01-23T20:30:00+00:00,1579811400,0,SS1 Malijai - Puimichel (TV LIVE),,https://ott.wrc.com/image/480/270/5e1dfc31d968...,https://ott.wrc.com/image/thumbnail/5e1dfc31d9...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-01-23 18:00:00.000000,1,+00:00,3,Pay,4,Delivered
4,241,2020-01-23T20:30:00+00:00,1579811400,2020-01-23T21:15:00+00:00,1579814100,0,Break,,https://ott.wrc.com/image/480/270/5e25b3a530fc...,https://ott.wrc.com/image/thumbnail/5e25b3a530...,...,https://ott.wrc.com/image/400/225/placeholder....,https://ott.wrc.com/image/800/450/placeholder....,https://ott.wrc.com/image/thumbnail/placeholde...,2020-01-23 18:00:00.000000,1,+00:00,3,Pay,4,Delivered


Index(['id', 'name', 'externalIdRally', 'externalIdEvent', 'timezone',
       'active', 'countdown', 'jwrc', 'images.format16x9.320x180',
       'images.format16x9.160x90', 'images.format16x9.path', 'season.id',
       'season.year', 'season.externalId', 'season.active', 'rally.id',
       'rally.name', 'rally.nation.id', 'rally.nation.name',
       'rally.nation.isoNumCode', 'rally.nation.alpha2', 'rally.nation.alpha3',
       'rally.nation.ioc', 'status.id', 'status.name', 'pageInfo.id',
       'pageInfo.title', 'pageInfo.feTitle', 'pageInfo.url', 'pageResult.id',
       'pageResult.title', 'pageResult.feTitle', 'pageResult.url', 'winner.id',
       'winner.firstName', 'winner.middleName', 'winner.lastName',
       'winner.nation.id', 'winner.nation.name', 'winner.nation.isoNumCode',
       'winner.nation.alpha2', 'winner.nation.alpha3', 'winner.nation.ioc',
       'winner.birthDate', 'winner.birthPlace', 'winner.debutDate',
       'winner.debutPlace', 'winner.website', 'winner.drive

Index(['id', 'start', 'startUnix', 'end', 'endUnix', 'duration',
       'alternative.title', 'alternative.description',
       'alternative.image.480x270', 'alternative.image.thumbnail',
       'alternative.image.original', 'content.id', 'content.title',
       'content.description', 'content.image.original',
       'content.image.1920x1080', 'content.image.400x225',
       'content.image.800x450', 'content.image.thumbnail',
       'content.dateTime.date', 'content.dateTime.timezone_type',
       'content.dateTime.timezone', 'content.payment.id',
       'content.payment.name', 'content.status.id', 'content.status.name'],
      dtype='object')

In [176]:
# Show every field of the first season event row as a plain dict.
current_season_events.iloc[0].to_dict()

{'id': 100,
 'name': 'Rallye Monte Carlo',
 'externalIdRally': 153,
 'externalIdEvent': 124,
 'timezone': '1',
 'active': False,
 'countdown': False,
 'jwrc': False,
 'images.format16x9.320x180': 'https://www.wrc.com/images/redaktion/Web-2020/Web-Content/Events/logos/2020/ACM_MonteCarlo-generic-logo_1_fa186_fz_320x180.png',
 'images.format16x9.160x90': 'https://www.wrc.com/images/redaktion/Web-2020/Web-Content/Events/logos/2020/ACM_MonteCarlo-generic-logo_1_1b39b_fz_160x90.png',
 'images.format16x9.path': '/fileadmin/redaktion/Web-2020/Web-Content/Events/logos/2020/ACM_MonteCarlo-generic-logo_1.png',
 'season.id': 19,
 'season.year': 2020,
 'season.externalId': 6,
 'season.active': True,
 'rally.id': 2,
 'rally.name': 'Rallye Monte-Carlo',
 'rally.nation.id': 745,
 'rally.nation.name': 'Monaco',
 'rally.nation.isoNumCode': 492,
 'rally.nation.alpha2': 'MC',
 'rally.nation.alpha3': 'MCO',
 'rally.nation.ioc': 'MON',
 'status.id': 25,
 'status.name': 'Post Event',
 'pageInfo.id': 38227,


## getItinerary

In [177]:
# This seems to work with sdbRallyId=None, returning active rally?

def _parseItinerary(r):
    """Split an itinerary response into its component tables.

    Returns a tuple of (itinerary, legs, sections, controls, stages).
    When the legs table is null, every table after the itinerary is None.
    Controls and stages are tagged with the itineraryLegId and
    startListId of the leg they belong to.
    """
    payload = r.json()
    section_path = ['itineraryLegs', 'itinerarySections']
    leg_meta = [['itineraryLegs', 'itineraryLegId'],
                ['itineraryLegs', 'startListId']]
    flat_names = {'itineraryLegs.itineraryLegId': 'itineraryLegId',
                  'itineraryLegs.startListId': 'startListId'}

    def _section_items(key):
        """Flatten one per-section list, carrying the leg identifiers."""
        table = json_normalize(payload, section_path + [key], meta=leg_meta)
        table.rename(columns=flat_names, inplace=True)
        return table

    itinerary = json_normalize(payload).drop(columns='itineraryLegs')
    legs = json_normalize(payload, 'itineraryLegs')

    if not _notnull(legs):
        return (itinerary, None, None, None, None)

    legs = legs.drop(columns='itinerarySections')
    sections = json_normalize(payload, section_path)
    sections = sections.drop(columns=['controls', 'stages'])
    controls = _section_items('controls')
    stages = _section_items('stages')

    return (itinerary, legs, sections, controls, stages)


def getItinerary(sdbRallyId=None, raw=False, func=_parseItinerary):
    """Get itinerary details for specified rally.

    When no rally id is given, the id of the active rally is looked up
    first. Each parsed table gets a 'rallyid' column holding the id used.
    """
    if not sdbRallyId:
        event, _days, _channels = getActiveRally()
        sdbRallyId = int(event.loc[0, 'id'])

    # TO DO - could we annotate with a looked up rally id?
    # Presumably from eg getActiveRally()? Or more generally
    # ActiveSeasonEvents
    extracols = {'rallyid': sdbRallyId} if sdbRallyId else {}

    context = {"sdbRallyId": _jsInt(sdbRallyId)}
    payload = {"command": "getItinerary", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=5,
                                    raw=raw, extracols=extracols)


3:1: E302 expected 2 blank lines, found 1
3:6: N802 function name '_parseItinerary' should be lowercase
10:80: E501 line too long (110 > 79 characters)
12:80: E501 line too long (85 > 79 characters)
14:42: E127 continuation line over-indented for visual indent
15:80: E501 line too long (82 > 79 characters)
16:80: E501 line too long (91 > 79 characters)
18:80: E501 line too long (81 > 79 characters)
20:42: E127 continuation line over-indented for visual indent
21:80: E501 line too long (80 > 79 characters)
22:80: E501 line too long (89 > 79 characters)
28:6: N802 function name 'getItinerary' should be lowercase
28:19: N803 argument name 'sdbRallyId' should be lowercase
32:10: N806 variable 'sdbRallyId' in function should be lowercase
42:5: E265 block comment should start with '# '
44:55: E251 unexpected spaces around keyword / parameter equals
44:57: E251 unexpected spaces around keyword / parameter equals


In [178]:
# Fetch the itinerary for an explicit rally id and preview each of the
# five tables returned.
sdbRallyId = 100
itinerary, legs, sections, controls, stages = getItinerary(sdbRallyId)
display(itinerary.head())
display(legs.head())
display(sections.head())
display(controls.head())
display(stages.head())

Unnamed: 0,itineraryId,eventId,name,priority,rallyid
0,240,124,Itinerary,1,100


Unnamed: 0,itineraryLegId,itineraryId,startListId,name,legDate,order,status,rallyid
0,273,240,451,Thursday 23rd January,2020-01-23,1,Completed,100
1,272,240,452,Friday 24th January,2020-01-24,2,Completed,100
2,275,240,454,Saturday 25th January,2020-01-25,3,Completed,100
3,274,240,456,Sunday 26th January,2020-01-26,4,Completed,100


Unnamed: 0,itinerarySectionId,itineraryLegId,order,name,rallyid
0,637,273,1,Section 1,100
1,638,272,2,Section 2,100
2,639,272,3,Section 3,100
3,640,275,4,Section 4,100
4,641,275,5,Section 5,100


Unnamed: 0,controlId,eventId,stageId,type,code,location,timingPrecision,distance,targetDuration,targetDurationMs,firstCarDueDateTime,firstCarDueDateTimeLocal,status,controlPenalties,roundingPolicy,locked,itineraryLegId,startListId,rallyid
0,6539,124,,TimeControl,TC0,Monaco,Minute,0.0,,,2020-01-23T16:00:00,2020-01-23T17:00:00+01:00,Completed,All,NoRounding,True,273,451,100
1,6543,124,,TimeControl,TC0A,Tyre Fitting Zone IN,Minute,166.33,02:45:00,9900000.0,2020-01-23T18:45:00,2020-01-23T19:45:00+01:00,Completed,All,NoRounding,True,273,451,100
2,6541,124,,TimeControl,TC0B,Tyre Fitting Zone OUT,Minute,0.35,00:15:00,900000.0,2020-01-23T19:00:00,2020-01-23T20:00:00+01:00,Completed,All,NoRounding,True,273,451,100
3,6593,124,1528.0,TimeControl,TC1,Malijai,Minute,17.08,00:35:00,2100000.0,2020-01-23T19:35:00,2020-01-23T20:35:00+01:00,Completed,All,NoRounding,True,273,451,100
4,6592,124,1528.0,StageStart,SS1,Malijai - Puimichel (Live TV),Minute,17.47,00:03:00,180000.0,2020-01-23T19:38:00,2020-01-23T20:38:00+01:00,Interrupted,,RoundToClosestMinute,True,273,451,100


Unnamed: 0,stageId,eventId,number,name,distance,status,stageType,timingPrecision,locked,code,itineraryLegId,startListId,rallyid
0,1528,124,1,Malijai - Puimichel (Live TV),17.47,Interrupted,SpecialStage,Tenth,True,SS1,273,451,100
1,1538,124,2,Bayons - Bréziers,25.49,Completed,SpecialStage,Tenth,True,SS2,273,451,100
2,1533,124,3,Curbans - Venterol 1,20.02,Completed,SpecialStage,Tenth,True,SS3,272,452,100
3,1534,124,4,Saint-Clément - Freissinières 1,20.68,Completed,SpecialStage,Tenth,True,SS4,272,452,100
4,1535,124,5,Avançon - Notre-Dame-du-Laus 1,20.59,Completed,SpecialStage,Tenth,True,SS5,272,452,100


1:2: N816 variable 'sdbRallyId' in global scope should not be mixedCase


In [179]:
# With no rally id, getItinerary looks up the active rally first.
itinerary, legs, sections, controls, stages = getItinerary()
legs

Unnamed: 0,itineraryLegId,itineraryId,startListId,name,legDate,order,status,rallyid
0,287,243,460,Friday 14th February,2020-02-14,2,Completed,102
1,286,243,469,Saturday 15th February,2020-02-15,3,Completed,102
2,285,243,470,Sunday 16th February,2020-02-16,4,Completed,102


In [180]:
# where() blanks every row that fails the condition, so
# last_valid_index() gives the label of the last matching leg
# (None when nothing matches). The first result is not kept.
legs.where(legs['status']=='Running').last_valid_index()
ix = legs.where(legs['status']=='Completed').last_valid_index()
legs.loc[ix]

itineraryLegId                     285
itineraryId                        243
startListId                        470
name              Sunday 16th February
legDate                     2020-02-16
order                                4
status                       Completed
rallyid                            102
Name: 2, dtype: object

1:26: E225 missing whitespace around operator
2:31: E225 missing whitespace around operator


In [181]:
def getCurrentLeg(legs=None):
    """Get the current running leg, or the next leg to run.

    Preference order: the last leg marked 'Running', then the first leg
    marked 'ToRun', then the last leg marked 'Completed'.

    Args:
        legs: legs dataframe with a 'status' column; fetched with
            getItinerary() when not supplied.

    Returns:
        The matching row of ``legs``, or None when there are no legs or
        no leg has one of the statuses above.
    """
    # TO DO - need to know what the values of status are;
    # 'ToRun' in particular still needs checking.

    if _isnull(legs):
        # TO DO - if the class calls this,
        # the data is obtained but not returned
        _itinerary, legs, _sections, _controls, _stages = getItinerary()

    if _isnull(legs):
        return None

    # (status, take the last match?) in order of preference. The next leg
    # to run is the *first* one still waiting, hence False for 'ToRun'.
    preferences = (('Running', True), ('ToRun', False), ('Completed', True))

    for status, take_last in preferences:
        matches = legs.index[legs['status'] == status]
        # Test the number of matches rather than the label's truthiness:
        # a leg sitting at index label 0 is a perfectly valid hit.
        if len(matches):
            return legs.loc[matches[-1] if take_last else matches[0]]

    return None

1:6: N802 function name 'getCurrentLeg' should be lowercase
4:1: W293 blank line contains whitespace
6:80: E501 line too long (80 > 79 characters)
8:1: W293 blank line contains whitespace
9:41: E225 missing whitespace around operator
12:45: E225 missing whitespace around operator
13:1: W293 blank line contains whitespace
15:45: E225 missing whitespace around operator
19:1: W293 blank line contains whitespace


In [182]:
# No legs are passed in here, so getCurrentLeg is expected to fetch the
# itinerary itself.
getCurrentLeg()

itineraryLegId                     285
itineraryId                        243
startListId                        470
name              Sunday 16th February
legDate                     2020-02-16
order                                4
status                       Completed
rallyid                            102
Name: 2, dtype: object

In [183]:
def getStageDetails(stageNum, stages=None):
    """Get stage details from stage number (eg SS1).

    ``stageNum`` may already be a stage code starting with 'SS'; any
    other value is prefixed with 'SS' if ``_jsInt`` accepts it. Returns
    the last row of ``stages`` whose 'code' matches, or None.
    """
    if _isnull(stages):
        # TO DO - if the class calls this,
        # the data is obtained but not returned
        _, _, _, _, stages = getItinerary()

    is_stage_code = isinstance(stageNum, str) and stageNum.startswith('SS')
    if not is_stage_code:
        stageNum = f'SS{stageNum}' if _jsInt(stageNum) else None

    matching = stages.where(stages['code'] == stageNum)
    row_label = matching.last_valid_index()

    return None if row_label is None else stages.loc[row_label]

1:6: N802 function name 'getStageDetails' should be lowercase
1:22: N803 argument name 'stageNum' should be lowercase
4:80: E501 line too long (80 > 79 characters)
9:10: N806 variable 'stageNum' in function should be lowercase
11:10: N806 variable 'stageNum' in function should be lowercase
13:45: E225 missing whitespace around operator
14:1: W293 blank line contains whitespace


In [184]:
# Look up a stage by its code; no stages table is passed in.
getStageDetails('SS4')  # also accepts: '2', 2

stageId                                       1562
eventId                                        125
number                                           4
name               Nyckelvattnet 1 (Live TV Stage)
distance                                     18.94
status                                   Completed
stageType                             SpecialStage
timingPrecision                              Tenth
locked                                        True
code                                           SS4
itineraryLegId                                 287
startListId                                    460
rallyid                                        102
Name: 2, dtype: object

In [202]:
def _parseStartlist(r):
    """Parse raw startlist response."""
    startList = json_normalize(r.json()).drop(columns='startListItems')
    startListItems = json_normalize(r.json(), 'startListItems')

    return (startList, startListItems)

def getStartlistId(stage='', startListId=None, legs=None, stages=None):
    """Get a generic startListId.

    Resolution order:

    1. ``stage`` is itself taken as the startListId when no startListId
       was given and its integer value appears in ``legs``.
    2. A startListId that appears in ``legs`` is returned as it is.
    3. Otherwise a stage designator ('SS4', or anything starting with
       'current') is resolved through the stages table / current leg.
    4. If that still yields nothing usable, the current leg's
       startListId is used.

    Args:
        stage: stage code or number, 'current...', or (as a convenience)
            a startListId.
        startListId: explicit startListId, or a stage code such as 'SS3'.
        legs: optional legs dataframe with a 'startListId' column.
        stages: optional stages dataframe passed on to getStageDetails.

    Returns:
        The resolved startListId, or None if none could be found.
    """
    def _current_leg_startlist_id():
        """Return the current leg's startListId, or None without a leg."""
        leg = getCurrentLeg(legs=legs)
        return None if leg is None else leg['startListId']

    # Compare against the column *values*: `x in series` tests the index,
    # and a dataframe cannot be used directly as a truth value.
    if _isnull(legs):
        known_ids = set()
    else:
        known_ids = {_jsInt(v) for v in legs['startListId'].tolist()}

    # We essentially hack the precedence ordering
    # TO DO - we should warn from this
    # If passed something as first parameter (stage) that is actually
    # a startListId and we have no startListId, use _stage as startListId
    _stage = _jsInt(stage)
    if startListId is None and _stage and _stage in known_ids:
        startListId = _stage

    # If we don't have a valid startListId, try to finesse one from stage
    _id = _jsInt(startListId)
    if _isnull(_id) or _id not in known_ids:
        # If the startListId is a str, is it a stage designator?
        if isinstance(startListId, str) and startListId.startswith('SS'):
            stage = startListId

        if isinstance(stage, str) and stage.lower().startswith('current'):
            startListId = _current_leg_startlist_id()
        elif stage:
            # Look the stage up by the stage designator itself.
            stage_details = getStageDetails(stage, stages=stages)
            if _notnull(stage_details):
                startListId = stage_details['startListId']

        # Fall back to the current leg if nothing usable was resolved,
        # e.g. an unknown stage code left in startListId.
        if not startListId or _isnull(_jsInt(startListId)):
            startListId = _current_leg_startlist_id()

    return startListId


def getStartlist(stage='', startListId=None, legs=None, stages=None,
                 raw=False, func=_parseStartlist):
    """Get a generic startlist.

    The startListId to request is resolved with getStartlistId() from
    the stage / startListId / legs / stages arguments.
    """
    resolved_id = getStartlistId(stage=stage, startListId=startListId,
                                 legs=legs, stages=stages)

    leg_context = {'startListId': _jsInt(resolved_id)}
    payload = {'command': 'getStartlist',
               'context': {'activeItineraryLeg': leg_context}}

    return _get_and_handle_response(URL, payload, func, nargs=2, raw=raw)

1:6: N802 function name '_parseStartlist' should be lowercase
3:6: N806 variable 'startList' in function should be lowercase
4:6: N806 variable 'startListItems' in function should be lowercase
8:1: E302 expected 2 blank lines, found 1
8:6: N802 function name 'getStartlistId' should be lowercase
8:31: N803 argument name 'startListId' should be lowercase
12:70: W291 trailing whitespace
15:80: E501 line too long (81 > 79 characters)
16:10: N806 variable 'startListId' in function should be lowercase
20:80: E501 line too long (92 > 79 characters)
24:80: E501 line too long (84 > 79 characters)
25:14: N806 variable 'startListId' in function should be lowercase
30:18: N806 variable 'startListId' in function should be lowercase
32:14: N806 variable 'startListId' in function should be lowercase
36:6: N802 function name 'getStartlist' should be lowercase
36:29: N803 argument name 'startListId' should be lowercase
39:6: N806 variable 'startListId' in function should be lowercase
43:80: E501 line t

In [208]:
# Scratch calls kept for reference:
# getStartlist('SS4')[0]  # [1].head()
# getStartlist(469)[0], getStartlistId(startListId='SS4')
# Look up the startlist id by passing a stage code as `startListId`.
getStartlistId(startListId='SS3')

<class 'pandas.core.series.Series'>


460

1:1: E265 block comment should start with '# '
2:1: E265 block comment should start with '# '


In [188]:
# Example: fetch a startlist and preview both returned dataframes.
startListId = 451

# NOTE(review): the positional argument binds to `stage`, not `startListId`;
# the recorded output shows startListId 470 rather than 451 - confirm whether
# `getStartlist(startListId=startListId)` was intended.
startList,startListItems = getStartlist(startListId)
display(startList.head())
display(startListItems.head())

Unnamed: 0,startListId,eventId,publishedStatus,name
0,470,125,Published,Sunday


Unnamed: 0,startListItemId,startListId,entryId,startDateTime,startDateTimeLocal,order
0,21898,470,20801,2020-02-16T10:55:00,2020-02-16T11:55:00+01:00,24
1,21899,470,20820,2020-02-16T10:57:00,2020-02-16T11:57:00+01:00,25
2,21900,470,20822,2020-02-16T10:58:00,2020-02-16T11:58:00+01:00,26
3,21901,470,20804,2020-02-16T10:59:00,2020-02-16T11:59:00+01:00,27
4,21902,470,20821,2020-02-16T11:00:00,2020-02-16T12:00:00+01:00,28


1:2: N816 variable 'startListId' in global scope should not be mixedCase
3:2: N816 variable 'startList' in global scope should not be mixedCase
3:2: N816 variable 'startListItems' in global scope should not be mixedCase
3:10: E231 missing whitespace after ','


In [None]:
def _parseCars(r):
    """Split a raw cars response into entry and event-class dataframes.

    Returns a `(cars, classes)` tuple: `cars` is the flattened payload
    without its `eventClasses` column, and `classes` has one row per
    `eventClasses` item, tagged with the owning `entryId`.
    """
    payload = r.json()
    entries = json_normalize(payload)
    event_classes = json_normalize(payload, 'eventClasses', meta='entryId')
    return (entries.drop(columns='eventClasses'), event_classes)


def getCars(sdbRallyId, raw=False, func=_parseCars):
    """Request the cars entered in the rally identified by `sdbRallyId`.

    Sends the `getCars` command and passes the response to
    `_get_and_handle_response` along with `func`, `nargs=2` and `raw`.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId)}
    payload = {"command": "getCars", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=2, raw=raw)

In [None]:
# Example: fetch cars for the rally and preview both dataframes.
# `sdbRallyId` is expected to have been set in an earlier cell.
cars, classes = getCars(sdbRallyId)
display(cars.head())
display(classes.head())
# Last expression: list the column names of the cars dataframe.
cars.head().columns

In [None]:
def _parseRally(r):
    """Split a raw rally response into three dataframes.

    Returns `(rally, eligibilities, groups)`. `rally` is the flattened
    payload without the `eligibilities` and `groups` columns. The other two
    hold one row per item of those lists, each tagged with `rallyId`. The
    eligibility value column (labelled `0` by the normaliser) is renamed to
    `category`.
    """
    payload = r.json()
    nested_cols = ['eligibilities', 'groups']

    rally = json_normalize(payload).drop(columns=nested_cols)
    eligibilities = json_normalize(payload, 'eligibilities', meta='rallyId')
    eligibilities = eligibilities.rename(columns={0: 'category'})
    groups = json_normalize(payload, 'groups', meta='rallyId')
    return (rally, eligibilities, groups)


def getRally(sdbRallyId, raw=False, func=_parseRally):
    """Request the details of the rally identified by `sdbRallyId`.

    Sends the `getRally` command and passes the response to
    `_get_and_handle_response` with `nargs=3`. A column-rename mapping
    (`rallyId`/`eventId` to their `externalId...` names) and an extra
    `sdbRallyId` column are supplied for that helper to apply.
    """
    payload = {"command": "getRally",
               "context": {"sdbRallyId": _jsInt(sdbRallyId)}}
    renames = {'rallyId': 'externalIdRally',
               'eventId': 'externalIdEvent'}
    extras = {'sdbRallyId': sdbRallyId}

    return _get_and_handle_response(URL, payload, func, nargs=3, raw=raw,
                                    renamecols=renames,
                                    extracols=extras)

In [None]:
# Example: fetch rally details and preview the three returned dataframes.
rally, eligibilities, groups = getRally(sdbRallyId)
display(rally.head())
display(eligibilities.head())
display(groups.head())

In [None]:
def _parseOverall(r):
    """Flatten the JSON body of an overall response into a dataframe."""
    return json_normalize(r.json())


def getOverall(sdbRallyId, stageId, raw=False, func=_parseOverall):
    """Request the overall standings for a rally as of a given stage.

    Sends the `getOverall` command and passes the response to
    `_get_and_handle_response` with `nargs=1`. The original `stageId` is
    supplied as an extra column for that helper to apply.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId),
               "activeStage": {"stageId": _jsInt(stageId)}}
    payload = {"command": "getOverall", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=1,
                                    raw=raw, extracols={'stageId': stageId})

In [None]:
# Example: overall standings after a specific stage.
stageId = 1528
overall = getOverall(sdbRallyId, stageId)
# Last expression: preview the first rows.
overall.head()

In [None]:
def _parseSplitTimes(r):
    """Parser for raw splittimes response.

    Returns a `(splitPoints, entrySplitPointTimes, splitPointTimes)` tuple
    of dataframes built from the JSON payload:

    - `splitPoints`: one row per item in the `splitPoints` list;
    - `entrySplitPointTimes`: one row per item in `entrySplitPointTimes`,
      tagged with the payload's `stageId`;
    - `splitPointTimes`: one row per nested `splitPointTimes` item, also
      tagged with `stageId`.
    """
    payload = r.json()

    splitPoints = json_normalize(payload, 'splitPoints')
    entrySplitPointTimes = json_normalize(payload,
                                          'entrySplitPointTimes',
                                          meta='stageId')
    splitPointTimes = json_normalize(payload,
                                     ['entrySplitPointTimes',
                                      'splitPointTimes'],
                                     meta='stageId')
    # The imported helper is `_notnull`; the previous `_notNull` spelling is
    # not defined anywhere and raised NameError whenever this parser ran.
    # Once the nested times have been unpacked into their own dataframe,
    # the raw list column in the per-entry frame is dropped.
    if _notnull(splitPointTimes):
        entrySplitPointTimes.drop(columns='splitPointTimes', inplace=True)

    return (splitPoints, entrySplitPointTimes, splitPointTimes)


def getSplitTimes(sdbRallyId, stageId,
                  raw=False, func=_parseSplitTimes):
    """Request the split times recorded on one stage of a rally.

    Sends the `getSplitTimes` command and passes the response to
    `_get_and_handle_response` along with `func`, `nargs=3` and `raw`.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId),
               "activeStage": {"stageId": _jsInt(stageId)}}
    payload = {"command": "getSplitTimes", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=3, raw=raw)

In [None]:
# Example: fetch split times for the stage and preview each dataframe.
splitPoints, entrySplitPointTimes, splitPointTimes = getSplitTimes(sdbRallyId, stageId)
display(splitPoints.head())
display(entrySplitPointTimes.head())
display(splitPointTimes.head())

In [None]:
def _parseStageTimes(r):
    """Flatten the JSON body of a stagetimes response into a dataframe."""
    return json_normalize(r.json())


def getStageTimes(sdbRallyId, stageId, raw=False, func=_parseStageTimes):
    """Request the stage times for one stage of a rally.

    Sends the `getStageTimes` command and passes the response to
    `_get_and_handle_response` along with `func`, `nargs=1` and `raw`.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId),
               "activeStage": {"stageId": _jsInt(stageId)}}
    payload = {"command": "getStageTimes", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=1, raw=raw)

In [None]:
# Example: stage times for the rally and stage set in earlier cells.
stagetimes = getStageTimes(sdbRallyId, stageId)
stagetimes.head()

In [None]:
def _parseStagewinners(r):
    """Flatten the JSON body of a stagewinners response into a dataframe."""
    return json_normalize(r.json())

 
def getStagewinners(sdbRallyId, raw=False, func=_parseStagewinners):
    """Request the stage winners for the rally identified by `sdbRallyId`.

    Sends the `getStagewinners` command and passes the response to
    `_get_and_handle_response` along with `func`, `nargs=1` and `raw`.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId)}
    payload = {"command": "getStagewinners", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=1, raw=raw)

In [None]:
# Example: stage winners for the rally set in an earlier cell.
stagewinners = getStagewinners(sdbRallyId)
stagewinners.head()

Should we return empty dataframes with appropriate columns, or `None`?

An advantage of returning an empty dataframe with labelled columns is that we can also use the column value list as a test of a returned column.

We need to be consistent so we can have a common, consistent way of dealing with empty responses. This means things like `is None` or `pd.DataFrame().empty` both have to be handled.

In [None]:
# COLS_PENALTIES=['penaltyId','controlId','entryId','penaltyDurationMs','penaltyDuration','reason']


def _parsePenalties(r):
    """Flatten the JSON body of a penalties response into a dataframe."""
    return json_normalize(r.json())


def getPenalties(sdbRallyId, raw=False, func=_parsePenalties):
    """Request the penalties for the rally identified by `sdbRallyId`.

    Sends the `getPenalties` command and passes the response to
    `_get_and_handle_response` along with `func`, `nargs=1` and `raw`.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId)}
    payload = {"command": "getPenalties", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=1, raw=raw)

In [None]:
# Example: penalties for the rally set in an earlier cell.
penalties = getPenalties(sdbRallyId)
penalties.head()

In [None]:
# COLS_RETIREMENT = ['retirementId','controlId','entryId','reason','retirementDateTime','retirementDateTimeLocal','status']

def _parseRetirements(r):
    """Flatten the JSON body of a retirements response into a dataframe."""
    return json_normalize(r.json())


def getRetirements(sdbRallyId, raw=False, func=_parseRetirements):
    """Request the retirements for the rally identified by `sdbRallyId`.

    Sends the `getRetirements` command and passes the response to
    `_get_and_handle_response` along with `func`, `nargs=1` and `raw`.
    """
    context = {"sdbRallyId": _jsInt(sdbRallyId)}
    payload = {"command": "getRetirements", "context": context}

    return _get_and_handle_response(URL, payload, func, nargs=1, raw=raw)

In [None]:
# Example: retirements for the rally set in an earlier cell.
retirements = getRetirements(sdbRallyId)
retirements.head()

In [None]:
# Endpoint used by the season and championship requests below.
SEASON_URL = 'https://www.wrc.com/ajax.php?contelPageId=186641'

In [None]:
# How can we look these up?
# Maps championship category name to the category code used in requests.
# Codes are kept as strings here.
SEASON_CATEGORIES = {
    'WRC': '35',
    'WRC2': '46',
    'WRC3': '49',
    'JWRC': '58',
}

In [None]:
def _parseSeasonCategory(r):
    """Flatten the JSON body of a season category response into a dataframe."""
    return json_normalize(r.json())


def getSeasonCategory(seasonCategory=SEASON_CATEGORIES['WRC'],
                      raw=False, func=_parseSeasonCategory):
    """Request the championships belonging to one season category.

    Sends the `getSeasonCategory` command to `SEASON_URL` with the category
    code as given (the WRC code by default). The response is passed to
    `_get_and_handle_response` along with `func`, `nargs=1` and `raw`.
    """
    context = {"seasonCategory": seasonCategory}
    payload = {"command": "getSeasonCategory", "context": context}

    return _get_and_handle_response(SEASON_URL, payload, func,
                                    nargs=1, raw=raw)


In [None]:
# Example: championships for the default (WRC) season category.
getSeasonCategory()

In [None]:
# TO DO - what about other seasons?

def getSeasonCategories(seasonCategories=None):
    """Create dataframe of external season categories.

    Args:
        seasonCategories: Optional category code, or iterable of category
            codes, each in the form accepted by `getSeasonCategory`.
            When `None`, every code in `SEASON_CATEGORIES` is requested.

    Returns:
        One dataframe combining the `getSeasonCategory` result for each
        requested code, with a fresh 0..n-1 index. An empty dataframe is
        returned when there is nothing to combine.
    """
    if seasonCategories is None:
        codes = list(SEASON_CATEGORIES.values())
    elif isinstance(seasonCategories, (str, int)):
        # A single code is treated as a one-item list.
        codes = [seasonCategories]
    else:
        # Previously an explicit list was silently ignored and an empty
        # dataframe was returned.
        codes = list(seasonCategories)

    frames = [getSeasonCategory(code) for code in codes]
    frames = [frame for frame in frames if frame is not None]
    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concat once instead.
    return pd.concat(frames, sort=False).reset_index(drop=True)


In [None]:
# Example: combined championships for every known season category.
getSeasonCategories()

In [None]:
# Columns of the season categories dataframe kept for championship lookups.
SC_COLS = ['id', 'category.name', 'externalIdDriver',
           'externalIdCoDriver', 'externalIdManufacturer']


def getSeasonChampionshipCodes():
    """Return the championship codes per season category.

    Selects the `SC_COLS` columns from `getSeasonCategories()` and renames
    the three `externalId...` columns to `drivers`, `codrivers` and
    `manufacturers`.
    """
    short_names = {'externalIdDriver': 'drivers',
                   'externalIdCoDriver': 'codrivers',
                   'externalIdManufacturer': 'manufacturers'}
    return getSeasonCategories()[SC_COLS].rename(columns=short_names)

In [None]:
# Example: championship codes table with the shortened column names.
getSeasonChampionshipCodes()

In [None]:
def _getChampionshipId(category='WRC', typ='drivers'):
    """Return the external championship id for a category and type.

    `category` is a key of `SEASON_CATEGORIES`. `typ` is a column of the
    table returned by `getSeasonChampionshipCodes` (for example `drivers`,
    `codrivers` or `manufacturers`).
    """
    codes_by_id = (getSeasonChampionshipCodes()
                   .set_index('id')
                   .to_dict(orient='index'))
    # Category codes are stored as strings; the `id` index is looked up
    # with the integer form.
    category_codes = codes_by_id[int(SEASON_CATEGORIES[category])]
    return category_codes[typ]


def _getSeasonId():
    """Return the external season id of the active rally as an int.

    Reads `season.externalId` from the first row of the event dataframe
    returned by `getActiveRally`.
    """
    active_event, _days, _channels = getActiveRally()
    season_id = active_event.loc[0, 'season.externalId']
    return int(season_id)


def _parseChampionship(r):
    """Split a raw championship response into three dataframes.

    Returns `(championship, championshipRounds, championshipEntries)`.
    The first is the flattened payload without its two list columns. The
    other two hold one row per item of the corresponding list.
    """
    payload = r.json()
    list_cols = ['championshipRounds', 'championshipEntries']

    summary = json_normalize(payload).drop(columns=list_cols)
    rounds = json_normalize(payload, 'championshipRounds')
    entries = json_normalize(payload, 'championshipEntries')
    return (summary, rounds, entries)


def getChampionship(category='WRC', typ='drivers', season_external_id=None,
                    raw=False, func=_parseChampionship):
    """
    Get Championship details for specified category and championship.

    If no season ID is provided, use the external season id from the
    active rally.

    Args:
        category: Key of `SEASON_CATEGORIES` (for example `'WRC'`).
        typ: Championship type passed to `_getChampionshipId`
            (for example `'drivers'`).
        season_external_id: External season id to query; looked up with
            `_getSeasonId()` when `None`.
        raw: Forwarded to `_get_and_handle_response`.
        func: Parser forwarded to `_get_and_handle_response`.

    Returns:
        Whatever `_get_and_handle_response` returns for `nargs=3`.
    """
    # Only fall back to the active rally's season when the caller gave
    # none; the argument used to be overwritten unconditionally.
    if season_external_id is None:
        season_external_id = _getSeasonId()

    args = {"command": "getChampionship",
            "context": {"season": {"externalId": season_external_id},
                        "activeExternalId": _getChampionshipId(category, typ)}}

    # TODO: championship table decodes cols in other tables
    return _get_and_handle_response(SEASON_URL, args, func,
                                    nargs=3, raw=raw)

In [None]:
# Example: default (WRC drivers) championship; show all three dataframes.
(championship, championshipRounds, championshipEntries) = getChampionship()
display(championship)
display(championshipRounds.head())
display(championshipEntries.head())

In [None]:
# Inspect the season categories as a dict keyed by row index.
getSeasonCategories().to_dict(orient='index')  # [int(SEASON_CATEGORIES['JWRC'])]

In [None]:
def _parseChampionshipStandings(r):
    """Split a raw championship standings response into two dataframes.

    Returns `(championship_standings, round_results)`. The standings hold
    one row per `entryResults` item, tagged with `championshipId`. When
    there are standings, the nested `roundResults` lists are unpacked into
    `round_results` and that column is dropped from the standings.
    Otherwise `round_results` is an empty dataframe.
    """
    payload = r.json()
    standings = json_normalize(payload, 'entryResults',
                               meta='championshipId')
    if standings.empty:
        return (standings, pd.DataFrame())

    standings.drop(columns='roundResults', inplace=True)
    per_round = json_normalize(payload, ['entryResults', 'roundResults'])
    return (standings, per_round)


def getChampionshipStandings(category='WRC', typ='drivers',
                             season_external_id=None,
                             raw=False, func=_parseChampionshipStandings):
    """Get championship standings.

    Args:
        category: Key of `SEASON_CATEGORIES` (for example `'WRC'`).
        typ: Championship type passed to `_getChampionshipId`
            (for example `'drivers'`).
        season_external_id: External season id to query; looked up with
            `_getSeasonId()` when `None`.
        raw: Forwarded to `_get_and_handle_response`.
        func: Parser forwarded to `_get_and_handle_response`.

    Returns:
        Whatever `_get_and_handle_response` returns for `nargs=2`.
        `category` and `typ` are supplied to it as extra columns.
    """
    # Only fall back to the active rally's season when the caller gave
    # none; the argument used to be overwritten unconditionally.
    if season_external_id is None:
        season_external_id = _getSeasonId()

    args = {"command": "getChampionshipStandings",
            "context": {"season": {"externalId": season_external_id},
                        "activeExternalId": _getChampionshipId(category, typ)}}

    return _get_and_handle_response(SEASON_URL, args, func, nargs=2,
                                    raw=raw, extracols={'category': category,
                                                        'championship': typ})

In [None]:
# Example: WRC championship standings and the per-round results.
championship_standings, round_results = getChampionshipStandings('WRC')
display(championship_standings.head())
display(round_results.head())