# WRC Scraper  2020

In [1]:
from WRC_2020_scraper import *

In [2]:
class SeasonBase:
    """Common base for objects that are scoped to a season.

    Args:
        season_external_id: Season identifier; any falsy value is stored as None.
        autoseed: When True and no identifier was supplied, one is looked up
            from the active rally during construction.
    """

    def __init__(self, season_external_id=None, autoseed=False):
        self.season_external_id = season_external_id if season_external_id else None
        if autoseed and not self.season_external_id:
            self._check_season_external_id()

    def _check_season_external_id(self):
        """Ensure season_external_id is set, fetching it if it is missing.

        When a lookup is needed, the event, days and channels values returned
        by getActiveRallyBase() are kept on the instance as well.
        """
        # Nothing to do when an identifier is already present.
        if getattr(self, 'season_external_id', None):
            return

        # The season id is read from the active rally
        # (it is also available from current_season_events).
        self.event, self.days, self.channels = getActiveRallyBase()
        # Cast to a plain int: the returned np.int64 is not JSON serialisable.
        self.season_external_id = int(self.event.loc[0, 'season.externalId'])

# TO DO
class Championship(SeasonBase):
    """Championship placeholder built on SeasonBase.

    Takes no arguments, so the base class is initialised with its defaults
    (no season id, no autoseed).
    """

    def __init__(self):
        super().__init__()


In [3]:
# With autoseed=True and no id supplied, the season id is looked up via getActiveRallyBase().
SeasonBase(autoseed=True).season_external_id

6

In [4]:
# TO DO - define a class for each table
import warnings


class WRCRally_sdb:
    """Base class for things with an sdbRallyId.

    Can also help find an active sdbRallyId.

    Args:
        sdbRallyId: Rally identifier; any falsy value is stored as None.
        live: Accepted so that subclasses can share one signature; this base
            class does not use it.
        autoseed: When True, a missing sdbRallyId is resolved from the active
            rally during construction.
        nowarn: When False, a warning is issued if no sdbRallyId is supplied.
    """
    def __init__(self, sdbRallyId=None, live=False,
                 autoseed=False, nowarn=True,):
        if not nowarn and not sdbRallyId:
            warnings.warn("sdbRallyId should really be set...")

        self.sdbRallyId = sdbRallyId or None

        if autoseed:
            self._checkRallyId(sdbRallyId)

    def _checkRallyId(self, sdbRallyId=None):
        """Return a rally ID or lookup active one.

        Args:
            sdbRallyId: Optional explicit id. When given it takes priority
                over, and replaces, the id stored on the instance.

        Returns:
            The resolved rally id, which is also stored as self.sdbRallyId.
        """
        # Store the merged value. Previously it was computed and then thrown
        # away, so an explicit id was ignored and an instance without a stored
        # id always fell through to the active rally lookup. getattr also
        # covers the case where the attribute has not been created yet.
        self.sdbRallyId = sdbRallyId or getattr(self, 'sdbRallyId', None)
        if not self.sdbRallyId:
            # No id from either source: fall back to the active rally and keep
            # the helper object and the rally name alongside the id.
            self.activerally = WRCActiveRally()
            self.sdbRallyId = self.activerally.sdbRallyId
            self.name = self.activerally.name
        return self.sdbRallyId

In [5]:
class WRCActiveRally(WRCRally_sdb):
    """The currently active rally.

    Construction fetches the data straight away, so sdbRallyId and name are
    populated on the new instance.

    Args:
        live: Stored on the instance as self.live.
    """

    def __init__(self, live=False):
        super().__init__(live=live, nowarn=True)
        self.live = live
        self.fetchData()

    def fetchData(self):
        """Load the active rally and record its id and name on the instance."""
        self.event, self.days, self.channels = getActiveRally()

        # Cast to a plain int: np.int64 is not JSON serialisable.
        self.sdbRallyId = int(self.event.loc[0, 'id'])
        self.name = self.event.loc[0, 'name']

In [6]:
# The constructor calls fetchData(), so the active rally id is set on the new instance.
WRCActiveRally().sdbRallyId

100

In [7]:
# With autoseed=True and no id given, the base class takes the id from WRCActiveRally.
zz = WRCRally_sdb(autoseed=True)
print(zz.sdbRallyId)

100


We use the `.fetchData()` method so as to try not to be greedy. This way, we can define a class and start to work towards only grabbing the data if we need it.

In [103]:
class WRCRetirements(WRCRally_sdb):
    """Retirements for a rally; calling the instance returns the table.

    Args:
        sdbRallyId: Rally identifier (optional).
        live: Passed through to the base class.
        autoseed: Passed through to the base class, which resolves a missing
            rally id from the active rally.
    """

    def __init__(self, sdbRallyId=None, live=False, autoseed=False):
        """Set up the instance and fetch immediately if a rally id is known."""
        super().__init__(sdbRallyId=sdbRallyId, live=live, autoseed=autoseed)

        self.retirements = None

        # Without a rally id nothing is fetched; the table stays None.
        if not self.sdbRallyId:
            return
        self.fetchData(self.sdbRallyId)

    def fetchData(self, sdbRallyId=None):
        """Resolve the rally id, then load the retirements via getRetirements()."""
        self._checkRallyId(sdbRallyId)
        rally_id = self.sdbRallyId
        self.retirements = getRetirements(rally_id)

    def __call__(self):
        """Return whatever is currently stored in self.retirements."""
        return self.retirements

In [106]:
# Autoseed the rally id, then preview the first rows of the retirements table.
zz=WRCRetirements(autoseed=True)
zz.retirements.head(3)

Unnamed: 0,retirementId,controlId,entryId,reason,retirementDateTime,retirementDateTimeLocal,status
0,1475,6591,20710,OFF ROAD,2020-01-23T20:40:00Z,0001-01-01T00:00:00+00:00,Temporary
1,1476,6588,20687,MECHANICAL,2020-01-23T21:57:00Z,0001-01-01T00:00:00+00:00,Temporary
2,1477,6591,20750,OFF ROAD,2020-01-23T22:51:00Z,0001-01-01T00:00:00+00:00,Permanent


In [291]:
class WRCPenalties(WRCRally_sdb):
    """Penalties for a rally; calling the instance returns the table.

    Args:
        sdbRallyId: Rally identifier (optional).
        live: Passed through to the base class.
        autoseed: Passed through to the base class, which resolves a missing
            rally id from the active rally.
    """

    def __init__(self, sdbRallyId=None, live=False, autoseed=False):
        """Set up the instance; data is fetched only when a rally id is known."""
        super().__init__(sdbRallyId=sdbRallyId, live=live, autoseed=autoseed)

        self.penalties = None

        have_rally_id = bool(self.sdbRallyId)
        if have_rally_id:
            self.fetchData(self.sdbRallyId)

    def fetchData(self, sdbRallyId=None):
        """Resolve the rally id, then load the penalties via getPenalties()."""
        self._checkRallyId(sdbRallyId)
        table = getPenalties(self.sdbRallyId)
        self.penalties = table

    def __call__(self):
        """Return whatever is currently stored in self.penalties."""
        return self.penalties

In [292]:
# Autoseed the rally id, then preview the first rows of the penalties table.
zz=WRCPenalties(autoseed=True)
zz.penalties.head(3)

Unnamed: 0,penaltyId,controlId,entryId,penaltyDurationMs,penaltyDuration,reason
0,725,6592,20730,10000,PT10S,FALSE START
1,726,6592,20753,10000,PT10S,FALSE START
2,727,6590,20760,10000,PT10S,1 MIN LATE


In [289]:
# name is only assigned when the rally id was looked up from the active rally (autoseed path).
zz.name

'Rallye Monte Carlo'

In [296]:
# Calling the instance returns the same table as zz.penalties.
zz().head(2)

sdsds


Unnamed: 0,penaltyId,controlId,entryId,penaltyDurationMs,penaltyDuration,reason
0,725,6592,20730,10000,PT10S,FALSE START
1,726,6592,20753,10000,PT10S,FALSE START


In [60]:
# TO DO
#rally, eligibilities, groups = getRally(sdbRallyId)
#splitPoints, entrySplitPointTimes, splitPointTimes = getSplitTimes(sdbRallyId,stageId)
#stagetimes = getStageTimes(sdbRallyId,stageId)
#stagewinners = getStagewinners(sdbRallyId)
#championship = getChampionship()
#championship = getChampionshipStandings()

NameError: name 'getChampionshipStandingsLive' is not defined

In [62]:
class WRCItinerary(WRCRally_sdb):
    """Class for WRC2020 Itinerary.

    After a successful fetch the instance holds five tables: itinerary, legs,
    sections, controls and stages. All five are None until data is fetched.

    Args:
        sdbRallyId: Rally identifier (optional).
        live: Passed through to the base class.
        autoseed: Passed through to the base class, which resolves a missing
            rally id from the active rally.
    """
    def __init__(self, sdbRallyId=None, live=False, autoseed=False):
        """Initialise itinerary class."""
        WRCRally_sdb.__init__(self, sdbRallyId=sdbRallyId,
                              live=live, autoseed=autoseed)

        self.itinerary=None
        self.legs=None
        self.sections=None
        self.controls=None
        self.stages=None

        if self.sdbRallyId:
            # Pass the resolved id: after an autoseed the raw parameter is
            # still None even though self.sdbRallyId has been filled in.
            self.fetchData(self.sdbRallyId)

    def fetchData(self, sdbRallyId=None):
        """Fetch the data from WRC API.

        Args:
            sdbRallyId: Optional explicit rally id; when omitted, the id held
                on the instance (resolved if necessary) is used.
        """
        self._checkRallyId(sdbRallyId)

        # Previously the raw argument was handed to getItinerary(), so an
        # autoseeded instance asked the API for rally id None.
        rally_id = sdbRallyId or self.sdbRallyId

        (self.itinerary, self.legs, self.sections,
         self.controls, self.stages) = getItinerary(rally_id)
        

In [70]:
# Autoseeded itinerary: print the rally id held by the instance.
# NOTE(review): the saved output is None, which the visible class definitions would not
# produce after a successful autoseed - presumably a stale run; re-execute to confirm.
print(WRCItinerary(autoseed=True).sdbRallyId)

None


In [63]:
# An explicit rally id triggers the fetch in __init__; show the legs table.
WRCItinerary(sdbRallyId=100).legs

Unnamed: 0,itineraryLegId,itineraryId,startListId,name,legDate,order,status
0,273,240,451,Thursday 23rd January,2020-01-23,1,Completed
1,272,240,452,Friday 24th January,2020-01-24,2,Completed
2,275,240,454,Saturday 25th January,2020-01-25,3,Completed
3,274,240,456,Sunday 26th January,2020-01-25,4,Completed


In [159]:
class WRCStartlist():
    """Class for WRC2020 Startlist table.

    Args:
        startlistId: Start list identifier; any falsy value is stored as None.
        autoseed: When True (the default) and no id is given, the id is taken
            from the first leg of an autoseeded WRCItinerary.

    Attributes set by fetchData(): startList and startListItems (both None
    until data has been fetched).
    """
    def __init__(self, startlistId=None, autoseed=True):
        self.startListId = startlistId or None
        # Defined up front so both attributes exist even when nothing is fetched.
        self.startList = None
        self.startListItems = None

        if not self.startListId:
            warnings.warn("startListId should really be set..")

        if self.startListId or autoseed:
            # The parameter is spelled ``startlistId``; the bare name
            # ``startListId`` used here before is not a local, so it raised
            # NameError (or silently picked up a notebook global).
            self.fetchData(self.startListId)

    def _checkStartListId(self, startListId=None):
        """Return a startlistId or look one up.

        An explicit argument takes priority over the stored id. If neither is
        available, the id is read from the first leg of the itinerary, creating
        an autoseeded WRCItinerary when the instance does not hold one yet.
        """
        self.startListId = startListId or self.startListId
        if not self.startListId:
            if not hasattr(self, 'itinerary') or not self.itinerary:
                self.itinerary = WRCItinerary(autoseed=True)
                self.sdbRallyId = self.itinerary.sdbRallyId
            # Cast to a plain int for the same reason as the rally ids
            # elsewhere in this notebook (np.int64 is not JSON serialisable).
            self.startListId = int(self.itinerary.legs.loc[0,'startListId'])
        return self.startListId


    def fetchData(self, startListId=None):
        """Resolve the start list id, then load both tables via getStartlist()."""
        self._checkStartListId(startListId)
        startList,startListItems = getStartlist(self.startListId)
        self.startList, self.startListItems = startList,startListItems

In [161]:
# No startListId given: the id of the first leg of an autoseeded itinerary is used.
WRCStartlist(autoseed=True).startList

  import sys


Unnamed: 0,startListId,eventId,publishedStatus,name
0,451,124,Published,Thursday


In [231]:
class WRCCars(WRCRally_sdb):
    """Class for WRC2020 Cars table.

    After a successful fetch the instance holds two tables: cars and classes.
    Both are None until data is fetched.

    Args:
        sdbRallyId: Rally identifier (optional).
        live: Passed through to the base class.
        autoseed: Passed through to the base class, which resolves a missing
            rally id from the active rally.
    """
    def __init__(self, sdbRallyId=None, live=False, autoseed=False):
        """Initialise cars class and fetch if a rally id is known."""
        WRCRally_sdb.__init__(self, sdbRallyId=sdbRallyId,
                              live=live, autoseed=autoseed)

        self.cars=None
        self.classes=None

        if self.sdbRallyId:
            # Pass the resolved id: after an autoseed the raw parameter is
            # still None even though self.sdbRallyId has been filled in.
            self.fetchData(self.sdbRallyId)

    def fetchData(self, sdbRallyId=None):
        """Fetch the cars and classes tables.

        Args:
            sdbRallyId: Optional explicit rally id; when omitted, the id held
                on the instance (resolved if necessary) is used.
        """
        self._checkRallyId(sdbRallyId)
        # Previously the raw argument was handed to getCars(), so an
        # autoseeded instance asked the API for rally id None.
        rally_id = sdbRallyId or self.sdbRallyId
        self.cars, self.classes = getCars(rally_id)

In [233]:
# Build a cars object for the autoseeded (active) rally; the cell displays the object repr.
WRCCars(autoseed=True)

<__main__.WRCCars at 0x11cd18910>

In [197]:
#This class will contain everything about a single rally
class WRCRally(WRCRally_sdb):
    """Class for a rally - stuff where sdbRallyId is required.

    Each get* method builds the matching helper object for self.sdbRallyId,
    keeps it on the instance and returns its table(s). The helpers only fetch
    when they are given a rally id, so with no id the returned tables are None.

    Args:
        sdbRallyId: Rally identifier (optional).
        live: Stored on the instance as self.live.
        autoseed: Passed through to the base class, which resolves a missing
            rally id from the active rally.
    """
    def __init__(self, sdbRallyId=None, live=False, autoseed=False ):
        # Defaults go first: when autoseeding, the base initialiser stores the
        # active rally object in self.activerally, and assigning None after
        # that call (as before) wiped it out again.
        self.live = live
        self.itinerary = None
        self.startListId = None
        self.activerally = None

        WRCRally_sdb.__init__(self, sdbRallyId=sdbRallyId, live=live,
                             autoseed=autoseed)


    def getItinerary(self):
        """Get itinerary.

        Also sets a default startListId (taken from the first leg) if none is
        set yet and the legs table has rows.

        Returns:
            Tuple of (itinerary, legs, sections, controls, stages).
        """

        _i = self.itinerary = WRCItinerary(self.sdbRallyId)

        # legs is either None (nothing fetched) or a table exposing .empty.
        # It must not be used directly in a boolean test: pandas raises
        # ValueError for the truth value of a DataFrame.
        legs = _i.legs
        if not self.startListId and legs is not None and not legs.empty:
            self.startListId = int(legs.loc[0,'startListId'])

        return (_i.itinerary, _i.legs, _i.sections, _i.controls, _i.stages)

    def getCars(self):
        """Get cars for a rally.

        Returns:
            Tuple of (cars, classes).
        """

        _c = self.cars = WRCCars(self.sdbRallyId)

        return (_c.cars, _c.classes)


    def getStartlist(self, startListId=None):
        """Get startlist.

        Args:
            startListId: Optional explicit start list id. When omitted, the id
                stored on the instance is used; if that is also unset,
                WRCStartlist tries to find a default itself.

        Returns:
            Tuple of (startList, startListItems).
        """

        # Honour the argument; it used to be accepted but ignored.
        _s = self.startlist = WRCStartlist(startListId or self.startListId)

        return (_s.startList, _s.startListItems)


    def getPenalties(self):
        """Get penalties.

        Returns:
            The penalties table, also stored as self.penalties.
        """

        self._penalties = WRCPenalties(self.sdbRallyId)
        self.penalties = self._penalties.penalties
        return self.penalties


    def getRetirements(self):
        """Get retirements.

        Returns:
            The retirements table, also stored as self.retirements.
        """

        self._retirements = WRCRetirements(self.sdbRallyId)
        self.retirements = self._retirements.retirements
        return self.retirements
     
    
    
    

In [195]:
# NEXT TO DO - active rally class

In [200]:
# No rally id and no autoseed: WRCRetirements skips the fetch, so this returns None.
zz = WRCRally()
zz.getRetirements()

In [None]:
# TO DO - need a more general season events class?
# If, that is, we can look up arbitrary season events...
class WRCCurrentSeasonEvents:
    """Events of the current season.

    The data is loaded during construction via getCurrentSeasonEvents() and
    stored as current_season_events, eventdays and eventchannel.
    """

    def __init__(self):
        events, days, channel = getCurrentSeasonEvents()
        self.current_season_events = events
        self.eventdays = days
        self.eventchannel = channel


In [755]:
#This class needs renaming...
#What does it actually represent? An event? A live event? A set of events?
class WRC2020(WRCRally):
    """Class for WRC data scrape using 2020 API.

    Args:
        sdbRallyId: Rally identifier (optional), passed to WRCRally.
        live: Stored on the instance as self.live.
    """

    def __init__(self, sdbRallyId=None, live=False):
        WRCRally.__init__(self, sdbRallyId, live)

        self.live = live
        # Created on the first getCurrentSeasonEvents() call and reused afterwards.
        self.currentseasonevents = None


    def getCurrentSeasonEvents(self):
        """Get Current season events.

        Returns:
            Tuple of (current_season_events, eventdays, eventchannel).
        """
        if not self.currentseasonevents:
            self.currentseasonevents = WRCCurrentSeasonEvents()
        # Bind the local outside the ``if``: it used to be assigned only when
        # the object was first created, so every later call raised
        # UnboundLocalError.
        _cse = self.currentseasonevents
        return (_cse.current_season_events, _cse.eventdays, _cse.eventchannel)


    
    
    
    

In [756]:
# The first getCurrentSeasonEvents() call creates wrc.currentseasonevents,
# whose current_season_events table is then displayed.
wrc=WRC2020()
wrc.getCurrentSeasonEvents()
wrc.currentseasonevents.current_season_events

  if __name__ == '__main__':


Unnamed: 0,id,name,externalIdRally,externalIdEvent,timezone,active,countdown,jwrc,winner,images.format16x9.320x180,...,status.name,pageInfo.id,pageInfo.title,pageInfo.feTitle,pageInfo.url,pageResult.id,pageResult.title,pageResult.feTitle,pageResult.url,seasonYear
0,100,Rallye Monte Carlo,153,124,1,True,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Live Event,38227,Rallye-Monte-Carlo,Rallye Monte-Carlo,/en/championship/calendar/wrc/rallye-monte-car...,89623,Rally Monte Carlo,Rally Monte Carlo,/en/results/rally-results/rally-monte-carlo/re...,2020
1,102,Rally Sweden,154,125,2,False,False,True,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,1914,Rally Sweden,Rally Sweden,/en/championship/calendar/wrc/rally-sweden/ove...,90029,Rally Sweden,Rally Sweden,/en/results/rally-results/rally-sweden/results/,2020
2,107,Rally Guanajuato Mexico,155,126,-6,False,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,3083,Rally-Mexico,Rally México,/en/championship/calendar/wrc/rally-mexico/ove...,90044,Rally Mexico,Rally Mexico,/en/results/rally-results/rally-mexico/results/,2020
3,114,Rally Argentina,156,127,-3,False,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,51444,Rally-Argentina,Rally Argentina,/en/championship/calendar/wrc/rally-argentina/...,90072,Rally Argentina,Rally Argentina,/en/results/rally-results/rally-argentina/resu...,2020
4,116,Rally de Portugal,157,128,1,False,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,85219,Rally Portugal,Rally de Portugal,/en/championship/calendar/wrc/rally-de-portuga...,90091,Rally Portugal,Rally Portugal,/en/results/rally-results/rally-portugal/results/,2020
5,126,Rally Italia Sardegna,158,129,2,False,False,True,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,85270,Rally Italia Sardegna,Rally Italia Sardegna,/en/championship/calendar/wrc/rally-italia-sar...,90113,Rally Italia Sardegna,Rally Italia Sardegna,/en/results/rally-results/rally-italia-sardegn...,2020
6,137,Safari Rally Kenya,159,130,3,False,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,85348,Safari Rally Kenya,Rally Italia Sardegna,/en/championship/calendar/wrc/safari-rally-ken...,90134,Safari Rally Kenya,Safari Rally Kenya,/en/results/rally-results/safari-rally-kenya/r...,2020
7,148,Neste Rally Finland,160,131,3,False,False,True,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,85394,Rally Finland,Neste Rally Finland,/en/championship/calendar/wrc/neste-rally-finl...,90150,Rally Finland,Rally Finland,/en/results/rally-results/rally-finland/results/,2020
8,157,Rally New Zealand,161,132,12,False,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,85447,Rally New Zealand,Rally New Zealand,/en/championship/calendar/wrc/rally-new-zealan...,90174,Rally New Zealand,Rally New Zealand,/en/results/rally-results/rally-new-zealand/re...,2020
9,158,Rally Turkey,162,133,3,False,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,Pre Event,85490,Rally Turkey,Rally Turkey,/en/championship/calendar/wrc/rally-turkey/ove...,90193,Rally Turkey,Rally Turkey,/en/results/rally-results/rally-turkey/results/,2020


In [456]:
# Returns the (startList, startListItems) pair for the instance's start list id.
wrc.getStartlist()

(   startListId  eventId publishedStatus      name
 0          451      124       Published  Thursday,
     startListItemId  startListId  entryId         startDateTime  \
 0             20891          451    20745  2020-01-23T17:37:00Z   
 1             20892          451    20743  2020-01-23T17:36:00Z   
 2             20893          451    20722  2020-01-23T17:35:00Z   
 3             20894          451    20741  2020-01-23T17:34:00Z   
 4             20895          451    20740  2020-01-23T17:33:00Z   
 ..              ...          ...      ...                   ...   
 83            20974          451    20702  2020-01-23T16:55:00Z   
 84            20975          451    20703  2020-01-23T16:54:00Z   
 85            20976          451    20707  2020-01-23T16:53:00Z   
 86            20977          451    20728  2020-01-23T17:15:00Z   
 87            20978          451    20683  2020-01-23T16:00:00Z   
 
            startDateTimeLocal  order  
 0   2020-01-23T18:37:00+01:00     64  

In [325]:
# wrc.itinerary is None until wrc.getItinerary() has been called, so run that first.
wrc.itinerary.sections

Unnamed: 0,itinerarySectionId,itineraryLegId,order,name
0,637,273,1,Section 1
1,638,272,2,Section 2
2,639,272,3,Section 3
3,640,275,4,Section 4
4,641,275,5,Section 5
5,642,274,6,Section 6
6,643,274,7,Section 7


In [280]:
# Fetch the main tables for the rally.
# NOTE(review): startListId and sdbRallyId are not defined in any visible cell -
# presumably left over from earlier kernel state or supplied by the star import; confirm
# before relying on Restart & Run All. wrc.startListId / wrc.sdbRallyId may be what is meant.
itinerary, legs, sections, controls, stages = wrc.getItinerary()
startList,startListItems = getStartlist(startListId)
cars, classes = getCars(sdbRallyId)
# Calls not yet wired up:
#rally, eligibilities, groups = getRally(sdbRallyId)
#overall = getOverall(sdbRallyId, stageId)
#splitPoints, entrySplitPointTimes, splitPointTimes = getSplitTimes(sdbRallyId,stageId)
#stagetimes = getStageTimes(sdbRallyId,stageId)
#stagewinners = getStagewinners(sdbRallyId)
#penalties = getPenalties(sdbRallyId)
#retirements = getRetirements(sdbRallyId)
#championship = getChampionship()
#championship = getChampionshipStandingsLive()

In [236]:
# Unpack the three values returned by getCurrentSeasonEvents().
current_season_events, eventdays, eventchannel = wrc.getCurrentSeasonEvents()

In [242]:
# NOTE(review): none of the classes visible in this notebook (WRC2020, WRCRally,
# WRCRally_sdb) defines getActiveRally - presumably it existed in an earlier version
# of the class; confirm, or use WRCActiveRally() / the module-level getActiveRally().
event, days, channels = wrc.getActiveRally()


In [238]:
# Display the event table unpacked in the previous cell.
event

Unnamed: 0,id,name,externalIdRally,externalIdEvent,timezone,active,countdown,jwrc,winner,images.format16x9.320x180,...,status.id,status.name,pageInfo.id,pageInfo.title,pageInfo.feTitle,pageInfo.url,pageResult.id,pageResult.title,pageResult.feTitle,pageResult.url
0,100,Rallye Monte Carlo,153,124,1,True,False,False,,https://www.wrc.com/images/redaktion/Web-2020/...,...,21,Live Event,38227,Rallye-Monte-Carlo,Rallye Monte-Carlo,/en/championship/calendar/wrc/rallye-monte-car...,89623,Rally Monte Carlo,Rally Monte Carlo,/en/results/rally-results/rally-monte-carlo/re...


In [243]:
# Rally id currently held by the wrc instance.
wrc.sdbRallyId

100