# Driver Splits

Script to rebase the split times for a stage and display them relative to a specified driver.

The intention is to generate a report on a stage that is meaningful to a specified driver.

Ideally the report should:

- show where the driver finished on the stage (stage rank)
- show the running stage delta at each split compared to each other driver
- show the extent to which a driver gained or lost time on each split compared to each other driver
- show the start order (so that this can be related to stage rank)
- identify the overall position at the end of the stage for each driver
- show whether overall positions were gained or lost after the stage (not implemented yet; need a +=- column)

In [1]:
import notebookimport

if __name__=='__main__':
    typ = 'overall' #this defines ???
    #rebase='overallleader' #TO DO
    rebase='OGI'#'PAD'
    MAXINSPLITDELTA=20 #set xlim on the within split delta
    ss='SS9'
    
    #The drivercode inbuilds some intelligence
    drivercode=rebase

In [2]:
sr = __import__("Charts - Stage Results")
ssd = __import__("Charts - Split Sector Delta")

importing Jupyter notebook from Charts - Stage Results.ipynb
importing Jupyter notebook from Charts - Split Sector Delta.ipynb


In [3]:
#!pip3 install pytablewriter

Set up a connection to a simple SQLite database, and specify some metadata relating to the actual rally we are interested in.

In [4]:
import os
import sqlite3
import pandas as pd
import pytablewriter
import six
from numpy import NaN

#dbname='wrc18.db'
#dbname='france18.db'
#conn = sqlite3.connect(dbname)

if __name__=='__main__':
    #dbname='wrc18.db'
    dbname='australia18.db'
    conn = sqlite3.connect(dbname)
    rally='Australia'
    rc='RC1'
    year=2018
    #ss='SS4'

In [5]:
if __name__=='__main__':
    #This doesn't appear to be used elsewhere in this notebook
    #May support logic for checking stage status?
    stagedetails = sr.dbGetRallyStages(conn, rally).sort_values('number')
    stagedetails.head()

In [6]:
if __name__=='__main__':
    #Let's see what data is available to us in the stagerank_overall table
    stagerank_overall = sr.getEnrichedStageRank(conn, rally, typ='overall')
    print(stagerank_overall.columns)
    display(stagerank_overall.head())

Index(['diffFirst', 'diffFirstMs', 'diffPrev', 'diffPrevMs', 'entryId',
       'penaltyTime', 'penaltyTimeMs', 'position', 'stageTime', 'stageTimeMs',
       'totalTime', 'totalTimeMs', 'stageId', 'class', 'code', 'distance',
       'name', 'snum', 'driver.code', 'entrant.name', 'classrank',
       'gainedClassPos', 'gainedClassLead', 'classPosDiff', 'lostClassLead',
       'retainedClassLead', 'gainedOverallPos', 'gainedOverallLead',
       'overallPosDiff', 'lostOverallLead', 'retainedOverallLead', 'stagewin',
       'stagewincount', 'winsinarow', 'gainedTime'],
      dtype='object')


Unnamed: 0,diffFirst,diffFirstMs,diffPrev,diffPrevMs,entryId,penaltyTime,penaltyTimeMs,position,stageTime,stageTimeMs,...,retainedClassLead,gainedOverallPos,gainedOverallLead,overallPosDiff,lostOverallLead,retainedOverallLead,stagewin,stagewincount,winsinarow,gainedTime
0,PT0S,0,PT0S,0,3313,PT0S,0,1,PT4M45.5S,285500,...,False,False,False,0.0,False,False,True,1.0,1,False
1,PT0.8S,800,PT0.8S,800,3312,PT0S,0,2,PT4M46.3S,286300,...,False,False,False,0.0,False,False,False,0.0,0,False
2,PT1.2S,1200,PT0.4S,400,3311,PT0S,0,3,PT4M46.7S,286700,...,False,False,False,0.0,False,False,False,0.0,0,False
3,PT1.8S,1800,PT0.6S,600,3308,PT0S,0,4,PT4M47.3S,287300,...,False,False,False,0.0,False,False,False,0.0,0,False
4,PT2.1S,2100,PT0.3S,300,3309,PT0S,0,5,PT4M47.6S,287600,...,False,False,False,0.0,False,False,False,0.0,0,False


In [7]:
if __name__=='__main__':
    #Get the total stage time for specified driver on each stage
    #We can then subtract this from each driver's time to get their times as rebased delta times
    #  compared to the the specified driver
    rebaser = stagerank_overall[stagerank_overall['driver.code']==drivercode][['code','totalTimeMs']].set_index('code').to_dict(orient='dict')['totalTimeMs']
    display(rebaser)

{'SS1': 288100,
 'SS2': 775100,
 'SS3': 1562000,
 'SS4': 1846500,
 'SS5': 2319200,
 'SS6': 3089000,
 'SS7': 3172400,
 'SS8': 3255600,
 'SS9': 3705600,
 'SS10': 4639200,
 'SS11': 5350900,
 'SS12': 5444400,
 'SS13': 5888000,
 'SS14': 6802400,
 'SS15': 7507100,
 'SS16': 7600100,
 'SS17': 7685200,
 'SS18': 7776800}

In [8]:
def rebaseOverallRallyTime(stagerank_overall, drivercode):
    ''' Rebase overall stage rank relative to a specified driver. '''
    #Get the time for each stage for a particular driver
    rebaser = stagerank_overall[stagerank_overall['driver.code']==drivercode][['code','totalTimeMs']].set_index('code').to_dict(orient='dict')['totalTimeMs']
    #The stagerank_overall['code'].map(rebaser) returns the total time for each stage achieved by the rebase driver
    # stagerank_overall['code'] identifies the stage
    #Subtract this rebase time from the overall stage time for each driver by stage
    stagerank_overall['rebased'] = stagerank_overall['totalTimeMs'] - stagerank_overall['code'].map(rebaser)
    return stagerank_overall

In [9]:
if __name__=='__main__':
    #Preview the stagerank_overall contents for a particular stage
    display(stagerank_overall[stagerank_overall['code']==ss][['driver.code','position','totalTimeMs','code']])

Unnamed: 0,driver.code,position,totalTimeMs,code
96,OST,1,3660400,SS9
97,LAT,2,3667400,SS9
98,BRE,3,3669100,SS9
99,PAD,4,3670800,SS9
100,TÄN,5,3677500,SS9
101,LAP,6,3691000,SS9
102,OGI,7,3705600,SS9
103,EVA,8,3714100,SS9
104,SUN,9,3724100,SS9
105,NEU,10,3742100,SS9


In [10]:
def rebased_stage_stagerank(conn,rally,ss,drivercode,typ='overall'):
    ''' Calculate the rebased time for each driver, in a specified stage (ss),
        relative to a specified driver (drivercode). '''
    stagerank_overall = sr.getEnrichedStageRank(conn, rally, typ=typ)
    zz=rebaseOverallRallyTime(stagerank_overall, drivercode)#, ss)
    #Get the rebased times for a particular stage
    zz=zz[zz['code']==ss][['driver.code','position','totalTimeMs','code', 'rebased']].set_index('driver.code')
    #Scale down the time from milliseconds to seconds
    zz['Overall Time']=-zz['rebased']/1000
    return zz

In [11]:
if __name__=='__main__':
    zz=rebased_stage_stagerank(conn,rally,ss, drivercode)
    display(zz)

Unnamed: 0_level_0,position,totalTimeMs,code,rebased,Overall Time
driver.code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
OST,1,3660400,SS9,-45200,45.2
LAT,2,3667400,SS9,-38200,38.2
BRE,3,3669100,SS9,-36500,36.5
PAD,4,3670800,SS9,-34800,34.8
TÄN,5,3677500,SS9,-28100,28.1
LAP,6,3691000,SS9,-14600,14.6
OGI,7,3705600,SS9,0,0.0
EVA,8,3714100,SS9,8500,-8.5
SUN,9,3724100,SS9,18500,-18.5
NEU,10,3742100,SS9,36500,-36.5


In [12]:
if __name__=='__main__':
    display(stagerank_overall.columns)

Index(['diffFirst', 'diffFirstMs', 'diffPrev', 'diffPrevMs', 'entryId',
       'penaltyTime', 'penaltyTimeMs', 'position', 'stageTime', 'stageTimeMs',
       'totalTime', 'totalTimeMs', 'stageId', 'class', 'code', 'distance',
       'name', 'snum', 'driver.code', 'entrant.name', 'classrank',
       'gainedClassPos', 'gainedClassLead', 'classPosDiff', 'lostClassLead',
       'retainedClassLead', 'gainedOverallPos', 'gainedOverallLead',
       'overallPosDiff', 'lostOverallLead', 'retainedOverallLead', 'stagewin',
       'stagewincount', 'winsinarow', 'gainedTime'],
      dtype='object')

In [13]:
if __name__=='__main__':
    #Preview a long format dataframe describing position and stage code for a specified driver
    #This appears not be be referenced anywhere else in this notebook
    stagerank_stage = sr.getEnrichedStageRank(conn, rally, typ='stage')
    stagerank_stage[stagerank_stage['driver.code']==rebase][['position','code']]

In [14]:
if __name__=='__main__':
    splits = ssd.dbGetSplits(conn,rally,ss,rc)

    elapseddurations=ssd.getElapsedDurations(splits)
    elapseddurations.head()

In [15]:
def getRoadPosition(splits):
    ''' Get road position for each driver for a given stage. '''
    roadPos=splits[['drivercode','startDateTime']].drop_duplicates()
    roadPos = roadPos.set_index('drivercode')
    roadPos['Road Position']=roadPos['startDateTime'].rank().astype(int).astype(str)
    return roadPos

#Is this already provided by sr.dbGetStageStart(conn, rally, rc, stages=None) ?

In [16]:
if __name__=='__main__':
    roadPos = getRoadPosition(splits)
    display(roadPos)

Unnamed: 0_level_0,startDateTime,Road Position
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1
SER,2018-11-16T20:08:00,1
NEU,2018-11-16T20:13:00,2
EVA,2018-11-16T20:16:00,3
SUN,2018-11-16T20:19:00,4
OGI,2018-11-16T20:22:00,5
LAP,2018-11-16T20:25:00,6
TÄN,2018-11-16T20:28:00,7
PAD,2018-11-16T20:31:00,8
BRE,2018-11-16T20:34:00,9
LAT,2018-11-16T20:37:00,10


In [17]:
if __name__=='__main__':
    rebasedelapseddurations = ssd.rebaseElapsedDurations(elapseddurations, drivercode)
    display(rebasedelapseddurations.head())

Unnamed: 0,drivercode,elapsedDurationS,startDateTime,section,rebased
0,SER,211.3,2018-11-16T20:08:00,1,29.2
1,SER,341.6,2018-11-16T20:08:00,2,49.5
2,SER,470.3,2018-11-16T20:08:00,3,67.0
45,SER,523.7,2018-11-16T20:08:00,4,73.7
3,NEU,184.4,2018-11-16T20:13:00,1,2.3


In [18]:
def pivotRebasedElapsedDurations(rebasedelapseddurations, ss):
    ''' Pivot rebased elapsed durations (that is, deltas relative target).
        Rows give stage delta at each split for a specific driver. '''
    rbe=-rebasedelapseddurations.pivot('drivercode','section','rebased')
    rbe.columns=list(rbe.columns)[:-1]+['{} Overall'.format(ss)]
    rbe=rbe.sort_values(rbe.columns[-1],ascending = False)
    return rbe

if __name__=='__main__':
    rbe = pivotRebasedElapsedDurations(rebasedelapseddurations, ss)
    display(rbe)

Unnamed: 0_level_0,1,2,3,SS9 Overall
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PAD,5.4,7.9,8.3,9.1
LAT,3.5,6.1,7.7,8.7
MIK,4.0,6.3,6.8,7.7
OST,3.5,6.3,6.4,7.0
TÄN,2.4,4.5,6.3,6.8
BRE,2.5,3.6,4.5,5.1
LAP,1.2,3.4,4.3,4.7
OGI,-0.0,-0.0,-0.0,-0.0
EVA,-2.1,-1.6,-1.4,-1.5
NEU,-2.3,-3.0,-2.8,-2.8


In [19]:
#https://pandas.pydata.org/pandas-docs/stable/style.html
def color_negative(val):
    """
    Takes a scalar and returns a string with
    the css property `'color: red'` for negative
    strings, black otherwise.
    """
    if isinstance(val, str): return ''
    elif val and (isinstance(val,int) or isinstance(val,float)):
        color = 'green' if val < 0 else 'red' if val > 0  else 'black'
    else:
        color='white'
    return 'color: %s' % color

In [20]:
if __name__=='__main__':
    #test of applying style to pandas dataframe
    s = rbe.style.applymap(color_negative)
    display(s)

Unnamed: 0_level_0,1,2,3,SS9 Overall
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PAD,5.4,7.9,8.3,9.1
LAT,3.5,6.1,7.7,8.7
MIK,4.0,6.3,6.8,7.7
OST,3.5,6.3,6.4,7.0
TÄN,2.4,4.5,6.3,6.8
BRE,2.5,3.6,4.5,5.1
LAP,1.2,3.4,4.3,4.7
OGI,-0.0,-0.0,-0.0,-0.0
EVA,-2.1,-1.6,-1.4,-1.5
NEU,-2.3,-3.0,-2.8,-2.8


In [21]:
# TO DO:
# - calculate stage position at each split
# - calculate rank within that sector

In [22]:
if __name__=='__main__':
    #splitdurations are the time in each sector (time take to get from one split to the next)
    splitdurations = ssd.getSplitDurationsFromSplits(conn,rally,ss,rc)
    splitdurations.head()

In [23]:
if __name__=='__main__':
    rebasedSplits = ssd.rebaseSplitDurations(splitdurations, drivercode)
    rebasedSplits.head()

In [24]:
if __name__=='__main__':
    #preview what's available as a splitduration
    display(splitdurations[splitdurations['drivercode'].isin( ['PAD','NEU'])])

Unnamed: 0,drivercode,splitDurationS,startDateTime,stageTimeDurationMs,section,rebased
3,NEU,184.4,2018-11-16T20:13:00,452800.0,1,2.3
4,NEU,110.7,2018-11-16T20:13:00,452800.0,2,0.7
5,NEU,111.1,2018-11-16T20:13:00,452800.0,3,-0.2
41,NEU,46.7,2018-11-16T20:13:00,452800.0,4,0.0
21,PAD,176.7,2018-11-16T20:31:00,440900.0,1,-5.4
22,PAD,107.6,2018-11-16T20:31:00,440900.0,2,-2.4
23,PAD,110.7,2018-11-16T20:31:00,440900.0,3,-0.6
44,PAD,45.9,2018-11-16T20:31:00,440900.0,4,-0.8


In [25]:
def pivotRebasedSplits(rebasedSplits):
    ''' For each driver row, find the split '''
    rbp=-rebasedSplits.pivot('drivercode','section','rebased')
    rbp.columns=['D{}'.format(c) for c in rbp.columns]
    rbp.sort_values(rbp.columns[-1],ascending =True)
    return rbp

if __name__=='__main__':
    rbp = pivotRebasedSplits(rebasedSplits)
    display(rbp)

Unnamed: 0_level_0,D1,D2,D3,D4
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BRE,2.5,1.1,1.0,0.6
EVA,-2.1,0.5,0.3,-0.1
LAP,1.2,2.1,1.0,0.4
LAT,3.5,2.7,1.7,1.0
MIK,4.0,2.3,0.6,0.9
NEU,-2.3,-0.7,0.2,-0.0
OGI,-0.0,-0.0,-0.0,-0.0
OST,3.5,2.8,0.2,0.6
PAD,5.4,2.4,0.6,0.8
SER,-29.2,-20.3,-17.4,-6.7


In [26]:
if __name__=='__main__':
    #Just remind ourselves of what is available in the road position data
    display(roadPos)

Unnamed: 0_level_0,startDateTime,Road Position
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1
SER,2018-11-16T20:08:00,1
NEU,2018-11-16T20:13:00,2
EVA,2018-11-16T20:16:00,3
SUN,2018-11-16T20:19:00,4
OGI,2018-11-16T20:22:00,5
LAP,2018-11-16T20:25:00,6
TÄN,2018-11-16T20:28:00,7
PAD,2018-11-16T20:31:00,8
BRE,2018-11-16T20:34:00,9
LAT,2018-11-16T20:37:00,10


In [27]:
def getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, ss):
    ''' Create a base dataframe for the rebased driver split report. '''
    rb2=pd.merge(rbe,zz[['position','Overall Time']],left_index=True, right_index=True)
        
    #The following is calculated rather than being based on the actual timing data / result for the previous stage
    #Would be better to explicitly grab data for previous stage, along with previous ranking
    rb2['Previous'] =  rb2['Overall Time'] - rb2['{} Overall'.format(ss)]
    #Related to this, would be useful to have an overall places gained / lost column
    
    rb2=pd.merge(rb2,rbp,left_index=True, right_index=True)
    rb2=pd.merge(rb2,roadPos[['Road Position']],left_index=True, right_index=True)
    cols=rb2.columns.tolist()
    #Reorder the columns - move Road Position to first column
    rb2=rb2[[cols[-1]]+cols[:-1]]
    
    #reorder cols
    prev = rb2['Previous']
    rb2.drop(labels=['Previous'], axis=1,inplace = True)
    rb2.insert(1, 'Previous', prev)
    
    return rb2

if __name__=='__main__':
    rb2=getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, ss)
    display(rb2)

Unnamed: 0,Road Position,Previous,1,2,3,SS9 Overall,position,Overall Time,D1,D2,D3,D4
PAD,8,25.7,5.4,7.9,8.3,9.1,4,34.8,5.4,2.4,0.6,0.8
LAT,10,29.5,3.5,6.1,7.7,8.7,2,38.2,3.5,2.7,1.7,1.0
MIK,12,-2478.9,4.0,6.3,6.8,7.7,22,-2471.2,4.0,2.3,0.6,0.9
OST,11,38.2,3.5,6.3,6.4,7.0,1,45.2,3.5,2.8,0.2,0.6
TÄN,7,21.3,2.4,4.5,6.3,6.8,5,28.1,2.4,2.1,1.9,0.5
BRE,9,31.4,2.5,3.6,4.5,5.1,3,36.5,2.5,1.1,1.0,0.6
LAP,6,9.9,1.2,3.4,4.3,4.7,6,14.6,1.2,2.1,1.0,0.4
OGI,5,0.0,-0.0,-0.0,-0.0,-0.0,7,0.0,-0.0,-0.0,-0.0,-0.0
EVA,3,-7.0,-2.1,-1.6,-1.4,-1.5,8,-8.5,-2.1,0.5,0.3,-0.1
NEU,2,-33.7,-2.3,-3.0,-2.8,-2.8,10,-36.5,-2.3,-0.7,0.2,-0.0


In [28]:
if __name__=='__main__':
    display(rb2.dtypes)

Road Position     object
Previous         float64
1                float64
2                float64
3                float64
SS9 Overall      float64
position           int64
Overall Time     float64
D1               float64
D2               float64
D3               float64
D4               float64
dtype: object

In [29]:
#There seems to be missing tenths?
#Elapsed durations are provided in milliseconds. Need to round correctly to tenths?
#Elapsed times grabbed from ssd.dbGetSplits(conn,rally,ss,rc)

def cleanDriverSplitReportBaseDataframe(rb2, ss):
    ''' Tidy up the driver split report dataframe, replacing 0 values with NaNs that can be hidden.
        Check column names and data types. '''
    rb2=rb2.replace(0,NaN)
    #rb2=rb2.fillna('') #This casts columns containing NA to object type which means we can't use nan processing
    rb2.rename(columns={'position': 'Overall Position'}, inplace=True)
    
    rb2['Road Position']=rb2['Road Position'].astype(float)
    return rb2

def __styleDriverSplitReportBaseDataframe(rb2, ss):
    ''' Test if basic dataframe styling.
        DEPRECATED. '''
    s=rb2.fillna('').style.applymap(color_negative,
                                    subset=[c for c in rb2.columns if isinstance(c, int) and c not in ['Overall Position', 'Road Position']])
    #data.style.applymap(highlight_cols, subset=pd.IndexSlice[:, ['B', 'C']])

    s.set_caption("{}: running split times and deltas within each split.".format(ss))
    return s
    
if __name__=='__main__':
    rb2c = cleanDriverSplitReportBaseDataframe(rb2.copy(), ss)
    s = __styleDriverSplitReportBaseDataframe(rb2c, ss)

In [30]:
from IPython.core.display import HTML

if __name__=='__main__':
    html=s.render()
    display(HTML(html))

Unnamed: 0,Road Position,Previous,1,2,3,SS9 Overall,Overall Position,Overall Time,D1,D2,D3,D4
PAD,8,25.7,5.4,7.9,8.3,9.1,4,34.8,5.4,2.4,0.6,0.8
LAT,10,29.5,3.5,6.1,7.7,8.7,2,38.2,3.5,2.7,1.7,1.0
MIK,12,-2478.9,4.0,6.3,6.8,7.7,22,-2471.2,4.0,2.3,0.6,0.9
OST,11,38.2,3.5,6.3,6.4,7.0,1,45.2,3.5,2.8,0.2,0.6
TÄN,7,21.3,2.4,4.5,6.3,6.8,5,28.1,2.4,2.1,1.9,0.5
BRE,9,31.4,2.5,3.6,4.5,5.1,3,36.5,2.5,1.1,1.0,0.6
LAP,6,9.9,1.2,3.4,4.3,4.7,6,14.6,1.2,2.1,1.0,0.4
OGI,5,,,,,,7,,,,,
EVA,3,-7.0,-2.1,-1.6,-1.4,-1.5,8,-8.5,-2.1,0.5,0.3,-0.1
NEU,2,-33.7,-2.3,-3.0,-2.8,-2.8,10,-36.5,-2.3,-0.7,0.2,


In [31]:
from math import nan
def bg_color(s):
    ''' Set background colour sensitive to time gained or lost.
    '''
    attrs=[]
    for _s in s:
        if _s < 0:
            attr = 'background-color: green; color: white'
        elif _s > 0: 
            attr = 'background-color: red; color: white'
        else:
            attr = ''
        attrs.append(attr)
    return attrs

In [32]:
import seaborn as sns

def moreStyleDriverSplitReportBaseDataframe(rb2,ss, caption=None):
    ''' Style the driver split report dataframe. '''
    
    #https://community.modeanalytics.com/gallery/python_dataframe_styling/
    # Set CSS properties for th elements in dataframe
    th_props = [
      ('font-size', '11px'),
      ('text-align', 'center'),
      ('font-weight', 'bold'),
      ('color', '#6d6d6d'),
      ('background-color', '#f7f7f9')
      ]

    # Set CSS properties for td elements in dataframe
    td_props = [
      ('font-size', '11px')
      ]

    # Set table styles
    styles = [
      dict(selector="th", props=th_props),
      dict(selector="td", props=td_props)
      ]
    
    #Define colour palettes
    #cmg = sns.light_palette("green", as_cmap=True)
    #The blue palette helps us scale the Road Position column
    # This may help us to help identify any obvious road position effect when sorting stage times by stage rank
    cm=sns.light_palette((210, 90, 60), input="husl",as_cmap=True)

    s2=(rb2.style
        .background_gradient(cmap=cm, subset=['Road Position' ])
        .applymap(color_negative,
                  subset=[c for c in rb2.columns if isinstance(c, int) and c not in ['Overall Position', 'Road Position']])
        .highlight_min(subset=['Overall Position'], color='lightgrey')
        .highlight_max(subset=['Overall Time'], color='lightgrey')
        .highlight_max(subset=['Previous'], color='lightgrey')
        .apply(bg_color,subset=['{} Overall'.format(ss), 'Overall Time', 'Previous'])
        .bar(subset=[c for c in rb2.columns if str(c).startswith('D')], align='zero', color=[ '#5fba7d','#d65f5f'])
        .set_table_styles(styles)
        
        #.format({'total_amt_usd_pct_diff': "{:.2%}"})
       )
    
    if caption is not None:
        s2.set_caption(caption)

    #nan issue: https://github.com/pandas-dev/pandas/issues/21527
    return s2.render().replace('nan','')

if __name__=='__main__':
    rb2c = cleanDriverSplitReportBaseDataframe(rb2.copy(), ss)
    s2 = moreStyleDriverSplitReportBaseDataframe(rb2c, ss)
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,SS9 Overall,Overall Position,Overall Time,D1,D2,D3,D4
PAD,8,25.7,5.4,7.9,8.3,9.1,4,34.8,5.4,2.4,0.6,0.8
LAT,10,29.5,3.5,6.1,7.7,8.7,2,38.2,3.5,2.7,1.7,1.0
MIK,12,-2478.9,4.0,6.3,6.8,7.7,22,-2471.2,4.0,2.3,0.6,0.9
OST,11,38.2,3.5,6.3,6.4,7.0,1,45.2,3.5,2.8,0.2,0.6
TÄN,7,21.3,2.4,4.5,6.3,6.8,5,28.1,2.4,2.1,1.9,0.5
BRE,9,31.4,2.5,3.6,4.5,5.1,3,36.5,2.5,1.1,1.0,0.6
LAP,6,9.9,1.2,3.4,4.3,4.7,6,14.6,1.2,2.1,1.0,0.4
OGI,5,,,,,,7,,,,,
EVA,3,-7.0,-2.1,-1.6,-1.4,-1.5,8,-8.5,-2.1,0.5,0.3,-0.1
NEU,2,-33.7,-2.3,-3.0,-2.8,-2.8,10,-36.5,-2.3,-0.7,0.2,


In [52]:
if __name__=='__main__':
    sr.dbGetStageRank(conn, rally, rc, typ='stage', stages=ss)[['position','driver.code','classrank']]
#'overall':'stage_times_overall', 'stage_times_overall':'stage_times_overall',
#              'stage':'stage_times_stage', 'stage_times_stage':'stage_times_stage'
#sr.getEnrichedStageRank(conn, rally, typ=typ)

Unnamed: 0,position,driver.code,classrank
0,1,PAD,1.0
1,2,LAT,2.0
2,3,MIK,3.0
3,4,OST,4.0
4,5,TÄN,5.0
5,6,BRE,6.0
6,7,LAP,7.0
7,8,OGI,8.0
8,9,EVA,9.0
9,10,NEU,10.0


In [54]:
if __name__=='__main__':
    sr.getDriverCodeBy(conn, rally, ss,'stage')

'PAD'

In [49]:
def getDriverSplitsReport(conn, rally, ss, drivercode, rc='RC1', typ='overall', order=None, caption=None):
    ''' Generate dataframe report relative to a given driver on a given stage.
            order: sorts table according to: overall | previous | roadpos
    '''
    
    #Allow the drivercode to be relative to a position
    #if drivercode=='firstonroad':
        #allow things like onroad1, onroad2?
    #    drivercode=
    #elif drivercode=='previousfirst':
        #allow things like previous1, previous2?
    #    drivercode = 
    #elif drivercode = 'stagewinner':
        #allowthings like stage1, stage2?
    #    drivercode = 
    
    zz = rebased_stage_stagerank(conn,rally,ss,drivercode, typ=typ)
    splits = ssd.dbGetSplits(conn,rally,ss,rc)
    elapseddurations=ssd.getElapsedDurations(splits)
    roadPos = getRoadPosition(splits)
    rebasedelapseddurations = ssd.rebaseElapsedDurations(elapseddurations, drivercode)
    rbe = pivotRebasedElapsedDurations(rebasedelapseddurations, ss)
    
    #splitdurations are the time in each sector (time take to get from one split to the next)
    splitdurations = ssd.getSplitDurationsFromSplits(conn,rally,ss,rc)
    rebasedSplits = ssd.rebaseSplitDurations(splitdurations, drivercode)
    rbp = pivotRebasedSplits(rebasedSplits)
    
    rb2=getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, ss)
    rb2 = cleanDriverSplitReportBaseDataframe(rb2, ss)
    
    if ss=='SS1':
        rb2['Previous']=NaN

    if order=='overall':
        rb2=rb2.sort_values('Overall Position', ascending=True)
        #rb2=rb2.rename(columns={'Overall Position':'{} Overall*'.format(ss)})
    elif order=='previous':
        rb2=rb2.fillna(0).sort_values('Previous', ascending=False).replace(0,NaN)
        #rb2 = rb2.rename(columns={'Previous':'Previous*'})
    elif order=='roadpos':
        rb2=rb2.sort_values('Road Position', ascending=True)
        #rb2 = rb2.rename(columns={'Road Position':'Road Position*'})
    else:
        pass
        #rb2 = rb2.rename(columns={'{} Overall'.format(ss):'{} Overall*'.format(ss)})

    if caption =='auto':
        caption = 'Rebased stage split times for {}{}.'.format('{}, '.format(drivercode), ss)

    #s = styleDriverSplitReportBaseDataframe(rb2, ss)
    s2 = moreStyleDriverSplitReportBaseDataframe(rb2,ss, caption)
    

    return s2

if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, ss, drivercode, rc, typ)#, caption='auto')
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,SS9 Overall,Overall Position,Overall Time,D1,D2,D3,D4
PAD,8,25.7,5.4,7.9,8.3,9.1,4,34.8,5.4,2.4,0.6,0.8
LAT,10,29.5,3.5,6.1,7.7,8.7,2,38.2,3.5,2.7,1.7,1.0
MIK,12,-2478.9,4.0,6.3,6.8,7.7,22,-2471.2,4.0,2.3,0.6,0.9
OST,11,38.2,3.5,6.3,6.4,7.0,1,45.2,3.5,2.8,0.2,0.6
TÄN,7,21.3,2.4,4.5,6.3,6.8,5,28.1,2.4,2.1,1.9,0.5
BRE,9,31.4,2.5,3.6,4.5,5.1,3,36.5,2.5,1.1,1.0,0.6
LAP,6,9.9,1.2,3.4,4.3,4.7,6,14.6,1.2,2.1,1.0,0.4
OGI,5,,,,,,7,,,,,
EVA,3,-7.0,-2.1,-1.6,-1.4,-1.5,8,-8.5,-2.1,0.5,0.3,-0.1
NEU,2,-33.7,-2.3,-3.0,-2.8,-2.8,10,-36.5,-2.3,-0.7,0.2,


In [37]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS11', 'LAT', rc, typ)
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,SS11 Overall,Overall Position,Overall Time,D1,D2,D3,D4
TÄN,7,-6.3,0.8,1.8,2.1,1.7,2,-4.6,0.8,1.0,0.2,-0.4
LAT,10,,,,,,1,,,,,
MIK,12,-2512.0,-1.7,-2.3,-3.3,-4.7,20,-2516.7,-1.7,-0.6,-1.1,-1.4
LAP,6,-28.2,-2.4,-3.5,-3.4,-4.8,5,-33.0,-2.4,-1.0,,-1.4
PAD,8,-4.1,-3.0,-5.9,-6.9,-8.3,4,-12.4,-3.0,-2.9,-1.1,-1.4
OST,11,3.7,-2.0,-2.8,-7.5,-11.8,3,-8.1,-2.0,-0.8,-4.8,-4.3
EVA,3,-74.2,-4.1,-6.8,-8.7,-12.9,7,-87.1,-4.1,-2.6,-2.0,-4.2
OGI,5,-55.4,-6.4,-9.6,-13.4,-17.7,6,-73.1,-6.4,-3.2,-3.9,-4.3
NEU,2,-99.5,-7.9,-14.4,-17.3,-24.0,10,-123.5,-7.9,-6.5,-3.0,-6.7
SUN,4,-90.0,-8.2,-13.3,-18.1,-25.4,9,-115.4,-8.2,-5.1,-4.9,-7.3


In [38]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS11', 'LAT', rc, typ, 'overall')
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,SS11 Overall,Overall Position,Overall Time,D1,D2,D3,D4
LAT,10,,,,,,1,,,,,
TÄN,7,-6.3,0.8,1.8,2.1,1.7,2,-4.6,0.8,1.0,0.2,-0.4
OST,11,3.7,-2.0,-2.8,-7.5,-11.8,3,-8.1,-2.0,-0.8,-4.8,-4.3
PAD,8,-4.1,-3.0,-5.9,-6.9,-8.3,4,-12.4,-3.0,-2.9,-1.1,-1.4
LAP,6,-28.2,-2.4,-3.5,-3.4,-4.8,5,-33.0,-2.4,-1.0,,-1.4
OGI,5,-55.4,-6.4,-9.6,-13.4,-17.7,6,-73.1,-6.4,-3.2,-3.9,-4.3
EVA,3,-74.2,-4.1,-6.8,-8.7,-12.9,7,-87.1,-4.1,-2.6,-2.0,-4.2
BRE,9,-8.8,-3.2,-4.6,-35.9,-78.6,8,-87.4,-3.2,-1.4,-31.4,-42.7
SUN,4,-90.0,-8.2,-13.3,-18.1,-25.4,9,-115.4,-8.2,-5.1,-4.9,-7.3
NEU,2,-99.5,-7.9,-14.4,-17.3,-24.0,10,-123.5,-7.9,-6.5,-3.0,-6.7


In [39]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS11', 'PAD', rc, typ, 'previous')
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,SS11 Overall,Overall Position,Overall Time,D1,D2,D3,D4
OST,11,7.8,1.0,3.1,-0.6,-3.5,3,4.3,1.0,2.1,-3.7,-2.9
LAT,10,4.1,3.0,5.9,6.9,8.3,1,12.4,3.0,2.9,1.1,1.4
PAD,8,,,,,,4,,,,,
TÄN,7,-2.2,3.8,7.7,9.0,10.0,2,7.8,3.8,3.9,1.3,1.0
BRE,9,-4.7,-0.2,1.3,-29.0,-70.3,8,-75.0,-0.2,1.5,-30.3,-41.3
LAP,6,-24.1,0.6,2.4,3.5,3.5,5,-20.6,0.6,1.9,1.1,
OGI,5,-51.3,-3.4,-3.7,-6.5,-9.4,6,-60.7,-3.4,-0.3,-2.8,-2.9
EVA,3,-70.1,-1.1,-0.9,-1.8,-4.6,7,-74.7,-1.1,0.3,-0.9,-2.8
SUN,4,-85.9,-5.2,-7.4,-11.2,-17.1,9,-103.0,-5.2,-2.2,-3.8,-5.9
NEU,2,-95.4,-4.9,-8.5,-10.4,-15.7,10,-111.1,-4.9,-3.6,-1.9,-5.3


In [50]:
import os
import time
from selenium import webdriver


def getTableImage(url, sn='dummy_table', path='.', delay=5, height=380, width=800):
    ''' Render HTML file in browser and grab a screenshot. '''
    #should be a tmp file?
    #fname='testmap.html'
    #tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
    #folium_map.save(fn)
    browser = webdriver.Chrome()
    browser.set_window_size(width, height)
    browser.get(url)
    #Give the map tiles some time to load
    time.sleep(delay)
    imgfn = '{}/{}/{}.png'.format(os.getcwd(), path,sn)
    browser.save_screenshot(imgfn)
    browser.quit()
    print(imgfn)


def getTablePNG(tablehtml,path='testpng', fnstub='testhtml'):
    ''' Save HTML table as file. '''
    if not os.path.exists(path):
        os.makedirs(path)
    fn='{cwd}/{path}/{fn}.html'.format(cwd=os.getcwd(), path=path,fn=fnstub)
    tmpurl='file://{fn}'.format(fn=fn)
    with open(fn, 'w') as out:
        out.write(tablehtml)
    getTableImage(tmpurl, 'dummy', path)
    #print(tmpurl)

    

if __name__=='__main__':
    getTablePNG(s2)

file:///Users/tonyhirst/Documents/GitHub/WRC_sketches/doodles/testpng/dummy.html


In [41]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS10', 'PAD', rc, typ, 'roadpos')
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,4,5,SS10 Overall,Overall Position,Overall Time,D1,D2,D3,D4,D5,D6
SER,1,-628.0,-28.5,-55.1,-80.7,-108.2,-132.6,-158.6,16,-786.6,-28.5,-26.6,-25.6,-27.7,-24.3,-26.0
NEU,2,-71.3,-4.2,-8.1,-12.9,-15.8,-19.7,-24.1,10,-95.4,-4.2,-3.9,-4.8,-3.0,-3.8,-4.4
EVA,3,-43.3,-4.5,-9.1,-13.8,-18.2,-22.5,-26.8,8,-70.1,-4.5,-4.6,-4.7,-4.5,-4.3,-4.3
SUN,4,-53.3,-6.0,-10.6,-16.2,-21.9,-25.9,-32.6,9,-85.9,-6.0,-4.6,-5.6,-5.7,-4.1,-6.7
OGI,5,-34.8,-3.4,-5.4,-8.8,-11.1,-13.8,-16.5,7,-51.3,-3.4,-2.1,-3.4,-2.4,-2.8,-2.7
LAP,6,-20.2,-3.0,-2.0,-3.4,-3.2,-3.2,-3.9,6,-24.1,-3.0,1.0,-1.4,0.1,,-0.7
TÄN,7,-6.7,0.2,1.9,2.5,4.1,5.1,4.5,4,-2.2,0.2,1.7,0.6,1.5,1.0,-0.6
PAD,8,,,,,,,,3,,,,,,,
BRE,9,1.7,-0.8,-1.1,-1.5,-3.8,-4.3,-6.4,5,-4.7,-0.8,-0.3,-0.4,-2.4,-0.6,-2.1
LAT,10,3.4,-0.6,,-0.4,0.5,0.6,0.7,2,4.1,-0.6,0.6,-0.3,0.7,0.1,0.1


In [39]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS10', 'TÄN', rc, typ,'previous')
    display(HTML(s2))

Unnamed: 0,Road Position,Previous,1,2,3,4,5,SS10 Overall,Overall Position,Overall Time,D1,D2,D3,D4,D5,D6
OST,11,17.1,-0.7,-3.8,-4.5,-5.3,-6.3,-7.1,1,10.0,-0.7,-3.1,-0.7,-0.8,-1.0,-0.8
LAT,10,10.1,-0.8,-1.9,-2.9,-3.6,-4.5,-3.8,2,6.3,-0.8,-1.1,-0.9,-0.8,-0.9,0.7
BRE,9,8.4,-1.0,-3.0,-4.0,-7.9,-9.4,-10.9,5,-2.5,-1.0,-2.0,-1.0,-3.9,-1.6,-1.5
PAD,8,6.7,-0.2,-1.9,-2.5,-4.1,-5.1,-4.5,3,2.2,-0.2,-1.7,-0.6,-1.5,-1.0,0.6
TÄN,7,,,,,,,,4,,,,,,,
LAP,6,-13.5,-3.2,-3.9,-5.9,-7.3,-8.3,-8.4,6,-21.9,-3.2,-0.7,-2.0,-1.4,-1.0,-0.1
OGI,5,-28.1,-3.6,-7.3,-11.3,-15.2,-18.9,-21.0,7,-49.1,-3.6,-3.8,-4.0,-3.9,-3.8,-2.1
EVA,3,-36.6,-4.7,-11.0,-16.3,-22.3,-27.6,-31.3,8,-67.9,-4.7,-6.3,-5.3,-6.0,-5.3,-3.7
SUN,4,-46.6,-6.2,-12.5,-18.7,-26.0,-31.0,-37.1,9,-83.7,-6.2,-6.3,-6.2,-7.2,-5.1,-6.1
NEU,2,-64.6,-4.4,-10.0,-15.4,-19.9,-24.8,-28.6,10,-93.2,-4.4,-5.6,-5.4,-4.5,-4.8,-3.8


In [40]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS18', 'OGI', rc, typ)
    display(HTML(s2))

ValueError: Length mismatch: Expected axis has 0 elements, new values have 1 elements

Problem with the bars is that the range is different in each column; ideally we want the same range in each column; could do this with two dummy rows to force max and min values?

In [None]:
if __name__=='__main__':
    #Example for pandas issue https://github.com/pandas-dev/pandas/issues/21526
    import pandas as pd
    import numpy as np
    
    df=pd.DataFrame({'x1':list(np.random.randint(-10,10,size=10))+[-500,1000, -1000],
               'y1':list(np.random.randint(-5,5,size=13)),'y2':list(np.random.randint(-2,3,size=13)) })
    
    display(df.style.bar( align='zero', color=[ '#5fba7d','#d65f5f']))

In [None]:
if __name__=='__main__':
    #clip lets us set a max limiting range although it means we lose the actual value?
    df['x2']= df['x1'].clip(upper=10, lower=-10)
    display(df.style.bar( align='zero', color=[ '#d65f5f','#5fba7d']))

In [620]:
if __name__=='__main__':
    #for pandas 0.24 ? https://github.com/pandas-dev/pandas/pull/21548
    df['x2']= df['x1'].clip(upper=10, lower=-10)
    #Set axis=None for table wide range?
    #display(df.style.bar( align='zero', axis=None, color=[ '#d65f5f','#5fba7d']))
    