# Driver Splits

Script to rebase the split times for a stage and display them relative to a specified driver.

The intention is to generate a report on a stage that is meaningful to a specified driver.

Ideally the report should:

- show where the driver finished on the stage (stage rank)
- show the running stage delta at each split compared to each other driver
- show the extent to which a driver gained or lost time on each split compared to each other driver
- show the start order (so that this can be related to stage rank)
- identify the overall position at the end of the stage for each driver
- show whether overall positions were gained or lost after the stage (not implemented yet; need a +=- column)

In [170]:
import notebookimport

if __name__=='__main__':
    typ = 'overall' #this defines ???
    #rebase='overallleader' #TO DO
    rebase='OGI'#'PAD'
    MAXINSPLITDELTA=20 #set xlim on the within split delta
    ss='SS12'
    
    #The drivercode inbuilds some intelligence
    drivercode=rebase

In [171]:
sr = __import__("Charts - Stage Results")
ssd = __import__("Charts - Split Sector Delta")

In [172]:
#!pip3 install pytablewriter

Set up a connection to a simple SQLite database, and specify some metadata relating to the actual rally we are interested in.

In [173]:
import os
import sqlite3
import pandas as pd
import pytablewriter
import six
from numpy import NaN

#dbname='wrc18.db'
#dbname='france18.db'
#conn = sqlite3.connect(dbname)

if __name__=='__main__':
    #dbname='wrc18.db'
    dbname='australia18.db'
    conn = sqlite3.connect(dbname)
    rally='Australia'
    rc='RC1'
    year=2018
    #ss='SS4'

In [174]:
if __name__=='__main__':
    #This doesn't appear to be used elsewhere in this notebook
    #May support logic for checking stage status?
    stagedetails = sr.dbGetRallyStages(conn, rally).sort_values('number')
    stagedetails.head()

In [175]:
if __name__=='__main__':
    #Let's see what data is available to us in the stagerank_overall table
    stagerank_overall = sr.getEnrichedStageRank(conn, rally, typ='overall')
    print(stagerank_overall.columns)
    display(stagerank_overall.head())

Index(['diffFirst', 'diffFirstMs', 'diffPrev', 'diffPrevMs', 'entryId',
       'penaltyTime', 'penaltyTimeMs', 'position', 'stageTime', 'stageTimeMs',
       'totalTime', 'totalTimeMs', 'stageId', 'class', 'code', 'distance',
       'name', 'snum', 'drivercode', 'entrant.name', 'classrank',
       'gainedClassPos', 'gainedClassLead', 'classPosDiff', 'lostClassLead',
       'retainedClassLead', 'gainedOverallPos', 'gainedOverallLead',
       'overallPosDiff', 'lostOverallLead', 'retainedOverallLead', 'stagewin',
       'stagewincount', 'winsinarow', 'gainedTime'],
      dtype='object')


Unnamed: 0,diffFirst,diffFirstMs,diffPrev,diffPrevMs,entryId,penaltyTime,penaltyTimeMs,position,stageTime,stageTimeMs,...,retainedClassLead,gainedOverallPos,gainedOverallLead,overallPosDiff,lostOverallLead,retainedOverallLead,stagewin,stagewincount,winsinarow,gainedTime
0,PT0S,0,PT0S,0,3313,PT0S,0,1,PT4M45.5S,285500,...,False,False,False,0.0,False,False,True,1.0,1,False
1,PT0.8S,800,PT0.8S,800,3312,PT0S,0,2,PT4M46.3S,286300,...,False,False,False,0.0,False,False,False,0.0,0,False
2,PT1.2S,1200,PT0.4S,400,3311,PT0S,0,3,PT4M46.7S,286700,...,False,False,False,0.0,False,False,False,0.0,0,False
3,PT1.8S,1800,PT0.6S,600,3308,PT0S,0,4,PT4M47.3S,287300,...,False,False,False,0.0,False,False,False,0.0,0,False
4,PT2.1S,2100,PT0.3S,300,3309,PT0S,0,5,PT4M47.6S,287600,...,False,False,False,0.0,False,False,False,0.0,0,False


In [176]:
if __name__=='__main__':
    #Get the total stage time for specified driver on each stage
    #We can then subtract this from each driver's time to get their times as rebased delta times
    #  compared to the the specified driver
    rebaser = stagerank_overall[stagerank_overall['drivercode']==drivercode][['code','totalTimeMs']].set_index('code').to_dict(orient='dict')['totalTimeMs']
    display(rebaser)

{'SS1': 288100,
 'SS2': 775100,
 'SS3': 1562000,
 'SS4': 1846500,
 'SS5': 2319200,
 'SS6': 3089000,
 'SS7': 3172400,
 'SS8': 3255600,
 'SS9': 3705600,
 'SS10': 4639200,
 'SS11': 5350900,
 'SS12': 5444400,
 'SS13': 5888000,
 'SS14': 6802400,
 'SS15': 7507100,
 'SS16': 7600100,
 'SS17': 7685200,
 'SS18': 7776800,
 'SS19': 8369200,
 'SS20': 9065300,
 'SS21': 9324200,
 'SS22': 9947100,
 'SS23': 10686600,
 'SS24': 10942800}

In [177]:
def rebaseOverallRallyTime(stagerank_overall, drivercode):
    ''' Rebase overall stage rank relative to a specified driver. '''
    #Get the time for each stage for a particular driver
    rebaser = stagerank_overall[stagerank_overall['drivercode']==drivercode][['code','totalTimeMs']].set_index('code').to_dict(orient='dict')['totalTimeMs']
    #The stagerank_overall['code'].map(rebaser) returns the total time for each stage achieved by the rebase driver
    # stagerank_overall['code'] identifies the stage
    #Subtract this rebase time from the overall stage time for each driver by stage
    stagerank_overall['rebased'] = stagerank_overall['totalTimeMs'] - stagerank_overall['code'].map(rebaser)
    return stagerank_overall

In [178]:
if __name__=='__main__':
    #Preview the stagerank_overall contents for a particular stage
    display(stagerank_overall[stagerank_overall['code']==ss][['drivercode','position','totalTimeMs','code']])

Unnamed: 0,drivercode,position,totalTimeMs,code
132,LAT,1,5372300,SS12
133,TÄN,2,5375500,SS12
134,OST,3,5380300,SS12
135,PAD,4,5384600,SS12
136,LAP,5,5404100,SS12
137,OGI,6,5444400,SS12
138,EVA,7,5458000,SS12
139,SUN,8,5487000,SS12
140,NEU,9,5496300,SS12
141,BRE,10,5718800,SS12


In [179]:
def rebased_stage_stagerank(conn,rally,ss,drivercode,typ='overall'):
    ''' Calculate the rebased time for each driver, in a specified stage (ss),
        relative to a specified driver (drivercode).
        Returns columns: ['position','totalTimeMs','code','rebased','Overall Time']
    '''
    stagerank_overall = sr.getEnrichedStageRank(conn, rally, typ=typ)
    zz=rebaseOverallRallyTime(stagerank_overall, drivercode)#, ss)
    #Get the rebased times for a particular stage
    zz=zz[zz['code']==ss][['drivercode','position','totalTimeMs','code', 'rebased']].set_index('drivercode')
    #Scale down the time from milliseconds to seconds
    zz['Overall Time']=-zz['rebased']/1000
    return zz

In [180]:
if __name__=='__main__':
    zz=rebased_stage_stagerank(conn,rally,ss, drivercode)
    display(zz)

Unnamed: 0_level_0,position,totalTimeMs,code,rebased,Overall Time
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LAT,1,5372300,SS12,-72100,72.1
TÄN,2,5375500,SS12,-68900,68.9
OST,3,5380300,SS12,-64100,64.1
PAD,4,5384600,SS12,-59800,59.8
LAP,5,5404100,SS12,-40300,40.3
OGI,6,5444400,SS12,0,0.0
EVA,7,5458000,SS12,13600,-13.6
SUN,8,5487000,SS12,42600,-42.6
NEU,9,5496300,SS12,51900,-51.9
BRE,10,5718800,SS12,274400,-274.4


In [181]:
if __name__=='__main__':
    display(stagerank_overall.columns)

Index(['diffFirst', 'diffFirstMs', 'diffPrev', 'diffPrevMs', 'entryId',
       'penaltyTime', 'penaltyTimeMs', 'position', 'stageTime', 'stageTimeMs',
       'totalTime', 'totalTimeMs', 'stageId', 'class', 'code', 'distance',
       'name', 'snum', 'drivercode', 'entrant.name', 'classrank',
       'gainedClassPos', 'gainedClassLead', 'classPosDiff', 'lostClassLead',
       'retainedClassLead', 'gainedOverallPos', 'gainedOverallLead',
       'overallPosDiff', 'lostOverallLead', 'retainedOverallLead', 'stagewin',
       'stagewincount', 'winsinarow', 'gainedTime'],
      dtype='object')

In [182]:
if __name__=='__main__':
    #Preview a long format dataframe describing position and stage code for a specified driver
    #This appears not be be referenced anywhere else in this notebook
    stagerank_stage = sr.getEnrichedStageRank(conn, rally, typ='stage')
    stagerank_stage[stagerank_stage['drivercode']==rebase][['position','code']]

In [183]:
if __name__=='__main__':
    sr.dbGetStageRank(conn, rally, rc, 'overall', stages='SS8').columns

In [184]:
if __name__=='__main__':
    #If there are no splits, ssd.dbGetSplits should optionally get the overall times from elsewhere as a single split
    splits = ssd.dbGetSplits(conn,rally,ss,rc)#, forcesingle=True)

    elapseddurations=ssd.getElapsedDurations(splits)
    display(elapseddurations.head())

Unnamed: 0,drivercode,elapsedDurationS,startDateTime,section


In [185]:
def getRoadPosition(conn,rally,rc='RC1',stages=None):
    ''' Get road position for each driver for a given stage.
    
        NOTE:
        The start time is only available from stages with split times recorded.
        We can't get road position for stages with no splits.
    
    '''
    
    #TO DO - this doesn't seem to work on stage with no splits?
    roadPos=sr.dbGetStageStart(conn, rally, rc, stages)
    roadPos=roadPos[['drivercode','startDateTime','startpos']]
    roadPos.columns=['drivercode','startDateTime','Road Position']
    roadPos = roadPos.set_index('drivercode')
    return roadPos


In [186]:
if __name__=='__main__':
    roadPos = getRoadPosition(conn,rally,rc,ss)
    display(roadPos)

Unnamed: 0_level_0,startDateTime,Road Position
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1


In [187]:
if __name__=='__main__':
    rebasedelapseddurations = ssd.rebaseElapsedDurations(elapseddurations, drivercode)
    #This returns columns of the form: drivercode	elapsedDurationS	startDateTime	section	rebased
    #If there are no splits, this is currently an empty datframe
    display(rebasedelapseddurations.head())

Unnamed: 0,drivercode,elapsedDurationS,startDateTime,section,rebased


In [188]:
if __name__=='__main__':
    rebasedelapseddurations

In [189]:
def pivotRebasedElapsedDurations(rebasedelapseddurations, ss):
    ''' Pivot rebased elapsed durations (that is, deltas relative target).
        Rows give stage delta at each split for a specific driver.
        
        Returns columns of the form: ['1','2','3','SS9 Overall']
    '''
    if rebasedelapseddurations.empty:
        return pd.DataFrame(columns=['drivercode']).set_index('drivercode')
    
    rbe=-rebasedelapseddurations.pivot('drivercode','section','rebased')
    rbe.columns=list(rbe.columns)[:-1]+['{} Overall'.format(ss)]
    rbe=rbe.sort_values(rbe.columns[-1],ascending = False)
    return rbe

if __name__=='__main__':
    rbe = pivotRebasedElapsedDurations(rebasedelapseddurations, ss)
    display(rbe)

In [190]:
#https://pandas.pydata.org/pandas-docs/stable/style.html
def color_negative(val):
    """
    Takes a scalar and returns a string with
    the css property `'color: red'` for negative
    strings, black otherwise.
    """
    if isinstance(val, str): return ''
    elif val and (isinstance(val,int) or isinstance(val,float)):
        color = 'green' if val < 0 else 'red' if val > 0  else 'black'
    else:
        color='white'
    return 'color: %s' % color

In [191]:
if __name__=='__main__':
    #test of applying style to pandas dataframe
    #Is this really fown to pandas to fail gracefully if df is empty??
    s = rbe.style.applymap(color_negative)
    display(s)

IndexError: list index out of range

<pandas.io.formats.style.Styler at 0x116e7c550>

In [193]:
# TO DO:
# - calculate stage position at each split
# - calculate rank within that sector

In [194]:
if __name__=='__main__':
    #splitdurations are the time in each sector (time take to get from one split to the next)
    #But what if there are no splits? We get an empty dataframe...
    splitdurations = ssd.getSplitDurationsFromSplits(conn,rally,ss,rc)
    display(splitdurations)#.head()

Unnamed: 0,drivercode,splitDurationS,startDateTime,stageTimeDurationMs,section


In [195]:
if __name__=='__main__':
    #This will be an empty dataframe if there are no splits
    rebasedSplits = ssd.rebaseSplitDurations(splitdurations, drivercode)
    display(rebasedSplits.head())

Unnamed: 0,drivercode,splitDurationS,startDateTime,stageTimeDurationMs,section,rebased


In [196]:
if __name__=='__main__':
    #preview what's available as a splitduration
    display(splitdurations[splitdurations['drivercode'].isin( ['PAD','NEU'])])

Unnamed: 0,drivercode,splitDurationS,startDateTime,stageTimeDurationMs,section,rebased


In [197]:
def pivotRebasedSplits(rebasedSplits):
    ''' For each driver row, find the split. '''
    
    #If there are no splits...
    if rebasedSplits.empty:
        return pd.DataFrame(columns=['drivercode']).set_index('drivercode')
    
    rbp=-rebasedSplits.pivot('drivercode','section','rebased')
    rbp.columns=['D{}'.format(c) for c in rbp.columns]
    rbp.sort_values(rbp.columns[-1],ascending =True)
    return rbp

if __name__=='__main__':
    rbp = pivotRebasedSplits(rebasedSplits)
    display(rbp)

In [198]:
if __name__=='__main__':
    #Just remind ourselves of what is available in the road position data
    display(roadPos)

Unnamed: 0_level_0,startDateTime,Road Position
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1


In [199]:
def getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, stageresult, ss):
    #TO DO: return empty w/ proper colnames
    if roadPos.empty: return pd.DataFrame()
    ''' Create a base dataframe for the rebased driver split report. '''
    
    stageresult.columns = ['drivercode','Stage Rank']
    rb2 = pd.merge(rbe,stageresult.set_index('drivercode'),left_index=True, right_index=True)

    rb2=pd.merge(rb2,zz[['position','Overall Time']],left_index=True, right_index=True)
    rb2.rename(columns={'position': 'Overall Position'}, inplace=True)
    
    #The following is calculated rather than being based on the actual timing data / result for the previous stage
    #Would be better to explicitly grab data for previous stage, along with previous ranking
    rb2['Previous'] =  rb2['Overall Time'] - rb2['{} Overall'.format(ss)]
    #Related to this, would be useful to have an overall places gained / lost column
    
    rb2=pd.merge(rb2,rbp,left_index=True, right_index=True)
    rb2=pd.merge(rb2,roadPos[['Road Position']],left_index=True, right_index=True)
    cols=rb2.columns.tolist()
    #Reorder the columns - move Road Position to first column
    rb2=rb2[[cols[-1]]+cols[:-1]]
    
    #reorder cols
    prev = rb2['Previous']
    rb2.drop(labels=['Previous'], axis=1,inplace = True)
    rb2.insert(1, 'Previous', prev)
    
    return rb2

if __name__=='__main__':
    stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss,typ='stage')[['drivercode','position']]
    rb2=getDriverSplitReportBaseDataframe(rbe,rbp, zz, roadPos, stageresult, ss)
    display(rb2)

In [200]:
if __name__=='__main__':
    display(rb2.dtypes)

Series([], dtype: object)

In [201]:
#There seems to be missing tenths?
#Elapsed durations are provided in milliseconds. Need to round correctly to tenths?
#Elapsed times grabbed from ssd.dbGetSplits(conn,rally,ss,rc)

def cleanDriverSplitReportBaseDataframe(rb2, ss):
    ''' Tidy up the driver split report dataframe, replacing 0 values with NaNs that can be hidden.
        Check column names and data types. '''
    
    #TO DO: set proper colnames
    if rb2.empty: return rb2
    
    rb2=rb2.replace(0,NaN)
    #rb2=rb2.fillna('') #This casts columns containing NA to object type which means we can't use nan processing
    
    rb2['Road Position']=rb2['Road Position'].astype(float)
    return rb2

def __styleDriverSplitReportBaseDataframe(rb2, ss):
    ''' Test if basic dataframe styling.
        DEPRECATED. '''
    s=rb2.fillna('').style.applymap(color_negative,
                                    subset=[c for c in rb2.columns if isinstance(c, int) and c not in ['Overall Position', 'Road Position']])
    #data.style.applymap(highlight_cols, subset=pd.IndexSlice[:, ['B', 'C']])

    s.set_caption("{}: running split times and deltas within each split.".format(ss))
    return s
    
if __name__=='__main__':
    rb2c = cleanDriverSplitReportBaseDataframe(rb2.copy(), ss)
    s = __styleDriverSplitReportBaseDataframe(rb2c, ss)

In [202]:
from IPython.core.display import HTML

if __name__=='__main__':
    html=s.render()
    display(HTML(html))

In [203]:
from math import nan
def bg_color(s):
    ''' Set background colour sensitive to time gained or lost.
    '''
    attrs=[]
    for _s in s:
        if _s < 0:
            attr = 'background-color: green; color: white'
        elif _s > 0: 
            attr = 'background-color: red; color: white'
        else:
            attr = ''
        attrs.append(attr)
    return attrs

In [221]:
import seaborn as sns

def moreStyleDriverSplitReportBaseDataframe(rb2,ss, caption=None):
    ''' Style the driver split report dataframe. '''
    
    if rb2.empty: return ''
        
    def _subsetter(cols, items):
        ''' Generate a subset of valid columns from a list. '''
        return [c for c in cols if c in items]
    
    
    #https://community.modeanalytics.com/gallery/python_dataframe_styling/
    # Set CSS properties for th elements in dataframe
    th_props = [
      ('font-size', '11px'),
      ('text-align', 'center'),
      ('font-weight', 'bold'),
      ('color', '#6d6d6d'),
      ('background-color', '#f7f7f9')
      ]

    # Set CSS properties for td elements in dataframe
    td_props = [
      ('font-size', '11px')
      ]

    # Set table styles
    styles = [
      dict(selector="th", props=th_props),
      dict(selector="td", props=td_props)
      ]
    
    #Define colour palettes
    #cmg = sns.light_palette("green", as_cmap=True)
    #The blue palette helps us scale the Road Position column
    # This may help us to help identify any obvious road position effect when sorting stage times by stage rank
    cm=sns.light_palette((210, 90, 60), input="husl",as_cmap=True)

    s2=(rb2.style
        .background_gradient(cmap=cm, subset=_subsetter(rb2.columns, ['Road Position']))
        .applymap(color_negative,
                  subset=[c for c in rb2.columns if isinstance(c, int) and c not in ['Overall Position', 'Road Position']])
        .highlight_min(subset=_subsetter(rb2.columns, ['Overall Position']), color='lightgrey')
        .highlight_max(subset=_subsetter(rb2.columns, ['Overall Time']), color='lightgrey')
        .highlight_max(subset=_subsetter(rb2.columns, ['Previous']), color='lightgrey')
        .apply(bg_color,subset=_subsetter(rb2.columns, ['{} Overall'.format(ss), 'Overall Time', 'Previous']))
        .bar(subset=[c for c in rb2.columns if str(c).startswith('D')], align='zero', color=[ '#5fba7d','#d65f5f'])
        .set_table_styles(styles)
        
        #.format({'total_amt_usd_pct_diff': "{:.2%}"})
       )
    
    if caption is not None:
        s2.set_caption(caption)

    #nan issue: https://github.com/pandas-dev/pandas/issues/21527
    return s2.render().replace('nan','')

if __name__=='__main__':
    rb2c = cleanDriverSplitReportBaseDataframe(rb2.copy(), ss)
    s2 = moreStyleDriverSplitReportBaseDataframe(rb2c, ss)
    display(HTML(s2))

In [205]:
if __name__=='__main__':
    sr.dbGetStageRank(conn, rally, rc, typ='stage', stages=ss)[['position','drivercode','classrank']]
#'overall':'stage_times_overall', 'stage_times_overall':'stage_times_overall',
#              'stage':'stage_times_stage', 'stage_times_stage':'stage_times_stage'
#sr.getEnrichedStageRank(conn, rally, typ=typ)

In [207]:
if __name__=='__main__':
    sr.getDriverCodeBy(conn, rally, ss,'stage')

In [208]:
if __name__=='__main__':
    ss

In [209]:
if __name__=='__main__':
    sr.getEnrichedStageRank(conn, rally, stages=ss,typ='stage')

In [210]:
if __name__=='__main__':
    rebased_stage_stagerank(conn,rally,ss,drivercode, typ='overall')

In [214]:
def getDriverStageReport(conn, rally, ss, drivercode, rc='RC1', typ='overall', order=None, caption=None):
    ''' Generate a dataframe to report overall stage result. '''
    #'Previous',' SS9 Overall', 'Overall Position'	'Overall Time'; stage position by sort order
    
    if order is None: order='stage'
    #change cols depending on what report / sort order ie. remove redundant col
    
    #Get the overall results, rebased
    zz = rebased_stage_stagerank(conn,rally,ss,drivercode, typ='overall')
    zz.rename(columns={'position':'Overall Position'}, inplace=True)
    
    #Get stage result - does it need to be enriched?
    stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss,typ='stage')

    stagerebaser = stageresult[stageresult['drivercode']==drivercode][['code','elapsedDurationMs']].set_index('code').to_dict(orient='dict')['elapsedDurationMs']
    #The stagerank_overall['code'].map(rebaser) returns the total time for each stage achieved by the rebase driver
    # stagerank_overall['code'] identifies the stage
    #Subtract this rebase time from the overall stage time for each driver by stage

    stcol='{} Time'.format(ss)
    sdeltacol='{} Overall'.format(ss)
    stageresult[sdeltacol] = -(stageresult['elapsedDurationMs'] - stageresult['code'].map(stagerebaser))
    stageresult=stageresult[['drivercode', 'position','elapsedDuration', sdeltacol,'elapsedDurationMs']]
    stageresult.columns=['drivercode', 'Stage Rank',stcol, sdeltacol,'stageDurationMs']
    stageresult[stcol] = stageresult[stcol].str.replace('00000','')
    
    combined = pd.merge(zz,stageresult, on='drivercode' )
    
    combined[sdeltacol] = combined[sdeltacol]/1000
    combined['Previous'] = (combined['Overall Time']-combined[sdeltacol])
    
    _tmp=combined[['drivercode','Previous','Stage Rank',stcol,sdeltacol,'Overall Position','Overall Time']].replace(0,NaN).set_index('drivercode')

    if order=='overall':
        combined=combined.sort_values('Overall Position', ascending=True)
    elif order=='previous':
        combined=combined.fillna(0).sort_values('Previous', ascending=False).replace(0,NaN)
    elif order=='stage':
        combined=combined.sort_values('Stage Rank', ascending=True)
    else:
        #Default is stage order
        combined=combined.sort_values('Stage Rank', ascending=True)
    
    s2 = moreStyleDriverSplitReportBaseDataframe(_tmp, ss, caption)
    return s2

if __name__=='__main__':
    s2=getDriverStageReport(conn, rally, ss, drivercode)
    display(HTML(s2))

Unnamed: 0_level_0,Previous,Stage Rank,SS12 Time,SS12 Overall,Overall Position,Overall Time
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAT,73.1,9,00:01:34.50,-1.0,1,72.1
TÄN,68.5,1,00:01:33.10,0.4,2,68.9
OST,65.0,8,00:01:34.40,-0.9,3,64.1
PAD,60.7,7,00:01:34.40,-0.9,4,59.8
LAP,40.1,3,00:01:33.30,0.2,5,40.3
OGI,,5,00:01:33.50,,6,
EVA,-14.0,2,00:01:33.10,0.4,7,-13.6
SUN,-42.3,6,00:01:33.80,-0.3,8,-42.6
NEU,-50.4,10,00:01:35,-1.5,9,-51.9
BRE,-244.3,22,00:02:03.60,-30.1,10,-274.4


In [228]:
def getDriverSplitsReport(conn, rally, ss, drivercode, rc='RC1', typ='overall', order=None, caption=None, bars=True):
    ''' Generate dataframe report relative to a given driver on a given stage.
            order: sorts table according to: overall | previous | roadpos
            
        At the moment, the splits reporter doesn't report anything if there are no splits.
        In this case, default to a simple overal stage (without splits) reporter table.
    '''
    
    #TO DO - this needs to fail gracefully if there are no splits
    
    #Allow the drivercode to be relative to a position
    #if drivercode=='firstonroad':
        #allow things like onroad1, onroad2?
    #    drivercode=
    #elif drivercode=='previousfirst':
        #allow things like previous1, previous2?
    #    drivercode = 
    #elif drivercode = 'stagewinner':
        #allowthings like stage1, stage2?
    #    drivercode = 
    
    
    #Get the overall results, rebased
    zz = rebased_stage_stagerank(conn,rally,ss,drivercode, typ=typ)
    
    #Get the road position
    roadPos = getRoadPosition(conn,rally,rc,ss)
    if roadPos.empty:
        #Should we automatically offer the stagetable report as an alternative
        return getDriverStageReport(conn, rally, ss, drivercode, order=order, caption=caption)
    
    #Get the splits
    splits = ssd.dbGetSplits(conn,rally,ss,rc)
    elapseddurations=ssd.getElapsedDurations(splits)
    
    #Rebase the split elapsed durations
    rebasedelapseddurations = ssd.rebaseElapsedDurations(elapseddurations, drivercode)
    rbe = pivotRebasedElapsedDurations(rebasedelapseddurations, ss)
    
    #splitdurations are the time in each sector (time take to get from one split to the next)
    splitdurations = ssd.getSplitDurationsFromSplits(conn,rally,ss,rc)
    rebasedSplits = ssd.rebaseSplitDurations(splitdurations, drivercode)
    rbp = pivotRebasedSplits(rebasedSplits)

    #Get stage result to erge in stage position
    stageresult=sr.getEnrichedStageRank(conn, rally, stages=ss,typ='stage')[['drivercode','position']]

    rb2=getDriverSplitReportBaseDataframe(rbe, rbp, zz, roadPos, stageresult, ss)
    rb2 = cleanDriverSplitReportBaseDataframe(rb2, ss)
    if not bars:
        rb2=rb2.drop([c for c in rb2.columns if str(c).startswith('D')], axis=1)
        
    if ss=='SS1':
        rb2['Previous']=NaN

    if order=='overall':
        rb2=rb2.sort_values('Overall Position', ascending=True)
        #Remove the redundant column
        rb2=rb2.drop(['Overall Position'], axis=1)
        #rb2=rb2.rename(columns={'Overall Position':'{} Overall*'.format(ss)})
    elif order=='previous':
        rb2=rb2.fillna(0).sort_values('Previous', ascending=False).replace(0,NaN)
        #rb2 = rb2.rename(columns={'Previous':'Previous*'})
    elif order=='roadpos':
        rb2=rb2.sort_values('Road Position', ascending=True)
        #rb2 = rb2.rename(columns={'Road Position':'Road Position*'})
    elif order=='stage':
        rb2.sort_values('Stage Rank', ascending=True)
        #Remove the redundant column
        rb2=rb2.drop(['Stage Rank'], axis=1)
    else:
        #Default is stage order
        rb2.sort_values('Stage Rank', ascending=True)
        #Remove the redundant column
        rb2=rb2.drop(['Stage Rank'], axis=1)
        rb2 = rb2.rename(columns={'{} Overall'.format(ss):'{} Overall*'.format(ss)})

    if caption =='auto':
        caption = 'Rebased stage split times for {}{}.'.format('{}, '.format(drivercode), ss)

    #s = styleDriverSplitReportBaseDataframe(rb2, ss)
    s2 = moreStyleDriverSplitReportBaseDataframe(rb2,ss, caption)
    return s2

if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, ss, drivercode, rc, typ)#, caption='auto')
    display(HTML(s2))

Unnamed: 0_level_0,Previous,Stage Rank,SS12 Time,SS12 Overall,Overall Position,Overall Time
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAT,73.1,9,00:01:34.50,-1.0,1,72.1
TÄN,68.5,1,00:01:33.10,0.4,2,68.9
OST,65.0,8,00:01:34.40,-0.9,3,64.1
PAD,60.7,7,00:01:34.40,-0.9,4,59.8
LAP,40.1,3,00:01:33.30,0.2,5,40.3
OGI,,5,00:01:33.50,,6,
EVA,-14.0,2,00:01:33.10,0.4,7,-13.6
SUN,-42.3,6,00:01:33.80,-0.3,8,-42.6
NEU,-50.4,10,00:01:35,-1.5,9,-51.9
BRE,-244.3,22,00:02:03.60,-30.1,10,-274.4


In [137]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS11', 'LAT', rc, typ)
    display(HTML(s2))

Unnamed: 0_level_0,1,2,3,D1,D2,D3,D4,Overall Position,Overall Time,Previous,Road Position,SS11 Overall
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
TÄN,0.8,1.8,2.1,0.8,1.0,0.2,-0.4,2,-4.6,-6.3,7,1.7
LAT,,,,,,,,1,,,10,
MIK,-1.7,-2.3,-3.3,-1.7,-0.6,-1.1,-1.4,21,-2516.7,-2512.0,12,-4.7
LAP,-2.4,-3.5,-3.4,-2.4,-1.0,,-1.4,5,-33.0,-28.2,6,-4.8
PAD,-3.0,-5.9,-6.9,-3.0,-2.9,-1.1,-1.4,4,-12.4,-4.1,8,-8.3
OST,-2.0,-2.8,-7.5,-2.0,-0.8,-4.8,-4.3,3,-8.1,3.7,11,-11.8
EVA,-4.1,-6.8,-8.7,-4.1,-2.6,-2.0,-4.2,7,-87.1,-74.2,3,-12.9
OGI,-6.4,-9.6,-13.4,-6.4,-3.2,-3.9,-4.3,6,-73.1,-55.4,5,-17.7
NEU,-7.9,-14.4,-17.3,-7.9,-6.5,-3.0,-6.7,10,-123.5,-99.5,2,-24.0
SUN,-8.2,-13.3,-18.1,-8.2,-5.1,-4.9,-7.3,9,-115.4,-90.0,4,-25.4


In [50]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS11', 'LAT', rc, typ, 'overall')
    display(HTML(s2))

Unnamed: 0_level_0,Road Position,Previous,1,2,3,SS11 Overall,Overall Position,Overall Time,D1,D2,D3,D4
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LAT,10,,,,,,1,,,,,
TÄN,7,-6.3,0.8,1.8,2.1,1.7,2,-4.6,0.8,1.0,0.2,-0.4
OST,11,3.7,-2.0,-2.8,-7.5,-11.8,3,-8.1,-2.0,-0.8,-4.8,-4.3
PAD,8,-4.1,-3.0,-5.9,-6.9,-8.3,4,-12.4,-3.0,-2.9,-1.1,-1.4
LAP,6,-28.2,-2.4,-3.5,-3.4,-4.8,5,-33.0,-2.4,-1.0,,-1.4
OGI,5,-55.4,-6.4,-9.6,-13.4,-17.7,6,-73.1,-6.4,-3.2,-3.9,-4.3
EVA,3,-74.2,-4.1,-6.8,-8.7,-12.9,7,-87.1,-4.1,-2.6,-2.0,-4.2
BRE,9,-8.8,-3.2,-4.6,-35.9,-78.6,8,-87.4,-3.2,-1.4,-31.4,-42.7
SUN,4,-90.0,-8.2,-13.3,-18.1,-25.4,9,-115.4,-8.2,-5.1,-4.9,-7.3
NEU,2,-99.5,-7.9,-14.4,-17.3,-24.0,10,-123.5,-7.9,-6.5,-3.0,-6.7


In [51]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS11', 'PAD', rc, typ, 'previous')
    display(HTML(s2))

Unnamed: 0_level_0,Road Position,Previous,1,2,3,SS11 Overall,Overall Position,Overall Time,D1,D2,D3,D4
drivercode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
OST,11,7.8,1.0,3.1,-0.6,-3.5,3,4.3,1.0,2.1,-3.7,-2.9
LAT,10,4.1,3.0,5.9,6.9,8.3,1,12.4,3.0,2.9,1.1,1.4
PAD,8,,,,,,4,,,,,
TÄN,7,-2.2,3.8,7.7,9.0,10.0,2,7.8,3.8,3.9,1.3,1.0
BRE,9,-4.7,-0.2,1.3,-29.0,-70.3,8,-75.0,-0.2,1.5,-30.3,-41.3
LAP,6,-24.1,0.6,2.4,3.5,3.5,5,-20.6,0.6,1.9,1.1,
OGI,5,-51.3,-3.4,-3.7,-6.5,-9.4,6,-60.7,-3.4,-0.3,-2.8,-2.9
EVA,3,-70.1,-1.1,-0.9,-1.8,-4.6,7,-74.7,-1.1,0.3,-0.9,-2.8
SUN,4,-85.9,-5.2,-7.4,-11.2,-17.1,9,-103.0,-5.2,-2.2,-3.8,-5.9
NEU,2,-95.4,-4.9,-8.5,-10.4,-15.7,10,-111.1,-4.9,-3.6,-1.9,-5.3


In [93]:
import os
import time
from selenium import webdriver


def getTableImage(url, fn='dummy_table', basepath='.', path='.', delay=5, height=420, width=800):
    ''' Render HTML file in browser and grab a screenshot. '''
    #should be a tmp file?
    #fname='testmap.html'
    #tmpurl='file://{path}/{mapfile}'.format(path=os.getcwd(),mapfile=fn)
    #folium_map.save(fn)
    browser = webdriver.Chrome()
    browser.set_window_size(width, height)
    browser.get(url)
    #Give the map tiles some time to load
    time.sleep(delay)
    imgpath='{}/{}.png'.format(path,fn)
    imgfn = '{}/{}'.format(basepath, imgpath)
    imgfile = '{}/{}'.format(os.getcwd(),imgfn)
    browser.save_screenshot(imgfile)
    browser.quit()
    os.remove(imgfile.replace('.png','.html'))
    #print(imgfn)
    return imgpath


def getTablePNG(tablehtml,basepath='.', path='testpng', fnstub='testhtml'):
    ''' Save HTML table as file. '''
    if not os.path.exists(path):
        os.makedirs('{}/{}'.format(basepath, path))
    fn='{cwd}/{basepath}/{path}/{fn}.html'.format(cwd=os.getcwd(), basepath=basepath, path=path,fn=fnstub)
    tmpurl='file://{fn}'.format(fn=fn)
    with open(fn, 'w') as out:
        out.write(tablehtml)
    return getTableImage(tmpurl, fnstub, basepath, path)
    #print(tmpurl)

    

if __name__=='__main__':
    getTablePNG(s2)

NoSuchWindowException: Message: no such window: window was already closed
  (Session info: chrome=70.0.3538.102)
  (Driver info: chromedriver=2.43.600229 (3fae4d0cda5334b4f533bede5a4787f7b832d052),platform=Mac OS X 10.13.6 x86_64)


In [None]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS10', 'PAD', rc, typ, 'roadpos')
    display(HTML(s2))

In [None]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS10', 'TÄN', rc, typ,'previous')
    display(HTML(s2))

In [40]:
if __name__=='__main__':
    s2 = getDriverSplitsReport(conn, rally, 'SS18', 'OGI', rc, typ)
    display(HTML(s2))

ValueError: Length mismatch: Expected axis has 0 elements, new values have 1 elements

Problem with the bars is that the range is different in each column; ideally we want the same range in each column; could do this with two dummy rows to force max and min values?

In [None]:
if __name__=='__main__':
    #Example for pandas issue https://github.com/pandas-dev/pandas/issues/21526
    import pandas as pd
    import numpy as np
    
    df=pd.DataFrame({'x1':list(np.random.randint(-10,10,size=10))+[-500,1000, -1000],
               'y1':list(np.random.randint(-5,5,size=13)),'y2':list(np.random.randint(-2,3,size=13)) })
    
    display(df.style.bar( align='zero', color=[ '#5fba7d','#d65f5f']))

In [None]:
if __name__=='__main__':
    #clip lets us set a max limiting range although it means we lose the actual value?
    df['x2']= df['x1'].clip(upper=10, lower=-10)
    display(df.style.bar( align='zero', color=[ '#d65f5f','#5fba7d']))

In [620]:
if __name__=='__main__':
    #for pandas 0.24 ? https://github.com/pandas-dev/pandas/pull/21548
    df['x2']= df['x1'].clip(upper=10, lower=-10)
    #Set axis=None for table wide range?
    #display(df.style.bar( align='zero', axis=None, color=[ '#d65f5f','#5fba7d']))
    