# Bulk processing

Take all the jump files in a data lake and process them as a group.  Produce the mean results for scores, max speed, curves, and the 5-second partial results.

Conforming to Lucyfer's default configuration, the data lake starts at the `./data` directory and includes any and all files below it.

---
## Prepare the environment

In [6]:
from collections import namedtuple
from copy import deepcopy

from ssscoring import BREAKOFF_ALTITUDE
from ssscoring import PERFORMANCE_WINDOW_LENGTH
from ssscoring import convertFlySight2SSScoring
from ssscoring import dropNonSkydiveDataFrom
from ssscoring import getAllSpeedJumpFilesFrom
from ssscoring import getSpeedSkydiveFrom
from ssscoring import isValidJump
from ssscoring import jumpAnalysisTable
from ssscoring import validFlySightHeaderIn
from ssscoring.notebook import processJump

import csv
import os
import os.path as path

import bokeh.plotting as bp
import ipywidgets as widgets
import pandas as pd

In [7]:
# Root of the data lake; './data' is the Lucyfer default location.
DATA_LAKE_ROOT = './data'

# Column names that make up a FlySight header line.
FLYSIGHT_HEADER = {
    'time', 'lat', 'lon', 'hMSL',
    'velN', 'velE', 'velD',
    'hAcc', 'vAcc', 'sAcc',
    'heading', 'cAcc', 'gpsFix', 'numSV',
}

# Directory names that are not part of the data set.
IGNORE_LIST = ['.ipynb_checkpoints']

# Size threshold in bytes (512 KiB); the notebook text says smaller files are
# discarded as not being jump files.
MIN_JUMP_FILE_SIZE = 512 * 1024

# Time label written over the last row of each jump table when aggregating.
LAST_TIME_TRANCHE = 25.0

# Line colors cycled through when overlaying all jumps on a single plot.
# `colors` is bound to the same tuple and kept as an alias.
SPEED_COLORS = colors = (
    'blue',
    'limegreen',
    'tomato',
    'turquoise',
    'deepskyblue',
    'forestgreen',
    'coral',
    'darkcyan',
)

In [8]:
# Direct Bokeh output to the notebook so later bp.show() calls render in the cells.
bp.output_notebook()

## Get a list of all FlySight files in the data lake

This also discards all files that don't reflect a valid jump:

- Detect the files by size and discard any file smaller than `MIN_JUMP_FILE_SIZE`
- Detect that each file has the FlySight header on the first line

In [9]:
# Collect the speed jump files found under the data lake root.
jumpFiles = getAllSpeedJumpFilesFrom(DATA_LAKE_ROOT)

---
## Process all files

In [10]:
# One entry per jump file, in iteration order.
resultsIndex = pd.Series(list(jumpFiles))

In [11]:
# Score every file in the data lake and index the results by a short tag
# derived from the file path.
jumpResults = {}
for jumpFile in jumpFiles:
    # The line at index 1 of each file is skipped when reading; the header
    # line at index 0 is kept.
    rawData = pd.read_csv(jumpFile, skiprows = (1, 1))
    jumpResult = processJump(convertFlySight2SSScoring(rawData))

    # Reduce the path to a tag: remove the 'CSV' marker, every dot, and the
    # '/data' prefix, then turn remaining separators into spaces and trim.
    tag = jumpFile.replace('CSV', '')
    tag = tag.replace('.', '')
    tag = tag.replace('/data', '')
    tag = tag.replace('/', ' ')
    tag = tag.strip()

    # NOTE(review): 'valid' is also a substring of 'invalid', so this check
    # keeps invalid jumps too; later cells depend on them being present.
    if 'valid' in jumpResult.result:
        jumpResults[tag] = jumpResult

---
## Results

In [12]:
def aggregateResults(jumpResults: dict) -> pd.DataFrame:
    """
    Build a summary table with one row per valid jump.

    Jumps whose `result` text contains 'invalid' are skipped.  Each remaining
    jump contributes a row indexed by its tag with these columns:  'score',
    one column per time label in the jump's table, 'finalTime' (the time
    recorded in the last table row before relabeling), and 'maxSpeed'.

    Arguments
    ---------
        jumpResults
    Dictionary of tag -> jump result.  Each result must expose `result`,
    `table`, `score`, and `maxSpeed`; `table` must have 'time' and
    'altitude (ft)' columns.

    Returns
    -------
    A data frame with one row per valid jump, or an empty data frame when
    there are no valid jumps.  The input tables are never modified.
    """
    rows = []
    for tag, jumpResult in jumpResults.items():
        if 'invalid' in jumpResult.result:
            continue

        # Work on a copy:  relabeling the last row must not leak into the
        # caller's table, otherwise a second run would report the relabeled
        # value as the final time.
        table = jumpResult.table.copy()
        finalTime = table['time'].iloc[-1]
        # Single-step positional assignment; a chained `iloc[-1].time = ...`
        # may write to a temporary and be silently lost.
        table.iloc[-1, table.columns.get_loc('time')] = LAST_TIME_TRANCHE

        # Turn every time label into a column, then discard the altitude row.
        pivoted = pd.pivot_table(table, columns = table.time)
        pivoted.drop(['altitude (ft)'], inplace = True)

        row = pd.DataFrame([ jumpResult.score, ], index = [ tag, ], columns = [ 'score', ], dtype = object)
        for column in pivoted.columns:
            # First remaining row of the pivot -- presumably the speed values;
            # accessed by position because the pivot index holds labels.
            row[column] = pivoted[column].iloc[0]
        row['finalTime'] = [ finalTime, ]
        row['maxSpeed'] = jumpResult.maxSpeed
        rows.append(row)

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(rows) if rows else pd.DataFrame()

In [13]:
# Summarize every processed jump in a single table.
aggregate = aggregateResults(jumpResults)
display(widgets.HTML('<h3>Round 1 - FAI World Championship 2022 (sample)</h3>'))
# Last expression of the cell, so the notebook renders the table under the heading.
aggregate

HTML(value='<h3>Round 1 - FAI World Championship 2022 (sample)</h3>')

Unnamed: 0,score,5.0,10.0,15.0,20.0,25.0,finalTime,maxSpeed
01_Ciurana_21-01-08,445.698,201.492,332.856,412.704,444.168,445.176,24.6,447.228
01_Dingle_20-29-24,472.158,166.32,303.624,406.692,454.932,464.112,25.0,476.604
01_Tuschl_20-33-34,460.548,203.328,322.704,409.392,452.376,460.188,24.6,463.104


In [14]:
# Show the tags of all processed jumps; the output includes the jump flagged invalid.
jumpResults.keys()

dict_keys(['01_Ciurana_21-01-08', '01_Dingle_20-29-24', '01_Hepp-01-INVALID_21-00-38', '01_Tuschl_20-33-34'])

In [15]:
# Inspect the result text stored for the jump whose tag marks it as invalid.
jumpResults['01_Hepp-01-INVALID_21-00-38'].result

'🔴 invalid'

In [16]:
def initializePlot(jumpTitle):
    """
    Create an empty Bokeh figure for plotting jump speed against time.

    Arguments
    ---------
        jumpTitle
    Text used as the figure title.

    Returns
    -------
    A 600x300 figure whose x axis ('seconds from exit') spans 0 to 30 and
    whose y axis ('km/h') spans 0 to 550.
    """
    plotSettings = {
        'title': jumpTitle,
        'height': 300,
        'width': 600,
        'x_axis_label': 'seconds from exit',
        'y_axis_label': 'km/h',
        'x_range': (0, 30),
        'y_range': (0, 550),
    }
    return bp.figure(**plotSettings)

In [17]:
def graphJumpResult(plot,
                    jumpResult,
                    lineColor = 'green',
                    showIt = True):
    """
    Draw the speed curve of one jump on `plot`.

    Arguments
    ---------
        plot
    Figure that receives the glyphs.

        jumpResult
    Jump result exposing `data` (with `plotTime` and `vKMh`), `maxSpeed`,
    `scores`, and `score`.

        lineColor
    Color of the speed line.

        showIt
    When true, two vertical segments are also drawn and the plot is
    displayed; when false, only the speed line is added.
    """
    flightData = jumpResult.data
    topSpeed = jumpResult.maxSpeed
    scoreLookup = jumpResult.scores
    bestScore = jumpResult.score

    plot.line(flightData.plotTime,
              flightData.vKMh,
              legend_label = 'speed',
              line_width = 2,
              line_color = lineColor)

    if not showIt:
        return

    # Vertical markers at scores[score] and 3.0 later, from 0 up to the max
    # speed -- presumably the bounds of the scored window; TODO confirm.
    markerStart = scoreLookup[bestScore]
    markerEnd = markerStart+3.0
    plot.segment(x0 = [ markerStart, markerEnd, ],
                 y0 = [ 0.0, 0.0, ],
                 x1 = [ markerStart, markerEnd, ],
                 y1 = [ topSpeed, topSpeed, ])
    bp.show(plot)

In [18]:
# Report each jump under its own heading, plot the valid ones individually,
# and overlay every valid jump on one shared figure shown in a later cell.
allJumpsPlot = initializePlot('All jumps in set')
jumpNumber = 0
mixColor = 0
for resultRef, jumpResult in jumpResults.items():
    isInvalid = 'invalid' in jumpResult.result
    if isInvalid:
        validJumpStatus = '<hr><h1><span style="color: %s">%s jump - %s</span></h1>' % (jumpResult.color, resultRef, jumpResult.result)
    else:
        validJumpStatus = '<hr><h1><span style="color: %s">%s jump - %s - score = %.02f km/h</span></h1>' % (jumpResult.color, resultRef, jumpResult.result, jumpResult.score)
    display(widgets.HTML(validJumpStatus))

    maxSpeed = jumpResult.maxSpeed
    window = jumpResult.window
    # The color index advances for every jump, valid or not, and before it is
    # used, so the first jump is drawn with SPEED_COLORS[1].
    mixColor = (mixColor+1)%len(SPEED_COLORS)
    if isInvalid:
        continue

    # Altitudes are reported in meters and in feet (factor 3.2808).
    speedText = '<h3>Max speed = {0:,.0f}; '.format(maxSpeed)
    windowText = 'exit at %d m (%d ft), breakoff at %d m (%d ft)</h3>'%(window.start, 3.2808*window.start, window.end, 3.2808*window.end)
    display(widgets.HTML(speedText+windowText))
    graphJumpResult(initializePlot(resultRef), jumpResult)
    graphJumpResult(allJumpsPlot, jumpResult, lineColor = SPEED_COLORS[mixColor], showIt = False)

HTML(value='<hr><h1><span style="color: #0f0">01_Ciurana_21-01-08 jump - 🟢 valid - score = 445.70 km/h</span><…

HTML(value='<h3>Max speed = 447; exit at 4574 m (15007 ft), breakoff at 2318 m (7606 ft)</h3>')

HTML(value='<hr><h1><span style="color: #0f0">01_Dingle_20-29-24 jump - 🟢 valid - score = 472.16 km/h</span></…

HTML(value='<h3>Max speed = 477; exit at 4522 m (14838 ft), breakoff at 2266 m (7437 ft)</h3>')

HTML(value='<hr><h1><span style="color: #f00">01_Hepp-01-INVALID_21-00-38 jump - 🔴 invalid</span></h1>')

HTML(value='<hr><h1><span style="color: #0f0">01_Tuschl_20-33-34 jump - 🟢 valid - score = 460.55 km/h</span></…

HTML(value='<h3>Max speed = 463; exit at 4599 m (15089 ft), breakoff at 2343 m (7688 ft)</h3>')

## All jumps

In [19]:
# Display the shared figure holding the speed curves of all valid jumps.
bp.show(allJumpsPlot)

##### 