# Bulk processing

Take all the jump files in a data lake and process them as a group.  Produce the mean results for scores, max speed, curves, and the 5-second partial results.

Conforming to Lucyfer's default configuration, the data lake starts at the `./data` directory and includes any and all files below it.

---
## Prepare the environment

In [None]:
from collections import namedtuple
from copy import deepcopy

from ssscoring import ALTITUDE_SKYDIVE_PARACLETE_XP
from ssscoring import BREAKOFF_ALTITUDE
from ssscoring import PERFORMANCE_WINDOW_LENGTH
from ssscoring import convertFlySight2SSScoring
from ssscoring import dropNonSkydiveDataFrom
from ssscoring import getAllSpeedJumpFilesFrom
from ssscoring import getSpeedSkydiveFrom
from ssscoring import isValidJump
from ssscoring import jumpAnalysisTable
from ssscoring import validFlySightHeaderIn
from ssscoring.notebook import processJump

import csv
import os
import os.path as path

import bokeh.plotting as bp
import ipywidgets as widgets
import pandas as pd

In [None]:
# Root of the data lake; every file below it is a candidate jump file.
DATA_LAKE_ROOT = './data' # Lucyfer default

# Column names of a FlySight CSV header line.
FLYSIGHT_HEADER = {
    'time', 'lat', 'lon', 'hMSL', 'velN', 'velE', 'velD',
    'hAcc', 'vAcc', 'sAcc', 'heading', 'cAcc', 'gpsFix', 'numSV',
}

# NOTE(review): presumably path fragments to skip while scanning the data
# lake; not referenced by the cells visible here -- confirm before removing.
IGNORE_LIST = [ '.ipynb_checkpoints', ]

# Files smaller than this are discarded as not reflecting a jump.
MIN_JUMP_FILE_SIZE = 512*1024

# Time label given to the last row of each jump's analysis table when the
# results are aggregated.
LAST_TIME_TRANCHE = 25.0

# Line colors cycled through when all the jumps are overlaid on one plot.
# `colors` is kept as a second name bound to the same tuple.
SPEED_COLORS = colors = (
    'blue',
    'limegreen',
    'tomato',
    'turquoise',
    'deepskyblue',
    'forestgreen',
    'coral',
    'darkcyan',
)

In [None]:
# Configure Bokeh (bokeh.plotting) so that plots shown later render inline
# in the notebook's output cells.
bp.output_notebook()

## Get a list of all FlySight files in the data lake

This also discards all files that don't reflect a valid jump:

- Check each file's size and discard any file smaller than `MIN_JUMP_FILE_SIZE`
- Verify that each file has the FlySight header on its first line

In [None]:
# Collect the candidate FlySight jump files found under the data lake root.
jumpFiles = getAllSpeedJumpFilesFrom(DATA_LAKE_ROOT)

---
## Process all files

In [None]:
# One entry per jump file, in the order jumpFiles yields them.
resultsIndex = pd.Series(list(jumpFiles))

In [None]:
# Score every jump file and keep the results keyed by a display tag derived
# from the file's path.
jumpResults = {}
for jumpFile in jumpFiles:
    # skiprows = (1, 1) drops line 1 of the file, i.e. the line right after
    # the header -- presumably the FlySight units row; confirm.
    rawData = pd.read_csv(jumpFile, skiprows = (1, 1))
    ssscoringData = convertFlySight2SSScoring(rawData, altitudeDZMeters = ALTITUDE_SKYDIVE_PARACLETE_XP)
    jumpResult = processJump(ssscoringData)

    # Build the tag by stripping the extension, dots, and the data lake
    # prefix from the path, then turning path separators into spaces.
    tag = jumpFile
    for fragment, replacement in (('CSV', ''), ('.', ''), ('/data', ''), ('/', ' ')):
        tag = tag.replace(fragment, replacement)
    tag = tag.strip()

    # NOTE(review): 'valid' is also a substring of 'invalid', so this test
    # keeps both kinds of result; later cells handle invalid jumps
    # explicitly.  Confirm that this is the intended filter.
    if 'valid' in jumpResult.result:
        jumpResults[tag] = jumpResult

---
## Results

In [None]:
def aggregateResults(jumpResults: dict, lastTimeTranche: 'float | None' = None) -> pd.DataFrame:
    """
    Build a one-row-per-jump summary table from the processed jump results.

    Jumps whose `result` contains the text 'invalid' are skipped.  For every
    other jump, the last row of its analysis table is relabeled to
    `lastTimeTranche` so that the final tranche lines up in the same column
    across jumps, the table is pivoted so that each time label becomes a
    column, and the 'altitude (ft)' row is dropped.

    Arguments
    ---------
        jumpResults
    A dictionary of tag -> jump result.  Each jump result must expose the
    attributes `result`, `table`, `score`, and `maxSpeed`; `table` must have
    `time` and `altitude (ft)` columns.

        lastTimeTranche
    The time label assigned to the last row of each jump's table.  Defaults
    to `LAST_TIME_TRANCHE` when `None`.

    Returns
    -------
    A dataframe indexed by tag with the columns `score`, one column per time
    label (holding the value from the first row left in the pivoted table),
    `finalTime` (the last row's original time), and `maxSpeed`.  An empty
    dataframe is returned if there are no jumps to aggregate.
    """
    if lastTimeTranche is None:
        lastTimeTranche = LAST_TIME_TRANCHE

    rows = []
    for jumpResultIndex, jumpResult in jumpResults.items():
        if 'invalid' in jumpResult.result:
            continue

        # Work on a copy so the table held by the jump result is not altered.
        table = jumpResult.table.copy()
        finalTime = table['time'].iloc[-1]
        # Write through a single positional indexer.  Assigning via
        # `table.iloc[-1].time = ...` targets a temporary row object and is
        # not guaranteed to reach the table itself.
        table.iloc[-1, table.columns.get_loc('time')] = lastTimeTranche

        pivoted = pd.pivot_table(table, columns = table.time)
        pivoted = pivoted.drop(['altitude (ft)'])

        row = pd.DataFrame([ jumpResult.score, ], index = [ jumpResultIndex, ], columns = [ 'score', ], dtype = object)
        for column in pivoted.columns:
            # Explicit positional access; integer keys on a label-indexed
            # Series are otherwise treated as labels by recent pandas.
            row[column] = pivoted[column].iloc[0]
        row['finalTime'] = [ finalTime, ]
        row['maxSpeed'] = jumpResult.maxSpeed
        rows.append(row)

    # Concatenate once instead of growing the dataframe inside the loop.
    return pd.concat(rows) if rows else pd.DataFrame()

In [None]:
# Aggregate every scored jump, then derive the total and mean of the scores.
aggregate = aggregateResults(jumpResults)
# Named `summary` rather than `sum` so that the built-in sum() is not
# shadowed for the rest of the notebook session.
summary = pd.DataFrame({ 'totalSpeed': [ aggregate.score.sum(), ], 'meanSpeed': [ aggregate.score.mean(), ], }, index = [ 'totalSpeed'],)
display(aggregate)
display(summary)

In [None]:
def initializePlot(jumpTitle):
    """
    Create an empty Bokeh figure for plotting speed against time.

    Arguments
    ---------
        jumpTitle
    The text displayed as the plot's title.

    Returns
    -------
    A new 900x500 `bp.figure` with the x axis labeled 'seconds from exit'
    over the range 0-40 and the y axis labeled 'km/h' over the range 0-550.
    """
    figureOptions = {
        'title': jumpTitle,
        'height': 500,
        'width': 900,
        'x_axis_label': 'seconds from exit',
        'y_axis_label': 'km/h',
        'x_range': (0,40),
        'y_range': (0, 550),
    }
    return bp.figure(**figureOptions)

In [None]:
def _graphSegment(plot, x0 = 0.0, y0 = 0.0, x1 = 0.0, y1 = 0.0, lineWidth = 1, color = 'black'):
    """
    Draw a single straight segment from (x0, y0) to (x1, y1) on `plot`.

    Arguments
    ---------
        plot
    The object to draw on; its `segment()` method is called.

        x0, y0
    Coordinates of the segment's starting point.

        x1, y1
    Coordinates of the segment's end point.

        lineWidth
    Passed to `segment()` as `line_width`.

        color
    Passed to `segment()` as `color`.
    """
    # segment() receives sequences of coordinates, so each scalar is wrapped
    # in a one-element list.
    endpoints = {
        'x0': [ x0, ],
        'y0': [ y0, ],
        'x1': [ x1, ],
        'y1': [ y1, ],
    }
    plot.segment(line_width = lineWidth, color = color, **endpoints)

In [None]:
def graphJumpResult(plot,
                    jumpResult,
                    lineColor = 'green',
                    legend = 'speed',
                    showIt = True):
    """
    Draw a jump's speed curve on `plot` and, optionally, mark its score and
    display the plot.

    Arguments
    ---------
        plot
    The figure to draw on.

        jumpResult
    The processed jump.  Its `data` must expose `plotTime` and `vKMh`; when
    `showIt` is true its `score` and `scores` attributes are used as well,
    with `scores[score]` giving the x position (time) of the score marker.

        lineColor
    The color of the speed curve.

        legend
    The legend label for the speed curve.

        showIt
    When true, draw the score markers and show the plot right away.  When
    false only the speed curve is added, which lets several jumps be
    overlaid on the same plot and shown later by the caller.
    """
    data = jumpResult.data
    plot.line(data.plotTime, data.vKMh, legend_label = legend, line_width = 2, line_color = lineColor)

    if showIt:
        score = jumpResult.score
        scoreTime = jumpResult.scores[score]
        # Thick vertical line at the score's time, from the x axis up to the
        # score, flanked by thin lines 1.5 s on either side.
        # NOTE(review): the +/-1.5 s lines presumably delimit the scoring
        # window around the score -- confirm.
        _graphSegment(plot, scoreTime, 0.0, scoreTime, score, 3, 'green')
        _graphSegment(plot, scoreTime+1.5, 0.0, scoreTime+1.5, score, 1, 'darkseagreen')
        _graphSegment(plot, scoreTime-1.5, 0.0, scoreTime-1.5, score, 1, 'darkseagreen')
        # NOTE(review): recent Bokeh releases deprecate per-marker glyph
        # methods such as y() in favor of scatter(marker = 'y', ...) --
        # confirm against the Bokeh version in use before switching.
        plot.y(x = [ scoreTime, ], y = [ score, ], size = [ 20, ], line_color = 'green', line_width = 3)
        bp.show(plot)

In [None]:
# Walk through every processed jump: show a headline with its status, then --
# for jumps not flagged invalid -- an individual plot.  Those jumps' speed
# curves are also accumulated on allJumpsPlot.
allJumpsPlot = initializePlot('All jumps in set')
jumpNumber = 0
mixColor = 0
for resultRef, jumpResult in jumpResults.items():
    jumpIsInvalid = 'invalid' in jumpResult.result
    if jumpIsInvalid:
        validJumpStatus = '<hr><h1><span style="color: %s">%s jump - %s</span></h1>' % (jumpResult.color, resultRef, jumpResult.result)
    else:
        validJumpStatus = '<hr><h1><span style="color: %s">%s jump - %s - score = %.02f km/h</span></h1>' % (jumpResult.color, resultRef, jumpResult.result, jumpResult.score)
    display(widgets.HTML(validJumpStatus))

    maxSpeed = jumpResult.maxSpeed
    window = jumpResult.window
    # The color index advances for every jump, invalid ones included.
    mixColor = (mixColor+1)%len(SPEED_COLORS)
    if jumpIsInvalid:
        continue

    # 3.2808 converts the window altitudes from meters to feet.
    speedSummary = '<h3>Max speed = {0:,.0f}; '.format(maxSpeed)
    windowSummary = 'exit at %d m (%d ft), end scoring window at %d m (%d ft)</h3>'%(window.start, 3.2808*window.start, window.end, 3.2808*window.end)
    display(widgets.HTML(speedSummary+windowSummary))

    # The individual plot always uses the first color; the combined plot
    # uses the cycling color and defers display (showIt = False).
    graphJumpResult(initializePlot(resultRef), jumpResult, lineColor = SPEED_COLORS[0])
    graphJumpResult(allJumpsPlot, jumpResult, lineColor = SPEED_COLORS[mixColor], legend = '%s - %.2f' % (resultRef, jumpResult.score), showIt = False)

## All jumps

In [None]:
# Display the combined plot holding the speed curve of every jump that was
# not flagged invalid.
bp.show(allJumpsPlot)