The purpose of this notebook is to create an interactive CPG chart from the pre-computed max speed matrices. It combines and improves upon the notebooks "CPG Regression" and "CPG Chart (computations from scratch)".

In [217]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LinearRegression
import datetime

A helper function that reads in a single max speed matrix file. It is meant to be called once for each file.

In [220]:
def read_max_speed_matrix(INDIR, file):
    '''Read one max speed matrix CSV together with its leading metadata lines.

    The file may start with any number of lines beginning with ``#``. Each
    such line of the form ``#key:value`` is stored in a metadata dict; the
    remaining lines are parsed as CSV.

    Parameters
    ----------
    INDIR : str
        Directory that contains the file (a trailing separator is optional).
    file : str
        Name of the CSV file inside ``INDIR``.

    Returns
    -------
    list
        ``[metadata, df]`` where ``metadata`` maps each key (with ``#``
        removed) to its value (with the newline removed, other whitespace
        kept) and ``df`` is the DataFrame read from the non-metadata lines.

    Raises
    ------
    ValueError
        If ``file`` does not end with ``.csv``.
    '''
    if not file.endswith('.csv'):
        # Previously this fell through to an UnboundLocalError on return.
        raise ValueError('Expected a .csv file, got: ' + repr(file))

    path = os.path.join(INDIR, file)
    metadata = {}
    rows_to_skip = 0
    with open(path, 'r') as f:
        for line in f:
            if not line.startswith('#'):
                break
            rows_to_skip += 1
            # Split on the first colon only, so values that themselves
            # contain colons (e.g. timestamps) are kept intact.
            key, separator, val = line.partition(':')
            if not separator:
                # A '#' line without "key:value" carries no metadata, but it
                # still has to be skipped when the CSV body is parsed.
                continue
            metadata[key.replace('#', '')] = val.replace('\n', '')

    return [metadata, pd.read_csv(path, skiprows=rows_to_skip)]

In [225]:
def gradient_speed_array(gradient, CPG_matrix, gradient_range):
    '''Return the column of ``CPG_matrix`` that belongs to ``gradient``.

    Parameters
    ----------
    gradient : scalar
        Gradient to look up; it must equal one of the entries of
        ``gradient_range`` exactly.
    CPG_matrix : numpy.ndarray
        2-D array indexed as ``CPG_matrix[:, column]``. Its columns must be
        in the same order as ``gradient_range``.
    gradient_range : array-like
        Gradient value of each column of ``CPG_matrix``.

    Returns
    -------
    numpy.ndarray
        ``CPG_matrix[:, k]`` where ``k`` is the first position at which
        ``gradient_range == gradient``.

    Raises
    ------
    IndexError
        If ``gradient`` does not occur in ``gradient_range``, or if the
        matching position is beyond the last column of ``CPG_matrix``.
    '''
    # np.asarray makes the comparison element-wise for plain lists as well.
    matching_columns = np.where(np.asarray(gradient_range) == gradient)[0]
    if matching_columns.size == 0:
        raise IndexError(
            'gradient {!r} is not present in gradient_range'.format(gradient))
    return CPG_matrix[:, matching_columns[0]]

In [226]:
def max_speed_matrix_and_metadata_by_date(start_date, end_date,
                                          indir=r'data/max_speed_matrices/'):
    '''Load every max speed matrix whose file date lies in a date range.

    Only ``.csv`` files are considered. The date of a file is the part of its
    name before the first ``:``, formatted as ``YYYY-MM-DD``.

    Parameters
    ----------
    start_date, end_date : str
        Inclusive bounds of the range, formatted as ``YYYY-MM-DD``.
    indir : str, optional
        Directory holding the matrix files. Defaults to the directory this
        notebook has always used.

    Returns
    -------
    list
        One ``read_max_speed_matrix`` result per selected file, ordered by
        file name.

    Raises
    ------
    ValueError
        If a bound, or the date prefix of a ``.csv`` file name, cannot be
        parsed as ``YYYY-MM-DD``.
    '''
    range_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    range_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    selected_files = []
    # os.listdir returns names in arbitrary order; sort for reproducibility.
    for file in sorted(os.listdir(indir)):
        if not file.endswith('.csv'):
            continue
        date_prefix = file.split(':')[0]
        try:
            file_date = datetime.datetime.strptime(date_prefix, '%Y-%m-%d')
        except ValueError:
            raise ValueError(
                'Cannot read a YYYY-MM-DD date from file name {!r}'.format(file))
        if range_start <= file_date <= range_end:
            selected_files.append(file)

    # Guarantee a trailing separator so the directory can be combined with
    # the file name either by concatenation or by os.path.join.
    indir_with_sep = os.path.join(indir, '')
    return [read_max_speed_matrix(indir_with_sep, file) for file in selected_files]

In [236]:
def compute_CPG_matrix(df_list, time_interval, gradient_interval):
    '''Combine several max speed DataFrames into one element-wise maximum.

    Parameters
    ----------
    df_list : sequence of pandas.DataFrame
        One max speed matrix per file.
    time_interval : array-like of int
        Row positions to keep (passed to ``.iloc``).
    gradient_interval : array-like
        Columns to keep (passed to ``df[...]``).
        NOTE(review): assumes these match the DataFrames' column labels -
        confirm against the CSV headers.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(time_interval), len(gradient_interval))``
        holding, for every cell, the maximum over all DataFrames.

    Raises
    ------
    ValueError
        If ``df_list`` is empty.
    '''
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    max_speed_list = [
        df[gradient_interval].iloc[time_interval].values
        for df in df_list
    ]
    if not max_speed_list:
        raise ValueError('df_list is empty: there are no max speed matrices to combine')
    return np.maximum.reduce(max_speed_list)

In [262]:
# Analysis configuration.
minutes = 10
t = np.arange(60*minutes + 1)         # row positions 0 .. 60*minutes (presumably seconds - TODO confirm)
g = np.arange(-10, 11)                # gradients -10 .. 10 used to build the CPG matrix
all_gradients = np.arange(-100, 100)  # gradients -100 .. 99

start_date = '2018-01-05'
end_date = '2018-01-20'

# Each entry is [metadata, DataFrame] for one file in the date range.
matrices_by_date = max_speed_matrix_and_metadata_by_date(start_date, end_date)
if not matrices_by_date:
    # Without this guard an empty selection fails below with a bare IndexError.
    raise ValueError('No max speed matrices found between {} and {}'.format(start_date, end_date))

# Transpose the list of pairs into (all metadata, all DataFrames).
unzipped = list(zip(*matrices_by_date))
metadata_by_date = unzipped[0]
df_by_date = unzipped[1]

# Rows follow t and columns follow g.
CPG_matrix_by_date = compute_CPG_matrix(df_by_date, t, g)

In [265]:
def compute_CP_Wprime(CPG_matrix, gradient, gradient_range, time_range):
    '''Estimate CP and Wprime for one gradient by linear regression.

    Fits the model ``t*P(t) = t*CP + Wprime``, where ``P(t)`` is the column
    of ``CPG_matrix`` selected for ``gradient``. Entries where ``P(t)`` is
    zero are left out of the fit.

    Parameters
    ----------
    CPG_matrix : numpy.ndarray
        Matrix whose columns are looked up through ``gradient_speed_array``.
    gradient : scalar
        Gradient to fit.
    gradient_range : array-like
        Gradient value of each column of ``CPG_matrix``.
    time_range : numpy.ndarray
        Time value of each row of ``CPG_matrix``.

    Returns
    -------
    tuple
        ``(CP, Wprime)``: the fitted slope and intercept.
    '''
    speeds = gradient_speed_array(gradient, CPG_matrix, gradient_range)

    # Drop the samples with zero speed, together with their times.
    keep = np.nonzero(speeds)
    speeds_kept = speeds[keep]
    times_kept = time_range[keep]
    n_samples = len(times_kept)

    # sklearn expects 2-D inputs, so both sides become column vectors.
    X = times_kept.reshape((n_samples, 1))
    y = np.array(speeds_kept * times_kept).reshape((n_samples, 1))

    model = LinearRegression()
    model.fit(X, y)

    slope = model.coef_[0][0]
    intercept = model.intercept_[0]
    return (slope, intercept)

In [274]:
from ipywidgets import interact
import numpy as np

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models.widgets import RangeSlider
# Configure Bokeh to render its output inline in this notebook.
output_notebook()

In [266]:
# Fit CP and Wprime once for every gradient in g.
CP_vals = []
Wprime_vals = []

for gradient in g:
    # CPG_matrix_by_date was built with gradient_interval=g, so its columns
    # are ordered like g. Looking gradients up in all_gradients would point
    # at columns that do not exist in this matrix.
    CP, Wprime = compute_CP_Wprime(CPG_matrix_by_date, gradient, g, t)
    CP_vals.append(CP)
    Wprime_vals.append(Wprime)

In [268]:
# Initial chart: the gradient-0 column of the CPG matrix plotted against t.
# The first sample (t = 0) is left out of both axes.
p = figure(title="CP chart", plot_height=300, plot_width=600, y_range=(0,7))
# The matrix columns are ordered like g (it was built with gradient_interval=g),
# so the gradient has to be looked up in g, not in all_gradients.
r = p.line(t[1:], gradient_speed_array(0, CPG_matrix_by_date, g)[1:], color="#2222aa", line_width=3)

In [269]:
# Display the figure. notebook_handle=True asks Bokeh for a handle so that
# later push_notebook() calls can update this plot in place.
show(p, notebook_handle=True)

In [272]:
def update(gradient):
    '''Redraw the chart line for ``gradient`` and push the change to the notebook.

    Replaces the y-data of the line ``r`` with the column of
    ``CPG_matrix_by_date`` for ``gradient``, leaving out the first sample to
    match the x-data.
    '''
    # The matrix columns are ordered like g (it was built with
    # gradient_interval=g), so the lookup must use g, not all_gradients.
    r.data_source.data['y'] = gradient_speed_array(gradient, CPG_matrix_by_date, g)[1:]
    push_notebook()

In [273]:
# Slider over the gradients that CPG_matrix_by_date covers. The bounds come
# from g so the slider cannot drift out of sync with the matrix columns.
interact(update, gradient=(int(g.min()), int(g.max())))

<function __main__.update>

In [None]:
def update(gradient):
    '''Redraw the chart line for ``gradient`` and push the change to the notebook.

    NOTE(review): this redefines the ``update`` function from an earlier
    cell. Running this cell rebinds the name only; a callback that was
    already handed to ``interact`` keeps the function object it was given.
    '''
    # The matrix columns are ordered like g (it was built with
    # gradient_interval=g), so the lookup must use g, not all_gradients.
    r.data_source.data['y'] = gradient_speed_array(gradient, CPG_matrix_by_date, g)[1:]
    push_notebook()