**Predicting Functional Threshold Power (FTP)**

In [124]:
import os
import datetime
file = '/Users/kinley/git/ftp-proba/data/ref/ftp_4917647.csv'
ftp_pd = pd.read_csv(file, index_col='date', usecols=['date', 'ftp'], parse_dates=True)

def __get_ftp(timestamp):
    idx = ftp_pd.index.get_loc(timestamp, method='pad')
    ftp = ftp_pd.iloc(idx)
    print(ftp)
    
__get_ftp(datetime.datetime(2018, 1, 1))

AttributeError: 'DataFrame' object has no attribute 'get_loc'

In [119]:
import os
import pandas as pd
import strava_export as strava
from tabulate import tabulate as tab

athletes = pd.read_csv(os.path.join(strava.pro_dir, 'athletes.csv'))
rides = pd.read_csv(os.path.join(strava.pro_dir, 'rides.csv'))

rc = rides.groupby('id').size().rename('num_rides')
athletes = athletes.merge(rc.to_frame(), left_on='id', right_on='id')

print(tab(athletes, tablefmt='psql', floatfmt='.4f'))

FileNotFoundError: File b'data/process/athletes.csv' does not exist

**Feature Engineering**

Automate the feature engineering process using _**Deep Feature Synthesis**_:
* https://docs.featuretools.com/automated_feature_engineering/afe.html
* http://www.jmaxkanter.com/static/papers/DSAA_DSM_2015.pdf

In [118]:
import featuretools as ft
print('featuretools version: {}'.format(ft.__version__))

es = ft.EntitySet(id='ftp_proba')

es = es.entity_from_dataframe(
    entity_id = 'athletes',
    dataframe = athletes[['id', 'sex', 'weight']],
    index = 'id',
    variable_types = {
        'id': ft.variable_types.Index,
        'sex': ft.variable_types.Categorical,
        'weight': ft.variable_types.Numeric
    }
)

es = es.entity_from_dataframe(
    entity_id = 'rides',
    dataframe = rides,
    index = 'ride_id',
    make_index = True,
    time_index = 'timestamp'
)

rel = ft.Relationship(
    es['athletes']['id'],
    es['rides']['id']
)
es = es.add_relationship(rel)

print(es)

cutoff_dt = es['rides'].df['timestamp']
print(cutoff_dt)
# check index matches!

featuretools version: 0.3.1
Entityset: ftp_proba
  Entities:
    athletes [Rows: 2, Columns: 3]
    rides [Rows: 784, Columns: 25]
  Relationships:
    rides.id -> athletes.id
766   2013-09-27 16:28:50
548   2014-01-17 13:49:29
553   2014-01-18 17:05:48
732   2014-01-27 16:03:02
639   2014-01-29 13:37:32
386   2014-02-01 11:13:32
607   2014-02-03 08:19:34
722   2014-02-07 19:50:38
539   2014-02-09 08:14:33
210   2014-02-09 13:38:34
554   2014-02-14 08:20:05
731   2014-02-22 12:06:28
750   2014-02-22 17:07:16
457   2014-02-23 13:13:21
43    2014-03-06 07:26:01
231   2014-03-08 11:46:51
739   2014-03-09 20:51:46
23    2014-03-10 16:27:35
415   2014-03-10 16:43:06
384   2014-03-11 07:27:27
258   2014-03-11 16:47:31
355   2014-03-13 07:26:47
488   2014-03-15 14:55:39
498   2014-03-17 17:53:06
105   2014-03-17 18:13:30
292   2014-03-19 07:16:09
367   2014-03-20 07:19:01
271   2014-03-20 16:57:07
207   2014-03-24 07:23:52
499   2014-03-27 07:28:11
              ...        
265   2018-05-10 0