# Developing the Activity class
__Keith Cheveralls__<br>
__March 2019__



In [None]:
import os
import re
import sys
import git
import gzip
import time
import json
import pickle
import fitparse
import datetime
import psycopg2
import subprocess
import sqlalchemy
import numpy as np
import pandas as pd
import seaborn as sns

from scipy import stats
from psycopg2 import sql
from fitparse import FitFile
import matplotlib as mpl
from matplotlib import pyplot as plt

In [None]:
from cycler import cycler
# unpack the 'Set1' palette into named colors (the unpacking requires exactly nine entries)
red, blue, green, purple, orange, yellow, brown, pink, gray  = sns.color_palette('Set1')
# use a six-color subset of those as matplotlib's default color cycle
mpl.rcParams['axes.prop_cycle'] = cycler(color=[blue, orange, green, red, brown, gray])

In [None]:
%matplotlib 
%load_ext autoreload
%autoreload 2

In [None]:
# put the dbutils directory (relative to the working directory) first on the import path
sys.path.insert(0, '../../dbutils/')
import dbutils

# put the parent directory first on the import path (presumably where cypy2 lives)
sys.path.insert(0, '../')
import cypy2

# seaborn's current color palette, indexed by the plotting cells below
colors = sns.color_palette()

# root directory of the local strava export, plus example FIT files that the cells
# below look up under <root>/activities (names presumably indicate the recording device)
root = '/home/keith/Downloads/export_7989839-1'
wahoo_example = '2326365683.fit.gz'
garmin_example = '2122584483.fit.gz'
garmin_indoor_example = '2324139976.fit.gz'

### Load a single activity directly from a FIT file

In [None]:
a = cypy2.LocalActivity.from_fit_file(os.path.join(root, 'activities', wahoo_example))

In [None]:
# NOTE(review): `conn` is only created in the database section further down this notebook;
# that cell has to be run before this one
a.to_db(conn, kind='processed')

### Load all activities from a cached Strava export

In [None]:
strava_export = cypy2.StravaExportManager(root, from_cache=True)
manager = cypy2.ActivityManager.from_strava_export(strava_export, raise_errors=True)

In [None]:
# testing id_from_fit
cypy2.LocalActivity.id_from_fit(file_id=pd.Series(dict(time_created=pd.to_datetime('2018-01-02 3:45:56'))))

### Sanity checks

In [None]:
len(manager.activities()), manager.metadata().shape

In [None]:
# check activity types
manager.metadata().activity_type.unique()

In [None]:
# per-device activity counts, side by side with the per-device column sums
# (the sums of the sensor flag columns give the number of activities with each sensor)
md = manager.metadata()
by_device = md.groupby('device_model')
pd.concat((by_device.count().activity_id, by_device.sum()), axis=1)

In [None]:
# rides with power but without heart rate
manager.metadata(activity_type='ride', heart_rate_flag=False, power_flag=True)

In [None]:
# runs with heart rate but without an antplus_device_type column in device_info
# these *should* be only runs from fenix3
activities = []
for a in manager.activities(activity_type='run'):
    fit_data = a._fit_data
    missing_antplus = 'antplus_device_type' not in fit_data['device_info'].columns
    if missing_antplus and 'heart_rate' in fit_data['record'].columns:
        activities.append(a)

# distinct device models among the matching activities
{activity.metadata.device_model for activity in activities}

In [None]:
# rides that recorded cadence but not power - should all be from fr220
activities = []
for a in manager.activities(activity_type='ride'):
    columns = a._fit_data['record'].columns
    # skip anything lacking cadence, and anything that does have power
    if 'cadence' not in columns or 'power' in columns:
        continue
    activities.append(a)

# distinct device models among the matching activities
{activity.metadata.device_model for activity in activities}

In [None]:
# check that the first and last events are starts and stops, respectively,
# and check that the first and last event timestamps match the first/last record's timestamp

# currently, there are four activities - all runs - for which there is only a single 'start' event
# presumably, these are activities for which the device died during the activity
for a in manager.activities():
    events = a.events()
    if events.iloc[0].event_type != 'start':
        print('First event is not a start for %s' % a.metadata.activity_id)
    if events.iloc[-1].event_type != 'stop':
        print('Last event is not a stop for %s' % a.metadata.activity_id)

# largest allowed offset, in seconds, between an event and the corresponding record
max_offset = 1

for a in manager.activities():
    events, records = a.events(), a.records()
    first_offset = events.iloc[0].event_time - records.iloc[0].timepoint
    last_offset = events.iloc[-1].event_time - records.iloc[-1].timepoint

    # compare the absolute total duration: `.seconds` is only the seconds *component* of a
    # timedelta, so it ignores whole days and wraps to ~86399 for small negative offsets
    if abs(first_offset.total_seconds()) > max_offset:
        print('First event and record timestamps not equal for %s' % a.metadata.activity_id)
    if abs(last_offset.total_seconds()) > max_offset:
        print('Last event and record timestamps not equal for %s' % a.metadata.activity_id)

In [None]:
# concatenate the session messages of every edge520 ride into a single dataframe
edge520_rides = manager.activities(activity_type='ride', device_model='edge520')
dcat = pd.concat([ride._fit_data['session'] for ride in edge520_rides])

In [None]:
dcat = dcat.dropna(axis='columns', how='all')

In [None]:
dcat.tail()

### Insert __raw__ data into the database

This requires that we have an ActivityManager instance, `manager`, initialized from a `StravaExportManager` (i.e., a local cache of parsed FIT file data). 

In [None]:
# connection parameters for the local postgres database
user = 'keith'
host = 'localhost'
dbname = 'cypy2'
conn = psycopg2.connect(user=user, host=host, dbname=dbname)
# NOTE(review): this cursor is not used by any of the cells visible in this notebook
cursor = conn.cursor()

In [None]:
dbutils.get_table_sizes(conn)

In [None]:
dbutils.get_rows(conn, 'metadata', {'activity_id': '20190301001401'}).to_json(date_format='iso')

In [None]:
# list the column names of every table in the database, one table per line
for table_name in dbutils.get_table_names(conn):
    column_names = dbutils.get_column_names(conn, table_name)
    print('%s: %s' % (table_name, column_names))

In [None]:
# truncate all tables
# WARNING: destructive - this runs truncate_table on every table returned by get_table_names
# NOTE(review): `freal=True` is presumably the confirmation flag that makes the truncation real - confirm
for table in dbutils.get_table_names(conn):
    dbutils.truncate_table(conn, table, freal=True)

In [None]:
# attempt to insert all *raw* activity data
# (iterates over `manager`, the ActivityManager built from the strava export above)
start = time.time()

for activity in manager.activities():
    # '\r' returns to the start of the line, so each id overwrites the previous one as a progress display
    sys.stdout.write('\r%s' % activity.metadata.activity_id)
    activity.to_db(conn, kind='raw')

print('\nElapsed time: %0.2f' % (time.time() - start))

### Insert __processed__ data from all activities

In [None]:
user = 'keith'
host = 'localhost'
dbname = 'cypy2'
conn = psycopg2.connect(user=user, host=host, dbname=dbname)

In [None]:
# initialize a manager from the database
manager = cypy2.ActivityManager.from_db(conn, kind='metadata')

In [None]:
# insert *processed* data from all activities
start = time.time()

for activity in manager.activities():
    # '\r' returns to the start of the line, so each id overwrites the previous one as a progress display
    sys.stdout.write('\r%s' % activity.metadata.activity_id)
    
    # load and process the raw data
    activity.load(conn, kind='raw')
    activity.process()
    
    # write the processed result back to the database
    activity.to_db(conn, kind='processed', verbose=False)

print('\nElapsed time: %0.2f' % (time.time() - start))

### Load all activities from the database
TODO: in `from_db`, load the processed data in addition to the raw data

In [None]:
manager = cypy2.ActivityManager.from_db(conn, kind='metadata')

In [None]:
manager.metadata().shape

In [None]:
manager.metadata(activity_type='ride').groupby(['heart_rate_flag', 'power_flag']).count()

### Development: inserting trajectories as LineString geometries

In [None]:
a = manager.activities('20190301001401').pop()
a.load(conn, kind='raw')
a.process()

In [None]:
a.to_db(conn, kind='processed')

In [None]:
a.records('proc')[['lon', 'lat']].values.tolist()

In [None]:
# for LINESTRING (lon, lat only)
# GeoJSON LineString built from the processed records' (lon, lat) pairs
d = {
    'type': 'LineString',
    'coordinates': a.records('proc')[['lon', 'lat']].values.tolist()
}

# the activity id is hard-coded in the where clause; the GeoJSON string is the only query parameter
# NOTE(review): SRID 4269 is NAD83, while the scratch `test` table later in this notebook
# uses 4326 (WGS84) - confirm which SRID the geom column expects
query = (
'update proc_records set geom = ST_SetSRID(ST_GeomFromGeoJSON(%s), 4269) '
'where activity_id = \'20190301001401\''
)

In [None]:
dbutils.execute_query(conn, query, (json.dumps(d),), commit=True)

### Figuring out how PostGIS handles NaNs in LineStrings

`np.nan` throws an error, as expected, and nulls are converted to zero (no matter which field - lat/lon/z/m - they're in).

In [None]:
# recreate the scratch table from a clean slate; `if exists` keeps the drop from
# failing on the first run, when there is no `test` table yet
dbutils.execute_query(conn, 'drop table if exists test;', commit=True)
dbutils.execute_query(conn, 'create table test (geom geometry(LineString, 4326));', commit=True)

In [None]:
d = {'type': 'LineString', 'coordinates': [[None, None, None, None], [-120, 38, 300, 1], [-121, 37, 400, 2]]}
json.dumps(d)

In [None]:
dbutils.execute_query(conn, 'insert into test values (St_SetSRID(ST_Force2D(ST_GeomFromGeoJSON(%s)), 4326))', (json.dumps(d),), commit=True)

In [None]:
dbutils.execute_query(conn, 'select ST_AsText(geom) from test')

### Plotting activity records

In [None]:
a = cypy2.LocalActivity.from_fit_file(os.path.join(root, 'activities', garmin_example))

In [None]:
# 2018 horseshoe meadows road
a = manager.activities('20180923163103').pop()
a.load(conn, kind='raw')
a.process()

In [None]:
a.plot(['power', 'vam'], xmode='seconds', overlay=False, xrange=[10000, 10888], halflife=1)

In [None]:
a.plot(['power', 'heart_rate', 'cadence', 'vam', 'speed'], xmode='hours', overlay=False, xrange=[2, 3], halflife=5)

In [None]:
a.plot(['power', 'altitude'], xmode='hours', overlay=True, xrange=[], halflife=11)

In [None]:
recsw = a.records('raw')
recsp = a.records('proc')

### Development: calculating VAM from altitude

Legacy plots from when I was figuring out how to calculate VAM using a moving linear regression. Here we just compare the VAM calculated in `Activity._calculate_vam` to an EWM of the derivative of the altitude.

In [None]:
# 2018 horseshoe meadows road
a = manager.activities('20180923163103').pop()

In [None]:
# raw altitude column, shifted and rescaled
# NOTE(review): (raw - 2500)/5 equals raw/5 - 500, which presumably undoes the FIT altitude
# encoding (scale 5, offset 500) - confirm
alt = a.records('raw').altitude
alt = (alt - 2500)/5.

In [None]:
vam = a._calculate_vam(alt)

In [None]:
# compare per-sample altitude differences, their EWM, and the VAM computed above
fig, ax = plt.subplots(1, 1)

x = alt.index.values
y = alt.values

# first differences of the altitude trace, drawn in light gray
dy = np.diff(y)
ax.plot(x[1:], dy, color=np.ones(3)*.7)

# pandas exponentially weighted moving average
naive_slopes = pd.Series(dy).ewm(halflife=7).mean()
ax.plot(x[1:], naive_slopes.values, color=colors[0])

# moving linear regression
# NOTE(review): the /3600 presumably converts an hourly VAM to per-second so that it is
# comparable to the per-sample differences - confirm
ax.plot(x, vam/3600, color=colors[1])

ax.set_xlim([0, 400])
ax.set_ylim([0, .5])

# the altitude itself on a secondary y-axis (plotted against positional index rather than x)
ax2 = ax.twinx()
ax2.plot(alt.values, color=colors[2])
ax2.set_ylim([1140, 1220])

### Database debugging

In [None]:
user = 'keith'
host = 'localhost'
dbname = 'cypy2'
conn = psycopg2.connect(user=user, host=host, dbname=dbname)

In [None]:
selector={'activity_id': '20191122010203', 'date_created': '2019-03-12 22:23:53.694945-07'}

In [None]:
# build `select * from proc_records where (col1, col2, ...) = (%s, %s, ...)` from `selector`;
# the values themselves are passed separately when the query is executed
kwargs = {'table': sql.Identifier('proc_records')}
kwargs['column'] = sql.SQL('*')

cols, vals = list(selector.keys()), list(selector.values())
kwargs.update({
    'filter_columns': sql.SQL(', ').join([sql.Identifier(col) for col in cols]),
    # one placeholder per selector value, so the query stays valid for any number of keys
    # (the original hard-coded exactly two and built an unused list of literals)
    'placeholders': sql.SQL(', ').join(sql.Placeholder() * len(vals)),
})
query = sql.SQL('select {column} from {table} where ({filter_columns}) = ({placeholders})').format(**kwargs)

# render the composed query as a string for inspection
query.as_string(conn)

In [None]:
dbutils.execute_query(conn, query, vals)

In [None]:
dbutils.insert_row(conn, 'metadata', {'activity_id': 'testid'})

In [None]:
dbutils.insert_row(conn, 'proc_records', {'activity_id': 'testid', 'commit_hash': 'testhash'})

In [None]:
d = pd.read_sql('select activity_id, date_created from proc_records', conn)
d.sort_values(by='date_created', ascending=False).iloc[0]

In [None]:
conn.commit()

In [None]:
conn.rollback()

In [None]:
# all activity_ids
dbutils.get_rows(conn, 'metadata', column='activity_id').values.flatten()

In [None]:
# events for one activity
dbutils.get_rows(conn, 'raw_events', selector={'activity_id': a.metadata.activity_id})

In [None]:
# the raw records as a dataframe
# (the selector must be a {column: value} dict, as in the other get_rows calls;
# the original `{'activity_id', ...}` was a set literal)
d = dbutils.get_rows(conn, 'raw_records', selector={'activity_id': a.metadata.activity_id})
# NOTE(review): presumably each raw_records row holds per-record lists keyed by column,
# so the last returned row expands into a dataframe - confirm
pd.DataFrame(d.to_dict(orient='records').pop())

In [None]:
# query debugging: compose a bare select statement and render it as a string
query = sql.SQL('select {column} from {table}').format(
    column=sql.SQL('*'),
    table=sql.Identifier('tablename'),
)

query.as_string(conn)

### Possible pandas bug (on v0.22.0)

In [None]:
pd.__version__

In [None]:
timestamp = pd.to_datetime('2019-01-01')

In [None]:
# d.timestamp is a timestamp, as expected
d = pd.Series(dict(value=None, timestamp=None))
d.at['timestamp'] = timestamp
type(d.timestamp)

In [None]:
# d.timestamp is cast to an int
d = pd.Series(dict(value=None))
d.at['timestamp'] = timestamp
print(type(d.timestamp))

# but now it's a timestamp
d.at['timestamp'] = timestamp
print(type(d.timestamp))