Skip to content

Commit

Permalink
EHN report complementary data in the power profile (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
glemaitre committed Feb 10, 2018
1 parent 56f54a5 commit 21f7249
Show file tree
Hide file tree
Showing 12 changed files with 51,822 additions and 47,306 deletions.
35 changes: 35 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,38 @@ install_manifest.txt
coverage/

cythonize.dat
*.pyc
*.pyo
__pycache__
*.so
*.o

*.egg
*.egg-info

Cython/Compiler/*.c
Cython/Plex/*.c
Cython/Runtime/refnanny.c
Cython/Tempita/*.c

Tools/*.elc

TEST_TMP/
build/
wheelhouse*/
!tests/build/
dist/
.gitrev
.coverage
*.orig
*.rej
*.dep
*.swp
*~

tags
TAGS
MANIFEST

.tox
*.c
181 changes: 118 additions & 63 deletions skcycling/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,35 @@ def add_activities(self, filenames):
>>> from skcycling.base import Rider
>>> rider = Rider()
>>> rider.add_activities(load_fit()[0])
>>> rider.power_profile_.head() # doctest: +NORMALIZE_WHITESPACE
2014-05-07
00:00:01 500.000000
00:00:02 475.500000
00:00:03 469.333333
00:00:04 464.000000
00:00:05 463.000000
>>> rider.power_profile_.head()
2014-05-07 12:26:22
cadence 00:00:01 78.000000
00:00:02 64.000000
00:00:03 62.666667
00:00:04 62.500000
00:00:05 64.400000
"""
filenames = validate_filenames(filenames)
activities_pp = [activity_power_profile(bikeread(f),
n_jobs=self.n_jobs)
activities_pp = [activity_power_profile(bikeread(f))
for f in filenames]
activities_pp = pd.concat(activities_pp, axis=1)

if self.power_profile_ is not None:
self.power_profile_ = pd.concat(
[self.power_profile_, activities_pp], axis=1)
try:
self.power_profile_ = self.power_profile_.join(activities_pp,
how='outer')
except ValueError as e:
if 'columns overlap but no suffix specified' in e.args[0]:
raise ValueError('One of the activity was already added'
' to the rider power-profile. Remove this'
' activity before to try to add it.')
else:
raise
else:
self.power_profile_ = activities_pp

def delete_activities(self, dates):
def delete_activities(self, dates, time_comparison=False):
"""Delete the activities power-profile from some specific dates.
Parameters
Expand All @@ -90,6 +97,10 @@ def delete_activities(self, dates):
activities for which the dates are included in the range will be
deleted.
time_comparison : bool, optional
Whether to make a strict comparison using both the date and the time, or
to relax the constraint and compare only the date.
Returns
-------
None
Expand All @@ -100,35 +111,47 @@ def delete_activities(self, dates):
>>> from skcycling import Rider
>>> rider = Rider.from_csv(load_rider())
>>> rider.delete_activities('07 May 2014')
>>> print(rider) # doctest: +NORMALIZE_WHITESPACE
>>> print(rider)
RIDER INFORMATION:
power-profile:
2014-05-11 2014-07-26
00:00:01 717.00 750.000000
00:00:02 717.00 741.000000
00:00:03 590.00 731.666667
00:00:04 552.25 719.500000
00:00:05 552.60 712.200000
2014-05-11 09:39:38 2014-07-26 16:50:56
cadence 00:00:01 100.000000 60.000000
00:00:02 89.000000 58.000000
00:00:03 68.333333 56.333333
00:00:04 59.500000 59.250000
00:00:05 63.200000 61.000000
"""
def _strict_comparison(dates_pp, date, strict_equal):
if strict_equal:
return dates_pp == date
else:
return np.bitwise_and(
dates_pp >= date,
dates_pp <= pd.Timestamp(date) + pd.DateOffset(1))

if isinstance(dates, tuple):
if len(dates) != 2:
raise ValueError("Wrong tuple format. Expecting a tuple of"
" format (start_date, end_date). Got {!r}"
" instead.".format(dates))
mask_date = np.bitwise_and(self.power_profile_.columns >= dates[0],
self.power_profile_.columns <= dates[1])
mask_date = np.bitwise_and(
self.power_profile_.columns >= dates[0],
self.power_profile_.columns <= pd.Timestamp(dates[1]) +
pd.DateOffset(1))
elif isinstance(dates, list):
mask_date = np.any([self.power_profile_.columns == d
for d in dates],
axis=0)
mask_date = np.any(
[_strict_comparison(self.power_profile_.columns, d,
time_comparison)
for d in dates], axis=0)
else:
mask_date = self.power_profile_.columns == dates
mask_date = _strict_comparison(self.power_profile_.columns, dates,
time_comparison)

mask_date = np.bitwise_not(mask_date)
self.power_profile_ = self.power_profile_.loc[:, mask_date]

def record_power_profile(self, range_dates=None):
def record_power_profile(self, range_dates=None, columns=None):
"""Compute the record power-profile.
Parameters
Expand All @@ -137,9 +160,13 @@ def record_power_profile(self, range_dates=None):
The start and end date to consider when computing the record
power-profile. By default, all data will be used.
columns : array-like or None, optional
Names of the data fields to return. By default, all available data
fields will be returned.
Returns
-------
record_power_profile : Series
record_power_profile : DataFrame
Record power-profile taken between the range of dates.
Examples
Expand All @@ -148,36 +175,54 @@ def record_power_profile(self, range_dates=None):
>>> from skcycling.datasets import load_rider
>>> rider = Rider.from_csv(load_rider())
>>> record_power_profile = rider.record_power_profile()
>>> record_power_profile.head() # doctest : +NORMALIZE_WHITESPACE
00:00:01 750.000000
00:00:02 741.000000
00:00:03 731.666667
00:00:04 719.500000
00:00:05 712.200000
Name: record power-profile, dtype: float64
>>> record_power_profile.head() # doctest: +NORMALIZE_WHITESPACE
cadence distance elevation heart-rate power
00:00:01 60.000000 27162.600000 NaN NaN 750.000000
00:00:02 58.000000 27163.750000 NaN NaN 741.000000
00:00:03 56.333333 27164.586667 NaN NaN 731.666667
00:00:04 59.250000 27163.402500 NaN NaN 719.500000
00:00:05 61.000000 27162.142000 NaN NaN 712.200000
It is also possible to give a range of dates to compute the record
power-profile.
power-profile. We can also select some specific information.
>>> record_power_profile = rider.record_power_profile(('07 May 2014',
... '11 May 2014'))
>>> record_power_profile = rider.record_power_profile(
... range_dates=('07 May 2014', '11 May 2014'),
... columns=['power', 'cadence'])
>>> record_power_profile.head()
00:00:01 717.00
00:00:02 717.00
00:00:03 590.00
00:00:04 552.25
00:00:05 552.60
Name: record power-profile, dtype: float64
cadence power
00:00:01 100.000000 717.00
00:00:02 89.000000 717.00
00:00:03 68.333333 590.00
00:00:04 59.500000 552.25
00:00:05 63.200000 552.60
"""
if range_dates is None:
rpp = self.power_profile_.max(axis=1).dropna()
mask_date = np.ones_like(self.power_profile_.columns,
dtype=bool)
else:
mask_date = np.bitwise_and(
self.power_profile_.columns >= range_dates[0],
self.power_profile_.columns <= range_dates[1])
rpp = self.power_profile_.loc[:, mask_date].max(axis=1).dropna()
return rpp.rename('record power-profile')
self.power_profile_.columns <= pd.Timestamp(range_dates[1]) +
pd.DateOffset(1))

if columns is None:
columns = self.power_profile_.index.levels[0]

pp_idxmax = (self.power_profile_.loc['power']
.loc[:, mask_date]
.idxmax(axis=1)
.dropna())
rpp = {}
for dt in columns:
data = self.power_profile_.loc[dt].loc[:, mask_date]
rpp[dt] = pd.Series(
[data.loc[date_idx]
for date_idx in pp_idxmax.iteritems()],
index=data.index[:pp_idxmax.size])

return pd.DataFrame(rpp)

@classmethod
def from_csv(cls, filename, n_jobs=1):
Expand All @@ -204,17 +249,27 @@ def from_csv(cls, filename, n_jobs=1):
>>> print(rider) # doctest: +NORMALIZE_WHITESPACE
RIDER INFORMATION:
power-profile:
2014-05-07 2014-05-11 2014-07-26
00:00:01 500.000000 717.00 750.000000
00:00:02 475.500000 717.00 741.000000
00:00:03 469.333333 590.00 731.666667
00:00:04 464.000000 552.25 719.500000
00:00:05 463.000000 552.60 712.200000
2014-05-07 12:26:22 2014-05-11 09:39:38 \\
cadence 00:00:01 78.000000 100.000000
00:00:02 64.000000 89.000000
00:00:03 62.666667 68.333333
00:00:04 62.500000 59.500000
00:00:05 64.400000 63.200000
<BLANKLINE>
2014-07-26 16:50:56
cadence 00:00:01 60.000000
00:00:02 58.000000
00:00:03 56.333333
00:00:04 59.250000
00:00:05 61.000000
"""
df = pd.read_csv(filename, index_col=0)
df = pd.read_csv(filename, index_col=[0, 1])
df.columns = pd.to_datetime(df.columns)
df.index = pd.to_timedelta(df.index)
df.index = pd.MultiIndex(levels=[df.index.levels[0],
pd.to_timedelta(df.index.levels[1])],
labels=df.index.labels,
name=[None, None])
rider = cls(n_jobs=n_jobs)
rider.power_profile_ = df
return rider
Expand All @@ -237,18 +292,18 @@ def to_csv(self, filename):
>>> from skcycling import Rider
>>> rider = Rider(n_jobs=-1)
>>> rider.add_activities(load_fit()[:1])
>>> print(rider) # doctest: +NORMALIZE_WHITESPACE
>>> print(rider)
RIDER INFORMATION:
power-profile:
2014-05-07
00:00:01 500.000000
00:00:02 475.500000
00:00:03 469.333333
00:00:04 464.000000
00:00:05 463.000000
2014-05-07 12:26:22
cadence 00:00:01 78.000000
00:00:02 64.000000
00:00:03 62.666667
00:00:04 62.500000
00:00:05 64.400000
"""
self.power_profile_.to_csv(filename)
self.power_profile_.to_csv(filename, date_format='%Y-%m-%d %H:%M:%S')

def __repr__(self):
return 'RIDER INFORMATION:\n power-profile:\n {}'.format(
Expand Down

0 comments on commit 21f7249

Please sign in to comment.