Skip to content

Commit

Permalink
EHN/IO Read all useful data from FIT file (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
glemaitre committed Jan 8, 2018
1 parent 4d56123 commit 6f15bf3
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 16 deletions.
35 changes: 26 additions & 9 deletions skcycling/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,21 @@

from .fit import load_power_from_fit

DROP_OPTIONS = ('columns', 'rows', 'both')

def bikeread(filename):

def bikeread(filename, drop_nan=None):
"""Read power data file.
Parameters
----------
filename : str
Path to the file to read.
drop_nan : str {'columns', 'rows', 'both'} or None
Whether to drop the columns, the rows, or both when they contain NaN
values. By default (None), all data will be kept.
Returns
-------
data : DataFrame
Expand All @@ -26,18 +32,29 @@ def bikeread(filename):
--------
>>> from skcycling.datasets import load_fit
>>> from skcycling.io import bikeread
>>> activity = bikeread(load_fit()[0])
>>> activity = bikeread(load_fit()[0], drop_nan='columns')
>>> activity.head() # doctest : +NORMALIZE_WHITESPACE
power
2014-05-07 12:26:22 256.0
2014-05-07 12:26:23 185.0
2014-05-07 12:26:24 343.0
2014-05-07 12:26:25 344.0
2014-05-07 12:26:26 389.0
cadence distance power
2014-05-07 12:26:22 45.0 3.05 256.0
2014-05-07 12:26:23 42.0 6.09 185.0
2014-05-07 12:26:24 44.0 9.09 343.0
2014-05-07 12:26:25 45.0 11.94 344.0
2014-05-07 12:26:26 48.0 15.03 389.0
"""
if drop_nan is not None and drop_nan not in DROP_OPTIONS:
raise ValueError('"drop_nan" should be one of {}.'
' Got {} instead.'.format(DROP_OPTIONS, drop_nan))

df = load_power_from_fit(filename)

if drop_nan is not None:
if drop_nan == 'columns':
df.dropna(axis=1, inplace=True)
elif drop_nan == 'rows':
df.dropna(axis=0, inplace=True)
else:
df.dropna(axis=1, inplace=True).dropna(axis=0, inplace=True)

# remove possible outliers by clipping the value
df[df['power'] > 2500.] = np.nan

Expand Down
26 changes: 21 additions & 5 deletions skcycling/io/fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,18 @@
# License: BSD 3 clause

import os
from collections import defaultdict

import pandas as pd
import numpy as np
import six

from fitparse import FitFile

# 'timestamp' will be considered as the index of the DataFrame later on
FIELDS_DATA = ('timestamp', 'power', 'heart-rate', 'cadence', 'distance',
'elevation')


def check_filename_fit(filename):
"""Method to check if the filename corresponds to a fit file.
Expand Down Expand Up @@ -52,7 +59,7 @@ def load_power_from_fit(filename):
Returns
-------
power_rec : ndarray, shape (n_samples)
data : DataFrame
Records of the ride for the fields listed in FIELDS_DATA, indexed by timestamp.
"""
Expand All @@ -61,8 +68,17 @@ def load_power_from_fit(filename):
activity.parse()
records = activity.get_messages(name='record')

power, timestamp = zip(*[
(rec.get_value('power'), rec.get_value('timestamp'))
for rec in records])
data = defaultdict(list)
for rec in records:
values = rec.get_values()
for key in FIELDS_DATA:
data[key].append(values.get(key, np.NaN))

data = pd.DataFrame(data)
if data.empty:
raise IOError('The file {} does not contain any data.'.format(
filename))
data.set_index(FIELDS_DATA[0], inplace=True)
del data.index.name

return pd.DataFrame({'power': power}, index=timestamp)
return data
4 changes: 2 additions & 2 deletions skcycling/io/tests/test_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def test_load_power_if_no_record():
for f in filenames:
if pattern in f:
filename = f
msg = "There is no data to treat in that file."
with pytest.raises(ValueError, message=msg):
msg = "does not contain any data."
with pytest.raises(IOError, message=msg):
load_power_from_fit(filename)


Expand Down

0 comments on commit 6f15bf3

Please sign in to comment.