Skip to content

Commit

Permalink
apply filters on data
Browse files Browse the repository at this point in the history
  • Loading branch information
sirfoga committed Oct 14, 2018
1 parent b731422 commit 2e81783
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 84 deletions.
4 changes: 4 additions & 0 deletions peeper/analysis/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import matplotlib.pyplot as plt
import pandas as pd
from hal.files.models.system import get_parent_folder_name


class Plotter:
Expand Down Expand Up @@ -83,6 +84,9 @@ def save(self, output_file):
"""

fig, ax = plt.subplots(2, 2, sharex="all")
title = get_parent_folder_name(self.path)
title = "Telemetry data from " + title.replace("-", ":")
fig.suptitle(title)

self.plots["Compass"].plot(ax=ax[0, 0], title="Compass")
self.plots["RotationVector"].plot(ax=ax[0, 1], title="Rotation vector")
Expand Down
17 changes: 9 additions & 8 deletions peeper/preprocessing/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import argparse
import os

from hal.files.models.system import is_folder, ls_recurse
from hal.files.models.system import is_folder, ls_recurse, \
get_parent_folder_name
from hal.streams.logger import log_message

from peeper.preprocessing.models import Merger
from peeper.preprocessing.models import Processer


def create_args():
Expand Down Expand Up @@ -52,7 +53,7 @@ def get_output_file(folder):
data_day = folders[-3]
output_file = "sensors.csv"
output_folder = folder
for _ in range(5):
for _ in range(4):
output_folder = os.path.dirname(output_folder)
output_folder = os.path.join(output_folder, "output", data_day, data_time)
output_file = os.path.join(output_folder, output_file)
Expand All @@ -73,10 +74,11 @@ def pre_process_test(folder):
:return: Saves processed data
"""

log_message("Pre-processing", get_parent_folder_name(folder))
output_file = get_output_file(folder)

driver = Merger(folder)
driver.merge_into(output_file)
driver = Processer(folder)
driver.combine_into(output_file)

log_message("Merged into", output_file)

Expand All @@ -99,9 +101,8 @@ def pre_process_day(folder):
if "Accelerometer.csv" in os.listdir(folder)
]

for day_folder in folders:
log_message("Pre-processing", day_folder)
pre_process_test(day_folder)
for folder in folders:
pre_process_test(folder)


def main():
Expand Down
174 changes: 98 additions & 76 deletions peeper/preprocessing/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,81 +6,10 @@
import pandas as pd
from hal.files.models.files import Document
from hal.files.models.system import ls_dir
from scipy.signal import savgol_filter


def sample_by_frequency(data, hertz):
    """Samples data with given frequency. Averages values if multiple
    readings fall inside one sample window.
    :param data: data frame indexed by milliseconds
    :param hertz: frequency, e.g 5 Hz => 1 / 5s => one sample each 0.2s
    :return: sampled data frame indexed by "Sample milliseconds"
    """

    freq = 1000.0 / hertz  # ms interval between 2 samples
    sampled_time = 0.0
    sampled_data = []  # sampled rows collected so far
    start_sample_index = 0  # sample from this index ...

    for i, ms in enumerate(data.index):
        delta = ms - data.index[start_sample_index]

        if delta >= freq:  # end sample here
            end_sample_index = i

            # shrink window until it spans at most one sample period
            while delta > freq and start_sample_index <= end_sample_index:
                delta = data.index[end_sample_index] - \
                        data.index[start_sample_index]
                end_sample_index -= 1

            sampled_time += freq
            sample_data = data.iloc[start_sample_index: end_sample_index]

            averages = sample_data.apply(np.nanmean, axis=0)  # average sample
            row = [sampled_time] + averages.tolist()
            for j, val in enumerate(row):
                # fix: guard on sampled_data — the first window has no
                # previous row to copy from, so the original code raised
                # IndexError here; keep the NaN in that case instead
                if np.isnan(val) and sampled_data:
                    row[j] = sampled_data[-1][j]  # last known value

            sampled_data.append(row)

            start_sample_index = end_sample_index + 1

    # build data frame from samples
    sampled_label = "Sample milliseconds"
    columns = [sampled_label] + list(data.keys())
    sampled_data = pd.DataFrame(data=sampled_data, columns=columns)
    sampled_data = sampled_data.set_index(sampled_label)

    return sampled_data


# todo handle removed data (if there are holes in between -> interpolate)
def remove_null_values(data, epsilon):
    """Removes rows whose linear-acceleration magnitude is below epsilon
    :param data: data frame
    :param epsilon: drop every row whose magnitude is strictly below this
    :return: data frame without the near-zero rows
    """

    acceleration_axes = (
        "AccelerometerLinear X",
        "AccelerometerLinear Y",
        "AccelerometerLinear Z"
    )

    # collect indices of rows whose acceleration vector is ~null
    null_rows = [
        index
        for index in data.index
        if np.linalg.norm(
            [data.loc[index][axis] for axis in acceleration_axes]
        ) < epsilon
    ]

    return data.drop(null_rows)


class Merger:
class Processer:
"""Merges multiple .csv data files into a big one"""

def __init__(self, folder):
Expand Down Expand Up @@ -142,17 +71,110 @@ def _merge(self):

return data

@staticmethod
def sample_by_frequency(data, hertz):
    """Samples data with given frequency. Averages values if multiple
    readings fall inside one sample window.
    :param data: data frame indexed by milliseconds
    :param hertz: frequency, e.g 5 Hz => 1 / 5s => one sample each 0.2s
    :return: sampled data frame indexed by "Sample milliseconds"
    """

    freq = 1000.0 / hertz  # ms interval between 2 samples
    sampled_time = 0.0
    sampled_data = []  # sampled rows collected so far
    start_sample_index = 0  # sample from this index ...

    for i, ms in enumerate(data.index):
        delta = ms - data.index[start_sample_index]

        if delta >= freq:  # end sample here
            end_sample_index = i

            # shrink window until it spans at most one sample period
            while delta > freq and start_sample_index <= end_sample_index:
                delta = data.index[end_sample_index] - \
                        data.index[start_sample_index]
                end_sample_index -= 1

            sampled_time += freq
            sample_data = data.iloc[start_sample_index: end_sample_index]

            averages = sample_data.apply(np.nanmean,
                                         axis=0)  # average sample
            row = [sampled_time] + averages.tolist()
            for j, val in enumerate(row):
                # fix: guard on sampled_data — the first window has no
                # previous row to copy from, so the original code raised
                # IndexError here; keep the NaN in that case instead
                if np.isnan(val) and sampled_data:
                    row[j] = sampled_data[-1][j]  # last known value

            sampled_data.append(row)

            start_sample_index = end_sample_index + 1

    # build data frame from samples
    sampled_label = "Sample milliseconds"
    columns = [sampled_label] + list(data.keys())
    sampled_data = pd.DataFrame(data=sampled_data, columns=columns)
    sampled_data = sampled_data.set_index(sampled_label)

    return sampled_data

@staticmethod
def filter(data, filt, *args, **kwargs):
    """Applies the given filter callable to every column of the frame.
    Columns are overwritten in place and the same frame is returned.
    :param data: data frame to filter
    :param filt: filter callable, invoked as filt(column, *args, **kwargs)
    :param args: positional args forwarded to the filter
    :param kwargs: keyword args forwarded to the filter
    :return: the (edited) data frame
    """

    column_names = list(data)
    for name in column_names:
        data[name] = filt(data[name], *args, **kwargs)

    return data

# todo handle removed data (if there are holes in between -> interpolate)
@staticmethod
def remove_null_values(data, epsilon):
    """Removes rows whose linear-acceleration magnitude is null
    (i.e. not above epsilon)
    :param data: data frame
    :param epsilon: drop every row whose magnitude is <= this value
    :return: data frame without the null rows
    """

    acceleration_axes = (
        "AccelerometerLinear X",
        "AccelerometerLinear Y",
        "AccelerometerLinear Z"
    )

    # collect indices of rows whose acceleration vector is ~null
    null_rows = [
        index
        for index in data.index
        if np.linalg.norm(
            [data.loc[index][axis] for axis in acceleration_axes]
        ) <= epsilon
    ]

    return data.drop(null_rows)

def _process(self):
    """Runs the full pipeline on the raw inputs
    :return: merged, resampled, smoothed data frame without null rows
    """

    frame = self._merge()  # combine the raw .csv files into one frame
    frame = self.sample_by_frequency(frame, 10)  # resample at 10 Hz
    frame = self.filter(frame, savgol_filter, window_length=13,
                        polyorder=1)  # smooth each column
    frame = self.remove_null_values(frame, 1)  # drop near-zero rows

    return frame

def merge_into(self, output_file):
def combine_into(self, output_file):
"""Merges all inputs files into one
:param output_file: output file (where to write data)
Expand Down

0 comments on commit 2e81783

Please sign in to comment.