In [3]:
# Copyright (c) 2017, MD2K Center of Excellence
# - Nasir Ali <nasir.ali08@gmail.com>
# - Timothy Hnat <twhnat@memphis.edu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import datetime
import uuid
from typing import List
from uuid import UUID

from typing import List

import numpy as np
from numpy.linalg import norm
from sklearn import preprocessing

# from cerebralcortex.core.datatypes.datapoint import DataPoint
# from cerebralcortex.core.datatypes.datastream import DataStream


from datetime import datetime
from typing import Any
import sys
import json
import codecs
import gzip

class DataPoint:
    def __init__(self,
                 start_time: datetime = None,
                 end_time: datetime = None,
                 sample: Any = None):
        self._start_time = start_time
        self._end_time = end_time
        self._sample = sample

    @property
    def sample(self):
        return self._sample

    @sample.setter
    def sample(self, val):
        self._sample = val

    @property
    def start_time(self):
        return self._start_time

    @start_time.setter
    def start_time(self, val):
        self._start_time = val

    @property
    def end_time(self):
        return self._end_time

    @end_time.setter
    def end_time(self, val):
        self._end_time = val

    @classmethod
    def from_tuple(cls, start_time: datetime, sample: Any, end_time: datetime = None):
        return cls(start_time, end_time, sample)

    def __str__(self):
        return str(self.start_time) + " - " + str(self.sample)

    def __repr__(self):
        return 'DataPoint(' + ', '.join(map(str, [self.start_time, self.end_time, self.sample]))


class DataStream:
    def __init__(self,
                 identifier: UUID = None,
                 owner: UUID = None,
                 name: UUID = None,
                 data_descriptor = [],
                 execution_context = {},
                 annotations: List = [],
                 stream_type: str = "1",
                 start_time: datetime = None,
                 end_time: datetime = None,
                 data: List[DataPoint] = None
                 ):
        self._identifier = identifier
        self._owner = owner
        self._name = name
        self._data_descriptor = data_descriptor
        self._datastream_type = stream_type
        self._execution_context = execution_context
        self._annotations = annotations
        self._start_time = start_time
        self._end_time = end_time
        self._data = data


    @property
    def identifier(self):
        return self._identifier

    @property
    def owner(self):
        return self._owner

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        self._name = value

    @property
    def start_time(self):
        return self._start_time

    @start_time.setter
    def start_time(self, val):
        self._start_time = val

    @property
    def end_time(self):
        return self._end_time

    @end_time.setter
    def end_time(self, val):
        self._end_time = val

    @property
    def datastream_type(self):
        return self._datastream_type

    @property
    def data(self):
        return self._data

    
    @data.setter
    def data(self, value):
        result = []
        for dp in value:
            result.append(DataPoint(dp.start_time, dp.end_time, dp.sample))
        self._data = result

    @classmethod
    def from_datastream(cls, input_streams: List):
        result = cls(owner=input_streams[0].owner)
        # TODO: Something with provenance tracking from datastream list
        return result

    def __str__(self):
        return str(self.identifier) + " - " + str(self.owner) + " - " + str(self.data)

    def __repr__(self):
        result = "Stream(" + ', '.join(map(str, [self.identifier,
                                                 self.owner,
                                                 self.name,
                                                 self.data_descriptor,
                                                 self.datastream_type,
                                                 self.execution_context,
                                                 self.annotations]))
        return result

def convert_sample(sample):
    return list([float(x.strip()) for x in sample.split(',')])


def line_parser(input):
    ts, offset, sample = input.split(',', 2)
    start_time = int(ts) / 1000.0
    offset = int(offset)
    return DataPoint(datetime.fromtimestamp(start_time), convert_sample(sample))


def load_datastream(filebase):
    metadata = {}
    with codecs.open(filebase + '.json', encoding='utf-8', errors='ignore') as f:
        metadata = json.loads(f.read())

    fp = gzip.open(filebase + '.gz')
    gzip_file_content = fp.read()
    fp.close()
    gzip_file_content = gzip_file_content.decode('utf-8')

    lines = gzip_file_content.splitlines()
    data = list(map(line_parser, lines))

    identifier = uuid.UUID(metadata['identifier'])
    owner = uuid.UUID(metadata['owner'])
    name = metadata['name']
    data_descriptor = metadata['data_descriptor']
    execution_context = metadata['execution_context']
    annotations = metadata['annotations']
    stream_type = "1"
    start_time = data[0].start_time
    end_time = data[-1].start_time

    return DataStream(identifier,owner,name,
    data_descriptor,
    execution_context,
    annotations,
    stream_type,
    start_time,
    end_time,
    data)


def save_datastream(datastream):
    print(datastream)
    print(datastream.data)
    pass


def count(datastream):

    identifier = uuid.uuid1()
    name = datastream.name + '--COUNT'
    execution_context = {}
    annotations = {}
    data_descriptor = []


    data = [DataPoint(datastream.data[0].start_time, datastream.data[-1].start_time, len(datastream.data))]
    start_time = data[0].start_time
    end_time = data[-1].start_time

    return DataStream(identifier, datastream.owner, name, data_descriptor,
    execution_context,
    annotations,
    "1",
    start_time,
    end_time,
    data)


def magnitude(datastream: DataStream) -> DataStream:
    """

    :param datastream:
    :return: magnitude of the dataastream
    """
    result = DataStream.from_datastream(input_streams=[datastream])
    if datastream.data is None or len(datastream.data) == 0:
        result.data = []
        return result

    input_data = np.array([i.sample for i in datastream.data])

    data = norm(input_data, axis=1).tolist()

    result.data = [DataPoint.from_tuple(start_time=v.start_time, sample=data[i])
                   for i, v in enumerate(datastream.data)]

    return result


def smooth(datastream: DataStream,
           span: int = 5) -> DataStream:
    
    data = datastream.data
    """
    Smooths data using moving average filter over a span.
    The first few elements of data_smooth are given by
    data_smooth(1) = data(1)
    data_smooth(2) = (data(1) + data(2) + data(3))/3
    data_smooth(3) = (data(1) + data(2) + data(3) + data(4) + data(5))/5
    data_smooth(4) = (data(2) + data(3) + data(4) + data(5) + data(6))/5

    for more details follow the below links:
    https://www.mathworks.com/help/curvefit/smooth.html
    http://stackoverflow.com/a/40443565

    :return: data_smooth
    :param data:
    :param span:
    """

    if data is None or len(data) == 0:
        return []

    sample = [i.sample for i in data]
    sample_middle = np.convolve(sample, np.ones(span, dtype=int), 'valid') / span
    divisor = np.arange(1, span - 1, 2)
    sample_start = np.cumsum(sample[:span - 1])[::2] / divisor
    sample_end = (np.cumsum(sample[:-span:-1])[::2] / divisor)[::-1]
    sample_smooth = np.concatenate((sample_start, sample_middle, sample_end))

    data_smooth = []

    if len(sample_smooth) == len(data):
        for i, item in enumerate(data):
            dp = DataPoint.from_tuple(sample=sample_smooth[i], start_time=item.start_time, end_time=item.end_time)
            data_smooth.append(dp)
    else:
        raise Exception("Smoothed data length does not match with original data length.")

    data_smooth_stream = DataStream.from_datastream([datastream])
    data_smooth_stream.data = data_smooth
    return data_smooth_stream


def moving_average_curve(data: List[DataPoint],
                         window_length: int) -> List[DataPoint]:
    """
    Moving average curve from filtered (using moving average) samples.

    :return: mac
    :param data:
    :param window_length:
    """
    if data is None or len(data) == 0:
        return []

    sample = [i.sample for i in data]
    mac = []
    for i in range(window_length, len(sample) - (window_length + 1)):
        sample_avg = np.mean(sample[i - window_length:i + window_length + 1])
        mac.append(DataPoint.from_tuple(sample=sample_avg, start_time=data[i].start_time, end_time=data[i].end_time))

    return mac



if __name__ == '__main__':
    filename = '7007221c-1aec-4481-aee2-200c7e61533d'

#     datastream = load_datastream(sys.argv[1])
    datastream = load_datastream(filename)
    number_entries = count(datastream)
    save_datastream(number_entries)


6292176e-0ac3-11e8-9b8d-a434d9b1e6b7 - 636fcc1f-8966-4e63-a9df-0cbaa6e9296c - [DataPoint(2017-10-07 13:59:56.441000, 2017-10-07 14:59:55.598000, 86535]
[DataPoint(2017-10-07 13:59:56.441000, 2017-10-07 14:59:55.598000, 86535]


In [5]:
import pandas as pd
import matplotlib.pylab as plt
import math
import statistics
import numpy as np
import os


def segmentationUsingTwoMovingAverage(slowMovingAverageDataStream: DataStream
                                      , fastMovingAverageDataStream: DataStream
                                      , THRESHOLD: float, near: int):
    
    slowMovingAverage = np.array([data.sample for data in slowMovingAverageDataStream.data])
    fastMovingAverage = np.array([data.sample for data in fastMovingAverageDataStream.data])

    indexList = [0]*len(slowMovingAverage)
    curIndex = 0;

    for i in range(len(slowMovingAverage)):
        diff = slowMovingAverage[i] - fastMovingAverage[i]
        if diff > THRESHOLD:
            if curIndex == 0:
                indexList[curIndex] = i
                curIndex = curIndex + 1
                indexList[curIndex] = i
            else:
                if i <= indexList[curIndex] + near :
                    indexList[curIndex] = i
                else:
                    curIndex = curIndex + 1
                    indexList[curIndex] = i
                    curIndex = curIndex + 1
                    indexList[curIndex] = i

    output = []
    if curIndex > 0:
        for i in range(0, curIndex, 2):
            sIndex = indexList[i]
            eIndex = indexList[i+1]
            sTime = slowMovingAverageDataStream.data[sIndex].start_time
            eTime = slowMovingAverageDataStream.data[eIndex].start_time
            output.append(DataPoint(start_time=sTime, end_time=eTime, sample=[indexList[i], indexList[i + 1]]))

    intersectionPoints = DataStream.from_datastream([slowMovingAverageDataStream])
    intersectionPoints.data = output
    return intersectionPoints

    return intersectionPoints

def calculateRollStream(accel_stream: DataStream) :
    roll = []
    for dp in accel_stream.data:
        ax = dp.sample[0]
        ay = dp.sample[1]
        az = dp.sample[2]
        rll = 180 * math.atan2(ax, math.sqrt(ay * ay + az * az)) / math.pi
        roll.append(DataPoint.from_tuple(start_time=dp.start_time, end_time=dp.end_time, sample=rll))

    roll_datastream = DataStream.from_datastream([accel_stream])
    roll_datastream.data = roll
    return roll_datastream

def calculatePitchStream(accel_stream: DataStream):
    pitch = []
    for dp in accel_stream.data:
        ax = dp.sample[0]
        ay = dp.sample[1]
        az = dp.sample[2]
        ptch = 180 * math.atan2(-ay, -az) / math.pi
        pitch.append(DataPoint.from_tuple(start_time=dp.start_time, end_time=dp.end_time, sample=ptch))

    pitch_datastream = DataStream.from_datastream([accel_stream])
    pitch_datastream.data = pitch
    return pitch_datastream

def calculateYawStream(accel_stream: DataStream):
    yaw = []
    for dp in accel_stream.data:
        ax = dp.sample[0]
        ay = dp.sample[1]
        az = dp.sample[2]
        yw = 180 * math.atan2(ay, ax) / math.pi
        yaw.append(DataPoint.from_tuple(start_time=dp.start_time, end_time=dp.end_time, sample=yw))

    yaw_datastream = DataStream.from_datastream([accel_stream])
    yaw_datastream.data = yaw
    return yaw_datastream


def filterDuration(gyr_intersections: DataStream):
    gyr_intersections_filtered = []
    
    for I in gyr_intersections.data:
        dur = (I.end_time - I.start_time).total_seconds()
#         print(dur)
        if (dur >= 1.0) & (dur <= 5.0):
            gyr_intersections_filtered.append(I)

    gyr_intersections_filtered_datastream = DataStream.from_datastream([gyr_intersections])
    gyr_intersections_filtered_datastream.data = gyr_intersections_filtered
    return gyr_intersections_filtered_datastream

def filterRollPitch(gyr_intersections_stream: DataStream, roll_stream: DataStream, pitch_stream: DataStream):
    gyr_intersections_filtered = []
    
    for I in gyr_intersections_stream.data:
        sTime = I.start_time
        eTime = I.end_time
        sIndex = I.sample[0]
        eIndex = I.sample[1]
        
        roll_sub = [roll_stream.data[i].sample for i in range(sIndex, eIndex)]
        pitch_sub = [pitch_stream.data[i].sample for i in range(sIndex, eIndex)]
        
        mean_roll = np.mean(roll_sub)
        mean_pitch = np.mean(pitch_sub)
        
#         r > -20 && r <= 65 && p >= -125 && p <= -40
        if (mean_roll > -20) & (mean_roll <= 65) & (mean_pitch >= - 125) & (mean_pitch <= - 40):
            gyr_intersections_filtered.append(I)

    gyr_intersections_filtered_stream = DataStream.from_datastream([gyr_intersections_stream])
    gyr_intersections_filtered_stream.data = gyr_intersections_filtered
    return gyr_intersections_filtered_stream

def computeBasicFeatures(data):
    
    mean = np.mean(data)
    median = np.median(data)
    sd = np.std(data)
    quartile = np.percentile(data, 75) - np.percentile(data, 25)
    
    return mean, median, sd, quartile

def computeWindowFeatures(gyr_intersections_stream, gyr_mag_stream, roll_stream, pitch_stream, yaw_stream, accel_stream):

    all_features = []
    
    for I in gyr_intersections_stream.data:
        sTime = I.start_time
        eTime = I.end_time
        sIndex = I.sample[0]
        eIndex = I.sample[1]
        
        roll_sub = [roll_stream.data[i].sample for i in range(sIndex, eIndex)]
        pitch_sub = [pitch_stream.data[i].sample for i in range(sIndex, eIndex)]
        yaw_sub = [yaw_stream.data[i].sample for i in range(sIndex, eIndex)]

        Gmag_sub = [gyr_mag_stream.data[i].sample for i in range(sIndex, eIndex)]
    
        duration = 1000 * (eTime - sTime).total_seconds()
        
        roll_mean, roll_median, roll_sd, roll_quartile = computeBasicFeatures(roll_sub)
        pitch_mean, pitch_median, pitch_sd, pitch_quartile = computeBasicFeatures(pitch_sub)
        yaw_mean, yaw_median, yaw_sd, yaw_quartile = computeBasicFeatures(yaw_sub)
        
        gyro_mean, gyro_median, gyro_sd, gyro_quartile = computeBasicFeatures(Gmag_sub)
        
        f = [duration, roll_mean, roll_median, roll_sd, roll_quartile, pitch_mean, pitch_median, pitch_sd, pitch_quartile, yaw_mean, yaw_median, yaw_sd, yaw_quartile, gyro_mean, gyro_median, gyro_sd, gyro_quartile]
        
        all_features.append(DataPoint.from_tuple(start_time=sTime, sample=f))
        
    feature_vector_stream = DataStream.from_datastream([all_features])
    feature_vector_stream.data = all_features
    return feature_vector_stream

def getLabel(st, et, epi_st, epi_et, puff_times):
    label = 0 # not puff
#     print(range(len(puff_times)))
    for i in range(len(puff_times)):
        if (puff_times[i] >= st) & (puff_times[i] <= et):
            label = 1
            return label
    
    for i in range(len(epi_et)):
        if ((epi_st[i] <=st) & (st <= epi_et[i])) | ((epi_st[i] <=et) & (et <= epi_et[i])):
            label = -1 # included episode but not puff
            return label
    return label

def getData(cur_dir, filename):
    col_name = ['timestamp', 'value']
    D = pd.read_csv(cur_dir + filename, names = col_name)
    
    return D['timestamp'], D['value']

def convert_sample(sample):
    return list([float(x.strip()) for x in sample.split(',')])


def line_parser(input):
    ts, offset, sample = input.split(',', 2)
    start_time = int(ts) / 1000.0
    offset = int(offset)
    return DataPoint(datetime.fromtimestamp(start_time), convert_sample(sample))

def getInputData(cur_dir, wrist):
    
    epi_st, epi_et = getData(cur_dir, 'episode_start_end.csv')
    
    if wrist == 0:
        puff_times = pd.read_csv(cur_dir + 'puff_timestamp_leftwrist.csv', names=['timings'])
        puff_times = puff_times['timings']
        puff_times = puff_times.values

        t, Ax = getData(cur_dir, 'left-wrist-accelx.csv')
        t, Ay = getData(cur_dir, 'left-wrist-accely.csv')
        t, Az = getData(cur_dir, 'left-wrist-accelz.csv')

        t, Gx = getData(cur_dir, 'left-wrist-gyrox.csv')
        t, Gy = getData(cur_dir, 'left-wrist-gyroy.csv')
        t, Gz = getData(cur_dir, 'left-wrist-gyroz.csv')
    else:
        puff_times = pd.read_csv(cur_dir + 'puff_timestamp_rightwrist.csv', names=['timings'])
        puff_times = puff_times['timings']
        puff_times = puff_times.values

        t, Ax = getData(cur_dir, 'right-wrist-accelx.csv')
        t, Ay = getData(cur_dir, 'right-wrist-accely.csv')
        t, Az = getData(cur_dir, 'right-wrist-accelz.csv')

        t, Gx = getData(cur_dir, 'right-wrist-gyrox.csv')
        t, Gy = getData(cur_dir, 'right-wrist-gyroy.csv')
        t, Gz = getData(cur_dir, 'right-wrist-gyroz.csv')
    
    return t, Ax, Ay, Az, Gx, Gy, Gz, epi_st, epi_et, puff_times

def getInputDataStream(cur_dir, wrist):
    
    t, Ax, Ay, Az, Gx, Gy, Gz, epi_st, epi_et, puff_times = getInputData(cur_dir, wrist)
    accel_data = []
    gyro_data = []
    for i in range(len(t)):
        accel_data.append(DataPoint.from_tuple(start_time=datetime.fromtimestamp(t.iloc[i]/1000), sample = [Ax.iloc[i], Ay.iloc[i], Az.iloc[i]]))
        gyro_data.append(DataPoint.from_tuple(start_time=datetime.fromtimestamp(t.iloc[i]/1000), sample = [Gx.iloc[i], Gy.iloc[i], Gz.iloc[i]]))
            
    start_time = accel_data[0].start_time
    end_time = accel_data[-1].start_time
    
    identifier = 'joy001'
    name = datastream.name + '--COUNT'
    execution_context = {}
    annotations = {}
    data_descriptor = []

    accel_datastream = DataStream(identifier, 'owner', 'accel', data_descriptor,
        execution_context,
        annotations,
        "1",
        start_time,
        end_time,
        accel_data)
    gyro_datastream = DataStream(identifier, 'owner', 'gyro', data_descriptor,
        execution_context,
        annotations,
        "1",
        start_time,
        end_time,
        accel_data)
    
    return accel_datastream, gyro_datastream # t, Ax, Ay, Az, Gx, Gy, Gz, epi_st, epi_et, puff_times
    
if __name__ == '__main__':
#     cur_dir = 'C:\\Users\\sakther\\Documents\\python_workshop\\md2k_mCerebralCortex\\wristdata\\'
#     cur_dir = 'C:\\Users\\sakther\\DATA\\csvdataSI_new\\p03\\s03\\'

    fastSize = 13
    slowSize = 131
    
    data_dir = '/home/nsaleheen/data/csvdataSI_new/'

#     pids = ['p01', 'p02', 'p03', 'p04', 'p05', 'p06']
    pids = ['p05']
    Xs = []
    Ys = []
    nSample = 0

    for i in range(len(pids)):
        basedir = data_dir + pids[i] + '/'
        sids = [d for d in os.listdir(basedir) if os.path.isdir(os.path.join(basedir, d))]     

        for j in range(len(sids)):
            cur_dir = data_dir + pids[i] + '/' + sids[j] + '/'
            print(cur_dir)
            
            for wrist in range(2): # 0 for left wrist, 1 for right wrist
            
                t, Ax, Ay, Az, Gx, Gy, Gz, epi_st, epi_et, puff_times = getInputData(cur_dir, wrist)
                accel_stream, gyro_stream = getInputDataStream(cur_dir, wrist)
                print('success')
                print(len(accel_stream.data))
                print(len(Ax))

                nSample = nSample + len(puff_times)

                A_mag = getMag(Ax, Ay, Az)
                gyr_mag = getMag(Gx, Gy, Gz)

                roll = calculateRoll(Ax, Ay, Az)
                pitch = calculatePitch(Ax, Ay, Az)
                yaw = calculateYaw(Ax, Ay, Az)

                roll_datastream = calculateRollStream(accel_stream)
                pitch_datastream = calculatePitchStream(accel_stream)
                yaw_datastream = calculateYawStream(accel_stream)

                gyr_mag_800 = smooth(gyr_mag, fastSize)
                gyr_mag_8000 = smooth(gyr_mag, slowSize)
                gyr_intersections, dont_care = segmentationUsingTwoMovingAverage(gyr_mag_8000, gyr_mag_800, 0, 4)
                gyr_intersections = filterDuration(gyr_intersections, t)
                gyr_intersections = filterRollPitch(gyr_intersections, roll, pitch)

                all_features = computeWindowFeatures(gyr_intersections, gyr_mag, roll, pitch, yaw, Ax, Ay, Az, t)

                st = [f[0] for f in all_features]
                et = [f[0]+f[1] for f in all_features]
                labels = [0]*len(st)

                for k in range(len(st)):
                    labels[k] = getLabel(st[k], et[k], epi_st, epi_et, puff_times)
                    if labels[k] != -1:
                        Xs.append(all_features[k][1:])
                        if labels[k] == 0:
                            Ys.append('non-puff')
                        else:
                            Ys.append('puff')

                print(len(Xs))
    #             print(sum(Ys))     
        
    print(len(Ys))
#     print(sum(Ys))
    print(nSample)
    
#     print(Xs)
#     print(Ys)

    Xs = np.array(Xs)
    Ys = np.array([Ys])
    M = np.concatenate((Xs, Ys.T), axis=1)

    df = pd.DataFrame(M)
    feature_name = ['duration', 'roll_mean', 'roll_median', 'roll_sd', 'roll_quartile', 'pitch_mean', 'pitch_median', 'pitch_sd', 'pitch_quartile', 'yaw_mean', 'yaw_median', 'yaw_sd', 'yaw_quartile', 'gyro_mean', 'gyro_median', 'gyro_sd', 'gyro_quartile', 'label(1:puff; 0:non-puff)']
    df.to_csv("feature_file_puffmarker_wrist_py.csv", header=feature_name)

    

/home/nsaleheen/data/csvdataSI_new/p05/s01/
success
10509
10509


NameError: name 'getMag' is not defined

In [44]:
def doProcess(accel_stream, gyro_stream):

    A_mag_stream = magnitude(accel_stream)
    gyr_mag_stream = magnitude(gyro_stream)

    roll_datastream = calculateRollStream(accel_stream)
    pitch_datastream = calculatePitchStream(accel_stream)
    yaw_datastream = calculateYawStream(accel_stream)

    gyr_mag_800 = smooth(gyr_mag_stream, fastSize)
    gyr_mag_8000 = smooth(gyr_mag_stream, slowSize)
    
    gyr_intersections = segmentationUsingTwoMovingAverage(gyr_mag_8000, gyr_mag_800, 0, 4)
    
    gyr_intersections = filterDuration(gyr_intersections)
    gyr_intersections = filterRollPitch(gyr_intersections, roll_datastream, pitch_datastream)
    
    print('gyr_intersections')
    print(len(gyr_intersections.data))

    all_features = computeWindowFeatures(gyr_intersections, gyr_mag_stream, roll_datastream, pitch_datastream, yaw_datastream, accel_stream)

    st = [f[0] for f in all_features]
    et = [f[0]+f[1] for f in all_features]
    labels = [0]*len(st)

    for k in range(len(st)):
        labels[k] = getLabel(st[k], et[k], epi_st, epi_et, puff_times)
        if labels[k] != -1:
            Xs.append(all_features[k][1:])
            if labels[k] == 0:
                Ys.append('non-puff')
            else:
                Ys.append('puff')

    print(len(Xs))


if __name__ == '__main__':
#     cur_dir = 'C:\\Users\\sakther\\Documents\\python_workshop\\md2k_mCerebralCortex\\wristdata\\'
#     cur_dir = 'C:\\Users\\sakther\\DATA\\csvdataSI_new\\p03\\s03\\'

    fastSize = 13
    slowSize = 131
    
    data_dir = '/home/nsaleheen/data/csvdataSI_new/'

    pids = ['p01', 'p02', 'p03', 'p04', 'p05', 'p06']
    pids = ['p05']
    Xs = []
    Ys = []
    nSample = 0

    for i in range(len(pids)):
        basedir = data_dir + pids[i] + '/'
        sids = [d for d in os.listdir(basedir) if os.path.isdir(os.path.join(basedir, d))]     

        for j in range(len(sids)):
            cur_dir = data_dir + pids[i] + '/' + sids[j] + '/'
            print(cur_dir)
            
            for wrist in range(2): # 0 for left wrist, 1 for right wrist
            
                t, Ax, Ay, Az, Gx, Gy, Gz, epi_st, epi_et, puff_times = getInputData(cur_dir, wrist)
                accel_stream, gyro_stream = getInputDataStream(cur_dir, wrist)
                print('success')
                print(len(accel_stream.data))
                print(len(Ax))
                
                doProcess(accel_stream, gyro_stream)

                nSample = nSample + len(puff_times)

                A_mag = getMag(Ax, Ay, Az)
                gyr_mag = getMag(Gx, Gy, Gz)

                roll = calculateRoll(Ax, Ay, Az)
                pitch = calculatePitch(Ax, Ay, Az)
                yaw = calculateYaw(Ax, Ay, Az)

                roll_datastream = calculateRollStream(accel_stream)
                pitch_datastream = calculatePitchStream(accel_stream)
                yaw_datastream = calculateYawStream(accel_stream)

                gyr_mag_800 = smooth(gyr_mag, fastSize)
                gyr_mag_8000 = smooth(gyr_mag, slowSize)
                
                gyr_intersections, dont_care = segmentationUsingTwoMovingAverage(gyr_mag_8000, gyr_mag_800, 0, 4)
                gyr_intersections = filterDuration(gyr_intersections, t)
                gyr_intersections = filterRollPitch(gyr_intersections, roll, pitch)

                all_features = computeWindowFeatures(gyr_intersections, gyr_mag, roll, pitch, yaw, Ax, Ay, Az, t)

                st = [f[0] for f in all_features]
                et = [f[0]+f[1] for f in all_features]
                labels = [0]*len(st)

                for k in range(len(st)):
                    labels[k] = getLabel(st[k], et[k], epi_st, epi_et, puff_times)
                    if labels[k] != -1:
                        Xs.append(all_features[k][1:])
                        if labels[k] == 0:
                            Ys.append('non-puff')
                        else:
                            Ys.append('puff')

                print(len(Xs))
    #             print(sum(Ys))     
        
    print(len(Ys))
#     print(sum(Ys))
    print(nSample)
    
#     print(Xs)
#     print(Ys)

    Xs = np.array(Xs)
    Ys = np.array([Ys])
    M = np.concatenate((Xs, Ys.T), axis=1)

    df = pd.DataFrame(M)
    feature_name = ['duration', 'roll_mean', 'roll_median', 'roll_sd', 'roll_quartile', 'pitch_mean', 'pitch_median', 'pitch_sd', 'pitch_quartile', 'yaw_mean', 'yaw_median', 'yaw_sd', 'yaw_quartile', 'gyro_mean', 'gyro_median', 'gyro_sd', 'gyro_quartile', 'label(1:puff; 0:non-puff)']
    df.to_csv("file_path.csv", header=feature_name)


/home/nsaleheen/data/csvdataSI_new/p05/s01/
success
10509
10509
gyr_intersections
2


TypeError: 'DataStream' object is not iterable

In [23]:
A=[1, 2,3 ]
B=[2, 3, 4]
C=[]
C.extend(A)
C.extend(B)
print(C)
print(len(C))
C=[]
C.append(A)
C.append(B)
print(C)
print(C[0][2])
print(len(C))

for i in range(7, 10):
    print(i)




[1, 2, 3, 2, 3, 4]
6
[[1, 2, 3], [2, 3, 4]]
3
2
7
8
9


In [7]:
from numpy.linalg import norm
print(norm([1, 1, 1]))
print(norm([1, 1, 0]))

1.73205080757
1.41421356237
