# 按开始时间分组
## 原始数据

In [1]:
# import and config
import pandas as pd
from read_tdms_file import read_tdms_file

pd.options.display.max_rows = 20

In [2]:
file_name = r'tdms_files\01-01#20180311180000.TDMS'
channel_list = [u'NLHQ-X-03-S08', u'NLHQ-X-03-S09']
raw_data = read_tdms_file(file_name, channel_list)

In [3]:
# 定义Pandas DataFrame重组的函数
def convert_df(seriels):
    index = pd.MultiIndex.from_product([[seriels.index[0]], seriels.index], names=['start_time', 'timestamp'])
    df = pd.DataFrame(data=seriels.values, index=index, columns=[seriels.name])
    return df

In [4]:
df1 = convert_df(raw_data[channel_list[0]])
df2 = convert_df(raw_data[channel_list[1]])
df = df1.combine_first(df2)

In [5]:
file_name = r'tdms_files\01-01#20180311180005.TDMS'
channel_list = [u'NLHQ-X-03-S08', u'NLHQ-X-03-S09']
raw_data = read_tdms_file(file_name, channel_list)
df3 = convert_df(raw_data[channel_list[0]])
df = df.combine_first(df3)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,NLHQ-X-03-S08,NLHQ-X-03-S09
start_time,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-03-11 10:00:00,2018-03-11 10:00:00.000,219.117111,-828.362610
2018-03-11 10:00:00,2018-03-11 10:00:00.020,219.414963,-828.970520
2018-03-11 10:00:00,2018-03-11 10:00:00.040,219.210724,-828.194641
2018-03-11 10:00:00,2018-03-11 10:00:00.060,218.844788,-828.614563
2018-03-11 10:00:00,2018-03-11 10:00:00.080,218.921371,-827.882690
2018-03-11 10:00:00,2018-03-11 10:00:00.100,218.129944,-827.966675
2018-03-11 10:00:00,2018-03-11 10:00:00.120,218.942657,-828.466614
2018-03-11 10:00:00,2018-03-11 10:00:00.140,218.972443,-827.926636
2018-03-11 10:00:00,2018-03-11 10:00:00.160,218.712875,-828.346619
2018-03-11 10:00:00,2018-03-11 10:00:00.180,219.397934,-828.506592


## 第一组数据包的最大变幅

In [6]:
start_time = df.index[0][0]
idx_max = df['NLHQ-X-03-S08'][start_time].idxmax()
idx_min = df['NLHQ-X-03-S08'][start_time].idxmin()
v1 = df['NLHQ-X-03-S08'][start_time][idx_max] - df['NLHQ-X-03-S08'][start_time][idx_min]
v2 = df['NLHQ-X-03-S09'][start_time][idx_max] - df['NLHQ-X-03-S09'][start_time][idx_min]
v1, v2

(2.9274597, 3.863525390625)

In [7]:
height = {channel_list[0]: 0.1, channel_list[1]: 0.5}
height = pd.Series(height, name='height')
height.sort_values(ascending=True, inplace=True)
height

NLHQ-X-03-S08    0.1
NLHQ-X-03-S09    0.5
Name: height, dtype: float64

## 采用groupby方法

In [8]:
from numpy import polyfit, polyval

def neutral_axis(df, height, threshold):
    if df.notnull().all().all():
        master = height.index[0]
        if df[master].max() - df[master].min() > threshold:
            idx_max = df[master].idxmax()
            idx_min = df[master].idxmin()
            ptp = df.loc[idx_max] - df.loc[idx_min]

            ptp.rename('ptp', inplace=True)
            xy = pd.concat([height, ptp], axis='columns')
            print xy

            neutral_axis = polyfit(xy['ptp'], xy['height'], 1)[-1]  # 截距即为中性轴高度
            return neutral_axis

In [9]:
ptp = df.groupby(level='start_time').apply(neutral_axis, height, 0)
ptp = ptp.dropna(axis='index', how='any')
ptp

               height       ptp
NLHQ-X-03-S08     0.1  2.927460
NLHQ-X-03-S09     0.5  3.863525


start_time
2018-03-11 10:00:00   -1.150963
dtype: float64

In [10]:
df.drop(ptp.index, level='start_time', inplace=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,NLHQ-X-03-S08,NLHQ-X-03-S09
start_time,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-03-11 10:00:05,2018-03-11 10:00:05.000,219.414963,
2018-03-11 10:00:05,2018-03-11 10:00:05.020,218.844788,
2018-03-11 10:00:05,2018-03-11 10:00:05.040,220.172348,
2018-03-11 10:00:05,2018-03-11 10:00:05.060,219.512817,
2018-03-11 10:00:05,2018-03-11 10:00:05.080,219.648987,
2018-03-11 10:00:05,2018-03-11 10:00:05.100,219.759613,
2018-03-11 10:00:05,2018-03-11 10:00:05.120,219.180939,
2018-03-11 10:00:05,2018-03-11 10:00:05.140,219.066040,
2018-03-11 10:00:05,2018-03-11 10:00:05.160,218.376740,
2018-03-11 10:00:05,2018-03-11 10:00:05.180,218.495880,


# 按每5秒分组

In [18]:
# -*- coding: utf-8 -*-
__author__ = 'SUN Shouwang'

from os import listdir, path
import time
import nptdms


class TdmsSpout(object):

    def __init__(self, folder, channel_list):
        random_index = [3, 0, 2, 1, 5, 7, 4, 8, 6, 9]
        self.file_list = [path.join(folder, listdir(folder)[ind]) for ind in random_index]
        self.channel_list = channel_list

    def process(self):
        for tup in self._parse():
            yield tup

    def _parse(self):
        for file_name in self.file_list:
            tdms_file = nptdms.TdmsFile(file_name)
            for channel_name in self.channel_list:
                channel_object = tdms_file.object(u'未命名', channel_name)

                # acquire this channel's 'wf_start_time' property
                # and get its timestamp value for JSON serialize
                start_time = channel_object.property('wf_start_time')
                timestamp = time.mktime(start_time.timetuple())
                tup = [timestamp]

                # acquire this channel's other properties
                others = [v for k, v in channel_object.properties.items() if k != 'wf_start_time']
                tup.extend(others)

                # acquire channel data
                data = channel_object.data.tolist()
                tup.append(data)

                yield tup

In [21]:
folder = r'./tdms_files'
channel_list = ['NLHQ-X-03-S08', 'NLHQ-X-03-S09']

tdms_spout = TdmsSpout(folder, channel_list)

for tup in tdms_spout.process():
    print tup[:-1]

[1520733615.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733615.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733600.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733600.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733610.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733610.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733605.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733605.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733625.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733625.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733635.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733635.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733620.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733620.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733640.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733640.0, 0.0, 0.02, 50, u'NLHQ-X-03-S09', u'Mod3']
[1520733630.0, 0.0, 0.02, 50, u'NLHQ-X-03-S08', u'Mod3']
[1520733630.0, 0.0, 0.02, 50, u

In [40]:
# -*- coding: utf-8 -*-
__author__ = 'SUN Shouwang'

import pandas as pd
from numpy import polyfit
from streamparse.bolt import Bolt


# noinspection PyAttributeOutsideInit
class NeutralAxisBolt(object):

    def __init__(self, storm_conf, context):
        """
        receive parameters set in topology definition from storm_conf argument
        :param dict storm_conf: the Storm configuration for this component
        :param dict context: information about the component’s place within the topology
        """

        self.threshold = storm_conf['threshold']

        self.height = pd.Series(dict(storm_conf['height']), name='height')
        self.height.sort_values(ascending=True, inplace=True)

        self.history = pd.DataFrame()

    def process(self, tup):
        """
        step 1: receive tup and reorganize to pandas.Dataframe;
        step 2: call self._neutral_axis() function to get the neutral axis height of a section ;
        step 3: convert resulted pandas.Dataframe to list and send out.
        :param streamparse.Tuple, tup: tup.values =
                [timestamp, time_offset, time_increment, samples, channel_name, module_name, data]
        :return streamparse.Tuple, tup: tup.values =
                [timestamp, data]
        """
        timestamp = tup[0]
        time_increment = tup[2]
        channel_name = tup[4]
        data = tup[6]

        start_time = pd.Timestamp(timestamp, unit='s', tz='UTC')
        periods = data.__len__()
        freq = '{}ms'.format(int(time_increment / 0.001))

        index = pd.date_range(start=start_time, periods=periods, freq=freq)
        df = pd.DataFrame(data=data, index=index, columns=[channel_name])
        self.history = self.history.combine_first(df)

        if self.history.columns.__len__() == self.height.index.__len__():
            for res in self._neutral_axis():
                tup = [res.index[0].timestamp(), res.values[0]]
                self.emit(tup)

    def _neutral_axis(self):
        grouper = pd.Grouper(axis='index', freq='5s')
        ptp = self.history.groupby(grouper).apply(_na, self.height, self.threshold)
        ptp.dropna(axis='index', how='any', inplace=True)
        self.history.drop(ptp.index, level='start_time', inplace=True)
        return ptp


def _na(df, height, threshold):
    if df.notnull().all().all():
        master = height.index[0]
        if df[master].max() - df[master].min > threshold:
            idx_max = df[master].idxmax()
            idx_min = df[master].idxmin()
            ptp = df.loc[idx_max] - df.loc[idx_min]

            ptp.rename('ptp', inplace=True)
            xy = pd.concat([height, ptp], axis='columns')

            neutral_axis = polyfit(xy['ptp'], xy['height'], 1)[-1]  # 截距即为中性轴高度
            return neutral_axis

In [41]:
config = {'threshold': 100, 'height': [['NLHQ-X-03-S08', 0.1], ['NLHQ-X-03-S09', 0.5]]}
neutral_axis_bolt = NeutralAxisBolt(storm_conf=config, context=None)

In [None]:
folder = r'./tdms_files'
channel_list = ['NLHQ-X-03-S08', 'NLHQ-X-03-S09']

tdms_spout = TdmsSpout(folder, channel_list)

for tup in tdms_spout.process():
    print tup[:-1]