# 本代码用来根据指定的csv文件生成对应的特征列

## 1. 导入包

In [1]:
import numpy as np  
import pandas as pd
import math
import os
from scipy.fftpack import fft,ifft
import matplotlib.pyplot as plt
import matplotlib.axes as axes
from tqdm import tqdm

## 2. 定义特征获取函数

In [54]:
def getFeatures(acc_list,LEN):
    """Build rolling time-domain and FFT features for every sample of a signal.

    Args:
        acc_list: pandas Series of samples; ``.size``, ``.rolling`` and
            slicing are used on it.
        LEN: Window length in samples.

    Returns:
        dict with one entry per feature, each holding one value per sample:
        "avg", "std", "max", "min", "med", "iqr" and "engy" are pandas
        Series, "rms" is a list, "skw" and "krt" are NumPy arrays, and
        "FFT" is a list of the dicts produced by ``FFT_get``.
    """
    FFT_list = []

    for i in tqdm(range(acc_list.size)):
        if i == 0:
            # No earlier samples exist, so the slice below would be empty.
            FFT = {"skewFFT":0,"kurtFFT":0,"maxFFT":0,"max_indexFFT":0,"engyFFT":0,"centroidFFT":0,"entropyFFT":0,"fft":None}
        else:
            # The FFT window is the (up to) LEN samples *before* sample i;
            # sample i itself is excluded, while the rolling statistics
            # below include it.
            # NOTE(review): confirm this one-sample offset is intended.
            FFT = FFT_get(acc_list[max(i - LEN, 0):i],LEN)
        FFT_list.append(FFT)

    # One shared window definition; min_periods=1 means the first rows are
    # computed from fewer than LEN samples instead of being NaN.
    rolling = acc_list.rolling(window=LEN,min_periods=1)

    avg_list = rolling.apply(np.mean,raw=True)
    std_list = rolling.apply(np.std,raw=True)
    max_list = rolling.apply(np.max,raw=True)
    min_list = rolling.apply(np.min,raw=True)
    media_list = rolling.apply(np.median,raw=True)
    # The quantile is passed positionally: pandas renamed the keyword from
    # ``quantile`` to ``q``, so the positional form works on old and new
    # versions alike.
    upper_quartile = rolling.quantile(0.75,interpolation="nearest")
    lower_quartile = rolling.quantile(0.25,interpolation="nearest")
    quantile_list = upper_quartile - lower_quartile

    rms_list = rms(avg_list,std_list)
    engy_list = rolling.apply(engy,raw=True)
    skew_list = rolling.skew().values
    kurt_list = rolling.kurt().values

    return {"avg":avg_list,"std":std_list,"max":max_list,"min":min_list,"med":media_list,"iqr":quantile_list,"rms":rms_list,"engy":engy_list,"skw":skew_list,"krt":kurt_list,"FFT":FFT_list}

def FFT_get(acc_list,LEN):
    """Compute frequency-domain features for one window of samples.

    Args:
        acc_list: 1-D sequence of samples (pandas Series, NumPy array or list).
        LEN: Expected window length. Features are only computed when the
            half-spectrum has exactly ``LEN/2`` bins.

    Returns:
        dict with the keys "skewFFT", "kurtFFT", "maxFFT", "max_indexFFT",
        "engyFFT", "centroidFFT", "entropyFFT" and "fft". When the window is
        empty or does not yield ``LEN/2`` bins, every feature is 0 and "fft"
        holds whatever spectrum could be computed. Otherwise the features are
        scalars and "fft" is the magnitude spectrum with bin 0 set to 0.
    """
    def _placeholder(spectrum):
        # All-zero feature set used for windows that cannot be analysed.
        return {"skewFFT":0,"kurtFFT":0,"maxFFT":0,"max_indexFFT":0,"engyFFT":0,"centroidFFT":0,"entropyFFT":0,"fft":spectrum}

    samples = np.asarray(acc_list)
    if samples.size == 0:
        # fft() raises on empty input; treat it like any other short window.
        return _placeholder(np.array([]))

    half = samples.size/2
    # Magnitude of the first half of the spectrum, scaled by half the window length.
    fft_list = abs(fft(samples)[:int(half)])/half
    if fft_list.size != LEN/2:
        return _placeholder(fft_list)

    # Zero bin 0 so it does not take part in the statistics below.
    fft_list[0]=0
    # Series.skew()/kurt() return scalars, so every feature column holds plain
    # numbers rather than a mix of 0 and 1-element arrays.
    spectrum = pd.Series(fft_list)
    skew = float(spectrum.skew())
    kurt = float(spectrum.kurt())
    ma_x = np.max(fft_list)
    # Position of the first largest bin, rescaled by 16/half.
    # NOTE(review): 16 presumably is the Nyquist frequency in Hz - confirm.
    max_index = (int(np.argmax(fft_list))/half)*16
    engyFFT = engy(fft_list)
    centroid = Centroid(fft_list)
    entropy = Entropy(fft_list)
    return  {"skewFFT":skew,"kurtFFT":kurt,"maxFFT":ma_x,"max_indexFFT":max_index,"engyFFT":engyFFT,"centroidFFT":centroid,"entropyFFT":entropy,"fft":fft_list}

def rms(acc_list_1,acc_list_2):
    """Return ``sqrt(a**2 + b**2)`` for each pair of elements of the two inputs.

    Args:
        acc_list_1: Iterable of numbers.
        acc_list_2: Iterable of numbers, paired with ``acc_list_1`` by position.

    Returns:
        list of floats, one per position.
    """
    squares_a = pd.Series([pow(value,2) for value in acc_list_1])
    squares_b = pd.Series([pow(value,2) for value in acc_list_2])
    # Series addition pairs the squares up by position.
    return list(map(math.sqrt, squares_a+squares_b))

def engy(data):
    """Return the mean of the squared elements of ``data``.

    Args:
        data: 1-D array-like that can be iterated and has a ``size`` attribute.
    """
    squared = [pow(sample,2) for sample in data]
    return np.sum(squared)/data.size

def Entropy(data):
    """Return the Shannon entropy, in bits, of the normalised power of ``data``.

    Each element is squared and divided by the total, giving a distribution
    ``p``; the result is ``-sum(p * log2(p))`` over the non-zero entries.

    NOTE(review): the definition previously had no body (a syntax error).
    This is the conventional spectral-entropy formula, using the same
    squared-magnitude weighting as ``Centroid``; confirm it is the intended
    feature.

    Args:
        data: 1-D array-like of numbers.

    Returns:
        float entropy; 0.0 when every element is zero.
    """
    power = np.square(np.asarray(data, dtype=float))
    total = power.sum()
    if total == 0:
        # No energy at all: there is no distribution to measure.
        return 0.0
    probs = power/total
    # Zero-probability bins contribute nothing and would make log2 diverge.
    probs = probs[probs != 0]
    return float(-np.sum(probs*np.log2(probs)))
    
def Centroid(data):
    """Return the squared-value-weighted mean index of ``data``, divided by ``data.size/16``.

    Args:
        data: 1-D array with a ``size`` attribute.
    """
    energy_total=0
    weighted_total=0
    for position, value in enumerate(data):
        squared = pow(value,2)
        energy_total += squared
        weighted_total += squared*position
    mean_index = weighted_total/energy_total
    return mean_index/(data.size/16)
    

## 2. 加载文件

In [55]:

# Source file to read and destination file for the generated features.
csv_filename = "raw data/Printer.csv"
csv_filename_featured = "printer_featured.csv"

# Column names are supplied explicitly through ``names``.
raw_columns = ["time","us","acc","ws","label","avg","std"]
df = pd.read_csv(csv_filename,names=raw_columns)
df.head(5)


Unnamed: 0,time,us,acc,ws,label,avg,std
0,52:08.4,1012464,0.03598,2.27844,,0.0,0.0
1,52:08.4,1044468,0.03342,2.23848,,0.0,0.0
2,52:08.4,1076472,0.03821,2.43992,,0.0,0.0
3,52:08.5,1108476,0.03703,2.40637,,0.0,0.0
4,52:08.5,1140476,0.03831,2.15383,,0.0,0.0


In [56]:
# Discard the columns that are not used for feature generation.
df = df.drop(columns=["ws","label","avg","std"])
df.head(10)

# The signal every feature below is derived from.
acc_list = df["acc"]

## 3. 处理数据

In [57]:
# for i in getFeatures(acc_list,32)["FFT"]:
#     print(i["skewFFT"],i["fft"])

In [58]:

# Window lengths (in samples) and the suffix given to the columns they produce.
window_suffixes = ((32,"1"),(64,"2"),(128,"4"),(256,"8"))

# (column prefix, key in the dict returned by getFeatures) for the
# time-domain features, in output column order.
time_columns = (
    ("avg","avg"),
    ("std","std"),
    ("max","max"),
    ("min","min"),
    ("med","med"),
    ("iqr","iqr"),
    ("rms","rms"),
    ("eng","engy"),
    ("skw","skw"),
    ("krt","krt"),
)

# (column prefix, key in each per-sample FFT dict), in output column order.
# "engyFFT" is computed by FFT_get but not written to a column.
fft_columns = (
    ("fskw","skewFFT"),
    ("fkrt","kurtFFT"),
    ("fmax","maxFFT"),
    ("fspp","max_indexFFT"),
    ("fetp","entropyFFT"),
    ("fctd","centroidFFT"),
)

for window_len, suffix in window_suffixes:
    features = getFeatures(acc_list,window_len)
    for prefix, key in time_columns:
        df[prefix + "_" + suffix] = features[key]
    for prefix, key in fft_columns:
        df[prefix + "_" + suffix] = [indiv[key] for indiv in features["FFT"]]

df.head(5)

100%|██████████| 7228/7228 [00:07<00:00, 1016.25it/s]
100%|██████████| 7228/7228 [00:06<00:00, 1044.14it/s]
100%|██████████| 7228/7228 [00:07<00:00, 924.88it/s]
100%|██████████| 7228/7228 [00:09<00:00, 795.12it/s]


Unnamed: 0,time,us,acc,avg_1,std_1,max_1,min_1,med_1,iqr_1,rms_1,...,rms_8,engy_8,skw_8,krt_8,Fskw_8,Fkrt_8,Fmax_8,FmaxId_8,Fentrp_8,Fcentr_8
0,52:08.4,1012464,0.03598,0.03598,0.0,0.03598,0.03598,0.03598,0.0,0.03598,...,0.03598,0.001295,,,0,0,0.0,0.0,0.0,0.0
1,52:08.4,1044468,0.03342,0.0347,0.00128,0.03598,0.03342,0.0347,0.00256,0.034724,...,0.034724,0.001206,,,0,0,0.0,0.0,0.0,0.0
2,52:08.4,1076472,0.03821,0.03587,0.001957,0.03821,0.03342,0.03598,0.00479,0.035923,...,0.035923,0.00129,-0.206082,,0,0,0.0,0.0,0.0,0.0
3,52:08.5,1108476,0.03703,0.03616,0.001768,0.03821,0.03342,0.036505,0.00105,0.036203,...,0.036203,0.001311,-0.886037,0.824385,0,0,0.0,0.0,0.0,0.0
4,52:08.5,1140476,0.03831,0.03659,0.0018,0.03831,0.03342,0.03703,0.00223,0.036634,...,0.036634,0.001342,-1.158537,0.90363,0,0,0.0,0.0,0.0,0.0


## 4. 保存文件

In [59]:
# index=False leaves the index column out of the written file;
# floats are written with six decimal places.
df.to_csv(csv_filename_featured,index=False,float_format='%.6f')

In [72]:
# Scratch cell: element-wise difference of two single-column DataFrames.
pd.DataFrame([1,1])-pd.DataFrame([1,0.5])

Unnamed: 0,0
0,0.0
1,0.5
