# import module

In [3]:
import json
import numpy as np
import pandas as pd
import math
import glob
import os
from tqdm import tqdm

# load data

In [None]:
# Print the base name (the text before ".csv") of every CSV in the features directory.
for file in glob.glob("../../etc/features/*.csv"):
    print(os.path.basename(file).split(".csv")[0])

# Utility functions

In [2]:
# Resultant (combined) magnitude of a 3-axis sample
def get_SynAcc(x, y, z):
    """Return the Euclidean magnitude of the vector ``(x, y, z)``."""
    squared_norm = x*x + y*y + z*z
    return math.sqrt(squared_norm)

# Mean of the resultant magnitude
def get_ave(df):
    """Return the mean resultant magnitude over all rows of ``df``.

    Positions 1-3 of each ``itertuples`` row (position 0 is the index), i.e.
    the first three columns of ``df``, are passed to ``get_SynAcc`` as the
    x, y and z components.  An empty frame raises ``ZeroDivisionError``.
    """
    magnitudes = [get_SynAcc(row[1], row[2], row[3]) for row in df.itertuples()]
    return sum(magnitudes) / len(magnitudes)

# Mean of a single axis
def get_Ave_value(value):
    """Return the arithmetic mean of the sized iterable ``value``.

    Raises ``ZeroDivisionError`` when ``value`` is empty.
    """
    total = sum(value)
    count = len(value)
    return total / count

# Standard deviation of a single axis
def get_Std_value(value):
    """Return the sample standard deviation (``n - 1`` denominator) of ``value``.

    The mean comes from ``get_Ave_value``.  A single-element or empty input
    raises ``ZeroDivisionError``.
    """
    mean = get_Ave_value(value)
    squared_deviation_sum = sum((v - mean) ** 2 for v in value)
    return math.sqrt(squared_deviation_sum / (len(value) - 1))

# Range of the resultant magnitude
def get_Range(df):
    """Return max minus min of the resultant magnitude over the rows of ``df``.

    The first three columns of ``df`` are passed to ``get_SynAcc`` as x, y, z.
    An empty frame raises ``ValueError`` (from ``max`` on an empty list).
    """
    magnitudes = [get_SynAcc(row[1], row[2], row[3]) for row in df.itertuples()]
    return max(magnitudes) - min(magnitudes)

# Standard deviation of the resultant magnitude
def get_Std(df):
    """Return the sample standard deviation (``n - 1``) of the resultant magnitude.

    The mean is taken from ``get_ave(df)``; the first three columns of ``df``
    are passed to ``get_SynAcc`` as x, y, z.  A frame with fewer than two
    rows raises ``ZeroDivisionError``.
    """
    mean = get_ave(df)
    squared_deviations = [
        (get_SynAcc(row[1], row[2], row[3]) - mean) ** 2
        for row in df.itertuples()
    ]
    return math.sqrt(sum(squared_deviations) / (len(df) - 1))

# Skewness of the resultant magnitude
def get_Skewness(df):
    """Return the bias-corrected sample skewness of the resultant magnitude.

    Computes ``sum((m - mean)**3) / s**3 * n / ((n - 1) * (n - 2))`` with
    ``mean = get_ave(df)``, ``s = get_Std(df)`` and ``n = len(df)``.
    Raises ``ZeroDivisionError`` when ``n < 3`` or when ``s`` is zero.
    """
    mean = get_ave(df)
    cubed_deviations = [
        (get_SynAcc(row[1], row[2], row[3]) - mean) ** 3
        for row in df.itertuples()
    ]
    scaled = sum(cubed_deviations) / (get_Std(df) ** 3)
    n = len(df)
    return scaled * (n / ((n - 1) * (n - 2)))

# Kurtosis of the resultant magnitude
def get_Kurtosis(df):
    """Return the bias-corrected sample excess kurtosis of the resultant magnitude.

    Computes
    ``sum((m - mean)**4) / s**4 * n(n+1) / ((n-1)(n-2)(n-3)) - 3(n-1)**2 / ((n-2)(n-3))``
    with ``mean = get_ave(df)``, ``s = get_Std(df)`` and ``n = len(df)``.
    Raises ``ZeroDivisionError`` when ``n < 4`` or when ``s`` is zero.
    """
    mean = get_ave(df)
    fourth_power_deviations = [
        (get_SynAcc(row[1], row[2], row[3]) - mean) ** 4
        for row in df.itertuples()
    ]
    scaled = sum(fourth_power_deviations) / (get_Std(df) ** 4)
    n = len(df)
    leading = scaled * ((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)))
    correction = (3 * (n - 1) ** 2) / ((n - 2) * (n - 3))
    return leading - correction

# Energy of the resultant magnitude
def get_Energy(df):
    """Return the sum of squared resultant magnitudes over the rows of ``df``.

    The first three columns of ``df`` are passed to ``get_SynAcc`` as x, y, z.
    An empty frame yields ``0``.
    """
    return sum(
        get_SynAcc(row[1], row[2], row[3]) ** 2
        for row in df.itertuples()
    )

def get_Fft(df):
    """Return ``max()`` of the FFT of the resultant-magnitude sequence of ``df``.

    The first three columns of ``df`` are passed to ``get_SynAcc`` as x, y, z
    and the resulting sequence is transformed with ``np.fft.fft``.

    NOTE(review): the FFT output is complex, so the built-in ``max`` depends
    on how NumPy orders complex values; presumably the largest magnitude
    (``np.abs``) was intended -- confirm before relying on this value.
    """
    magnitudes = [get_SynAcc(row[1], row[2], row[3]) for row in df.itertuples()]
    spectrum = np.fft.fft(magnitudes)
    return max(spectrum)

In [None]:
# Load one recording (JSON Lines: one record per line) and keep only the
# rows whose 'type' is 'Accelerometer'.
df=pd.read_json("../../etc/no_header_pdr_raw_data/{}.json".format('5NM1shibataku'), orient='records', lines=True)
df=df[df['type']=='Accelerometer']

# タイムウインドウ重複なし (non-overlapping time windows)

In [None]:
# Load the full recording (all sensor types) for one session.
df=pd.read_json("../../etc/no_header_pdr_raw_data/{}.json".format("7NM8miyazaki"), orient='records', lines=True)

In [None]:
# unixTime is floor-divided by ROUND to bucket rows into windows
# (presumably milliseconds -> 1-second windows; confirm the unit).
ROUND=1000

# Rows of the window 30 buckets after the first bucket of the recording,
# then only the accelerometer rows of that window.
df_all=df[df['unixTime']//ROUND==((df['unixTime'].min()//ROUND)+30)]
df_acc=df_all[df_all['type']=='Accelerometer']

In [None]:
# Resultant magnitude of every row in df_acc.  row[1..3] are the first three
# columns of the frame (assumed to be x, y, z -- confirm the column order).
stack_list=[]
for row in df_acc.itertuples():
    x,y,z=row[1], row[2], row[3]
    stack_list.append(get_SynAcc(x,y,z))

In [None]:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt

def fft(time, ampl):
    """Plot the single-sided amplitude spectrum of ``ampl`` and return its FFT.

    Args:
        time: Sampling period in seconds (spacing between consecutive samples).
        ampl: Sequence of uniformly sampled signal values.

    Returns:
        The complex, unscaled ``np.fft.fft`` output for ``ampl``.

    Side effects:
        Prints the detected peak indices and the scaled amplitude array, and
        draws a new matplotlib figure with the spectrum and its local maxima.
    """
    # Sampling period [s]
    sampling_cycle = time

    # Number of samples
    N = len(ampl)

    # Fast Fourier transform
    fft_ampl = np.fft.fft(ampl)

    # Magnitude of the complex FFT result
    abs_fft_amp = np.abs(fft_ampl)

    # Rescale so that a sinusoid shows up with its time-domain amplitude
    abs_fft_amp    = abs_fft_amp    / N * 2   # AC components
    abs_fft_amp[0] = abs_fft_amp[0] / 2       # DC component

    # Frequency axis: bin k of an N-point FFT lies at k / (N * sampling_cycle) Hz.
    # np.linspace(0, fs, N) includes the endpoint, which stretches the spacing
    # to fs / (N - 1) and shifts every plotted peak slightly.
    frequency = np.arange(N) / (N * sampling_cycle)

    # Peak detection on the non-negative-frequency half of the spectrum
    half = int(N/2)+1
    maximal_idx = signal.argrelmax(abs_fft_amp[:half], order=1)  # indices of local maxima
    print(maximal_idx)
    print(abs_fft_amp)

    # Plot the spectrum and mark the local maxima
    plt.figure(figsize=(10, 8))
    plt.plot(frequency[:half], abs_fft_amp[:half])
    plt.scatter(frequency[maximal_idx], abs_fft_amp[maximal_idx], c='red', s=25)
    plt.grid(True)
    plt.title('Fast Fourier Transform')
    plt.xlabel('freqency[Hz]')
    plt.ylabel('amplitude')

    return fft_ampl
    
def low_pass_filter(fft_time, fft_amp, cut_off):
    """Zero every FFT bin above ``cut_off`` and plot the filtered spectrum.

    Args:
        fft_time: NumPy array holding the frequency of each FFT bin, in the
            same order as ``fft_amp`` (a frequency axis despite the name; the
            plot labels it in Hz).
        fft_amp: Complex FFT array as returned by ``np.fft.fft``.  It is
            modified in place.
        cut_off: Cut-off frequency, in the same unit as ``fft_time``.

    Returns:
        The tuple ``(fft_time, fft_amp)``; ``fft_amp`` is the same (now
        filtered) array object that was passed in.

    Side effects:
        Draws a new matplotlib figure with the filtered spectrum and its
        local maxima.
    """
    # Number of bins
    N = len(fft_amp)

    # np.fft.fft stores the negative-frequency twin of bin k at index N - k.
    # Folding each index onto its twin gives every bin the frequency of its
    # positive-side partner, so one comparison removes both halves of the
    # band symmetrically.  (The earlier upper bound, fft_time - cut_off, was
    # element-wise and could never exceed fft_time, so nothing was filtered.)
    bin_index = np.arange(N)
    mirrored_freq = fft_time[np.minimum(bin_index, N - bin_index)]
    fft_amp[mirrored_freq > cut_off] = 0 + 0j

    # Convert to real amplitudes for plotting
    abs_fft_amp = np.abs(fft_amp)

    # Rescale so that a sinusoid shows up with its time-domain amplitude
    abs_fft_amp    = abs_fft_amp    / N * 2 # AC components
    abs_fft_amp[0] = abs_fft_amp[0] / 2     # DC component

    # Peak detection over the full (two-sided) spectrum
    maximal_idx = signal.argrelmax(abs_fft_amp, order=1)  # indices of local maxima

    # Plot the filtered spectrum and mark the local maxima
    plt.figure(figsize=(10, 8))
    plt.plot(fft_time, abs_fft_amp)
    plt.scatter(fft_time[maximal_idx], abs_fft_amp[maximal_idx], c='red', s=25)
    plt.grid(True)
    plt.title('Low Pass Filter')
    plt.xlabel('freqency[Hz]')
    plt.ylabel('amplitude')

    return fft_time, fft_amp

## 差分をとる

In [None]:
# For every labelled recording: compute per-window accelerometer and gyroscope
# statistics, their difference from the preceding window, and write one CSV
# per recording to ../../etc/dif_features/.
# unixTime is floor-divided by ROUND to bucket rows into windows
# (presumably milliseconds -> 1-second windows; confirm the unit).
ROUND=1000

for file in tqdm(glob.glob("../../etc/label/*.csv")):
    file_name=os.path.basename(file).split(".csv")[0]
    new_df=pd.DataFrame()
    # Label file without header: column 0 holds the window offsets (first and
    # last value bound the loop below), column 1 the label of each window.
    label=pd.read_csv("../../etc/label/{}.csv".format(file_name), header=None)
    # Row cursor into the label file; advanced once per window offset, so it
    # assumes column 0 lists consecutive offsets -- TODO confirm.
    index=0
            
    df=pd.read_json("../../etc/no_header_pdr_raw_data/{}.json".format(file_name), orient='records', lines=True)

    print('{}LOAD DONE!!!!'.format(file_name))
            
    for i in range(label[0][0], label[0][len(label)-1]+1):
        # Current window: bucket i counted from the first bucket of the recording.
        df_all=df[df['unixTime']//ROUND==((df['unixTime'].min()//ROUND)+i)]
        df_acc=df_all[df_all['type']=='Accelerometer']
        df_gyro=df_all[df_all['type']=='Gyroscope']
        
        # Preceding window (bucket i-1), used for the dif_* columns.
        df_old_all=df[df['unixTime']//ROUND==((df['unixTime'].min()//ROUND)+i-1)]
        df_old_acc=df_old_all[df_old_all['type']=='Accelerometer']
        df_old_gyro=df_old_all[df_old_all['type']=='Gyroscope']
        # Skip windows without data for either sensor, keeping the label cursor in step.
        # NOTE(review): only the current window is checked; an empty preceding
        # window reaches get_Ave_value and raises ZeroDivisionError.
        if len(df_acc)==0 or len(df_gyro)==0:
            index+=1
            continue

        # Statistics of the preceding window (same order as the current-window block below).
        old_acc_x_ave, old_acc_y_ave, old_acc_z_ave, old_acc_range, old_acc_x_std, old_acc_y_std, old_acc_z_std, old_gyro_range, \
        old_gyro_x_ave, old_gyro_y_ave, old_gyro_z_ave, old_gyro_x_std, old_gyro_y_std, old_gyro_z_std, old_acc_std, old_gyro_std, \
        old_acc_skewness, old_gyro_skewness, old_acc_kurtosis, old_gyro_kurtosis, old_acc_energy, old_gyro_energy, old_acc_ave, old_gyro_ave= \
        get_Ave_value(df_old_acc['x']), get_Ave_value(df_old_acc['y']), get_Ave_value(df_old_acc['z']), get_Range(df_old_acc), \
        get_Std_value(df_old_acc['x']), get_Std_value(df_old_acc['y']), get_Std_value(df_old_acc['z']), get_Range(df_old_gyro), \
        get_Ave_value(df_old_gyro['x']), get_Ave_value(df_old_gyro['y']), get_Ave_value(df_old_gyro['z']), get_Std_value(df_old_gyro['x']), \
        get_Std_value(df_old_gyro['y']), get_Std_value(df_old_gyro['z']), get_Std(df_old_acc), get_Std(df_old_gyro), get_Skewness(df_old_acc), \
        get_Skewness(df_old_gyro), get_Kurtosis(df_old_acc), get_Kurtosis(df_old_gyro), get_Energy(df_old_acc), get_Energy(df_old_gyro), get_ave(df_old_acc), get_ave(df_old_gyro)

        # Statistics of the current window.
        acc_x_ave, acc_y_ave, acc_z_ave, acc_range, acc_x_std, acc_y_std, acc_z_std, gyro_range, \
        gyro_x_ave, gyro_y_ave, gyro_z_ave, gyro_x_std, gyro_y_std, gyro_z_std, acc_std, gyro_std, \
        acc_skewness, gyro_skewness, acc_kurtosis, gyro_kurtosis, acc_energy, gyro_energy, acc_ave, gyro_ave= \
        get_Ave_value(df_acc['x']), get_Ave_value(df_acc['y']), get_Ave_value(df_acc['z']), get_Range(df_acc), \
        get_Std_value(df_acc['x']), get_Std_value(df_acc['y']), get_Std_value(df_acc['z']), get_Range(df_gyro), \
        get_Ave_value(df_gyro['x']), get_Ave_value(df_gyro['y']), get_Ave_value(df_gyro['z']), get_Std_value(df_gyro['x']), \
        get_Std_value(df_gyro['y']), get_Std_value(df_gyro['z']), get_Std(df_acc), get_Std(df_gyro), get_Skewness(df_acc), \
        get_Skewness(df_gyro), get_Kurtosis(df_acc), get_Kurtosis(df_gyro), get_Energy(df_acc), get_Energy(df_gyro), get_ave(df_acc), get_ave(df_gyro)

        # One output row: label, user, the current-window statistics and the
        # current-minus-preceding differences (dif_*).
        # NOTE(review): DataFrame.append copies the frame on every call and is
        # not available in pandas 2.x; the resulting column order depends on
        # the pandas version, and later cells index columns by position.
        new_df=new_df.append({'label':label[1][index], 'user':file_name, 'acc_x_ave':acc_x_ave, 'acc_y_ave':acc_y_ave, 'acc_z_ave':acc_z_ave, 
                              'acc_range':acc_range, 'acc_x_std':acc_x_std, 'acc_y_std':acc_y_std, 'acc_z_std':acc_z_std, 'gyro_range':gyro_range, 
                              'gyro_x_ave':gyro_x_ave, 'gyro_y_ave':gyro_y_ave, 'gyro_z_ave':gyro_z_ave, 'gyro_x_std':gyro_x_std, 
                              'gyro_y_std':gyro_y_std, 'gyro_z_std':gyro_z_std,'acc_std':acc_std, 'gyro_std':gyro_std, 'acc_skewness':acc_skewness, 
                              'gyro_skewness':gyro_skewness, 'acc_kurtosis':acc_kurtosis, 'gyro_kurtosis':gyro_kurtosis, 'acc_energy':acc_energy, 
                              'gyro_energy':gyro_energy, 'acc_ave':acc_ave, 'gyro_ave':gyro_ave, 
                              'dif_acc_x_ave':(acc_x_ave-old_acc_x_ave), 'dif_acc_y_ave':(acc_y_ave-old_acc_y_ave), 'dif_acc_z_ave':(acc_z_ave-old_acc_z_ave), 
                              'dif_acc_range':(acc_range-old_acc_range), 'dif_acc_x_std':(acc_x_std-old_acc_x_std), 
                              'dif_acc_y_std':(acc_y_std-old_acc_y_std), 'dif_acc_z_std':(acc_z_std-old_acc_z_std), 
                              'dif_gyro_range':(gyro_range-old_gyro_range), 'dif_gyro_x_ave':(gyro_x_ave-old_gyro_x_ave), 'dif_gyro_y_ave':(gyro_y_ave-old_gyro_y_ave), 
                              'dif_gyro_z_ave':(gyro_z_ave-old_gyro_z_ave), 'dif_gyro_x_std':(gyro_x_std-old_gyro_x_std), 
                              'dif_gyro_y_std':(gyro_y_std-old_gyro_y_std), 'dif_gyro_z_std':(gyro_z_std-old_gyro_z_std), 'dif_acc_std':(acc_std-old_acc_std), 
                              'dif_gyro_std':(gyro_std-old_gyro_std), 'dif_acc_skewness':(acc_skewness-old_acc_skewness), 
                              'dif_gyro_skewness':(gyro_skewness-old_gyro_skewness), 'dif_acc_kurtosis':(acc_kurtosis-old_acc_kurtosis), 
                              'dif_gyro_kurtosis':(gyro_kurtosis-old_gyro_kurtosis), 'dif_acc_energy':(acc_energy-old_acc_energy), 
                              'dif_gyro_energy':(gyro_energy-old_gyro_energy), 'dif_acc_ave':(acc_ave-old_acc_ave), 'dif_gyro_ave':(gyro_ave-old_gyro_ave)}, 
                         ignore_index=True)

        index+=1
    new_df.to_csv("../../etc/dif_features/{}.csv".format(file_name), index=False)
    print('{}DONE!!!!'.format(file_name))

# タイムウインドウ重複

In [None]:
# Show the label-file path at position 66 of the glob result.
glob.glob('../../etc/label/*.csv')[66]

In [5]:
# For every labelled recording: build one row per window offset holding the
# label, the user and the features of four sub-windows (get_features is
# defined outside this cell), and write one CSV per recording to
# ../../etc/windows_features/.
for file in tqdm(glob.glob("../../etc/label/*.csv")):
    file_name=os.path.basename(file).split(".csv")[0]
    all_df=pd.DataFrame()
    # Label file without header: column 0 holds the window offsets,
    # column 1 the label of each window.
    label=pd.read_csv("../../etc/label/{}.csv".format(file_name), header=None)
    # Row cursor into the label file; must advance exactly once per offset i.
    index=0

    df=pd.read_json("../../etc/no_header_pdr_raw_data/{}.json".format(file_name), orient='records', lines=True)

    print('{}LOAD DONE!!!!'.format(file_name))

    for i in range(label[0][0], label[0][len(label)-1]+1):
        new_df=pd.DataFrame()
        new_df=new_df.append({'label':label[1][index], 'user':file_name}, ignore_index=True)
        for window in range(0, 4):
            features=get_features(df, i, window)
            if len(features)==0:
                # Skip only this sub-window.  The label cursor is NOT advanced
                # here: doing so moved it several steps per offset and shifted
                # the labels of every following row.
                continue
            new_df=pd.concat([new_df, features], axis=1)
        all_df=all_df.append(new_df)

        index+=1
    all_df.to_csv("../../etc/windows_features/{}.csv".format(file_name), index=False)
    print('{}DONE!!!!'.format(file_name))


  0%|          | 0/81 [00:00<?, ?it/s][A

6NM9tsubouchiLOAD DONE!!!!



  1%|          | 1/81 [01:30<2:01:07, 90.85s/it][A

6NM9tsubouchiDONE!!!!
6NM6yahooLOAD DONE!!!!



  2%|▏         | 2/81 [02:21<1:43:42, 78.76s/it][A

6NM6yahooDONE!!!!
7NM3miyazakiLOAD DONE!!!!



  4%|▎         | 3/81 [02:54<1:24:38, 65.12s/it][A

7NM3miyazakiDONE!!!!
5NM4tsukkyLOAD DONE!!!!



  5%|▍         | 4/81 [03:34<1:13:53, 57.57s/it][A

5NM4tsukkyDONE!!!!
6NM7kobeGLOAD DONE!!!!



  6%|▌         | 5/81 [05:28<1:34:12, 74.37s/it][A

6NM7kobeGDONE!!!!
6NM1yossiLOAD DONE!!!!



  7%|▋         | 6/81 [06:22<1:25:36, 68.49s/it][A

6NM1yossiDONE!!!!
7NM5yakkunLOAD DONE!!!!



  9%|▊         | 7/81 [06:59<1:12:32, 58.82s/it][A

7NM5yakkunDONE!!!!
5NM8shishamoLOAD DONE!!!!



 10%|▉         | 8/81 [08:13<1:17:10, 63.43s/it][A

5NM8shishamoDONE!!!!
7NM2toshikiLOAD DONE!!!!



 11%|█         | 9/81 [08:58<1:09:27, 57.89s/it][A

7NM2toshikiDONE!!!!
5NM8kanLOAD DONE!!!!



 12%|█▏        | 10/81 [09:43<1:03:50, 53.95s/it][A

5NM8kanDONE!!!!
6NM6kobeHLOAD DONE!!!!



 14%|█▎        | 11/81 [10:12<54:27, 46.67s/it]  [A

6NM6kobeHDONE!!!!
6NM6nishioLOAD DONE!!!!



 15%|█▍        | 12/81 [10:47<49:29, 43.04s/it][A

6NM6nishioDONE!!!!
7NM3zimotoELOAD DONE!!!!



 16%|█▌        | 13/81 [12:28<1:08:25, 60.37s/it][A

7NM3zimotoEDONE!!!!
6NM7tamuchinLOAD DONE!!!!



 17%|█▋        | 14/81 [13:04<59:14, 53.05s/it]  [A

6NM7tamuchinDONE!!!!
6NM3kobeCLOAD DONE!!!!



 19%|█▊        | 15/81 [14:01<59:46, 54.35s/it][A

6NM3kobeCDONE!!!!
6NM4kobeILOAD DONE!!!!



 20%|█▉        | 16/81 [14:36<52:27, 48.43s/it][A

6NM4kobeIDONE!!!!
6NM5kobeDLOAD DONE!!!!



 21%|██        | 17/81 [15:17<49:22, 46.28s/it][A

6NM5kobeDDONE!!!!
7NM4zimotoDLOAD DONE!!!!



 22%|██▏       | 18/81 [15:48<43:46, 41.70s/it][A

7NM4zimotoDDONE!!!!
6NM5kobeFLOAD DONE!!!!



 23%|██▎       | 19/81 [16:52<50:00, 48.40s/it][A

6NM5kobeFDONE!!!!
7NM7yakkunLOAD DONE!!!!



 25%|██▍       | 20/81 [17:25<44:22, 43.65s/it][A

7NM7yakkunDONE!!!!
7NM9zimotoBLOAD DONE!!!!



 26%|██▌       | 21/81 [20:11<1:20:33, 80.56s/it][A

7NM9zimotoBDONE!!!!
6NM5yahooLOAD DONE!!!!



 27%|██▋       | 22/81 [20:54<1:08:12, 69.37s/it][A

6NM5yahooDONE!!!!
6NM2kobeBLOAD DONE!!!!



 28%|██▊       | 23/81 [21:49<1:02:51, 65.03s/it][A

6NM2kobeBDONE!!!!
7NM5zimotoALOAD DONE!!!!



 30%|██▉       | 24/81 [22:19<51:38, 54.35s/it]  [A

7NM5zimotoADONE!!!!
6NM6tamuchinLOAD DONE!!!!



 31%|███       | 25/81 [22:49<43:51, 46.99s/it][A

6NM6tamuchinDONE!!!!
7NM8zimotoCLOAD DONE!!!!



 32%|███▏      | 26/81 [24:31<58:25, 63.74s/it][A

7NM8zimotoCDONE!!!!
5NMEXTRAharashoLOAD DONE!!!!



 33%|███▎      | 27/81 [25:24<54:15, 60.29s/it][A

5NMEXTRAharashoDONE!!!!
6NM6kobeALOAD DONE!!!!



 35%|███▍      | 28/81 [25:43<42:19, 47.91s/it][A

6NM6kobeADONE!!!!
7NM7zimotoDLOAD DONE!!!!



 36%|███▌      | 29/81 [26:41<44:10, 50.97s/it][A

7NM7zimotoDDONE!!!!
7NM6kohoLOAD DONE!!!!



 37%|███▋      | 30/81 [27:41<45:33, 53.59s/it][A

7NM6kohoDONE!!!!
7NM1kohoLOAD DONE!!!!



 38%|███▊      | 31/81 [28:34<44:39, 53.58s/it][A

7NM1kohoDONE!!!!
7NM6zimotoELOAD DONE!!!!



 40%|███▉      | 32/81 [29:34<45:23, 55.58s/it][A

7NM6zimotoEDONE!!!!
6NM9nishioLOAD DONE!!!!



 41%|████      | 33/81 [32:37<1:14:51, 93.58s/it][A

6NM9nishioDONE!!!!
5NM1shibatakuLOAD DONE!!!!



 42%|████▏     | 34/81 [33:20<1:01:24, 78.39s/it][A

5NM1shibatakuDONE!!!!
7NM1miyazakiLOAD DONE!!!!



 43%|████▎     | 35/81 [34:22<56:27, 73.65s/it]  [A

7NM1miyazakiDONE!!!!
6NM9senkitaLOAD DONE!!!!



 44%|████▍     | 36/81 [36:24<1:06:06, 88.15s/it][A

6NM9senkitaDONE!!!!
7NM5zimotoCLOAD DONE!!!!



 46%|████▌     | 37/81 [37:47<1:03:35, 86.71s/it][A

7NM5zimotoCDONE!!!!
6NM2tamuchinLOAD DONE!!!!



 47%|████▋     | 38/81 [38:47<56:19, 78.60s/it]  [A

6NM2tamuchinDONE!!!!
5NM4tigerLOAD DONE!!!!



 48%|████▊     | 39/81 [39:28<47:10, 67.40s/it][A

5NM4tigerDONE!!!!
6NM5tamuchinLOAD DONE!!!!



 49%|████▉     | 40/81 [39:58<38:21, 56.13s/it][A

6NM5tamuchinDONE!!!!
7NM2zimotoDLOAD DONE!!!!



 51%|█████     | 41/81 [41:12<41:00, 61.51s/it][A

7NM2zimotoDDONE!!!!
7NM2miyazakiLOAD DONE!!!!



 52%|█████▏    | 42/81 [42:21<41:27, 63.79s/it][A

7NM2miyazakiDONE!!!!
6NM4kobeDLOAD DONE!!!!



 53%|█████▎    | 43/81 [42:47<33:09, 52.36s/it][A

6NM4kobeDDONE!!!!
5NM4yahooLOAD DONE!!!!



 54%|█████▍    | 44/81 [43:21<28:52, 46.83s/it][A

5NM4yahooDONE!!!!
7NM7zimotoELOAD DONE!!!!



 56%|█████▌    | 45/81 [45:09<39:02, 65.07s/it][A

7NM7zimotoEDONE!!!!
6NM8kobeILOAD DONE!!!!



 57%|█████▋    | 46/81 [46:11<37:26, 64.17s/it][A

6NM8kobeIDONE!!!!
7NM9yakkunLOAD DONE!!!!



 58%|█████▊    | 47/81 [48:27<48:40, 85.91s/it][A

7NM9yakkunDONE!!!!
7NM9miyazakiLOAD DONE!!!!



 59%|█████▉    | 48/81 [49:55<47:34, 86.51s/it][A

7NM9miyazakiDONE!!!!
6NM2kobeGLOAD DONE!!!!



 60%|██████    | 49/81 [51:24<46:27, 87.10s/it][A

6NM2kobeGDONE!!!!
5NM2tigerLOAD DONE!!!!



 62%|██████▏   | 50/81 [52:05<37:51, 73.28s/it][A

5NM2tigerDONE!!!!
5NM9yahooLOAD DONE!!!!



 63%|██████▎   | 51/81 [54:02<43:11, 86.39s/it][A

5NM9yahooDONE!!!!
6NM3tamuchinLOAD DONE!!!!



 64%|██████▍   | 52/81 [54:54<36:50, 76.23s/it][A

6NM3tamuchinDONE!!!!
6NM9tamuchinLOAD DONE!!!!



 65%|██████▌   | 53/81 [56:34<38:49, 83.20s/it][A

6NM9tamuchinDONE!!!!
6NM3kobeHLOAD DONE!!!!



 67%|██████▋   | 54/81 [57:21<32:32, 72.32s/it][A

6NM3kobeHDONE!!!!
6NM8tsubouchiLOAD DONE!!!!



 68%|██████▊   | 55/81 [58:11<28:25, 65.58s/it][A

6NM8tsubouchiDONE!!!!
7NM1zimotoALOAD DONE!!!!



 69%|██████▉   | 56/81 [59:14<27:05, 65.02s/it][A

7NM1zimotoADONE!!!!
6NM4tsubouchiLOAD DONE!!!!



 70%|███████   | 57/81 [59:55<23:02, 57.61s/it][A

6NM4tsubouchiDONE!!!!
7NM6zimotoBLOAD DONE!!!!



 72%|███████▏  | 58/81 [1:00:43<20:59, 54.75s/it][A

7NM6zimotoBDONE!!!!
7NM4zimotoELOAD DONE!!!!



 73%|███████▎  | 59/81 [1:01:22<18:23, 50.17s/it][A

7NM4zimotoEDONE!!!!
7NM3zimotoALOAD DONE!!!!



 74%|███████▍  | 60/81 [1:02:03<16:37, 47.49s/it][A

7NM3zimotoADONE!!!!
6NM8yahooLOAD DONE!!!!



 75%|███████▌  | 61/81 [1:02:51<15:52, 47.63s/it][A

6NM8yahooDONE!!!!
6NM1kobeALOAD DONE!!!!



 77%|███████▋  | 62/81 [1:03:35<14:42, 46.47s/it][A

6NM1kobeADONE!!!!
5NM4arthurLOAD DONE!!!!



 78%|███████▊  | 63/81 [1:04:20<13:48, 46.05s/it][A

5NM4arthurDONE!!!!
7NM8zimotoBLOAD DONE!!!!



 79%|███████▉  | 64/81 [1:05:46<16:25, 57.95s/it][A

7NM8zimotoBDONE!!!!
7NM5miyazakiLOAD DONE!!!!



 80%|████████  | 65/81 [1:06:17<13:16, 49.77s/it][A

7NM5miyazakiDONE!!!!
6NM8kobeCLOAD DONE!!!!



 81%|████████▏ | 66/81 [1:07:01<12:04, 48.30s/it][A

6NM8kobeCDONE!!!!
5NM7shibatakuLOAD DONE!!!!



 83%|████████▎ | 67/81 [1:09:30<18:15, 78.25s/it][A

5NM7shibatakuDONE!!!!
7NM2zimotoCLOAD DONE!!!!



 84%|████████▍ | 68/81 [1:11:16<18:46, 86.67s/it][A

7NM2zimotoCDONE!!!!
7NM2zimotoBLOAD DONE!!!!



 85%|████████▌ | 69/81 [1:12:30<16:35, 82.98s/it][A

7NM2zimotoBDONE!!!!
7NM5toshikiLOAD DONE!!!!



 86%|████████▋ | 70/81 [1:13:10<12:51, 70.10s/it][A

7NM5toshikiDONE!!!!
7NM4miyazakiLOAD DONE!!!!



 88%|████████▊ | 71/81 [1:14:06<10:59, 65.92s/it][A

7NM4miyazakiDONE!!!!
7NM7miyazakiLOAD DONE!!!!



 89%|████████▉ | 72/81 [1:15:00<09:19, 62.14s/it][A

7NM7miyazakiDONE!!!!
6NM7kobeBLOAD DONE!!!!



 90%|█████████ | 73/81 [1:16:27<09:16, 69.55s/it][A

6NM7kobeBDONE!!!!
7NM9zimotoALOAD DONE!!!!



 91%|█████████▏| 74/81 [1:19:03<11:09, 95.60s/it][A

7NM9zimotoADONE!!!!
6NM1senkitaLOAD DONE!!!!



 93%|█████████▎| 75/81 [1:19:40<07:47, 78.00s/it][A

6NM1senkitaDONE!!!!
6NM8tamuchinLOAD DONE!!!!



 94%|█████████▍| 76/81 [1:20:22<05:36, 67.25s/it][A

6NM8tamuchinDONE!!!!
7NM4zimotoBLOAD DONE!!!!



 95%|█████████▌| 77/81 [1:21:00<03:53, 58.44s/it][A

7NM4zimotoBDONE!!!!
6NM5kobeELOAD DONE!!!!



 96%|█████████▋| 78/81 [1:22:08<03:03, 61.27s/it][A

6NM5kobeEDONE!!!!
6NM9ganponLOAD DONE!!!!



 98%|█████████▊| 79/81 [1:24:07<02:37, 78.55s/it][A

6NM9ganponDONE!!!!
7NM8miyazakiLOAD DONE!!!!



 99%|█████████▉| 80/81 [1:24:57<01:10, 70.14s/it][A

7NM8miyazakiDONE!!!!
6NM1kobeELOAD DONE!!!!



100%|██████████| 81/81 [1:26:10<00:00, 63.84s/it][A

6NM1kobeEDONE!!!!





In [None]:
# Overlapping-window accelerometer features: every window after the first
# spans the previous and the current bucket.  One CSV per recording is
# written to ../../etc/timed_features/.
# unixTime is floor-divided by ROUND to bucket rows into windows
# (presumably milliseconds -> 1-second windows; confirm the unit).
ROUND=1000

# NOTE(review): [1:] skips the first label file returned by glob --
# presumably to resume an interrupted run; confirm before reusing.
for file in glob.glob("../../etc/label/*.csv")[1:]:
    file_name=os.path.basename(file).split(".csv")[0]
    # Label file without header: column 0 holds the window offsets,
    # column 1 the label of each window.
    label=pd.read_csv("../../etc/label/{}.csv".format(file_name), header=None)
    # Row cursor into the label file; advanced once per window offset.
    index=0

    # Parse all records first and build the frame once: appending to a
    # DataFrame line by line copies the whole frame for every record.
    raw_columns=["x", "y", "z", "unixTime", "type"]
    with open("../../etc/pdr_raw_data/{}.json".format(file_name), "r") as f:
        next(f, None)  # the first line is skipped (presumably a header line)
        records=[json.loads(line) for line in f]
    df=pd.DataFrame([{key: record[key] for key in raw_columns} for record in records],
                    columns=raw_columns)

    print('{}LOAD DONE!!!!'.format(file_name))

    # Loop-invariant: bucket number of every row, first bucket, sensor mask.
    second=df['unixTime']//ROUND
    base=second.min()
    is_accelerometer=df['type']=='Accelerometer'

    first_offset=label[0][0]
    rows=[]
    for i in range(first_offset, label[0][len(label)-1]+1):
        # The first window has no predecessor; every other window covers
        # buckets i-1 and i.  (The earlier mask for the "previous" bucket
        # reduced to bucket i, so the current bucket was counted twice.)
        oldest=i if i==first_offset else i-1
        in_window=(second>=base+oldest)&(second<=base+i)
        df_acc=df[in_window&is_accelerometer]
        if len(df_acc)==0:
            index+=1
            continue
        rows.append({'label':label[1][index], 'acc_range':get_Range(df_acc), 'acc_std':get_Std(df_acc),
                     'acc_skewness':get_Skewness(df_acc), 'acc_kurtosis':get_Kurtosis(df_acc), 'acc_energy':get_Energy(df_acc)})
        index+=1
    new_df=pd.DataFrame(rows, columns=["label", "acc_range", "acc_std", "acc_skewness", "acc_kurtosis", "acc_energy"])
    new_df.to_csv("../../etc/timed_features/{}.csv".format(file_name), index=False)
    print('{}DONE!!!!'.format(file_name))

# raw_dataへのラベル付け

In [None]:
# Attach the user name and the window label to every raw sensor row and
# write one CSV per recording to ../../etc/labeled_pdr_raw_data/.
# unixTime is floor-divided by ROUND to bucket rows into windows
# (presumably milliseconds -> 1-second windows; confirm the unit).
ROUND=1000

for file in glob.glob("../../etc/label/*.csv"):
    file_name=os.path.basename(file).split(".csv")[0]
    # Label file without header: column 0 holds the window offsets,
    # column 1 the label of each window.
    label=pd.read_csv("../../etc/label/{}.csv".format(file_name), header=None)
    # Row cursor into the label file; advanced once per window offset.
    index=0

    df=pd.read_json("../../etc/no_header_pdr_raw_data/{}.json".format(file_name), orient='records', lines=True)

    print('{}LOAD DONE!!!!'.format(file_name))

    # Loop-invariant: bucket number of every row and the first bucket.
    second=df['unixTime']//ROUND
    base=second.min()

    labeled=[]
    for i in range(label[0][0], label[0][len(label)-1]+1):
        # All sensor types of the window are kept (no Accelerometer filter).
        df_acc=df[second==base+i]
        if len(df_acc)==0:
            index+=1
            continue
        # assign() returns a new frame, so no column is written into a slice of df.
        labeled.append(df_acc.assign(user=file_name, label=label[1][index]))
        index+=1
    # Concatenate once at the end instead of re-copying the result per window.
    new_df=pd.concat(labeled, ignore_index=True) if labeled else pd.DataFrame()
    new_df.to_csv("../../etc/labeled_pdr_raw_data/{}.csv".format(file_name), index=False)
    print('{}DONE!!!!'.format(file_name))

# データ作るぞ

In [None]:
# Rewrite every CSV in ../../etc/test/ in place with an added 'user' column
# holding the file's base name.
for file in glob.glob("../../etc/test/*.csv"):
    file_name=os.path.basename(file).split(".csv")[0]
    csv_path="../../etc/test/{}.csv".format(file_name)
    df_features=pd.read_csv(csv_path)
    df_features['user']=file_name
    df_features.to_csv(csv_path, index=False)

In [None]:
# awk 'NR==1 || FNR!=1' *.csv

# clipping

In [48]:
# Clip every feature column to its own 1st-99th percentile range.
# 'label' and 'user' are dropped first so only feature columns are clipped.
df=pd.read_csv('../../etc/step_peak_num/features.csv')
print(len(df))
dfs=df.drop(['label', 'user'], axis=1)

for column in dfs.columns:
    p01=dfs[column].quantile(0.01)
    p99=dfs[column].quantile(0.99)
    dfs[column]=dfs[column].clip(p01, p99)
# Clipping changes values, not the number of rows, so both prints should match.
print(len(dfs))
# print(max(dfs['acc_ave']))

29360
29360


In [49]:
# Put the identifying columns back onto the clipped frame (user first, then label).
dfs['user']=df['user']
dfs['label']=df['label']

In [50]:
# Save the clipped frame (dfs), arranged in the column order of the raw frame.
# Writing df here stored the unclipped values under the "clipped" file name.
dfs[list(df.columns)].to_csv('../../etc/step_peak_num/clipped_raw_features.csv', index=False)

# 正規化

In [51]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import scipy.stats as stats

In [52]:
# Reload the features saved by the clipping step.
df_features=pd.read_csv('../../etc/step_peak_num/clipped_raw_features.csv')

In [53]:
# Drop every row that contains at least one missing value.
df_features=df_features.dropna()

In [54]:
# Columns to standardise: only the first three columns of the frame.
# NOTE(review): any further feature columns stay unscaled -- confirm that
# [:3] is intended.
columns=df_features.columns[:3]

In [55]:
# NOTE(review): sc is created but not used in this cell; the scaling below
# is done with scipy.stats.zscore.
sc = StandardScaler()

# Replace each selected column by its z-score.
for column in columns:
    # print(stats.zscore(df_features[column].values))
    df_features[column]=stats.zscore(df_features[column].values)

In [56]:
# Save the standardised features.
df_features.to_csv('../../etc/step_peak_num/clipped_std_features.csv', index=False)

In [28]:
# Scale x_train into the range [-1, 1].
# NOTE(review): x_train is not defined in any cell visible here -- it has to
# come from earlier notebook state.
minmax = MinMaxScaler(feature_range=(-1, 1))
mm_acc=minmax.fit_transform(x_train)

In [None]:
# Rebuild a labelled frame row by row from the scaled array std_acc.
# row[0] is the frame index and is used as the row position in std_acc;
# row[14] / row[15] are read as label / user, and the numeric positions in
# std_acc are mapped to feature names by hand.
# NOTE(review): std_acc is not defined in any cell visible here, and the
# positional indices depend on the column order of df_features -- verify
# both before running.
new_df=pd.DataFrame()

for row in df_features.itertuples():
    new_df=new_df.append({'label':row[14], 'user':row[15], 'acc_range':std_acc[row[0]][3], 'acc_std':std_acc[row[0]][5], 
                          'acc_skewness':std_acc[row[0]][4], 'acc_kurtosis':std_acc[row[0]][2], 'acc_energy':std_acc[row[0]][1], 
                          'gyro_range':std_acc[row[0]][9], 'gyro_std':std_acc[row[0]][11], 'gyro_skewness':std_acc[row[0]][10], 
                          'gyro_kurtosis':std_acc[row[0]][8], 'gyro_energy':std_acc[row[0]][7], 'acc_ave':std_acc[row[0]][0], 
                          'gyro_ave':std_acc[row[0]][6]},  
                             ignore_index=True)

In [None]:
# Save the rebuilt, labelled frame.
new_df.to_csv('../../etc/labeled_features/clipped_std_features.csv', index=False)

# 時刻t-1のラベルを追加

In [None]:
# Load the standardised difference features (all users in one file).
df_features=pd.read_csv('../../etc/dif_features/clipped_std_features.csv')

In [None]:
# Add a 'pre_label' column: every row receives the label of the preceding row
# of the same user; the first row of a user repeats its own label.
frames=[]

for file in glob.glob("../../etc/label/*.csv"):
    file_name=os.path.basename(file).split(".csv")[0]
    # .copy() so the new column is not written into a slice of df_features.
    df=df_features[df_features['user']==file_name].copy()
    if len(df)==0:
        # No feature rows for this label file; there is nothing to shift.
        continue
    labels=list(df['label'])
    df['pre_label']=labels[:1]+labels[:-1]
    frames.append(df)

# Concatenate once at the end; the original row index of each user is kept.
new_df=pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Save the features together with the previous-window label.
new_df.to_csv('../../etc/dif_features/clipped_std_prelabel_features.csv', index=False)

# PCA

In [None]:
# Load the standardised multi-window features used as PCA input.
df=pd.read_csv('../../etc/windows_features/clipped_std_features.csv')

In [None]:
# Feature matrix: every column except the label and user identifiers.
features=df.drop(['label', 'user'], axis=1)

In [None]:
# Fit the PCA on the feature matrix and replace it by its projection.
# NOTE(review): pca is created in a cell further down (pca = PCA()); that
# cell has to be executed before this one.
pca.fit(features)
features = pca.transform(features)

In [None]:
# Preview the first rows of the projected data, naming the columns PC1..PCn
# with n = number of feature columns in df.
pd.DataFrame(features, columns=["PC{}".format(x + 1) for x in range(len(df.drop(['label', 'user'], axis=1).columns))]).head()

In [None]:
# Same preview as the cell before it (identical statement).
pd.DataFrame(features, columns=["PC{}".format(x + 1) for x in range(len(df.drop(['label', 'user'], axis=1).columns))]).head()

In [None]:
# Plot the cumulative explained-variance ratio against the number of
# principal components (a leading 0 is prepended for "zero components").
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
# Force integer tick marks on the x axis.
plt.gca().get_xaxis().set_major_locator(ticker.MaxNLocator(integer=True))
plt.plot([0] + list( np.cumsum(pca.explained_variance_ratio_)), "-o")
plt.xlabel("Number of principal components")
plt.ylabel("Cumulative contribution rate")
plt.grid()
plt.show()

In [None]:
# Keep the projected data as a frame with columns PC1..PCn.
new_df=pd.DataFrame(features, columns=["PC{}".format(x + 1) for x in range(len(df.drop(['label', 'user'], axis=1).columns))])

In [None]:
# Keep only the first 16 principal components (columns 0..15).
new_df=new_df.iloc[:, 0:16]

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
# Create the PCA instance (no n_components is passed, so the library default applies)
pca = PCA()
# Create the logistic-regression instance
lr = LogisticRegression()
# Run PCA on the training and test data (left disabled)
# X_train_pca = pca.fit_transform(features)
# X_test_pca = pca.transform(X_test_std)

In [None]:
# Save the retained principal components.
new_df.to_csv('../../etc/windows_features/pca.csv', index=False)