In [22]:
import pandas as pd
import numpy as np
import math
import os
import neurokit2 as nk
import nolds
from scipy import signal
import heartpy as hp

In [23]:
# Suppress every FutureWarning raised from this point on so that deprecation
# notices from the libraries used below do not flood the cell outputs.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [24]:
# Turn off pandas' chained-assignment check (SettingWithCopyWarning) globally.
# NOTE(review): this also hides genuine "assignment on a copy" mistakes.
pd.options.mode.chained_assignment = None

In [25]:
# Root folder of the PPG raw data. It is listed to obtain the patient names and
# read_data() looks for one sub-folder per patient inside it.
where_PPG = 'C:/Users/citioplab/聯發科PPG Raw Data'

# 前處理 acc

In [26]:
# Remove baseline drift
def filt_ma(acc):
    """High-pass filter one accelerometer axis to remove baseline drift.

    NaN samples are discarded first, so the result can be shorter than ``acc``.
    The filter is a 3rd-order Butterworth high-pass with a normalised cut-off
    of 0.1, applied forward and backward with ``filtfilt`` (zero phase).

    Parameters
    ----------
    acc : iterable of float
        Raw acceleration samples of a single axis.

    Returns
    -------
    numpy.ndarray
        The filtered samples, one per non-NaN input sample.
    """
    valid_samples = list(filter(lambda sample: not math.isnan(sample), acc))
    numerator, denominator = signal.butter(3, 0.1, btype="highpass", output="ba")
    return signal.filtfilt(numerator, denominator, valid_samples)

# 讀所有病人所有的資料

In [27]:
def time_imputation(df, column, filename):
    """Fill missing timestamps in ``df[column]`` and convert it to datetimes.

    A value of 0 (or NaN) means "this sample carries no timestamp of its own".
    Such samples inherit the last timestamp seen above them; samples before the
    first real timestamp receive the fallback passed as ``filename``.

    Compared with the previous implementation:
    * no chained ``inplace=True`` calls are used (they silently do nothing once
      pandas Copy-on-Write is active, and ``fillna(method=...)`` is deprecated);
    * a datetime-like fallback is used as-is. Before, it was stringified as
      ``'2019-04-13 15:29:59'``, failed the ``%Y%m%d%H%M%S`` parse and left the
      leading rows as NaT;
    * only a literal trailing ``'.0'`` is removed from the text form, instead of
      stripping every trailing ``'.'``/``'0'`` character.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the timestamp column; it is modified in place.
    column : str
        Name of the timestamp column. Real timestamps are numbers or strings of
        the form ``YYYYmmddHHMMSS``.
    filename : datetime-like, str or number
        Fallback for the rows before the first real timestamp. Strings and
        numbers are interpreted as ``YYYYmmddHHMMSS``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``column`` converted to datetimes.
        Unparseable values become NaT.
    """
    stamp_format = '%Y%m%d%H%M%S'

    # Forward-fill: 0 marks a missing stamp
    raw = df[column].replace(0, np.nan).ffill()
    # Rows still empty after the forward fill sit before the first real stamp
    leading_gap = raw.isna()

    # Unify the text form: drop the float suffix, right-pad short stamps to 14 digits
    text = raw.astype(str).str.replace(r'\.0$', '', regex=True)
    text = text.str.pad(width=14, side='right', fillchar='0')
    parsed = pd.to_datetime(text, format=stamp_format, errors='coerce')

    # Resolve the fallback for the head of the column
    if isinstance(filename, (str, int, float, np.integer, np.floating)):
        fill_text = str(filename)
        if fill_text.endswith('.0'):
            fill_text = fill_text[:-2]
        fill_value = pd.to_datetime(fill_text.ljust(14, '0'), format=stamp_format, errors='coerce')
    else:
        fill_value = pd.to_datetime(filename, errors='coerce')
    if fill_value is None:
        fill_value = pd.NaT

    df[column] = parsed.where(~leading_gap, fill_value)
    return df

In [28]:
# Read one patient's raw files and turn each of them into a DataFrame
def read_data(where_PPG, patient, verbose):
    """Load every raw recording of one patient.

    Each file in ``<where_PPG>/<patient>/`` whose name (before the first ``.``)
    contains ``"raw"`` is read as a header-less CSV. Column 0 is taken as the
    timestamp and columns 4, 5 and 6 as the X, Y and Z acceleration, which are
    passed through ``filt_ma``. Rows containing any NaN are dropped first.

    Changes from the previous implementation: the unused per-file placeholder
    frame and the unused ``measure_time`` extraction are gone. The latter raised
    ``IndexError`` on a file without complete rows, aborting the whole patient;
    such files are now skipped. Paths are built with ``os.path.join``.

    Parameters
    ----------
    where_PPG : str
        Root folder containing one sub-folder per patient.
    patient : str
        Name of the patient folder. Names containing ``"(x)"`` are treated as
        empty and are not read.
    verbose : bool
        When true, print how many files were read (and which were skipped).

    Returns
    -------
    dict
        Maps the first 14 characters of each file name to a DataFrame with the
        columns ``Measure_time``, ``X_acc``, ``Y_acc`` and ``Z_acc``.
    """
    day_dict = {}

    if "(x)" in patient:
        print(f"{patient} empty")
        return day_dict

    patient_dir = os.path.join(where_PPG, str(patient))

    for raw_data in os.listdir(patient_dir):
        stem = raw_data.split(".")[0]
        if "raw" not in stem:
            continue

        filename = stem[:14]
        df = pd.read_csv(os.path.join(patient_dir, raw_data), header=None).dropna()
        if df.empty:
            # Nothing to filter; skip instead of failing the whole patient
            if verbose:
                print(f"{patient} {raw_data} skipped, no complete rows")
            continue

        # Remove baseline drift from each acceleration axis
        x_acc = filt_ma(df[4].to_numpy())
        y_acc = filt_ma(df[5].to_numpy())
        z_acc = filt_ma(df[6].to_numpy())

        # Assemble the per-file frame
        part_df = pd.DataFrame({'Measure_time': df[0].tolist(), 'X_acc': x_acc, 'Y_acc': y_acc, 'Z_acc': z_acc})
        # Samples before the first timestamp get "file start minus one second"
        file_start_time = pd.to_datetime(filename, format='%Y%m%d%H%M%S', errors='coerce') - pd.Timedelta(seconds=1)
        part_df = time_imputation(part_df, 'Measure_time', file_start_time)

        day_dict[filename] = part_df

    if verbose:
        print(f"{patient} read, length = {len(day_dict)}")
    return day_dict
        

In [29]:
# # 確認每秒不超過64筆資料
# for data in day_dict.values():
#     period_counts_df = pd.DataFrame({'Measure_time': data['Measure_time'].value_counts().index, 'Count': data['Measure_time'].value_counts().values})
#     sorted_counts_df = period_counts_df.sort_values(by='Measure_time').reset_index(drop=True)
#     if len(sorted_counts_df.loc[sorted_counts_df['Count'] > 64, 'Measure_time']) > 0:
#         print(sorted_counts_df.loc[sorted_counts_df['Count'] > 64, 'Measure_time'])

# 計算SPT

In [30]:
def five_sec_acc_rolling_med(data):
    """Average the acceleration per timestamp, then take a 5-row rolling median.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``Measure_time``, ``X_acc``, ``Y_acc`` and ``Z_acc``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Measure_time`` with the columns ``rolling_median_x``,
        ``rolling_median_y`` and ``rolling_median_z``. Rows with an incomplete
        window (the first four) are removed.
    """
    output_names = {
        'X_acc': 'rolling_median_x',
        'Y_acc': 'rolling_median_y',
        'Z_acc': 'rolling_median_z',
    }

    # One row per distinct timestamp: mean of all samples sharing it
    per_timestamp = data.groupby('Measure_time')[list(output_names)].mean()

    # Rolling median over 5 consecutive rows, per axis
    medians = per_timestamp.rolling(window=5).median().rename(columns=output_names)

    return medians.dropna()

In [31]:
def calculate_angle(acc_x, acc_y, acc_z):
    """Return the angle, in degrees, of the x axis against the y-z plane.

    Computes ``atan(acc_x / sqrt(acc_y**2 + acc_z**2))``. ``atan2`` is used so
    that a zero denominator yields the limiting value (+90, -90, or 0 when all
    three inputs are zero) instead of raising ``ZeroDivisionError``.

    Parameters
    ----------
    acc_x, acc_y, acc_z : float
        Acceleration components.

    Returns
    -------
    float
        Angle in degrees, within [-90, 90].
    """
    # hypot is never negative, so atan2 stays in [-pi/2, pi/2], same as atan
    in_plane = math.hypot(acc_y, acc_z)
    deg = math.atan2(acc_x, in_plane) * 180 / math.pi
    return deg

In [32]:
def angle_diff(rolling_medians_df):
    """Turn per-row median accelerations into a smoothed angle-change series.

    Steps: angle per row (``calculate_angle``) -> 5-row rolling mean -> absolute
    difference between consecutive rows -> 300-row rolling median.

    The former ``time_diff`` / ``filtered_df`` continuity check was computed but
    never used, so it has been removed; the returned frame is unchanged.
    NOTE(review): the windows count rows, not seconds. They equal 5 seconds /
    5 minutes only if the index has one row per second without gaps - confirm
    for recordings with interruptions.

    Parameters
    ----------
    rolling_medians_df : pandas.DataFrame
        Indexed by ``Measure_time`` with the columns ``rolling_median_x``,
        ``rolling_median_y`` and ``rolling_median_z``. An ``angle`` column is
        added to this frame in place.

    Returns
    -------
    pandas.DataFrame
        Columns ``Measure_time`` and ``rolling_median_angle``; rows with an
        incomplete window are removed.
    """
    rolling_medians_df['angle'] = rolling_medians_df.apply(
        lambda row: calculate_angle(row['rolling_median_x'], row['rolling_median_y'], row['rolling_median_z']),
        axis=1,
    )

    # 5-row rolling mean of the angle
    rolling_avg_angle = rolling_medians_df['angle'].rolling(window=5).mean().to_frame()

    # Absolute change between consecutive averaged angles
    rolling_avg_angle['angle_diff'] = rolling_avg_angle['angle'].diff().abs()
    rolling_avg_angle = rolling_avg_angle.dropna().reset_index()

    # 300-row rolling median of the angle change
    rolling_median_5min_angle = pd.DataFrame({
        'Measure_time': rolling_avg_angle['Measure_time'],
        'rolling_median_angle': rolling_avg_angle['angle_diff'].rolling(window=300).median()
    })

    return rolling_median_5min_angle.dropna()


In [33]:
def block_detection(angDiff, q, verbose, min_duration=pd.Timedelta(minutes=10)):
    """Find sustained "still" blocks in the angle-change series.

    A row is still when its ``rolling_median_angle`` is below the ``q``
    quantile of the column. Consecutive still rows form a block.

    Fixes over the previous implementation:
    * a block that is still running at the last row is closed at that row
      instead of being dropped;
    * ``time_gap`` is written to an explicit copy and always has a timedelta
      dtype (NaT for the last block) - a single block used to produce a float
      NaN, which cannot be compared with a ``Timedelta`` afterwards;
    * the minimum block length is a parameter. Its default, 10 minutes, is the
      value the code already used (the old comment said 30 minutes).

    Parameters
    ----------
    angDiff : pandas.DataFrame
        Needs the columns ``Measure_time`` and ``rolling_median_angle``. A 0/1
        ``still`` column is added to this frame in place.
    q : float
        Quantile (0-1) of ``rolling_median_angle`` used as stillness threshold.
    verbose : bool
        When true, print the still/moving counts and the number of blocks.
    min_duration : pandas.Timedelta, optional
        Blocks must last strictly longer than this to be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``block_start``, ``block_end``, ``duration`` and ``time_gap``
        (time from this block's end to the next kept block's start).
    """
    # Mark the still rows
    threshold = angDiff['rolling_median_angle'].quantile(q)
    angDiff['still'] = (angDiff['rolling_median_angle'] < threshold).astype('int64')
    if verbose:
        print(angDiff['still'].value_counts())

    # Locate every block: pad with 0 on both sides so each run of 1s has a
    # rising edge (+1) and a falling edge (-1), including a run at the very end
    still_flags = angDiff['still'].to_numpy()
    edges = np.diff(np.concatenate(([0], still_flags, [0])))
    start_pos = np.flatnonzero(edges == 1)
    end_pos = np.flatnonzero(edges == -1) - 1

    times = angDiff['Measure_time'].reset_index(drop=True)
    block_start = times.iloc[start_pos].reset_index(drop=True)
    block_end = times.iloc[end_pos].reset_index(drop=True)
    block_duration = block_end - block_start
    if verbose:
        print(len(block_start), len(block_end), len(block_duration))
    blocks_df = pd.DataFrame({'block_start': block_start, 'block_end': block_end, 'duration': block_duration})

    # Keep the blocks longer than min_duration and compute the gap to the next one
    spt_blocks = blocks_df[blocks_df['duration'] > min_duration].copy()
    spt_blocks['time_gap'] = spt_blocks['block_start'].shift(-1) - spt_blocks['block_end']
    return spt_blocks

In [34]:
def turn_blocks_to_SPT(spt_blocks, max_gap=pd.Timedelta(minutes=60)):
    """Merge nearby still blocks and return the longest merged window.

    Consecutive blocks whose ``time_gap`` is shorter than ``max_gap`` are
    merged into one run. The run with the largest summed block duration wins
    (the first one on ties).

    Fixes over the previous implementation:
    * a block with no neighbour within ``max_gap`` is a candidate on its own;
      before, such input returned ``(0, 0)``;
    * the last block of a run counts towards the run's duration;
    * the run start is reset after every run, so a gap of exactly ``max_gap``
      no longer leaves a stale start;
    * a missing gap (NaT or float NaN) ends the run instead of raising
      ``TypeError`` in the comparison.

    Parameters
    ----------
    spt_blocks : pandas.DataFrame
        Columns ``block_start``, ``block_end``, ``duration`` and ``time_gap``,
        ordered by time.
    max_gap : pandas.Timedelta, optional
        Blocks separated by less than this are merged. Defaults to 60 minutes.

    Returns
    -------
    tuple
        ``(SPT_start, SPT_end)`` of the winning run, or ``(0, 0)`` when
        ``spt_blocks`` has no rows.
    """
    SPT_start = 0
    SPT_end = 0
    SPT_duration = pd.Timedelta(0)

    run_start = None
    run_end = None
    run_duration = pd.Timedelta(0)

    def _commit():
        # Promote the current run if it beats the best one seen so far
        nonlocal SPT_start, SPT_end, SPT_duration
        if run_start is not None and run_duration > SPT_duration:
            SPT_start, SPT_end, SPT_duration = run_start, run_end, run_duration

    rows = zip(spt_blocks['block_start'], spt_blocks['block_end'],
               spt_blocks['duration'], spt_blocks['time_gap'])
    for start, end, duration, gap in rows:
        if run_start is None:
            run_start = start
        run_end = end
        run_duration += duration

        # The run continues only when the next block follows closely enough
        if not pd.isna(gap) and gap < max_gap:
            continue

        _commit()
        run_start = None
        run_end = None
        run_duration = pd.Timedelta(0)

    # Covers input whose final row still carries a short gap
    _commit()

    return SPT_start, SPT_end


# MAIN

In [35]:
# # 1-42號病患（沒有奇怪缺失值）
# patients = os.listdir(where_PPG)[:-8]
    
# for patient in patients:
#     all_data = read_data(where_PPG, patient)
#     for date in all_data.keys():
#         day_data = all_data[date]
        
#         rolling_medians_df = five_sec_acc_rolling_med(day_data)
#         angDiff = angle_diff(rolling_medians_df)
#         spt_blocks = block_detection(angDiff, q = 0.55,verbose=False)
#         SPT_start, SPT_end = turn_blocks_to_SPT(spt_blocks)
        
#         by_patient = pd.DataFrame({'Date':pd.to_datetime(date[:14]), 'SPT_start':SPT_start, 'SPT_end':SPT_end}, index=[0]) 
#         by_patient.to_csv(f'C:/Users/citioplab/Desktop/github/Lab/PPG/SPT windows/{patient}_SPT.csv', mode='a', header=True, index=False)   
            
#         # print(f"Date:{pd.to_datetime(date[:14])} ; SPT_start:{SPT_start} ; SPT_end:{SPT_end}")

In [37]:
# All patients: compute one SPT window per recording and write one CSV per patient
where_SPT = 'C:/Users/citioplab/Desktop/github/Lab/PPG/SPT windows'
patients = sorted(os.listdir(where_PPG))

for patient in patients:
    # Collect plain records and build the frame once, not row by row
    records = []

    try:
        all_data = read_data(where_PPG, patient, verbose=False)
        for date, day_data in all_data.items():
            rolling_medians_df = five_sec_acc_rolling_med(day_data)
            angDiff = angle_diff(rolling_medians_df)
            spt_blocks = block_detection(angDiff, q = 0.55,verbose=False)
            SPT_start, SPT_end = turn_blocks_to_SPT(spt_blocks)

            records.append({'Date': pd.to_datetime(date[:14]), 'SPT_start': SPT_start, 'SPT_end': SPT_end})

        by_patient = pd.DataFrame(records, columns=['Date', 'SPT_start', 'SPT_end'])

        if by_patient.shape[0] > 0:
            # Overwrite: appending with a header duplicated both the header line
            # and all rows every time this cell was re-run
            by_patient.to_csv(f'{where_SPT}/{patient}_SPT.csv', mode='w', header=True, index=False)
            print(f"{patient} done, length = {by_patient.shape[0]}")

    except Exception as e:
        # One failing recording discards the whole patient; the reason is reported
        print(f"{patient} is not available, {e}")

TVGH001 (x) empty
TVGH002 done, length = 9
TVGH003 done, length = 17
TVGH004 (x) empty
TVGH005 done, length = 26
TVGH006 done, length = 32
TVGH007 done, length = 7
TVGH008 (x) empty
TVGH009 done, length = 12
TVGH010 (x) empty
TVGH011 (x) empty
TVGH012 done, length = 30
TVGH013 done, length = 13
TVGH014 done, length = 12
TVGH015 (x) empty
TVGH016 done, length = 14
TVGH017 done, length = 9
TVGH018 done, length = 18
TVGH019 done, length = 17
TVGH020 done, length = 10
TVGH021 done, length = 9
TVGH022 done, length = 19
TVGH023 done, length = 12
TVGH024 done, length = 13
TVGH025 done, length = 18
TVGH026 done, length = 10
TVGH027 done, length = 9
TVGH028 done, length = 11
TVGH029 done, length = 16
TVGH030 (x) empty
TVGH031 done, length = 20
TVGH032 done, length = 11
TVGH033 done, length = 17
TVGH034 done, length = 9
TVGH035 (x) empty
TVGH036 (x) empty
TVGH037 done, length = 11
TVGH038 done, length = 9
TVGH039 done, length = 6
TVGH040 done, length = 10
TVGH041 done, length = 14
TVGH042 done, 