In [109]:
import numpy as np
from scipy import stats
import pandas as pd
from scipy import interpolate
from scipy.signal import welch
from biosppy.signals import eda
import os, json


In [110]:
# Switch the process working directory to the project folder; the data and
# feature paths used later in this notebook are relative.
# NOTE(review): this is a machine-specific absolute path — edit it before
# running the notebook on another computer.
workDir = r'C:\Users\kein9\OneDrive\桌面\LAB\RelaxingBC_AC'
os.chdir(workDir)

In [111]:
# --- Signal-processing settings (forwarded to biosppy's eda.eda) ---
sampleRate = 256   # used as `sampling_rate`
amp = 1e-9         # used as `min_amplitude`

# --- Column layout of the feature table ---
feature_names = [
    'diff', 'startle', 'duration', 'average_filter',
    'mini_scr', 'maxi_scr', 'average_scr', 'std_scr',
    'average_ampl', 'std_ampl',
]

# Label columns; the variant with 'trainingCount' is kept for reference only.
# lable_names = ['addictLabel', 'stateLabel', 'trainingCount']
lable_names = ['addictLabel', 'stateLabel']

data_name = ['data_name']

# Disabled settings of the former per-stage segmentation:
# testState = ['pre', 'VR', 'post']
# testDuration = [300, 480, 300]
# stagePoint = [-1080*sampleRate, -780*sampleRate, -300*sampleRate,]

# --- I/O locations, relative to the working directory ---
inputPath = "DataBC&AC/BC/GSR/"
featureOutputPath = "FeatureBC&AC/BC/"

In [112]:
def scr_calculate(raw):
    """Run BioSPPy's EDA pipeline on a raw signal and collect the SCR events.

    Parameters
    ----------
    raw : array-like
        Raw signal, forwarded to ``eda.eda`` as ``signal``. The sampling rate
        and minimum amplitude come from the module-level ``sampleRate`` and
        ``amp`` settings.

    Returns
    -------
    tuple
        ``(ts, filtered, onsets, peaks, amplitudes, onsets_index, peaks_index)``
        where ``onsets`` and ``peaks`` are the values of ``filtered`` sampled at
        the onset / peak indices reported by BioSPPy.
    """
    scr = eda.eda(signal=raw, sampling_rate=sampleRate, show=False, min_amplitude=amp)

    # Read the fields by name instead of by position. The number of arrays
    # returned by eda.eda differs between BioSPPy releases (5 in the older
    # layout, 11 in the newer one), so hard-coded positions silently pick the
    # wrong arrays when the installed version changes. The returned
    # ReturnTuple supports key-based lookup.
    ts = scr['ts']                       # time axis
    filtered = np.asarray(scr['filtered'])  # filtered EDA signal
    onsets_index = scr['onsets']         # indices of SCR onsets
    peaks_index = scr['peaks']           # indices of SCR peaks
    amplitudes = scr['amplitudes']       # SCR amplitudes

    # Sample the filtered signal at the event indices in one vectorised step.
    # The indices are cast to int because they are used for indexing.
    onsets = filtered[np.asarray(onsets_index, dtype=int)]
    peaks = filtered[np.asarray(peaks_index, dtype=int)]

    return ts, filtered, onsets, peaks, amplitudes, onsets_index, peaks_index

In [113]:
def feature_Cal(sc_list):
    """Compute the GSR feature set for one recording.

    Parameters
    ----------
    sc_list : array-like
        The raw samples of one recording (a list, array or pandas Series).

    Returns
    -------
    dict
        Keys: 'diff', 'startle', 'duration', 'average_filter', 'mini_scr',
        'maxi_scr', 'average_scr', 'std_scr', 'average_ampl', 'std_ampl'.
        Every statistic that has no data to summarise (e.g. no SCR was
        detected) is ``np.nan``; 'startle' is then 0.
    """
    # Mean of the differences between consecutive samples.
    diff_array = np.diff(np.asarray(sc_list))
    diff_mean = diff_array.mean() if diff_array.size else np.nan

    # SCR detection (ts is not needed here).
    _, filtered, onsets, peaks, amplitudes, onsets_index, peaks_index = scr_calculate(sc_list)
    peaks = np.asarray(peaks)
    amplitudes = np.asarray(amplitudes)

    # Number of detected responses.
    startle = len(onsets)

    # Mean of the filtered signal.
    average_filter = filtered.mean()

    # Mean onset-to-peak distance. This is an index difference, i.e. it is
    # expressed in samples, not in seconds.
    n_events = len(onsets_index)
    if n_events:
        rise = np.asarray(peaks_index[:n_events]) - np.asarray(onsets_index)
        duration = rise.mean()
    else:
        duration = np.nan

    # Statistics of the filtered signal at the SCR peaks.
    if peaks.size:
        mini_scr = peaks.min()
        maxi_scr = peaks.max()
        average_scr = peaks.mean()
        std_scr = np.std(peaks)
    else:
        mini_scr = maxi_scr = average_scr = std_scr = np.nan

    # Statistics of the SCR amplitudes, guarded like the peak statistics so an
    # event-free recording yields NaN without "mean of empty slice" warnings.
    if amplitudes.size:
        average_ampl = amplitudes.mean()
        std_ampl = np.std(amplitudes)
    else:
        average_ampl = std_ampl = np.nan

    return {
        'diff': diff_mean,
        'startle': startle,
        'duration': duration,
        'average_filter': average_filter,
        'mini_scr': mini_scr,
        'maxi_scr': maxi_scr,
        'average_scr': average_scr,
        'std_scr': std_scr,
        'average_ampl': average_ampl,
        'std_ampl': std_ampl,
    }

In [114]:
def set_label(dataInfo):
    """Build the label dict from the underscore-separated file-name tokens.

    Parameters
    ----------
    dataInfo : sequence of str
        ``dataInfo[0]`` is the token whose first character is compared with
        'A'; ``dataInfo[3]`` is the test-state token.

    Returns
    -------
    dict
        ``{'addictLabel': 0 or 1, 'stateLabel': 0, 1 or 2}`` — 'addictLabel'
        is 1 when the first token starts with 'A'; 'stateLabel' is 0 for
        "PreTest", 1 for "VRTest" and 2 for any other state token.
    """
    state_codes = {"PreTest": 0, "VRTest": 1}

    starts_with_a = dataInfo[0][0] == 'A'
    # Anything that is neither "PreTest" nor "VRTest" falls back to 2.
    state_code = state_codes.get(dataInfo[3], 2)

    return {'addictLabel': int(starts_with_a), 'stateLabel': state_code}


In [115]:
def CalGSRFeatures(filePath, sampleRate, output = False):
    """Compute the GSR feature table for every recording in a directory.

    Each file is read as a header-less two-column CSV ('TimeStamp',
    'GSRdata'). The file name without its extension is split on '_' (e.g.
    ``N1_VR_1_PreTest_Skin``) and the tokens supply the row name and labels.

    Parameters
    ----------
    filePath : str
        Directory that contains the recordings.
    sampleRate : int
        Kept for interface compatibility; this function does not use it
        (``scr_calculate`` reads the module-level ``sampleRate``).
    output : bool, optional
        Kept for interface compatibility; currently unused.

    Returns
    -------
    pandas.DataFrame
        One row per recording with the columns ``data_name`` +
        ``feature_names`` + ``lable_names`` and a 0..n-1 index.
    """
    rows = []

    # sorted(): os.listdir returns entries in arbitrary order; sorting makes
    # the row order reproducible across machines.
    for dataName in sorted(os.listdir(filePath)):
        fullPath = os.path.join(filePath, dataName)
        if not os.path.isfile(fullPath):
            # Skip sub-directories (e.g. notebook checkpoint folders).
            continue

        # e.g. "N1_VR_1_PreTest_Skin.csv" -> ['N1', 'VR', '1', 'PreTest', 'Skin']
        stem, _ = os.path.splitext(dataName)
        dataInfo = stem.split('_')

        data = pd.read_csv(fullPath, names=['TimeStamp', 'GSRdata'])

        # Row name: first two tokens joined, then the state token without its
        # last four characters (e.g. 'PreTest' -> 'Pre').
        row = {'data_name': "%s%s_%s" % (dataInfo[0], dataInfo[1], dataInfo[3][:-4])}
        row.update(feature_Cal(data['GSRdata']))
        row.update(set_label(dataInfo))
        rows.append(row)

    # Build the frame once from the collected records. Growing DataFrames with
    # pd.concat inside the loop is quadratic, and concatenating onto an empty
    # frame is the statement echoed as a warning in the notebook output.
    return pd.DataFrame(rows, columns=data_name + feature_names + lable_names)

In [116]:
# Build the feature table from every recording found under inputPath.
feature = CalGSRFeatures(inputPath, sampleRate=sampleRate, output=False)


  result = pd.concat([result, pd.DataFrame(features, index=[0])], axis=0, ignore_index=True)


In [117]:
# Write the feature table into the configured output folder. The folder is
# created first because to_csv does not create missing directories, and the
# path is built from featureOutputPath instead of repeating the literal.
os.makedirs(featureOutputPath, exist_ok=True)
feature.to_csv(os.path.join(featureOutputPath, 'GSR_test.csv'))
# Optional state-ordered view: feature.sort_values(by=['stateLabel', 'data_name'])
feature

Unnamed: 0,data_name,diff,startle,duration,average_filter,mini_scr,maxi_scr,average_scr,std_scr,average_ampl,std_ampl,addictLabel,stateLabel
0,A107VR_Post,-0.000105,40,261.375000,-3585.180548,-3586.367925,-3582.304162,-3585.130437,0.971015,0.140059,0.411963,1,2
1,A107VR_Pre,0.000110,36,372.972222,-3592.229085,-3595.607872,-3589.417715,-3592.178221,1.383017,0.398479,0.498805,1,0
2,A10VR_Post,0.000094,32,341.625000,-3568.876654,-3569.088967,-3568.015411,-3568.811888,0.307883,0.115656,0.145591,1,2
3,A10VR_Pre,0.000048,29,369.448276,-3569.094997,-3569.208625,-3568.877820,-3569.024004,0.085508,0.090050,0.058264,1,0
4,A111VR_Post,0.009218,22,626.363636,-3390.073342,-3563.997576,-3188.329459,-3377.801212,140.219439,38.950997,43.609266,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,A6VR_Pre,0.000000,39,290.000000,-3596.722002,-3596.748869,-3596.573953,-3596.650503,0.045665,0.109250,0.273637,1,0
74,A7VR_Post,-0.000239,17,250.882353,-3558.619134,-3559.271318,-3557.168119,-3558.389482,0.524148,0.276676,0.522162,1,2
75,A7VR_Pre,-0.000166,21,220.285714,-3557.213732,-3557.705033,-3556.775064,-3557.203434,0.310459,0.049222,0.045614,1,0
76,A9VR_Post,-0.000110,37,245.297297,-3593.602399,-3594.039121,-3593.240437,-3593.551450,0.215573,0.048488,0.037172,1,2
