In [1]:
import numpy as np
import neurokit2 as nk

import pyhrv
import pyhrv.tools as tools
import pyhrv.frequency_domain as fd

import pandas as pd
import os, json
from tsfresh import extract_features
import matplotlib.pyplot as plt

In [2]:
workDir = 'D:/LAB/放鬆-三總/Relaxing/分析'
os.chdir(workDir)

In [9]:
feature_names = ['RRV_RMSSD', 'RRV_MeanBB', 'RRV_SDBB', 'RRV_SDSD', 'RRV_CVBB',
       'RRV_CVSD', 'RRV_MedianBB', 'RRV_MadBB', 'RRV_MCVBB', 'RRV_VLF',
       'RRV_LF', 'RRV_HF', 'RRV_LFHF', 'RRV_LFn', 'RRV_HFn', 'RRV_SD1',
       'RRV_SD2', 'RRV_SD2SD1', 'RRV_ApEn', 'RRV_SampEn', 'RRV_DFA_alpha1',
       ]
print(len(feature_names))
       
lable_names = ['addictLabel', 'stateLabel']

data_name = ['data_name']

# sampleRate = 64
sampleRate = 20

inputPath = "Data/RESfreq/"
featureOutputPath = "Feature/"

21


In [9]:
# 初始化一个空的DataFrame来存儲儲结果
all_rsp_rates = pd.DataFrame()

dataList = os.listdir(inputPath)

for dataName in dataList:
    dataInfo = dataName[:-4].split('_')
    data = pd.read_csv("%s%s"%(inputPath, dataName), names=['TimeStamp', 'RESdata'])

    # 轉換 TimeStamp 為 datetime 類型（如果它還不是）
    data['TimeStamp'] = pd.to_datetime(data['TimeStamp'])

    # 根据時間戳切割數據 四分鐘一切
    start_time = data['TimeStamp'].iloc[0]
    end_time = start_time + pd.Timedelta(minutes=4)

    rsp_rates = []
    # 添加數據名稱
    rsp_rates.append(dataInfo[0])
    for _ in range(3):  # 切割成三段
        segment = data[(data['TimeStamp'] >= start_time) & (data['TimeStamp'] < end_time)]
        if not segment.empty:
            signals, info = nk.rsp_process(segment['RESdata'], sampling_rate=sampleRate)
            rate = nk.rsp_rate(signals, sampling_rate=sampleRate, method="peak")
            rsp_rates.append(rate.mean())  # 取每段的平均呼吸频率
        else:
            rsp_rates.append(np.nan)  # 如果段是空的，添加NaN作为占位符

        start_time += pd.Timedelta(minutes=4)
        end_time += pd.Timedelta(minutes=4)

    # 将当前数据的呼吸频率添加到总的DataFrame中
    # 这里假设你希望每列代表一段时间内的平均呼吸频率
    all_rsp_rates = pd.concat([all_rsp_rates, pd.DataFrame([pd.Series(rsp_rates)])], ignore_index=True)

# 如果需要，可以给all_rsp_rates的列添加合适的列名
all_rsp_rates.columns = ['dataName', 'Segment1', 'Segment2', 'Segment3']
all_rsp_rates
# 最终，all_rsp_rates包含了所有数据的呼吸频率信息

Unnamed: 0,dataName,Segment1,Segment2,Segment3
0,A05,6.037867,6.706162,8.81583
1,A06,4.975337,10.097401,13.390061
2,A07,4.349022,9.469279,5.085399
3,A08,6.400353,7.517504,7.989117
4,A17,10.717428,9.985401,10.883639
5,A19,8.345505,9.516019,9.750872
6,A29,4.581895,4.983955,4.572018
7,A31,9.90294,11.401355,13.253443
8,A33,7.93588,7.861387,9.478653
9,A35,4.839048,4.086652,10.48536


In [7]:
# 初始化一个空的DataFrame来存储结果
all_rsp_rates = pd.DataFrame()

dataList = os.listdir(inputPath)

for dataName in dataList:
    # 读取数据，这里假设数据文件的格式是CSV，且只有时间戳和呼吸信号两列
    data = pd.read_csv(f"{inputPath}/{dataName}", names=['TimeStamp', 'RESdata'])

    # 获取数据名称，假设通过文件名前缀
    dataName = dataName.split('_')[0]

    # 计算每段的数据点数
    points_per_segment = len(data) // 3

    rsp_rates = [dataName]  # 用于存储每段的呼吸频率，首个元素是数据名称
    for i in range(3):  # 分成三段
        # 根据数据点数切割每段
        segment = data.iloc[i * points_per_segment: (i + 1) * points_per_segment]
        if not segment.empty:
            signals, info = nk.rsp_process(segment['RESdata'].values, sampling_rate=sampleRate)
            rate = nk.rsp_rate(signals, sampling_rate=sampleRate, method="peak")
            rsp_rates.append(rate.mean())  # 取每段的平均呼吸频率
        else:
            rsp_rates.append(np.nan)  # 如果段是空的，添加NaN作为占位符

    # 将当前数据的呼吸频率添加到总的DataFrame中
    all_rsp_rates = pd.concat([all_rsp_rates, pd.DataFrame([rsp_rates])], ignore_index=True)

# 设置列名
all_rsp_rates.columns = ['dataName', 'Segment1', 'Segment2', 'Segment3']

# 查看结果
all_rsp_rates


  dataName   Segment1   Segment2   Segment3
0      A05   5.838641   7.073338   8.856857
1      A06   4.887178  11.528548  13.217310
2      A07   4.418955   9.063748   4.938811
3      A08   6.397073   7.531839   7.931931
4      A17  10.719553  10.019994  10.852272
5      A19   8.498331   9.445226   9.687936
6      A29   4.703900   4.639119   4.391829
7      A31   9.872371  11.393188  13.200646
8      A33   7.915185   7.679250   9.306880
9      A35   5.088587   3.128916   9.768540


In [17]:
# 初始化一个空的DataFrame来存储结果
all_rsp_rates = pd.DataFrame()

dataList = os.listdir(inputPath)

for dataName in dataList:
    # 读取数据，这里假设数据文件的格式是CSV，且只有时间戳和呼吸信号两列
    data = pd.read_csv(f"{inputPath}/{dataName}", names=['TimeStamp', 'RESdata'])

    # 获取数据名称，假设通过文件名前缀
    dataName = dataName.split('_')[0] + '_' + dataName.split('_')[3]
    
    if not data.empty:
        signals, info = nk.rsp_process(data['RESdata'].values, sampling_rate=sampleRate)
        rate = nk.rsp_rate(signals, sampling_rate=sampleRate, method="peak")
        avg_rate = np.mean(rate)  # 计算整个数据集的平均呼吸频率

        # 创建一个临时DataFrame来存储当前文件的结果
        temp_df = pd.DataFrame([{"dataName": dataName, "AvgRate": avg_rate}])

        # 使用pd.concat合并到all_rsp_rates
        all_rsp_rates = pd.concat([all_rsp_rates, temp_df], ignore_index=True)

# 设置列名
# all_rsp_rates.columns = ['dataName', 'AvgRate']

# 查看结果
all_rsp_rates

Unnamed: 0,dataName,AvgRate
0,A05_VRTest,7.140441
1,A06_VRTest,10.023437
2,A07_VRTest,4.582703
3,A08_VRTest,7.273543
4,A17_VRTest,10.513147
5,A19_VRTest,9.179428
6,A29_PostTest,5.998015
7,A29_PreTest,8.958231
8,A29_VRTest,4.744545
9,A31_PostTest,9.173496


In [23]:
# 初始化一个空的DataFrame来存储结果
all_rsp_rates = pd.DataFrame()

dataList = os.listdir(inputPath)

for dataName in dataList:
    # 读取数据，这里假设数据文件的格式是CSV，且只有时间戳和呼吸信号两列
    data = pd.read_csv(f"{inputPath}/{dataName}", names=['TimeStamp', 'RESdata'])
    # 确保TimeStamp是datetime类型
    data['TimeStamp'] = pd.to_datetime(data['TimeStamp'])

    # 获取数据名称，假设通过文件名前缀
    dataName = dataName.split('_')[0] + '_' + dataName.split('_')[3]

    # 计算每段的数据点数
    points_per_segment = len(data) // 3

    rsp_rates = [dataName]  # 用于存储每段的呼吸频率，首个元素是数据名称
    for i in range(3):  # 分成三段
        # 根据数据点数切割每段
        segment = data.iloc[i * points_per_segment: (i + 1) * points_per_segment]
        if not segment.empty:
            # 对RESdata应用FFT
            fft_values = np.fft.fft(segment['RESdata'].values)
            fft_freq = np.fft.fftfreq(len(fft_values))
            
            # 忽略负频率，找到最大振幅对应的频率
            positive_freqs = fft_freq[:len(fft_values)//2]
            positive_amps = np.abs(fft_values[:len(fft_values)//2])
            peak_freq = positive_freqs[np.argmax(positive_amps)]
            
            # 计算呼吸频率（次/分钟），假设数据的采样率为sample_rate
            # 计算两个时间戳之间的差异，转换为秒
            time_diffs = data['TimeStamp'].diff().dt.total_seconds().fillna(0)

            # 估计采样率（每秒的样本数）
            sample_rate = 1 / time_diffs.mean()
            # sample_rate = 60 / np.diff(data['TimeStamp'].dt.seconds).mean()  # 每分钟的采样点数
            breaths_per_minute = peak_freq * sample_rate * 60
            
            # 存储结果
            rsp_rates.append(breaths_per_minute)
        else:
            rsp_rates.append(np.nan)  # 如果段是空的，添加NaN作为占位符

    # 将当前数据的呼吸频率添加到总的DataFrame中
    all_rsp_rates = pd.concat([all_rsp_rates, pd.DataFrame([rsp_rates])], ignore_index=True)

# 设置列名
all_rsp_rates.columns = ['dataName', 'Segment1', 'Segment2', 'Segment3']

# 查看结果
all_rsp_rates


Unnamed: 0,dataName,Segment1,Segment2,Segment3
0,A05_VRTest,0.0,0.0,0.0
1,A06_VRTest,0.0,0.0,0.0
2,A07_VRTest,0.0,0.0,0.0
3,A08_VRTest,0.0,0.0,0.0
4,A17_VRTest,0.0,0.0,0.0
5,A19_VRTest,0.0,0.0,0.0
6,A29_PostTest,0.0,0.0,0.0
7,A29_PreTest,0.0,0.0,0.0
8,A29_VRTest,0.0,0.0,0.0
9,A31_PostTest,0.0,0.0,0.0


In [24]:
# 获取文件夹内所有CSV文件
dataList = os.listdir(inputPath)

# 遍历所有文件
for dataName in dataList:
    # 读取数据
    data = pd.read_csv(f"{inputPath}/{dataName}", names=['TimeStamp', 'RESdata'])
    data['TimeStamp'] = pd.to_datetime(data['TimeStamp'])

    segment_length = 4 * 60 * sampleRate  # 每4分钟的数据长度

    # 存储每个文件的结果
    results = []

    for i in range(0, len(data), segment_length):
        segment = data.iloc[i:i + segment_length]
        if len(segment) < segment_length:
            break  # 如果数据不足以形成一个完整段，则跳过

        fft_values = np.fft.fft(segment['RESdata'])
        fft_freq = np.fft.fftfreq(len(fft_values), d=1/sampleRate)

        # 忽略直流成分，并找到主频率
        positive_freqs = fft_freq[:len(fft_values)//2]
        positive_amps = np.abs(fft_values[:len(fft_values)//2])
        primary_freq = positive_freqs[np.argmax(positive_amps[1:]) + 1]  # 排除直流成分

        # 计算呼吸频率（次/分钟）
        breaths_per_minute = primary_freq * 60
        results.append(breaths_per_minute)

    # 输出当前文件的结果
    print(f"File: {dataName}")
    for i, rate in enumerate(results, 1):
        print(f"Segment {i}: {rate:.2f} breaths/minute")
    print("-" * 20)


File: A05_VR_1_VRTest_Res.csv
Segment 1: 4.50 breaths/minute
Segment 2: 5.50 breaths/minute
Segment 3: 7.50 breaths/minute
--------------------
File: A06_VR_1_VRTest_Res.csv
Segment 1: 3.50 breaths/minute
Segment 2: 0.25 breaths/minute
Segment 3: 0.25 breaths/minute
Segment 4: 4.25 breaths/minute
--------------------
File: A07_VR_1_VRTest_Res.csv
Segment 1: 3.50 breaths/minute
Segment 2: 5.50 breaths/minute
Segment 3: 4.75 breaths/minute
Segment 4: 5.00 breaths/minute
--------------------
File: A08_VR_1_VRTest_Res.csv
Segment 1: 3.00 breaths/minute
Segment 2: 6.75 breaths/minute
Segment 3: 7.00 breaths/minute
Segment 4: 7.75 breaths/minute
Segment 5: 8.00 breaths/minute
--------------------
File: A17_VR_1_VRTest_Res.csv
Segment 1: 11.25 breaths/minute
Segment 2: 10.50 breaths/minute
Segment 3: 10.75 breaths/minute
Segment 4: 11.00 breaths/minute
--------------------
File: A19_VR_1_VRTest_Res.csv
Segment 1: 7.75 breaths/minute
Segment 2: 2.50 breaths/minute
Segment 3: 9.50 breaths/minut

In [27]:
# 获取文件夹内所有CSV文件
dataList = os.listdir(inputPath)

# 遍历所有文件
for dataName in dataList:
    # 读取数据
    data = pd.read_csv(f"{inputPath}/{dataName}", names=['TimeStamp', 'RESdata'])

    # 计算每段的数据点数
    segment_length = len(data) // 3

    # 存储每个文件的结果
    results = []

    for i in range(3):  # 分成三段
        start_index = i * segment_length
        end_index = (i + 1) * segment_length if i < 2 else len(data)  # 确保最后一段包含所有剩余的数据点
        
        segment = data.iloc[start_index:end_index]['RESdata']

        # 计算每段的平均呼吸频率
        segment_length = len(segment)
        fft_values = np.fft.fft(segment)
        fft_freq = np.fft.fftfreq(segment_length, d=1/sample_rate)

        # 忽略直流成分，并找到主频率
        positive_freqs = fft_freq[:segment_length//2]
        positive_amps = np.abs(fft_values[:segment_length//2])
        primary_freq_index = np.argmax(positive_amps[1:]) + 1  # 排除直流成分
        primary_freq = positive_freqs[primary_freq_index]

        # 估计呼吸频率（次/分钟）
        breaths_per_minute = primary_freq * 60
        results.append(breaths_per_minute)

    # 输出当前文件的结果
    print(f"File: {dataName}")
    for i, rate in enumerate(results, 1):
        print(f"Segment {i}: {rate:.2f} breaths/minute")
    print("-" * 20)


File: A05_VR_1_VRTest_Res.csv
Segment 1: 6.63 breaths/minute
Segment 2: 8.93 breaths/minute
Segment 3: 14.40 breaths/minute
--------------------
File: A06_VR_1_VRTest_Res.csv
Segment 1: 5.25 breaths/minute
Segment 2: 1.31 breaths/minute
Segment 3: 5.25 breaths/minute
--------------------
File: A07_VR_1_VRTest_Res.csv
Segment 1: 5.37 breaths/minute
Segment 2: 6.07 breaths/minute
Segment 3: 8.41 breaths/minute
--------------------
File: A08_VR_1_VRTest_Res.csv
Segment 1: 0.21 breaths/minute
Segment 2: 11.22 breaths/minute
Segment 3: 12.06 breaths/minute
--------------------
File: A17_VR_1_VRTest_Res.csv
Segment 1: 16.14 breaths/minute
Segment 2: 13.88 breaths/minute
Segment 3: 16.14 breaths/minute
--------------------
File: A19_VR_1_VRTest_Res.csv
Segment 1: 11.60 breaths/minute
Segment 2: 14.57 breaths/minute
Segment 3: 13.76 breaths/minute
--------------------
File: A29_VR_1_PostTest_Res.csv
Segment 1: 8.55 breaths/minute
Segment 2: 8.55 breaths/minute
Segment 3: 8.53 breaths/minute
--