In [14]:
from scipy.stats import mannwhitneyu
import pandas as pd

# Left-hand vs right-hand comparison of PPG features.
#
# The CSV is read without a header row and given 14 column names: five
# non-feature columns followed by nine feature columns. For every subject
# ('Name') and cycle index ('i'), the rows whose 'hands' value is 'Left' are
# compared with the rows whose 'hands' value is 'Right', feature by feature,
# using a two-sided Mann-Whitney U test.
#
# NOTE(review): every row visible in the saved output has p_value == 1.0 and a
# U statistic of 0.0 or 1.0, which is what this test yields when each side
# holds a single observation. If each (Name, i) group really has one row per
# hand, the per-cycle test cannot reach significance -- confirm whether cycles
# should be pooled per subject instead.
file_path = "F:\\All data\\Patient_Signal\\PPG.csv"
RESULTS_CSV_PATH = 'F:\\All data\\Patient_Signal\\U_Test_Results.csv'

META_COLUMNS = ['Name', 'i', 'Bad', 'hands', 'Label']
FEATURE_COLUMNS = [
    'systolic peak', 'Diastolic peak', 'Cardiac cycle', 'Cycle Area', 'SSI',
    'Peak to Valley', 'Systolic peak y', '1st Derivative cycle', '1st Derivative peak',
]
COLUMN_NAMES = META_COLUMNS + FEATURE_COLUMNS

# Distance (in cycle index) to the neighbouring cycles that are searched when a
# cycle has data for only one hand.
NEIGHBOUR_CYCLE_OFFSET = 2


def u_test_features(left_data, right_data, name, cycle):
    """Run a two-sided Mann-Whitney U test on every column of two frames.

    Parameters
    ----------
    left_data, right_data : pandas.DataFrame
        Feature-only frames; ``right_data`` must contain every column of
        ``left_data``.
    name, cycle
        Values copied into the 'Name' and 'Cycle' fields of each record.

    Returns
    -------
    list of dict
        One record per column of ``left_data`` with keys 'Name', 'Cycle',
        'Feature', 'U_statistic' and 'p_value'.
    """
    records = []
    for col in left_data.columns:
        stat, p_value = mannwhitneyu(left_data[col], right_data[col], alternative='two-sided')
        records.append({'Name': name, 'Cycle': cycle, 'Feature': col,
                        'U_statistic': stat, 'p_value': p_value})
    return records


def hand_u_tests(data, feature_columns=None, cycle_offset=NEIGHBOUR_CYCLE_OFFSET):
    """Compare 'Left' and 'Right' rows per subject and cycle.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Name', 'i', 'hands' and every entry of
        ``feature_columns``.
    feature_columns : list of str, optional
        Columns to test. Defaults to ``FEATURE_COLUMNS``.
    cycle_offset : int, optional
        When a cycle has rows for only one hand, the missing hand is taken from
        the same subject's cycles ``cycle - cycle_offset`` and
        ``cycle + cycle_offset`` (both are used if both exist).

    Returns
    -------
    list of dict
        Records as produced by ``u_test_features``. Cycles for which one hand
        is still missing after the neighbour lookup produce no records.
    """
    if feature_columns is None:
        feature_columns = FEATURE_COLUMNS

    records = []
    for name, name_group in data.groupby('Name'):
        for cycle, cycle_group in name_group.groupby('i'):
            left_data = cycle_group.loc[cycle_group['hands'] == 'Left', feature_columns]
            right_data = cycle_group.loc[cycle_group['hands'] == 'Right', feature_columns]

            if left_data.empty and right_data.empty:
                # Neither hand is present in this cycle: nothing to compare.
                continue

            if left_data.empty or right_data.empty:
                # Exactly one hand is present: borrow the other hand from the
                # neighbouring cycles of the same subject.
                neighbours = name_group[
                    name_group['i'].isin([cycle - cycle_offset, cycle + cycle_offset])
                ]
                if left_data.empty:
                    left_data = neighbours.loc[neighbours['hands'] == 'Left', feature_columns]
                else:
                    right_data = neighbours.loc[neighbours['hands'] == 'Right', feature_columns]

                if left_data.empty or right_data.empty:
                    # No usable neighbour either: skip this cycle.
                    continue

            records.extend(u_test_features(left_data, right_data, name, cycle))
    return records


# In a notebook kernel __name__ is "__main__", so the cell runs end to end as
# before; the guard only keeps file I/O out of the way when the helpers above
# are imported elsewhere (e.g. for testing).
if __name__ == "__main__":
    data = pd.read_csv(file_path, header=None)
    # Assigning (rather than passing names=) raises if the file does not have
    # exactly len(COLUMN_NAMES) columns.
    data.columns = COLUMN_NAMES

    results = hand_u_tests(data)

    # Collect the results into a DataFrame and show it.
    results_df = pd.DataFrame(results)
    print(results_df)

    # Persist the results as CSV (UTF-8 with BOM).
    results_df.to_csv(RESULTS_CSV_PATH, index=False, encoding='utf-8-sig')
    print("U檢定結果已儲存為 U_Test_Results.csv")

       Name  Cycle               Feature  U_statistic  p_value
0      伍黃秀霞      1         systolic peak          1.0      1.0
1      伍黃秀霞      1        Diastolic peak          1.0      1.0
2      伍黃秀霞      1         Cardiac cycle          1.0      1.0
3      伍黃秀霞      1            Cycle Area          1.0      1.0
4      伍黃秀霞      1                   SSI          0.0      1.0
...     ...    ...                   ...          ...      ...
43555  高陳賽金    333                   SSI          0.0      1.0
43556  高陳賽金    333        Peak to Valley          1.0      1.0
43557  高陳賽金    333       Systolic peak y          0.0      1.0
43558  高陳賽金    333  1st Derivative cycle          1.0      1.0
43559  高陳賽金    333   1st Derivative peak          0.0      1.0

[43560 rows x 5 columns]
U檢定結果已儲存為 U_Test_Results.csv


In [13]:
from scipy.stats import mannwhitneyu
import pandas as pd

# Minimal Mann-Whitney U example: two samples holding exactly one observation
# per feature. The printed p-value is 1.0 for every feature.
feature_names = [
    'systolic peak', 'Diastolic peak', 'Cardiac cycle', 'Cycle Area', 'SSI',
    'Peak to Valley', 'Systolic peak y', '1st Derivative cycle', '1st Derivative peak',
]
first_values = [1270, 0.249551501, 992, 328.9242554, 1018, 728, 0.588581024, 995, 0.627225822]
second_values = [1265, 0.145165521, 995, 285.8301763, 1039, 733, 0.500092327, 991, 0.620003262]

# Each feature maps to a one-element list, i.e. one row per frame.
data1 = {feature: [value] for feature, value in zip(feature_names, first_values)}
data2 = {feature: [value] for feature, value in zip(feature_names, second_values)}

# One single-row DataFrame per sample; columns are the features.
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Two-sided U test per feature, first sample against second sample.
results = []
for feature, first_sample in df1.items():
    outcome = mannwhitneyu(first_sample, df2[feature], alternative='two-sided')
    results.append({
        'Feature': feature,
        'U_statistic': outcome.statistic,
        'p_value': outcome.pvalue,
    })

# Tabulate and print the per-feature results.
results_df = pd.DataFrame(results)
print(results_df)



                Feature  U_statistic  p_value
0         systolic peak          1.0      1.0
1        Diastolic peak          1.0      1.0
2         Cardiac cycle          0.0      1.0
3            Cycle Area          1.0      1.0
4                   SSI          0.0      1.0
5        Peak to Valley          0.0      1.0
6       Systolic peak y          1.0      1.0
7  1st Derivative cycle          1.0      1.0
8   1st Derivative peak          1.0      1.0
