In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import os
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm
from statsmodels.tsa.api import SimpleExpSmoothing # 단순지수평활법(트렌드나 계절성이 없는 데이터에 적합)
import warnings
warnings.filterwarnings("ignore")
from performance import performance
from statsmodels.tsa.stattools import adfuller

# ADF test

In [8]:
def adfuller_test(births, significance=0.05):
    """Run the Augmented Dickey-Fuller test on a series and print a report.

    Prints the first four values returned by ``adfuller`` (test statistic,
    p-value, number of lags used, number of observations used), followed by a
    one-line verdict on stationarity.

    Parameters
    ----------
    births : array-like
        The series to test. It is passed to ``adfuller`` unchanged.
    significance : float, default 0.05
        p-value threshold. A p-value at or below it prints "정상성 만족O"
        (stationarity satisfied); otherwise "정상성 만족X " is printed.

    Returns
    -------
    None
        The function only prints.
    """
    result = adfuller(births)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # adfuller returns more entries than there are labels; zip stops after the four labelled ones.
    for label, value in zip(labels, result):
        print(f'{label} : {value!s}')
    # result[1] is the entry labelled 'p-value' above.
    if result[1] <= significance:
        print("정상성 만족O")
    else:
        print("정상성 만족X ")

# 정규분포

## 적당 운동 이상치 비율 5%

### 1. 청년 적당

In [4]:
# Load the CSV for this scenario and preview its first rows.
df = pd.read_csv('../data/정규/젊음_적당.csv')
df.head()

Unnamed: 0,update_time,heart_rate1,event1,heart_rate2,event2,heart_rate3,event3,heart_rate4,event4,heart_rate5,...,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,heart_rate1000,event1000
0,2023-10-01 00:00:00,104,0,164,1,114,0,115,0,152,...,126,0,126,0,90,0,93,0,128,0
1,2023-10-01 00:10:00,99,0,100,0,159,0,145,0,151,...,142,0,125,0,136,0,138,0,135,0
2,2023-10-01 00:20:00,145,0,140,0,161,0,132,0,122,...,135,0,138,0,140,0,117,0,135,0
3,2023-10-01 00:30:00,129,0,109,0,134,0,122,0,116,...,131,0,113,0,125,0,91,0,141,0
4,2023-10-01 00:40:00,132,0,111,0,96,0,126,0,166,...,103,0,130,0,123,0,132,0,108,0


In [10]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [11]:
# Stationarity check (ADF test) on the training part of heart_rate1 only;
# the other heart_rate columns are not tested in this cell.
adfuller_test(df_train['heart_rate1'])

ADF Test Statistic : -7.601263864328634
p-value : 2.385874253759186e-11
#Lags Used : 13
Number of Observations Used : 994
정상성 만족O


In [27]:
# Compare ARIMA(p, 0, q) fits for p in 1..4 and q in 1..3 on the training part
# of heart_rate1 and print the AIC of each successful fit.
for i in range(1, 5):
    for j in range(1, 4):
        try:
            model = ARIMA(df_train['heart_rate1'], order=(i, 0, j))
            result = model.fit()
            print(f"ARIMA({i},0,{j})의 AIC: {result.aic}")
        except Exception as exc:
            # A failed fit must not stop the search, but it should be visible.
            # Catching Exception (not a bare except) lets KeyboardInterrupt through.
            print(f"ARIMA({i},0,{j}) skipped: {exc!r}")
            continue

ARIMA(1,0,1)의 AIC: 8832.085464477123
ARIMA(1,0,2)의 AIC: 8834.014980353659
ARIMA(1,0,3)의 AIC: 8836.087235792997
ARIMA(2,0,1)의 AIC: 8834.010484619232
ARIMA(2,0,2)의 AIC: 8836.083929187624
ARIMA(2,0,3)의 AIC: 8832.80303685643
ARIMA(3,0,1)의 AIC: 8835.783110403409
ARIMA(3,0,2)의 AIC: 8837.190547429418
ARIMA(3,0,3)의 AIC: 8831.652886157324
ARIMA(4,0,1)의 AIC: 8837.49273358834
ARIMA(4,0,2)의 AIC: 8828.437423567186
ARIMA(4,0,3)의 AIC: 8835.63068593718


In [28]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [29]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head()

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,100.0,97.575758,97.660819,60.0,100.0,75.0
0,100.0,98.773006,98.830409,80.0,100.0,88.888889
0,100.0,99.697885,99.707602,91.666667,100.0,95.652174
0,100.0,97.256098,97.368421,60.869565,100.0,75.675676
0,100.0,100.0,100.0,100.0,100.0,100.0


In [30]:
# One-row summary: mean and standard deviation of every metric column in `result`.
청년_적당_5 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

청년_적당_5

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,99.609993,2.634002,98.920997,0.781285,98.949708,0.740751,80.208949,12.617083,99.984826,0.095567,88.259139,8.000853


In [32]:
# Write the one-row summary table to the results folder as CSV.
청년_적당_5.to_csv('../result/정규/청년_적당_5.csv')

### 2. 중년 적당

In [33]:
# Load the CSV for this scenario and preview its first rows.
df = pd.read_csv('../data/정규/중년_적당.csv')
df.head()

Unnamed: 0,update_time,heart_rate1,event1,heart_rate2,event2,heart_rate3,event3,heart_rate4,event4,heart_rate5,...,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,heart_rate1000,event1000
0,2023-10-01 00:00:00,93,0,149,1,102,0,103,0,138,...,114,0,113,0,79,0,83,0,116,0
1,2023-10-01 00:10:00,88,0,89,0,145,0,132,0,137,...,129,0,113,0,123,0,125,0,122,0
2,2023-10-01 00:20:00,131,0,127,0,147,0,119,0,110,...,122,0,125,0,127,0,105,0,122,0
3,2023-10-01 00:30:00,117,0,98,0,121,0,110,0,104,...,119,0,101,0,113,0,80,0,128,0
4,2023-10-01 00:40:00,119,0,100,0,85,0,114,0,152,...,92,0,117,0,111,0,119,0,97,0


In [34]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [37]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [38]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head()

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,100.0,97.575758,97.660819,60.0,100.0,75.0
0,100.0,98.773006,98.830409,80.0,100.0,88.888889
0,100.0,99.697885,99.707602,91.666667,100.0,95.652174
0,100.0,96.95122,97.076023,58.333333,100.0,73.684211
0,100.0,100.0,100.0,100.0,100.0,100.0


In [39]:
# One-row summary: mean and standard deviation of every metric column in `result`.
중년_적당_5 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

중년_적당_5

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,99.642268,2.455697,98.893014,0.79334,98.923684,0.753449,79.69881,12.783216,99.986033,0.090892,87.943418,8.147578


In [40]:
# Write the one-row summary table to the results folder as CSV.
중년_적당_5.to_csv('../result/정규/중년_적당_5.csv')

### 3. 장년 적당

In [41]:
# Load the CSV for this scenario and preview its first rows.
df = pd.read_csv('../data/정규/장년_적당.csv')
df.head()

Unnamed: 0,update_time,heart_rate1,event1,heart_rate2,event2,heart_rate3,event3,heart_rate4,event4,heart_rate5,...,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,heart_rate1000,event1000
0,2023-10-01 00:00:00,82,0,137,1,91,0,92,0,126,...,102,0,102,0,68,0,72,0,104,0
1,2023-10-01 00:10:00,77,0,78,0,133,0,120,0,125,...,117,0,101,0,111,0,114,0,111,0
2,2023-10-01 00:20:00,119,0,115,0,134,0,107,0,99,...,110,0,113,0,115,0,94,0,111,0
3,2023-10-01 00:30:00,105,0,86,0,109,0,99,0,93,...,107,0,90,0,101,0,69,0,116,0
4,2023-10-01 00:40:00,108,0,88,0,74,0,102,0,139,...,81,0,105,0,99,0,107,0,85,0


In [42]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [43]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [44]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head()

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,100.0,98.47561,98.538012,73.684211,100.0,84.848485
0,100.0,99.076923,99.122807,85.0,100.0,91.891892
0,100.0,99.69697,99.707602,92.307692,100.0,96.0
0,100.0,96.95122,97.076023,58.333333,100.0,73.684211
0,100.0,100.0,100.0,100.0,100.0,100.0


In [45]:
# One-row summary: mean and standard deviation of every metric column in `result`.
장년_적당_5 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

장년_적당_5

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,99.142753,3.627568,99.156061,0.705346,99.157602,0.663555,84.723328,11.52729,99.964079,0.146436,90.855455,7.034319


In [46]:
# Write the one-row summary table to the results folder as CSV.
장년_적당_5.to_csv('../result/정규/장년_적당_5.csv')

## 적당 운동 이상치 비율 10%

### 1. 청년 적당

In [50]:
# Load the CSV for this scenario and preview its first rows.
df = pd.read_csv('../data/정규/젊음_적당_10.csv')
df.head()

Unnamed: 0,update_time,heart_rate1,event1,heart_rate2,event2,heart_rate3,event3,heart_rate4,event4,heart_rate5,...,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,heart_rate1000,event1000
0,2023-10-01 00:00:00,100,0,175,1,113,0,114,0,160,...,128,0,127,0,82,0,87,0,130,0
1,2023-10-01 00:10:00,93,0,95,0,169,1,151,0,159,...,147,0,126,0,140,0,143,0,139,0
2,2023-10-01 00:20:00,151,0,145,0,171,1,135,0,123,...,138,0,142,0,145,0,116,0,139,0
3,2023-10-01 00:30:00,132,0,106,0,137,0,123,0,115,...,134,0,111,0,127,0,83,0,147,0
4,2023-10-01 00:40:00,135,0,109,0,90,0,128,0,178,...,99,0,132,0,123,0,135,0,105,0


In [51]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [52]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [53]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head()

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,52.777778,100.0,95.02924,100.0,94.736842,69.090909
0,55.882353,100.0,95.614035,100.0,95.356037,71.698113
0,33.333333,100.0,92.982456,100.0,92.727273,50.0
0,63.157895,100.0,95.906433,100.0,95.597484,77.419355
0,41.935484,100.0,94.736842,100.0,94.528875,59.090909


In [54]:
# One-row summary: mean and standard deviation of every metric column in `result`.
청년_적당_10 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

청년_적당_10

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,49.955249,10.244953,100.0,0.0,94.95117,1.303831,100.0,0.0,94.686908,1.356018,65.990204,9.372051


In [55]:
# Write the one-row summary table to the results folder as CSV.
청년_적당_10.to_csv('../result/정규/청년_적당_10.csv')

### 2. 중년 적당

In [56]:
# Load the CSV for this scenario and preview its first two rows.
df = pd.read_csv('../data/정규/중년_적당_10.csv')
df.head(2)

Unnamed: 0,update_time,heart_rate1,event1,heart_rate2,event2,heart_rate3,event3,heart_rate4,event4,heart_rate5,...,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,heart_rate1000,event1000
0,2023-10-01 00:00:00,90,0,158,1,101,0,102,0,145,...,115,0,115,0,73,0,77,0,118,0
1,2023-10-01 00:10:00,83,0,85,0,153,1,137,0,143,...,133,0,113,0,126,0,129,0,125,0


In [57]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [58]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [59]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head()

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,59.375,100.0,96.19883,100.0,95.975232,74.509804
0,60.606061,100.0,96.19883,100.0,95.962733,75.471698
0,38.709677,100.0,94.444444,100.0,94.242424,55.813953
0,77.419355,100.0,97.953216,100.0,97.798742,87.272727
0,48.148148,100.0,95.906433,100.0,95.744681,65.0


In [60]:
# One-row summary: mean and standard deviation of every metric column in `result`.
중년_적당_10 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

중년_적당_10

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,56.133942,10.982671,100.0,0.0,96.057895,1.197634,100.0,0.0,95.851843,1.24935,71.254432,9.306766


In [61]:
# Write the one-row summary table to the results folder as CSV.
중년_적당_10.to_csv('../result/정규/중년_적당_10.csv')

### 3. 장년 적당

In [63]:
# Load the CSV for this scenario and preview its first two rows.
df = pd.read_csv('../data/정규/장년_적당_10.csv')
df.head(2)

Unnamed: 0,update_time,heart_rate1,event1,heart_rate2,event2,heart_rate3,event3,heart_rate4,event4,heart_rate5,...,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,heart_rate1000,event1000
0,2023-10-01 00:00:00,79,0,146,1,90,0,91,0,132,...,104,0,103,0,62,0,66,0,106,0
1,2023-10-01 00:10:00,72,0,74,0,140,1,124,0,131,...,121,0,102,0,114,0,117,0,114,0


In [64]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [65]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [66]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head()

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,57.142857,100.0,95.614035,100.0,95.341615,72.727273
0,57.575758,100.0,95.906433,100.0,95.665635,73.076923
0,39.393939,100.0,94.152047,100.0,93.920973,56.521739
0,63.888889,100.0,96.19883,100.0,95.924765,77.966102
0,44.827586,100.0,95.321637,100.0,95.136778,61.904762


In [67]:
# One-row summary: mean and standard deviation of every metric column in `result`.
장년_적당_10 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

장년_적당_10

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,52.879364,10.766454,100.0,0.0,95.496491,1.276189,100.0,0.0,95.260961,1.328224,68.512524,9.508726


In [68]:
# Write the one-row summary table to the results folder as CSV.
장년_적당_10.to_csv('../result/정규/장년_적당_10.csv')

# 균등분포

## 적당 운동 이상치 비율 5%

### 1. 청년 적당

In [70]:
# Load the CSV for this scenario and preview its first rows.
df = pd.read_csv('../data/균등/uniform5_청년_적당.csv')
df.head()

Unnamed: 0,heart_rate1,event1,heart_rate10,event10,heart_rate100,event100,heart_rate1000,event1000,heart_rate101,event101,...,event995,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,update_time
0,103,0,126,0,120,0,199,1,125,0,...,0,109,0,144,0,118,0,150,0,01JAN2020:00:00:00
1,138,0,107,0,119,0,136,0,157,0,...,0,150,0,87,0,118,0,156,0,01JAN2020:00:10:00
2,206,1,108,0,86,0,156,0,147,0,...,0,136,0,142,0,101,0,130,0,01JAN2020:00:20:00
3,83,0,94,0,102,0,109,0,124,0,...,0,134,0,93,0,154,0,153,0,01JAN2020:00:30:00
4,102,0,141,0,152,0,114,0,156,0,...,0,81,0,202,1,148,0,147,0,01JAN2020:00:40:00


In [71]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [72]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [75]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head(3)

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,100.0,100.0,100.0,100.0,100.0,100.0
0,100.0,100.0,100.0,100.0,100.0,100.0
0,100.0,100.0,100.0,100.0,100.0,100.0


In [76]:
# One-row summary: mean and standard deviation of every metric column in `result`.
청년_적당_균등_5 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

청년_적당_균등_5

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,99.950231,0.769454,100.0,0.0,99.998246,0.026107,100.0,0.0,99.998186,0.026946,99.973517,0.414211


In [77]:
# Write the one-row summary table to the results folder as CSV.
청년_적당_균등_5.to_csv('../result/균등/청년_적당_균등_5.csv')

### 2. 중년 적당

In [78]:
# Load the CSV for this scenario and preview its first rows.
df = pd.read_csv('../data/균등/uniform5_중년_적당.csv')
df.head()

Unnamed: 0,heart_rate1,event1,heart_rate10,event10,heart_rate100,event100,heart_rate1000,event1000,heart_rate101,event101,...,event995,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,update_time
0,145,0,106,0,128,0,139,0,108,0,...,1,121,0,105,0,123,0,181,1,01JAN2020:00:00:00
1,128,0,91,0,90,0,99,0,134,0,...,0,107,0,97,0,124,0,131,0,01JAN2020:00:10:00
2,143,0,71,0,99,0,138,0,145,0,...,0,92,0,146,0,135,0,123,0,01JAN2020:00:20:00
3,73,0,139,0,33,1,139,0,119,0,...,0,85,0,110,0,96,0,75,0,01JAN2020:00:30:00
4,103,0,127,0,91,0,109,0,129,0,...,0,120,0,127,0,88,0,136,0,01JAN2020:00:40:00


In [79]:
# Work on a copy so the frame loaded from CSV stays untouched.
df_copy = df.copy()

# Positional split: the first 1008 rows form the training window and the
# remaining rows the test window. Each part is an explicit copy, so columns
# added to df_test later are not written into a slice of df_copy.
df_train = df_copy.iloc[:1008].copy()
df_test = df_copy.iloc[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [80]:
# Fit an ARIMA(2, 0, 1) model on each heart-rate series of the training window,
# forecast over the test window, and flag test readings that fall outside the
# forecast confidence interval (conf_int() called with its default alpha).
start_idx = len(df_train)               # position of the first test row
end_idx = start_idx + len(df_test) - 1  # get_prediction treats `end` as inclusive

for i in range(1, 1001):
    model = ARIMA(df_train[f'heart_rate{i}'], order=(2, 0, 1))
    result = model.fit()

    forecast = result.get_prediction(start=start_idx, end=end_idx, dynamic=False)
    forecast_ci = forecast.conf_int()

    # Compare positionally: the forecast has one row per test row, so no
    # temporary lower_/upper_ columns need to be added to (and dropped from) df_test.
    observed = df_test[f'heart_rate{i}'].to_numpy()
    lower = forecast_ci.iloc[:, 0].to_numpy()
    upper = forecast_ci.iloc[:, 1].to_numpy()
    outside_interval = np.where((observed < lower) | (observed > upper), 1, 0)

    # Drop a pred_y{i} left over from an earlier run so the cell can be re-executed.
    if f'pred_y{i}' in df_test.columns:
        df_test.drop(columns=[f'pred_y{i}'], inplace=True)

    # Place the flag right after the matching event{i} column.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, f'pred_y{i}', outside_interval)

In [81]:
# Score every series: count the confusion-matrix cells between the event{i}
# labels and the pred_y{i} flags and pass them to performance().
per_series_metrics = []

for i in range(1, 1001):
    actual = df_test[f'event{i}']
    flagged = df_test[f'pred_y{i}']

    # Summing boolean masks counts the matching rows without building a
    # filtered copy of the whole frame for each of the four cells.
    TP = int(((actual == 1) & (flagged == 1)).sum())
    TN = int(((actual == 0) & (flagged == 0)).sum())
    FN = int(((actual == 1) & (flagged == 0)).sum())
    FP = int(((actual == 0) & (flagged == 1)).sum())

    result_i = performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1)
    per_series_metrics.append(result_i)

# Concatenate once; growing a DataFrame inside the loop re-copies it every iteration.
result = pd.concat(per_series_metrics)

result.head(3)

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,100.0,100.0,100.0,100.0,100.0,100.0
0,100.0,100.0,100.0,100.0,100.0,100.0
0,100.0,100.0,100.0,100.0,100.0,100.0


In [82]:
# One-row summary: mean and standard deviation of every metric column in `result`.
중년_적당_균등_5 = pd.DataFrame(
    {
        f'{stat}_{short_name}': getattr(result[metric], stat)()
        for short_name, metric in [
            ('sens', 'Sensitivity'),
            ('spec', 'Specificity'),
            ('accu', 'Accuracy'),
            ('ppv', 'PPV'),
            ('npv', 'NPV'),
            ('F1_score', 'F_1 score'),
        ]
        for stat in ('mean', 'std')
    },
    index=['mean'],
)

중년_적당_균등_5

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,100.0,0.0,100.0,0.0,100.0,0.0,100.0,0.0,100.0,0.0,100.0,0.0


In [83]:
# Write the one-row summary table to the results directory as CSV.
중년_적당_균등_5.to_csv('../result/균등/중년_적당_균등_5.csv')

### 3. 장년 적당

In [3]:
# Load the dataset for this section and preview its first rows.
# NOTE(review): the file name says 노년 while the section heading and the saved
# result use 장년 — confirm both refer to the same cohort.
df = pd.read_csv('../data/균등/uniform5_노년_적당.csv')
df.head()

Unnamed: 0,heart_rate1,event1,heart_rate10,event10,heart_rate100,event100,heart_rate1000,event1000,heart_rate101,event101,...,event995,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,update_time
0,95,0,109,0,128,0,87,0,94,0,...,0,124,0,75,0,91,0,102,0,01JAN2020:00:00:00
1,118,0,128,0,69,0,82,0,108,0,...,0,80,0,73,0,155,1,131,0,01JAN2020:00:10:00
2,108,0,126,0,98,0,61,0,97,0,...,0,80,0,72,0,103,0,63,0,01JAN2020:00:20:00
3,125,0,124,0,63,0,77,0,66,0,...,0,111,0,90,0,117,0,123,0,01JAN2020:00:30:00
4,77,0,130,0,105,0,60,0,75,0,...,0,79,0,78,0,104,0,75,0,01JAN2020:00:40:00


In [4]:
# Analysis: work on a copy so the frame loaded from disk stays untouched.
df_copy = df.copy()

# Split by row position: the first 1008 rows are used to fit the models, the
# remaining rows are held out for evaluation.
# NOTE(review): 1008 looks like 7 days of 10-minute samples (7 * 144) — confirm.
df_train = df_copy[:1008]
# Explicit copy: later cells insert and drop columns on df_test in place, which
# must not be done on a slice of df_copy (SettingWithCopy hazard).
df_test = df_copy[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [5]:
# Fit one ARIMA(2, 0, 1) model per heart-rate series on the training rows and
# flag each test observation that lies outside the forecast confidence band.
n_train = len(df_train)                  # position of the first test row
last_step = n_train + len(df_test) - 1   # get_prediction's `end` is inclusive

for i in range(1, 1001):
    hr_col = f'heart_rate{i}'
    flag_col = f'pred_y{i}'

    # Named `fit` rather than `result`, so it cannot be mistaken for the
    # metrics table that a later cell stores under `result`.
    fit = ARIMA(df_train[hr_col], order=(2, 0, 1)).fit()

    forecast = fit.get_prediction(start=n_train, end=last_step, dynamic=False)

    # conf_int() is called with its default alpha (0.05 in statsmodels, i.e. a
    # 95% band); column 0 is the lower bound, column 1 the upper bound.
    band = forecast.conf_int()

    # Align the bounds to the test rows by index label, exactly as a column
    # assignment would, without writing temporary columns into df_test.
    lower = band.iloc[:, 0].reindex(df_test.index)
    upper = band.iloc[:, 1].reindex(df_test.index)
    observed = df_test[hr_col]
    outside = (observed < lower) | (observed > upper)

    # Keep the cell re-runnable: remove a previous flag column and any helper
    # columns left behind by an earlier (possibly interrupted) run.
    stale = [c for c in (flag_col, f'pred{i}', f'lower_{i}', f'upper_{i}') if c in df_test.columns]
    if stale:
        df_test.drop(columns=stale, inplace=True)

    # Put the flag (1 = outside the band, 0 = inside) right after 'event{i}'.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, flag_col, np.where(outside, 1, 0))

In [6]:
# Score every series' anomaly flags (pred_y{i}) against its ground-truth labels
# (event{i}) on the test rows. `performance` is imported from the project's
# `performance` module at the top of the notebook; each call contributes one
# row to `result`.
per_series = []  # collected frames; concatenated once after the loop

for i in range(1, 1001):
    event = df_test[f'event{i}']
    flag = df_test[f'pred_y{i}']

    # Confusion-matrix counts as plain Python ints (boolean sums instead of
    # materialising four filtered copies of df_test per series).
    TP = int(((event == 1) & (flag == 1)).sum())
    TN = int(((event == 0) & (flag == 0)).sum())
    FN = int(((event == 1) & (flag == 0)).sum())
    FP = int(((event == 0) & (flag == 1)).sum())

    per_series.append(performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1))

# A single concat avoids re-copying the growing frame on every iteration;
# each row keeps the index label it got from performance().
result = pd.concat(per_series)

result.head(3)

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,100.0,100.0,100.0,100.0,100.0,100.0
0,100.0,100.0,100.0,100.0,100.0,100.0
0,100.0,100.0,100.0,100.0,100.0,100.0


In [8]:
# Collapse the per-series metrics in `result` into a single row holding the
# mean and standard deviation of every metric column.
summary_spec = [
    ('mean_sens', 'std_sens', 'Sensitivity'),
    ('mean_spec', 'std_spec', 'Specificity'),
    ('mean_accu', 'std_accu', 'Accuracy'),
    ('mean_ppv', 'std_ppv', 'PPV'),
    ('mean_npv', 'std_npv', 'NPV'),
    ('mean_F1_score', 'std_F1_score', 'F_1 score'),
]

stats = {}
for mean_name, std_name, metric in summary_spec:
    stats[mean_name] = result[metric].mean()
    stats[std_name] = result[metric].std()

# One row labelled 'mean'; columns follow the insertion order above.
장년_적당_균등_5 = pd.DataFrame(stats, index=['mean'])

장년_적당_균등_5

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,99.013792,3.160313,100.0,0.0,99.95848,0.13012,100.0,0.0,99.956762,0.135402,99.477378,1.708118


In [9]:
# Write the one-row summary table to the results directory as CSV.
장년_적당_균등_5.to_csv('../result/균등/장년_적당_균등_5.csv')

## 적당 운동 이상치 비율 10%

### 1. 청년 적당

In [10]:
# Load the dataset for this section and preview its first rows.
df = pd.read_csv('../data/균등/uniform10_청년_적당.csv')
df.head()

Unnamed: 0,heart_rate1,event1,heart_rate10,event10,heart_rate100,event100,heart_rate1000,event1000,heart_rate101,event101,...,event995,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,update_time
0,94,0,89,0,49,1,137,0,148,0,...,0,131,0,84,0,140,0,111,0,01JAN2020:00:00:00
1,209,1,83,0,129,0,115,0,102,0,...,0,108,0,127,0,119,0,48,1,01JAN2020:00:10:00
2,104,0,110,0,133,0,94,0,125,0,...,0,149,0,157,0,128,0,93,0,01JAN2020:00:20:00
3,152,0,126,0,108,0,91,0,113,0,...,0,128,0,105,0,110,0,101,0,01JAN2020:00:30:00
4,137,0,31,1,91,0,148,0,123,0,...,0,143,0,140,0,128,0,159,0,01JAN2020:00:40:00


In [11]:
# Analysis: work on a copy so the frame loaded from disk stays untouched.
df_copy = df.copy()

# Split by row position: the first 1008 rows are used to fit the models, the
# remaining rows are held out for evaluation.
# NOTE(review): 1008 looks like 7 days of 10-minute samples (7 * 144) — confirm.
df_train = df_copy[:1008]
# Explicit copy: later cells insert and drop columns on df_test in place, which
# must not be done on a slice of df_copy (SettingWithCopy hazard).
df_test = df_copy[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [12]:
# Fit one ARIMA(2, 0, 1) model per heart-rate series on the training rows and
# flag each test observation that lies outside the forecast confidence band.
n_train = len(df_train)                  # position of the first test row
last_step = n_train + len(df_test) - 1   # get_prediction's `end` is inclusive

for i in range(1, 1001):
    hr_col = f'heart_rate{i}'
    flag_col = f'pred_y{i}'

    # Named `fit` rather than `result`, so it cannot be mistaken for the
    # metrics table that a later cell stores under `result`.
    fit = ARIMA(df_train[hr_col], order=(2, 0, 1)).fit()

    forecast = fit.get_prediction(start=n_train, end=last_step, dynamic=False)

    # conf_int() is called with its default alpha (0.05 in statsmodels, i.e. a
    # 95% band); column 0 is the lower bound, column 1 the upper bound.
    band = forecast.conf_int()

    # Align the bounds to the test rows by index label, exactly as a column
    # assignment would, without writing temporary columns into df_test.
    lower = band.iloc[:, 0].reindex(df_test.index)
    upper = band.iloc[:, 1].reindex(df_test.index)
    observed = df_test[hr_col]
    outside = (observed < lower) | (observed > upper)

    # Keep the cell re-runnable: remove a previous flag column and any helper
    # columns left behind by an earlier (possibly interrupted) run.
    stale = [c for c in (flag_col, f'pred{i}', f'lower_{i}', f'upper_{i}') if c in df_test.columns]
    if stale:
        df_test.drop(columns=stale, inplace=True)

    # Put the flag (1 = outside the band, 0 = inside) right after 'event{i}'.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, flag_col, np.where(outside, 1, 0))

In [13]:
# Score every series' anomaly flags (pred_y{i}) against its ground-truth labels
# (event{i}) on the test rows. `performance` is imported from the project's
# `performance` module at the top of the notebook; each call contributes one
# row to `result`.
per_series = []  # collected frames; concatenated once after the loop

for i in range(1, 1001):
    event = df_test[f'event{i}']
    flag = df_test[f'pred_y{i}']

    # Confusion-matrix counts as plain Python ints (boolean sums instead of
    # materialising four filtered copies of df_test per series).
    TP = int(((event == 1) & (flag == 1)).sum())
    TN = int(((event == 0) & (flag == 0)).sum())
    FN = int(((event == 1) & (flag == 0)).sum())
    FP = int(((event == 0) & (flag == 1)).sum())

    per_series.append(performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1))

# A single concat avoids re-copying the growing frame on every iteration;
# each row keeps the index label it got from performance().
result = pd.concat(per_series)

result.head(3)

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,83.72093,100.0,97.953216,100.0,97.712418,91.139241
0,93.939394,100.0,99.415205,100.0,99.356913,96.875
0,77.777778,100.0,97.660819,100.0,97.452229,87.5


In [14]:
# Collapse the per-series metrics in `result` into a single row holding the
# mean and standard deviation of every metric column.
summary_spec = [
    ('mean_sens', 'std_sens', 'Sensitivity'),
    ('mean_spec', 'std_spec', 'Specificity'),
    ('mean_accu', 'std_accu', 'Accuracy'),
    ('mean_ppv', 'std_ppv', 'PPV'),
    ('mean_npv', 'std_npv', 'NPV'),
    ('mean_F1_score', 'std_F1_score', 'F_1 score'),
]

stats = {}
for mean_name, std_name, metric in summary_spec:
    stats[mean_name] = result[metric].mean()
    stats[std_name] = result[metric].std()

# One row labelled 'mean'; columns follow the insertion order above.
청년_적당_균등_10 = pd.DataFrame(stats, index=['mean'])

청년_적당_균등_10

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,78.564424,7.852649,100.0,0.0,97.901754,0.73364,100.0,0.0,97.727961,0.781974,87.77507,5.029685


In [15]:
# Write the one-row summary table to the results directory as CSV.
청년_적당_균등_10.to_csv('../result/균등/청년_적당_균등_10.csv')

### 2. 중년 적당

In [16]:
# Load the dataset for this section and preview its first rows.
df = pd.read_csv('../data/균등/uniform10_중년_적당.csv')
df.head()

Unnamed: 0,heart_rate1,event1,heart_rate10,event10,heart_rate100,event100,heart_rate1000,event1000,heart_rate101,event101,...,event995,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,update_time
0,73,0,127,0,98,0,87,0,172,1,...,0,139,0,79,0,114,0,75,0,01JAN2020:00:00:00
1,120,0,119,0,141,0,78,0,26,1,...,0,88,0,94,0,145,0,128,0,01JAN2020:00:10:00
2,123,0,180,1,99,0,79,0,82,0,...,0,128,0,91,0,146,0,116,0,01JAN2020:00:20:00
3,125,0,84,0,107,0,109,0,93,0,...,0,107,0,143,0,120,0,137,0,01JAN2020:00:30:00
4,85,0,83,0,99,0,78,0,187,1,...,1,114,0,130,0,107,0,78,0,01JAN2020:00:40:00


In [17]:
# Analysis: work on a copy so the frame loaded from disk stays untouched.
df_copy = df.copy()

# Split by row position: the first 1008 rows are used to fit the models, the
# remaining rows are held out for evaluation.
# NOTE(review): 1008 looks like 7 days of 10-minute samples (7 * 144) — confirm.
df_train = df_copy[:1008]
# Explicit copy: later cells insert and drop columns on df_test in place, which
# must not be done on a slice of df_copy (SettingWithCopy hazard).
df_test = df_copy[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [18]:
# Fit one ARIMA(2, 0, 1) model per heart-rate series on the training rows and
# flag each test observation that lies outside the forecast confidence band.
n_train = len(df_train)                  # position of the first test row
last_step = n_train + len(df_test) - 1   # get_prediction's `end` is inclusive

for i in range(1, 1001):
    hr_col = f'heart_rate{i}'
    flag_col = f'pred_y{i}'

    # Named `fit` rather than `result`, so it cannot be mistaken for the
    # metrics table that a later cell stores under `result`.
    fit = ARIMA(df_train[hr_col], order=(2, 0, 1)).fit()

    forecast = fit.get_prediction(start=n_train, end=last_step, dynamic=False)

    # conf_int() is called with its default alpha (0.05 in statsmodels, i.e. a
    # 95% band); column 0 is the lower bound, column 1 the upper bound.
    band = forecast.conf_int()

    # Align the bounds to the test rows by index label, exactly as a column
    # assignment would, without writing temporary columns into df_test.
    lower = band.iloc[:, 0].reindex(df_test.index)
    upper = band.iloc[:, 1].reindex(df_test.index)
    observed = df_test[hr_col]
    outside = (observed < lower) | (observed > upper)

    # Keep the cell re-runnable: remove a previous flag column and any helper
    # columns left behind by an earlier (possibly interrupted) run.
    stale = [c for c in (flag_col, f'pred{i}', f'lower_{i}', f'upper_{i}') if c in df_test.columns]
    if stale:
        df_test.drop(columns=stale, inplace=True)

    # Put the flag (1 = outside the band, 0 = inside) right after 'event{i}'.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, flag_col, np.where(outside, 1, 0))

In [19]:
# Score every series' anomaly flags (pred_y{i}) against its ground-truth labels
# (event{i}) on the test rows. `performance` is imported from the project's
# `performance` module at the top of the notebook; each call contributes one
# row to `result`.
per_series = []  # collected frames; concatenated once after the loop

for i in range(1, 1001):
    event = df_test[f'event{i}']
    flag = df_test[f'pred_y{i}']

    # Confusion-matrix counts as plain Python ints (boolean sums instead of
    # materialising four filtered copies of df_test per series).
    TP = int(((event == 1) & (flag == 1)).sum())
    TN = int(((event == 0) & (flag == 0)).sum())
    FN = int(((event == 1) & (flag == 0)).sum())
    FP = int(((event == 0) & (flag == 1)).sum())

    per_series.append(performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1))

# A single concat avoids re-copying the growing frame on every iteration;
# each row keeps the index label it got from performance().
result = pd.concat(per_series)

result.head(3)

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,86.363636,100.0,99.122807,100.0,99.071207,92.682927
0,87.5,100.0,98.830409,100.0,98.726115,93.333333
0,94.444444,100.0,99.415205,100.0,99.350649,97.142857


In [20]:
# Collapse the per-series metrics in `result` into a single row holding the
# mean and standard deviation of every metric column.
summary_spec = [
    ('mean_sens', 'std_sens', 'Sensitivity'),
    ('mean_spec', 'std_spec', 'Specificity'),
    ('mean_accu', 'std_accu', 'Accuracy'),
    ('mean_ppv', 'std_ppv', 'PPV'),
    ('mean_npv', 'std_npv', 'NPV'),
    ('mean_F1_score', 'std_F1_score', 'F_1 score'),
]

stats = {}
for mean_name, std_name, metric in summary_spec:
    stats[mean_name] = result[metric].mean()
    stats[std_name] = result[metric].std()

# One row labelled 'mean'; columns follow the insertion order above.
중년_적당_균등_10 = pd.DataFrame(stats, index=['mean'])

중년_적당_균등_10

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,86.399254,6.48942,100.0,0.0,98.666667,0.597707,100.0,0.0,98.543974,0.643468,92.570825,3.817927


In [21]:
# Write the one-row summary table to the results directory as CSV.
중년_적당_균등_10.to_csv('../result/균등/중년_적당_균등_10.csv')

### 3. 장년 적당

In [22]:
# Load the dataset for this section and preview its first rows.
# NOTE(review): the file name says 노년 while the section heading and the saved
# result use 장년 — confirm both refer to the same cohort.
df = pd.read_csv('../data/균등/uniform10_노년_적당.csv')
df.head()

Unnamed: 0,heart_rate1,event1,heart_rate10,event10,heart_rate100,event100,heart_rate1000,event1000,heart_rate101,event101,...,event995,heart_rate996,event996,heart_rate997,event997,heart_rate998,event998,heart_rate999,event999,update_time
0,64,0,113,0,99,0,124,0,74,0,...,0,166,1,117,0,109,0,29,1,01JAN2020:00:00:00
1,179,1,133,0,158,1,120,0,173,1,...,0,104,0,99,0,99,0,81,0,01JAN2020:00:10:00
2,106,0,61,0,80,0,78,0,92,0,...,0,129,0,79,0,76,0,103,0,01JAN2020:00:20:00
3,91,0,63,0,81,0,65,0,83,0,...,1,61,0,170,1,98,0,104,0,01JAN2020:00:30:00
4,116,0,131,0,79,0,128,0,68,0,...,0,16,1,78,0,66,0,70,0,01JAN2020:00:40:00


In [23]:
# Analysis: work on a copy so the frame loaded from disk stays untouched.
df_copy = df.copy()

# Split by row position: the first 1008 rows are used to fit the models, the
# remaining rows are held out for evaluation.
# NOTE(review): 1008 looks like 7 days of 10-minute samples (7 * 144) — confirm.
df_train = df_copy[:1008]
# Explicit copy: later cells insert and drop columns on df_test in place, which
# must not be done on a slice of df_copy (SettingWithCopy hazard).
df_test = df_copy[1008:].copy()

print(df_train.shape)
print(df_test.shape)

(1008, 2001)
(342, 2001)


In [24]:
# Fit one ARIMA(2, 0, 1) model per heart-rate series on the training rows and
# flag each test observation that lies outside the forecast confidence band.
n_train = len(df_train)                  # position of the first test row
last_step = n_train + len(df_test) - 1   # get_prediction's `end` is inclusive

for i in range(1, 1001):
    hr_col = f'heart_rate{i}'
    flag_col = f'pred_y{i}'

    # Named `fit` rather than `result`, so it cannot be mistaken for the
    # metrics table that a later cell stores under `result`.
    fit = ARIMA(df_train[hr_col], order=(2, 0, 1)).fit()

    forecast = fit.get_prediction(start=n_train, end=last_step, dynamic=False)

    # conf_int() is called with its default alpha (0.05 in statsmodels, i.e. a
    # 95% band); column 0 is the lower bound, column 1 the upper bound.
    band = forecast.conf_int()

    # Align the bounds to the test rows by index label, exactly as a column
    # assignment would, without writing temporary columns into df_test.
    lower = band.iloc[:, 0].reindex(df_test.index)
    upper = band.iloc[:, 1].reindex(df_test.index)
    observed = df_test[hr_col]
    outside = (observed < lower) | (observed > upper)

    # Keep the cell re-runnable: remove a previous flag column and any helper
    # columns left behind by an earlier (possibly interrupted) run.
    stale = [c for c in (flag_col, f'pred{i}', f'lower_{i}', f'upper_{i}') if c in df_test.columns]
    if stale:
        df_test.drop(columns=stale, inplace=True)

    # Put the flag (1 = outside the band, 0 = inside) right after 'event{i}'.
    df_test.insert(df_test.columns.get_loc(f'event{i}') + 1, flag_col, np.where(outside, 1, 0))

In [25]:
# Score every series' anomaly flags (pred_y{i}) against its ground-truth labels
# (event{i}) on the test rows. `performance` is imported from the project's
# `performance` module at the top of the notebook; each call contributes one
# row to `result`.
per_series = []  # collected frames; concatenated once after the loop

for i in range(1, 1001):
    event = df_test[f'event{i}']
    flag = df_test[f'pred_y{i}']

    # Confusion-matrix counts as plain Python ints (boolean sums instead of
    # materialising four filtered copies of df_test per series).
    TP = int(((event == 1) & (flag == 1)).sum())
    TN = int(((event == 0) & (flag == 0)).sum())
    FN = int(((event == 1) & (flag == 0)).sum())
    FP = int(((event == 0) & (flag == 1)).sum())

    per_series.append(performance(TP=TP, TN=TN, FP=FP, FN=FN, BETA=1))

# A single concat avoids re-copying the growing frame on every iteration;
# each row keeps the index label it got from performance().
result = pd.concat(per_series)

result.head(3)

Unnamed: 0,Sensitivity,Specificity,Accuracy,PPV,NPV,F_1 score
0,75.675676,100.0,97.368421,100.0,97.133758,86.153846
0,77.419355,100.0,97.953216,100.0,97.798742,87.272727
0,80.645161,100.0,98.245614,100.0,98.107256,89.285714


In [26]:
# Collapse the per-series metrics in `result` into a single row holding the
# mean and standard deviation of every metric column.
summary_spec = [
    ('mean_sens', 'std_sens', 'Sensitivity'),
    ('mean_spec', 'std_spec', 'Specificity'),
    ('mean_accu', 'std_accu', 'Accuracy'),
    ('mean_ppv', 'std_ppv', 'PPV'),
    ('mean_npv', 'std_npv', 'NPV'),
    ('mean_F1_score', 'std_F1_score', 'F_1 score'),
]

stats = {}
for mean_name, std_name, metric in summary_spec:
    stats[mean_name] = result[metric].mean()
    stats[std_name] = result[metric].std()

# One row labelled 'mean'; columns follow the insertion order above.
장년_적당_균등_10 = pd.DataFrame(stats, index=['mean'])

장년_적당_균등_10

Unnamed: 0,mean_sens,std_sens,mean_spec,std_spec,mean_accu,std_accu,mean_ppv,std_ppv,mean_npv,std_npv,mean_F1_score,std_F1_score
mean,75.900725,8.084409,100.0,0.0,97.619883,0.766912,100.0,0.0,97.427741,0.815057,86.055947,5.314885


In [27]:
# Write the one-row summary table to the results directory as CSV.
장년_적당_균등_10.to_csv('../result/균등/장년_적당_균등_10.csv')