In [1]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import json
from time import time
import datetime
import ast
import random
from Utils.preprocessing import preprocess_all_days
from tqdm.auto import tqdm
from Utils.preprocessing import preprocess_wHr, preprocess_mGps, preprocess_mWifi, preprocess_mUsage, preprocess_mBle

  from .autonotebook import tqdm as notebook_tqdm


# PARAMETER

In [None]:
FREQUENCY = 5  # minutes
INTERPOLATION = "linear"  # 'time' or 'linear'
DATA = 0
MASK = 1
MODAL_NAME = ['mLight','mACStatus', 'mActivity', 'mBle', 'mGps', 'wHr',
                  'mScreenStatus', 'mUsageStats', 'mWifi', 'wLight', 'wPedo']

orginal_data_freq = {
    "mACStatus": 1,
    "mActivity": 5,
    "mBle": 10,
    "mGps": 1,
    "mLight": 10,
    "mScreenStatus": 1,
    "mUsageStats": 10,
    "mWifi": 10,
    "wHr": 1,
    "wLight": 1,
    "wPedo": 1,
}

# Data items
* mACStatus
* mActivity
* mBle
* mGps
* mLight
* mScreenStatus
* mUsageStatus
* wHr
* wLight
* wPedo


In [3]:
SD = 42
random.seed(SD)
np.random.seed(SD)
os.environ['PYTHONHASHSEED'] = str(SD)

In [4]:
dataset_path = os.path.join("ETRI 2024","ch2025_data_items")
train_data_path = os.path.join("ETRI 2024","ch2025_metrics_train.csv")

In [5]:
print("challenge 2025 dataset " + "="*5)
for file_name in os.listdir(dataset_path):
    if file_name.endswith(".parquet"):
        print(file_name)
        
parquet_files = glob.glob(os.path.join(dataset_path, "*.parquet"))
print(f"\nTotal parquet files: {len(parquet_files)}")




challenge 2025 dataset =====
ch2025_mACStatus.parquet
ch2025_mActivity.parquet
ch2025_mAmbience.parquet
ch2025_mBle.parquet
ch2025_mGps.parquet
ch2025_mLight.parquet
ch2025_mScreenStatus.parquet
ch2025_mUsageStats.parquet
ch2025_mWifi.parquet
ch2025_wHr.parquet
ch2025_wLight.parquet
ch2025_wPedo.parquet

Total parquet files: 12


# .paruet 파일 로드

In [6]:
# 파일 이름을 키로, DataFrame을 값으로 저장할 딕셔너리
lifelog_data = {}

# 파일별로 읽기
for file_path in parquet_files:
    name = os.path.basename(file_path).replace('.parquet', '').replace('ch2025_', '')
    lifelog_data[name] = pd.read_parquet(file_path)
    print(f"✅ Loaded: {name}, shape = {lifelog_data[name].shape}")

✅ Loaded: mACStatus, shape = (939896, 3)
✅ Loaded: mActivity, shape = (961062, 3)
✅ Loaded: mAmbience, shape = (476577, 3)
✅ Loaded: mBle, shape = (21830, 3)
✅ Loaded: mGps, shape = (800611, 3)
✅ Loaded: mLight, shape = (96258, 3)
✅ Loaded: mScreenStatus, shape = (939653, 3)
✅ Loaded: mUsageStats, shape = (45197, 3)
✅ Loaded: mWifi, shape = (76336, 3)
✅ Loaded: wHr, shape = (382918, 3)
✅ Loaded: wLight, shape = (633741, 3)
✅ Loaded: wPedo, shape = (748100, 9)


In [7]:
# 딕셔너리에 있는 모든 항목을 독립적인 변수로 할당
for key, df in lifelog_data.items():
    globals()[f"{key}_df"] = df

In [8]:
metric_train_df = pd.read_csv(train_data_path)
print(f"✅ Loaded: metric_train_df, shape = {metric_train_df.shape}")
print(metric_train_df.head())

✅ Loaded: metric_train_df, shape = (450, 9)
  subject_id  sleep_date lifelog_date  Q1  Q2  Q3  S1  S2  S3
0       id01  2024-06-27   2024-06-26   0   0   0   0   0   1
1       id01  2024-06-28   2024-06-27   0   0   0   0   1   1
2       id01  2024-06-29   2024-06-28   1   0   0   1   1   1
3       id01  2024-06-30   2024-06-29   1   0   1   2   0   0
4       id01  2024-07-01   2024-06-30   0   1   1   1   1   1


In [9]:
sample_submission = pd.read_csv(os.path.join("ETRI 2024","ch2025_submission_sample.csv"))
sample_submission['lifelog_date'] = pd.to_datetime(sample_submission['lifelog_date'])
test_keys = set(zip(sample_submission['subject_id'], sample_submission['lifelog_date'].dt.date))
print(f"✅ Loaded: sample_submission, shape = {sample_submission.shape}")

✅ Loaded: sample_submission, shape = (250, 9)


In [10]:
# ✅ 분리 함수
def split_test_train(df, subject_col='subject_id', timestamp_col='timestamp'):
    df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')
    df = df.dropna(subset=[timestamp_col])
    df['date_only'] = df[timestamp_col].dt.date
    df['key'] = list(zip(df[subject_col], df['date_only']))

    test_df = df[df['key'].isin(test_keys)].drop(columns=['date_only', 'key'])
    train_df = df[~df['key'].isin(test_keys)].drop(columns=['date_only', 'key'])
    return test_df, train_df


In [11]:
# ✅ DataFrame 별 timestamp 컬럼 수동 지정
dataframes = {
    'mACStatus': (mACStatus_df, 'timestamp'),
    'mActivity': (mActivity_df, 'timestamp'),
    'mAmbience': (mAmbience_df, 'timestamp'),
    'mBle': (mBle_df, 'timestamp'),
    'mGps': (mGps_df, 'timestamp'),
    'mLight': (mLight_df, 'timestamp'),
    'mScreenStatus': (mScreenStatus_df, 'timestamp'),
    'mUsageStats': (mUsageStats_df, 'timestamp'),
    'mWifi': (mWifi_df, 'timestamp'),
    'wHr': (wHr_df, 'timestamp'),
    'wLight': (wLight_df, 'timestamp'),
    'wPedo': (wPedo_df, 'timestamp'),
}

# 학습 데이터 분리

In [12]:
# ✅ 결과 저장
for name, (df, ts_col) in dataframes.items():
    print(f"⏳ {name} 분리 중...")
    test_df, train_df = split_test_train(df.copy(), subject_col='subject_id', timestamp_col=ts_col)
    globals()[f"{name}_test"] = test_df
    globals()[f"{name}_train"] = train_df
    print(f"✅ {name}_test → {test_df.shape}, {name}_train → {train_df.shape}")

⏳ mACStatus 분리 중...
✅ mACStatus_test → (335849, 3), mACStatus_train → (604047, 3)
⏳ mActivity 분리 중...
✅ mActivity_test → (343579, 3), mActivity_train → (617483, 3)
⏳ mAmbience 분리 중...
✅ mAmbience_test → (170453, 3), mAmbience_train → (306124, 3)
⏳ mBle 분리 중...
✅ mBle_test → (8140, 3), mBle_train → (13690, 3)
⏳ mGps 분리 중...
✅ mGps_test → (287386, 3), mGps_train → (513225, 3)
⏳ mLight 분리 중...
✅ mLight_test → (34439, 3), mLight_train → (61819, 3)
⏳ mScreenStatus 분리 중...
✅ mScreenStatus_test → (336160, 3), mScreenStatus_train → (603493, 3)
⏳ mUsageStats 분리 중...
✅ mUsageStats_test → (16499, 3), mUsageStats_train → (28698, 3)
⏳ mWifi 분리 중...
✅ mWifi_test → (27467, 3), mWifi_train → (48869, 3)
⏳ wHr 분리 중...
✅ wHr_test → (143311, 3), wHr_train → (239607, 3)
⏳ wLight 분리 중...
✅ wLight_test → (233809, 3), wLight_train → (399932, 3)
⏳ wPedo 분리 중...
✅ wPedo_test → (288832, 9), wPedo_train → (459268, 9)


# 5분 단위 평균 T=492
* 만약 결측치가 5분이 넘어갈 경우 보간
* 원래 lifelog_Data, Sleep_data(lisfelog_data+1) 둘 다 있으나 현재는 예측이기에 lifelog_data만 사용

In [13]:
modality_names = ['mACStatus', 'mActivity', 'mBle', 'mGps', 'mLight',
                  'mScreenStatus', 'mUsageStats', 'mWifi', 'wHr', 'wLight', 'wPedo']
for name in modality_names:   # ['BLE', 'HR', 'ACC', ...]
    train_df:pd.DataFrame = globals()[f"{name}_train"]
    os.makedirs(f"ETRI 2024/train", exist_ok=True)
    print(f"{name} info")
    print(train_df.info())
    display(train_df.head(2))
    print("\n")
    train_df.head(50).to_csv(f"ETRI 2024/train/{name}_train_sample.csv", index=False)

mACStatus info
<class 'pandas.core.frame.DataFrame'>
Index: 604047 entries, 0 to 924618
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   subject_id  604047 non-null  object        
 1   timestamp   604047 non-null  datetime64[ns]
 2   m_charging  604047 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 18.4+ MB
None


Unnamed: 0,subject_id,timestamp,m_charging
0,id01,2024-06-26 12:03:00,0
1,id01,2024-06-26 12:04:00,0




mActivity info
<class 'pandas.core.frame.DataFrame'>
Index: 617483 entries, 0 to 945274
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   subject_id  617483 non-null  object        
 1   timestamp   617483 non-null  datetime64[ns]
 2   m_activity  617483 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 18.8+ MB
None


Unnamed: 0,subject_id,timestamp,m_activity
0,id01,2024-06-26 12:03:00,4
1,id01,2024-06-26 12:04:00,0




mBle info
<class 'pandas.core.frame.DataFrame'>
Index: 13690 entries, 0 to 21132
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   subject_id  13690 non-null  object        
 1   timestamp   13690 non-null  datetime64[ns]
 2   m_ble       13690 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 427.8+ KB
None


Unnamed: 0,subject_id,timestamp,m_ble
0,id01,2024-06-26 12:13:00,"[{'address': '00:15:7C:11:80:8D', 'device_clas..."
1,id01,2024-06-26 12:23:00,"[{'address': '0A:B1:26:4D:76:21', 'device_clas..."




mGps info
<class 'pandas.core.frame.DataFrame'>
Index: 513225 entries, 0 to 787964
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   subject_id  513225 non-null  object        
 1   timestamp   513225 non-null  datetime64[ns]
 2   m_gps       513225 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 15.7+ MB
None


Unnamed: 0,subject_id,timestamp,m_gps
0,id01,2024-06-26 12:03:00,"[{'altitude': 110.6, 'latitude': 0.2077385, 'l..."
1,id01,2024-06-26 12:04:00,"[{'altitude': 110.8, 'latitude': 0.2078068, 'l..."




mLight info
<class 'pandas.core.frame.DataFrame'>
Index: 61819 entries, 0 to 94672
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   subject_id  61819 non-null  object        
 1   timestamp   61819 non-null  datetime64[ns]
 2   m_light     61819 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 1.9+ MB
None


Unnamed: 0,subject_id,timestamp,m_light
0,id01,2024-06-26 12:03:00,534.0
1,id01,2024-06-26 12:13:00,846.0




mScreenStatus info
<class 'pandas.core.frame.DataFrame'>
Index: 603493 entries, 0 to 924428
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   subject_id    603493 non-null  object        
 1   timestamp     603493 non-null  datetime64[ns]
 2   m_screen_use  603493 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 18.4+ MB
None


Unnamed: 0,subject_id,timestamp,m_screen_use
0,id01,2024-06-26 12:03:00,0
1,id01,2024-06-26 12:04:00,0




mUsageStats info
<class 'pandas.core.frame.DataFrame'>
Index: 28698 entries, 0 to 44304
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   subject_id     28698 non-null  object        
 1   timestamp      28698 non-null  datetime64[ns]
 2   m_usage_stats  28698 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 896.8+ KB
None


Unnamed: 0,subject_id,timestamp,m_usage_stats
0,id01,2024-06-26 13:00:00,"[{'app_name': ' 캐시워크', 'total_time': 69}, {'ap..."
1,id01,2024-06-26 13:10:00,"[{'app_name': '통화', 'total_time': 26419}, {'ap..."




mWifi info
<class 'pandas.core.frame.DataFrame'>
Index: 48869 entries, 0 to 74944
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   subject_id  48869 non-null  object        
 1   timestamp   48869 non-null  datetime64[ns]
 2   m_wifi      48869 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 1.5+ MB
None


Unnamed: 0,subject_id,timestamp,m_wifi
0,id01,2024-06-26 12:03:00,"[{'bssid': 'a0:0f:37:9a:5d:8b', 'rssi': -78}, ..."
1,id01,2024-06-26 12:13:00,"[{'bssid': 'a0:0f:37:9a:5d:8b', 'rssi': -79}, ..."




wHr info
<class 'pandas.core.frame.DataFrame'>
Index: 239607 entries, 0 to 373308
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   subject_id  239607 non-null  object        
 1   timestamp   239607 non-null  datetime64[ns]
 2   heart_rate  239607 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 7.3+ MB
None


Unnamed: 0,subject_id,timestamp,heart_rate
0,id01,2024-06-26 12:23:00,"[134, 134, 135, 133, 134, 135, 134, 135, 134, ..."
1,id01,2024-06-26 12:24:00,"[123, 122, 121, 120, 121, 121, 120, 118, 119, ..."




wLight info
<class 'pandas.core.frame.DataFrame'>
Index: 399932 entries, 0 to 620299
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   subject_id  399932 non-null  object        
 1   timestamp   399932 non-null  datetime64[ns]
 2   w_light     399932 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 12.2+ MB
None


Unnamed: 0,subject_id,timestamp,w_light
0,id01,2024-06-26 12:17:00,633.0
1,id01,2024-06-26 12:18:00,483.0




wPedo info
<class 'pandas.core.frame.DataFrame'>
Index: 459268 entries, 0 to 734550
Data columns (total 9 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   subject_id       459268 non-null  object        
 1   timestamp        459268 non-null  datetime64[ns]
 2   step             459268 non-null  int64         
 3   step_frequency   459268 non-null  float64       
 4   running_step     459268 non-null  int64         
 5   walking_step     459268 non-null  int64         
 6   distance         459268 non-null  float64       
 7   speed            459268 non-null  float64       
 8   burned_calories  459268 non-null  float64       
dtypes: datetime64[ns](1), float64(4), int64(3), object(1)
memory usage: 35.0+ MB
None


Unnamed: 0,subject_id,timestamp,step,step_frequency,running_step,walking_step,distance,speed,burned_calories
0,id01,2024-06-26 12:09:00,10,0.166667,0,0,8.33,0.138833,0.0
1,id01,2024-06-26 12:10:00,0,0.0,0,0,0.0,0.0,0.0






In [14]:
processed_dict = {}

modality_handlers = {
    "wHr": preprocess_wHr,
    "mGps": preprocess_mGps,
    "mWifi": preprocess_mWifi,
    "mUsageStats": preprocess_mUsage,
    "mBle": preprocess_mBle,
}



modality_names = ['mLight','mACStatus', 'mActivity', 'mBle', 'mGps', 'wHr',
                  'mScreenStatus', 'mUsageStats', 'mWifi', 'wLight', 'wPedo']
for name in modality_names:   # ['wHr', 'mBle', 'mWifi', ...]
    train_df = globals()[f"{name}_train"]

    
    print(f"⏳ {name} 전처리 중...")
    if name in modality_handlers.keys():
        preprocess_func = modality_handlers[name]
        train_df = preprocess_func(train_df)

    if not os.path.exists(f"ETRI 2024/train/{name}_train_preprocess_input.csv"):
        train_df.to_csv(f"ETRI 2024/train/{name}_train_preprocess_input.csv", index=False)
    proc = preprocess_all_days(
        df=train_df,
        metric_df=metric_train_df,
        resample_freq=5,
        interpolation="time",
        min_ratio=0.5,
        mask=True
    )

    processed_dict[name] = proc

⏳ mLight 전처리 중...


100%|██████████| 450/450 [00:02<00:00, 177.75it/s]


⏳ mACStatus 전처리 중...


100%|██████████| 450/450 [00:11<00:00, 38.04it/s]


⏳ mActivity 전처리 중...


100%|██████████| 450/450 [00:12<00:00, 37.25it/s]


⏳ mBle 전처리 중...


100%|██████████| 450/450 [00:01<00:00, 294.57it/s]                               


⏳ mGps 전처리 중...


100%|██████████| 450/450 [00:10<00:00, 43.15it/s]                                 


⏳ wHr 전처리 중...
Heart Rate Missing Count Before: 0


100%|██████████| 450/450 [00:05<00:00, 84.99it/s]                                 


⏳ mScreenStatus 전처리 중...


100%|██████████| 450/450 [00:11<00:00, 38.41it/s]


⏳ mUsageStats 전처리 중...


100%|██████████| 450/450 [00:01<00:00, 251.54it/s]                                 


⏳ mWifi 전처리 중...


100%|██████████| 450/450 [00:02<00:00, 207.70it/s]                                


⏳ wLight 전처리 중...


100%|██████████| 450/450 [00:08<00:00, 55.60it/s]


⏳ wPedo 전처리 중...


100%|██████████| 450/450 [00:09<00:00, 47.58it/s]


In [15]:
processed_dict['mLight']

{('id01',
  datetime.date(2024, 6, 26)): (                     m_light
  timestamp                   
  2024-06-26 12:00:00    534.0
  2024-06-26 12:05:00    534.0
  2024-06-26 12:10:00    658.8
  2024-06-26 12:15:00    846.0
  2024-06-26 12:20:00    838.0
  ...                      ...
  2024-06-26 23:35:00      0.0
  2024-06-26 23:40:00      0.0
  2024-06-26 23:45:00      0.0
  2024-06-26 23:50:00      0.0
  2024-06-26 23:55:00      0.0
  
  [144 rows x 1 columns],                      m_light
  timestamp                   
  2024-06-26 12:00:00        0
  2024-06-26 12:05:00        0
  2024-06-26 12:10:00        0
  2024-06-26 12:15:00        0
  2024-06-26 12:20:00        0
  ...                      ...
  2024-06-26 23:35:00        0
  2024-06-26 23:40:00        0
  2024-06-26 23:45:00        0
  2024-06-26 23:50:00        0
  2024-06-26 23:55:00        0
  
  [144 rows x 1 columns]),
 ('id01',
  datetime.date(2024, 6, 27)): (                     m_light
  timestamp               

In [16]:
final_dataset = {}

keys = processed_dict["mACStatus"].keys()   # 공통 key

for key in keys:
    final_dataset[key] = {}
    for name in modality_names:
        if key in processed_dict[name]:
            final_dataset[key][name] = processed_dict[name][key] # KEY: MODALITY -> (subid, date)로 변환


In [17]:
file_name = f"ETRI 2024/processed_{INTERPOLATION}_{FREQUENCY}min_dataset.pkl"
import pickle
with open(file_name, "wb") as f:
    pickle.dump(final_dataset, f)

In [18]:
from collections import defaultdict

modality_names = processed_dict.keys()

# 모달리티별 missing count
missing_by_modality = defaultdict(int)

# 날짜별 모달리티 개수
modalities_per_day = defaultdict(int)

# 날짜 리스트
all_keys = list(final_dataset.keys())

for key in all_keys:
    day_modalities = final_dataset[key]
    
    count_present = 0
    for m in modality_names:
        if m in day_modalities and day_modalities[m] is not None:
            count_present += 1
        else:
            missing_by_modality[m] += 1

    modalities_per_day[key] = count_present

# ✔ 모든 모달리티가 있는 날짜
complete_days = [k for k, c in modalities_per_day.items() if c == len(modality_names)]

# ✔ 하나라도 부족한 날짜
incomplete_days = [k for k, c in modalities_per_day.items() if c < len(modality_names)]

# ✔ coverage percent
coverage = {m: 1 - missing_by_modality[m] / len(all_keys) for m in modality_names}


# ------------------ 출력 ------------------

print("📌 모달리티별 Missing 개수:")
for m in modality_names:
    print(f"  - {m}: {missing_by_modality[m]}개 missing")

print("\n📌 날짜별 모달리티 개수 (예: 5개 있으면 5개)")
for k, v in list(modalities_per_day.items())[:10]:  # 앞 10개만 미리보기
    print(f"{k}: {v}개")

print("\n📌 모든 모달리티가 있는 날짜 개수:", len(complete_days))
print("📌 하나라도 빠진 날짜 개수:", len(incomplete_days))

print("\n📌 모달리티별 coverage 비율 (%):")
for m in modality_names:
    print(f"  - {m}: {coverage[m]*100:.2f}%")


📌 모달리티별 Missing 개수:
  - mLight: 0개 missing
  - mACStatus: 0개 missing
  - mActivity: 0개 missing
  - mBle: 37개 missing
  - mGps: 26개 missing
  - wHr: 62개 missing
  - mScreenStatus: 0개 missing
  - mUsageStats: 10개 missing
  - mWifi: 22개 missing
  - wLight: 36개 missing
  - wPedo: 46개 missing

📌 날짜별 모달리티 개수 (예: 5개 있으면 5개)
('id01', datetime.date(2024, 6, 26)): 11개
('id01', datetime.date(2024, 6, 27)): 11개
('id01', datetime.date(2024, 6, 28)): 11개
('id01', datetime.date(2024, 6, 29)): 11개
('id01', datetime.date(2024, 6, 30)): 11개
('id01', datetime.date(2024, 7, 1)): 11개
('id01', datetime.date(2024, 7, 2)): 11개
('id01', datetime.date(2024, 7, 3)): 11개
('id01', datetime.date(2024, 7, 4)): 11개
('id01', datetime.date(2024, 7, 5)): 11개

📌 모든 모달리티가 있는 날짜 개수: 358
📌 하나라도 빠진 날짜 개수: 92

📌 모달리티별 coverage 비율 (%):
  - mLight: 100.00%
  - mACStatus: 100.00%
  - mActivity: 100.00%
  - mBle: 91.78%
  - mGps: 94.22%
  - wHr: 86.22%
  - mScreenStatus: 100.00%
  - mUsageStats: 97.78%
  - mWifi: 95.11%
  - wLigh

In [21]:
def print_dict_structure(d, indent=0):
    """Print dictionary structure without expanding large values."""
    prefix = "  " * indent

    if not isinstance(d, dict):
        print(f"{prefix}- {type(d).__name__}")
        return

    for key, value in d.items():
        key_str = str(key)
        if len(key_str) > 60:  # 너무 긴 key는 잘라줌
            key_str = key_str[:60] + "..."

        # value 타입만 출력
        if isinstance(value, dict):
            print(f"{prefix}{key_str}/   (dict)")
            print_dict_structure(value, indent + 1)
        else:
            print(f"{prefix}{key_str}: {type(value).__name__}")

In [22]:
# from Utils.util import print_dict_structure
print_dict_structure(final_dataset)

('id01', datetime.date(2024, 6, 26))/   (dict)
  mLight: tuple
  mACStatus: tuple
  mActivity: tuple
  mBle: tuple
  mGps: tuple
  wHr: tuple
  mScreenStatus: tuple
  mUsageStats: tuple
  mWifi: tuple
  wLight: tuple
  wPedo: tuple
('id01', datetime.date(2024, 6, 27))/   (dict)
  mLight: tuple
  mACStatus: tuple
  mActivity: tuple
  mBle: tuple
  mGps: tuple
  wHr: tuple
  mScreenStatus: tuple
  mUsageStats: tuple
  mWifi: tuple
  wLight: tuple
  wPedo: tuple
('id01', datetime.date(2024, 6, 28))/   (dict)
  mLight: tuple
  mACStatus: tuple
  mActivity: tuple
  mBle: tuple
  mGps: tuple
  wHr: tuple
  mScreenStatus: tuple
  mUsageStats: tuple
  mWifi: tuple
  wLight: tuple
  wPedo: tuple
('id01', datetime.date(2024, 6, 29))/   (dict)
  mLight: tuple
  mACStatus: tuple
  mActivity: tuple
  mBle: tuple
  mGps: tuple
  wHr: tuple
  mScreenStatus: tuple
  mUsageStats: tuple
  mWifi: tuple
  wLight: tuple
  wPedo: tuple
('id01', datetime.date(2024, 6, 30))/   (dict)
  mLight: tuple
  mACStat