### Baseline Sleep-Quality Prediction (id-wise)


In [5]:
# [1] Import the required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tqdm import tqdm

# [2] Load the input tables: the training metrics, the submission sample,
# and the merged lifelog records (read in that order).
train_df, submission_df, merge_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        "ch2025_metrics_train.csv",
        "ch2025_submission_sample.csv",
        "merge_df.csv",
    )
)

# [3] Preprocess merge_df: derive the calendar date of every record and
# aggregate the records to one row per (subject_id, lifelog_date).
merge_df['lifelog_date'] = pd.to_datetime(merge_df['timestamp']).dt.date
daily_df = (
    merge_df.drop(columns=['timestamp'])
    .groupby(['subject_id', 'lifelog_date'])
    # numeric_only=True matches the fill step below and keeps a stray
    # non-numeric column from raising TypeError during aggregation.
    .mean(numeric_only=True)
    .reset_index()
)
# Replace remaining NaNs with each column's mean over all daily rows.
# Reassigning (instead of inplace=True) avoids mutating through a
# possibly shared object.
daily_df = daily_df.fillna(daily_df.mean(numeric_only=True))

# Align the date dtype across all three frames so the merge keys compare
# equal (the groupby key above holds plain Python date objects).
for frame in (daily_df, train_df, submission_df):
    frame['lifelog_date'] = pd.to_datetime(frame['lifelog_date'])

# [4] Merge: left-join the daily features onto the training rows and onto
# the submission rows, so every original row is kept even when no
# lifelog data exists for that subject/date.
merge_keys = ['subject_id', 'lifelog_date']
train_merged = train_df.merge(daily_df, how='left', on=merge_keys)
submission_merged = submission_df.merge(daily_df, how='left', on=merge_keys)

# [5] Missing-value handling: impute with the training-set column means.
# Feature columns are the ones the merge added on top of train_df; deriving
# them by name avoids depending on train_df having exactly nine columns.
feature_cols = [col for col in train_merged.columns if col not in train_df.columns]

# Means come from the training rows only and are reused for the submission
# rows, so both frames are imputed with the same values.
train_feature_means = train_merged[feature_cols].mean()

# Assign the filled block back to the frame. Calling
# `frame[col].fillna(..., inplace=True)` acts on a temporary Series: it
# emits a FutureWarning in pandas 2.x and changes nothing under
# Copy-on-Write.
train_merged[feature_cols] = train_merged[feature_cols].fillna(train_feature_means)
submission_merged[feature_cols] = submission_merged[feature_cols].fillna(train_feature_means)

# [6] Model configuration: three tree-based base learners whose outputs
# are combined by a logistic-regression final estimator.
meta_model = LogisticRegression()
base_learners = [
    ('lgbm', LGBMClassifier(random_state=42)),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)),
    ('rf', RandomForestClassifier(random_state=42)),
]


def get_stacking_model():
    """Return a new StackingClassifier built from the module-level learners.

    The ensemble uses ``base_learners`` as its estimators and ``meta_model``
    as its final estimator, with 3-fold cross-validation (``cv=3``) and
    ``n_jobs=-1``.
    """
    stacking_options = {
        'estimators': base_learners,
        'final_estimator': meta_model,
        'cv': 3,
        'n_jobs': -1,
    }
    return StackingClassifier(**stacking_options)

# [7] Train and predict separately for each subject_id.
# One stacking model is fitted per (subject, target) pair. Subjects that
# appear only in the submission frame are not visited by this loop.
targets = ['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3']
train_group = train_merged.groupby('subject_id')
submission_group = submission_merged.groupby('subject_id')

final_preds = []

for subject_id, train_data in tqdm(train_group, desc="Subject-wise training"):
    # get_group raises KeyError for an unknown key, so skip subjects that
    # have training rows but nothing to predict.
    if subject_id not in submission_group.groups:
        continue
    test_data = submission_group.get_group(subject_id)
    X_train = train_data[feature_cols].values
    X_test = test_data[feature_cols].values

    # Scalar subject_id plus one array per column; every array has one
    # entry per submission row of this subject.
    row = {
        'subject_id': subject_id,
        'sleep_date': test_data['sleep_date'].values,
        'lifelog_date': test_data['lifelog_date'].values
    }

    for target in targets:
        # Encode labels as 0..k-1 for fitting and map predictions back.
        # A subject may only exhibit a subset of the label values (e.g.
        # {0, 2}), and some classifiers require contiguous class ids.
        classes, y_train = np.unique(train_data[target].values, return_inverse=True)
        if len(classes) < 2:
            # Only one label was observed for this subject: a classifier
            # cannot be fitted, so predict that label for every row.
            pred = np.full(len(X_test), classes[0])
        else:
            model = get_stacking_model()
            model.fit(X_train, y_train)
            pred = classes[model.predict(X_test)]
        row[target] = pred

    final_preds.append(row)

# [8] Assemble and save the results.
# Each entry of final_preds holds a scalar subject_id and parallel arrays
# for the remaining columns; expand it into one record per array position.
per_row_columns = ['sleep_date', 'lifelog_date', 'Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3']

result_rows = []
for subject_preds in final_preds:
    # Walk the parallel arrays in lockstep, one tuple per submission row.
    aligned_values = zip(*(subject_preds[name] for name in per_row_columns))
    for values in aligned_values:
        record = {'subject_id': subject_preds['subject_id']}
        record.update(zip(per_row_columns, values))
        result_rows.append(record)

final_df = pd.DataFrame(result_rows)
final_df.to_csv("submission_stacked.csv", index=False)

print("✅ 제출 파일 생성 완료: submission_stacked.csv")


Subject-wise training:   0%|          | 0/10 [00:00<?, ?it/s]  File "c:\Users\als31\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\als31\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\als31\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\als31\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Subject-wise training: 100%|██████████| 10/10 [01:44<00:00, 10.43s/it]

✅ 제출 파일 생성 완료: submission_stacked.csv



