In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from torch.utils.data import DataLoader, TensorDataset
from sklearn.utils import resample
import requests
import json


In [2]:
# 데이터 불러오기
bmi_data = pd.read_csv("bmi_data_1.csv")
exercise_data = pd.read_csv("exercise_data_2.csv")
food_data = pd.read_csv("food_data_2.csv")


In [3]:
bmi_data

Unnamed: 0,성별코드,연령대코드(5세단위),허리둘레,수축기혈압,이완기혈압,식전혈당(공복혈당),총콜레스테롤,HDL콜레스테롤,LDL콜레스테롤,트리글리세라이드,혈청지오티(AST),혈청지피티(ALT),감마지티피,흡연상태,음주여부,BMI,user_id
0,2,13,72.0,125.0,79.0,88.0,166.000000,98.000000,53.000000,75.0,24.0,14.0,12.0,1.0,0.0,22.892820,0
1,1,11,90.1,118.0,76.0,103.0,196.744106,57.035215,115.281692,105.0,54.0,63.0,51.0,3.0,0.0,22.857143,1
2,1,13,96.5,120.0,70.0,182.0,196.744106,57.035215,115.281692,105.0,24.0,35.0,71.0,3.0,1.0,27.681661,2
3,2,11,71.0,118.0,73.0,96.0,220.000000,72.000000,127.000000,105.0,31.0,17.0,16.0,1.0,0.0,20.811655,3
4,2,13,71.0,167.0,96.0,106.0,163.000000,68.000000,79.000000,79.0,30.0,18.0,14.0,1.0,0.0,20.811655,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,2,13,85.0,125.0,81.0,90.0,201.000000,56.000000,118.000000,135.0,26.0,25.0,11.0,1.0,0.0,25.390625,99995
99996,2,13,79.0,135.0,74.0,101.0,196.744106,57.035215,115.281692,105.0,23.0,21.0,13.0,1.0,0.0,22.959184,99996
99997,2,9,83.0,120.0,63.0,132.0,196.744106,57.035215,115.281692,105.0,48.0,38.0,11.0,1.0,1.0,33.333333,99997
99998,2,8,65.5,100.0,61.0,88.0,196.744106,57.035215,115.281692,105.0,15.0,11.0,12.0,1.0,1.0,19.531250,99998


In [4]:
exercise_data

Unnamed: 0,운동명,단위체중당에너지소비량,user_id
0,스키,6.0,3316
1,허리돌리기(야외운동기구),2.5,55705
2,걷기,4.5,61707
3,골프,4.8,64353
4,배구연습(일반적인),4.0,10968
...,...,...,...
99994,스키,6.0,3316
99995,와이드 스쿼트,5.5,78857
99996,고정식자전거타기(101~160 Watts),8.8,40460
99997,크로스 크런치,4.5,78434


In [5]:
food_data

Unnamed: 0,음식,칼로리,지방,포화지방,단일불포화지방,다중불포화지방,탄수화물,당류,단백질,식이섬유,콜레스테롤,나트륨,수분,user_id
0,pink lady apple,18,0.100,0.000,0.000,0.000,3.9,0.000,0.1,0.000,0.0,0.000,0.0,66965
1,garlic,4,0.026,0.033,0.078,0.089,0.9,0.043,0.2,0.064,0.0,0.018,1.6,57255
2,pork tail cooked,1089,98.500,34.200,46.400,10.800,0.0,0.000,46.8,0.000,354.8,0.093,128.4,41186
3,carrots raw,25,0.100,0.046,0.043,0.069,5.8,2.900,0.6,1.700,0.0,0.024,53.9,86440
4,macaroni raw,390,1.600,0.300,0.200,0.600,78.4,2.800,13.7,3.400,0.0,0.097,10.4,92618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99994,sockeye salmon cooked,493,17.300,3.000,5.800,4.100,0.0,0.000,82.1,0.000,189.1,0.300,208.7,53308
99995,parmesan cheese,71,4.500,2.700,1.400,0.100,0.6,0.046,6.4,0.000,12.2,0.200,5.4,63044
99996,blue cheese dressing light,14,0.400,0.100,0.200,0.100,2.1,0.600,0.3,0.000,1.6,0.200,12.4,42361
99997,whiskey sour,158,0.000,0.018,0.092,0.024,13.9,0.000,0.0,0.000,0.0,0.067,77.0,21377


In [6]:
# 1.NaN 값 처리 및 데이터 정리
bmi_data.fillna(0, inplace=True)
exercise_data.fillna({"운동명": "Unknown"}, inplace=True)  
food_data.fillna(0, inplace=True)

In [7]:
# 2. BMI 데이터 전처리
def categorize_bmi(value):
    if value < 18.5:
        return "저체중"
    elif 18.5 <= value < 23:
        return "정상"
    elif 23 <= value < 25:
        return "과체중"
    else:
        return "비만"

bmi_data["BMI_등급"] = bmi_data["BMI"].apply(categorize_bmi)

In [8]:
bmi_data.columns

Index(['성별코드', '연령대코드(5세단위)', '허리둘레', '수축기혈압', '이완기혈압', '식전혈당(공복혈당)', '총콜레스테롤',
       'HDL콜레스테롤', 'LDL콜레스테롤', '트리글리세라이드', '혈청지오티(AST)', '혈청지피티(ALT)', '감마지티피',
       '흡연상태', '음주여부', 'BMI', 'user_id', 'BMI_등급'],
      dtype='object')

In [9]:
# 3. 운동 여부 추가 (MET 값 기준)
ENERGY_THRESHOLD = 3.0
exercise_data["운동 여부"] = (exercise_data["단위체중당에너지소비량"] > ENERGY_THRESHOLD).astype(int)

In [10]:
#  운동 여부 추가 로직: '단위체중당에너지소비량'이 특정 임계값 이상이면 1, 아니면 0
exercise_data ["운동 여부"] = (exercise_data["단위체중당에너지소비량"] > ENERGY_THRESHOLD).astype(int)

In [11]:
exercise_data

Unnamed: 0,운동명,단위체중당에너지소비량,user_id,운동 여부
0,스키,6.0,3316,1
1,허리돌리기(야외운동기구),2.5,55705,0
2,걷기,4.5,61707,1
3,골프,4.8,64353,1
4,배구연습(일반적인),4.0,10968,1
...,...,...,...,...
99994,스키,6.0,3316,1
99995,와이드 스쿼트,5.5,78857,1
99996,고정식자전거타기(101~160 Watts),8.8,40460,1
99997,크로스 크런치,4.5,78434,1


In [12]:
# ✅ 음식 섭취 여부 추가
food_data_grouped = food_data.groupby("user_id", as_index=False)["칼로리"].sum()
food_data_grouped["섭취 여부"] = (food_data_grouped["칼로리"] > 0).astype(int)

In [13]:
# ✅ 데이터 병합 (user_id 기준)
merged_data = bmi_data.merge(exercise_data[["user_id", "운동 여부"]], on="user_id", how="left")
merged_data = merged_data.merge(food_data_grouped[["user_id", "섭취 여부"]], on="user_id", how="left")

In [14]:
# ✅ NaN 값 처리
merged_data["운동 여부"] = merged_data["운동 여부"].fillna(0).astype(int)
merged_data["섭취 여부"] = merged_data["섭취 여부"].fillna(0).astype(int)

In [15]:
merged_data.columns

Index(['성별코드', '연령대코드(5세단위)', '허리둘레', '수축기혈압', '이완기혈압', '식전혈당(공복혈당)', '총콜레스테롤',
       'HDL콜레스테롤', 'LDL콜레스테롤', '트리글리세라이드', '혈청지오티(AST)', '혈청지피티(ALT)', '감마지티피',
       '흡연상태', '음주여부', 'BMI', 'user_id', 'BMI_등급', '운동 여부', '섭취 여부'],
      dtype='object')

In [16]:
merged_data

Unnamed: 0,성별코드,연령대코드(5세단위),허리둘레,수축기혈압,이완기혈압,식전혈당(공복혈당),총콜레스테롤,HDL콜레스테롤,LDL콜레스테롤,트리글리세라이드,혈청지오티(AST),혈청지피티(ALT),감마지티피,흡연상태,음주여부,BMI,user_id,BMI_등급,운동 여부,섭취 여부
0,2,13,72.0,125.0,79.0,88.0,166.000000,98.000000,53.000000,75.0,24.0,14.0,12.0,1.0,0.0,22.892820,0,정상,0,0
1,1,11,90.1,118.0,76.0,103.0,196.744106,57.035215,115.281692,105.0,54.0,63.0,51.0,3.0,0.0,22.857143,1,정상,0,0
2,1,13,96.5,120.0,70.0,182.0,196.744106,57.035215,115.281692,105.0,24.0,35.0,71.0,3.0,1.0,27.681661,2,비만,0,0
3,2,11,71.0,118.0,73.0,96.0,220.000000,72.000000,127.000000,105.0,31.0,17.0,16.0,1.0,0.0,20.811655,3,정상,0,0
4,2,13,71.0,167.0,96.0,106.0,163.000000,68.000000,79.000000,79.0,30.0,18.0,14.0,1.0,0.0,20.811655,4,정상,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199619,2,13,85.0,125.0,81.0,90.0,201.000000,56.000000,118.000000,135.0,26.0,25.0,11.0,1.0,0.0,25.390625,99995,비만,0,0
199620,2,13,79.0,135.0,74.0,101.0,196.744106,57.035215,115.281692,105.0,23.0,21.0,13.0,1.0,0.0,22.959184,99996,정상,0,0
199621,2,9,83.0,120.0,63.0,132.0,196.744106,57.035215,115.281692,105.0,48.0,38.0,11.0,1.0,1.0,33.333333,99997,비만,0,0
199622,2,8,65.5,100.0,61.0,88.0,196.744106,57.035215,115.281692,105.0,15.0,11.0,12.0,1.0,1.0,19.531250,99998,정상,0,0


In [17]:
merged_data["BMI_등급"]

0         정상
1         정상
2         비만
3         정상
4         정상
          ..
199619    비만
199620    정상
199621    비만
199622    정상
199623    비만
Name: BMI_등급, Length: 199624, dtype: object

In [18]:
# ✅ 기존 컬럼명
old_columns = [
    '성별코드', '연령대코드(5세단위)', '허리둘레', '수축기혈압', '이완기혈압', '식전혈당(공복혈당)', '총콜레스테롤',
       'HDL콜레스테롤', 'LDL콜레스테롤', '트리글리세라이드', '혈청지오티(AST)', '혈청지피티(ALT)', '감마지티피',
       '흡연상태', '음주여부', 'BMI', 'user_id', 'BMI_등급', '운동 여부', '섭취 여부'
]

In [19]:
# ✅ 변경할 컬럼명 매핑
merged_data = merged_data.rename(columns={
    "성별코드": "성별",
    "연령대코드(5세단위)": "연령대",
    "수축기혈압": "수축기혈압(최고 혈압)",
    "이완기혈압": "이완기혈압(최저 혈압)",
    "총콜레스테롤": "콜레스테롤 지수"
 })

In [20]:
merged_data

Unnamed: 0,성별,연령대,허리둘레,수축기혈압(최고 혈압),이완기혈압(최저 혈압),식전혈당(공복혈당),콜레스테롤 지수,HDL콜레스테롤,LDL콜레스테롤,트리글리세라이드,혈청지오티(AST),혈청지피티(ALT),감마지티피,흡연상태,음주여부,BMI,user_id,BMI_등급,운동 여부,섭취 여부
0,2,13,72.0,125.0,79.0,88.0,166.000000,98.000000,53.000000,75.0,24.0,14.0,12.0,1.0,0.0,22.892820,0,정상,0,0
1,1,11,90.1,118.0,76.0,103.0,196.744106,57.035215,115.281692,105.0,54.0,63.0,51.0,3.0,0.0,22.857143,1,정상,0,0
2,1,13,96.5,120.0,70.0,182.0,196.744106,57.035215,115.281692,105.0,24.0,35.0,71.0,3.0,1.0,27.681661,2,비만,0,0
3,2,11,71.0,118.0,73.0,96.0,220.000000,72.000000,127.000000,105.0,31.0,17.0,16.0,1.0,0.0,20.811655,3,정상,0,0
4,2,13,71.0,167.0,96.0,106.0,163.000000,68.000000,79.000000,79.0,30.0,18.0,14.0,1.0,0.0,20.811655,4,정상,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199619,2,13,85.0,125.0,81.0,90.0,201.000000,56.000000,118.000000,135.0,26.0,25.0,11.0,1.0,0.0,25.390625,99995,비만,0,0
199620,2,13,79.0,135.0,74.0,101.0,196.744106,57.035215,115.281692,105.0,23.0,21.0,13.0,1.0,0.0,22.959184,99996,정상,0,0
199621,2,9,83.0,120.0,63.0,132.0,196.744106,57.035215,115.281692,105.0,48.0,38.0,11.0,1.0,1.0,33.333333,99997,비만,0,0
199622,2,8,65.5,100.0,61.0,88.0,196.744106,57.035215,115.281692,105.0,15.0,11.0,12.0,1.0,1.0,19.531250,99998,정상,0,0


In [21]:
def create_features(merged_data):
    """📌 기존 merged_data를 유지하면서 새로운 피처 추가"""

    # 기존 데이터프레임을 복사하여 원본 유지
    merged_data = merged_data.copy()

    # ✅ 새로운 피처 추가
    merged_data["총콜레스테롤"] = (
        merged_data["HDL콜레스테롤"] +
        merged_data["LDL콜레스테롤"] +
        merged_data["트리글리세라이드"]
    )

    merged_data["혈압 차이"] = (
        merged_data["수축기혈압(최고 혈압)"] -
        merged_data["이완기혈압(최저 혈압)"]
    )

    merged_data["간 지표"] = (
        merged_data["혈청지오티(AST)"] +
        merged_data["혈청지피티(ALT)"] +
        merged_data["감마지티피"]
    )

    merged_data["고혈당 위험"] = (
        merged_data["식전혈당(공복혈당)"] /
        merged_data["BMI"]
    )

    merged_data["비만 위험 지수"] = (
        merged_data["허리둘레"] /
        merged_data["BMI"]
    )

    # ✅ 결측치 처리 (무한대 값 방지)
    merged_data.replace([float("inf"), -float("inf")], 0, inplace=True)
    merged_data.fillna(0, inplace=True)
    
    return merged_data

In [22]:
# ✅ 기존 데이터 유지하면서 새로운 컬럼 추가
merged_data = create_features(merged_data)

In [23]:

# ✅ 결과 확인
merged_data.head()

Unnamed: 0,성별,연령대,허리둘레,수축기혈압(최고 혈압),이완기혈압(최저 혈압),식전혈당(공복혈당),콜레스테롤 지수,HDL콜레스테롤,LDL콜레스테롤,트리글리세라이드,...,BMI,user_id,BMI_등급,운동 여부,섭취 여부,총콜레스테롤,혈압 차이,간 지표,고혈당 위험,비만 위험 지수
0,2,13,72.0,125.0,79.0,88.0,166.0,98.0,53.0,75.0,...,22.89282,0,정상,0,0,226.0,46.0,50.0,3.844,3.145091
1,1,11,90.1,118.0,76.0,103.0,196.744106,57.035215,115.281692,105.0,...,22.857143,1,정상,0,0,277.316907,42.0,168.0,4.50625,3.941875
2,1,13,96.5,120.0,70.0,182.0,196.744106,57.035215,115.281692,105.0,...,27.681661,2,비만,0,0,277.316907,50.0,130.0,6.57475,3.486062
3,2,11,71.0,118.0,73.0,96.0,220.0,72.0,127.0,105.0,...,20.811655,3,정상,0,0,304.0,45.0,64.0,4.6128,3.41155
4,2,13,71.0,167.0,96.0,106.0,163.0,68.0,79.0,79.0,...,20.811655,4,정상,0,0,226.0,71.0,62.0,5.0933,3.41155


In [24]:
merged_data.columns

Index(['성별', '연령대', '허리둘레', '수축기혈압(최고 혈압)', '이완기혈압(최저 혈압)', '식전혈당(공복혈당)',
       '콜레스테롤 지수', 'HDL콜레스테롤', 'LDL콜레스테롤', '트리글리세라이드', '혈청지오티(AST)',
       '혈청지피티(ALT)', '감마지티피', '흡연상태', '음주여부', 'BMI', 'user_id', 'BMI_등급',
       '운동 여부', '섭취 여부', '총콜레스테롤', '혈압 차이', '간 지표', '고혈당 위험', '비만 위험 지수'],
      dtype='object')

In [25]:
import os
os.makedirs('./data', exist_ok=True)
merged_data.to_csv("./data/merged_data.csv", index=False)


In [26]:
merged_data["운동 여부"].value_counts()

운동 여부
0    110901
1     88723
Name: count, dtype: int64

In [27]:
merged_data["섭취 여부"].value_counts()

섭취 여부
0    196191
1      3433
Name: count, dtype: int64

In [28]:
# ✅ 데이터 균형 조정 (SMOTE 적용)
features = [
    "BMI",                  # 비만 지수 (높을수록 비만 위험 증가)
    "허리둘레",              # 복부 비만 지표 (높을수록 건강 위험 증가)
    "수축기혈압(최고 혈압)",  # 고혈압 위험 평가
    "이완기혈압(최저 혈압)",  # 고혈압 위험 평가
    "혈압 차이",             # 수축기 - 이완기 혈압 차이 (높을수록 심혈관 질환 위험)
    "총콜레스테롤",          # LDL + HDL + 트리글리세라이드 (심혈관 건강)
    "고혈당 위험",           # 식전혈당(공복혈당)이 높은 경우 당뇨병 위험 증가
    "간 지표",              # (ALT, AST, 감마지티피) 간 건강 상태 반영
    "성별",                 # 남성 / 여성 차이 반영 (운동 및 식단에 영향)
    "연령대",                # 연령대 (고령자 위험 증가)
    "비만 위험 지수",         # 허리둘레 / BMI (비만 위험 증가)
    "흡연상태",             # 흡연 여부 (흡연자는 건강 위험 증가)
    "음주여부"              # 음주 여부 (과음 시 건강 위험 증가)
]





In [29]:
X = merged_data[features]
y_exercise = merged_data["운동 여부"]
y_food = merged_data["섭취 여부"]

In [30]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [31]:

smote = SMOTE(random_state=23)
X_resampled_ex, y_resampled_ex = smote.fit_resample(X_scaled, y_exercise)
X_resampled_food, y_resampled_food = smote.fit_resample(X_scaled, y_food)

In [32]:
# ✅ XGBoost 모델 학습 (음식 섭취 여부 예측)
xgb_model_food = xgb.XGBClassifier(use_label_encoder=False, eval_metric="logloss")
xgb_model_food.fit(X_resampled_food, y_resampled_food)

Parameters: { "use_label_encoder" } are not used.



In [33]:
# ✅ 음식 섭취 여부 예측을 위한 모델 정의 (PyTorch)
class FoodPredictionModel(nn.Module):
    def __init__(self, input_dim):
        super(FoodPredictionModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 2)  # 이진 분류 (섭취 여부: 0 or 1)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.softmax(self.layer3(x))
        return x

In [34]:
# ✅ 데이터 분할 (음식 섭취 여부)
input_dim_food = X_resampled_food.shape[1]
model_food = FoodPredictionModel(input_dim_food)
criterion_food = nn.CrossEntropyLoss()
optimizer_food = optim.Adam(model_food.parameters(), lr=0.001)

X_train_food, X_test_food, y_train_food, y_test_food = train_test_split(X_resampled_food, y_resampled_food, test_size=0.2, random_state=23)

train_tensor_food = torch.tensor(X_train_food, dtype=torch.float32)
target_tensor_food = torch.tensor(y_train_food.values, dtype=torch.long)

dataset_food = torch.utils.data.TensorDataset(train_tensor_food, target_tensor_food)
train_loader_food = DataLoader(dataset_food, batch_size=32, shuffle=True)

In [35]:
# ✅ 음식 섭취 여부 예측 모델 학습
for epoch in range(10):  # 10 Epoch 실행
    for data, target in train_loader_food:
        optimizer_food.zero_grad()
        output = model_food(data)
        loss = criterion_food(output, target)
        loss.backward()
        optimizer_food.step()

print("✅ 음식 섭취 여부 예측 모델 학습 완료!")

✅ 음식 섭취 여부 예측 모델 학습 완료!


In [36]:
import pickle

# ✅ PyTorch 모델 저장
with open("model_food.pth", "wb") as f:
    pickle.dump(model_food, f)

print("✅ model_food.pth 저장 완료!")

✅ model_food.pth 저장 완료!


In [37]:
# ✅ 운동 여부 예측을 위한 모델 정의 (PyTorch)
class ExercisePredictionModel(nn.Module):
    def __init__(self, input_dim):
        super(ExercisePredictionModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 2)  # 이진 분류 (운동 여부: 0 or 1)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.softmax(self.layer3(x))
        return x


In [38]:
# ✅ 데이터 분할 (운동 여부)
input_dim_ex = X_resampled_ex.shape[1]
model_exercise = ExercisePredictionModel(input_dim_ex)
criterion_ex = nn.CrossEntropyLoss()
optimizer_ex = optim.Adam(model_exercise.parameters(), lr=0.001)

X_train_ex, X_test_ex, y_train_ex, y_test_ex = train_test_split(X_resampled_ex, y_resampled_ex, test_size=0.2, random_state=23)

train_tensor_ex = torch.tensor(X_train_ex, dtype=torch.float32)
target_tensor_ex = torch.tensor(y_train_ex.values, dtype=torch.long)

dataset_ex = torch.utils.data.TensorDataset(train_tensor_ex, target_tensor_ex)
train_loader_ex = DataLoader(dataset_ex, batch_size=32, shuffle=True)


In [39]:
# ✅ 운동 여부 예측 모델 학습
for epoch in range(10):  # 10 Epoch 실행
    for data, target in train_loader_ex:
        optimizer_ex.zero_grad()
        output = model_exercise(data)
        loss = criterion_ex(output, target)
        loss.backward()
        optimizer_ex.step()

print("✅ 운동 여부 예측 모델 학습 완료!")


✅ 운동 여부 예측 모델 학습 완료!


In [40]:
import pickle

# ✅ PyTorch 모델 저장
with open("model_exercise.pth", "wb") as f:
    pickle.dump(model_exercise, f)

print("✅ model_exercise.pth 저장 완료!")


✅ model_exercise.pth 저장 완료!


In [41]:
feature_columns = [
    "BMI",                  # 비만 지수 (높을수록 비만 위험 증가)
    "허리둘레",              # 복부 비만 지표 (높을수록 건강 위험 증가)
    "수축기혈압(최고 혈압)",  # 고혈압 위험 평가
    "이완기혈압(최저 혈압)",  # 고혈압 위험 평가
    "혈압 차이",             # 수축기 - 이완기 혈압 차이 (높을수록 심혈관 질환 위험)
    "총콜레스테롤",          # LDL + HDL + 트리글리세라이드 (심혈관 건강)
    "고혈당 위험",           # 식전혈당(공복혈당)이 높은 경우 당뇨병 위험 증가
    "간 지표",              # (ALT, AST, 감마지티피) 간 건강 상태 반영
    "성별",                 # 남성 / 여성 차이 반영 (운동 및 식단에 영향)
    "연령대",                # 연령대 (고령자 위험 증가)
    "비만 위험 지수",         # 허리둘레 / BMI (비만 위험 증가)
    "흡연상태",             # 흡연 여부 (흡연자는 건강 위험 증가)
    "음주여부"              # 음주 여부 (과음 시 건강 위험 증가)
]

In [42]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [43]:
# 모델 불러오기
with open("model_exercise.pth", "rb") as f:
    model_exercise = pickle.load(f)

In [44]:
# ✅ 데이터 변환 (PyTorch Tensor → Numpy 변환)
X_test_ex_tensor = torch.tensor(X_test_ex, dtype=torch.float32)

In [45]:
# 예측 수행
model_exercise.eval()
with torch.no_grad():
    y_pred_ex_prob = model_exercise(X_test_ex_tensor)
    y_pred_ex = torch.argmax(y_pred_ex_prob, dim=1).numpy()

In [46]:
# 평가 지표 출력
accuracy_ex = accuracy_score(y_test_ex, y_pred_ex)
report_ex = classification_report(y_test_ex, y_pred_ex)
conf_matrix_ex = confusion_matrix(y_test_ex, y_pred_ex)


In [47]:
print("🏋️‍♂️ ✅ 운동 여부 예측 평가")
print(f"✅ 정확도: {accuracy_ex:.4f}")
print("📊 분류 보고서:\n", report_ex)
print("🛑 혼동 행렬:\n", conf_matrix_ex)

🏋️‍♂️ ✅ 운동 여부 예측 평가
✅ 정확도: 0.9678
📊 분류 보고서:
               precision    recall  f1-score   support

           0       0.97      0.96      0.97     22142
           1       0.96      0.97      0.97     22219

    accuracy                           0.97     44361
   macro avg       0.97      0.97      0.97     44361
weighted avg       0.97      0.97      0.97     44361

🛑 혼동 행렬:
 [[21346   796]
 [  634 21585]]


In [48]:
# ✅ 모델 불러오기
with open("model_food.pth", "rb") as f:
    model_food = pickle.load(f)

In [49]:
# ✅ 데이터 변환 (PyTorch Tensor → Numpy 변환)
X_test_food_tensor = torch.tensor(X_test_food, dtype=torch.float32)


In [50]:
# ✅ 예측 수행
model_food.eval()
with torch.no_grad():
    y_pred_food_prob = model_food(X_test_food_tensor)
    y_pred_food = torch.argmax(y_pred_food_prob, dim=1).numpy()


In [51]:
# ✅ 평가 지표 출력
accuracy_food = accuracy_score(y_test_food, y_pred_food)
report_food = classification_report(y_test_food, y_pred_food)
conf_matrix_food = confusion_matrix(y_test_food, y_pred_food)

In [52]:
print("🍽️ ✅ 음식 섭취 여부 예측 평가")
print(f"✅ 정확도: {accuracy_food:.4f}")
print("📊 분류 보고서:\n", report_food)
print("🛑 혼동 행렬:\n", conf_matrix_food)

🍽️ ✅ 음식 섭취 여부 예측 평가
✅ 정확도: 0.8498
📊 분류 보고서:
               precision    recall  f1-score   support

           0       0.87      0.82      0.85     39442
           1       0.83      0.87      0.85     39035

    accuracy                           0.85     78477
   macro avg       0.85      0.85      0.85     78477
weighted avg       0.85      0.85      0.85     78477

🛑 혼동 행렬:
 [[32538  6904]
 [ 4885 34150]]


In [54]:
merged_data.isna().sum()

성별              0
연령대             0
허리둘레            0
수축기혈압(최고 혈압)    0
이완기혈압(최저 혈압)    0
식전혈당(공복혈당)      0
콜레스테롤 지수        0
HDL콜레스테롤        0
LDL콜레스테롤        0
트리글리세라이드        0
혈청지오티(AST)      0
혈청지피티(ALT)      0
감마지티피           0
흡연상태            0
음주여부            0
BMI             0
user_id         0
BMI_등급          0
운동 여부           0
섭취 여부           0
총콜레스테롤          0
혈압 차이           0
간 지표            0
고혈당 위험          0
비만 위험 지수        0
dtype: int64