In [2]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0,VGG16
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image

In [8]:
# Path to the merged body-measurement CSV.
# A plain forward slash is used: the previous "\/" was an invalid escape
# sequence, and Windows accepts "/" as a path separator.
csv_file_path = "E:/통합_신체정보데이터.csv"
df = pd.read_csv(csv_file_path, encoding="utf8")
df

Unnamed: 0,model_num,height,weight,body_fat,waist,belly_waist,hip,thigh,bust,arm,shoulder,insim,gender,age,body_shape
0,F004,164.1,49.4,21.8,64.1,67.2,87.8,50.3,80.5,24.8,33.5,75.4,F,30,모래시계형 (Hourglass)
1,F005,152.9,51.9,24.2,66.2,79.2,90.5,55.0,80.5,29.2,33.1,63.3,F,44,배형 (Belly)
2,F006,159.7,47.6,20.0,64.4,74.5,87.3,47.6,79.5,25.1,32.1,66.5,F,50,배형 (Belly)
3,F007,162.2,59.1,27.8,74.8,82.3,92.1,52.2,93.0,26.7,33.6,70.6,F,42,배형 (Belly)
4,F008,157.6,61.6,36.4,82.6,89.0,92.2,51.5,99.7,29.0,36.6,66.6,F,65,사과형 (Apple)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
987,M499,169.9,65.2,17.8,81.8,82.0,87.0,48.4,92.7,29.3,39.1,69.1,M,53,사과형 (Apple)
988,M500,173.7,67.2,20.3,72.3,77.0,93.9,55.3,91.7,31.9,39.0,74.8,M,30,모래시계형 (Hourglass)
989,F013,152.0,74.0,50.5,99.7,104.8,105.4,64.4,101.9,32.8,33.8,65.0,F,50,사과형 (Apple)
990,F014,160.2,51.2,25.0,68.5,77.0,89.0,52.2,81.0,25.8,35.8,68.4,F,57,배형 (Belly)


In [32]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

model_num      0
height         0
weight         0
body_fat       0
waist          0
belly_waist    0
hip            0
thigh          0
bust           0
arm            0
shoulder       0
insim          0
gender         0
age            0
body_shape     0
dtype: int64

In [25]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.applications import EfficientNetB0, VGG16
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess
from tensorflow.keras.preprocessing import image

# Directory that holds the resized input images.
image_dir = "E:/image_data/"

# Both backbones are built with the same options: ImageNet weights, no
# classification head, and average pooling on the output.
_backbone_options = dict(weights="imagenet", include_top=False, pooling="avg")
eff_model = EfficientNetB0(**_backbone_options)
vgg_model = VGG16(**_backbone_options)

# Image loading / preprocessing helper
def load_and_preprocess_image(img_path, preprocess_func):
    """Load one image and return it as a preprocessed single-item batch.

    Args:
        img_path: Path of the image file to load.
        preprocess_func: Model-specific preprocessing callable applied to the
            batched array.

    Returns:
        Whatever ``preprocess_func`` returns for the image resized to
        224x224 with a leading batch axis added.
    """
    pil_image = image.load_img(img_path, target_size=(224, 224))
    # Add the leading batch axis before the model-specific preprocessing.
    batch = image.img_to_array(pil_image)[np.newaxis, ...]
    return preprocess_func(batch)

# EfficientNetB0 & VGG16 feature extraction
def extract_features_for_model(model_num):
    """Return mean EfficientNetB0 and VGG16 feature vectors for one model_num.

    The 16 images ``resize_{model_num}_1.jpg`` .. ``resize_{model_num}_16.jpg``
    under ``image_dir`` are all checked for existence *before* any inference
    runs, so an incomplete image set costs no model calls. Each backbone is
    then called once on the whole 16-image batch instead of once per image.

    Args:
        model_num: Identifier used to build the image file names.

    Returns:
        ``(eff_mean, vgg_mean)``: the per-backbone feature vectors averaged
        over the 16 images, or ``(None, None)`` if any image file is missing.
    """
    img_paths = [
        os.path.join(image_dir, f"resize_{model_num}_{i}.jpg")
        for i in range(1, 17)  # images are numbered 1..16
    ]

    # A single missing file invalidates the whole set; report the first one.
    for img_path in img_paths:
        if not os.path.exists(img_path):
            print(f"이미지 없음: {img_path}")
            return None, None

    # Each image is loaded separately per backbone so that each preprocessing
    # function receives its own freshly loaded array.
    eff_batch = np.concatenate(
        [load_and_preprocess_image(p, eff_preprocess) for p in img_paths], axis=0
    )
    vgg_batch = np.concatenate(
        [load_and_preprocess_image(p, vgg_preprocess) for p in img_paths], axis=0
    )

    # One batched call per backbone; verbose=0 keeps progress bars out of the output.
    eff_features = np.asarray(eff_model.predict(eff_batch, verbose=0))
    vgg_features = np.asarray(vgg_model.predict(vgg_batch, verbose=0))

    # Flatten each image's features to 1-D, then average across the images.
    n_images = len(img_paths)
    eff_mean = eff_features.reshape(n_images, -1).mean(axis=0)
    vgg_mean = vgg_features.reshape(n_images, -1).mean(axis=0)
    return eff_mean, vgg_mean

# Load the body-measurement table from CSV.
df = pd.read_csv("E://통합_신체정보데이터.csv")

# Measurement columns copied from the CSV into every output row.
measurement_cols = ["height", "weight", "belly_waist", "waist", "hip", "thigh", "bust", "shoulder"]

feature_data = []    # one flat row per model_num whose features were extracted
missing_models = []  # model_num values for which extraction returned None
# Feature lengths are recorded from the first successful extraction. Reading
# them from the loop variables after the loop breaks when the last row has
# no features (None) or the table is empty (undefined name).
eff_dim = vgg_dim = None

for index, row in df.iterrows():
    model_num = row["model_num"]
    eff_features, vgg_features = extract_features_for_model(model_num)

    if eff_features is None or vgg_features is None:
        missing_models.append(model_num)
        continue

    if eff_dim is None:
        eff_dim, vgg_dim = len(eff_features), len(vgg_features)

    # Plain Python lists keep the identifier as a string and the numbers as
    # floats, without going through a mixed-type NumPy array.
    feature_data.append(
        [model_num]
        + [float(v) for v in eff_features]
        + [float(v) for v in vgg_features]
        + row[measurement_cols].tolist()
    )

# Fail with a clear message instead of writing a CSV with no rows.
if not feature_data:
    raise RuntimeError(
        f"특징 벡터를 추출한 모델이 없습니다 (누락된 모델 {len(missing_models)}개). "
        "image_dir 경로와 이미지 파일명을 확인하세요."
    )

# Column layout: id, EfficientNetB0 features, VGG16 features, measurements.
columns = ["model_num"] + \
          [f"eff_feature_{i}" for i in range(eff_dim)] + \
          [f"vgg_feature_{i}" for i in range(vgg_dim)] + \
          measurement_cols

final_df = pd.DataFrame(feature_data, columns=columns)

# Show a preview of the result.
print("EfficientNetB0 + VGG16 특징 벡터 추출 완료")
print(final_df.head())

# Save the combined table.
final_df.to_csv("CNN_특징벡터_신체데이터.csv", index=False)

# Report the model_num values that were skipped.
if missing_models:
    print(f"누락된 모델 {len(missing_models)}개:", missing_models)


⚠️ 이미지 없음: E:/image_data/resize_F004_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_F005_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_F006_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_F007_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_F008_1.jpg




































































































⚠️ 이미지 없음: E:/image_data/resize_M004_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_M005_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_M006_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_M007_1.jpg
⚠️ 이미지 없음: E:/image_data/resize_M008_1.jpg














































































































EfficientNetB0 + VGG16 특징 벡터 추출 완료
  model_num  eff_feature_0  eff_feature_1  eff_feature_2  eff_feature_3  \
0      F009      -0.099405      -0.038858      -0.094923       0.092400   
1      F010      -0.098153      -0.100212      -0.081653       0.072339   
2      F011      -0.113879      -0.069387      -0.074935       0.035573   
3      F012      -0.134959      -0.024111      -0.097631       0.149215   
4      F016      -0.093315      -0.119686      -0.084543       0.097081   

   eff_feature_4  eff_feature_5  eff_feature_6  eff_feature_7  eff_feature_8  \
0       0.042726       0.543235       0.058994       0.221345       0.046533   
1      -0.070673       0.325903       0.251093       0.475126       0.054208   
2      -0.105749       0.394770       0.087188       0.479549      -0.000049   
3      -0.085375       0.205583       0.107286       0.444928       0.063614   
4      -0.104497       0.418836       0.249358       0.631522       0.154994   

   ...  vgg_feature_510  vgg_feat

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import xgboost as xgb

# Load the feature table from CSV.
file_path = "CNN_특징벡터_신체데이터.csv"
df = pd.read_csv(file_path)
df = df.drop(columns=["model_num"], errors="ignore")  # drop the identifier if present

# Targets are selected by name and everything else becomes a feature, so the
# split no longer depends on the targets being the last 8 columns.
target_cols = ["height", "weight", "belly_waist", "waist", "hip", "thigh", "bust", "shoulder"]
X = df.drop(columns=target_cols).values  # every remaining column is a feature
y = df[target_cols].values  # body measurements

# Split into training and validation sets (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("훈련 데이터 크기:", X_train.shape, "검증 데이터 크기:", X_test.shape)


훈련 데이터 크기: (785, 1792) 검증 데이터 크기: (197, 1792)


In [2]:
# Build the XGBoost regressor from a named hyperparameter set.
xgb_params = {
    "n_estimators": 100,   # number of trees
    "learning_rate": 0.1,  # learning rate
    "max_depth": 6,        # tree depth
    "random_state": 42,
}
model = xgb.XGBRegressor(**xgb_params)




In [8]:
# Fit the model, printing the evaluation metric for both the training and
# the validation split.
eval_sets = [(X_train, y_train), (X_test, y_test)]
model.fit(X_train, y_train, eval_set=eval_sets, verbose=True)

[0]	validation_0-rmse:32.89019	validation_1-rmse:33.03315
[1]	validation_0-rmse:29.66694	validation_1-rmse:29.87491
[2]	validation_0-rmse:26.76560	validation_1-rmse:27.03179
[3]	validation_0-rmse:24.15286	validation_1-rmse:24.48163
[4]	validation_0-rmse:21.80224	validation_1-rmse:22.19671
[5]	validation_0-rmse:19.68613	validation_1-rmse:20.16615
[6]	validation_0-rmse:17.78154	validation_1-rmse:18.35697
[7]	validation_0-rmse:16.06696	validation_1-rmse:16.72698
[8]	validation_0-rmse:14.52293	validation_1-rmse:15.27270
[9]	validation_0-rmse:13.13443	validation_1-rmse:13.98791
[10]	validation_0-rmse:11.88402	validation_1-rmse:12.84440
[11]	validation_0-rmse:10.75818	validation_1-rmse:11.84046
[12]	validation_0-rmse:9.74357	validation_1-rmse:10.95588
[13]	validation_0-rmse:8.83144	validation_1-rmse:10.17854
[14]	validation_0-rmse:8.01104	validation_1-rmse:9.50518
[15]	validation_0-rmse:7.27031	validation_1-rmse:8.91608
[16]	validation_0-rmse:6.60691	validation_1-rmse:8.40469
[17]	validation

In [9]:
# Run prediction on the held-out split
y_pred = model.predict(X_test)

In [11]:
# Mean absolute error between the held-out targets and the predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mae

3.5229723966497755

In [15]:
import numpy as np
num_samples = 5  # how many test rows to print side by side
print("실제값 vs 예측값 비교:")
# Slicing caps the loop at the rows that exist, so a test split with fewer
# than num_samples rows no longer raises IndexError.
for actual, predicted in zip(y_test[:num_samples], y_pred[:num_samples]):
    print(f"실제: {actual}, 예측: {np.round(predicted, 2)}")

실제값 vs 예측값 비교:
실제: [171.6  98.5 103.   99.7 110.   66.2 112.4  39.7], 예측: [172.69  78.87  94.21  92.62  95.51  58.69  97.95  39.95]
실제: [173.5  79.6  92.4  92.2  97.2  59.2  98.9  36.4], 예측: [174.16  77.39  94.43  90.39  95.33  55.39  96.43  39.47]
실제: [168.4  66.1  86.4  88.3  89.2  49.2  90.8  38.2], 예측: [167.2   67.39  86.55  81.43  93.96  52.33  92.36  38.18]
실제: [174.6  68.7  84.4  79.7  95.8  58.1  84.1  39.4], 예측: [170.73  68.44  84.04  78.4   95.13  56.39  93.39  38.62]
실제: [162.1  90.9 103.6 104.3 111.   66.3 114.8  37.3], 예측: [159.92  66.17  94.84  86.53  97.04  58.17  95.15  36.8 ]


In [18]:
import pickle

# Serialize the fitted model to disk.
# Only load this file from a trusted source: unpickling can execute arbitrary code.
with open("xgboost_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)