In [39]:
# 필요한 라이브러리 import
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [40]:
# 데이터 불러오기
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
sample_submission_df = pd.read_csv('sample_submission.csv')

# ID 열 제거
train_df = train_df.drop('ID', axis=1)
test_df = test_df.drop('ID', axis=1)

In [41]:
# Weight_Status, Gender 열을 숫자 데이터로 변환
train_df['Weight_Status'] = train_df['Weight_Status'].map({'Normal Weight': 0, 'Overweight': 1, 'Obese': 2})
train_df['Gender'] = train_df['Gender'].map({'M': 0, 'F': 1})
test_df['Weight_Status'] = test_df['Weight_Status'].map({'Normal Weight': 0, 'Overweight': 1, 'Obese': 2})
test_df['Gender'] = test_df['Gender'].map({'M': 0, 'F': 1})

In [42]:
train_df['Height(cm)'] = train_df['Height(Feet)'] * 30.48 + train_df['Height(Remainder_Inches)']
test_df['Height(cm)'] = test_df['Height(Feet)'] * 30.48 + test_df['Height(Remainder_Inches)']

In [43]:
train_df = train_df.drop(['Height(Feet)', 'Height(Remainder_Inches)'], axis=1)
test_df = test_df.drop(['Height(Feet)', 'Height(Remainder_Inches)'], axis=1)

In [44]:
train_df['Weight(kg)'] = train_df['Weight(lb)'] * 0.453592
test_df['Weight(kg)'] = test_df['Weight(lb)'] * 0.453592

In [45]:
train_df = train_df.drop(['Weight(lb)'], axis=1)
test_df = test_df.drop(['Weight(lb)'], axis=1)

In [46]:
# 해리스앤베네딕트 공식
# 남자: 88.362 + (13.397 x 체중) + (4.799 x 신장) - (6.677 x 연령)
# 여자: 447.593 + (9.247 x 체중) + (3.098 x 신장) - (4.330 x 연령)
train_df['BMR'] = train_df.apply(lambda x: 88.362 + (13.397 * x['Weight(kg)']) + (4.799 * x['Height(cm)']) - 
                                 (6.677 * x['Age']) if x['Gender'] == 1 else 447.593 + (9.247 * x['Weight(kg)']) + 
                                 (3.098 * x['Height(cm)']) - (4.330 * x['Age']), axis=1)

test_df['BMR'] = test_df.apply(lambda x: 88.362 + (13.397 * x['Weight(kg)']) + (4.799 * x['Height(cm)']) - 
                               (6.677 * x['Age']) if x['Gender'] == 1  else 447.593 + (9.247 * x['Weight(kg)']) + 
                               (3.098 * x['Height(cm)']) - (4.330 * x['Age']), axis=1)

In [47]:
# 입력과 목표 변수 분리
X_train = train_df.drop(['Calories_Burned'], axis=1)
y_train = train_df['Calories_Burned']

In [48]:
# 데이터 전처리
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_poly)

In [60]:
# 데이터셋 분리
X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train_scaled, y_train, test_size=0.3, random_state=42)

In [61]:
# 회귀 신경망 모델 구축
model = MLPRegressor(hidden_layer_sizes=(1024, 512,4), batch_size=1000, activation='relu', solver='lbfgs', alpha=0.001, max_iter=1000, random_state=42)

In [62]:
# 점증적 학습으로 모델 학습
model.partial_fit(X_train_sub, y_train_sub)
print(f'Training MSE: {mean_squared_error(y_train_sub, model.predict(X_train_sub)):.2f}')
print(f'Validation MSE: {mean_squared_error(y_valid, model.predict(X_valid)):.2f}')

Training MSE: 0.05
Validation MSE: 0.11


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [56]:
# 더 많은 데이터로 학습
for i in range(10):
    model.partial_fit(X_train_sub, y_train_sub)
    print(f'Training MSE: {mean_squared_error(y_train_sub, model.predict(X_train_sub)):.2f}')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.03


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.02


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.02


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.01


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.01


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.01


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training MSE: 0.00
Training MSE: 0.00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [63]:
# test 데이터 예측
X_test = test_df.values
X_poly_test = poly.transform(X_test)
X_test_scaled = scaler.transform(X_poly_test)
y_pred_test = model.predict(X_test_scaled)



In [64]:
# 결과 저장
sample_submission_df['Calories_Burned'] = y_pred_test
sample_submission_df.to_csv('submission_MLP_0.11.csv', index=False)