## 1.2.5 과대적합과 과소적합

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# 데이터 구성 및 준비하기
data = pd.read_csv('../data/NHANES Weight and Height/data.csv')

feature_cols = ['Weight (kg)', 'Standing Height (cm)']

label = data.pop('BMI(kg/m**2)')
feature = data[feature_cols].copy()

In [2]:
import keras

# 모델 생성 
model = keras.Sequential([
    keras.layers.Input(shape=(2,)),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=32, activation='relu'),
    keras.layers.Dense(units=16, activation='relu'),
    keras.layers.Dense(units=1) # activation='linear'
])

model.summary()

In [3]:
# 모델 컴파일 
model.compile(loss='mae',
              optimizer='adam',
              metrics=['mae', 'mse'])

In [4]:
import keras

# EarlyStopping
early_stopping = keras.callbacks.EarlyStopping(patience = 20)

In [5]:
from sklearn.model_selection import train_test_split

# 학습과 테스트 데이터 분리 (학습 80%, 테스트 20%)
train_x, test_x, train_y, test_y = train_test_split(feature, label, test_size=0.2)
# 학습과 검증 데이터 분리 (학습 80%, 검증 20%)
train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size=0.2)

print('train dataset :', train_x.shape, train_y.shape)
print('validation dataset :', val_x.shape, val_y.shape)
print('test dataset :', test_x.shape, test_y.shape)

train dataset : (5368, 2) (5368,)
validation dataset : (1342, 2) (1342,)
test dataset : (1678, 2) (1678,)


In [6]:
history = model.fit(train_x, 
                    train_y,
                    validation_data=(val_x, val_y),
                    epochs=1000,
                    callbacks=[early_stopping])

Epoch 1/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 7.3956 - mae: 7.3956 - mse: 126.0925 - val_loss: 2.9718 - val_mae: 2.9718 - val_mse: 13.7614
Epoch 2/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.9329 - mae: 2.9329 - mse: 13.6635 - val_loss: 2.9637 - val_mae: 2.9637 - val_mse: 13.7296
Epoch 3/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.9611 - mae: 2.9611 - mse: 13.7655 - val_loss: 2.9441 - val_mae: 2.9441 - val_mse: 13.5117
Epoch 4/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.8981 - mae: 2.8981 - mse: 13.3190 - val_loss: 3.1124 - val_mae: 3.1124 - val_mse: 15.2017
Epoch 5/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.9062 - mae: 2.9062 - mse: 13.3909 - val_loss: 3.4315 - val_mae: 3.4315 - val_mse: 18.9818
Epoch 6/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [7]:
import plotly.graph_objects as go

fig = go.Figure( ) 
fig.add_trace(go.Scattergl(y=history.history['loss'],name='loss')) 
fig.add_trace(go.Scattergl(y=history.history['val_loss'],name='val_loss')) 
fig.update_layout(title="<b>Loss of Model</b>", xaxis_title='Epoch', yaxis_title='Loss')
fig.show( )

In [9]:
import numpy as np
x_test = np.array([(91.1, 152.7)])
#y_test = [39.1]

print(model.predict(x_test))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[[38.56941]]


In [None]:
model.evaluate(test_x, test_y)



[0.36158815026283264, 0.36158815026283264, 0.4038422703742981]