## 1.2.6 딥러닝으로 분류 문제 풀기

In [1]:
import pandas as pd

# 데이터 구성 및 준비하기
data = pd.read_csv('../data/NHANES Weight and Height/data.csv')

data.head()

Unnamed: 0.1,Unnamed: 0,Weight (kg),Standing Height (cm),BMI(kg/m**2)
0,0,97.1,160.2,37.8
1,1,98.8,182.3,29.7
2,2,74.3,184.2,21.9
3,3,103.7,185.3,30.2
4,4,83.3,177.1,26.6


In [2]:
def BMI_Classification(BMI):
    if BMI <= 18.5:
        return 0 #'저체중'
    elif (18.5 < BMI <= 23):
        return 1 #'정상'
    elif (23 < BMI <= 25):
        return 2 #'과체중'
    elif (25 < BMI <= 30):
        return 3 #'비만'
    elif BMI > 30:
        return 4 #'고도비만'

In [3]:
data['BMI_CF'] = data['BMI(kg/m**2)'].apply(BMI_Classification)

data.head(10)

Unnamed: 0.1,Unnamed: 0,Weight (kg),Standing Height (cm),BMI(kg/m**2),BMI_CF
0,0,97.1,160.2,37.8,4
1,1,98.8,182.3,29.7,3
2,2,74.3,184.2,21.9,1
3,3,103.7,185.3,30.2,4
4,4,83.3,177.1,26.6,3
5,5,91.1,152.7,39.1,4
6,6,72.6,158.4,28.9,3
7,7,73.0,161.2,28.1,3
8,8,81.4,161.3,31.3,4
9,9,86.0,167.8,30.5,4


In [4]:
feature_cols = ['Weight (kg)', 'Standing Height (cm)']

label = data.pop('BMI_CF')
feature = data[feature_cols].copy()

In [5]:
import tensorflow as tf

tf.random.set_seed(0)

In [7]:
# 모델 생성
import keras 

model = keras.Sequential([
    keras.layers.Input(shape=(2,)),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=32, activation='relu'),
    keras.layers.Dense(units=16, activation='relu'),
    keras.layers.Dense(units=5, activation='softmax') # Classification
])

model.summary()

In [8]:
# 모델 컴파일 
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [9]:
from sklearn.model_selection import train_test_split

# 학습과 테스트 데이터 분리 (학습 80%, 테스트 20%)
train_x, test_x, train_y, test_y = train_test_split(feature, label, test_size=0.2, random_state=0)

# 학습과 검증 데이터 분리 (학습 80%, 검증 20%)
train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size=0.2, random_state=0)

print('train dataset :', train_x.shape, train_y.shape)
print('validation dataset :', val_x.shape, val_y.shape)
print('test dataset :', test_x.shape, test_y.shape)

train dataset : (5368, 2) (5368,)
validation dataset : (1342, 2) (1342,)
test dataset : (1678, 2) (1678,)


In [10]:
import keras

# EarlyStopping
early_stopping = keras.callbacks.EarlyStopping(patience = 20)

history = model.fit(train_x,
                    train_y,
                    validation_data=(val_x, val_y),
                    epochs=1000,
                    callbacks=[early_stopping])

Epoch 1/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4106 - loss: 5.4015 - val_accuracy: 0.7064 - val_loss: 0.6853
Epoch 2/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7312 - loss: 0.7029 - val_accuracy: 0.7712 - val_loss: 0.5569
Epoch 3/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7448 - loss: 0.6065 - val_accuracy: 0.7839 - val_loss: 0.5009
Epoch 4/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7547 - loss: 0.5565 - val_accuracy: 0.7817 - val_loss: 0.4870
Epoch 5/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7596 - loss: 0.5423 - val_accuracy: 0.7861 - val_loss: 0.4831
Epoch 6/1000
[1m168/168[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7664 - loss: 0.5409 - val_accuracy: 0.7921 - val_loss: 0.4756
Epoch 7/1000
[1

In [11]:
import plotly.graph_objects as go

fig = go.Figure( )
fig.add_trace(go.Scattergl(y=history.history['loss'], name='loss')) 
fig.add_trace(go.Scattergl(y=history.history['val_loss'], name='val_loss')) 
fig.update_layout(title="<b>Loss of Model</b>", xaxis_title='Epoch', yaxis_title='Loss', template='seaborn')
fig.show( )

In [12]:
import numpy as np
x_test = np.array([(91.1, 152.7)])
#y_test = [39.1] = 4

print(model.predict(x_test))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[[1.8403691e-34 3.5304318e-28 8.4083081e-16 2.1020212e-06 9.9999785e-01]]


In [13]:
import numpy as np

np.argmax(model.predict(x_test))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step


np.int64(4)

In [14]:
model.evaluate(test_x, test_y)

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8766 - loss: 0.2366


[0.22796955704689026, 0.8867699503898621]