In [26]:
import pandas as pd

# 데이터 시각화
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# 한글 폰트 문제 해결 
# matplotlib은 한글 폰트를 지원하지 않음
# os정보
import platform

# font_manager : 폰트 관리 모듈
# rc : 폰트 변경 모듈
from matplotlib import font_manager, rc
# unicode 설정
plt.rcParams['axes.unicode_minus'] = False

if platform.system() == 'Darwin':
    rc('font', family='AppleGothic') # os가 macos
elif platform.system() == 'Windows':
    path = 'c:/Windows/Fonts/malgun.ttf' # os가 windows
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    print("Unknown System")

In [27]:
csv = pd.read_csv('../data/bmi.csv')
csv.head()

Unnamed: 0,height,weight,label
0,184,53,thin
1,175,41,thin
2,155,54,normal
3,160,46,thin
4,152,36,thin


In [28]:
csv.count()

height    20000
weight    20000
label     20000
dtype: int64

In [29]:
csv.isnull().sum()

height    0
weight    0
label     0
dtype: int64

In [30]:
csv.describe()

Unnamed: 0,height,weight
count,20000.0,20000.0
mean,165.0217,60.05345
std,14.715891,14.776617
min,140.0,35.0
25%,152.0,47.0
50%,165.0,60.0
75%,178.0,73.0
max,190.0,85.0


> height : 140 ~ 190   
> weight : 35 ~ 85

In [31]:
csv.label.value_counts()

normal    6849
fat       6764
thin      6387
Name: label, dtype: int64

---
### data 정규화

In [32]:
# weight, height
csv.weight = csv.weight / csv.weight.max()
csv.height = csv.height / csv.height.max()

In [33]:
csv.head()

Unnamed: 0,height,weight,label
0,0.968421,0.623529,thin
1,0.921053,0.482353,thin
2,0.815789,0.635294,normal
3,0.842105,0.541176,thin
4,0.8,0.423529,thin


In [34]:
# label
bclass = {'thin':[1,0,0], 'normal':[0,1,0], 'fat':[0,0,1]}
y=[]

for i, v in enumerate(csv['label']):
    y.append(bclass[v])
y

[[1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [0, 0, 1],
 [0, 1, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 1, 0],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0,

In [35]:
# Train
X_train = csv[['height', 'weight']][0:15000]
y_train = y[0:15000]

In [36]:
# Test
X_test = csv[['height', 'weight']][15000:20001]
y_test = y[15000:20001]

In [37]:
len(X_test)

5000

---
### Deep Learning 만들기

In [38]:
from tensorflow import keras

In [39]:
model = keras.Sequential()
model.add(keras.layers.Dense(512, activation='relu', input_shape=(2,)))
model.add(keras.layers.Dense(3, activation='softmax')) # label 3

In [50]:
model.compile(
    loss='categorical_crossentropy', # data가 표준화되어있음.
    optimizer='rmsprop',
    metrics=['accuracy']
)

In [51]:
# 데이터 훈련하기
import numpy as np

model.fit(
    np.array(X_train),
    np.array(y_train),
    epochs=50
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fcc7fdc4730>

---
### 테스트 데이터로 평가하기

In [52]:
score = model.evaluate(X_test, np.array(y_test))
print('loss :', score[0])
print('accuracy :', score[1])

loss : 0.029320236295461655
accuracy : 0.9886000156402588


### optimizer에 따른 loss
> rmsprop : 0.029   
> adam : 0.029   
> sgd : 0.039

### optimizer에 따른 accuracy
> rmsprop : 0.9864   
> adam : 0.9842   
> sgd : 0.9872