# DNN(Deep Neural Network)

## Keras로 비만도 테스트

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score
from library.preprocessing import DataFramePreprocessor
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.utils import np_utils

In [2]:
# Load the BMI dataset from CSV, then display the resulting frame.
bmi = pd.read_csv('data/bmi.csv')
bmi

Unnamed: 0,label,height,weight
0,normal,188,71
1,fat,161,68
2,thin,178,52
3,fat,136,63
4,normal,145,52
...,...,...,...
19995,thin,163,48
19996,fat,139,70
19997,normal,150,48
19998,normal,189,69


In [3]:
# Summarize column dtypes and non-null counts of the raw frame.
bmi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   label   20000 non-null  object
 1   height  20000 non-null  int64 
 2   weight  20000 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 468.9+ KB


In [4]:
# Descriptive statistics for every column (numeric and object), transposed so
# each original column becomes one row.
bmi.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
label,20000.0,3.0,fat,7673.0,,,,,,,
height,20000.0,,,,160.09205,23.385464,120.0,140.0,160.0,180.0,200.0
weight,20000.0,,,,57.37765,13.260121,35.0,46.0,57.0,69.0,80.0


## 데이터 변환(라벨)

In [5]:
# Column groups: numeric feature columns and the categorical target column.
numeric_cols = [
    "height",
    "weight",
]
categorical_cols = [
    "label",
]

In [6]:
# Project helper that applies transformers to DataFrame column groups.
dfp = DataFramePreprocessor()
# Scaler for the numeric feature columns.
mms = MinMaxScaler()
# Encoder that maps the string labels to integer codes.
lbl = LabelEncoder()

In [7]:
# Pair each transformer with its column group: label encoder -> "label",
# min-max scaler -> height/weight. The second return value is kept as `tfs`
# (presumably the fitted transformers -- confirm against the project helper).
data, tfs = dfp.fit_transform_multiple_transformer(
    bmi,
    [lbl, mms],
    [categorical_cols, numeric_cols],
)

In [8]:
# Display the transformed frame (integer label, scaled height/weight).
data

Unnamed: 0,label,height,weight
0,1,0.8500,0.800000
1,0,0.5125,0.733333
2,2,0.7250,0.377778
3,0,0.2000,0.622222
4,1,0.3125,0.377778
...,...,...,...
19995,2,0.5375,0.288889
19996,0,0.2375,0.777778
19997,1,0.3750,0.288889
19998,1,0.8625,0.755556


In [9]:
# Check dtypes and non-null counts after the transformation.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   label   20000 non-null  int32  
 1   height  20000 non-null  float64
 2   weight  20000 non-null  float64
dtypes: float64(2), int32(1)
memory usage: 390.8 KB


In [10]:
# Features: the numeric columns. Target: the label column, kept as a
# one-column DataFrame because it is selected with a list.
X_data = data.loc[:, numeric_cols]
y_data = data.loc[:, categorical_cols]

In [11]:
# Show the original string labels from the raw frame for reference.
bmi['label']

0        normal
1           fat
2          thin
3           fat
4        normal
          ...  
19995      thin
19996       fat
19997    normal
19998    normal
19999    normal
Name: label, Length: 20000, dtype: object

## train, test 분할

In [12]:
# Hold out 25% of the rows for testing, stratified on the label so both
# splits keep the class proportions. A fixed random_state makes the split
# itself repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.25,
    stratify=y_data,
    random_state=42,
)

## 케라스 모델 구성

In [13]:
# Define the model architecture --- (*3)
# A stack of ReLU Dense layers, each followed by 20% dropout, ending in a
# softmax over the label classes.
hidden_units = (512, 256, 128, 64, 32, 16)

# The targets are integer class codes, so the output layer needs one unit per
# code: highest code + 1. Derived from the data instead of hard-coding 3.
n_classes = int(y_data.values.max()) + 1

model = Sequential()
for depth, units in enumerate(hidden_units):
    # Only the first layer declares the input width (number of feature columns).
    first_layer_kwargs = {'input_shape': (X_data.shape[1],)} if depth == 0 else {}
    model.add(Dense(units, activation='relu', **first_layer_kwargs))
    model.add(Dropout(0.2))
model.add(Dense(n_classes))
model.add(Activation('softmax'))
# The sparse loss variant takes the integer labels directly (no one-hot needed).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [14]:
# Train the model on the training split --- (*5)
# `hist` keeps the object returned by fit().
hist = model.fit(x=X_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [15]:
# Evaluate on the held-out split; the result is displayed as [loss, accuracy].
score = model.evaluate(x=X_test, y=y_test, verbose=1)
score



[0.021700723096728325, 0.9936000108718872]

In [16]:
# Predict class probabilities for the test rows (one softmax row per sample).
pred = model.predict(X_test)



In [17]:
# Compare shapes: integer labels are one column, predictions one column per class.
y_test.shape, pred.shape

((5000, 1), (5000, 3))

In [18]:
# Pick the most probable class for every row in one vectorized call
# (argmax along the class axis) and compare with the integer labels.
acc_score = accuracy_score(y_test, np.argmax(pred, axis=1))
acc_score

0.9936

## 데이터 변환(원핫)

In [19]:
# Column groups for the one-hot variant: numeric features and the
# categorical target column.
numeric_cols = [
    "height",
    "weight",
]
categorical_cols = [
    "label",
]

In [20]:
# Project helper that applies transformers to DataFrame column groups.
dfp = DataFramePreprocessor()
# Scaler for the numeric feature columns.
mms = MinMaxScaler()
# NOTE(review): `lbl` is re-created here, but the one-hot transform call in the
# visible cells only passes `ohe` and `mms`.
lbl = LabelEncoder()
# Encoder that expands the label column into one indicator column per class.
ohe = OneHotEncoder()

In [21]:
# Pair each transformer with its column group: one-hot encoder -> "label",
# min-max scaler -> height/weight. The second return value is kept as `tfs`
# (presumably the fitted transformers -- confirm against the project helper).
data, tfs = dfp.fit_transform_multiple_transformer(
    bmi,
    [ohe, mms],
    [categorical_cols, numeric_cols],
)

In [22]:
# Display the transformed frame (one-hot label columns, scaled height/weight).
data

Unnamed: 0,label_fat,label_normal,label_thin,height,weight
0,0.0,1.0,0.0,0.8500,0.800000
1,1.0,0.0,0.0,0.5125,0.733333
2,0.0,0.0,1.0,0.7250,0.377778
3,1.0,0.0,0.0,0.2000,0.622222
4,0.0,1.0,0.0,0.3125,0.377778
...,...,...,...,...,...
19995,0.0,0.0,1.0,0.5375,0.288889
19996,1.0,0.0,0.0,0.2375,0.777778
19997,0.0,1.0,0.0,0.3750,0.288889
19998,0.0,1.0,0.0,0.8625,0.755556


In [23]:
# Check dtypes and non-null counts after the one-hot transformation.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   label_fat     20000 non-null  float64
 1   label_normal  20000 non-null  float64
 2   label_thin    20000 non-null  float64
 3   height        20000 non-null  float64
 4   weight        20000 non-null  float64
dtypes: float64(5)
memory usage: 781.4 KB


In [24]:
# Features: the scaled numeric columns. Targets: every remaining column,
# i.e. the one-hot label indicators. They are selected by name rather than by
# position, so the cell does not depend on the indicator columns coming first
# or on there being exactly three of them.
X_data = data[numeric_cols]
y_data = data.drop(columns=numeric_cols)

## train, test 분할

In [25]:
# Hold out 25% of the rows for testing, stratified on the one-hot label rows.
# A fixed random_state makes the split itself repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.25,
    stratify=y_data,
    random_state=42,
)

## 케라스 모델 구성

In [26]:
# Define the model architecture --- (*3)
# ReLU Dense layers, each followed by 20% dropout; the output layer has one
# unit per one-hot target column and is followed by softmax.
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_data.shape[1],)),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(y_data.shape[1]),
    Activation('softmax'),
])
# One-hot targets pair with the non-sparse categorical cross-entropy loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [32]:
# Train the model on the training split --- (*5)
# `hist` keeps the object returned by fit().
hist = model.fit(x=X_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [33]:
# Evaluate on the held-out split; the result is displayed as [loss, accuracy].
score = model.evaluate(x=X_test, y=y_test, verbose=1)
score



[0.030770404264330864, 0.9883999824523926]

In [34]:
# Predict class probabilities for the test rows (one softmax row per sample).
pred = model.predict(X_test)



In [35]:
# Compare shapes: one-hot targets and predictions both have one column per class.
y_test.shape, pred.shape

((5000, 3), (5000, 3))

In [36]:
# Convert one-hot targets and softmax outputs to class indices in one
# vectorized argmax each (along the class axis), then compare them.
acc_score = accuracy_score(
    np.argmax(y_test.values, axis=1),
    np.argmax(pred, axis=1),
)
acc_score

0.9884