In [1]:
# 경고 메세지가 안나오게..
import warnings
warnings.filterwarnings('ignore')

# 기본
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# KFold
from sklearn.model_selection import KFold           # 랜덤하게 섞어서 자를 수도 있고, 순서대로 자를 수도 있음
from sklearn.model_selection import StratifiedKFold # 결과 데이터의 비율이 최대한 균등하게 들어갈 수 있도록!

# 교차검증 함수
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

# 학습 데이터와 검증 데이터로 나누는 함수
from sklearn.model_selection import train_test_split

# 데이터 전처리
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# 하이퍼 파라미터 튜닝
from sklearn.model_selection import GridSearchCV

# 평가함수
from sklearn.metrics import accuracy_score

# 머신러닝 알고리즘 - 분류
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

# 머신러닝 알고리즘 - 회귀
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

# 군집
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift

# 차원축소
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# 딥러닝
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf

# 다중분류를 위한 핫-윈 인코더
from keras.utils import to_categorical

# 저장된 딥러닝 모델을 복구하는 함수
from keras.models import load_model

# epoch마다 모델을 저장하는 함수
from keras.callbacks import ModelCheckpoint

# 더이상 성능 향상이 이루어지지 않는다면 조기 중단시킬 수 있는 함수
from keras.callbacks import EarlyStopping

# 저장
import pickle

# 시간 모듈
import time

# 그래프 설정
plt.rcParams['font.family']='Malgun Gothic'
# 맥용 plt.rcParams['font.family']='AppleGothic'
plt.rcParams['font.size'] = 16
plt.rcParams['figure.figsize'] = 20, 10
plt.rcParams['axes.unicode_minus'] = False

In [2]:
# seed 설정
np.random.seed(3)
tf.random.set_seed(3)

In [3]:
# 데이터 입력
df1 = pd.read_csv('../../data/wine.csv', header=None)
df1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


In [4]:
# 데이터 분류
x = df1.drop(12, axis=1)
y = df1[12]

display(x)
display(y)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
6493,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
6494,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
6495,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


0       1
1       1
2       1
3       1
4       1
       ..
6492    0
6493    0
6494    0
6495    0
6496    0
Name: 12, Length: 6497, dtype: int64

In [5]:
# 모델 설정
model = Sequential()
model.add(Dense(30, input_dim=12, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [6]:
# 모델 컴파일
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [7]:
# 모델 저장 콜백
modelpath = 'models/{epoch}-{val_loss}.hdf5'
callback1 = ModelCheckpoint(filepath=modelpath, monitor='val_loss', verbose=1, save_best_only=True)

In [8]:
# 중단 콜백
# patience : 값을 조절하면서 확인 해보기

callback2 = EarlyStopping(monitor='val_loss', patience=100)

In [9]:
# 모델 실행
# 어차피 조기 중단 되니까 에포크 많이 주기

model.fit(x, y, validation_split=0.2, epochs=100000, batch_size=500, callbacks=[callback1, callback2])

Epoch 1/100000

Epoch 00001: val_loss improved from inf to 0.65745, saving model to models\1-0.6574481725692749.hdf5
Epoch 2/100000

Epoch 00002: val_loss improved from 0.65745 to 0.07766, saving model to models\2-0.07766126841306686.hdf5
Epoch 3/100000

Epoch 00003: val_loss did not improve from 0.07766
Epoch 4/100000

Epoch 00004: val_loss did not improve from 0.07766
Epoch 5/100000

Epoch 00005: val_loss did not improve from 0.07766
Epoch 6/100000

Epoch 00006: val_loss did not improve from 0.07766
Epoch 7/100000

Epoch 00007: val_loss did not improve from 0.07766
Epoch 8/100000

Epoch 00008: val_loss did not improve from 0.07766
Epoch 9/100000

Epoch 00009: val_loss did not improve from 0.07766
Epoch 10/100000

Epoch 00010: val_loss did not improve from 0.07766
Epoch 11/100000

Epoch 00011: val_loss did not improve from 0.07766
Epoch 12/100000

Epoch 00012: val_loss did not improve from 0.07766
Epoch 13/100000

Epoch 00013: val_loss did not improve from 0.07766
Epoch 14/100000

Epo


Epoch 00042: val_loss did not improve from 0.07766
Epoch 43/100000

Epoch 00043: val_loss did not improve from 0.07766
Epoch 44/100000

Epoch 00044: val_loss did not improve from 0.07766
Epoch 45/100000

Epoch 00045: val_loss did not improve from 0.07766
Epoch 46/100000

Epoch 00046: val_loss did not improve from 0.07766
Epoch 47/100000

Epoch 00047: val_loss did not improve from 0.07766
Epoch 48/100000

Epoch 00048: val_loss did not improve from 0.07766
Epoch 49/100000

Epoch 00049: val_loss did not improve from 0.07766
Epoch 50/100000

Epoch 00050: val_loss did not improve from 0.07766
Epoch 51/100000

Epoch 00051: val_loss did not improve from 0.07766
Epoch 52/100000

Epoch 00052: val_loss did not improve from 0.07766
Epoch 53/100000

Epoch 00053: val_loss did not improve from 0.07766
Epoch 54/100000

Epoch 00054: val_loss did not improve from 0.07766
Epoch 55/100000

Epoch 00055: val_loss did not improve from 0.07766
Epoch 56/100000

Epoch 00056: val_loss did not improve from 0.07


Epoch 00082: val_loss did not improve from 0.04227
Epoch 83/100000

Epoch 00083: val_loss did not improve from 0.04227
Epoch 84/100000

Epoch 00084: val_loss improved from 0.04227 to 0.03926, saving model to models\84-0.039262305945158005.hdf5
Epoch 85/100000

Epoch 00085: val_loss did not improve from 0.03926
Epoch 86/100000

Epoch 00086: val_loss did not improve from 0.03926
Epoch 87/100000

Epoch 00087: val_loss did not improve from 0.03926
Epoch 88/100000

Epoch 00088: val_loss did not improve from 0.03926
Epoch 89/100000

Epoch 00089: val_loss did not improve from 0.03926
Epoch 90/100000

Epoch 00090: val_loss did not improve from 0.03926
Epoch 91/100000

Epoch 00091: val_loss did not improve from 0.03926
Epoch 92/100000

Epoch 00092: val_loss did not improve from 0.03926
Epoch 93/100000

Epoch 00093: val_loss did not improve from 0.03926
Epoch 94/100000

Epoch 00094: val_loss did not improve from 0.03926
Epoch 95/100000

Epoch 00095: val_loss did not improve from 0.03926
Epoch 9


Epoch 00122: val_loss did not improve from 0.03017
Epoch 123/100000

Epoch 00123: val_loss did not improve from 0.03017
Epoch 124/100000

Epoch 00124: val_loss did not improve from 0.03017
Epoch 125/100000

Epoch 00125: val_loss did not improve from 0.03017
Epoch 126/100000

Epoch 00126: val_loss did not improve from 0.03017
Epoch 127/100000

Epoch 00127: val_loss did not improve from 0.03017
Epoch 128/100000

Epoch 00128: val_loss did not improve from 0.03017
Epoch 129/100000

Epoch 00129: val_loss improved from 0.03017 to 0.02607, saving model to models\129-0.026071401312947273.hdf5
Epoch 130/100000

Epoch 00130: val_loss did not improve from 0.02607
Epoch 131/100000

Epoch 00131: val_loss did not improve from 0.02607
Epoch 132/100000

Epoch 00132: val_loss did not improve from 0.02607
Epoch 133/100000

Epoch 00133: val_loss did not improve from 0.02607
Epoch 134/100000

Epoch 00134: val_loss did not improve from 0.02607
Epoch 135/100000

Epoch 00135: val_loss did not improve from 0


Epoch 00164: val_loss did not improve from 0.02607
Epoch 165/100000

Epoch 00165: val_loss did not improve from 0.02607
Epoch 166/100000

Epoch 00166: val_loss did not improve from 0.02607
Epoch 167/100000

Epoch 00167: val_loss did not improve from 0.02607
Epoch 168/100000

Epoch 00168: val_loss did not improve from 0.02607
Epoch 169/100000

Epoch 00169: val_loss did not improve from 0.02607
Epoch 170/100000

Epoch 00170: val_loss did not improve from 0.02607
Epoch 171/100000

Epoch 00171: val_loss did not improve from 0.02607
Epoch 172/100000

Epoch 00172: val_loss did not improve from 0.02607
Epoch 173/100000

Epoch 00173: val_loss did not improve from 0.02607
Epoch 174/100000

Epoch 00174: val_loss did not improve from 0.02607
Epoch 175/100000

Epoch 00175: val_loss did not improve from 0.02607
Epoch 176/100000

Epoch 00176: val_loss did not improve from 0.02607
Epoch 177/100000

Epoch 00177: val_loss did not improve from 0.02607
Epoch 178/100000

Epoch 00178: val_loss did not imp


Epoch 00205: val_loss did not improve from 0.02607
Epoch 206/100000

Epoch 00206: val_loss did not improve from 0.02607
Epoch 207/100000

Epoch 00207: val_loss did not improve from 0.02607
Epoch 208/100000

Epoch 00208: val_loss did not improve from 0.02607
Epoch 209/100000

Epoch 00209: val_loss did not improve from 0.02607
Epoch 210/100000

Epoch 00210: val_loss did not improve from 0.02607
Epoch 211/100000

Epoch 00211: val_loss improved from 0.02607 to 0.02289, saving model to models\211-0.02288695052266121.hdf5
Epoch 212/100000

Epoch 00212: val_loss did not improve from 0.02289
Epoch 213/100000

Epoch 00213: val_loss did not improve from 0.02289
Epoch 214/100000

Epoch 00214: val_loss did not improve from 0.02289
Epoch 215/100000

Epoch 00215: val_loss did not improve from 0.02289
Epoch 216/100000

Epoch 00216: val_loss did not improve from 0.02289
Epoch 217/100000

Epoch 00217: val_loss did not improve from 0.02289
Epoch 218/100000

Epoch 00218: val_loss did not improve from 0.


Epoch 00246: val_loss did not improve from 0.02289
Epoch 247/100000

Epoch 00247: val_loss did not improve from 0.02289
Epoch 248/100000

Epoch 00248: val_loss did not improve from 0.02289
Epoch 249/100000

Epoch 00249: val_loss did not improve from 0.02289
Epoch 250/100000

Epoch 00250: val_loss did not improve from 0.02289
Epoch 251/100000

Epoch 00251: val_loss did not improve from 0.02289
Epoch 252/100000

Epoch 00252: val_loss did not improve from 0.02289
Epoch 253/100000

Epoch 00253: val_loss did not improve from 0.02289
Epoch 254/100000

Epoch 00254: val_loss did not improve from 0.02289
Epoch 255/100000

Epoch 00255: val_loss did not improve from 0.02289
Epoch 256/100000

Epoch 00256: val_loss did not improve from 0.02289
Epoch 257/100000

Epoch 00257: val_loss did not improve from 0.02289
Epoch 258/100000

Epoch 00258: val_loss did not improve from 0.02289
Epoch 259/100000

Epoch 00259: val_loss did not improve from 0.02289
Epoch 260/100000

Epoch 00260: val_loss did not imp


Epoch 00287: val_loss did not improve from 0.02005
Epoch 288/100000

Epoch 00288: val_loss did not improve from 0.02005
Epoch 289/100000

Epoch 00289: val_loss did not improve from 0.02005
Epoch 290/100000

Epoch 00290: val_loss did not improve from 0.02005
Epoch 291/100000

Epoch 00291: val_loss did not improve from 0.02005
Epoch 292/100000

Epoch 00292: val_loss did not improve from 0.02005
Epoch 293/100000

Epoch 00293: val_loss did not improve from 0.02005
Epoch 294/100000

Epoch 00294: val_loss did not improve from 0.02005
Epoch 295/100000

Epoch 00295: val_loss did not improve from 0.02005
Epoch 296/100000

Epoch 00296: val_loss did not improve from 0.02005
Epoch 297/100000

Epoch 00297: val_loss did not improve from 0.02005
Epoch 298/100000

Epoch 00298: val_loss did not improve from 0.02005
Epoch 299/100000

Epoch 00299: val_loss did not improve from 0.02005
Epoch 300/100000

Epoch 00300: val_loss did not improve from 0.02005
Epoch 301/100000

Epoch 00301: val_loss did not imp


Epoch 00328: val_loss did not improve from 0.02005
Epoch 329/100000

Epoch 00329: val_loss did not improve from 0.02005
Epoch 330/100000

Epoch 00330: val_loss did not improve from 0.02005
Epoch 331/100000

Epoch 00331: val_loss did not improve from 0.02005
Epoch 332/100000

Epoch 00332: val_loss did not improve from 0.02005
Epoch 333/100000

Epoch 00333: val_loss did not improve from 0.02005
Epoch 334/100000

Epoch 00334: val_loss did not improve from 0.02005
Epoch 335/100000

Epoch 00335: val_loss did not improve from 0.02005
Epoch 336/100000

Epoch 00336: val_loss did not improve from 0.02005
Epoch 337/100000

Epoch 00337: val_loss did not improve from 0.02005
Epoch 338/100000

Epoch 00338: val_loss did not improve from 0.02005
Epoch 339/100000

Epoch 00339: val_loss did not improve from 0.02005
Epoch 340/100000

Epoch 00340: val_loss did not improve from 0.02005
Epoch 341/100000

Epoch 00341: val_loss did not improve from 0.02005
Epoch 342/100000

Epoch 00342: val_loss did not imp


Epoch 00369: val_loss did not improve from 0.02005
Epoch 370/100000

Epoch 00370: val_loss did not improve from 0.02005
Epoch 371/100000

Epoch 00371: val_loss did not improve from 0.02005
Epoch 372/100000

Epoch 00372: val_loss did not improve from 0.02005
Epoch 373/100000

Epoch 00373: val_loss did not improve from 0.02005
Epoch 374/100000

Epoch 00374: val_loss did not improve from 0.02005
Epoch 375/100000

Epoch 00375: val_loss did not improve from 0.02005


<tensorflow.python.keras.callbacks.History at 0x1330edaa548>

In [11]:
# 학습 완료된 모델을 로딩한다
best_model = load_model('models/275-0.020053867250680923.hdf5')
best_model

<tensorflow.python.keras.engine.sequential.Sequential at 0x13389ca40c8>

In [12]:
# 예측결과 추출한다.
pred = (best_model.predict(x) > 0.5).astype('int32')
pred

array([[1],
       [1],
       [1],
       ...,
       [0],
       [0],
       [0]])

In [13]:
r1 = accuracy_score(y, pred)
r1

0.9782976758503925