# 와인 맛 다중분류
베스트 모델 찾은 후 자동중단

3등급과 9등급은 제외할 것

In [21]:
import numpy as np
import pandas as pd
import tensorflow as tf
# Seed TensorFlow and NumPy with one shared value; the same `seed` is also
# passed as random_state to the train/test split later in the notebook.
seed = 2021
tf.random.set_seed(seed)
np.random.seed(seed)

In [38]:
# Load the wine data. header=None means the first line of the file is
# treated as data and the columns receive integer labels.
wine = pd.read_csv('dataset/wine.csv', header=None)
# Preview the first three rows.
wine.head(n=3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1


In [39]:
# Number of rows per grade (the second-to-last column), ordered by grade.
wine[wine.columns[-2]].value_counts().sort_index()

3      30
4     216
5    2138
6    2836
7    1079
8     193
9       5
Name: 11, dtype: int64

In [40]:
# Preprocessing: keep only rows whose grade (second-to-last column) is
# strictly greater than 3 and strictly less than 9, i.e. grades 4 to 8.
wine = wine.loc[lambda df: (df.iloc[:, -2] > 3) & (df.iloc[:, -2] < 9)]
wine.shape

(6462, 13)

In [41]:
# Label vector: the grade column (second to last) as a NumPy array.
y = wine.iloc[:, -2].to_numpy()
# Feature matrix: all 13 columns except the grade column, leaving 12 features.
X = wine.drop(columns=wine.columns[-2])

In [48]:
# Scale each feature column with MinMaxScaler (default feature range).
# NOTE(review): the scaler is fitted on all rows, before the train/test
# split performed later in the notebook, so test rows influence the
# scaling. Consider fitting on the training portion only.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit(X).transform(X)

In [58]:
# One-hot encode the raw grades directly. The result has 9 columns (see the
# output below) because the column index is the grade value itself.
from tensorflow.keras.utils import to_categorical
y_onehot = to_categorical(y)
# Show the first three encoded rows.
y_onehot[0:3]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0.]], dtype=float32)

In [61]:
# y 인코딩 : dim 5
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
y_enc = encoder.fit_transform(y)

y_onehot = to_categorical(y_enc)
y_onehot[:3]

array([[0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)

In [63]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split of the scaled features and one-hot labels.
# test_size=0.25 spells out the value scikit-learn uses when no size is
# given; random_state reuses the notebook-wide seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_scaled,
    y_onehot,
    test_size=0.25,
    stratify=y_onehot,
    random_state=seed,
)
# Shapes of the four resulting arrays.
tuple(part.shape for part in (X_train, X_test, Y_train, Y_test))

((4846, 12), (1616, 12), (4846, 5), (1616, 5))

In [64]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [65]:
# First network: 12 input features -> ReLU layers of 30, 12 and 8 units ->
# 5-unit softmax output (one unit per one-hot label column).
model = Sequential()
model.add(Dense(30, input_dim=12, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(5, activation='softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 30)                390       
_________________________________________________________________
dense_5 (Dense)              (None, 12)                372       
_________________________________________________________________
dense_6 (Dense)              (None, 8)                 104       
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 45        
Total params: 911
Trainable params: 911
Non-trainable params: 0
_________________________________________________________________


In [66]:
# Multi-class configuration: categorical cross-entropy against the one-hot
# labels, Adam optimizer, accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## 모델 저장 관련 설정

In [67]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# File that receives the model whenever the validation loss improves.
modelpath = 'model/best_wine_grade.h5'
checkpointer = ModelCheckpoint(
    filepath=modelpath,
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
# Stop training after 20 epochs without improvement. 'val_loss' is the
# callback's default monitor, written out to match the checkpoint above.
early_stopping = EarlyStopping(monitor='val_loss', patience=20)

## 모델 학습 및 저장

In [68]:
# Train for at most 500 epochs in batches of 200, with 20% of the training
# data held out for validation. The callbacks save the best model and stop
# the run early.
history = model.fit(
    X_train,
    Y_train,
    batch_size=200,
    epochs=500,
    validation_split=0.2,
    callbacks=[checkpointer, early_stopping],
    verbose=0,
)

## 베스트 모델

In [69]:
from tensorflow.keras.models import load_model

# Reload the checkpoint saved during training and score it on the test set.
# The path comes from `modelpath` (the same variable given to the
# ModelCheckpoint callback) instead of a second copy of the string literal,
# so the save and load locations cannot drift apart.
best_model = load_model(modelpath)
# Returns [loss, accuracy] as configured in compile().
best_model.evaluate(X_test, Y_test)



[1.0079628229141235, 0.5470296740531921]

# Another model

In [70]:
# Second network: one more hidden layer and wider layers than the first
# (56 -> 30 -> 16 -> 8 ReLU units), same 12 inputs and 5-unit softmax output.
model2 = Sequential()
model2.add(Dense(56, input_dim=12, activation='relu'))
model2.add(Dense(30, activation='relu'))
model2.add(Dense(16, activation='relu'))
model2.add(Dense(8, activation='relu'))
model2.add(Dense(5, activation='softmax'))
model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 56)                728       
_________________________________________________________________
dense_9 (Dense)              (None, 30)                1710      
_________________________________________________________________
dense_10 (Dense)             (None, 16)                496       
_________________________________________________________________
dense_11 (Dense)             (None, 8)                 136       
_________________________________________________________________
dense_12 (Dense)             (None, 5)                 45        
Total params: 3,115
Trainable params: 3,115
Non-trainable params: 0
_________________________________________________________________


In [71]:
# Same training configuration as the first model: categorical cross-entropy,
# Adam optimizer, accuracy metric.
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [72]:
# Separate checkpoint file for the second model, so it does not overwrite
# the first model's saved file.
modelpath2 = 'model/best_wine_grade2.h5'
checkpointer2 = ModelCheckpoint(
    filepath=modelpath2,
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
# Stop after 20 epochs without improvement in validation loss ('val_loss'
# is the callback's default monitor, written out explicitly).
early_stopping2 = EarlyStopping(monitor='val_loss', patience=20)

In [73]:
# Train the second model with the same schedule as the first: up to 500
# epochs, batch size 200, 20% of the training data used for validation.
history2 = model2.fit(
    X_train,
    Y_train,
    batch_size=200,
    epochs=500,
    validation_split=0.2,
    callbacks=[checkpointer2, early_stopping2],
    verbose=0,
)

In [74]:
# Reload the second model's checkpoint and score it on the test set.
# The path comes from `modelpath2` (the variable given to its
# ModelCheckpoint callback) instead of a second copy of the string literal,
# so the save and load locations cannot drift apart.
best_model = load_model(modelpath2)
# Returns [loss, accuracy] as configured in compile().
best_model.evaluate(X_test, Y_test)



[1.009678840637207, 0.5439356565475464]

## 파라미터 개수를 확 늘린 모델

In [75]:
# Third network with far more parameters: five ReLU hidden layers
# (128 -> 80 -> 48 -> 30 -> 12 units), same 12 inputs and 5-unit softmax.
model3 = Sequential()
model3.add(Dense(128, input_dim=12, activation='relu'))
model3.add(Dense(80, activation='relu'))
model3.add(Dense(48, activation='relu'))
model3.add(Dense(30, activation='relu'))
model3.add(Dense(12, activation='relu'))
model3.add(Dense(5, activation='softmax'))
model3.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 128)               1664      
_________________________________________________________________
dense_14 (Dense)             (None, 80)                10320     
_________________________________________________________________
dense_15 (Dense)             (None, 48)                3888      
_________________________________________________________________
dense_16 (Dense)             (None, 30)                1470      
_________________________________________________________________
dense_17 (Dense)             (None, 12)                372       
_________________________________________________________________
dense_18 (Dense)             (None, 5)                 65        
Total params: 17,779
Trainable params: 17,779
Non-trainable params: 0
__________________________________________________

In [76]:
# Same training configuration as the other two models: categorical
# cross-entropy, Adam optimizer, accuracy metric.
model3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [77]:
# Separate checkpoint file for the third model.
modelpath3 = 'model/best_wine_grade3.h5'
checkpointer3 = ModelCheckpoint(
    filepath=modelpath3,
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
# Stop after 20 epochs without improvement in validation loss ('val_loss'
# is the callback's default monitor, written out explicitly).
early_stopping3 = EarlyStopping(monitor='val_loss', patience=20)

In [78]:
# Train the third model with the same schedule as the others: up to 500
# epochs, batch size 200, 20% of the training data used for validation.
history3 = model3.fit(
    X_train,
    Y_train,
    batch_size=200,
    epochs=500,
    validation_split=0.2,
    callbacks=[checkpointer3, early_stopping3],
    verbose=0,
)

In [79]:
# Reload the third model's checkpoint from the path its ModelCheckpoint
# wrote to, then report [loss, accuracy] on the test set.
best_model = load_model(filepath=modelpath3)
best_model.evaluate(x=X_test, y=Y_test)



[1.0051480531692505, 0.5482673048973083]