<a href="https://colab.research.google.com/github/park-hoyeon/park-hoyeon.github.io/blob/master/skt_6_30_Mutl_class_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 다층 퍼셉트론(MLP) 모델 - 옵티마이저 등 바꿔보기
# 테스트 데이터를 대상으로 평가.
# 결론을 뽑아내는 메트릭 - MAE


In [None]:
import pandas as pd
# Read the California housing training CSV from the working directory,
# treating the literal string "?" as a missing value, then preview the
# first rows.
# NOTE(review): `value` is not referenced by any later cell visible here —
# the remaining cells work on Fashion-MNIST. Confirm this cell is still needed.
value = pd.read_csv('./california_housing_train.csv', na_values = "?")
display(value.head())

In [None]:
from tensorflow.keras.datasets import fashion_mnist
# Load the Fashion-MNIST train/test splits through the Keras dataset helper
# and print the array shapes of each split as a sanity check.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Image check: show a 3x3 grid of randomly chosen training images,
# each titled with its class name.
import numpy as np
import matplotlib.pyplot as plt

# Position i in this list is the display name of integer label i
# (this is a 10-class problem).
class_names = ['T-shirt/top', 'Trouser', 'Pullover',
               'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
# Use a seeded generator so the preview is the same on every
# Restart & Run All, and sample without replacement so one image cannot
# appear twice in the grid. (Requires at least 9 training images.)
samples = np.random.default_rng(42).choice(len(X_train), size=9, replace=False)
plt.figure(figsize = (8, 6))
for i, idx in enumerate(samples):
  plt.subplot(3, 3, i+1)
  # Hide axis ticks; they carry no information for an image thumbnail.
  plt.xticks([])
  plt.yticks([])
  plt.imshow(X_train[idx], cmap = 'gray')
  plt.title(class_names[y_train[idx]])
plt.show()

In [None]:
# Prepare validation data - split the validation set off from the training data.
from sklearn.model_selection import train_test_split
# test_size=0.3 holds out 30% of the samples as the validation split;
# random_state=42 fixes the shuffle so the split is repeatable.
# NOTE: X_train / y_train are reassigned to the reduced training split, so
# re-running this cell on its own splits the already-reduced data again.
X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size = 0.3, random_state = 42)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)

In [None]:
# Normalize the image data: cast to float32 and divide by 255.
# (Presumably maps 0-255 pixel values into [0, 1]; the print below shows
# the actual max/min so this can be verified.)
import numpy as np
# NOTE: each array is overwritten under its own name, so running this cell
# a second time divides by 255 again.
X_train = X_train.astype('float32') / 255.
X_val = X_val.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.
print(np.max(X_train), np.min(X_train))

In [None]:
# Change shape - to feed the image data into Dense layers, reshape each
# array into the (batch num, input num) form, with 28 * 28 values per sample.
# -1 lets NumPy work out the number of samples from the array size.
X_train = (X_train.reshape((-1, 28 * 28)))
X_val = (X_val.reshape((-1, 28 * 28)))
X_test = (X_test.reshape((-1, 28 * 28)))
# Print feature/label shapes of every split to confirm they still line up.
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

In [None]:
# One-hot encode the integer class labels of every split.
from tensorflow.keras.utils import to_categorical
# Width of the one-hot vectors. It is passed explicitly because
# to_categorical otherwise infers it as (largest label present + 1), which
# would yield a narrower matrix for any split that happens to lack the
# highest class and would then no longer match the model's 10-unit output.
NUM_CLASSES = 10
y_train_oh = to_categorical(y_train, num_classes=NUM_CLASSES)
y_val_oh = to_categorical(y_val, num_classes=NUM_CLASSES)
y_test_oh = to_categorical(y_test, num_classes=NUM_CLASSES)
# Show the first five encoded rows as a sanity check.
y_train_oh[:5]

In [None]:
# 모델 만들기
from tensorflow import keras
from tensorflow.keras import layers
def build_model(input_dim=784, num_classes=10):
    """Build the (uncompiled) multilayer-perceptron classifier.

    Layer stack: Input -> Flatten -> Dense(64, relu) -> Dense(32, relu)
    -> Dense(num_classes, softmax).

    Args:
        input_dim: Length of each flattened input vector. Defaults to 784,
            the value that was previously hard-coded.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A keras.Sequential model. The caller is responsible for compile().
    """
    model = keras.Sequential()
    # Declare the input shape exactly once, as an explicit Input object,
    # instead of passing `input_shape` to both Flatten and the first Dense
    # layer (the second one was redundant on a non-first layer).
    model.add(keras.Input(shape=(input_dim,)))
    # Flatten is a pass-through for already-flat vectors; it is kept so the
    # layer stack matches the original architecture.
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

# Instantiate with the defaults and print the layer/parameter summary.
model = build_model()
model.summary()

In [None]:
# Compile
import tensorflow as tf
# Adam optimizer with an explicit learning rate of 0.001.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
# categorical_crossentropy goes with the one-hot targets (y_*_oh) that are
# passed to fit(). The metric is named 'acc'; plot_history looks up
# history.history['acc'] and ['val_acc'], so keep the two in sync.
model.compile(optimizer=adam,
      loss = 'categorical_crossentropy',
      metrics=['acc'])

In [None]:
# Training
EPOCHS = 100
BATCH_SIZE = 64
# Fit on the training split with one-hot targets and evaluate on the
# validation split after every epoch; the returned History object is kept
# in `history` for the learning-curve plot. verbose = 1 prints per-epoch
# progress.
history = model.fit(X_train, y_train_oh,
           epochs = EPOCHS,
           batch_size = BATCH_SIZE,
           validation_data = (X_val, y_val_oh),
           verbose = 1)

In [None]:
def plot_history(history):
    """
    Plots the training and validation accuracy and loss over epochs.

    Accuracy is drawn in the left panel and loss in the right panel of a
    single 12x5 figure, which is then shown with plt.show().

    Args:
        history: A Keras History object (its ``history`` attribute is read
            as a dict of per-epoch lists).

    Raises:
        KeyError: If the accuracy series is stored under neither 'acc' nor
            'accuracy', or if the matching 'val_*' / loss series is missing.
    """
    logs = history.history

    # The accuracy key follows the metric name given to compile(): it is
    # 'acc' for metrics=['acc'] and 'accuracy' for metrics=['accuracy'].
    # Try 'acc' first so existing behaviour is unchanged.
    acc_key = next((key for key in ('acc', 'accuracy') if key in logs), None)
    if acc_key is None:
        raise KeyError(
            "history has no 'acc' or 'accuracy' entry; available keys: %s"
            % sorted(logs))

    acc = logs[acc_key]
    val_acc = logs['val_' + acc_key]
    loss = logs['loss']
    val_loss = logs['val_loss']
    epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x axis

    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: accuracy (dots = training, line = validation).
    ax_acc.plot(epochs, acc, 'bo', label='Training acc')
    ax_acc.plot(epochs, val_acc, 'b', label='Validation acc')
    ax_acc.set_title('Training and validation accuracy')
    ax_acc.set_xlabel('Epochs')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    # Right panel: loss (dots = training, line = validation).
    ax_loss.plot(epochs, loss, 'bo', label='Training loss')
    ax_loss.plot(epochs, val_loss, 'b', label='Validation loss')
    ax_loss.set_title('Training and validation loss')
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    plt.show()

In [None]:
# Learning curves: plot the accuracy and loss recorded in `history` during training.
plot_history(history)

In [None]:
# Prediction: run the trained model on the test inputs.
y_pred = model.predict(X_test)
# Show the first prediction row.
y_pred[:1]

In [None]:
# np.argmax() - extracts, for each row (axis=1), the index with the highest probability.
y_pred_argmax = np.argmax(y_pred, axis=1)
# Show the first ten predicted class indices.
y_pred_argmax[:10]

In [None]:
# Display prediction results as images: the first n_rows * n_cols test
# images, each titled "predicted(true)".
n_rows = 3
n_cols = 8

# Reload the Fashion MNIST dataset to get the original X_test data
# (X_test itself was reshaped into flat 28 * 28 vectors in an earlier cell).
from tensorflow.keras.datasets import fashion_mnist
(_, _), (X_test_original, y_test_original) = fashion_mnist.load_data()

# Create exactly one figure. The original called plt.figure() twice, which
# left an empty extra figure in the cell output.
plt.figure(figsize=(n_cols * 2, n_rows * 2))

for row in range(n_rows):
  for col in range(n_cols):
    # Row-major position in the grid, also used as the test-sample index.
    index = n_cols * row + col
    plt.subplot(n_rows, n_cols, index + 1)
    plt.imshow(X_test_original[index], cmap="gray")
    plt.axis('off')
    plt.title(f'{class_names[y_pred_argmax[index]]}({class_names[y_test_original[index]]})')
plt.show()

In [None]:
#혼동행렬 시각화
from sklearn.metrics import confusion_matrix
import seaborn as sns
def plot_matrix(y_test, y_pred, class_labels=None):
  """Draw the confusion matrix of true vs. predicted labels as a heatmap.

  Args:
    y_test: True class labels.
    y_pred: Predicted class labels (class indices, not probabilities).
    class_labels: Optional sequence of display names in which position i
      names integer class i. When given, the matrix rows/columns are fixed
      to classes 0 .. len(class_labels) - 1 (so the names always line up,
      even if a class is absent from the data) and the axis ticks show the
      names. When omitted, the behaviour is the same as before: classes are
      taken from the data and the ticks use seaborn's automatic labels.
  """
  plt.figure(figsize = (10, 8))
  if class_labels is None:
    cm = confusion_matrix(y_test, y_pred)
    tick_labels = 'auto'  # seaborn's default for xticklabels / yticklabels
  else:
    cm = confusion_matrix(y_test, y_pred,
                          labels=list(range(len(class_labels))))
    tick_labels = list(class_labels)
  sns.heatmap(cm, annot = True, fmt = 'd', cmap = 'Blues',
              xticklabels = tick_labels, yticklabels = tick_labels)
  plt.xlabel('predicted label', fontsize = 15)
  plt.ylabel('true label', fontsize = 15)
  plt.show()
# class_names is the label-index -> name list defined in the image-check cell.
plot_matrix(y_test, y_pred_argmax, class_labels=class_names)

In [None]:
#평가 지표 계산
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def print_score(y_test, y_pred):
  """Print accuracy and macro-averaged precision, recall and F1.

  Each value is printed on its own line, formatted to three decimals.

  Args:
    y_test: True class labels.
    y_pred: Predicted class labels.
  """
  # Accuracy takes no averaging argument, so it is reported on its own.
  print('accuracy: %.3f' % accuracy_score(y_test, y_pred))

  # (output template, scorer) pairs that all use macro averaging.
  macro_rows = (
      ('precision: %.3f', precision_score),
      ('recall_score: %.3f', recall_score),
      ('f1_score: %.3f', f1_score),
  )
  for template, scorer in macro_rows:
    print(template % scorer(y_test, y_pred, average='macro'))
print_score(y_test, y_pred_argmax)