## Sequential model

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Two-layer classifier: the full layer list is handed to the constructor at once.
dense_stack = [
    layers.Dense(64, activation="relu"),
    layers.Dense(10, activation="softmax"),
]
model = keras.Sequential(dense_stack)

Metal device set to: Apple M2


2024-05-28 14:04:44.306171: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-28 14:04:44.306261: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


점진적으로 Sequential Model 만들기

In [4]:
# Same architecture, this time grown one layer at a time with add().
model = keras.Sequential()
for layer in (
    layers.Dense(64, activation="relu"),
    layers.Dense(10, activation="softmax"),
):
    model.add(layer)

## Functional API model

In [5]:
# Functional API: declare a symbolic input, then call layers on tensors.
inputs = keras.Input(shape=(3,), name="my_input")
hidden_layer = layers.Dense(64, activation="relu")
features = hidden_layer(inputs)
output_layer = layers.Dense(10, activation="softmax")
outputs = output_layer(features)
# The model is defined by the tensors that bound the graph.
model = keras.Model(inputs=inputs, outputs=outputs)

## Subclassing model 

In [11]:
import numpy as np

num_samples = 1280
vocabulary_size = 10000
num_tags = 100
num_departments = 4

# Both text inputs share the same (samples, vocabulary) shape.
text_shape = (num_samples, vocabulary_size)

# Dummy input data: random 0/1 matrices.
title_data = np.random.randint(0, 2, size=text_shape)
text_body_data = np.random.randint(0, 2, size=text_shape)
tags_data = np.random.randint(0, 2, size=(num_samples, num_tags))

# Dummy target data: one float in [0, 1) per sample, plus a 0/1 row per sample.
priority_data = np.random.random(size=(num_samples, 1))
department_data = np.random.randint(
    0, 2, size=(num_samples, num_departments)
)

In [12]:
class CustomerTicketModel(keras.Model):
    """Subclassed Keras model with a priority head and a department head.

    Args:
        num_departments: Number of units in the softmax department head.
    """

    def __init__(self, num_departments):
        # The parent constructor must run before any layer is attached.
        super().__init__()
        # Layers are created once here and reused by every call().
        self.concat_layer = layers.Concatenate()
        self.mixing_layer = layers.Dense(64, activation="relu")
        self.priority_scorer = layers.Dense(1, activation="sigmoid")
        self.department_classifier = layers.Dense(
            num_departments, activation="softmax"
        )

    def call(self, inputs):
        """Define the forward pass.

        Args:
            inputs: Mapping with the keys "title", "text_body" and "tags".

        Returns:
            A ``(priority, department)`` tuple of head outputs.
        """
        ordered_parts = [inputs["title"], inputs["text_body"], inputs["tags"]]
        shared = self.mixing_layer(self.concat_layer(ordered_parts))
        priority = self.priority_scorer(shared)
        department = self.department_classifier(shared)
        return priority, department

In [13]:
model = CustomerTicketModel(num_departments=4)

# Calling the model on the dummy arrays runs call() and returns both heads.
ticket_inputs = {
    "title": title_data,
    "text_body": text_body_data,
    "tags": tags_data,
}
priority, department = model(ticket_inputs)

## 혼합 모델

In [14]:
vocabulary_size = 10000
num_tags = 100
num_departments = 4

# Symbolic inputs, one per ticket field.
title = keras.Input(shape=(vocabulary_size,), name="title")
text_body = keras.Input(shape=(vocabulary_size,), name="text_body")
tags = keras.Input(shape=(num_tags,), name="tags")
model_inputs = [title, text_body, tags]

# Join the inputs into one tensor, then mix them through a hidden layer.
features = layers.Concatenate()(model_inputs)
features = layers.Dense(64, activation="relu")(features)

# Two output heads that share the same features.
priority = layers.Dense(1, activation="sigmoid", name="priority")(features)
department = layers.Dense(
    num_departments, activation="softmax", name="department"
)(features)

# The model is fully specified by its input and output tensors.
model = keras.Model(inputs=model_inputs, outputs=[priority, department])

## 사용자 정의 훈련 스텝을 사용하는 모델

사용자 정의 훈련 스텝을 사용하는 이유
1. **복잡한 모델 트레이닝 로직:**
    - 기본 `fit` 메서드는 일반적인 훈련 루프를 제공하지만, 특별한 요구사항이 있는 경우 이를 커스터마이즈해야 합니다.
        - 예를 들어, GAN(Generative Adversarial Networks)처럼 두 개 이상의 모델을 동시에 훈련시켜야 하는 경우나, 추가적인 손실 함수를 사용하는 경우입니다.
2. **보다 세밀한 제어:**
    - 훈련 프로세스를 더 세밀하게 제어하고 각 단계에서 무슨 일이 일어나는지 명확히 이해하고자 하는 경우에 유용합니다. 이를 통해 문제를 디버깅하거나 최적화할 수 있습니다.
3. **동적 학습률 변경:**
    - 학습 중 특정 조건에 따라 학습률을 동적으로 변경하거나 맞춤형 학습률 스케줄링을 적용할 수 있습니다.
4. **맞춤형 손실 함수 및 메트릭:**
    - 기본 컴파일 옵션에서 제공되지 않는 맞춤형 손실 함수나 메트릭을 사용해야 하는 경우입니다.
5. **특별한 데이터 전처리 또는 후처리:**
    - 입력 데이터를 특정 방식으로 전처리하거나, 예측 값을 특별한 방식으로 후처리해야 하는 경우입니다.
6. **복잡한 그래디언트 계산 및 업데이트:**
    - 표준 옵티마이저 업데이트 방식 대신, 맞춤형 그래디언트 계산 및 변수 업데이트 로직을 적용하고자 할 때 사용될 수 있습니다.
7. **학습 중 특정 로직 삽입:**
    - 학습 중에 특정 조건을 만족할 때마다 특정 작업을 수행하려는 경우, 예를 들어 모델의 일부 가중치를 고정하거나, 조건부 상태 관리를 구현할 때 유용합니다.

### 원래 train_step 코드 (기본 fit 메서드를 실행하면 이 코드가 실행된다)

In [None]:
  def train_step(self, data):
    """The logic for one training step.

    This method can be overridden to support custom training logic.
    For concrete examples of how to override this method see
    [Customizing what happens in fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit).
    This method is called by `Model.make_train_function`.

    This method should contain the mathematical logic for one step of training.
    This typically includes the forward pass, loss calculation, backpropagation,
    and metric updates.

    Configuration details for *how* this logic is run (e.g. `tf.function` and
    `tf.distribute.Strategy` settings), should be left to
    `Model.make_train_function`, which can also be overridden.

    Args:
      data: A nested structure of `Tensor`s.

    Returns:
      A `dict` containing values that will be passed to
      `tf.keras.callbacks.CallbackList.on_train_batch_end`. Typically, the
      values of the `Model`'s metrics are returned. Example:
      `{'loss': 0.2, 'accuracy': 0.7}`.
    """
    # NOTE(review): shown for reference only; `data_adapter` and the enclosing
    # class are not defined in this notebook, so this cell is not runnable.
    # Split the batch into inputs, targets and (optional) sample weights.
    x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
    
    # Run forward pass.
    with tf.GradientTape() as tape:
      y_pred = self(x, training=True)
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    self._validate_target_and_loss(y, loss)
    
    # Run backwards pass.
    # The tape recorded above is handed to the optimizer together with the
    # variables to update.
    self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    return self.compute_metrics(x, y, y_pred, sample_weight)

### 교재 실습

In [21]:
from tensorflow.keras.datasets import mnist

# 1) Model factory (kept as a function so later cells can get a fresh model)
def get_mnist_model():
    """Build an uncompiled MNIST classifier: 784 -> Dense(512) -> Dropout -> softmax(10)."""
    pixels = keras.Input(shape=(28 * 28,))
    hidden = layers.Dense(512, activation="relu")(pixels)
    hidden = layers.Dropout(0.5)(hidden)
    class_probs = layers.Dense(10, activation="softmax")(hidden)
    return keras.Model(pixels, class_probs)

# 2) Load MNIST, flatten each image to a 784-vector scaled by 1/255, and
#    hold out the first 10,000 training samples for validation.
(images, labels), (test_images, test_labels) = mnist.load_data()

images = images.reshape((60000, 28 * 28))
images = images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255

val_images, train_images = images[:10000], images[10000:]
val_labels, train_labels = labels[:10000], labels[10000:]


In [22]:
model = get_mnist_model()

# Module-level objects read by train_step() / test_step() / reset_metrics().
loss_fn = keras.losses.SparseCategoricalCrossentropy()
optimizer = keras.optimizers.RMSprop()
# Metrics to report for each batch.
metrics = [keras.metrics.SparseCategoricalAccuracy()]
# Running mean of the per-batch loss.
loss_tracking_metric = keras.metrics.Mean()

def train_step(inputs, targets):
    """Run one optimisation step on a batch and return the current metric values.

    Uses the module-level ``model``, ``loss_fn``, ``optimizer``, ``metrics``
    and ``loss_tracking_metric``.

    Returns:
        dict mapping each metric name, plus ``"loss"``, to its running value.
    """
    # Forward pass under the tape; training=True is passed to the model.
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        batch_loss = loss_fn(targets, predictions)

    # Backward pass over the trainable weights only.
    weights = model.trainable_weights
    grads = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    results = {}
    for tracker in metrics:
        tracker.update_state(targets, predictions)
        results[tracker.name] = tracker.result()

    # Fold this batch's loss into the running mean.
    loss_tracking_metric.update_state(batch_loss)
    results["loss"] = loss_tracking_metric.result()
    return results

In [23]:
def reset_metrics():
    """Clear the accumulated state of every metric and of the loss tracker."""
    for tracker in [*metrics, loss_tracking_metric]:
        tracker.reset_state()

In [24]:
# Batches of 32 over the training split, in the original sample order.
training_dataset = tf.data.Dataset.from_tensor_slices(
    (train_images, train_labels)
).batch(32)

epochs = 3
for epoch in range(epochs):
    # Metrics are running values, so start each epoch from a clean state.
    reset_metrics()
    for inputs_batch, targets_batch in training_dataset:
        logs = train_step(inputs_batch, targets_batch)
    print(f"Results at the end of epoch {epoch}")
    for key in logs:
        value = logs[key]
        print(f"...{key}: {value:.4f}")

Results at the end of epoch 0
...sparse_categorical_accuracy: 0.9189
...loss: 0.2748
Results at the end of epoch 1
...sparse_categorical_accuracy: 0.9659
...loss: 0.1232
Results at the end of epoch 2
...sparse_categorical_accuracy: 0.9783
...loss: 0.0797


In [25]:
def test_step(inputs, targets):
    """Evaluate one batch without updating weights.

    Returns:
        dict of running metric values, each key prefixed with ``val_``.
    """
    predictions = model(inputs, training=False)
    batch_loss = loss_fn(targets, predictions)

    results = {}
    for tracker in metrics:
        tracker.update_state(targets, predictions)
        results["val_" + tracker.name] = tracker.result()

    loss_tracking_metric.update_state(batch_loss)
    results["val_loss"] = loss_tracking_metric.result()
    return results

val_dataset = tf.data.Dataset.from_tensor_slices(
    (val_images, val_labels)
).batch(32)

# The same metric objects were used for training, so clear them first.
reset_metrics()
for inputs_batch, targets_batch in val_dataset:
    logs = test_step(inputs_batch, targets_batch)

print("Evaluation results:")
for key in logs:
    value = logs[key]
    print(f"...{key}: {value:.4f}")

Evaluation results:
...val_sparse_categorical_accuracy: 0.9599
...val_loss: 0.1692


In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [27]:
class CustomModel(keras.Model):
    """Keras model whose ``fit()`` runs a hand-written training step."""

    def train_step(self, data):
        """Run one gradient update on ``data`` and return the metric values.

        Args:
            data: An ``(inputs, targets)`` pair.

        Returns:
            dict mapping each entry of ``self.metrics`` by name to its result.
        """
        batch_inputs, batch_targets = data

        with tf.GradientTape() as tape:
            outputs = self(batch_inputs, training=True)
            # Loss configured in compile(), plus the model's own losses.
            loss = self.compiled_loss(
                batch_targets, outputs, regularization_losses=self.losses
            )

        variables = self.trainable_variables
        grads = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))

        self.compiled_metrics.update_state(batch_targets, outputs)
        return {metric.name: metric.result() for metric in self.metrics}

# Instantiate: reuse the functional graph from get_mnist_model() inside CustomModel.
model = get_mnist_model()
custom_model = CustomModel(
    inputs=model.input,
    outputs=model.output,
)

In [28]:
# The network built by get_mnist_model() already ends in a softmax layer, so
# its outputs are probabilities. The loss must therefore be configured with
# from_logits=False; from_logits=True would apply softmax a second time.
custom_model.compile(optimizer=keras.optimizers.Adam(),
                     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                     metrics=[keras.metrics.SparseCategoricalAccuracy()])

# Train the model
custom_model.fit(train_images, train_labels, epochs=5, batch_size=32)

Epoch 1/5


  return dispatch_target(*args, **kwargs)
2024-05-28 14:38:14.228752: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1773d1e20>

### 추가 실습 ) 학습률이 변화하는 모델
- 1000 스텝(decay_steps)마다 학습률에 decay_rate(0.1)가 곱해져 계단식으로 줄어들게 됨 (코드에서는 `step // decay_steps`, 즉 내림 나눗셈을 사용)
$$ \text{lr} = \text{initial\_lr} \times (\text{decay\_rate})^{\left\lfloor \frac{\text{step}}{\text{decay\_steps}} \right\rfloor} $$

In [29]:
class CustomModel(keras.Model):
    """Keras model whose training step can reset the learning rate every batch.

    Pass a callable ``schedule_lr(step)`` to ``compile()`` to enable this.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.initial_lr = 0.001

    def compile(self, optimizer, loss, metrics, schedule_lr=None):
        """Compile as usual and remember the optional learning-rate schedule."""
        super().compile(optimizer, loss, metrics)
        self.schedule_lr = schedule_lr

    def train_step(self, data):
        """Run one gradient update, first refreshing the learning rate if scheduled."""
        batch_inputs, batch_targets = data

        if self.schedule_lr:
            # NOTE(review): the schedule receives the optimizer's step counter;
            # whether assigning its result here works inside fit() appears to
            # depend on the optimizer implementation - confirm on your TF version.
            step = self.optimizer.iterations
            self.optimizer.learning_rate = self.schedule_lr(step)

        with tf.GradientTape() as tape:
            outputs = self(batch_inputs, training=True)
            loss = self.compiled_loss(
                batch_targets, outputs, regularization_losses=self.losses
            )

        variables = self.trainable_variables
        grads = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))

        self.compiled_metrics.update_state(batch_targets, outputs)
        return {metric.name: metric.result() for metric in self.metrics}

# Learning-rate schedule (staircase exponential decay)
def schedule_lr(step):
    """Return the learning rate for a given training step.

    The rate starts at 0.001 and is multiplied by 0.1 once every 1000 steps.
    Floor division is used, so the rate changes in discrete stairs.

    Args:
        step: Step counter. Either a plain Python number or a tensor/variable
            such as ``optimizer.iterations``.

    Returns:
        A Python float for Python-number input, otherwise a float32 tensor.
    """
    initial_lr = 0.001
    decay_steps = 1000
    decay_rate = 0.1

    if isinstance(step, (int, float)):
        return initial_lr * (decay_rate ** (step // decay_steps))

    # An integer tensor cannot be the exponent of a Python float: TensorFlow
    # refuses to cast 0.1 to an integer dtype. Do the arithmetic in float32.
    stairs = tf.floor(tf.cast(step, tf.float32) / decay_steps)
    return initial_lr * tf.pow(decay_rate, stairs)

# Instantiate: wrap a fresh MNIST graph in the schedule-aware CustomModel.
model = get_mnist_model()
custom_model = CustomModel(
    inputs=model.input,
    outputs=model.output,
)

### RNN 모델

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, LSTMCell, RNN, Dense


class CustomRNNCell(LSTMCell):
    """LSTM cell with a hook point for per-time-step customisation."""

    def call(self, inputs, states, training=None):
        """Process one time step; currently delegates unchanged to ``LSTMCell``."""
        # Custom per-step behaviour would go here, before or after the parent call.
        step_result = super().call(inputs, states, training=training)
        return step_result


class MyCustomModel(tf.keras.Model):
    """Sequence classifier: custom LSTM cell wrapped in ``RNN``, then ``Dense(10)``.

    The final layer has no activation, so the model returns raw scores.
    """

    def __init__(self):
        super().__init__()
        self.rnn_cell = CustomRNNCell(128)
        self.rnn_layer = RNN(self.rnn_cell)
        self.dense = Dense(10)

    def call(self, inputs, training=None, mask=None):
        """Run the RNN over ``inputs`` and project its output to 10 scores."""
        sequence_summary = self.rnn_layer(inputs)
        return self.dense(sequence_summary)

# Dummy data
dummy_data = tf.random.normal((64, 10, 20))  # (batch_size, time_steps, features)
dummy_labels = tf.random.uniform((64,), maxval=10, dtype=tf.int32)


# Model instantiation and compilation
model = MyCustomModel()
# MyCustomModel ends in Dense(10) with no activation, i.e. it outputs logits.
# The string shortcut 'sparse_categorical_crossentropy' assumes probabilities,
# so use the loss object and state from_logits=True explicitly.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Model training
model.fit(dummy_data, dummy_labels, epochs=5)

### GAN 모델

In [None]:

class GAN(keras.Model):
    """Generator/discriminator pair trained together by a custom ``train_step``.

    Args:
        generator: Model mapping a noise batch to generated images.
        discriminator: Model scoring images as real or fake.
        noise_dim: Length of each noise vector fed to the generator. When
            omitted it is read from ``generator.input_shape[-1]`` at training
            time, which requires the generator to have a defined input shape.
    """

    def __init__(self, generator, discriminator, noise_dim=None):
        super().__init__()
        self.generator = generator
        self.discriminator = discriminator
        self.noise_dim = noise_dim

    def compile(self, generator_optimizer, discriminator_optimizer, loss_fn):
        """Store one optimizer per sub-model and the shared loss function."""
        super().compile()
        self.generator_optimizer = generator_optimizer
        self.discriminator_optimizer = discriminator_optimizer
        self.loss_fn = loss_fn

    def train_step(self, data):
        """Update the generator, then the discriminator, on one batch.

        Args:
            data: Either an ``(images, labels)`` tuple/list, whose labels are
                ignored, or a bare batch of real images.

        Returns:
            dict with the scalar losses ``"gen_loss"`` and ``"disc_loss"``.
        """
        real_images = data[0] if isinstance(data, (tuple, list)) else data

        # Derive sizes from the batch and the model, not from global names.
        batch_size = tf.shape(real_images)[0]
        noise_dim = self.noise_dim
        if noise_dim is None:
            noise_dim = self.generator.input_shape[-1]
        noise = tf.random.normal(shape=tf.stack([batch_size, noise_dim]))

        # Generator update: push the discriminator to label fakes as real (1).
        with tf.GradientTape() as gen_tape:
            generated_images = self.generator(noise, training=True)
            fake_predictions = self.discriminator(generated_images, training=True)
            gen_loss = self.loss_fn(tf.ones_like(fake_predictions), fake_predictions)

        gradients_of_generator = gen_tape.gradient(
            gen_loss, self.generator.trainable_variables)
        self.generator_optimizer.apply_gradients(
            zip(gradients_of_generator, self.generator.trainable_variables))

        # Discriminator update: real -> 1, fake -> 0. The fake batch computed
        # above is reused here.
        with tf.GradientTape() as disc_tape:
            real_predictions = self.discriminator(real_images, training=True)
            fake_predictions = self.discriminator(generated_images, training=True)
            real_loss = self.loss_fn(tf.ones_like(real_predictions), real_predictions)
            fake_loss = self.loss_fn(tf.zeros_like(fake_predictions), fake_predictions)
            disc_loss = (real_loss + fake_loss) / 2

        gradients_of_discriminator = disc_tape.gradient(
            disc_loss, self.discriminator.trainable_variables)
        self.discriminator_optimizer.apply_gradients(
            zip(gradients_of_discriminator, self.discriminator.trainable_variables))

        return {"gen_loss": gen_loss, "disc_loss": disc_loss}

# Define the models, the optimizers and the loss function.
# NOTE(review): create_generator_model / create_discriminator_model are not
# defined anywhere in the visible notebook - they must be supplied first.
generator = create_generator_model()
discriminator = create_discriminator_model()
gan = GAN(generator, discriminator)

gan_compile_args = dict(
    generator_optimizer=keras.optimizers.Adam(),
    discriminator_optimizer=keras.optimizers.Adam(),
    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
)
gan.compile(**gan_compile_args)

## Image Data : Layer 의 너비가 달라지는 경우

### DNN 모델

In [None]:
# Load and preprocess the MNIST dataset
def load_mnist_data():
    """Load MNIST as flattened, 1/255-scaled images with one-hot labels.

    Returns:
        ``(x_train, y_train, x_test, y_test)``; images have 784 columns and
        labels have 10 columns.
    """
    train_split, test_split = mnist.load_data()
    raw_train_images, raw_train_labels = train_split
    raw_test_images, raw_test_labels = test_split

    x_train = raw_train_images.reshape(-1, 28 * 28) / 255.0
    x_test = raw_test_images.reshape(-1, 28 * 28) / 255.
    y_train = to_categorical(raw_train_labels, 10)
    y_test = to_categorical(raw_test_labels, 10)
    return x_train, y_train, x_test, y_test


# Model factory

def create_model(layer_widths):
    """Build and compile an MLP for flattened MNIST.

    Args:
        layer_widths: Hidden-layer sizes, in order; must contain at least one.

    Returns:
        A compiled ``Sequential`` model ending in a 10-way softmax.
    """
    # Only the first layer declares the input shape.
    stack = [Dense(layer_widths[0], activation='relu', input_shape=(784,))]
    stack.extend(Dense(width, activation='relu') for width in layer_widths[1:])
    stack.append(Dense(10, activation='softmax'))

    model = Sequential(stack)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# Training helper
def train_model(model, x_train, y_train, x_test, y_test, epochs=10):
    """Fit ``model`` and return its ``History``; the test split is used for validation."""
    return model.fit(
        x_train,
        y_train,
        epochs=epochs,
        validation_data=(x_test, y_test),
        verbose=2,
    )


# Plot training and validation accuracy for several runs
def plot_and_save_history(histories, titles, filename='model_comparison.png'):
    """Draw accuracy curves for every history on one figure, save it, then show it.

    Args:
        histories: Keras ``History`` objects, one per model.
        titles: Legend prefixes, paired with ``histories`` by position.
        filename: Path the figure is written to.
    """
    plt.figure(figsize=(12, 6))
    for run, run_title in zip(histories, titles):
        curves = run.history
        # Validation curve first, then training, for each model.
        for key in ('val_accuracy', 'accuracy'):
            plt.plot(curves[key], label=f'{run_title} {key}')
    plt.title('Model accuracy comparison')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.savefig(filename)
    plt.show()


def main():
    """Train basic, wide and narrow MLPs on MNIST and compare them.

    Plots the accuracy curves of all three runs, then prints each model's
    test loss and accuracy.
    """
    x_train, y_train, x_test, y_test = load_mnist_data()

    # (legend title, progress message, hidden-layer widths)
    specs = [
        ('Basic Model', "Training basic model...", [128, 64]),
        ('Wide Model', "Training wide model...", [512, 256]),
        ('Narrow Model', "Training narrow model...", [32, 16]),
    ]
    titles = [title for title, _, _ in specs]

    # All models are built before any training starts.
    models = [create_model(widths) for _, _, widths in specs]

    # Train and collect histories
    histories = []
    for (_, message, _), model in zip(specs, models):
        print(message)
        histories.append(train_model(model, x_train, y_train, x_test, y_test))

    # Save the comparison figure
    plot_and_save_history(histories, titles)

    print("\nPerformance Comparison:")
    # Evaluate every model first, then print the summary lines together.
    scores = [model.evaluate(x_test, y_test, verbose=2) for model in models]
    for title, (loss, accuracy) in zip(titles, scores):
        print(f"{title} - Test Loss: {loss}, Test Accuracy: {accuracy}")

### 실험 2
- playground 의 기존 코드를 활용해서 실험을 진행
- **conv filter**를 조절하는 실험을 진행

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

# Load MNIST as 28x28x1 float32 images scaled by 1/255, with one-hot labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_train = X_train.astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
X_test = X_test.astype('float32') / 255

y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Model factory
def create_model(conv_out_channels=5, fc_output_size=10):
    """Build and compile a one-conv-layer CNN for 28x28x1 MNIST images.

    Args:
        conv_out_channels: Number of filters in the convolution.
        fc_output_size: Units in the hidden dense layer.

    Returns:
        A compiled ``Sequential`` model ending in a 10-way softmax.
    """
    conv = Conv2D(
        filters=conv_out_channels,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
        input_shape=(28, 28, 1),
    )
    pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')
    head = [
        Flatten(),
        Dense(fc_output_size, activation='relu'),
        Dense(10, activation='softmax'),  # MNIST has 10 classes
    ]

    model = Sequential([conv, pool, *head])
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Hyperparameter grid
conv_out_channels_list = [5, 10, 20]
fc_output_sizes = [50, 100, 200]

# One record per (filters, dense width) combination
results = []

# Train and evaluate a fresh model for every grid point
for conv_out_channels in conv_out_channels_list:
    for fc_output_size in fc_output_sizes:
        model = create_model(
            conv_out_channels=conv_out_channels,
            fc_output_size=fc_output_size,
        )
        history = model.fit(X_train, y_train, epochs=5, validation_split=0.2, verbose=1)
        loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
        record = dict(
            conv_out_channels=conv_out_channels,
            fc_output_size=fc_output_size,
            loss=loss,
            accuracy=accuracy,
            history=history.history,
        )
        results.append(record)
        print(f"Conv Out Channels: {conv_out_channels}, FC Output Size: {fc_output_size}, Accuracy: {accuracy:.4f}")

# Visualise the result (example: first grid point only)
first_curves = results[0]['history']
for key, label in (('accuracy', 'Training accuracy'),
                   ('val_accuracy', 'Validation accuracy')):
    plt.plot(first_curves[key], label=label)
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='upper left')
plt.show()

#### 결과
<img alt="img" height="500" src="result.png" width="800"/>    


----
layer의 너비가 넓을수록 더 빠르게 수렴한다

### **추가 실험**
- conv filter를 위에서 구성한 DNN 코드와 같은 파라미터로 실험을 진행하였다
```
    basic_model = create_cnn_model([32, 64], 128)
    wide_model = create_cnn_model([128, 256], 512)
    narrow_model = create_cnn_model([16, 32], 64)
```

In [31]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

# Model factory
def create_cnn_model(conv_filters, dense_units):
    """Build and compile a two-conv-block CNN for 28x28x1 images.

    Args:
        conv_filters: Filter counts for the first and second convolution.
        dense_units: Units in the hidden dense layer.

    Returns:
        A compiled ``Sequential`` model ending in a 10-way softmax.
    """
    first_conv = Conv2D(conv_filters[0], kernel_size=(3, 3), activation='relu',
                        input_shape=(28, 28, 1))
    first_pool = MaxPooling2D(pool_size=(2, 2))
    second_conv = Conv2D(conv_filters[1], kernel_size=(3, 3), activation='relu')
    second_pool = MaxPooling2D(pool_size=(2, 2))
    head = [
        Flatten(),
        Dense(dense_units, activation='relu'),
        Dense(10, activation='softmax'),
    ]

    model = Sequential([first_conv, first_pool, second_conv, second_pool, *head])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# Training helper
def train_model(model, x_train, y_train, x_test, y_test, epochs=10):
    """Fit ``model`` and return its ``History``; the test split is used for validation."""
    fit_kwargs = dict(
        epochs=epochs,
        validation_data=(x_test, y_test),
        verbose=2,
    )
    return model.fit(x_train, y_train, **fit_kwargs)


# Plot and save training/validation accuracy and loss for several runs
def plot_and_save_history(histories, titles, filename_prefix='cnn_model'):
    """Draw accuracy (top) and loss (bottom) curves, save the figure, then show it.

    Args:
        histories: Keras ``History`` objects, one per model.
        titles: Legend prefixes, paired with ``histories`` by position.
        filename_prefix: The figure is saved as ``<prefix>_comparison.png``.
    """
    fig, axs = plt.subplots(2, 1, figsize=(12, 12))
    acc_ax, loss_ax = axs

    for run, run_title in zip(histories, titles):
        curves = run.history
        # Validation curve first, then training, on each panel.
        for panel, metric in ((acc_ax, 'accuracy'), (loss_ax, 'loss')):
            panel.plot(curves[f'val_{metric}'], label=f'{run_title} val_{metric}')
            panel.plot(curves[metric], label=f'{run_title} {metric}')

    for panel, quantity, legend_loc in ((acc_ax, 'Accuracy', 'lower right'),
                                        (loss_ax, 'Loss', 'upper right')):
        panel.set_title(f'Model {quantity} Comparison')
        panel.set_xlabel('Epoch')
        panel.set_ylabel(quantity)
        panel.legend(loc=legend_loc)

    plt.tight_layout()
    plt.savefig(f'{filename_prefix}_comparison.png')
    plt.show()


def main():
    """Train basic, wide and narrow CNNs on MNIST and compare them.

    Plots accuracy and loss curves for the three runs, then prints each
    model's test loss and accuracy.
    """
    x_train, y_train, x_test, y_test = load_mnist_data()
    # load_mnist_data() returns flattened 784-value rows, but the CNNs declare
    # input_shape=(28, 28, 1). Restore the image layout before fit/evaluate.
    # This is a no-op if the data is already 4-D.
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    # Model definitions
    basic_model = create_cnn_model([32, 64], 128)
    wide_model = create_cnn_model([128, 256], 512)
    narrow_model = create_cnn_model([16, 32], 64)

    # Train each model and keep its history
    print("Training basic model...")
    basic_history = train_model(basic_model, x_train, y_train, x_test, y_test)
    print("Training wide model...")
    wide_history = train_model(wide_model, x_train, y_train, x_test, y_test)
    print("Training narrow model...")
    narrow_history = train_model(narrow_model, x_train, y_train, x_test, y_test)

    # Save the comparison figure
    plot_and_save_history([basic_history, wide_history, narrow_history],
                          ['Basic Model', 'Wide Model', 'Narrow Model'])

    print("\nPerformance Comparison:")
    basic_loss, basic_accuracy = basic_model.evaluate(x_test, y_test, verbose=2)
    wide_loss, wide_accuracy = wide_model.evaluate(x_test, y_test, verbose=2)
    narrow_loss, narrow_accuracy = narrow_model.evaluate(x_test, y_test, verbose=2)

    print(f"Basic Model - Test Loss: {basic_loss}, Test Accuracy: {basic_accuracy}")
    print(f"Wide Model - Test Loss: {wide_loss}, Test Accuracy: {wide_accuracy}")
    print(f"Narrow Model - Test Loss: {narrow_loss}, Test Accuracy: {narrow_accuracy}")


# Run the CNN comparison when this cell (or the exported script) is executed directly.
if __name__ == "__main__":
    main()

Training basic model...
Epoch 1/10


2024-05-28 15:11:21.679839: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2024-05-28 15:11:37.792124: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1875/1875 - 19s - loss: 0.2425 - accuracy: 0.9279 - val_loss: 0.1294 - val_accuracy: 0.9594 - 19s/epoch - 10ms/step
Epoch 2/10
1875/1875 - 18s - loss: 0.1015 - accuracy: 0.9691 - val_loss: 0.0972 - val_accuracy: 0.9697 - 18s/epoch - 9ms/step
Epoch 3/10
1875/1875 - 18s - loss: 0.0712 - accuracy: 0.9783 - val_loss: 0.0849 - val_accuracy: 0.9735 - 18s/epoch - 10ms/step
Epoch 4/10
1875/1875 - 18s - loss: 0.0544 - accuracy: 0.9828 - val_loss: 0.0823 - val_accuracy: 0.9775 - 18s/epoch - 10ms/step
Epoch 5/10
1875/1875 - 19s - loss: 0.0421 - accuracy: 0.9858 - val_loss: 0.0893 - val_accuracy: 0.9738 - 19s/epoch - 10ms/step
Epoch 6/10
1875/1875 - 24s - loss: 0.0336 - accuracy: 0.9888 - val_loss: 0.0785 - val_accuracy: 0.9777 - 24s/epoch - 13ms/step
Epoch 7/10
1875/1875 - 20s - loss: 0.0277 - accuracy: 0.9909 - val_loss: 0.0919 - val_accuracy: 0.9761 - 20s/epoch - 11ms/step
Epoch 8/10
1875/1875 - 22s - loss: 0.0233 - accuracy: 0.9921 - val_loss: 0.1022 - val_accuracy: 0.9732 - 22s/epoch - 12ms/s

2024-05-28 15:14:39.305530: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2024-05-28 15:14:58.156684: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1875/1875 - 24s - loss: 0.1858 - accuracy: 0.9431 - val_loss: 0.0970 - val_accuracy: 0.9696 - 24s/epoch - 13ms/step
Epoch 2/10
1875/1875 - 22s - loss: 0.0785 - accuracy: 0.9755 - val_loss: 0.0723 - val_accuracy: 0.9772 - 22s/epoch - 12ms/step
Epoch 3/10
1875/1875 - 21s - loss: 0.0529 - accuracy: 0.9831 - val_loss: 0.0692 - val_accuracy: 0.9805 - 21s/epoch - 11ms/step
Epoch 4/10
1875/1875 - 22s - loss: 0.0408 - accuracy: 0.9870 - val_loss: 0.0704 - val_accuracy: 0.9786 - 22s/epoch - 12ms/step
Epoch 5/10
1875/1875 - 20s - loss: 0.0328 - accuracy: 0.9897 - val_loss: 0.0662 - val_accuracy: 0.9821 - 20s/epoch - 11ms/step
Epoch 6/10
1875/1875 - 22s - loss: 0.0281 - accuracy: 0.9906 - val_loss: 0.0937 - val_accuracy: 0.9776 - 22s/epoch - 12ms/step
Epoch 7/10
1875/1875 - 20s - loss: 0.0234 - accuracy: 0.9923 - val_loss: 0.0927 - val_accuracy: 0.9799 - 20s/epoch - 11ms/step
Epoch 8/10
1875/1875 - 21s - loss: 0.0192 - accuracy: 0.9937 - val_loss: 0.0857 - val_accuracy: 0.9799 - 21s/epoch - 11ms/

2024-05-28 15:18:14.131724: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2024-05-28 15:18:31.312628: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1875/1875 - 20s - loss: 0.3714 - accuracy: 0.8954 - val_loss: 0.1959 - val_accuracy: 0.9428 - 20s/epoch - 11ms/step
Epoch 2/10
1875/1875 - 19s - loss: 0.1829 - accuracy: 0.9458 - val_loss: 0.1577 - val_accuracy: 0.9548 - 19s/epoch - 10ms/step
Epoch 3/10
1875/1875 - 20s - loss: 0.1458 - accuracy: 0.9571 - val_loss: 0.1393 - val_accuracy: 0.9582 - 20s/epoch - 11ms/step
Epoch 4/10
1875/1875 - 21s - loss: 0.1232 - accuracy: 0.9631 - val_loss: 0.1253 - val_accuracy: 0.9631 - 21s/epoch - 11ms/step
Epoch 5/10
1875/1875 - 21s - loss: 0.1086 - accuracy: 0.9674 - val_loss: 0.1200 - val_accuracy: 0.9640 - 21s/epoch - 11ms/step
Epoch 6/10
1875/1875 - 20s - loss: 0.0976 - accuracy: 0.9713 - val_loss: 0.1270 - val_accuracy: 0.9634 - 20s/epoch - 11ms/step
Epoch 7/10
1875/1875 - 20s - loss: 0.0885 - accuracy: 0.9728 - val_loss: 0.1106 - val_accuracy: 0.9659 - 20s/epoch - 11ms/step
Epoch 8/10
1875/1875 - 20s - loss: 0.0823 - accuracy: 0.9750 - val_loss: 0.1189 - val_accuracy: 0.9659 - 20s/epoch - 11ms/

#### 실험 결과
<img alt="img" height="600" src="cnn_model_comparison.png" width="600"/>

| 모델 유형   | Test Loss | Test Acc | 해석  |
|---|---|----|-----|
| 기본 모델   | 0.0326    | 0.9920   | - 가장 낮은 손실 값으로 최적의 성능<br>- 매우 높은 정확도<br>- 적절한 컴퓨팅 자원을 사용하여 균형 잡힌 성능  |
| 넓은 모델   | 0.0426    | 0.9919   | - 손실 값이 조금 높지만 매우 높은 정확도<br>- 기본 모델과 거의 동일한 성능<br>- 과적합 가능성이 있으므로 추가 검토 필요<br>- 복잡한 패턴을 잘 학습 |
| 좁은 모델   | 0.0421    | 0.9883   | - 기본 모델보다 높은 손실 값 (넓은 모델과는 비슷한 수준)<br>- 여전히 우수한 성능을 보이지만 다른 모델보다 약간 낮은 정확도<br>- 표현력의 한계로 인해 일부 패턴 학습에 어려움<br>- 계산 비용이 적어 빠른 학습 가능 |

## 회고
- keras에 있는 기본 모델들을 사용해보기만 했지 직접 밑바닥부터 구현해보는 경험을 통해 좀 더 학습 과정을 명확하게 볼 수 있어서 좋았다.
- playground 사이트가 자꾸 안돼서 실험을 진행할 수는 없었지만 오히려 직접 모델링을 진행해보는 과정에서 좀 더 효율적인 실험을 위해 모델을 만드는 방법을 알게 되었다(함수로 구현)