## Sequential model

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Build the whole stack in a single constructor call: a 64-unit ReLU hidden
# layer followed by a 10-way softmax output layer.
model = keras.Sequential(
    [
        layers.Dense(units, activation=activation)
        for units, activation in ((64, "relu"), (10, "softmax"))
    ]
)

Metal device set to: Apple M2


2024-05-28 14:04:44.306171: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-28 14:04:44.306261: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


점진적으로 Sequential Model 만들기

In [4]:
# Same two-layer network as above, but assembled one layer at a time.
model = keras.Sequential()
model.add(layers.Dense(units=64, activation="relu"))      # hidden layer
model.add(layers.Dense(units=10, activation="softmax"))   # output layer

## Functional API model

In [5]:
# Symbolic input named "my_input"; shape=(3,) describes one sample.
inputs = keras.Input(name="my_input", shape=(3,))
# Calling a layer on a symbolic tensor wires it into the graph.
features = layers.Dense(units=64, activation="relu")(inputs)
outputs = layers.Dense(units=10, activation="softmax")(features)
# The model is defined by the tensors at its two ends.
model = keras.Model(outputs=outputs, inputs=inputs)

## Subclassing model 

In [11]:
import numpy as np

num_samples = 1280
vocabulary_size = 10000
num_tags = 100
num_departments = 4

# Seeded generator so the dummy arrays are identical on every
# Restart Kernel -> Run All (the global np.random state was never seeded).
rng = np.random.default_rng(42)

# Dummy input data: arrays of 0/1 integers.
title_data = rng.integers(0, 2, size=(num_samples, vocabulary_size))
text_body_data = rng.integers(0, 2, size=(num_samples, vocabulary_size))
tags_data = rng.integers(0, 2, size=(num_samples, num_tags))

# Dummy target data: one float in [0, 1) per sample for priority, and a row of
# 0/1 integers per sample for departments.
priority_data = rng.random(size=(num_samples, 1))
department_data = rng.integers(0, 2, size=(num_samples, num_departments))

In [12]:
class CustomerTicketModel(keras.Model):
    """Subclassed Keras model with three dict-keyed inputs and two outputs.

    Layers are created in the constructor; the forward pass is written
    imperatively in call().
    """

    def __init__(self, num_departments):
        # Initialise the keras.Model base class before attaching layers.
        super().__init__()
        self.concat_layer = layers.Concatenate()
        self.mixing_layer = layers.Dense(units=64, activation="relu")
        self.priority_scorer = layers.Dense(units=1, activation="sigmoid")
        self.department_classifier = layers.Dense(
            units=num_departments, activation="softmax"
        )

    def call(self, inputs):
        """Forward pass.

        `inputs` is indexed by the keys "title", "text_body" and "tags".
        Returns a `(priority, department)` tuple.
        """
        merged = self.concat_layer(
            [inputs[key] for key in ("title", "text_body", "tags")]
        )
        hidden = self.mixing_layer(merged)
        return self.priority_scorer(hidden), self.department_classifier(hidden)

In [13]:
model = CustomerTicketModel(num_departments=4)

# Calling the model on a dict of arrays runs call() and returns both outputs.
priority, department = model(
    dict(title=title_data, text_body=text_body_data, tags=tags_data)
)

## 혼합 모델

In [14]:
vocabulary_size = 10000
num_tags = 100
num_departments = 4

# Three named symbolic inputs.
title = keras.Input(name="title", shape=(vocabulary_size,))
text_body = keras.Input(name="text_body", shape=(vocabulary_size,))
tags = keras.Input(name="tags", shape=(num_tags,))

# Join the inputs into one tensor, then pass it through a shared hidden layer.
features = layers.Concatenate()([title, text_body, tags])
features = layers.Dense(units=64, activation="relu")(features)

# Two output heads reading from the same hidden features.
priority = layers.Dense(units=1, activation="sigmoid", name="priority")(features)
department = layers.Dense(
    units=num_departments, activation="softmax", name="department"
)(features)

# The model is specified by its input and output tensors.
model = keras.Model(
    inputs=[title, text_body, tags],
    outputs=[priority, department],
)

## 사용자 정의 훈련 스텝을 사용하는 모델

In [21]:
from tensorflow.keras.datasets import mnist

# 1) 모델 생성 (나중에 재사용하기 용이하기 위해 별도의 함수로 만듦)
def get_mnist_model():
    """Return a new, uncompiled functional model for flattened 28*28 inputs.

    Architecture: Dense(512, relu) -> Dropout(0.5) -> Dense(10, softmax).
    """
    pixels = keras.Input(shape=(28 * 28,))
    hidden = layers.Dense(units=512, activation="relu")(pixels)
    hidden = layers.Dropout(rate=0.5)(hidden)
    class_probs = layers.Dense(units=10, activation="softmax")(hidden)
    return keras.Model(pixels, class_probs)

# 2) load and split data
(images, labels), (test_images, test_labels) = mnist.load_data()

# Flatten each image to 28*28 values, cast to float32 and divide by 255.
images = images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255

# The first 10000 samples are held out for validation; the rest are for training.
val_images, val_labels = images[:10000], labels[:10000]
train_images, train_labels = images[10000:], labels[10000:]


In [22]:
model = get_mnist_model()

# Loss function used by the custom loop.
loss_fn = keras.losses.SparseCategoricalCrossentropy()
# Optimizer that applies the gradient updates.
optimizer = keras.optimizers.RMSprop()
# Metrics to monitor during training and evaluation.
metrics = [keras.metrics.SparseCategoricalAccuracy()]
# Running mean of the per-batch loss.
loss_tracking_metric = keras.metrics.Mean()

def train_step(inputs, targets):
    """Run one optimisation step on a batch and return the current logs.

    Uses the notebook-level `model`, `loss_fn`, `optimizer`, `metrics` and
    `loss_tracking_metric`. Returns a dict mapping each metric name, plus
    "loss", to its current value.
    """
    # Forward pass, recorded on the tape, with training=True.
    with tf.GradientTape() as grad_tape:
        preds = model(inputs, training=True)
        batch_loss = loss_fn(targets, preds)

    # Backward pass over the trainable weights only.
    weights = model.trainable_weights
    grads = grad_tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    # Fold this batch into the running metrics and the running loss mean.
    for metric in metrics:
        metric.update_state(targets, preds)
    loss_tracking_metric.update_state(batch_loss)

    logs = {metric.name: metric.result() for metric in metrics}
    logs["loss"] = loss_tracking_metric.result()
    return logs

In [23]:
def reset_metrics():
    """Reset every monitored metric, then the running loss mean."""
    for tracker in [*metrics, loss_tracking_metric]:
        tracker.reset_state()

In [24]:
# Batches of 32 (image, label) pairs, in array order.
training_dataset = tf.data.Dataset.from_tensor_slices(
    (train_images, train_labels)
).batch(32)

epochs = 3
for epoch in range(epochs):
    # Start every epoch with fresh metric state so logs are per-epoch.
    reset_metrics()
    for inputs_batch, targets_batch in training_dataset:
        logs = train_step(inputs_batch, targets_batch)
    # `logs` now holds the values returned by the epoch's last step.
    print(f"Results at the end of epoch {epoch}")
    for key, value in logs.items():
        print(f"...{key}: {value:.4f}")

Results at the end of epoch 0
...sparse_categorical_accuracy: 0.9189
...loss: 0.2748
Results at the end of epoch 1
...sparse_categorical_accuracy: 0.9659
...loss: 0.1232
Results at the end of epoch 2
...sparse_categorical_accuracy: 0.9783
...loss: 0.0797


In [25]:
def test_step(inputs, targets):
    """Evaluate one batch without updating weights and return the logs.

    Uses the notebook-level `model`, `loss_fn`, `metrics` and
    `loss_tracking_metric`. Keys in the returned dict carry a "val_" prefix.
    """
    preds = model(inputs, training=False)
    batch_loss = loss_fn(targets, preds)

    for metric in metrics:
        metric.update_state(targets, preds)
    loss_tracking_metric.update_state(batch_loss)

    logs = {"val_" + metric.name: metric.result() for metric in metrics}
    logs["val_loss"] = loss_tracking_metric.result()
    return logs

# Validation batches of 32 samples.
val_dataset = tf.data.Dataset.from_tensor_slices(
    (val_images, val_labels)
).batch(32)

# Clear the state left over from training before accumulating validation stats.
reset_metrics()
for inputs_batch, targets_batch in val_dataset:
    logs = test_step(inputs_batch, targets_batch)

print("Evaluation results:")
for key, value in logs.items():
    print(f"...{key}: {value:.4f}")

Evaluation results:
...val_sparse_categorical_accuracy: 0.9599
...val_loss: 0.1692


In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [17]:
# Disabled cell: every line is commented out, so nothing here executes.
# # Load the MNIST dataset
# (train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
# 
# # Preprocessing (normalise to [0, 1] and append a trailing dimension)
# train_images = train_images.astype("float32") / 255.0
# test_images = test_images.astype("float32") / 255.0
# train_images = tf.expand_dims(train_images, -1)
# test_images = tf.expand_dims(test_images, -1)

In [18]:
# Disabled cell: a commented-out convolutional model builder; nothing here executes.
# Note that its final Dense(10) has no activation, so this variant would output
# raw logits rather than probabilities.
# def create_model():
#     inputs = keras.Input(shape=(28, 28, 1))
#     x = layers.Conv2D(32, 3, activation='relu')(inputs)
#     x = layers.MaxPooling2D()(x)
#     x = layers.Conv2D(64, 3, activation='relu')(x)
#     x = layers.MaxPooling2D()(x)
#     x = layers.Flatten()(x)
#     x = layers.Dense(100, activation='relu')(x)
#     outputs = layers.Dense(10)(x)
#     model = keras.Model(inputs, outputs)
#     return model

In [27]:
class CustomModel(keras.Model):
    """keras.Model subclass that overrides train_step with a hand-written
    gradient update built on the compiled loss, optimizer and metrics."""

    def train_step(self, data):
        """Run one gradient update on `data`, an (images, labels) pair.

        Returns a dict mapping each name in `self.metrics` to its current result.
        """
        batch_x, batch_y = data

        # Forward pass recorded on the tape; regularisation losses are included.
        with tf.GradientTape() as grad_tape:
            y_pred = self(batch_x, training=True)
            batch_loss = self.compiled_loss(
                batch_y, y_pred, regularization_losses=self.losses
            )

        variables = self.trainable_variables
        grads = grad_tape.gradient(batch_loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))

        self.compiled_metrics.update_state(batch_y, y_pred)
        return {metric.name: metric.result() for metric in self.metrics}

# Instantiate: reuse the MNIST network's input/output tensors so that
# CustomModel wraps the same graph but trains with its own train_step.
model = get_mnist_model()
custom_model = CustomModel(outputs=model.output, inputs=model.input)

In [28]:
# get_mnist_model() ends in a softmax layer, so the network already outputs
# class probabilities. The loss must therefore treat predictions as
# probabilities (from_logits=False); from_logits=True would apply softmax a
# second time inside the loss.
custom_model.compile(optimizer=keras.optimizers.Adam(),
                     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                     metrics=[keras.metrics.SparseCategoricalAccuracy()])

# Train the model; fit() drives CustomModel.train_step for every batch.
custom_model.fit(train_images, train_labels, epochs=5, batch_size=32)

Epoch 1/5


  return dispatch_target(*args, **kwargs)
2024-05-28 14:38:14.228752: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1773d1e20>

## 학습률이 변화하는 모델

In [29]:
class CustomModel(keras.Model):
    """keras.Model subclass whose train_step can overwrite the optimizer's
    learning rate from a user-supplied schedule before each update.

    The schedule is a callable passed to compile(schedule_lr=...); it is
    called with `self.optimizer.iterations` and its return value is assigned
    to `self.optimizer.learning_rate`.
    """

    def __init__(self, *args, **kwargs):
        # Zero-argument super() binds to this class object itself rather than
        # looking up the global name "CustomModel", which gets rebound whenever
        # a notebook cell redefines the class.
        super().__init__(*args, **kwargs)
        # Not read anywhere in this class; kept as a public attribute.
        self.initial_lr = 0.001

    def compile(self, optimizer="rmsprop", loss=None, metrics=None,
                schedule_lr=None, **kwargs):
        """Compile the model and remember an optional learning-rate schedule.

        Args:
            optimizer, loss, metrics: forwarded to keras.Model.compile.
            schedule_lr: optional callable `step -> learning rate`; when it is
                None (or otherwise falsy) the learning rate is left untouched.
            **kwargs: any further keras.Model.compile options, forwarded
                unchanged.
        """
        super().compile(optimizer=optimizer, loss=loss, metrics=metrics, **kwargs)
        self.schedule_lr = schedule_lr

    def train_step(self, data):
        """Run one gradient update on `data`, an (images, labels) pair.

        Returns a dict mapping each name in `self.metrics` to its current result.
        """
        images, labels = data
        if self.schedule_lr:
            # NOTE(review): whether a tensor can be assigned to
            # optimizer.learning_rate inside a compiled train function may
            # depend on the Keras optimizer implementation in use - confirm.
            self.optimizer.learning_rate = self.schedule_lr(self.optimizer.iterations)

        # Forward pass recorded on the tape; regularisation losses are included.
        with tf.GradientTape() as tape:
            predictions = self(images, training=True)
            loss = self.compiled_loss(labels, predictions, regularization_losses=self.losses)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        self.compiled_metrics.update_state(labels, predictions)
        return {m.name: m.result() for m in self.metrics}

# 학습률 스케줄링 함수 정의
def schedule_lr(step):
    """Step-wise exponential decay: 0.001 * 0.1 ** (step // 1000).

    Args:
        step: the training step, either a plain Python number or an
            integer tensor/variable.

    Returns:
        The learning rate for `step`: a Python float for Python-number input,
        otherwise whatever the tensor arithmetic produces.
    """
    initial_lr = 0.001
    decay_steps = 1000
    decay_rate = 0.1
    num_decays = step // decay_steps
    if not isinstance(num_decays, (int, float)):
        # `float ** integer_tensor` makes TensorFlow convert 0.1 to the
        # tensor's integer dtype, which raises; cast the exponent to float.
        num_decays = tf.cast(num_decays, tf.float32)
    return initial_lr * (decay_rate ** num_decays)

# Instantiate: wrap a new MNIST network's input/output tensors in the
# schedule-aware CustomModel.
model = get_mnist_model()
custom_model = CustomModel(outputs=model.output, inputs=model.input)

## Image Data : Layer 의 너비가 달라지는 경우

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical

# Load MNIST, flatten each image to 784 values scaled by 1/255, and one-hot
# encode the labels into 10 classes.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (split.reshape(-1, 28 * 28) / 255.0 for split in (x_train, x_test))
y_train, y_test = (to_categorical(split, 10) for split in (y_train, y_test))

# Model 1: narrow hidden layers (64 -> 32).
model_small = Sequential([
    Dense(units=64, activation='relu', input_shape=(784,)),
    Dense(units=32, activation='relu'),
    Dense(units=10, activation='softmax'),
])
model_small.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model 2: wide hidden layers (256 -> 128).
model_large = Sequential([
    Dense(units=256, activation='relu', input_shape=(784,)),
    Dense(units=128, activation='relu'),
    Dense(units=10, activation='softmax'),
])
model_large.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train both models with the test split passed as validation data.
print("Training small model...")
history_small = model_small.fit(
    x_train, y_train, validation_data=(x_test, y_test), epochs=10, verbose=2
)

print("Training large model...")
history_large = model_large.fit(
    x_train, y_train, validation_data=(x_test, y_test), epochs=10, verbose=2
)

# Compare performance on the test split.
print("\nSmall Model Performance:")
loss_small, accuracy_small = model_small.evaluate(x_test, y_test, verbose=2)
print(f"Test loss: {loss_small}, Test accuracy: {accuracy_small}")

print("\nLarge Model Performance:")
loss_large, accuracy_large = model_large.evaluate(x_test, y_test, verbose=2)
print(f"Test loss: {loss_large}, Test accuracy: {accuracy_large}")

In [31]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical



# MNIST 데이터셋 로드 및 전처리
def load_mnist_data():
    """Load MNIST and return `(x_train, y_train, x_test, y_test)`.

    Images are reshaped to rows of 28*28 values and divided by 255.0; labels
    are one-hot encoded into 10 classes.
    """
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    train_x, test_x = (imgs.reshape(-1, 28 * 28) / 255.0 for imgs in (train_x, test_x))
    train_y, test_y = (to_categorical(lbls, 10) for lbls in (train_y, test_y))
    return train_x, train_y, test_x, test_y

# 모델 생성 함수
def create_model(layer_widths, input_dim=784, num_classes=10):
    """Build and compile a fully connected softmax classifier.

    Args:
        layer_widths: iterable of hidden-layer widths, in order. May be empty,
            in which case the model is a single softmax layer on the input.
        input_dim: number of input features per sample (default 784).
        num_classes: number of output classes (default 10).

    Returns:
        A Sequential model compiled with categorical cross-entropy, the Adam
        optimizer and the accuracy metric.
    """
    model = Sequential()
    # Only the first layer added to the model declares the input shape; this
    # also covers an empty `layer_widths`, where the output layer is first.
    first_layer_kwargs = {"input_shape": (input_dim,)}
    for width in layer_widths:
        model.add(Dense(width, activation='relu', **first_layer_kwargs))
        first_layer_kwargs = {}
    model.add(Dense(num_classes, activation='softmax', **first_layer_kwargs))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# 학습 및 평가 함수
def train_and_evaluate(model, x_train, y_train, x_test, y_test, epochs=10):
    """Fit `model`, then evaluate it, and return `(loss, accuracy)`.

    The test split is passed to fit() as validation data and is also the
    split used for the final evaluate() call. Both calls use verbose=2.
    """
    holdout = (x_test, y_test)
    model.fit(x_train, y_train, epochs=epochs, validation_data=holdout, verbose=2)
    test_loss, test_accuracy = model.evaluate(*holdout, verbose=2)
    return test_loss, test_accuracy

# 주 실행부
def main():
    """Train three MLPs of different widths on MNIST and print their test
    loss and accuracy side by side."""
    x_train, y_train, x_test, y_test = load_mnist_data()

    # All models are built up front, in this order, before any training starts.
    candidates = {
        "basic": create_model([128, 64]),
        "wide": create_model([512, 256]),
        "narrow": create_model([32, 16]),
    }

    # Train and score each model in turn.
    results = {}
    for label, net in candidates.items():
        print(f"Training {label} model...")
        results[label] = train_and_evaluate(net, x_train, y_train, x_test, y_test)

    print("\nPerformance Comparison:")
    for label, (loss, accuracy) in results.items():
        print(f"{label.capitalize()} Model - Test Loss: {loss}, Test Accuracy: {accuracy}")

if __name__ == "__main__":
    main()

Training basic model...
Epoch 1/10


2024-05-28 15:11:21.679839: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2024-05-28 15:11:37.792124: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1875/1875 - 19s - loss: 0.2425 - accuracy: 0.9279 - val_loss: 0.1294 - val_accuracy: 0.9594 - 19s/epoch - 10ms/step
Epoch 2/10
1875/1875 - 18s - loss: 0.1015 - accuracy: 0.9691 - val_loss: 0.0972 - val_accuracy: 0.9697 - 18s/epoch - 9ms/step
Epoch 3/10
1875/1875 - 18s - loss: 0.0712 - accuracy: 0.9783 - val_loss: 0.0849 - val_accuracy: 0.9735 - 18s/epoch - 10ms/step
Epoch 4/10
1875/1875 - 18s - loss: 0.0544 - accuracy: 0.9828 - val_loss: 0.0823 - val_accuracy: 0.9775 - 18s/epoch - 10ms/step
Epoch 5/10
1875/1875 - 19s - loss: 0.0421 - accuracy: 0.9858 - val_loss: 0.0893 - val_accuracy: 0.9738 - 19s/epoch - 10ms/step
Epoch 6/10
1875/1875 - 24s - loss: 0.0336 - accuracy: 0.9888 - val_loss: 0.0785 - val_accuracy: 0.9777 - 24s/epoch - 13ms/step
Epoch 7/10
1875/1875 - 20s - loss: 0.0277 - accuracy: 0.9909 - val_loss: 0.0919 - val_accuracy: 0.9761 - 20s/epoch - 11ms/step
Epoch 8/10
1875/1875 - 22s - loss: 0.0233 - accuracy: 0.9921 - val_loss: 0.1022 - val_accuracy: 0.9732 - 22s/epoch - 12ms/s

2024-05-28 15:14:39.305530: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2024-05-28 15:14:58.156684: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1875/1875 - 24s - loss: 0.1858 - accuracy: 0.9431 - val_loss: 0.0970 - val_accuracy: 0.9696 - 24s/epoch - 13ms/step
Epoch 2/10
1875/1875 - 22s - loss: 0.0785 - accuracy: 0.9755 - val_loss: 0.0723 - val_accuracy: 0.9772 - 22s/epoch - 12ms/step
Epoch 3/10
1875/1875 - 21s - loss: 0.0529 - accuracy: 0.9831 - val_loss: 0.0692 - val_accuracy: 0.9805 - 21s/epoch - 11ms/step
Epoch 4/10
1875/1875 - 22s - loss: 0.0408 - accuracy: 0.9870 - val_loss: 0.0704 - val_accuracy: 0.9786 - 22s/epoch - 12ms/step
Epoch 5/10
1875/1875 - 20s - loss: 0.0328 - accuracy: 0.9897 - val_loss: 0.0662 - val_accuracy: 0.9821 - 20s/epoch - 11ms/step
Epoch 6/10
1875/1875 - 22s - loss: 0.0281 - accuracy: 0.9906 - val_loss: 0.0937 - val_accuracy: 0.9776 - 22s/epoch - 12ms/step
Epoch 7/10
1875/1875 - 20s - loss: 0.0234 - accuracy: 0.9923 - val_loss: 0.0927 - val_accuracy: 0.9799 - 20s/epoch - 11ms/step
Epoch 8/10
1875/1875 - 21s - loss: 0.0192 - accuracy: 0.9937 - val_loss: 0.0857 - val_accuracy: 0.9799 - 21s/epoch - 11ms/

2024-05-28 15:18:14.131724: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2024-05-28 15:18:31.312628: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1875/1875 - 20s - loss: 0.3714 - accuracy: 0.8954 - val_loss: 0.1959 - val_accuracy: 0.9428 - 20s/epoch - 11ms/step
Epoch 2/10
1875/1875 - 19s - loss: 0.1829 - accuracy: 0.9458 - val_loss: 0.1577 - val_accuracy: 0.9548 - 19s/epoch - 10ms/step
Epoch 3/10
1875/1875 - 20s - loss: 0.1458 - accuracy: 0.9571 - val_loss: 0.1393 - val_accuracy: 0.9582 - 20s/epoch - 11ms/step
Epoch 4/10
1875/1875 - 21s - loss: 0.1232 - accuracy: 0.9631 - val_loss: 0.1253 - val_accuracy: 0.9631 - 21s/epoch - 11ms/step
Epoch 5/10
1875/1875 - 21s - loss: 0.1086 - accuracy: 0.9674 - val_loss: 0.1200 - val_accuracy: 0.9640 - 21s/epoch - 11ms/step
Epoch 6/10
1875/1875 - 20s - loss: 0.0976 - accuracy: 0.9713 - val_loss: 0.1270 - val_accuracy: 0.9634 - 20s/epoch - 11ms/step
Epoch 7/10
1875/1875 - 20s - loss: 0.0885 - accuracy: 0.9728 - val_loss: 0.1106 - val_accuracy: 0.9659 - 20s/epoch - 11ms/step
Epoch 8/10
1875/1875 - 20s - loss: 0.0823 - accuracy: 0.9750 - val_loss: 0.1189 - val_accuracy: 0.9659 - 20s/epoch - 11ms/