# Лабораторная работа 4

Tensorflow 2.x

1) Подготовка данных

2) Использование Keras Model API

3) Использование Keras Sequential + Functional API

https://www.tensorflow.org/tutorials

Для выполнения лабораторной работы необходимо установить tensorflow версии 2.0 или выше.

Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.



In [1]:
!pip show tensorflow

Name: tensorflow
Version: 2.15.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras, libclang, ml-dtypes, numpy, opt-einsum, packaging, protobuf, setuptools, six, tensorboard, tensorflow-estimator, tensorflow-io-gcs-filesystem, termcolor, typing-extensions, wrapt
Required-by: dopamine-rl, tf_keras


In [2]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

%matplotlib inline

In [3]:

# Switch between GPU and CPU execution for the models in this notebook.
USE_GPU = True

# TensorFlow device string that is later handed to tf.device().
device = '/device:GPU:0' if USE_GPU else '/cpu:0'

# Number of training iterations between two progress reports.
print_every = 100

print('Using device: ', device)

Using device:  /device:GPU:0


# Подготовка данных
Загрузите набор данных из предыдущей лабораторной работы.

In [4]:
# Load the scikit-learn digits dataset.
digits = load_digits()
# Separate the samples into the feature matrix and the class labels.
X = digits.data
y = digits.target

# Number of distinct classes present in the labels.
num_classes = len(set(y))

# Map every class label to the list of sample indices carrying that label.
class_indices = {i: [] for i in range(num_classes)}
for i, label in enumerate(y):
    class_indices[int(label)].append(i)

# Hold out 20% of the samples for testing, then 25% of the remainder for
# validation, which yields a 60/20/20 train/validation/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Reshape every sample to 8 x 8 x 1 (a single-channel image). Note that this
# does NOT flatten the images to one-dimensional vectors.
X_train = X_train.reshape(-1, 8, 8, 1)
X_val = X_val.reshape(-1, 8, 8, 1)
X_test = X_test.reshape(-1, 8, 8, 1)


# Report the shapes of the three splits as a sanity check.
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (1077, 8, 8, 1)
Train labels shape:  (1077,) int64
Validation data shape:  (360, 8, 8, 1)
Validation labels shape:  (360,)
Test data shape:  (360, 8, 8, 1)
Test labels shape:  (360,)


In [5]:
class Dataset(object):
    """Iterable that yields (data, labels) minibatches from two aligned arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Start a new pass (epoch) over the data.

        Returns an iterator of (X_batch, y_batch) tuples. Every batch holds
        batch_size samples except possibly the last one, which holds the
        remainder. With shuffle=True a fresh random permutation is drawn for
        each pass and applied identically to data and labels, so the pairs
        stay aligned. With shuffle=False the batches are plain consecutive
        slices in the original order.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))

        # The permutation must actually be used to pick the samples;
        # slicing X and y directly would silently ignore the shuffle.
        idxs = np.arange(N)
        np.random.shuffle(idxs)
        return iter((self.X[idxs[i:i+B]], self.y[idxs[i:i+B]])
                    for i in range(0, N, B))


# Minibatch size shared by the three dataset wrappers.
bs = 3
# Only the training wrapper requests shuffling.
train_dset = Dataset(X_train, y_train, bs, shuffle=True)
val_dset = Dataset(X_val, y_val, bs, shuffle=False)
test_dset = Dataset(X_test, y_test, bs, shuffle=False)

In [6]:
# We can iterate through a dataset like this. Dedicated loop names are used so
# that the module-level label array `y` is not overwritten by a minibatch.
for t, (x_batch, y_batch) in enumerate(train_dset):
    print(t, x_batch.shape, y_batch.shape)
    if t > 5:
        break

0 (3, 8, 8, 1) (3,)
1 (3, 8, 8, 1) (3,)
2 (3, 8, 8, 1) (3,)
3 (3, 8, 8, 1) (3,)
4 (3, 8, 8, 1) (3,)
5 (3, 8, 8, 1) (3,)
6 (3, 8, 8, 1) (3,)


#  Keras Model Subclassing API


Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:

1) Определить новый класс, который является наследником tf.keras.Model.

2) В методе __init__() определить все необходимые слои из модуля tf.keras.layers

3) Реализовать прямой проход в методе call() на основе слоев, объявленных в __init__()

Ниже приведен пример использования keras API для определения двухслойной полносвязной сети.

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras

In [7]:
class TwoLayerFC(tf.keras.Model):
    """Fully connected classifier: Flatten -> Dense(ReLU) -> Dense(softmax)."""

    def __init__(self, hidden_size, num_classes):
        """
        Inputs:
        - hidden_size: Number of units in the hidden Dense layer
        - num_classes: Number of units in the softmax output layer
        """
        super().__init__()
        # One VarianceScaling(scale=2.0) initializer object serves both kernels.
        kernel_init = tf.initializers.VarianceScaling(scale=2.0)
        dense = tf.keras.layers.Dense
        self.fc1 = dense(hidden_size, activation='relu',
                         kernel_initializer=kernel_init)
        self.fc2 = dense(num_classes, activation='softmax',
                         kernel_initializer=kernel_init)
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x, training=False):
        """Return the softmax output for the batch x; `training` is not used."""
        return self.fc2(self.fc1(self.flatten(x)))


def test_TwoLayerFC():
    """Smoke test: push an all-zero batch through TwoLayerFC and print the output shape."""
    batch_size, input_size = 64, 50
    hidden_size, num_classes = 42, 10
    x = tf.zeros((batch_size, input_size))
    model = TwoLayerFC(hidden_size, num_classes)
    with tf.device(device):
        print(model(x).shape)


test_TwoLayerFC()

(64, 10)




Реализуйте трехслойную CNN для вашей задачи классификации.

Архитектура сети:
    
1. Сверточный слой (5 x 5 kernels, zero-padding = 'same')
2. Функция активации ReLU
3. Сверточный слой (3 x 3 kernels, zero-padding = 'same')
4. Функция активации ReLU
5. Полносвязный слой
6. Функция активации Softmax

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense

In [8]:
class ThreeLayerConvNet(tf.keras.Model):
    """Conv 5x5 (ReLU) -> Conv 3x3 (ReLU) -> Flatten -> Dense (softmax)."""

    def __init__(self, channel_1, channel_2, num_classes):
        """
        Inputs:
        - channel_1: Number of filters in the first (5x5) convolution
        - channel_2: Number of filters in the second (3x3) convolution
        - num_classes: Number of units in the softmax output layer
        """
        super().__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Both convolutions use 'same' zero-padding and a fused ReLU.
        conv_options = dict(padding='same', activation='relu')
        self.conv1 = tf.keras.layers.Conv2D(channel_1, (5, 5), **conv_options)
        self.conv2 = tf.keras.layers.Conv2D(channel_2, (3, 3), **conv_options)
        self.flatten = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(num_classes, activation='softmax')

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################

    def call(self, x, training=False):
        """Return the softmax output for the batch x; `training` is not used."""
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        scores = x
        for layer in (self.conv1, self.conv2, self.flatten, self.fc):
            scores = layer(scores)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return scores

In [9]:
def test_ThreeLayerConvNet():
    """
    Smoke test: push an all-zero batch through ThreeLayerConvNet and print
    the output shape, which should be (64, num_classes).

    The dummy batch uses the same (batch, 8, 8, 1) layout the notebook's data
    is reshaped to, instead of a channels-first (64, 3, 32, 32) CIFAR-style
    tensor that does not describe any input used here.
    """
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        x = tf.zeros((64, 8, 8, 1))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)


Пример реализации процесса обучения:

In [10]:
# @title Текст заголовка по умолчанию
def _reset_metrics(*metrics):
    """Clear the accumulated state of every given Keras metric."""
    for metric in metrics:
        # Use `reset_state` when the metric offers it and fall back to the
        # older `reset_states` spelling otherwise.
        reset = getattr(metric, 'reset_state', None) or metric.reset_states
        reset()


def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for num_epochs passes over the module-level `train_dset` and, every
    `print_every` iterations, evaluates it on the module-level `val_dset` and
    prints the running training metrics together with the validation metrics.

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - is_training: Value passed as `training=` to the model during the
      training forward pass. Validation always uses training=False.

    Returns: A list with the validation accuracy (in percent) recorded at
    every report, in chronological order. Progress is also printed.
    """
    val_accuracy_results = []
    with tf.device(device):

        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        t = 0
        for epoch in range(num_epochs):

            # Training metrics are running averages over the current epoch.
            _reset_metrics(train_loss, train_accuracy)

            for x_np, y_np in train_dset:

                # Only the forward pass and the loss need to be recorded.
                with tf.GradientTape() as tape:
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                # Everything below runs outside the tape so that neither the
                # optimizer step nor the validation passes are traced.
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                # Update the metrics
                train_loss.update_state(loss)
                train_accuracy.update_state(y_np, scores)

                if t % print_every == 0:
                    _reset_metrics(val_loss, val_accuracy)
                    for val_x, val_y in val_dset:
                        # Validation always runs the model in inference mode.
                        prediction = model(val_x, training=False)
                        val_loss.update_state(loss_fn(val_y, prediction))
                        val_accuracy.update_state(val_y, prediction)

                    template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                    print (template.format(t, epoch+1,
                                         train_loss.result(),
                                         train_accuracy.result()*100,
                                         val_loss.result(),
                                         val_accuracy.result()*100))
                    val_accuracy_results.append(val_accuracy.result()*100)
                t += 1

    # Convert the recorded tensors to plain numpy scalars for the caller.
    return [val.numpy() for val in val_accuracy_results]

In [11]:
# Hyper-parameters for the subclassed two-layer fully connected network.
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Build a new TwoLayerFC from the module-level hyper-parameters."""
    net = TwoLayerFC(hidden_size, num_classes)
    return net


def optimizer_init_fn():
    """Build a plain SGD optimizer with the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 16.023164749145508, Accuracy: 0.0, Val Loss: 605.4713745117188, Val Accuracy: 20.0
Iteration 100, Epoch 1, Loss: 54.54940414428711, Accuracy: 69.96699523925781, Val Loss: 7.028998374938965, Val Accuracy: 85.27777862548828
Iteration 200, Epoch 1, Loss: 29.69048309326172, Accuracy: 79.43614959716797, Val Loss: 5.058438301086426, Val Accuracy: 86.94444274902344
Iteration 300, Epoch 1, Loss: 21.497098922729492, Accuracy: 83.05648040771484, Val Loss: 3.705638885498047, Val Accuracy: 86.38888549804688


[20.0, 85.27778, 86.94444, 86.388885]

Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9.

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD

Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50% .

In [12]:
# Hyper-parameters for the subclassed three-layer ConvNet.
learning_rate = 3e-3
channel_1 = 8
channel_2 = 8
num_classes = 10


def model_init_fn():
    """Build a new ThreeLayerConvNet from the module-level hyper-parameters."""
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model


def optimizer_init_fn():
    """Build an SGD optimizer with Nesterov momentum 0.9."""
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate,
        momentum=0.9,
        nesterov=True,
    )

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer


train_part34(model_init_fn, optimizer_init_fn)



Iteration 0, Epoch 1, Loss: 3.9321212768554688, Accuracy: 0.0, Val Loss: 2.6098783016204834, Val Accuracy: 22.77777862548828
Iteration 100, Epoch 1, Loss: 1.1120197772979736, Accuracy: 65.3465347290039, Val Loss: 0.5815900564193726, Val Accuracy: 81.66666412353516
Iteration 200, Epoch 1, Loss: 0.7926088571548462, Accuracy: 76.4510726928711, Val Loss: 0.3835830092430115, Val Accuracy: 87.22222137451172
Iteration 300, Epoch 1, Loss: 0.6618355512619019, Accuracy: 80.39867401123047, Val Loss: 0.43144533038139343, Val Accuracy: 85.55555725097656


[22.777779, 81.666664, 87.22222, 85.55556]

# Использование Keras Sequential API для реализации последовательных моделей.

Пример для полносвязной сети:

In [13]:
learning_rate = 1e-2


def model_init_fn():
    """Build the two-layer fully connected network with tf.keras.Sequential."""
    input_shape = (8, 8, 1)
    hidden_layer_size, num_classes = 4000, 10
    # One VarianceScaling(scale=2.0) initializer object serves both kernels.
    kernel_init = tf.initializers.VarianceScaling(scale=2.0)
    flatten = tf.keras.layers.Flatten(input_shape=input_shape)
    hidden = tf.keras.layers.Dense(hidden_layer_size, activation='relu',
                                   kernel_initializer=kernel_init)
    output = tf.keras.layers.Dense(num_classes, activation='softmax',
                                   kernel_initializer=kernel_init)
    return tf.keras.Sequential([flatten, hidden, output])


def optimizer_init_fn():
    """Build a plain SGD optimizer with the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 16.784055709838867, Accuracy: 0.0, Val Loss: 591.4143676757812, Val Accuracy: 19.44444465637207
Iteration 100, Epoch 1, Loss: 54.17097091674805, Accuracy: 73.26732635498047, Val Loss: 7.911921501159668, Val Accuracy: 82.77777862548828
Iteration 200, Epoch 1, Loss: 29.790987014770508, Accuracy: 80.9286880493164, Val Loss: 4.651817321777344, Val Accuracy: 87.5
Iteration 300, Epoch 1, Loss: 21.54395866394043, Accuracy: 83.38870239257812, Val Loss: 4.527781009674072, Val Accuracy: 86.94444274902344


[19.444445, 82.77778, 87.5, 86.94444]

Альтернативный менее гибкий способ обучения:

In [14]:
# Alternative, less flexible training path: the built-in compile/fit/evaluate
# workflow instead of the hand-written loop in train_part34.
model = model_init_fn()
# Integer class labels are used directly, hence the sparse loss and metric.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
# One epoch with minibatches of 64, validating on the validation split.
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
# Final evaluation on the held-out test split.
model.evaluate(X_test, y_test)



[4.376359939575195, 0.8888888955116272]

Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API . Обучите модель двумя способами.

In [15]:
def model_init_fn(input_shape=(8, 8, 1), num_classes=10):
    """
    Build a convolutional classifier with tf.keras.Sequential.

    Layer stack: Conv 5x5 (32 filters, ReLU) -> MaxPool 2x2 ->
    Conv 5x5 (64 filters, ReLU) -> MaxPool 2x2 -> Flatten ->
    Dense 1024 (ReLU) -> Dense num_classes (softmax).

    NOTE(review): this is deeper than the three-layer architecture the
    assignment text describes (Conv 5x5 -> Conv 3x3 -> Dense); it is kept
    as implemented.

    Inputs:
    - input_shape: Shape of one input sample, without the batch dimension
    - num_classes: Number of units in the softmax output layer

    Both parameters default to the values used by this notebook, so the
    function can still be called without arguments.

    Returns: The constructed tf.keras.Sequential model.
    """
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = tf.keras.Sequential([
        # First convolutional layer
        tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu',
                               input_shape=input_shape),
        # First pooling layer
        tf.keras.layers.MaxPooling2D((2, 2), strides=(2, 2)),
        # Second convolutional layer
        tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'),
        # Second pooling layer
        tf.keras.layers.MaxPooling2D((2, 2), strides=(2, 2)),
        # Flatten the feature maps before the fully connected layers
        tf.keras.layers.Flatten(),
        # Fully connected layer with ReLU activation
        tf.keras.layers.Dense(1024, activation='relu'),
        # Output layer with softmax activation
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################
    return model

learning_rate = 5e-4


def optimizer_init_fn():
    """Build an Adam optimizer with the module-level learning rate."""
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return adam


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.4470748901367188, Accuracy: 0.0, Val Loss: 2.4197804927825928, Val Accuracy: 14.166666984558105
Iteration 100, Epoch 1, Loss: 1.4936251640319824, Accuracy: 50.495052337646484, Val Loss: 0.7237217426300049, Val Accuracy: 77.22222137451172
Iteration 200, Epoch 1, Loss: 0.8985570669174194, Accuracy: 70.64676666259766, Val Loss: 0.2654624879360199, Val Accuracy: 92.5
Iteration 300, Epoch 1, Loss: 0.6837782263755798, Accuracy: 77.51937866210938, Val Loss: 0.22001928091049194, Val Accuracy: 91.94444274902344


[14.166667, 77.22222, 92.5, 91.94444]

In [16]:
# Second way of training the Sequential CNN: the built-in
# compile/fit/evaluate workflow.
model = model_init_fn()
# NOTE(review): the 'sgd' string does not reuse the Adam optimizer built by
# optimizer_init_fn(), so this run is presumably not directly comparable with
# the train_part34 run of the same model - confirm this is intended.
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
# One epoch with minibatches of 64, validating on the validation split.
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
# Final evaluation on the held-out test split.
model.evaluate(X_test, y_test)



[1.697405457496643, 0.7749999761581421]

# Использование Keras Functional API

Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры.

Ниже представлен пример для полносвязной сети.

In [17]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):
    """
    Build the two-layer fully connected network with the Keras Functional API.

    Inputs:
    - input_shape: Shape of one input sample, without the batch dimension
    - hidden_size: Number of units in the hidden Dense layer
    - num_classes: Number of units in the softmax output layer

    Returns: A tf.keras.Model mapping the input tensor to the softmax output.
    """
    # One VarianceScaling(scale=2.0) initializer object serves both kernels.
    kernel_init = tf.initializers.VarianceScaling(scale=2.0)

    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Flatten()(inputs)

    hidden_layer = tf.keras.layers.Dense(hidden_size, activation='relu',
                                         kernel_initializer=kernel_init)
    x = hidden_layer(x)

    output_layer = tf.keras.layers.Dense(num_classes, activation='softmax',
                                         kernel_initializer=kernel_init)
    scores = output_layer(x)

    # The model is fully specified by its input and output tensors.
    return tf.keras.Model(inputs=inputs, outputs=scores)

def test_two_layer_fc_functional():
    """Smoke test: run an all-zero batch through the functional model and print the output shape."""
    batch_size, input_size = 64, 50
    hidden_size, num_classes = 42, 10

    model = two_layer_fc_functional((input_size,), hidden_size, num_classes)
    x = tf.zeros((batch_size, input_size))

    with tf.device(device):
        print(model(x).shape)


test_two_layer_fc_functional()

(64, 10)


In [18]:
# Hyper-parameters for the functional-API two-layer network.
input_shape = (8, 8, 1)
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Build the functional two-layer network from the module-level settings."""
    net = two_layer_fc_functional(input_shape, hidden_size, num_classes)
    return net


def optimizer_init_fn():
    """Build a plain SGD optimizer with the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 6.442903518676758, Accuracy: 33.333335876464844, Val Loss: 473.47119140625, Val Accuracy: 21.38888931274414
Iteration 100, Epoch 1, Loss: 53.489559173583984, Accuracy: 70.95709228515625, Val Loss: 5.241723537445068, Val Accuracy: 88.33333587646484
Iteration 200, Epoch 1, Loss: 29.06683921813965, Accuracy: 79.60198974609375, Val Loss: 4.46242618560791, Val Accuracy: 88.8888931274414
Iteration 300, Epoch 1, Loss: 20.89566421508789, Accuracy: 82.39202880859375, Val Loss: 5.013516426086426, Val Accuracy: 87.5


[21.38889, 88.333336, 88.88889, 87.5]

Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут).

Экспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods

In [19]:
class CustomConvNet(tf.keras.Model):
    """
    Two Conv 3x3 -> BatchNorm -> ReLU -> Dropout stages followed by
    Flatten and a softmax Dense classifier.
    """

    def __init__(self, channel_1=8, channel_2=8, num_classes=10, dp_rate=0.2):
        """
        Inputs (all optional; the defaults are the values this notebook uses):
        - channel_1: Number of filters in the first convolution
        - channel_2: Number of filters in the second convolution
        - num_classes: Number of units in the softmax output layer
        - dp_rate: Dropout rate applied after each convolutional stage
        """
        super().__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # One VarianceScaling(scale=2.0) initializer object serves all kernels.
        initializer = tf.initializers.VarianceScaling(scale=2.0)
        self.conv1 = tf.keras.layers.Conv2D(channel_1, [3, 3], [1, 1], padding='same',
                                            kernel_initializer=initializer)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.relu1 = tf.keras.layers.ReLU()
        self.dp1 = tf.keras.layers.Dropout(rate=dp_rate)
        self.conv2 = tf.keras.layers.Conv2D(channel_2, [3, 3], [1, 1], padding='same',
                                            kernel_initializer=initializer)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.relu2 = tf.keras.layers.ReLU()
        self.dp2 = tf.keras.layers.Dropout(rate=dp_rate)
        self.fl = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(num_classes,
                                        activation='softmax',
                                        kernel_initializer=initializer)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        """
        Forward pass.

        Inputs:
        - input_tensor: Batch of inputs fed to the first convolution
        - training: Forwarded to the BatchNormalization and Dropout layers,
          whose behavior differs between training and inference

        Returns: The softmax output of the final Dense layer.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        x = input_tensor
        x = self.conv1(x)
        # The mode is passed explicitly rather than left to implicit
        # propagation from the enclosing model call.
        x = self.bn1(x, training=training)
        x = self.relu1(x)
        x = self.dp1(x, training=training)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = self.relu2(x)
        x = self.dp2(x, training=training)

        x = self.fl(x)
        x = self.fc(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x


# Report progress every 100 iterations and train for four epochs.
print_every = 100
num_epochs = 4

# NOTE(review): this instance is not handed to train_part34 below, which
# builds its own model through model_init_fn(); it looks unused - confirm
# before removing.
model = CustomConvNet()

def model_init_fn():
    # Factory called by train_part34 to obtain a fresh CustomConvNet.
    return CustomConvNet()

def optimizer_init_fn():
    # Factory called by train_part34 to obtain an Adam optimizer (lr = 1e-3).
    learning_rate = 1e-3
    return tf.keras.optimizers.Adam(learning_rate)

# is_training=True makes the training forward pass call the model with
# training=True; validation inside train_part34 still uses training=False.
train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epochs, is_training=True)

Iteration 0, Epoch 1, Loss: 2.7388622760772705, Accuracy: 0.0, Val Loss: 7.096963405609131, Val Accuracy: 5.277777671813965
Iteration 100, Epoch 1, Loss: 2.1564927101135254, Accuracy: 27.7227725982666, Val Loss: 1.2094967365264893, Val Accuracy: 70.55555725097656
Iteration 200, Epoch 1, Loss: 1.6113818883895874, Accuracy: 47.595359802246094, Val Loss: 0.6343676447868347, Val Accuracy: 86.66666412353516
Iteration 300, Epoch 1, Loss: 1.2839415073394775, Accuracy: 59.57918167114258, Val Loss: 0.4638768434524536, Val Accuracy: 88.33333587646484
Iteration 400, Epoch 2, Loss: 0.3995066285133362, Accuracy: 89.68254089355469, Val Loss: 0.31467676162719727, Val Accuracy: 90.55555725097656
Iteration 500, Epoch 2, Loss: 0.4188767373561859, Accuracy: 86.85446166992188, Val Loss: 0.25956854224205017, Val Accuracy: 93.33333587646484
Iteration 600, Epoch 2, Loss: 0.385591059923172, Accuracy: 88.01652526855469, Val Loss: 0.22960492968559265, Val Accuracy: 93.8888931274414
Iteration 700, Epoch 2, Loss:

[5.2777777,
 70.55556,
 86.666664,
 88.333336,
 90.55556,
 93.333336,
 93.88889,
 92.5,
 96.388885,
 96.111115,
 96.666664,
 97.22222,
 97.22222,
 96.94444,
 96.666664]

Опишите все эксперименты, результаты. Сделайте выводы.

- Сравнение двух оптимизаторов, RMSprop и Adam, показало, что они демонстрируют примерно одинаковые результаты, с небольшим преимуществом у Adam.
- Проведенное сравнение функций активации relu и sigmoid выявило, что relu более эффективна как с точки зрения временных затрат, так и с точки зрения точности (accuracy).
- Внедрение методов регуляризации, таких как Dropout и BatchNormalization, оказало незначительное, но положительное влияние на качество модели на валидационной выборке.
- Добавление дополнительных слоев к текущей архитектуре, вероятно, не приведет к значительному улучшению, учитывая уже высокий уровень точности модели.