# Лабораторная работа №2.
## Реализация глубокой нейронной сети

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf

In [2]:
# 1. Реализуйте полносвязную нейронную сеть с помощью библиотеки Tensor Flow.

required_train_size=200000
required_test_size=10000
labels={}

def loadImages(path):
    data = {}
    index = 0
    label_dirs = os.listdir(f"{path}/")
    for label in label_dirs:
        zeros = np.zeros((len(label_dirs)), dtype=np.byte)
        zeros[index] = 1
        labels[label] = zeros
        index += 1
        for image in os.listdir(f"{path}/{label}/"):
            try:
                if label not in data:
                    data[label] = []
                data[label].append(mpimg.imread(f"{path}/{label}/{image}").flatten())
            except:
                pass
    return data

data_large = loadImages("large")
data_small = loadImages("small")

def prepareDataset(data):
    X = []
    y = []
    for k, v in data.items():
        X = X + v
        y = y + [labels[k]]*len(v)
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.int32)

X, y = prepareDataset(data_large)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=required_test_size, train_size=required_train_size)

del X
del y
del data_large

X_test, y_test = prepareDataset(data_small)
del data_small

In [83]:
start_learning_rate=0.5
shape = X_train.shape[1]
num_labels = len(labels.items())
optimizer = tf.optimizers.SGD(start_learning_rate)

tf_valid_dataset = tf.constant(X_valid)
tf_test_dataset = tf.constant(X_test)

batch_size = 128
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
dataset = dataset.repeat().shuffle(X_train.shape[0]).batch(batch_size)

#Определяет кол-во нейронов и число слоев
def prepare_neurons(input_number, output_number, *args):
    weights = []
    biases = []
    previous_number=input_number
    seed = [42, 21]

    for index, neuron_number in enumerate(args):
        weights.append(tf.Variable(tf.random.stateless_normal([previous_number, args[index]], seed), name=f"weight_{index}"))
        biases.append(tf.Variable(tf.zeros([args[index]]), name=f"bias_{index}"))
        previous_number=neuron_number

    weights.append(tf.Variable(tf.random.stateless_normal([previous_number, output_number], seed), name="weight_output"))
    biases.append(tf.Variable(tf.zeros([output_number]), name="bias_output"))

    return weights, biases

neuron_number_1 = 1024
weights, biases = prepare_neurons(shape, num_labels, neuron_number_1)

# Определяем функци активации и стоимости
layers = [tf.nn.relu]
def calculate_prediction(tf_dataset, weigths, biases, layers):
    prev_layer = tf_dataset
    for index, layer in enumerate(layers):
        prev_layer = layer(tf.add(tf.matmul(prev_layer, weigths[index]), biases[index]), name=f"layer_{index}")
    return tf.matmul(prev_layer, weigths[-1]) + biases[-1]

@tf.function
def run_optimization(tf_train_dataset, tf_train_labels, optimizer, weights, biases):
    def cost():
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=calculate_prediction(tf_train_dataset, weights, biases, layers), labels=tf_train_labels))
    optimizer.minimize(cost, weights + biases, tape=tf.GradientTape())

def train_model(num_steps, weights, biases, optimizer, optimization_function, calculate_prediction_function, tf_last_activation_function=tf.nn.softmax):
    for step, (batch_x, batch_y) in enumerate(dataset.take(num_steps), 1):
        optimization_function(batch_x, batch_y, optimizer, weights, biases)
        if step % 500 == 0 or step < 10:
            prediction = tf_last_activation_function(calculate_prediction_function(tf_valid_dataset, weights, biases, layers))
            print("Validation accuracy:", metrics.accuracy_score(np.argmax(y_valid, 1), np.argmax(prediction, 1)), "on step:", step)

In [84]:
num_steps = 6000
train_model(num_steps, weights, biases, optimizer, run_optimization, calculate_prediction)

Validation accuracy: 0.1275 on step: 1
Validation accuracy: 0.2614 on step: 2
Validation accuracy: 0.1919 on step: 3
Validation accuracy: 0.3295 on step: 4
Validation accuracy: 0.2435 on step: 5
Validation accuracy: 0.3171 on step: 6
Validation accuracy: 0.4895 on step: 7
Validation accuracy: 0.5365 on step: 8
Validation accuracy: 0.5313 on step: 9
Validation accuracy: 0.7045 on step: 500
Validation accuracy: 0.7538 on step: 1000
Validation accuracy: 0.7746 on step: 1500
Validation accuracy: 0.7797 on step: 2000
Validation accuracy: 0.7813 on step: 2500
Validation accuracy: 0.7918 on step: 3000
Validation accuracy: 0.7903 on step: 3500
Validation accuracy: 0.802 on step: 4000
Validation accuracy: 0.7944 on step: 4500
Validation accuracy: 0.7994 on step: 5000
Validation accuracy: 0.7642 on step: 5500
Validation accuracy: 0.8045 on step: 6000


In [85]:
#2. Как улучшилась точность классификатора по сравнению с логистической регрессией?

regression_accuracy = 0.8356
prediction = tf.nn.softmax(calculate_prediction(tf_test_dataset, weights, biases, layers))
deep_learning_accuracy = metrics.accuracy_score(np.argmax(y_test, 1), np.argmax(prediction, 1))

print(f"{regression_accuracy} < {deep_learning_accuracy}")


0.8356 < 0.8748664815210425


In [86]:
#3. Используйте регуляризацию и метод сброса нейронов (dropout) для борьбы с переобучением. Как улучшилось качество классификации?

optimizer = tf.optimizers.SGD(start_learning_rate)
rate = 0.25
weights, biases = prepare_neurons(shape, num_labels, neuron_number_1)
layers = [tf.nn.relu]
beta = 0.001

def calculate_prediction_dropout(tf_dataset, weigths, biases, layers):
    prev_dropout = tf_dataset
    for index, layer in enumerate(layers):
        layer = layer(tf.add(tf.matmul(prev_dropout, weigths[index]), biases[index]), name=f"layer_{index}")
        prev_dropout = tf.nn.dropout(layer, rate)
    return tf.matmul(prev_dropout, weigths[-1]) + biases[-1]

def run_optimization_dropout_and_regularization(tf_train_dataset, tf_train_labels, optimizer, weights, biases):
    def cost():
        regularization = tf.nn.l2_loss(weights[0])
        for weight in weights[1:]:
            regularization += tf.nn.l2_loss(weight)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=calculate_prediction_dropout(tf_train_dataset, weights, biases, layers), labels=tf_train_labels))
        return tf.reduce_mean(loss + beta * regularization)
    optimizer.minimize(cost, weights + biases, tape=tf.GradientTape())

In [87]:
train_model(num_steps, weights, biases, optimizer, run_optimization_dropout_and_regularization, calculate_prediction_dropout)

prediction = tf.nn.softmax(calculate_prediction(tf_test_dataset, weights, biases, layers))
deep_learning_accuracy_corrected = metrics.accuracy_score(np.argmax(y_test, 1), np.argmax(prediction, 1))

print(f"With droput and regularization: {deep_learning_accuracy_corrected}")

Validation accuracy: 0.1472 on step: 1
Validation accuracy: 0.2178 on step: 2
Validation accuracy: 0.1826 on step: 3
Validation accuracy: 0.2001 on step: 4
Validation accuracy: 0.2592 on step: 5
Validation accuracy: 0.393 on step: 6
Validation accuracy: 0.4634 on step: 7
Validation accuracy: 0.4815 on step: 8
Validation accuracy: 0.4915 on step: 9
Validation accuracy: 0.6335 on step: 500
Validation accuracy: 0.7268 on step: 1000
Validation accuracy: 0.7476 on step: 1500
Validation accuracy: 0.7904 on step: 2000
Validation accuracy: 0.8022 on step: 2500
Validation accuracy: 0.8145 on step: 3000
Validation accuracy: 0.8211 on step: 3500
Validation accuracy: 0.8297 on step: 4000
Validation accuracy: 0.8434 on step: 4500
Validation accuracy: 0.8443 on step: 5000
Validation accuracy: 0.8542 on step: 5500
Validation accuracy: 0.854 on step: 6000
With droput and regularization: 0.924481948301645


In [91]:
# Воспользуйтесь динамически изменяемой скоростью обучения (learning rate). Наилучшая точность, достигнутая с помощью данной модели составляет 97.1%. Какую точность демонстрирует Ваша реализованная модель?

num_steps = 8000
weights, biases = prepare_neurons(shape, num_labels, neuron_number_1)
layers = [tf.nn.relu]
learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(start_learning_rate, 4000, 0.9, staircase=True)
optimizer = tf.optimizers.SGD(learning_rate)

train_model(num_steps, weights, biases, optimizer, run_optimization_dropout_and_regularization, calculate_prediction_dropout)

prediction = tf.nn.softmax(calculate_prediction(tf_test_dataset, weights, biases, layers))
deep_learning_accuracy_dynamic_learning_rate = metrics.accuracy_score(np.argmax(y_test, 1), np.argmax(prediction, 1))

print(f"With dynamic learning rate: {deep_learning_accuracy_dynamic_learning_rate}")

Validation accuracy: 0.1618 on step: 1
Validation accuracy: 0.1737 on step: 2
Validation accuracy: 0.1962 on step: 3
Validation accuracy: 0.3366 on step: 4
Validation accuracy: 0.1649 on step: 5
Validation accuracy: 0.3661 on step: 6
Validation accuracy: 0.451 on step: 7
Validation accuracy: 0.5182 on step: 8
Validation accuracy: 0.5119 on step: 9
Validation accuracy: 0.6315 on step: 500
Validation accuracy: 0.7168 on step: 1000
Validation accuracy: 0.7588 on step: 1500
Validation accuracy: 0.7878 on step: 2000
Validation accuracy: 0.7967 on step: 2500
Validation accuracy: 0.8171 on step: 3000
Validation accuracy: 0.8261 on step: 3500
Validation accuracy: 0.837 on step: 4000
Validation accuracy: 0.8428 on step: 4500
Validation accuracy: 0.8496 on step: 5000
Validation accuracy: 0.8476 on step: 5500
Validation accuracy: 0.8514 on step: 6000
Validation accuracy: 0.8533 on step: 6500
Validation accuracy: 0.8624 on step: 7000
Validation accuracy: 0.86 on step: 7500
Validation accuracy: 0.8