# Введение в искусственные нейронные сети
# Урок 3. TensorFlow

# IMDB

In [6]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

2.1.0


In [105]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
from keras.utils import to_categorical
from keras import models
from keras import layers

## Импортируем датасет

In [106]:
imdb_mnist = keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb_mnist.load_data()

## Анализ датасета 

Давайте посмотрим структуры полученного массива данных:

In [107]:
x_train.shape

(25000,)

In [108]:
len(x_train[0])

218

In [109]:
y_train

array([1, 0, 0, ..., 0, 1, 0])

Проанализируем тестовую выборку:

In [110]:
x_test.shape

(25000,)

In [111]:
len(x_train[0])

218

In [112]:
y_test

array([0, 1, 1, ..., 0, 0, 0])

## Построение модели

Построение нейронной сети подразумевает конфигурацию ее слоев и последующую компиляцию.

### Определение слоев

Давайте создадим 3 слоя нейронной сети с помощью функционала Keras.layers

In [113]:
from keras.datasets import imdb
(training_data, training_targets), (testing_data, testing_targets) = imdb.load_data(num_words=10000)
data = np.concatenate((training_data, testing_data), axis=0)
targets = np.concatenate((training_targets, testing_targets), axis=0)

In [114]:
print("Categories:", np.unique(targets))
print("Number of unique words:", len(np.unique(np.hstack(data))))

length = [len(i) for i in data]
print("Average Review length:", np.mean(length))
print("Standard Deviation:", round(np.std(length)))

Categories: [0 1]
Number of unique words: 9998
Average Review length: 234.75892
Standard Deviation: 173.0


In [115]:
index = imdb.get_word_index()
reverse_index = dict([(value, key) for (key, value) in index.items()]) 
decoded = " ".join( [reverse_index.get(i - 3, "#") for i in data[1]] )
print(decoded) 

# big hair big boobs bad music and a giant safety pin these are the words to best describe this terrible movie i love cheesy horror movies and i've seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an abomination the script is completely laughable the best is the end showdown with the cop and how he worked out who the killer is it's just so damn terribly written the clothes are sickening and funny in equal # the hair is big lots of boobs # men wear those cut # shirts that show off their # sickening that men actually wore them and the music is just # trash that plays over and over again in almost every scene there is trashy music boobs and # taking away bodies and the gym still doesn't close for # all joking aside this is a truly bad film whose only charm is to look back on the disaster that was the 80's and have a good old laugh at how bad everything was back then


In [116]:
def vectorize(sequences, dimension = 10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1
    return results

In [117]:
data = vectorize(data)
targets = np.array(targets).astype("float32")

In [118]:
test_x = data[:10000]
test_y = targets[:10000]
train_x = data[10000:]
train_y = targets[10000:]

## Перебор параметров

Используем функцию brute_params для перебора параметров

In [179]:
def brute_params(shape, activations, denses, weights_1, weights_2, batch_sizes):
    for activation in activations:
        for dense in denses:
            for weight_1 in weights_1:
                for weight_2 in weights_2:
                    for batch_size in batch_sizes:
                        print("\n{} {} {} {} {}".format(activation, dense, weight_1, weight_2, batch_size))

                        model = Sequential()
                        # Input - Layer
                        model.add(layers.Dense(dense, activation = activation, input_shape=(shape, )))
                        # Hidden - Layers
                        model.add(layers.Dropout(weight_1, noise_shape=None, seed=None))
                        model.add(layers.Dense(dense, activation = activation))
                        model.add(layers.Dropout(weight_2, noise_shape=None, seed=None))
                        model.add(layers.Dense(dense, activation = activation))
                        # Output- Layer
                        model.add(layers.Dense(1, activation = "sigmoid"))

                        model.compile(
                        optimizer = "adam",
                        loss = "binary_crossentropy",
                        metrics = ["accuracy"]
                        )

                        results = model.fit(
                        train_x, train_y,
                        epochs= 2,
                        batch_size = batch_size,
                        validation_data = (test_x, test_y)
                        )

                        test_loss, test_acc = model.evaluate(test_x, test_y, verbose=0)
                        print('\nTest accuracy:', test_acc)

In [180]:
shape = 10000
activations = ["relu", "softmax"]
denses = [10, 50, 100, 250, 500]
weights_1 = [1, 0.4, 0.3, 0.2]
weights_2 = [1, 0.4, 0.3, 0.2]
batch_sizes = [8, 16, 32, 64, 128]

Прогоним функцию brute_params по заданным парметрам и выберем те, где будут наилучшие показатели test_acc

In [None]:
brute_params(shape, activations, denses, weights_1, weights_2, batch_sizes)

In [119]:
model = Sequential()
# Input - Layer
model.add(layers.Dense(50, activation = "relu", input_shape=(10000, )))
# Hidden - Layers
model.add(layers.Dropout(0.3, noise_shape=None, seed=None))
model.add(layers.Dense(50, activation = "relu"))
model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
model.add(layers.Dense(50, activation = "relu"))
# Output- Layer
model.add(layers.Dense(1, activation = "sigmoid"))

model.summary()


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 50)                500050    
_________________________________________________________________
dropout_4 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 50)                2550      
_________________________________________________________________
dropout_5 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 51        
Total params: 505,201
Trainable params: 505,201
Non-trainable params: 0
________________________________________________

In [120]:
model.compile(
 optimizer = "adam",
 loss = "binary_crossentropy",
 metrics = ["accuracy"]
)

In [121]:
results = model.fit(
 train_x, train_y,
 epochs= 2,
 batch_size = 32,
 validation_data = (test_x, test_y)
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


In [127]:
results.params

{'batch_size': 500,
 'epochs': 2,
 'steps': None,
 'samples': 40000,
 'verbose': 1,
 'do_validation': True,
 'metrics': ['loss', 'accuracy', 'val_loss', 'val_accuracy']}

In [128]:
print("Test-Accuracy:", np.mean(results.history["val_accuracy"]))


Test-Accuracy: 0.8958499729633331
