In [6]:
import tensorflow as tf
import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

In [7]:
# Layer widths of the MNIST classifier.
n_outputs = 10      # size of the "outputs" layer (one logit per class)
n_hidden1 = 300     # units in the hidden layer
n_inputs = 28 ** 2  # MNIST: one feature per pixel of a 28x28 image

# Zastosujmy regularyzację $l_1$

Najpierw tworzymy model:

In [9]:
# Clear the default graph so this cell can be re-run from a clean state.
tf.reset_default_graph()

# Inputs: a batch of flattened images and the matching integer class labels.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# `(None)` is plain `None` (parentheses alone do not make a tuple), which
# left the label placeholder with a completely unspecified shape.
# `(None,)` declares the intended 1-D batch of labels.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

# One ReLU hidden layer followed by a linear output layer producing logits.
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    logits = tf.layers.dense(hidden1, n_outputs, name="outputs")

Następnie weźmy wagi warstw i obliczmy całkowitą funkcję kosztu jako sumę:
  * zwykłej entropii krzyżowej,
  * kosztu $l_1$ (czyli sumy bezwzględnych wartości wag) pomnożonego przez współczynnik `scale`:

In [12]:
# Look up the kernels (weight matrices) of both dense layers by tensor name.
graph = tf.get_default_graph()
W1 = graph.get_tensor_by_name("hidden1/kernel:0")
W2 = graph.get_tensor_by_name("outputs/kernel:0")

scale = 0.001  # l1 regularization hyperparameter

with tf.name_scope("loss"):
    # Data term: cross-entropy per example, averaged over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")

    # Penalty term: sum of the absolute values of all weights in both kernels.
    l1_hidden1 = tf.reduce_sum(tf.abs(W1))
    l1_outputs = tf.reduce_sum(tf.abs(W2))
    reg_losses = l1_hidden1 + l1_outputs

    # Total objective = data term + scale * penalty term.
    loss = tf.add(base_loss, scale * reg_losses, name="loss")

In [13]:
learning_rate = 0.01

with tf.name_scope("eval"):
    # A prediction is correct when the true label is the top-1 logit.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Accuracy is the mean of the correctness flags cast to float.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

with tf.name_scope("train"):
    # Plain gradient descent on the total loss.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [17]:
# Load the MNIST data sets through TensorFlow's tutorial helper; the files
# are read from the local "MNIST_data/" directory.
# NOTE(review): this import sits mid-notebook; it would normally live in the
# import cell at the top.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/")

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [18]:
n_epochs = 20
batch_size = 200
# Number of full mini-batches in one pass over the training set.
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            # "\r" rewinds the cursor so the percentage overwrites itself.
            progress = 100 * batch_index // n_batches
            print("\r{}%".format(progress), end="")
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only;
        # test accuracy uses the whole test set.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(
            accuracy,
            feed_dict={X: mnist.test.images, y: mnist.test.labels})
        print(" ", epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

99%  0 Train accuracy: 0.855 Test accuracy: 0.8325
99%  1 Train accuracy: 0.855 Test accuracy: 0.8675
99%  2 Train accuracy: 0.88 Test accuracy: 0.8811
99%  3 Train accuracy: 0.89 Test accuracy: 0.8883
99%  4 Train accuracy: 0.845 Test accuracy: 0.8948
99%  5 Train accuracy: 0.885 Test accuracy: 0.8999
99%  6 Train accuracy: 0.885 Test accuracy: 0.9019
99%  7 Train accuracy: 0.92 Test accuracy: 0.9027
99%  8 Train accuracy: 0.91 Test accuracy: 0.9053
99%  9 Train accuracy: 0.905 Test accuracy: 0.9057
99%  10 Train accuracy: 0.86 Test accuracy: 0.9071
99%  11 Train accuracy: 0.915 Test accuracy: 0.9074
99%  12 Train accuracy: 0.92 Test accuracy: 0.9086
99%  13 Train accuracy: 0.925 Test accuracy: 0.9066
99%  14 Train accuracy: 0.945 Test accuracy: 0.908
99%  15 Train accuracy: 0.905 Test accuracy: 0.9073
99%  16 Train accuracy: 0.895 Test accuracy: 0.9065
99%  17 Train accuracy: 0.925 Test accuracy: 0.9075
99%  18 Train accuracy: 0.89 Test accuracy: 0.9058
99%  19 Train accuracy: 0.87 T

# Alternatywnie

Alternatywnie możemy przekazać funkcję regularyzacji do funkcji `tf.layers.dense()`, która wykorzysta ją do wyliczenia kosztu regularyzacji.

In [23]:
# Clear the default graph before building the second model.
tf.reset_default_graph()

# Layer widths: this network has two hidden layers.
n_inputs = 28 * 28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

# Inputs: a batch of flattened images and the matching integer class labels.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# `(None)` is plain `None` (parentheses alone do not make a tuple), which
# left the label placeholder with a completely unspecified shape.
# `(None,)` declares the intended 1-D batch of labels.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

Następnie użyjemy funkcji `partial()`, aby uniknąć wielokrotnego powtarzania tych samych argumentów.
 
Zwróć uwagę, że ustawiliśmy argument **kernel_regularizer**:

In [24]:
scale = 1e-3  # weight of the l1 penalty passed to the kernel regularizer

In [25]:
from functools import partial

# Dense-layer factory carrying the shared defaults: ReLU activation and an
# l1 penalty on the kernel weighted by `scale`.
l1_kernel_penalty = tf.contrib.layers.l1_regularizer(scale)
my_dense_layer = partial(
    tf.layers.dense,
    activation=tf.nn.relu,
    kernel_regularizer=l1_kernel_penalty,
)

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = my_dense_layer(hidden1, n_hidden2, name="hidden2")
    # The output layer stays linear: activation=None overrides the ReLU default.
    logits = my_dense_layer(hidden2, n_outputs, name="outputs", activation=None)

Następnie musimy dodać koszty regularyzacji do funkcji kosztu:

In [26]:
with tf.name_scope("loss"):
    # Data term: cross-entropy per example, averaged over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")

    # Fetch whatever the graph has registered under REGULARIZATION_LOSSES
    # and add those terms to the data term.
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss_terms = [base_loss]
    loss_terms.extend(reg_losses)
    loss = tf.add_n(loss_terms, name="loss")

In [27]:
learning_rate = 0.01

with tf.name_scope("eval"):
    # Flag each example whose true label is the highest-scoring class.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    correct_as_float = tf.cast(correct, dtype=tf.float32)
    accuracy = tf.reduce_mean(correct_as_float, name="accuracy")

with tf.name_scope("train"):
    # Plain gradient descent on the regularized loss.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()