# [Problem 1] Looking back on the scratch

Looking back at the from-scratch implementations so far, list what was needed to implement deep learning.

## Answer
Initialization and update of weights and biases
Looping through epochs.
Output through activation function.
Repeat forward and back propagation.

Preparing the data set
We will use the Iris dataset that we have used before. The sample code below assumes that Iris.csv is in the same directory as the notebook.

Iris Species

The objective variable is Species, but only the following two species are used out of the three types.

Iris-versicolor
Iris-virginica

# [Problem 2] Consider the correspondence between scratch and TensorFlow

In [None]:
"""
Binary classification of Iris dataset using a neural network implemented in TensorFlow.
"""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
# Load the dataset
dataset_path = "datasets_19_420_Iris.csv"  # path of the Iris CSV file
df = pd.read_csv(dataset_path)

# Keep only the two species used for binary classification, then create X and y
df = df[(df["Species"] == "Iris-versicolor") | (df["Species"] == "Iris-virginica")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
y = np.array(y)
X = np.array(X)

# Convert the string labels to 0/1 and make y two-dimensional: (n_samples, 1).
y[y == 'Iris-versicolor'] = 0
y[y == 'Iris-virginica'] = 1
# np.int was only an alias of the builtin int and was removed in NumPy 1.24,
# so the builtin is used directly.
y = y.astype(int)[:, np.newaxis]
# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Further split train into train and val
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# Mini Batch Class
class GetMiniBatch:
    """
    Iterator to get the mini-batch

    The samples are shuffled once, at construction time; every pass over the
    iterator then yields the same consecutive slices of that shuffled data.

    Parameters
    ----------
    X : ndarray of the following form, shape (n_samples, n_features)
      Training data
    y : ndarray of the following form, shape (n_samples, 1)
      Correct answer value
    batch_size : int
      batch size
    seed : int
      Seed of random number in NumPy
    """
    def __init__(self, X, y, batch_size = 10, seed=0):
        self.batch_size = batch_size
        # Seeds NumPy's global generator so the shuffle is reproducible.
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        # Number of batches, counting a final partial batch.
        # np.int was removed in NumPy 1.24, so convert with the builtin int.
        self._stop = int(np.ceil(X.shape[0] / self.batch_size))

    def __len__(self):
        """Return the number of mini-batches per pass."""
        return self._stop

    def __getitem__(self, item):
        """Return the ``item``-th mini-batch as an ``(X, y)`` pair."""
        p0 = item * self.batch_size
        p1 = p0 + self.batch_size
        return self.X[p0:p1], self.y[p0:p1]

    def __iter__(self):
        # Restart from the first batch every time iteration begins.
        self._counter = 0
        return self

    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter * self.batch_size
        p1 = p0 + self.batch_size
        self._counter += 1
        return self.X[p0:p1], self.y[p0:p1]
    

# --- Hyperparameters ---
learning_rate = 0.01   # step size handed to the optimizer
batch_size = 10        # samples per mini-batch
num_epochs = 10        # passes over the training data
n_hidden1, n_hidden2 = 50, 100       # widths of the first and second hidden layers
n_samples, n_input = X_train.shape   # (rows, feature columns) of the training data
n_classes = 1          # a single output column is enough for binary classification

# --- Graph inputs ---
# Placeholders only fix dtype and shape while the graph is built; the actual
# values are supplied at run time through feed_dict.
# https://note.nkmk.me/python-tensorflow-constant-variable-placeholder/
X = tf.placeholder("float", shape=[None, n_input])
Y = tf.placeholder("float", shape=[None, n_classes])

# --- Mini-batch iterator over the training split ---
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size)

# Is this the part of the computation graph? Network creation function. In Scratch, it's SimpleConvnet.
def example_net(x):
    """
    A simple 3-layer neural network

    Parameters
    ----------
    x : tensor, shape (n_batch, n_input)
      Input features

    Returns
    -------
    tensor, shape (n_batch, n_classes)
      Raw output of the last layer (logits); no output activation is applied here
    """
    # Declare weights and biases.
    # Variables are declared with tf.Variable; their initial values are drawn with
    # tf.random_normal, the counterpart of np.random.normal.
    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }

    # Layer computations. tf.matmul is the matrix product (the role np.dot plays in NumPy).
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_output = tf.matmul(layer_2, weights['w3']) + biases['b3'] # tf.add and + are equivalent
    return layer_output


# Build the network output (logits) for the input placeholder
logits = example_net(X)
# Objective function
# sigmoid_cross_entropy_with_logits applies the sigmoid to the logits and returns the
# element-wise cross entropy
#   z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))      (x = logits, z = labels)
# which is evaluated in the numerically stable form
#   max(x, 0) - x * z + log(1 + exp(-abs(x)))
# reduce_mean is the counterpart of np.mean; with axis=None a single scalar is returned.
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
# Optimization method
# The optimizer instance has to be bound to a name so that minimize() can be called on it.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# minimize() creates the op that computes the gradients and updates the variables
train_op = optimizer.minimize(loss_op)
# Estimation results
# tf.sign returns -1, 0 or +1. Subtracting 0.5 moves the decision threshold to zero, so a
# 0/1 label and a sigmoid probability get the same sign exactly when they fall on the
# same side of 0.5. tf.equal then yields an element-wise boolean tensor.
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))
# Accuracy: tf.cast converts the booleans to floats (like ndarray.astype) before averaging
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initializes the variables.
# This is just a shortcut for `variables_initializer(global_variables())`.
init = tf.global_variables_initializer()

# Run the computational graph
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # Loop for each epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop for each mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
            total_acc += acc
        total_loss /= n_samples
        total_acc /= n_samples
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}, val_acc : {:.3f}".format(epoch, loss, val_loss, acc, val_acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))

### Answers and notes
Tensorflow declares variables and such first. Isn't it an object type?\
It's not a surprise that it runs in C++. If the entrance is written in Python but the content is written in C++, it is better to assume that the basics are in the C system.\
Correspondence with the home-made (from-scratch) CNN:

Activation function → applied inside the layer computations with tf.nn.relu(); replacing that call changes the activation.\
Error function → implemented with tf.nn.sigmoid_cross_entropy_with_logits.\
Fully connected layer → implemented in the layer computations of example_net; `logits` holds the network structure, forward propagation runs when loss_op is evaluated, and the gradient computation and parameter update run in train_op. AdamOptimizer is used in this model.\
Epoch and mini-batch → same as in the scratch version: a mini-batch iterator and plain for loops over the epochs.\
Optimizer → instantiate it and call minimize().

3, Application to other data sets\
There are several small datasets that we have been working with so far. Rewrite the above sample code to create a neural network that will train and estimate on these.

Iris (using all three objective variables)
House Prices\
Use all three types of data sets: train, val, and test.

# [Problem 3] Create a model of Iris using all three types of objective variables

In [None]:
# Sigmoid cross-entropy loss; identical to the loss_op statement in the Problem 2 code.
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

In [None]:
# Element-wise correctness check; identical to the correct_pred statement in the Problem 2 code.
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))

In [None]:
import sys
"""
Tri-level classification of Iris dataset using a neural network implemented in TensorFlow.
"""

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
# Load the dataset
dataset_path = "datasets_19_420_Iris.csv"  # path of the Iris CSV file
df = pd.read_csv(dataset_path)

# Create X and y; all three species are kept this time
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
y = np.array(y)
X = np.array(X)

# Convert the labels to one-hot vectors, shape (n_samples, 3).
# The `sparse` keyword was renamed to `sparse_output` and later removed from
# scikit-learn, so the encoder's default sparse result is densified with
# .toarray() instead; this works with both old and new releases.
enc = OneHotEncoder(handle_unknown='ignore')
y = enc.fit_transform(y[:, np.newaxis]).toarray()

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Further split train into train and val
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# MiniBatch class
class GetMiniBatch:
    """
    Iterator to get the mini-batch.

    The samples are shuffled once, at construction time; every pass over the
    iterator then yields the same consecutive slices of that shuffled data.

    Parameters
    ----------
    X : ndarray of the following form, shape (n_samples, n_features)
      Training data
    y : ndarray of the following form, shape (n_samples, 1)
      Correct answer value
    batch_size : int
      batch size
    seed : int
      Seed of random number in NumPy
    """
    def __init__(self, X, y, batch_size = 10, seed=0):
        self.batch_size = batch_size
        # Seeds NumPy's global generator so the shuffle is reproducible.
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        # Number of batches, counting a final partial batch.
        # np.int was removed in NumPy 1.24, so convert with the builtin int.
        self._stop = int(np.ceil(X.shape[0] / self.batch_size))

    def __len__(self):
        """Return the number of mini-batches per pass."""
        return self._stop

    def __getitem__(self, item):
        """Return the ``item``-th mini-batch as an ``(X, y)`` pair."""
        p0 = item * self.batch_size
        p1 = p0 + self.batch_size
        return self.X[p0:p1], self.y[p0:p1]

    def __iter__(self):
        # Restart from the first batch every time iteration begins.
        self._counter = 0
        return self

    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter * self.batch_size
        p1 = p0 + self.batch_size
        self._counter += 1
        return self.X[p0:p1], self.y[p0:p1]
    

# --- Hyperparameters ---
learning_rate = 0.01   # step size handed to the optimizer
batch_size = 10        # samples per mini-batch
num_epochs = 10        # passes over the training data
n_hidden1, n_hidden2 = 50, 100       # widths of the first and second hidden layers
n_samples, n_input = X_train.shape   # (rows, feature columns) of the training data
n_classes = 3          # one output column per one-hot class (binary classification needed only 1)

# --- Graph inputs ---
# Placeholders only fix dtype and shape while the graph is built; the actual
# values are supplied at run time through feed_dict.
# https://note.nkmk.me/python-tensorflow-constant-variable-placeholder/
X = tf.placeholder("float", shape=[None, n_input])
Y = tf.placeholder("float", shape=[None, n_classes])

# --- Mini-batch iterator over the training split ---
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size)

# Is this the part of the computation graph? Network creation function. In Scratch, it's SimpleConvnet.
def example_net(x):
    """
    A simple 3-layer neural network

    Two ReLU hidden layers followed by a linear output layer. The raw output
    (logits) is returned; no output activation is applied here.
    """
    def normal_variable(shape):
        # Trainable variable whose initial value is drawn by tf.random_normal with a fixed op seed.
        return tf.Variable(tf.random_normal(shape, seed=128))

    # Parameters are created in the order w1, w2, w3, b1, b2, b3.
    weights = {
        'w1': normal_variable([n_input, n_hidden1]),
        'w2': normal_variable([n_hidden1, n_hidden2]),
        'w3': normal_variable([n_hidden2, n_classes])
    }
    biases = {
        'b1': normal_variable([n_hidden1]),
        'b2': normal_variable([n_hidden2]),
        'b3': normal_variable([n_classes])
    }

    # Affine transform (matrix product plus bias) followed by ReLU, twice.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['w1']), biases['b1']))
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['w2']), biases['b2']))
    # Linear output layer; `+` is equivalent to tf.add.
    return tf.matmul(hidden_2, weights['w3']) + biases['b3']
    

# Build the network output (logits) for the input placeholder
logits = example_net(X)
# Objective function
# softmax_cross_entropy_with_logits applies the softmax to the logits and returns the
# cross entropy against the one-hot labels, one value per sample.
# reduce_mean is the counterpart of np.mean; with axis=None a single scalar is returned.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))
# Optimization method
# The optimizer instance has to be bound to a name so that minimize() can be called on it.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# minimize() creates the op that computes the gradients and updates the variables
train_op = optimizer.minimize(loss_op)
# Estimation results
# A sample is correct when the index of the largest softmax probability matches the
# index of the 1 in its one-hot label. tf.equal yields an element-wise boolean tensor.
softmax_out = tf.nn.softmax(logits, axis=1)
correct_pred = tf.equal(tf.argmax(Y, 1), tf.argmax(softmax_out, 1))
# Accuracy: tf.cast converts the booleans to floats (like ndarray.astype) before averaging
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initializes the variables.
# This is just a shortcut for `variables_initializer(global_variables())`.
init = tf.global_variables_initializer()


# Run the computational graph
with tf.Session() as sess:
    # The session variable is `sess`; the capitalised `Sess` raised a NameError.
    sess.run(init)
    # Number of mini-batches per epoch (np.int was removed in NumPy 1.24, so use int()).
    total_batch = int(np.ceil(X_train.shape[0] / batch_size))
    for epoch in range(num_epochs):
        # Loop for each epoch
        total_loss = 0
        total_acc = 0
        for mini_batch_x, mini_batch_y in get_mini_batch_train:
            # Loop for each mini-batch: one optimizer step, then measure loss and accuracy
            # on the same batch with the updated parameters.
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
            total_acc += acc
        # loss_op and accuracy are already per-batch means, so the epoch value is their
        # average over the number of batches, not over the number of samples.
        total_loss /= total_batch
        total_acc /= total_batch
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        # Report the epoch averages rather than the values of the last mini-batch.
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}, val_acc : {:.3f}".format(epoch, total_loss, val_loss, total_acc, val_acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))

# [Problem 4] Creating a model of House Prices

In [None]:
#import os
#import random
#os.environ['PYTHONHASHSEED'] = "0"
#np.random.seed(0)
#random.seed(0)

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
"""
Regression classification of the HousePrice dataset using a neural network implemented in TensorFlow.
"""

# Load the dataset
dataset_path = "train.csv"  # path of the House Prices training CSV
df = pd.read_csv(dataset_path)

# Create X and y: two explanatory variables and the sale price as the target
y = df.loc[:, ["SalePrice"]]
X = df.loc[:, ["GrLivArea", "YearBuilt"]]
y = np.array(y)
X = np.array(X)

# Logarithmically transform the objective variable
y = np.log1p(y)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Further split train into train and val
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# Standardize the features.
# The scaler is fitted on the training split only and then applied to val and test,
# so that statistics of the held-out data do not leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# --- Hyperparameters ---
learning_rate = 0.01   # step size handed to the optimizer
batch_size = 10        # samples per mini-batch
num_epochs = 10        # passes over the training data
n_hidden1, n_hidden2 = 50, 100       # widths of the first and second hidden layers
n_samples, n_input = X_train.shape   # (rows, feature columns) of the training data
n_classes = 1          # width of the output layer: one column for the regression target

# --- Graph inputs ---
# Placeholders only fix dtype and shape while the graph is built; the actual
# values are supplied at run time through feed_dict.
# https://note.nkmk.me/python-tensorflow-constant-variable-placeholder/
X = tf.placeholder("float", shape=[None, n_input])
Y = tf.placeholder("float", shape=[None, n_classes])

# --- Mini-batch iterator over the training split ---
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size)

# Is this the part of the computation graph? Network creation function. In Scratch, it's SimpleConvnet.
def example_net(x):
    """
    A simple 3-layer neural network

    Two ReLU hidden layers followed by a linear output layer. The raw output
    of the last layer is returned; no output activation is applied here.
    """
    def normal_variable(shape):
        # Trainable variable whose initial value is drawn by tf.random_normal with a fixed op seed.
        return tf.Variable(tf.random_normal(shape, seed=0))

    # Parameters are created in the order w1, w2, w3, b1, b2, b3.
    weights = {
        'w1': normal_variable([n_input, n_hidden1]),
        'w2': normal_variable([n_hidden1, n_hidden2]),
        'w3': normal_variable([n_hidden2, n_classes])
    }
    biases = {
        'b1': normal_variable([n_hidden1]),
        'b2': normal_variable([n_hidden2]),
        'b3': normal_variable([n_classes])
    }

    # Affine transform (matrix product plus bias) followed by ReLU, twice.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['w1']), biases['b1']))
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['w2']), biases['b2']))
    # Linear output layer; `+` is equivalent to tf.add.
    return tf.matmul(hidden_2, weights['w3']) + biases['b3']


# Build the network output for the input placeholder.
# For regression this is the predicted value itself; the name `logits` is kept from
# the classification code.
logits = example_net(X)
# Objective function: mean squared error between prediction and target
loss_op = tf.reduce_mean(tf.square(logits - Y))
# Optimization method
# The optimizer instance has to be bound to a name so that minimize() can be called on it.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Alternative optimizer:
# optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
# minimize() creates the op that computes the gradients and updates the variables
train_op = optimizer.minimize(loss_op)
# Evaluation metric
# There is no classification accuracy in a regression task, so the metric reported
# under the name `accuracy` is again the mean squared error.
accuracy = tf.reduce_mean(tf.squared_difference(Y, logits))

# Op that initializes the variables.
# This is just a shortcut for `variables_initializer(global_variables())`.
init = tf.global_variables_initializer()



# Run the computational graph
with tf.Session() as sess:
    # The session variable is `sess`; the capitalised `Sess` raised a NameError.
    sess.run(init)
    # Number of mini-batches per epoch (np.int was removed in NumPy 1.24, so use int()).
    total_batch = int(np.ceil(X_train.shape[0] / batch_size))
    for epoch in range(num_epochs):
        # Loop for each epoch
        total_loss = 0
        total_acc = 0
        for mini_batch_x, mini_batch_y in get_mini_batch_train:
            # Loop for each mini-batch: one optimizer step, then measure loss and MSE
            # on the same batch with the updated parameters.
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
            total_acc += acc
        # The summed values are per-batch means, so the epoch value is their average over
        # the number of batches (dividing by batch_size scaled it incorrectly).
        total_loss /= total_batch
        total_acc /= total_batch
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, MSE : {:.3f}, val_MSE : {:.3f}".format(epoch, total_loss, val_loss, total_acc, val_acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_MSE : {:.3f}".format(test_acc))

# [Problem 5] Creating a MNIST model

In [None]:
# MNIST image geometry: 28 x 28 pixels with a single grayscale channel.
# Defined here because the reshape below needs these names before the Keras cell
# further down assigns them.
im_rows, im_cols = 28, 28
im_color = 1

mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add the channel axis and scale the pixel values from [0, 255] to [0, 1]
X_train = X_train.reshape(-1, im_rows, im_cols, im_color)
X_train = X_train.astype('float64') / 255
X_test = X_test.reshape(-1, im_rows, im_cols, im_color)
X_test = X_test.astype('float64') / 255
print(X_train.max()) # 1.0
print(X_train.min()) # 0.0
print(X_train[0].dtype)

In [None]:
# Convert the labels to one-hot vectors (this also makes y two-dimensional).
# The `sparse` keyword was renamed to `sparse_output` and later removed from
# scikit-learn, so the encoder's default sparse result is densified with
# .toarray() instead; this works with both old and new releases.
enc = OneHotEncoder(handle_unknown='ignore')
y_train_one_hot = enc.fit_transform(y_train[:, np.newaxis]).toarray()
# Reuse the encoder fitted on the training labels so that train and test share the
# same column order; the test labels must only be transformed, not fitted again.
y_test = enc.transform(y_test[:, np.newaxis]).toarray()
print(y_train_one_hot.shape)

In [None]:
# Further split into train and val: 20% of the training data (with its one-hot labels)
# is held out for validation; random_state=0 fixes the split.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2, random_state=0)

### [Answer] Self-made function

In [None]:
"""
Using a neural network implemented in TensorFlow to classify MNIST datasets as multi-level
"""

# --- Hyperparameters ---
learning_rate = 0.01   # step size handed to the optimizer
batch_size = 10        # samples per mini-batch
num_epochs = 10        # passes over the training data
n_hidden1, n_hidden2 = 50, 100   # kept from the dense examples
n_samples = X_train.shape[0]     # number of training samples
n_input = X_train.shape[1]       # size of the first non-batch axis
n_classes = 10                   # one output column per class

# --- Convolution / pooling settings ---
stride = 1         # stride of the convolutions
pad = 'VALID'      # padding mode for both convolution and pooling
ksize = [3, 3]     # pooling window, also passed as the pooling stride

# --- Graph inputs ---
# The input placeholder takes every non-batch axis of X_train (4-D data expected).
X = tf.placeholder("float", shape=[None, *X_train.shape[1:4]])
Y = tf.placeholder("float", shape=[None, n_classes])

# --- Mini-batch iterator over the training split ---
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size)
def example_net(x):
    """
    A small convolutional network

    conv(5x5, 4 filters) -> max-pool -> conv(3x3, 16 filters) -> max-pool
    -> flatten -> dense(32) + ReLU -> dense(n_classes).
    No activation is applied after the convolutions, and the raw output
    (logits) of the last layer is returned.
    """
    def normal_variable(shape):
        # Trainable variable whose initial value is drawn by tf.random_normal.
        return tf.Variable(tf.random_normal(shape))

    # Parameters are created in the order w1..w4, then b1..b4.
    weights = {
        'w1': normal_variable([5, 5, 1, 4]),  # H, W, C, F
        'w2': normal_variable([3, 3, 4, 16]),
        'w3': normal_variable([64, 32]),
        'w4': normal_variable([32, n_classes])
    }
    biases = {
        'b1': normal_variable([1, 1, 1, 4]),
        'b2': normal_variable([1, 1, 1, 16]),
        'b3': normal_variable([32]),
        'b4': normal_variable([n_classes])
    }

    # Convolution stage 1; pooling uses ksize as both window and stride.
    conv_1 = tf.nn.conv2d(x, weights['w1'], stride, pad) + biases['b1']
    pool_1 = tf.nn.max_pool2d(conv_1, ksize, ksize, pad)
    # Convolution stage 2.
    conv_2 = tf.nn.conv2d(pool_1, weights['w2'], stride, pad) + biases['b2']
    pool_2 = tf.nn.max_pool2d(conv_2, ksize, ksize, pad)
    # w3 expects 64 flattened features (2 * 2 * 16, assuming a 28x28x1 input).
    flat = tf.layers.Flatten()(pool_2)
    dense = tf.nn.relu(tf.add(tf.matmul(flat, weights['w3']), biases['b3']))
    # Linear output layer; `+` is equivalent to tf.add.
    return tf.matmul(dense, weights['w4']) + biases['b4']

# Build the network output (logits) for the input placeholder
logits = example_net(X)
# Objective function: softmax cross entropy against the one-hot labels, averaged over the batch
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits), axis=0)
# Optimization method
# The optimizer instance has to be bound to a name so that minimize() can be called on it.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Estimation results
# A sample is correct when the index of the largest softmax probability matches the
# index of the 1 in its one-hot label.
correct_pred = tf.equal(tf.argmax(Y, axis=1), tf.argmax(tf.nn.softmax(logits), axis=1))
# Accuracy: fraction of correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Op that initializes the variables
init = tf.global_variables_initializer()

# Run the computational graph
with tf.Session() as sess:
    sess.run(init)
    # Number of mini-batches per epoch (np.int was removed in NumPy 1.24, so use int()).
    total_batch = int(np.ceil(X_train.shape[0] / batch_size))
    for epoch in range(num_epochs):
        # Loop for each epoch
        total_loss = 0
        total_acc = 0
        for mini_batch_x, mini_batch_y in get_mini_batch_train:
            # Loop for each mini-batch: optimizer step, loss and accuracy in a single run
            _, loss, acc = sess.run([train_op, loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
            total_acc += acc
        # loss_op and accuracy are already per-batch means, so the epoch value is their
        # average over the number of batches, not over the number of samples.
        total_loss /= total_batch
        total_acc /= total_batch
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        # Report the epoch averages rather than the values of the last mini-batch.
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}, val_acc : {:.3f}".format(epoch, total_loss, val_loss, total_acc, val_acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))
    # Predicted class index for every validation sample
    print(sess.run(tf.argmax(tf.nn.softmax(logits), 1), feed_dict={X: X_val, Y: y_val}))

###[keras version].

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import RMSprop
from keras.datasets import mnist


# MNIST image geometry and number of output classes
im_rows, im_cols = 28, 28
im_color = 1
in_shape = (im_rows, im_cols, im_color)
out_size = 10

mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Add the channel axis and scale the pixel values from [0, 255] to [0, 1]
X_train = X_train.reshape(-1, im_rows, im_cols, im_color)
X_train = X_train.astype('float64') / 255
X_test = X_test.reshape(-1, im_rows, im_cols, im_color)
X_test = X_test.astype('float64') / 255

# One-hot encode the integer labels.
# keras.utils.to_categorical is the public entry point; the keras.utils.np_utils
# module it used to be reached through is not available in current Keras releases.
y_train = keras.utils.to_categorical(y_train, out_size)
y_test = keras.utils.to_categorical(y_test, out_size)

# Hold out 20% of the training data for validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [None]:
from keras.callbacks import EarlyStopping

# Early stopping: watch val_loss and stop once it has gone `patience` epochs
# without improving by more than min_delta.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=2)

# Model definition: the layers are listed in the order they are applied.
model = Sequential([
    Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=in_shape),  # convolution 1
    Conv2D(64, (3,3), activation='relu'),                                    # convolution 2
    MaxPooling2D(pool_size=(2,2)),                                           # pooling
    Dropout(0.25),                                                           # drop 25% of the activations
    Flatten(),                                                               # collapse to one vector per sample
    Dense(128, activation='relu'),                                           # fully connected layer
    Dropout(0.5),                                                            # drop 50% of the activations
    Dense(out_size, activation='softmax'),                                   # fully connected output layer
])

# Compile: loss, optimizer and the metric to track
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])

# Train, validating on the held-out split after every epoch
hist = model.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=10,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Evaluate on the test data: score[0] is the loss, score[1] the accuracy
score = model.evaluate(X_test, y_test, verbose=1)
print("accuracy=", score[1], 'loss=', score[0])

In [None]:
# One figure per metric, each showing the training curve and the validation curve.
curves = (
    ('Accuracy', 'accuracy', 'val_accuracy'),
    ('Loss', 'loss', 'val_loss'),
)
for title, train_key, val_key in curves:
    plt.plot(hist.history[train_key])
    plt.plot(hist.history[val_key])
    plt.title(title)
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()