<a href="https://colab.research.google.com/github/noushadkhan01/Grokking-Deep-Learning/blob/master/Chapter8%20-%20Intro%20to%20Regularization%20-%20Learning%20Signal%20and%20Ignoring%20Noise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 3 Layer Network on MNIST

### import dataset from keras

In [0]:
import sys, numpy as np
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [7]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

^^As you can see in **MNIST** dataset every image have (28, 28) in dimension so 28 * 28 = 784 so when whe flatten this image we got 784 in one row

## One Hot Encoding of labels

In [12]:
import sys, numpy as np

#now we are training our model only on 1000 images so take 1000 images from this dataset
images, labels = (X_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

#create numpy array of zeros with shape (len(labels), 10)
#because we need to a class matrix for every digit or we can create it from sklearn one hot encoder or pandas dummies
one_hot_labels = np.zeros((len(labels),10))
print('-'*120)
print(f'Label encoding for Training Data\n')
print('-'*80)
print(f'first three Train labels before one hot encoding are \n {labels[:3]}\n\n')

#create one hot labels
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

print(f'first three Train Labels after one hot encoding are \n {labels[:3]} \n\n')
print('-'*80)
test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
print('-'*120)
print(f'Label encoding for Test Data\n')
print('-'*80)
print(f'first three Test labels before one hot encoding are \n {y_test[:3]}\n\n')

#create one hot labels
for i,l in enumerate(y_test):
    test_labels[i][l] = 1
print(f'first three Test Labels after one hot encoding are \n {test_labels[:3]} \n\n')
print('-'*80)
print('-'*120)

------------------------------------------------------------------------------------------------------------------------
Label encoding for Training Data

--------------------------------------------------------------------------------
first three Train labels before one hot encoding are 
 [5 0 4]


first three Train Labels after one hot encoding are 
 [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]] 


--------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
Label encoding for Test Data

--------------------------------------------------------------------------------
first three Test labels before one hot encoding are 
 [7 2 1]


first three Test Labels after one hot encoding are 
 [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]] 


---------

In [13]:
X_train = images
X_train[:2]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [14]:
X_train.shape

(1000, 784)

In [15]:
y_train = labels
y_train[:3]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

## Make Model

In [28]:
import numpy as np
import sys
np.random.seed(1)
relu = (lambda x: np.maximum(0, x)) #return 0 if  x < 0 otherwise returns x

relu2deriv = (lambda x: x >= 0 )  # return True if x >= 0 otherwise returns False


alpha, iterations, hidden_size1, output_size = (0.005, 350, 40, 10)

#initialize weights

#for layer1
weights_1 = 0.2 * np.random.random((X_train.shape[1], hidden_size1)) - 0.1

weights_output = .2 * np.random.random((hidden_size1, output_size)) - 0.1

#build network

#we are using stochastics gradient descent algorithm which means we are updating our weights per example(row)
batch_size = 1
for iteration in range(iterations):
  total_error = 0
  correct_prediction = 0
  for row in range(len(X_train)):
    #get 2-d array of input for every row
    X = X_train[row:row + 1]
    
    #its an array of size (1, 10)
    y = y_train[row:row + 1]
    
    #in python > 3.5 we can use @ inplace of dot product
    layer1 = relu(X @ weights_1) #it returns an array of size (1,40)
    
    layer2 = layer1 @ weights_output #it returns an array of size (1,10)
    
    
    #of index of laye2 maximum values is equel to index of maximum value of y(which is current label)
    correct_prediction += int(np.argmax(layer2) == np.argmax(y))
    
    error = (layer2 - y)**2
    total_error += np.sum(error)
    
    output_delta = (layer2 - y) #it returns an array of size (1, 10)
    layer_1_delta = (output_delta @ weights_output.T) # it returns an arry of size (1, 40)
    layer_1_delta *= relu2deriv(layer1)
    
    weights_output -= alpha * (layer1.T @ output_delta) #it returns an array of size (40, 10)
    
    weights_1 -= alpha * (X.T @ layer_1_delta) #it returns an array of size (784, 40)
  sys.stdout.write("\r Iteration:"+str(iteration)+ \
                     " Train-Err:" + str(total_error/float(len(X_train)))[0:5] +\
                     " Train-Acc:" + str(correct_prediction/float(len(X_train)))) 
    
  
    
    
    


 Iteration:349 Train-Err:0.108 Train-Acc:1.0

In [29]:
images[:2]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [30]:
labels[:2]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [31]:
import sys, numpy as np
np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(layer_0 @ weights_0_1)
        layer_2 = layer_1 @ weights_1_2
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta@weights_1_2.T\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T @ layer_2_delta
        weights_0_1 += alpha * layer_0.T@layer_1_delta

    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))

 I:349 Train-Err:0.108 Train-Acc:1.0

In [0]:
layer_2

array([[-3.27533080e-02, -5.37572964e-03,  6.82407035e-02,
         6.95686725e-02, -2.67865372e-03, -1.50480362e-01,
         9.74881207e-01,  1.15799228e-02, -8.82299564e-06,
         1.97680831e-02]])

In [0]:
layer_2.shape

(1, 10)

In [0]:
labels.shape

(1000, 10)

In [0]:
labels[999:1000]

array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]])

In [0]:
#it returns index of maximum value
np.argmax(layer_2)

6

In [0]:
np.argmax(labels[999:1000])

6

In [0]:
correct_cnt = 0
correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[999:1000]))
correct_cnt

1

In [0]:
if(j % 10 == 0 or j == iterations-1):
    error, correct_cnt = (0.0, 0)

    for i in range(len(test_images)):

        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(test_labels[i:i+1]))
    sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                     " Test-Acc:" + str(correct_cnt/float(len(test_images))) + "\n")
    print()

 Test-Err:0.653 Test-Acc:0.7073



^^**As you can see its overfit model**

# Print Test Error with Train Error

In [32]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))
    
    if(j % 10 == 0 or j == iterations-1):
        error, correct_cnt = (0.0, 0)

        for i in range(len(test_images)):

            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)

            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == \
                                            np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(correct_cnt/float(len(test_images))))
        print()

 I:0 Train-Err:0.722 Train-Acc:0.537 Test-Err:0.601 Test-Acc:0.6488
 I:10 Train-Err:0.312 Train-Acc:0.901 Test-Err:0.420 Test-Acc:0.8114
 I:20 Train-Err:0.260 Train-Acc:0.93 Test-Err:0.414 Test-Acc:0.8111
 I:30 Train-Err:0.232 Train-Acc:0.946 Test-Err:0.417 Test-Acc:0.8066
 I:40 Train-Err:0.215 Train-Acc:0.956 Test-Err:0.426 Test-Acc:0.8019
 I:50 Train-Err:0.204 Train-Acc:0.966 Test-Err:0.437 Test-Acc:0.7982
 I:60 Train-Err:0.194 Train-Acc:0.967 Test-Err:0.448 Test-Acc:0.7921
 I:70 Train-Err:0.186 Train-Acc:0.975 Test-Err:0.458 Test-Acc:0.7864
 I:80 Train-Err:0.179 Train-Acc:0.979 Test-Err:0.466 Test-Acc:0.7817
 I:90 Train-Err:0.172 Train-Acc:0.981 Test-Err:0.474 Test-Acc:0.7758
 I:100 Train-Err:0.166 Train-Acc:0.984 Test-Err:0.482 Test-Acc:0.7706
 I:110 Train-Err:0.161 Train-Acc:0.984 Test-Err:0.489 Test-Acc:0.7686
 I:120 Train-Err:0.157 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766
 I:130 Train-Err:0.153 Train-Acc:0.99 Test-Err:0.502 Test-Acc:0.7622
 I:140 Train-Err:0.149 Train-Acc:0

# Dropout In Code
**dropout is used to reduce the effect of some neurons in a large network to overcome overfitting**

## Dropout intuition

In [51]:
import numpy as np
np.random.seed(1)
i = 0
layer_0 = images[i:i+1]
weights_0_1 = .2 * np.random.random((layer_0.shape[1], 40))
layer_1 = relu(np.dot(layer_0,weights_0_1))
dropout_mask = np.random.randint(2, size = layer_1.shape) #it creates an array of random integers between [0, 2), 2 excluede with shape (1, 40)
dropout_mask

array([[0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
        0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1]])

In [52]:
dropout_mask.shape

(1, 40)

In [53]:
print(f'Layer1 befor dropout is \n {layer_1}')

Layer1 befor dropout is 
 [[10.80612655 11.40878644 10.49648979 11.03482644  9.57806096  9.69356395
  10.13441567 10.67038501 10.39963145 10.85921614 10.02558253 10.49376599
  11.67811423 10.96593671 11.04429163 10.31925713 11.23793847 10.28169626
  10.32556865 10.86767233 11.42114201 11.72691314  9.76259996 10.59488906
  10.82231082 11.3576492  10.99752792 10.98918643 11.32823886 10.96124407
  10.45434983  9.43732352 10.5490145  10.50685952 11.48728458 10.16861921
  10.84251603 10.30706397 10.20702591 10.60840595]]


In [54]:
layer_1 *= dropout_mask

print(f'Layer1 after dropout is \n {layer_1}')

Layer1 after dropout is 
 [[ 0.         11.40878644 10.49648979 11.03482644  9.57806096  0.
  10.13441567  0.          0.          0.          0.         10.49376599
   0.          0.         11.04429163 10.31925713  0.          0.
  10.32556865  0.         11.42114201 11.72691314  0.          0.
   0.         11.3576492   0.         10.98918643  0.          0.
   0.          0.          0.         10.50685952 11.48728458  0.
  10.84251603 10.30706397 10.20702591 10.60840595]]


## Train Nework with Dropout

In [0]:
import numpy 
numpy.random.seed(1)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1
i = 0
layer_0 = images[i:i+1]
layer_1 = relu(layer_0 @ weights_0_1) # we can use @ as dot product in python > 3.5

dropout_mask = np.random.randint(2,size=layer_1.shape) #it will create an array of 1's and 0's with shape (1, 40)

#multiply droput with layer_1 to reduce the effect of random layer_1 values in outputs
layer_1 *= dropout_mask * 2 #by multiplying 2 we will increase the effect of that particular neuron

layer_2 = np.dot(layer_1, weights_1_2)

error += np.sum((labels[i:i+1] - layer_2) ** 2)

correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i+i+1]))

layer_2_delta = (labels[i:i+1] - layer_2) #returns shape of (1, 10)
layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1) #return shape (1, 40)

layer_1_delta *= dropout_mask #returns shape of (1, 40)

#print(layer_1_delta)
w2 = weights_1_2
#print(f'elements of Weights_1_2 befor dropout are\n {weights_1_2[:10]} and shape of \nweight_1_2 is {weights_1_2.shape}\n')
weights_1_2 += alpha * layer_1.T.dot(layer_2_delta) #returns shape of (40, 10)

#print(f'elements of Weights_1_2 after dropout are\n {weights_1_2[:10]} and shape of \nweight_1_2 is {weights_1_2.shape} \n\n\n')

w1 = weights_0_1
#print(f'elements of Weights_0_1 before dropout are\n {weights_0_1[:10, :10]} and shape of\n weight_0_1 is {weights_0_1.shape}\n')
weights_0_1 += alpha * layer_0.T.dot(layer_1_delta) #returns shape of (784, 40)
#print('\n')
#print(f'elements of Weights_0_1 after dropout are\n {weights_0_1[:10, :10]} and shape of \nweight_0_1 is {weights_0_1.shape}')

^^have doubt in this

In [83]:
import numpy, sys
np.random.seed(1)
def relu(x):
    return (x >= 0) * x # returns x if x > 0
                        # returns 0 otherwise

def relu2deriv(output):
    return output >= 0 #returns 1 for input > 0

alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0,0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.641 Test-Acc:0.6333 Train-Err:0.891 Train-Acc:0.413
I:10 Test-Err:0.458 Test-Acc:0.787 Train-Err:0.472 Train-Acc:0.764
I:20 Test-Err:0.415 Test-Acc:0.8133 Train-Err:0.430 Train-Acc:0.809
I:30 Test-Err:0.421 Test-Acc:0.8114 Train-Err:0.415 Train-Acc:0.811
I:40 Test-Err:0.419 Test-Acc:0.8112 Train-Err:0.413 Train-Acc:0.827
I:50 Test-Err:0.409 Test-Acc:0.8133 Train-Err:0.392 Train-Acc:0.836
I:60 Test-Err:0.412 Test-Acc:0.8236 Train-Err:0.402 Train-Acc:0.836
I:70 Test-Err:0.412 Test-Acc:0.8033 Train-Err:0.383 Train-Acc:0.857
I:80 Test-Err:0.410 Test-Acc:0.8054 Train-Err:0.386 Train-Acc:0.854
I:90 Test-Err:0.411 Test-Acc:0.8144 Train-Err:0.376 Train-Acc:0.868
I:100 Test-Err:0.411 Test-Acc:0.7903 Train-Err:0.369 Train-Acc:0.864
I:110 Test-Err:0.411 Test-Acc:0.8003 Train-Err:0.371 Train-Acc:0.868
I:120 Test-Err:0.402 Test-Acc:0.8046 Train-Err:0.353 Train-Acc:0.857
I:130 Test-Err:0.408 Test-Acc:0.8091 Train-Err:0.352 Train-Acc:0.867
I:140 Test-Err:0.405 Test-Acc:0.8083 Train-Er

# Batch Gradient Descent

In [0]:
import numpy as np
np.random.seed(1)

def relu(x):
    return (x >= 0) * x # returns x if x > 0

def relu2deriv(output):
    return output >= 0 # returns 1 for input > 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))

        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
            layer_1_delta *= dropout_mask

            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
            
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.815 Test-Acc:0.3832 Train-Err:1.284 Train-Acc:0.165
I:10 Test-Err:0.568 Test-Acc:0.7173 Train-Err:0.591 Train-Acc:0.672
I:20 Test-Err:0.510 Test-Acc:0.7571 Train-Err:0.532 Train-Acc:0.729
I:30 Test-Err:0.485 Test-Acc:0.7793 Train-Err:0.498 Train-Acc:0.754
I:40 Test-Err:0.468 Test-Acc:0.7877 Train-Err:0.489 Train-Acc:0.749
I:50 Test-Err:0.458 Test-Acc:0.793 Train-Err:0.468 Train-Acc:0.775
I:60 Test-Err:0.452 Test-Acc:0.7995 Train-Err:0.452 Train-Acc:0.799
I:70 Test-Err:0.446 Test-Acc:0.803 Train-Err:0.453 Train-Acc:0.792
I:80 Test-Err:0.451 Test-Acc:0.7968 Train-Err:0.457 Train-Acc:0.786
I:90 Test-Err:0.447 Test-Acc:0.795 Train-Err:0.454 Train-Acc:0.799
I:100 Test-Err:0.448 Test-Acc:0.793 Train-Err:0.447 Train-Acc:0.796
I:110 Test-Err:0.441 Test-Acc:0.7943 Train-Err:0.426 Train-Acc:0.816
I:120 Test-Err:0.442 Test-Acc:0.7966 Train-Err:0.431 Train-Acc:0.813
I:130 Test-Err:0.441 Test-Acc:0.7906 Train-Err:0.434 Train-Acc:0.816
I:140 Test-Err:0.447 Test-Acc:0.7874 Train-Err:0