Implementar una red neuronal (sin POO) de dos capas totalmente conectadas para resolver\
el problema de CIFAR-10. Las 100 neuronas de la primer capa tienen como función de ac-\
tivación a la función sigmoidal. La segunda capa es la de salida y esta formada por 10\
neuronas con una activación lineal. Como función de costo utilizar MSE y agregue un ter-\
mino de regularización L2.

In [1]:
import numpy as np
from scipy import linalg
from matplotlib import pyplot as plt
from keras.datasets import cifar10

In [2]:
(Xtrain, ytrain), (Xtest, ytest) = cifar10.load_data()

ytrain = ytrain[:,0]
ytest = ytest[:,0]

# summarize loaded dataset
print('Train: X=%s, y=%s' % (Xtrain.shape, ytrain.shape))
print('Test: X=%s, y=%s' % (Xtest.shape, ytest.shape))

Train: X=(50000, 32, 32, 3), y=(50000,)
Test: X=(10000, 32, 32, 3), y=(10000,)


In [3]:
names_dict = {0: 'airplane', 1: 'automobile', 2: 'bird', 
              3: 'cat', 4: 'deer', 5: 'dog', 
              6: 'frog', 7: 'horse', 8: 'ship', 
              9: 'truck'}

In [4]:
def clean(X):
        X_norm = X.reshape(len(X), X[0].size).astype(float)
        X_norm -= np.mean(X)
        X_norm /= np.std(X).astype(float)
        return X_norm

In [5]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

def sigprime(z):
    return sigmoid(z)*(1-sigmoid(z))

In [6]:
Xtrain_n = clean(Xtrain)
Xtest_n = clean(Xtest)

print(Xtrain_n.shape)

(50000, 3072)


In [10]:
#Acá van todas las variables del sistema. Las voy a dejar como variables globales por esta vez
def Init():
    global s, y, a, z, W1, b1, W2, b2, grads1, grads2, gradb1, gradb2, grads
    s = Xtrain_n[0].size
    y = np.zeros([10,1], dtype=float)
    a = np.zeros([100, 1], dtype=float)
    z = np.zeros_like(a)
    W1 = np.random.uniform(-1, 1, size=(s, 100))
    b1 = np.random.uniform(-1, 1)
    W2 = np.random.uniform(-1, 1, size=(100, 10))
    b2 = np.random.uniform(-1, 1)
    #Hago un array de arrays para los gradientes
    grads1 = np.zeros_like(W1)
    grads2 = np.zeros_like(W2)
    gradb1 = np.array([0], dtype=float)
    gradb2 = np.array([0], dtype=float)
    grads = np.array([grads1, grads2, gradb1, gradb2], dtype=object)
    
Init()

In [11]:
def forward(x):
    global a, y, z    
    z = np.dot(W1.T, x) + b1
    a = sigmoid(z)
    y = np.dot(W2.T, a) + b2

In [12]:
def loss_function(yex, ytrue):
    diff = np.copy(yex)
    # ya que ytrue funciona como índice, le resto 1 a yex[ytrue]
    diff[ytrue] -= 1
    return linalg.norm(diff)**2, diff


In [57]:
def gradient(x, diff):
    #diff es el vector (y - ytrue)
    global grads, z, a
    aux = np.zeros_like(W2)
    sprim = sigprime(z)
    #for i in range(10):
    #    aux[:,i] = W2[:,i] * sprim
        
    grads[0] = np.outer(x, np.dot(W2, diff)*sprim)
    grads[1] = np.outer(a, diff.T)
    grads[2] = np.dot(np.dot(sprim.T, W2), diff)
    grads[3] = np.sum(diff)

    return grads

In [37]:
def update_weights(grads, alpha=.01):
    global W1, W2, b1, b2
    W1 -= alpha * grads[0]
    W2 -= alpha * grads[1]
    b1 -= alpha * grads[2]
    b2 -= alpha * grads[3]
    

In [72]:
#pruebas
loss = np.array(0, dtype=float)
gradaux = np.zeros_like(grads)
example = Xtrain_n[0:50,:]

for i in range(50):
    forward(example[i,:])
    (aux, diff) = loss_function(y, ytrain[0])
    loss += aux
    gradaux += gradient(example[i,:], diff)
#Falta aplicar regularización
loss /= (2*50)
gradaux /= 50
update_weights(gradaux, alpha=.05)
gradaux[1][0,0]


-0.1836271741037749

In [16]:
def predict(x):
    return forward(x)

def accuracy(y, ytrue):
    return float(np.sum(y==ytrue))/float(len(ytrue))

In [None]:
# Algoritmo de entrenamiento

# para epoca en epocas
    # grads=loss=0
    # para ejemplo en batch
    #   forward(ejemplo)
    #   loss+=(ejemplo)
    #   grads+=grads(ejemplo)
    # loss += L2
    # grads += reg
    # update weights
 

In [82]:
m = Xtrain_n.shape[0]
batch_size = 1000
n_batches = int(m/batch_size)
epochs = 10
lambda_ = .1

loss = np.array(0, dtype=float)
gradaux = np.zeros_like(grads)

Init()
for epoch in range(epochs):
    for batch in range(1):
        loss = 0
        gradaux = 0
        example = Xtrain_n[batch*batch_size:(1+batch)*batch_size,:]
        yexample = ytrain[batch*batch_size:(1+batch)*batch_size]

        for i in range(batch_size):
            forward(example[i,:])
            (aux, diff) = loss_function(y, yexample[i])
            loss += aux
            gradaux += gradient(example[i,:], diff)
        #regularización
        L2 = linalg.norm(W1)**2 + linalg.norm(W2)**2
        L2G = linalg.norm(W1) + linalg.norm(W2)

        loss += lambda_ * L2/2
        loss /= (2*batch_size)

        gradaux += lambda_ * L2G
        gradaux /= batch_size
        
        update_weights(gradaux, alpha=.05)

    print(epoch, loss)



0 67.23093051363198
1 40.736650317694576
2 34.16041463065594
3 29.806346528865152
4 28.19673070232996
5 25.833164348735373
6 25.073786787743323
7 23.275657046115064
8 22.945226505949815
9 21.911073601585947


In [88]:
#testeamos un poco
ytrguess = []
yguess = []
size = 1000
for i in range(size):
    predict(Xtrain_n[i,:])
    ytrguess.append(np.argmax(y))
    predict(Xtest_n[i,:])
    yguess.append(np.argmax(y))
    
print("Train accuracy: ",100*accuracy(ytrguess, ytrain[0:size]))
print("Test accuracy: ",100*accuracy(yguess, ytest[0:size]))

Train accuracy:  11.1
Test accuracy:  11.3
