In [1]:
import sys
sys.path.append('/root/.virtualenvs/jupyter/lecture4/python')
import needle as ndl
import numpy as np

In [2]:
import numpy as np
import gzip
import struct
from scipy.special import softmax
import numdifftools as nd

def softmax_loss(Z, y):
  log_sum_exp = np.log(np.sum(np.exp(Z), axis=1))
  if y.ndim == 1:
    for i in range(y.shape[0]):
      log_sum_exp[i] -= Z[i, y[i]]
    l = np.mean(log_sum_exp)
    return l
  elif y.ndim == 2:
    for i in range(y.shape[0]):
      idx = int(np.argwhere(y[i] == 1))
      # print(y[i][idx])
      log_sum_exp[i] -= Z[i, idx]
    l = np.mean(log_sum_exp)
    return l


def softmax_regression_epoch(X, y, theta, lr = 0.1, batch=50):
    for i in range(0, y.shape[0], batch):
      x = X[i:i+batch]
      y_b = y[i:i+batch]
      Z = np.transpose(np.exp(np.dot(x, theta)).T/(np.sum(np.exp(np.dot(x, theta)), axes=1)))      
      I = np.zeros((Z.shape[0], theta.shape[1]))      
      I[range(batch), y_b] = 1
      theta -= (lr/batch)*(np.dot(x.T, (Z-I)))   
    return theta

with gzip.open('data/t10k-images-idx3-ubyte.gz', 'rb') as f:
  magic_num, items = struct.unpack_from('>ii', f.read(8))
  rows, columns = struct.unpack('>ii', f.read(8))
  byte_stream = f.read()
  images = np.frombuffer(byte_stream, dtype='uint8')
  images = images.reshape((items, rows*columns)).astype(np.float32)
  images = images/np.max(images)


with gzip.open('data/t10k-labels-idx1-ubyte.gz', 'rb') as f:
  magic_num, items = struct.unpack_from('>ii', f.read(8))
  byte_stream = f.read()
  labels = np.frombuffer(byte_stream, dtype='uint8')
  labels = labels.reshape(items).astype(dtype=np.uint8)




def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
  for i in range(0, y.shape[0], batch):
    x = X[i:i+batch]
    y_b = y[i:i+batch]
    I = np.zeros((x.shape[0],W2.shape[1]))
    I[range(batch), y_b] = 1
    
    
    Z1 = np.maximum(0, np.dot(x, W1))
    G2 = softmax(np.dot(Z1, W2), axis=1) - I
    Z1_b = np.where(Z1>0, 1, 0)
    G1 = np.multiply(Z1_b, np.dot(G2, W2.T))

    W1 -= (lr/batch)*np.dot(x.T, G1)
    W2 -= (lr/batch)*np.dot(Z1.T, G2)
  return W1, W2

def softmax_regression_ndl(X, y, theta, lr = 0.1, batch=100):

  for i in range(0, y.shape[0], batch):
    x = X[i:i+batch]
    y_b = y[i:i+batch]
    Z = np.transpose(np.exp(np.dot(x, theta)).T/(np.sum(np.exp(np.dot(x, theta)), axis=1)))
    I = np.zeros((Z.shape[0], theta.shape[1]))
    I[range(batch), y_b] = 1
    theta -= (lr/batch)*(np.dot(x.T, (Z-I)))

  return theta

def nn_epoch(X, y, W1, W2, lr = 1, batch=50):
  for i in range(0, y.shape[0], batch):
    x = X[i:i+batch]
    y_b = y[i:i+batch]
    I = np.zeros((x.shape[0],W2.shape[1]))
    I[range(batch), y_b] = 1    
    # Z1 = np.maximum(0, )   
    Z1 = np.where(np.dot(x, W1)>0, 1, 0)  
    G2 = softmax(np.matmul(Z1, W2), axis=1) - I
       
    G1 = np.multiply(Z1, np.dot(G2, W2.T))
    # print(np.dot(x.T, G1))
    W1 -= (lr/batch)*np.dot(x.T, G1)
    W2 -= (lr/batch)*np.dot(Z1.T, G2)
    # print(W2)
  return W1, W2

def softmax_loss_ndl(Z, y_one_hot):
  log_sum_exp = ndl.log(ndl.summation(ndl.exp(Z), axes=1)).reshape((-1, 1))
  # print(log_sum_exp.shape)
  # print(Z.shape)
  # print(y_one_hot.shape)
  log_sum_exp = log_sum_exp + ndl.negate(ndl.multiply(Z, y_one_hot))
  l = ndl.divide_scalar(ndl.summation(log_sum_exp), log_sum_exp.cached_data.size)
  return l

def nn_epoch_ndl(X, y, W1, W2, lr = 1, batch=50):
  
  for i in range(0, y.shape[0], batch):
    x = ndl.Tensor(X[i:i+batch])
    y_b = y[i:i+batch]
    I = ndl.Tensor(np.zeros((x.shape[0],W2.shape[1])))
    I.cached_data[range(batch), y_b] = 1
    Z1 = ndl.relu(ndl.matmul(x, W1))
    logits = ndl.divide(ndl.exp(ndl.matmul(Z1, W2)), ndl.summation(ndl.exp(ndl.matmul(Z1, W2)), axes=1))
    loss = softmax_loss_ndl(logits, I)
    loss.backward()
    W1 -= (lr)*W1.grad
    W2 -= (lr)*W2.grad
    
  return W1, W2

In [10]:
import sys
sys.path.append('/root/.virtualenvs/jupyter/lecture4/python')
import needle as ndl

np.random.seed(0)
X = np.random.randn(50,5).astype(np.float32)
y = np.random.randint(3, size=(50,)).astype(np.uint8)
W1 = np.random.randn(5, 10).astype(np.float32) / np.sqrt(10)
W2 = np.random.randn(10, 3).astype(np.float32) / np.sqrt(3)
print(W2)


W1_0, W2_0 = W1.copy(), W2.copy()
W1 = ndl.Tensor(W1)
W2 = ndl.Tensor(W2)
X_ = ndl.Tensor(X)
y_one_hot = np.zeros((y.shape[0], 3))
y_one_hot[np.arange(y.size), y] = 1
y_ = ndl.Tensor(y_one_hot)


W1, W2 = nn_epoch_ndl(X, y, W1, W2, lr=1.0, batch=50)
# W1
# print(W1, '\n\n', W2)
dW1 = nd.Gradient(lambda W1_ : softmax_loss_ndl(ndl.relu(X_@ndl.Tensor(W1_).reshape((5,10))@W2), y_).numpy())(W1.numpy())

dW2 = nd.Gradient(lambda W2_ : softmax_loss_ndl(ndl.relu(X_@W1)@ndl.Tensor(W2_).reshape((10,3)), y_).numpy())(W2.numpy())
# print(W1, '\n',W2)
# print(W1_0-W1.numpy())
# print(dW1.reshape(5, 10))
# # print('\n')
# print(W2_0-W2.numpy())
# print(dW2.reshape((10, 3)))


[[ 0.36733624  0.11524972 -0.0044524 ]
 [ 0.6786693   0.724955   -0.16219284]
 [-0.21184002  0.9547785  -0.04942678]
 [ 0.16936992  0.8586685  -0.36862653]
 [ 0.40920788 -0.0476474  -0.39686117]
 [-0.8148212   0.35875607  0.17811905]
 [-0.19106793 -0.28013968 -0.03476331]
 [-0.9004626  -0.16824295  0.46423113]
 [ 0.42482972 -0.11699399  0.16757399]
 [-0.4935292  -0.1882925  -0.54978293]]
0
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [4]:
np.random.seed(0)
X = np.random.randn(50,5).astype(np.float32)
y = np.random.randint(3, size=(50,)).astype(np.uint8)
W1 = np.random.randn(5, 10).astype(np.float32) / np.sqrt(10)
W2 = np.random.randn(10, 3).astype(np.float32) / np.sqrt(3)
W1_0, W2_0 = W1.copy(), W2.copy()
W1 = ndl.Tensor(W1)
W2 = ndl.Tensor(W2)
dW1 = nd.Gradient(lambda W1_ : softmax_loss_ndl(ndl.relu(X_@ndl.Tensor(W1_).reshape((5,10)))@W2, y_).numpy())(W1.numpy())
dW1.reshape(5, 10)
dW2 = nd.Gradient(lambda W2_ : softmax_loss(ndl.relu(X_@W1)@ndl.Tensor(W2_).reshape((10,3)), y_).numpy())(W2.numpy())

TypeError: loop of ufunc does not support argument 0 of type Tensor which has no callable exp method

In [None]:
a = ndl.Tensor(np.random.randn(6, 4))
b = ndl.Tensor(np.random.randn(4, 5))
c = ndl.Tensor(np.random.randn(5, 5))

 

y = a@b + c
y.backward()
c.grad

y.backward()
c.grad


In [88]:
import sys
sys.path.append('/root/.virtualenvs/jupyter/lecture4/python')
import needle as ndl

import numpy as np
import numpy as np
import gzip
import struct
from scipy.special import softmax
import numdifftools as nd

def softmax_loss_ndl(Z, y_one_hot):
  log_sum_exp = ndl.log(ndl.summation(ndl.exp(Z), axes=1)).reshape((-1, 1))
  log_sum_exp -=  Z*y_one_hot
  l = ndl.divide_scalar(ndl.summation(log_sum_exp), log_sum_exp.cached_data.size)
  return l

def test_nn_epoch_ndl(X, W1, W2):
  x = ndl.Tensor(X)
  # y_b = y
  # I = ndl.Tensor(np.zeros((x.shape[0],W2.shape[1])))
  # I.cached_data[range(50), y_b.numpy()] = 1
  Z1 = ndl.relu(ndl.matmul(X, W1))
  logits = ndl.matmul(Z1, W2)

  return W2-W2.grad



def gradient_check(f, *args, tol=1e-6, backward=False, **kwargs):
    eps = 1e-4
    numerical_grads = [np.zeros(a.shape) for a in args]
    for i in range(len(args)):
        for j in range(args[i].realize_cached_data().size):
            args[i].realize_cached_data().flat[j] += eps
            f1 = float(f(*args, **kwargs).numpy().sum())
            args[i].realize_cached_data().flat[j] -= 2 * eps

            f2 = float(f(*args, **kwargs).numpy().sum())
            args[i].realize_cached_data().flat[j] += eps
            numerical_grads[i].flat[j] = (f1 - f2) / (2 * eps)
    if not backward:
        out = f(*args, **kwargs)
        computed_grads = [x.numpy() for x in out.op.gradient_as_tuple(ndl.Tensor(np.ones(out.shape)), out)]
        # print(f'my val:{computed_grads[0]}')
        # print(f'true_val{numerical_grads[0]}')
    else:
        out = f(*args, **kwargs).sum()
        # print(out)
        out.backward()
        # args[0].grad
        # print(numerical_grads[1].shape)
        computed_grads = [a.grad.numpy() for a in args]
        # computed_grads = [args[0].grad.numpy()]
        print(f'my val:{computed_grads[2]}')
        # print(f'true_val{numerical_grads[2]}')
    error = sum(
        np.linalg.norm(computed_grads[i] - numerical_grads[i])
        for i in range(len(args))
    )

    # assert error < tol
    return bool(error < tol)

with gzip.open('data/t10k-images-idx3-ubyte.gz', 'rb') as f:
  magic_num, items = struct.unpack_from('>ii', f.read(8))
  rows, columns = struct.unpack('>ii', f.read(8))
  byte_stream = f.read()
  images = np.frombuffer(byte_stream, dtype='uint8')
  images = images.reshape((items, rows*columns)).astype(np.float32)
  images = images/np.max(images)


with gzip.open('data/t10k-labels-idx1-ubyte.gz', 'rb') as f:
  magic_num, items = struct.unpack_from('>ii', f.read(8))
  byte_stream = f.read()
  labels = np.frombuffer(byte_stream, dtype='uint8')
  labels = labels.reshape(items).astype(dtype=np.uint8)


In [91]:
np.random.seed(0)
X = np.random.randn(50,5).astype(np.float32)
y = np.random.randint(3, size=(50,)).astype(np.uint8)
W1 = np.random.randn(5, 10).astype(np.float32) / np.sqrt(10)
W2 = np.random.randn(10, 3).astype(np.float32) / np.sqrt(3)


W1_0, W2_0 = W1.copy(), W2.copy()
W1 = ndl.Tensor(W1)
W2 = ndl.Tensor(W2)
X_ = ndl.Tensor(X)
y_one_hot = np.zeros((y.shape[0], 3))
y_one_hot[np.arange(y.size), y] = 1
y_ = ndl.Tensor(y_one_hot)


gradient_check(test_nn_epoch_ndl, ndl.Tensor(X), W1, W2, tol=0.01, backward=True)

dW2 = nd.Gradient(lambda W2_: test_nn_epoch_ndl(ndl.Tensor(X), W1, ndl.Tensor(W2_).reshape((10, 3))).numpy())(W2.numpy())
dW2



my val:[[28. 28. 28.]
 [28. 28. 28.]
 [25. 25. 25.]
 [25. 25. 25.]
 [30. 30. 30.]
 [27. 27. 27.]
 [26. 26. 26.]
 [26. 26. 26.]
 [24. 24. 24.]
 [23. 23. 23.]]


array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        ...,
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       ...,

       [[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        ...,
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]],

       [[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        ...,
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]],

       [[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [None]:
np.random.seed(0)
X, y = images, labels
Zsmall = ndl.Tensor(np.random.randn(16, 10).astype(np.float32))
Z = ndl.Tensor(np.zeros((y.shape[0], 10)).astype(np.float32))
y_one_hot = np.zeros((y.shape[0], 10))
y_one_hot[np.arange(y.size), y] = 1
ysmall = ndl.Tensor(y_one_hot[:16])
y = ndl.Tensor(y_one_hot)
Z = ndl.Tensor(np.random.randn(y.shape[0], 10).astype(np.float32))
# softmax_loss_ndl(Z,y)

gradient_check(softmax_loss_ndl, Zsmall, ysmall, tol=0.001, backward=True)


<needle.ops.Summation object at 0x7efe73c81420>
<needle.ops.DivScalar object at 0x7efe73c83310>
<needle.ops.Summation object at 0x7efe73c81510>
<needle.ops.EWiseAdd object at 0x7efe73c828c0>
<needle.ops.Negate object at 0x7efe73c82bc0>
<needle.ops.EWiseMul object at 0x7efe73c832b0>
None
<needle.ops.Reshape object at 0x7efe73c80bb0>
<needle.ops.Log object at 0x7efe73c81150>
<needle.ops.Summation object at 0x7efe73c812a0>
<needle.ops.Exp object at 0x7efe74968670>
None
my val:[[ 0.01136483  0.00290557  0.00518209  0.01830847  0.01260418  0.00073286
   0.00503573 -0.00457617  0.00175637  0.00293606]
 [ 0.00385512  0.01429103  0.00089488  0.00376984  0.00520293  0.00466008
   0.01487137  0.00271883  0.00456504  0.00142086]
 [ 0.00018995 -0.00155904  0.00579186  0.00116166  0.02361241  0.00056987
   0.0025543   0.00202351  0.01129994  0.01060554]
 [-0.00073012  0.00690035  0.0019457   0.00065221  0.00333842  0.00552762
   0.01617877  0.01573345  0.00320939  0.00349421]
 [ 0.001749    0.00120

In [None]:
gradient_check(ndl.relu, ndl.Tensor(np.random.randn(5,4)))

True

In [None]:
from numpy import broadcast_to


a = np.array([[1,2],[3,4]])

idx1 = np.argwhere(a>2)
idx1

a(idx1)


TypeError: 'numpy.ndarray' object is not callable

In [None]:
import sys
sys.path.append('/root/.virtualenvs/jupyter/lecture4/python')
import needle as ndl
a = ndl.Tensor(np.array(np.random.randn(50, 1)))
b = ndl.Tensor(np.array(np.random.randn(50, 1)))

# b.reshape((50,1,1, 1))
c = np.divide(a, b)
c.shape

(50, 1)