In [81]:
import numpy as np

In [90]:
def loadMnist(MNISTpath=None, Flat=True, Standardize=False, OneHot=False): # Mnist is already Flat and Normalized(0-1)
  import requests, gzip, pickle, os
  if MNISTpath is None: MNISTpath = "/media/moises/D/DLDS" if os.name == "posix" else "D/DLDS" # Nix or Win
  url = 'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'
  if not os.path.exists(os.path.join(MNISTpath, "mnist.pkl.gz")):
    with open(os.path.join(MNISTpath, "mnist.pkl.gz"), "wb") as f:
      mnistPKLGZ = requests.get(url).content
      f.write(mnistPKLGZ)
  with gzip.open(os.path.join(MNISTpath, "mnist.pkl.gz"), "rb") as mn: 
    (xtr, ytr), (xva, yva), (xte, yte) = pickle.load(mn, encoding="latin-1") # tr, va, te
    # ((50000, 784) (50000,)) ((10000, 784) (10000,)) ((10000, 784) (10000,))
  if Standardize:
    xtr = xtr-xtr.mean(axis=1)[:,None] # Standardizing per-sample (!wrong:change this)
    xva = xva-xval.mean(axis=1)[:,None]
    xte = xte-xte.mean(axis=1)[:,None]
  if OneHot: ytr = one_hot(ytr, 10)
  if Flat: return (xtr, ytr), (xva, yva), (xte, yte) # tr, va, te 
  return (xtr.reshape(-1,1, 28, 28), ytr), (xva.reshape(-1,1, 28, 28), yva), (xte.reshape(-1,1, 28, 28), yte)



def shuffle_data(x, y, seed=0):
  if seed: np.random.seed(seed)
  idx = np.arange(x.shape[0]) # Only Shuffle the highest dim (shape[0])
  np.random.shuffle(idx)
  return x[idx], y[idx]
def one_hot(a, classes=10): return np.eye(classes)[a] # .T Transpose if you don't want torch style
def one_hot_vector(a, classes=10): return np.eye(classes)[a].reshape(-1,classes,1) # extra empty dim to work with michs outptu.
def accuracy(X,Y): return t.sum(X-Y).item()/ len(X)

class Dataset:
  def __init__(self, xs, ys, Shuffle:bool=False, OneHot:bool=False, classes:int=None):
    if Shuffle: xs, ys = shuffle_data(xs, ys)
    self.xs = xs
    self.ys = one_hot(ys, classes) if OneHot and not isinstance(classes, int) else ys 
    self.counter = 0
    self.size = xs.shape[0]
  def __len__(self): return self.size
  def __iter__(self): return self
  def __next__(self):
    yld = self.xs[self.counter], self.ys[self.counter]
    if self.counter < self.size-1: self.counter += 1
    else: raise StopIteration
    return yld
  def __getitem__(self,n): return list(zip(self.xs[n], self.ys[n]))
  def __repr__(self): return f"{self.__class__.__name__}(xs, ys)"

class Batcher:
  """ Batcher is a DS wrapper to iterate over MBS. 
  Batcher should Shuffle since the DS load data once and batch for many epochs
  once the entire DS is exhuasted we create new batcher to reset counter + reshufle """
  def __init__(self, DS, MBS=128, Shuffle=True): 
    self.MBS = MBS
    self.DS = DS
    if Shuffle: DS.xs, DS.ys = shuffle_data(DS.xs, DS.ys)
    self.counter = 0
    self.size = len(DS)
  def __iter__(self): return self
  def __repr__(self): return f"{self.__class__.__name__}(xs, ys, {self.MBS})"
  def __next__(self):
    if self.counter >= self.size: raise StopIteration
    if self.size>self.counter+self.MBS: 
      batch = self.DS.xs[self.counter:self.counter+self.MBS], self.DS.ys[self.counter:self.counter+self.MBS]
    else: batch = self.DS.xs[self.counter:], self.DS.ys[self.counter:]
    self.counter += self.MBS
    return batch
  def __getitem__(self,n):
    if isinstance(n, slice): return list(zip(self.DS.xs[n], self.DS.ys[n]))
    elif isinstance(n, int): return self.DS.xs[n], self.DS.ys[n]
    else: raise TypeError(f"Index must be int or slice got {type(n)}")
  def __repr__(self): return f"{self.__class__.__name__}(xs, ys)"

class MNIST(Dataset):
  def __init__(self, Train=True, Validation=False, Flat=True, OneHot=True, **kw):
    (trainX, trainY), (valX,ValY), (TestX,TestY) = loadMnist(Flat=Flat, OneHot=OneHot)
    if Validation: super().__init__(valX, ValY, **kw)
    elif Train: super().__init__(trainX, trainY, **kw)
    else: super().__init__(TestX, TestY, **kw)


In [107]:

def mse(y, p): return 0.5*np.power((y-p), 2).mean()
def mseP(y, p): return 2*(p-y)/np.prod(y.shape)

def sig(x): return np.reciprocal(1.0+np.exp(-x))
def sigP(x): s = sig(x); return s*(1.0-s)
def _affTrans(Z, W, B): return Z.dot(W) + B # W(inF,outF) # a = z@w+b -> dL/dz= dL/dz @ w.T 
def _affTransP(TopGrad, Z, W):
    BGrad = TopGrad.sum(axis=0) 
    WGrad = Z.T.dot(TopGrad) # dL/dw= z.T @ w
    Zgrad = TopGrad.dot(W.T) #dL/dz= dL/dz @ w.T
    return Zgrad, WGrad, BGrad

In [111]:
class Layer:
  def __repr__(self): return self.__class__.__name__
  def __call__(self,x ): return self.forward(x)
  def forward(self, x): raise NotImplementedError
  def backward(self, x): raise NotImplementedError

class Sigmoid(Layer): 
  def forward(self,x): 
    # print(f"Forwarding through {self.__class__.__name__}") 
    return sig(x)
  def backward(self,topGrad): 
    # print(f"Backwarding through {self.__class__.__name__} {topGrad.shape}")
    return sigP(topGrad) 

class Linear(Layer):
  def __init__(self,inF, outF): # a = z@w+b -> z[MBS, inF] w[inF, outF] -> [MBS, outF]
    self.bias = np.random.randn(outF)
    lim = np.sqrt(1/inF)
    self.weight = np.random.uniform(-lim, lim, (inF, outF))
  def forward(self, x): 
    # print(f"Forwarding through {self.__class__.__name__} {self.bias.shape}")  
    self.x = x; return _affTrans(x, self.weight, self.bias)
  def backward(self, topGrad, LR=0.1): 
    # print(f"backwarding through {self.__class__.__name__}  {self.bias.shape}")  
    zGrad, wGrad, bGrad = _affTransP(topGrad, self.x, self.weight) 
    self.weight -= wGrad*LR
    self.bias -= bGrad*LR
    return zGrad

class Net:
  def __call__(self,x ): return self.forward(x)
  def __init__(self):
    self.L1 = Linear(784, 100)
    self.L2 = Linear(100, 10)
    self.Act = Sigmoid()
    self.layers = [self.L1, self.Act, self.L2, self.Act]
  def forward(self, x):  
    for l in self.layers: x = l(x)
    return x
  def backward(self, topGrad): 
    for l in reversed(self.layers):  topGrad = l.backward(topGrad)

# MBS = 2
# MB = np.random.randn(MBS, 784)
net = Net()
# net(MB).shape # Forward pass is correct
# topGrad = np.ones((2, 10))
# net.backward(topGrad) # 1.0 if the topGrad of the loss with respect itself

In [97]:
mnist = MNIST(OneHot=True)
# MBS = 10
# mnistB = Batcher(mnist, MBS)
# for x,y in mnistB:
#     print(x.shape, y.shape)
#     break

(10, 784) (10, 10)


In [112]:
MBS = 2
topGrad = np.ones((MBS, 10))
losses = []
net = Net()
for i in range(1):
    mnistB = Batcher(mnist, MBS)
    for x,y in mnistB:
        pred = net(x)
        loss = mse(pred, y)
        losses.append(loss)
        topGrad = mseP(y, pred)
        net.backward(topGrad)
        # print(topGrad.shape)
        # break
        # net(topGrad.)


  def sig(x): return np.reciprocal(1.0+np.exp(-x))


In [117]:
mnistB = Batcher(mnist, MBS)

In [118]:
x, y = next(mnistB)

In [121]:
net(x) 

  def sig(x): return np.reciprocal(1.0+np.exp(-x))


array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])