In [3]:
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
from typing import * # type: ignore

In [5]:
np.random.seed(1337)
random.seed(1337)

In [6]:
from micrograd.engine import Value
from micrograd.nn import Neuron, Layer, MLP

In [7]:
n = MLP(3, [1, 1])
m = MLP(3, [2, 1])
n, m

(MLP of [Layer of [ReLUNeuron(3)], Layer of [LinearNeuron(1)]],
 MLP of [Layer of [ReLUNeuron(3), ReLUNeuron(3)], Layer of [LinearNeuron(2)]])

In [8]:
class _: # MLP
    from typing import List
    def __init__(self, nin: int, nouts: List[int]):
        sz = [nin] + nouts
        self.layers = [
            Layer(sz[i], sz[i + 1], nonlin=i != len(nouts) - 1)
            for i in range(len(nouts))
        ]
# MLP CONSTRUCTOR
# this is straightforward right, we make layers of
# cin, cout combos but we only make #cout of them
# each layers takes nin starting with nin, but then
# all other layers take the nouts[i] of the previous

# LINEAR OUTPUT
# the last layers is linear so we can predict any number not just [0,int)

# RELU LAYERS
# ReLU (Rectified Linear Unit) introduces non-linearity into neural networks
# due to its non-linear nature. The ReLU function is defined as:
#
# f(x) = max(0, x)
#
# without relus (or any non linear activation) nn's would be linear funtions

#  By definition, the ReLU is 𝑚𝑎𝑥(0,𝑥). Therefore, if we split the domain from
# (−∞,0] or [0,∞), then the function is linear. However, it's easy to see
# that 𝑓(−1)+𝑓(1)≠𝑓(0). Hence, by definition, ReLU is not linear. 
# https://datascience.stackexchange.com/a/26481


In [9]:
def internals(n: MLP) -> None:
  for layer in n.layers:
    print(layer, '---')
    for neuron in layer.neurons:
      print(neuron, '*')
      for value in neuron.parameters(): # .w and [.b]
        print(value, '.')

In [10]:
internals(n)
# MLP Diagram:
#
#   Input (3 features)
#       ↓
# Layer 1 (ReLUNeuron) # w: [0.2, 0.1, 0.3], b: 0.0
#       ↓
# Layer 2 (LinearNeuron) # w: [0.2], b: 0.0
#       ↓
#  Output (1 output)

Layer of [ReLUNeuron(3)] ---
ReLUNeuron(3) *
Value(data=0.23550571390294128, grad=0) .
Value(data=0.06653114721000164, grad=0) .
Value(data=-0.26830328150124894, grad=0) .
Value(data=0, grad=0) .
Layer of [LinearNeuron(1)] ---
LinearNeuron(1) *
Value(data=0.1715747078045431, grad=0) .
Value(data=0, grad=0) .


In [11]:
internals(m)
# here's why this makes sense
# you take three inputs, every neuron needs 3 weights plus a bias
# but the number outputs of the first layer is two
# so you need two neurons in the first layer
# the linear layer gives two inputs and thus has two weights

Layer of [ReLUNeuron(3), ReLUNeuron(3)] ---
ReLUNeuron(3) *
Value(data=-0.6686254326224383, grad=0) .
Value(data=0.6487474938152629, grad=0) .
Value(data=-0.23259038277158273, grad=0) .
Value(data=0, grad=0) .
ReLUNeuron(3) *
Value(data=0.5792256498313748, grad=0) .
Value(data=0.8434530197925192, grad=0) .
Value(data=-0.3847332240409951, grad=0) .
Value(data=0, grad=0) .
Layer of [LinearNeuron(2)] ---
LinearNeuron(2) *
Value(data=0.9844941451716409, grad=0) .
Value(data=-0.5901079958448365, grad=0) .
Value(data=0, grad=0) .


In [12]:
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
ys = [1.0, -1.0, -1.0, 1.0]

In [13]:
nn = MLP(3, [4, 4, 1]) # i think this ignores our random seed sadly
for _ in range(50):
  ypred = [nn(x) for x in xs]
  loss = sum((a - b)**2 for a, b in zip(ypred, ys))
  for p in nn.parameters():
      p.grad = 0.0
  loss.backward()
  for p in nn.parameters():
      p.data += -0.05 * p.grad
  print(_+1, loss.data, [f"{x.data:1.2f}" for x in ypred])


1 3.8786498946619115 ['0.23', '0.05', '0.12', '0.03']
2 10.819478474781187 ['3.24', '0.64', '0.75', '1.28']
3 13.24678282646478 ['-1.81', '-0.41', '-0.70', '-1.22']
4 4.0706682623564365 ['0.37', '0.55', '-0.13', '0.29']
5 3.4335305562849223 ['0.13', '0.10', '-0.28', '0.03']
6 3.0448729174651343 ['0.32', '0.16', '-0.28', '0.16']
7 2.5598893769509488 ['0.50', '0.13', '-0.32', '0.24']
8 1.971783060797325 ['0.71', '0.03', '-0.39', '0.33']
9 1.3525657579825372 ['0.92', '-0.15', '-0.48', '0.41']
10 0.8628703857312423 ['1.09', '-0.39', '-0.56', '0.46']
11 0.5860348252925198 ['1.22', '-0.62', '-0.62', '0.51']
12 0.525951331836336 ['1.30', '-0.64', '-0.67', '0.56']
13 0.5031139379262789 ['1.09', '-0.72', '-0.72', '0.42']
14 0.532468770083034 ['1.55', '-0.73', '-0.73', '0.72']
15 0.8385808483139425 ['0.69', '-0.81', '-0.81', '0.18']
16 1.4658746020062692 ['2.12', '-0.68', '-0.69', '1.13']
17 4.674142084973772 ['-0.47', '-0.92', '-0.92', '-0.58']
18 0.49716071302560283 ['1.08', '-0.63', '-0.60', 

In [20]:
import random
from cudagrad import tensor, Tensor
import cudagrad 

class Module:

    def zero_grad(self):
        for p in self.parameters():
            p.grad = 0

    def parameters(self):
        return []

class Neuron(Module):
    def __init__(self, nin, nonlin=True):
        self.w = [tensor([1], [random.uniform(-1,1)]) for _ in range(nin)]
        self.b = tensor([1], [0])
        self.nonlin = nonlin

    def __call__(self, x):
        ans = tensor([1], [0])
        for elem_x in x:
            for elem_w in self.w:
                if type(elem_x) != Tensor:
                    elem_x = tensor([1], [elem_x])
                ans = ans + (elem_w * elem_x)
        ans = ans + self.b
        return ans
        # act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
        # return act.relu() if self.nonlin else act

    def parameters(self):
        return self.w + [self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"


In [29]:
print(Neuron(2)([0.5, 0.2]).data)
print(Neuron(2)([tensor([1], [0.5]), tensor([1], [0.2])]).data)
# Layer(1, 2)
# MLP(2, [2, 1])

[0.09220615029335022]
[0.0012150630354881287]


In [11]:
class Layer(Module):

    def __init__(self, nin, nout, **kwargs):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out

    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

class MLP(Module):

    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(nouts)-1) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [15]:
nn = MLP(3, [4, 4, 1]) # i think this ignores our random seed sadly
for _ in range(50):
  ypred = [nn(x) for x in xs]
  loss = sum((a - b)**2 for a, b in zip(ypred, ys))
  for p in nn.parameters():
      p.grad = 0.0
  loss.backward()
  for p in nn.parameters():
      p.data += -0.05 * p.grad
  print(_+1, loss.data, [f"{x.data:1.2f}" for x in ypred])

TypeError: __mul__(): incompatible function arguments. The following argument types are supported:
    1. (self: cudagrad.Tensor, arg0: cudagrad.Tensor) -> cudagrad.Tensor

Invoked with: <cudagrad.Tensor object at 0x112e36b70>, 2.0