From bdfd424a4fcb99d715e4d2ab3fbbb5a8372f0e0d Mon Sep 17 00:00:00 2001
From: __mo_san__ <50895527+m0saan@users.noreply.github.com>
Date: Fri, 2 Jun 2023 04:40:10 +0100
Subject: [PATCH] fix some typos & update docs

---
 README.md                |   6 +-
 _docs/autograd.html      |  94 +++++++-------
 _docs/index.html         |   4 +-
 _docs/search.json        |   4 +-
 _docs/sitemap.xml        |  10 +-
 _proc/00_autograd.ipynb  | 215 +++++++++++++++----------
 _proc/01_operators.ipynb |  14 +--
 _proc/index.ipynb        |   6 +-
 minima/autograd.py       |  93 +++++++++++++++-
 nbs/00_autograd.ipynb    | 234 ++++++++++-----------------------
 nbs/01_operators.ipynb   |  38 ++-----
 nbs/index.ipynb          |   6 +-
 12 files changed, 352 insertions(+), 372 deletions(-)

diff --git a/README.md b/README.md
index 822cfa5..9f57796 100644
--- a/README.md
+++ b/README.md
@@ -40,8 +40,10 @@
 To install with pip, use: `pip install minima`.
 If you plan to develop Minima yourself, or want to be on the cutting edge, you can use an editable install.
-    git clone https://github.com/yourusername/minima
-    pip install -e "minima[dev]"
+``` bash
+git clone https://github.com/m0saan/minima
+pip install .
+```
 ## Features

diff --git a/_docs/autograd.html b/_docs/autograd.html
index 3e8ba6a..67c4fe9 100644
--- a/_docs/autograd.html
+++ b/_docs/autograd.html
@@ -204,6 +204,7 @@

On this page

  • Device
  • Operator
  • TensorOp
  • +
  • Tensor
  • @@ -253,7 +254,7 @@

    Derivatives

    In the context of the function d = a*b + c which we’re going to use below, since a is the variable and b and c are constants, the derivative of d with respect to a is just b. This can be written in LaTeX as:

    \[ \frac{dd}{da} = b \]

    We begin by assigning values to three variables a, b, and c. We then create a fourth variable, d, equal to the product of a and b plus c. When you execute this cell, it should display the value of d.

    -
    +
    a = 4
     b = -2
     c = 11
    @@ -261,7 +262,7 @@ 

    Derivatives

    d

    We define a function f_a(a,b,c) that estimates the slope of the function at the point a. It first calculates d1 from the given inputs a, b, and c, then increments a by a small value h and recalculates the result as d2. Finally, it prints the original d1, the new d2, and the estimated slope (d2 - d1) / h.

    -
    +
    def f_a(a,b,c):
         h = 0.01
         d1 = a*b + c
    @@ -276,7 +277,7 @@ 

    Derivatives

    This states that the derivative of d with respect to a, denoted dd/da, is analytically equal to b: in the expression d = a*b + c, the term c does not depend on a and the coefficient of a is b, so the derivative is b. In this case, b equals -2. And because d is linear in a, the finite-difference estimate above is exactly b even for a finite step h.

    Now if we do this with b

    -
    +
    def f_b(a,b,c):
         h = 0.01
         d1 = a*b + c
    @@ -301,7 +302,7 @@ 

    +
    def trace(root):
         nodes, edges = set(), set()
         def build(v):
    @@ -348,7 +349,7 @@ 

    Value

    Represents a node within a computational graph.

    This class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. It’s central to the functioning of automatic differentiation within deep learning frameworks.

    Attributes: op (Operator), _prev (Set[‘Value’]), cached_data (NDArray), requires_grad (bool)

    -
    +
    a = Value(2.0, label='a')
     b = Value(-3.0, label='b')
     c = Value(10.0, label='c')
    @@ -371,7 +372,7 @@ 

    Value

    Manual gradient

    base case (L grad)

    -
    +
    def lol():
         h = 0.001
         
    @@ -400,13 +401,13 @@ 

    base case (L grad)

    lol()

    Sure enough, it’s 1: the derivative of L with respect to itself is always 1.

    -
    +
    L.grad = 1

    f

    Here is a generic version of lol

    -
    +
    def lol(label):
         def foo(v, label):
             if v.label == label: v.data += h
    @@ -437,17 +438,17 @@ 

    f

    lol('f')
    -
    +
    f.grad = 4
    -
    +
    lol('d')
    -
    +
    d.grad = -2

    Let’s draw what we have up to this point

    -
    +
    draw_dot(L)

    Here is the step-by-step derivation for each of the variables:

    @@ -494,34 +495,34 @@

    f

    \[ \frac{dL}{dd} = f \cdot \frac{dd}{dd} = f \]
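    As a quick sanity check on these analytic gradients, here is a small self-contained sketch (not a cell from the notebook) that compares them against central-difference estimates of L = (a*b + c) * f, using the same values as the graph below (a=2, b=-3, c=10, f=-2). The helper names L_of and numeric_grad are made up for this illustration.

``` python
# Hypothetical sanity check: analytic vs. numerical gradients of L = (a*b + c) * f.
def L_of(a, b, c, f):
    return (a * b + c) * f

def numeric_grad(fn, args, i, h=1e-5):
    # Central difference with respect to the i-th argument.
    up, down = list(args), list(args)
    up[i] += h
    down[i] -= h
    return (fn(*up) - fn(*down)) / (2 * h)

a, b, c, f = 2.0, -3.0, 10.0, -2.0
analytic = {'a': b * f, 'b': a * f, 'c': f, 'f': a * b + c}
for i, name in enumerate('abcf'):
    print(name, analytic[name], round(numeric_grad(L_of, (a, b, c, f), i), 4))
# prints: a 6.0 6.0, b -4.0 -4.0, c -2.0 -2.0, f 4.0 4.0
```

    These numbers match the grads set by hand in the cells below.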

    -
    +
    lol('e')
    -
    +
    e.grad = -2 # 1 * d.grad
    -
    +
    lol('c')
    -
    +
    c.grad = -2 # 1 * d.grad
    -
    +
    draw_dot(L)
    -
    +
    lol('a')
    -
    +
    a.grad = 6  # b * e.grad
    -
    +
    lol('b')
    -
    +
    b.grad = -4 # a * e.grad
    -
    +
    draw_dot(L)

    @@ -536,7 +537,7 @@

    Value

    Represents a node within a computational graph.

    This class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. It’s central to the functioning of automatic differentiation within deep learning frameworks.

    Attributes: op (Operator), _prev (Set[‘Value’]), cached_data (NDArray), requires_grad (bool)

    -
    +
    a = Value(2.0, label='a')
     b = Value(-3.0, label='b')
     c = Value(10.0, label='c')
    @@ -547,37 +548,37 @@ 

    Value

    draw_dot(L)
    -
    +
    L.grad = 1
    -
    +
    L._backward()
    -
    +
    draw_dot(L)
    -
    +
    d._backward()
    -
    +
    draw_dot(L)
    -
    +
    c._backward()

    We expect that nothing will happen, since c is a leaf node and its _backward is a no-op

    -
    +
    draw_dot(L)
    -
    +
    e._backward()
    -
    +
    draw_dot(L)

    Sure enough, the gradients come out exactly as they did when we computed them by hand before

    We can automate this process using a topological sort, which gives us the correct order in which to call _backward on each node

    -
    +
    a = Value(2.0, label='a')
     b = Value(-3.0, label='b')
     c = Value(10.0, label='c')
    @@ -588,7 +589,7 @@ 

    Value

    draw_dot(L)
    -
    +
    # topological order all of the children in the graph
     topo = []
     visited = set()
    @@ -601,16 +602,16 @@ 

    Value

    build_topo(L)
    -
    +
    topo
    -
    +
    # go one variable at a time and apply the chain rule to get its gradient
     L.grad = 1
     for v in reversed(topo):
         v._backward()
    -
    +
    draw_dot(L)

    So let’s now update the Value class with this logic
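    As a reference point before the updated class, here is a rough sketch of what a backward() method built on the topological sort above might look like. The actual Value.backward in minima is elided from this diff, so treat this as an illustration rather than the library's exact code.

``` python
# Sketch only: a backward() in the spirit of the topo-sort cells above.
def backward(self):
    topo, visited = [], set()

    def build_topo(v):
        if v not in visited:
            visited.add(v)
            for child in v._prev:
                build_topo(child)
            topo.append(v)

    build_topo(self)

    # Seed the output gradient, then apply the chain rule in reverse topological order.
    self.grad = 1
    for v in reversed(topo):
        v._backward()
```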

    @@ -625,7 +626,7 @@

    Value

    Represents a node within a computational graph.

    This class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. It’s central to the functioning of automatic differentiation within deep learning frameworks.

    Attributes: op (Operator), _prev (Set[‘Value’]), cached_data (NDArray), requires_grad (bool)

    -
    +
    a = Value(2.0, label='a')
     b = Value(-3.0, label='b')
     c = Value(10.0, label='c')
    @@ -636,10 +637,10 @@ 

    Value

    draw_dot(L)
    -
    +
    L.backward()
    -
    +
    draw_dot(L)

    @@ -732,6 +733,19 @@

    TensorOp

    self._prev = op
     self.cached_data = cached_data
     self.requires_grad = requires_grad
    +
    +

    source

    + +
    +

    Tensor

    +
    +
     Tensor (array, device:Optional[__main__.Device]=None, dtype=None,
    +         requires_grad=True, **kwargs)
    +
    +

    A Tensor represents a multidimensional array of values in a computational graph.

    +

    Attributes:
    - data: The actual data of the tensor. It is computed lazily.
    - children: Other tensors that this tensor depends on for computing its value.
    - requires_grad: Whether this tensor needs to compute gradients.

    +

    Methods:
    - realize_data: Computes and returns the actual data for this tensor.
    - shape: Returns the shape of this tensor.
    - dtype: Returns the data type of this tensor.

    +

    Example:
    >>> t1 = Tensor([[1.0, 2.0], [3.0, 4.0]])
    >>> print(t1.shape)
    (2, 2)
    >>> print(t1.dtype)
    float64
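    The "computed lazily" remark above refers to the realize_data mechanism also touched by this patch in minima/autograd.py: a tensor produced by an operator holds no concrete data until it is first requested. A minimal, simplified sketch of that idea follows; the class name LazyNode is invented for this example, and the real Tensor additionally handles devices, dtypes, and gradient bookkeeping.

``` python
# Simplified sketch of lazy realization, not the actual minima Tensor.
class LazyNode:
    def __init__(self, op=None, children=(), data=None):
        self.op = op              # operator that produces this node; None for leaf nodes
        self.children = children  # nodes this node depends on
        self.data = data          # concrete data, or None until computed

    def realize_data(self):
        # Compute and cache the data on first access, recursing into children.
        if self.data is None:
            self.data = self.op.compute(*[c.realize_data() for c in self.children])
        return self.data
```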

    diff --git a/_docs/index.html b/_docs/index.html
    index 531faf7..82acfe4 100644
    --- a/_docs/index.html
    +++ b/_docs/index.html
    @@ -234,8 +234,8 @@

    Installing

    conda install minima anaconda

    To install with pip, use: pip install minima.

    If you plan to develop Minima yourself, or want to be on the cutting edge, you can use an editable install.

    -
    git clone https://github.com/yourusername/minima
    -pip install -e "minima[dev]"
    +
    git clone https://github.com/m0saan/minima
    +pip install .

    Features

    diff --git a/_docs/search.json b/_docs/search.json index 37cdb4f..9c3eb70 100644 --- a/_docs/search.json +++ b/_docs/search.json @@ -39,13 +39,13 @@ "href": "autograd.html#manual-gradient", "title": "autograd", "section": "Manual gradient", - "text": "Manual gradient\n\nbase case (L grad)\n\ndef lol():\n h = 0.001\n \n a = Value(2.0, label='a')\n b = Value(-3.0, label='b')\n c = Value(10.0, label='c')\n e = a*b; e.label='e'\n d = e + c; d.label='d'\n f = Value(-2.0, label='f')\n L = d*f; L.label='L'\n \n L1 = L.data\n \n a = Value(2.0, label='a')\n b = Value(-3.0, label='b')\n c = Value(10.0, label='c')\n e = a*b; e.label='e'\n d = e + c; d.label='d'\n f = Value(-2.0, label='f')\n L = d*f; L.label='L' \n \n L2 = L.data + h\n \n print(f'grad: {(L2 - L1) / h}')\n\nlol()\n\nsure enough it’s 1\n\nL.grad = 1\n\n\nf\nHere is a generic version of lol\n\ndef lol(label):\n def foo(v, label):\n if v.label == label: v.data += h\n \n h = 0.001\n \n a = Value(2.0, label='a')\n b = Value(-3.0, label='b')\n c = Value(10.0, label='c')\n e = a*b; e.label='e'\n d = e + c; d.label='d'\n f = Value(-2.0, label='f')\n L = d*f; L.label='L'\n \n L1 = L.data\n \n a = Value(2.0, label='a'); foo(a, label)\n b = Value(-3.0, label='b'); foo(b, label)\n c = Value(10.0, label='c'); foo(c, label)\n e = a*b; e.label='e'; foo(e, label)\n d = e + c; d.label='d'; foo(d, label)\n f = Value(-2.0, label='f'); foo(f, label)\n L = d*f; L.label='L'; foo(L, label) \n \n L2 = L.data\n \n print(f'grad: {(L2 - L1) / h}')\n\nlol('f')\n\n\nf.grad = 4\n\n\nlol('d')\n\n\nd.grad = -2\n\nLet’s draw what we have up to this point\n\ndraw_dot(L)\n\nSure, here’s the step by step derivation for each of the variables:\n\nWith respect to a:\n\nGiven that L = (a*b + c) * f, we will apply the product rule for differentiation.\nThe derivative of a*b with respect to a is b, and the derivative of c with respect to a is 0. Therefore:\n\\[\n\\frac{dL}{da} = f \\cdot \\frac{d(a*b + c)}{da} = f \\cdot (b + 0) = b \\cdot f\n\\]\n\nWith respect to b:\n\nThe derivative of a*b with respect to b is a, and the derivative of c with respect to b is 0. Therefore:\n\\[\n\\frac{dL}{db} = f \\cdot \\frac{d(a*b + c)}{db} = f \\cdot (a + 0) = a \\cdot f\n\\]\n\nWith respect to c:\n\nThe derivative of a*b with respect to c is 0, and the derivative of c with respect to c is 1. Therefore:\n\\[\n\\frac{dL}{dc} = f \\cdot \\frac{d(a*b + c)}{dc} = f \\cdot (0 + 1) = f\n\\]\n\nWith respect to f:\n\nThe derivative of (a*b + c) with respect to f is 0, and f is just f, therefore:\n\\[\n\\frac{dL}{df} = (a*b + c) \\cdot \\frac{df}{df} = a*b + c\n\\]\n\nWith respect to e (where e = a*b):\n\nThe derivative of e + c with respect to e is 1. Therefore:\n\\[\n\\frac{dL}{de} = f \\cdot \\frac{d(e + c)}{de} = f \\cdot 1 = f\n\\]\n\nWith respect to d (where d = e + c):\n\nThe derivative of d with respect to d is 1. Therefore:\n\\[\n\\frac{dL}{dd} = f \\cdot \\frac{df}{df} = f\n\\]\n\nlol('e')\n\n\ne.grad = -2 # 1 * d.grad\n\n\nlol('c')\n\n\nc.grad = -2 # 1 * d.grad\n\n\ndraw_dot(L)\n\n\nlol('a')\n\n\na.grad = 6 # b * e.grad\n\n\nlol('b')\n\n\nb.grad = -4 # a * e.grad\n\n\ndraw_dot(L)\n\n\nsource\n\n\n\nValue\n\n Value (data, _children=(), _op='', label='')\n\nRepresents a node within a computational graph.\nThis class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. 
It’s central to the functioning of automatic differentiation within deep learning frameworks.\nAttributes: op (Operator) _prev (Set[‘Value’]) cached_data (NDArray) requires_grad (bool)\n\na = Value(2.0, label='a')\nb = Value(-3.0, label='b')\nc = Value(10.0, label='c')\ne = a*b; e.label='e'\nd = e + c; d.label='d'\nf = Value(-2.0, label='f')\nL = d*f; L.label='L' \n\ndraw_dot(L)\n\n\nL.grad = 1\n\n\nL._backward()\n\n\ndraw_dot(L)\n\n\nd._backward()\n\n\ndraw_dot(L)\n\n\nc._backward()\n\nWe expect that nothing will happen\n\ndraw_dot(L)\n\n\ne._backward()\n\n\ndraw_dot(L)\n\nsure enough, exactly as we did before\nWe can do thid process automatically using topo sort algorithms, which’s will give us the correct order on which to call _backward on\n\na = Value(2.0, label='a')\nb = Value(-3.0, label='b')\nc = Value(10.0, label='c')\ne = a*b; e.label='e'\nd = e + c; d.label='d'\nf = Value(-2.0, label='f')\nL = d*f; L.label='L' \n\ndraw_dot(L)\n\n\n# topological order all of the children in the graph\ntopo = []\nvisited = set()\ndef build_topo(v):\n if v not in visited:\n visited.add(v)\n for child in v._prev:\n build_topo(child)\n topo.append(v)\n\nbuild_topo(L)\n\n\ntopo\n\n\n# go one variable at a time and apply the chain rule to get its gradient\nL.grad = 1\nfor v in reversed(topo):\n v._backward()\n\n\ndraw_dot(L)\n\nSo let’s now update the Value class with this logic\n\nsource\n\n\nValue\n\n Value (data, _children=(), _op='', label='')\n\nRepresents a node within a computational graph.\nThis class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. It’s central to the functioning of automatic differentiation within deep learning frameworks.\nAttributes: op (Operator) _prev (Set[‘Value’]) cached_data (NDArray) requires_grad (bool)\n\na = Value(2.0, label='a')\nb = Value(-3.0, label='b')\nc = Value(10.0, label='c')\ne = a*b; e.label='e'\nd = e + c; d.label='d'\nf = Value(-2.0, label='f')\nL = d*f; L.label='L' \n\ndraw_dot(L)\n\n\nL.backward()\n\n\ndraw_dot(L)\n\n\nsource\n\n\nValue\n\n Value (data, children=(), op='', label='')\n\nA class representing a scalar value and its gradient in a computational graph.\nAttributes: - data (float): the scalar value associated with this node - grad (float): the gradient of the output of the computational graph w.r.t. this node’s value - label (str): a label for this node, used for debugging and visualization purposes - _op (str): a string representation of the operation that produced this node in the computational graph - _prev (set of Value objects): the set of nodes that contributed to the computation of this node - _backward (function): a function that computes the gradients of this node w.r.t. 
its inputs\nMethods: - init(self, data, children=(), op=’‘, label=’’): Initializes a Value object with the given data, children, op, and label - repr(self): Returns a string representation of this Value object - add(self, other): Implements the addition operation between two Value objects - mul(self, other): Implements the multiplication operation between two Value objects - item(self): Returns the scalar value associated with this Value object - tanh(self): Applies the hyperbolic tangent function to this Value object and returns a new Value object\n\nsource\n\n\nall_devices\n\n all_devices ()\n\nreturn a list of all available devices\n\nsource\n\n\ncpu\n\n cpu ()\n\nReturn cpu device\n\nsource\n\n\nCPUDevice\n\n CPUDevice ()\n\nRepresents data that sits in CPU\n\nsource\n\n\nDevice\n\n Device ()\n\nIndicates the device supporting an NDArray.\n\nsource\n\n\nOperator\n\n Operator ()\n\nInitialize self. See help(type(self)) for accurate signature.\n\nsource\n\n\nTensorOp\n\n TensorOp ()\n\nOp class specialized to output tensors, will be alternate subclasses for other structures\n#| export\nclass Value:\n \"\"\"\n Represents a node within a computational graph.\n\n This class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value's dependencies, \n the operation that produced it, and whether it requires a gradient for backpropagation. It's central to the functioning of automatic \n differentiation within deep learning frameworks.\n\n Attributes:\n op (Operator)\n _prev (Set['Value']) \n cached_data (NDArray)\n requires_grad (bool)\n \"\"\"\n def __init__(self,\n op: Operator, # The operator that produced this node. If the node was initialized from actual data, this is 'None'.\n prev: Set['Value'], # The set of values that this value directly depends on. It's the union of the `_next` sets of all the values in `args`.\n cached_data: NDArray, # The actual data for this value. It's `None` for values that aren't yet computed.\n requires_grad: bool): # Specifies whether this node requires a gradient. 
This is `False` for nodes that don't need gradients.\n \n self._op = op\n self._prev = op\n self.cached_data = cached_data\n self.requires_grad = requires_grad" + "text": "Manual gradient\n\nbase case (L grad)\n\ndef lol():\n h = 0.001\n \n a = Value(2.0, label='a')\n b = Value(-3.0, label='b')\n c = Value(10.0, label='c')\n e = a*b; e.label='e'\n d = e + c; d.label='d'\n f = Value(-2.0, label='f')\n L = d*f; L.label='L'\n \n L1 = L.data\n \n a = Value(2.0, label='a')\n b = Value(-3.0, label='b')\n c = Value(10.0, label='c')\n e = a*b; e.label='e'\n d = e + c; d.label='d'\n f = Value(-2.0, label='f')\n L = d*f; L.label='L' \n \n L2 = L.data + h\n \n print(f'grad: {(L2 - L1) / h}')\n\nlol()\n\nsure enough it’s 1\n\nL.grad = 1\n\n\nf\nHere is a generic version of lol\n\ndef lol(label):\n def foo(v, label):\n if v.label == label: v.data += h\n \n h = 0.001\n \n a = Value(2.0, label='a')\n b = Value(-3.0, label='b')\n c = Value(10.0, label='c')\n e = a*b; e.label='e'\n d = e + c; d.label='d'\n f = Value(-2.0, label='f')\n L = d*f; L.label='L'\n \n L1 = L.data\n \n a = Value(2.0, label='a'); foo(a, label)\n b = Value(-3.0, label='b'); foo(b, label)\n c = Value(10.0, label='c'); foo(c, label)\n e = a*b; e.label='e'; foo(e, label)\n d = e + c; d.label='d'; foo(d, label)\n f = Value(-2.0, label='f'); foo(f, label)\n L = d*f; L.label='L'; foo(L, label) \n \n L2 = L.data\n \n print(f'grad: {(L2 - L1) / h}')\n\nlol('f')\n\n\nf.grad = 4\n\n\nlol('d')\n\n\nd.grad = -2\n\nLet’s draw what we have up to this point\n\ndraw_dot(L)\n\nSure, here’s the step by step derivation for each of the variables:\n\nWith respect to a:\n\nGiven that L = (a*b + c) * f, we will apply the product rule for differentiation.\nThe derivative of a*b with respect to a is b, and the derivative of c with respect to a is 0. Therefore:\n\\[\n\\frac{dL}{da} = f \\cdot \\frac{d(a*b + c)}{da} = f \\cdot (b + 0) = b \\cdot f\n\\]\n\nWith respect to b:\n\nThe derivative of a*b with respect to b is a, and the derivative of c with respect to b is 0. Therefore:\n\\[\n\\frac{dL}{db} = f \\cdot \\frac{d(a*b + c)}{db} = f \\cdot (a + 0) = a \\cdot f\n\\]\n\nWith respect to c:\n\nThe derivative of a*b with respect to c is 0, and the derivative of c with respect to c is 1. Therefore:\n\\[\n\\frac{dL}{dc} = f \\cdot \\frac{d(a*b + c)}{dc} = f \\cdot (0 + 1) = f\n\\]\n\nWith respect to f:\n\nThe derivative of (a*b + c) with respect to f is 0, and f is just f, therefore:\n\\[\n\\frac{dL}{df} = (a*b + c) \\cdot \\frac{df}{df} = a*b + c\n\\]\n\nWith respect to e (where e = a*b):\n\nThe derivative of e + c with respect to e is 1. Therefore:\n\\[\n\\frac{dL}{de} = f \\cdot \\frac{d(e + c)}{de} = f \\cdot 1 = f\n\\]\n\nWith respect to d (where d = e + c):\n\nThe derivative of d with respect to d is 1. Therefore:\n\\[\n\\frac{dL}{dd} = f \\cdot \\frac{df}{df} = f\n\\]\n\nlol('e')\n\n\ne.grad = -2 # 1 * d.grad\n\n\nlol('c')\n\n\nc.grad = -2 # 1 * d.grad\n\n\ndraw_dot(L)\n\n\nlol('a')\n\n\na.grad = 6 # b * e.grad\n\n\nlol('b')\n\n\nb.grad = -4 # a * e.grad\n\n\ndraw_dot(L)\n\n\nsource\n\n\n\nValue\n\n Value (data, _children=(), _op='', label='')\n\nRepresents a node within a computational graph.\nThis class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. 
It’s central to the functioning of automatic differentiation within deep learning frameworks.\nAttributes: op (Operator) _prev (Set[‘Value’]) cached_data (NDArray) requires_grad (bool)\n\na = Value(2.0, label='a')\nb = Value(-3.0, label='b')\nc = Value(10.0, label='c')\ne = a*b; e.label='e'\nd = e + c; d.label='d'\nf = Value(-2.0, label='f')\nL = d*f; L.label='L' \n\ndraw_dot(L)\n\n\nL.grad = 1\n\n\nL._backward()\n\n\ndraw_dot(L)\n\n\nd._backward()\n\n\ndraw_dot(L)\n\n\nc._backward()\n\nWe expect that nothing will happen\n\ndraw_dot(L)\n\n\ne._backward()\n\n\ndraw_dot(L)\n\nsure enough, exactly as we did before\nWe can do thid process automatically using topo sort algorithms, which’s will give us the correct order on which to call _backward on\n\na = Value(2.0, label='a')\nb = Value(-3.0, label='b')\nc = Value(10.0, label='c')\ne = a*b; e.label='e'\nd = e + c; d.label='d'\nf = Value(-2.0, label='f')\nL = d*f; L.label='L' \n\ndraw_dot(L)\n\n\n# topological order all of the children in the graph\ntopo = []\nvisited = set()\ndef build_topo(v):\n if v not in visited:\n visited.add(v)\n for child in v._prev:\n build_topo(child)\n topo.append(v)\n\nbuild_topo(L)\n\n\ntopo\n\n\n# go one variable at a time and apply the chain rule to get its gradient\nL.grad = 1\nfor v in reversed(topo):\n v._backward()\n\n\ndraw_dot(L)\n\nSo let’s now update the Value class with this logic\n\nsource\n\n\nValue\n\n Value (data, _children=(), _op='', label='')\n\nRepresents a node within a computational graph.\nThis class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value’s dependencies, the operation that produced it, and whether it requires a gradient for backpropagation. It’s central to the functioning of automatic differentiation within deep learning frameworks.\nAttributes: op (Operator) _prev (Set[‘Value’]) cached_data (NDArray) requires_grad (bool)\n\na = Value(2.0, label='a')\nb = Value(-3.0, label='b')\nc = Value(10.0, label='c')\ne = a*b; e.label='e'\nd = e + c; d.label='d'\nf = Value(-2.0, label='f')\nL = d*f; L.label='L' \n\ndraw_dot(L)\n\n\nL.backward()\n\n\ndraw_dot(L)\n\n\nsource\n\n\nValue\n\n Value (data, children=(), op='', label='')\n\nA class representing a scalar value and its gradient in a computational graph.\nAttributes: - data (float): the scalar value associated with this node - grad (float): the gradient of the output of the computational graph w.r.t. this node’s value - label (str): a label for this node, used for debugging and visualization purposes - _op (str): a string representation of the operation that produced this node in the computational graph - _prev (set of Value objects): the set of nodes that contributed to the computation of this node - _backward (function): a function that computes the gradients of this node w.r.t. 
its inputs\nMethods: - init(self, data, children=(), op=’‘, label=’’): Initializes a Value object with the given data, children, op, and label - repr(self): Returns a string representation of this Value object - add(self, other): Implements the addition operation between two Value objects - mul(self, other): Implements the multiplication operation between two Value objects - item(self): Returns the scalar value associated with this Value object - tanh(self): Applies the hyperbolic tangent function to this Value object and returns a new Value object\n\nsource\n\n\nall_devices\n\n all_devices ()\n\nreturn a list of all available devices\n\nsource\n\n\ncpu\n\n cpu ()\n\nReturn cpu device\n\nsource\n\n\nCPUDevice\n\n CPUDevice ()\n\nRepresents data that sits in CPU\n\nsource\n\n\nDevice\n\n Device ()\n\nIndicates the device supporting an NDArray.\n\nsource\n\n\nOperator\n\n Operator ()\n\nInitialize self. See help(type(self)) for accurate signature.\n\nsource\n\n\nTensorOp\n\n TensorOp ()\n\nOp class specialized to output tensors, will be alternate subclasses for other structures\n#| export\nclass Value:\n \"\"\"\n Represents a node within a computational graph.\n\n This class encapsulates a single value and its relationships in the graph, making it easy to track and manage the value's dependencies, \n the operation that produced it, and whether it requires a gradient for backpropagation. It's central to the functioning of automatic \n differentiation within deep learning frameworks.\n\n Attributes:\n op (Operator)\n _prev (Set['Value']) \n cached_data (NDArray)\n requires_grad (bool)\n \"\"\"\n def __init__(self,\n op: Operator, # The operator that produced this node. If the node was initialized from actual data, this is 'None'.\n prev: Set['Value'], # The set of values that this value directly depends on. It's the union of the `_next` sets of all the values in `args`.\n cached_data: NDArray, # The actual data for this value. It's `None` for values that aren't yet computed.\n requires_grad: bool): # Specifies whether this node requires a gradient. This is `False` for nodes that don't need gradients.\n \n self._op = op\n self._prev = op\n self.cached_data = cached_data\n self.requires_grad = requires_grad\n\nsource\n\n\nTensor\n\n Tensor (array, device:Optional[__main__.Device]=None, dtype=None,\n requires_grad=True, **kwargs)\n\nA Tensor represents a multidimensional array of values in a computational graph.\nAttributes: - data: The actual data of the tensor. It is computed lazily. - children: Other tensors that this tensor depends on for computing its value. - requires_grad: Whether this tensor needs to compute gradients.\nMethods: - realize_data: Computes and returns the actual data for this tensor. - shape: Returns the shape of this tensor. 
- dtype: Returns the data type of this tensor.\nExample: >>> t1 = Tensor([[1.0, 2.0], [3.0, 4.0]]) >>> print(t1.shape) (2, 2) >>> print(t1.dtype) float64" }, { "objectID": "index.html", "href": "index.html", "title": "Welcome to minima", "section": "", - "text": "minima is a lightweight deep learning framewor, lean yet effective tailor-made for educational exploration.\nJust like a delicate sapling inspired by the towering strength of an oak, Minima draws its inspiration from PyTorch.\nYet, it carves its own identity with a straightforward interface and a curated set of features.\nThis makes learning and using it a breeze, allowing you to effortlessly build and train neural networks.\nIndeed, Minima is your friendly companion on the journey to understanding deep learning, where less is often more.\n\n\n\nYou can install minima on your own machines with conda\nIf you’re using miniconda (recommended) then run:\nconda install minima\n…or if you’re using Anaconda then run:\nconda install minima anaconda\nTo install with pip, use: pip install minima.\nIf you plan to develop Minima yourself, or want to be on the cutting edge, you can use an editable install.\ngit clone https://github.com/yourusername/minima\npip install -e \"minima[dev]\"\n\n\n\n\nEasy to install and use\nSimple and intuitive API for defining and training neural networks\nBuilt-in support for common layers and activation functions\nSupports both CPU and GPU acceleration\nCompatible with NumPy arrays for easy data manipulation\n\n\n\n\nHere’s a simple example of how to define and train a neural network using Minima:\nimport minima as mi\n\n# Define the neural network architecture\nmodel = mi.Sequential(\n mi.Linear(784, 128),\n mi.ReLU(),\n mi.Linear(128, 10),\n mi.Softmax()\n)\n\n# Load the dataset\nx_train, y_train, x_test, y_test = load_data()\n\n# Train the model\nloss_fn = mi.CrossEntropyLoss()\noptimizer = mi.SGD(model.parameters(), lr=0.01)\nfor epoch in range(10):\n for x_batch, y_batch in minibatch(x_train, y_train, batch_size=32):\n y_pred = model(x_batch)\n loss = loss_fn(y_pred, y_batch)\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n\n# Evaluate the model\ny_pred = model(x_test)\naccuracy = compute_accuracy(y_pred, y_test)\nprint(f\"Accuracy: {accuracy:.2f}\")\nThis example defines a simple neural network with two linear layers and two activation functions, trains it on a dataset using stochastic gradient descent, and evaluates its accuracy on a test set.\n\n\n\nFor more information on how to use minima, please refer to the documentation, which can be found in the website above.\n\n\n\ncomming soon!\n\n\n\nminima is released under the Apache License 2.0. See LICENSE for more information." 
+ "text": "minima is a lightweight deep learning framewor, lean yet effective tailor-made for educational exploration.\nJust like a delicate sapling inspired by the towering strength of an oak, Minima draws its inspiration from PyTorch.\nYet, it carves its own identity with a straightforward interface and a curated set of features.\nThis makes learning and using it a breeze, allowing you to effortlessly build and train neural networks.\nIndeed, Minima is your friendly companion on the journey to understanding deep learning, where less is often more.\n\n\n\nYou can install minima on your own machines with conda\nIf you’re using miniconda (recommended) then run:\nconda install minima\n…or if you’re using Anaconda then run:\nconda install minima anaconda\nTo install with pip, use: pip install minima.\nIf you plan to develop Minima yourself, or want to be on the cutting edge, you can use an editable install.\ngit clone https://github.com/m0saan/minima\npip install .\n\n\n\n\nEasy to install and use\nSimple and intuitive API for defining and training neural networks\nBuilt-in support for common layers and activation functions\nSupports both CPU and GPU acceleration\nCompatible with NumPy arrays for easy data manipulation\n\n\n\n\nHere’s a simple example of how to define and train a neural network using Minima:\nimport minima as mi\n\n# Define the neural network architecture\nmodel = mi.Sequential(\n mi.Linear(784, 128),\n mi.ReLU(),\n mi.Linear(128, 10),\n mi.Softmax()\n)\n\n# Load the dataset\nx_train, y_train, x_test, y_test = load_data()\n\n# Train the model\nloss_fn = mi.CrossEntropyLoss()\noptimizer = mi.SGD(model.parameters(), lr=0.01)\nfor epoch in range(10):\n for x_batch, y_batch in minibatch(x_train, y_train, batch_size=32):\n y_pred = model(x_batch)\n loss = loss_fn(y_pred, y_batch)\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n\n# Evaluate the model\ny_pred = model(x_test)\naccuracy = compute_accuracy(y_pred, y_test)\nprint(f\"Accuracy: {accuracy:.2f}\")\nThis example defines a simple neural network with two linear layers and two activation functions, trains it on a dataset using stochastic gradient descent, and evaluates its accuracy on a test set.\n\n\n\nFor more information on how to use minima, please refer to the documentation, which can be found in the website above.\n\n\n\ncomming soon!\n\n\n\nminima is released under the Apache License 2.0. See LICENSE for more information." 
} ] \ No newline at end of file diff --git a/_docs/sitemap.xml b/_docs/sitemap.xml index 893dd91..979b547 100644 --- a/_docs/sitemap.xml +++ b/_docs/sitemap.xml @@ -2,22 +2,22 @@ https://m0saan.github.io/minima/core.html - 2023-06-02T03:36:26.516Z + 2023-06-02T03:40:00.224Z https://m0saan.github.io/minima/Untitled.html - 2023-06-02T03:36:28.366Z + 2023-06-02T03:40:01.908Z https://m0saan.github.io/minima/operators.html - 2023-06-02T03:36:29.120Z + 2023-06-02T03:40:02.920Z https://m0saan.github.io/minima/autograd.html - 2023-06-02T03:36:30.158Z + 2023-06-02T03:40:04.076Z https://m0saan.github.io/minima/index.html - 2023-06-02T03:36:31.448Z + 2023-06-02T03:40:04.645Z diff --git a/_proc/00_autograd.ipynb b/_proc/00_autograd.ipynb index 42a2dce..beaf237 100644 --- a/_proc/00_autograd.ipynb +++ b/_proc/00_autograd.ipynb @@ -83,8 +83,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -106,8 +105,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -142,8 +140,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -209,8 +206,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -326,8 +322,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -377,8 +372,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -421,8 +415,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -447,8 +440,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -487,8 +479,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -499,8 +490,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -511,8 +501,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -530,8 +519,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -599,8 +587,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -611,8 +598,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -623,8 +609,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -635,8 +620,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -647,8 
+631,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -659,8 +642,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -671,8 +653,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -683,8 +664,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -695,8 +675,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -707,8 +686,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -780,8 +758,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -800,8 +777,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -812,8 +788,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -824,8 +799,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -836,8 +810,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -848,8 +821,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -860,8 +832,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -879,8 +850,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -891,8 +861,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -903,8 +872,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -929,8 +897,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -949,8 +916,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -971,8 +937,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -983,8 +948,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -998,8 +962,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], 
"source": [ @@ -1078,8 +1041,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -1098,8 +1060,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -1110,8 +1071,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "language": "python", - "tags": [] + "language": "python" }, "outputs": [], "source": [ @@ -1439,9 +1399,7 @@ }, { "cell_type": "markdown", - "metadata": { - "tags": [] - }, + "metadata": {}, "source": [ "``` python\n", "#| export\n", @@ -1472,6 +1430,83 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/m0saan/minima/blob/main/minima/autograd.py#L509){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Tensor\n", + "\n", + "> Tensor (array, device:Optional[__main__.Device]=None, dtype=None,\n", + "> requires_grad=True, **kwargs)\n", + "\n", + "A Tensor represents a multidimensional array of values in a computational graph.\n", + "\n", + "Attributes:\n", + "- data: The actual data of the tensor. It is computed lazily.\n", + "- children: Other tensors that this tensor depends on for computing its value.\n", + "- requires_grad: Whether this tensor needs to compute gradients.\n", + "\n", + "Methods:\n", + "- realize_data: Computes and returns the actual data for this tensor.\n", + "- shape: Returns the shape of this tensor.\n", + "- dtype: Returns the data type of this tensor.\n", + "\n", + "Example:\n", + ">>> t1 = Tensor([[1.0, 2.0], [3.0, 4.0]])\n", + ">>> print(t1.shape)\n", + "(2, 2)\n", + ">>> print(t1.dtype)\n", + "float64" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/m0saan/minima/blob/main/minima/autograd.py#L509){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Tensor\n", + "\n", + "> Tensor (array, device:Optional[__main__.Device]=None, dtype=None,\n", + "> requires_grad=True, **kwargs)\n", + "\n", + "A Tensor represents a multidimensional array of values in a computational graph.\n", + "\n", + "Attributes:\n", + "- data: The actual data of the tensor. 
It is computed lazily.\n", + "- children: Other tensors that this tensor depends on for computing its value.\n", + "- requires_grad: Whether this tensor needs to compute gradients.\n", + "\n", + "Methods:\n", + "- realize_data: Computes and returns the actual data for this tensor.\n", + "- shape: Returns the shape of this tensor.\n", + "- dtype: Returns the data type of this tensor.\n", + "\n", + "Example:\n", + ">>> t1 = Tensor([[1.0, 2.0], [3.0, 4.0]])\n", + ">>> print(t1.shape)\n", + "(2, 2)\n", + ">>> print(t1.dtype)\n", + "float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "#| output: asis\n", + "show_doc(Tensor)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1547,22 +1582,10 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, diff --git a/_proc/01_operators.ipynb b/_proc/01_operators.ipynb index 0e9b53c..7c70adc 100644 --- a/_proc/01_operators.ipynb +++ b/_proc/01_operators.ipynb @@ -345,22 +345,10 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, diff --git a/_proc/index.ipynb b/_proc/index.ipynb index 47d17ac..67cf416 100644 --- a/_proc/index.ipynb +++ b/_proc/index.ipynb @@ -60,8 +60,10 @@ "If you plan to develop Minima yourself, or want to be on the cutting\n", "edge, you can use an editable install.\n", "\n", - " git clone https://github.com/yourusername/minima\n", - " pip install -e \"minima[dev]\"" + "``` bash\n", + "git clone https://github.com/m0saan/minima\n", + "pip install .\n", + "``` " ] }, { diff --git a/minima/autograd.py b/minima/autograd.py index cf31b98..92bb34b 100644 --- a/minima/autograd.py +++ b/minima/autograd.py @@ -507,7 +507,26 @@ def __call__(self, *args): # %% ../nbs/00_autograd.ipynb 77 class Tensor(Value): - """A value in the computational graph.""" + """ + A Tensor represents a multidimensional array of values in a computational graph. + + Attributes: + - data: The actual data of the tensor. It is computed lazily. + - children: Other tensors that this tensor depends on for computing its value. + - requires_grad: Whether this tensor needs to compute gradients. + + Methods: + - realize_data: Computes and returns the actual data for this tensor. + - shape: Returns the shape of this tensor. + - dtype: Returns the data type of this tensor. + + Example: + >>> t1 = Tensor([[1.0, 2.0], [3.0, 4.0]]) + >>> print(t1.shape) + (2, 2) + >>> print(t1.dtype) + float64 + """ def __init__( self, @@ -518,6 +537,20 @@ def __init__( requires_grad=True, **kwargs ): + + """ + Initializes the tensor with given array, device, and data type. + + Args: + - array: A numeric array-like object (e.g., list, numpy array, or another tensor). 
+ - device: The device where the tensor should be allocated. + - dtype: The desired data type for the tensor. + - requires_grad: Whether the tensor requires gradient computation. + + Returns: + None. + """ + if isinstance(array, Tensor): if device is None: device = array.device @@ -551,6 +584,20 @@ def _init( data: List[object] = None, requires_grad: Optional[bool] = None ): + """ + Internal initialization function for the Tensor. + + Args: + - op: The operator that produces this tensor. + - children: Set of tensors that this tensor depends on. + - num_outputs: Number of outputs that the operator produces. + - data: Actual data of the tensor, computed lazily. + - requires_grad: Whether this tensor requires gradient computation. + + Returns: + None. + """ + global TENSOR_COUNTER TENSOR_COUNTER += 1 if requires_grad is None: @@ -562,18 +609,49 @@ def _init( self.requires_grad = requires_grad def realize_data(self): + """ + If the data of this tensor has not been computed, computes and caches it. + Otherwise, returns the cached data. + + Returns: + The actual data of this tensor. + """ + if self.data is None: self.data = self._op.compute(*[child.realize_data() for child in self.children]) return self.data @staticmethod def _array_from_numpy(numpy_array, device, dtype): + """ + Converts a numpy array into an array suitable for the given device and data type. + + Args: + - numpy_array: The numpy array to convert. + - device: The device where the converted array should be allocated. + - dtype: The desired data type for the converted array. + + Returns: + The converted array. + """ + if ARRAY_API is numpy: return numpy.array(numpy_array, dtype=dtype) return ARRAY_API.array(numpy_array, device=device, dtype=dtype) @staticmethod def make_from_op(op: Operator, children: Tuple["Value"]): + """ + Creates a new tensor from a given operator and its children. + + Args: + - op: The operator that produces the tensor. + - children: The tensors that the operator depends on. + + Returns: + The newly created tensor. + """ + tensor = Tensor.__new__(Tensor) tensor._init(op, children) if not LAZY_MODE: @@ -582,10 +660,23 @@ def make_from_op(op: Operator, children: Tuple["Value"]): @property def shape(self): + """ + Returns the shape of this tensor. + + Returns: + A tuple representing the shape of this tensor. + """ + retu return self.realize_data().shape @property def dtype(self): + """ + Returns the data type of this tensor. + + Returns: + The data type of this tensor. 
+ """ return self.realize_data().dtype def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor': diff --git a/nbs/00_autograd.ipynb b/nbs/00_autograd.ipynb index dc4fd05..29f4dc9 100644 --- a/nbs/00_autograd.ipynb +++ b/nbs/00_autograd.ipynb @@ -12,9 +12,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| default_exp autograd" @@ -23,9 +21,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| hide\n", @@ -36,10 +32,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "tags": [] - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -120,9 +114,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "a = 4\n", @@ -142,9 +134,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "def f_a(a,b,c):\n", @@ -177,9 +167,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "def f_b(a,b,c):\n", @@ -243,9 +231,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "def trace(root):\n", @@ -338,9 +324,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "a = Value(2.0, label='a')\n", @@ -388,9 +372,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "def lol():\n", @@ -431,9 +413,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "L.grad = 1" @@ -456,9 +436,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "def lol(label):\n", @@ -495,9 +473,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "f.grad = 4" @@ -506,9 +482,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "lol('d') " @@ -517,9 +491,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "d.grad = -2" @@ -535,9 +507,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -603,9 +573,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "lol('e')" @@ -614,9 +582,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "e.grad = -2 # 1 * d.grad" @@ -625,9 +591,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "lol('c')" @@ -636,9 +600,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "c.grad = -2 # 1 * d.grad" @@ -647,9 +609,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ 
"draw_dot(L)" @@ -658,9 +618,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "lol('a')" @@ -669,9 +627,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "a.grad = 6 # b * e.grad" @@ -680,9 +636,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "lol('b')" @@ -691,9 +645,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "b.grad = -4 # a * e.grad" @@ -702,9 +654,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -713,9 +663,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -770,9 +718,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "a = Value(2.0, label='a')\n", @@ -789,9 +735,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "L.grad = 1" @@ -800,9 +744,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "L._backward()" @@ -811,9 +753,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -822,9 +762,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "d._backward()" @@ -833,9 +771,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -844,9 +780,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "c._backward()" @@ -862,9 +796,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -873,9 +805,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "e._backward()" @@ -884,9 +814,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -909,9 +837,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "a = Value(2.0, label='a')\n", @@ -928,9 +854,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "# topological order all of the children in the graph\n", @@ -949,9 +873,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "topo" @@ -960,9 +882,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "# go one variable at a time and apply the chain rule to get its gradient\n", @@ -974,9 +894,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ 
"draw_dot(L)" @@ -992,9 +910,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1067,9 +983,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "a = Value(2.0, label='a')\n", @@ -1086,9 +1000,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "L.backward()" @@ -1097,9 +1009,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "draw_dot(L)" @@ -1108,9 +1018,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1402,9 +1310,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1416,9 +1322,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1453,9 +1357,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1474,9 +1376,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1489,9 +1389,7 @@ }, { "cell_type": "markdown", - "metadata": { - "tags": [] - }, + "metadata": {}, "source": [ "\n", "``` python\n", @@ -1526,14 +1424,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", "class Tensor(Value):\n", - " \"\"\"\n", + " \"\"\"\n", " A Tensor represents a multidimensional array of values in a computational graph.\n", "\n", " Attributes:\n", @@ -1649,7 +1545,7 @@ " \n", " @staticmethod\n", " def _array_from_numpy(numpy_array, device, dtype):\n", - " \"\"\"\n", + " \"\"\"\n", " Converts a numpy array into an array suitable for the given device and data type.\n", "\n", " Args:\n", @@ -1686,7 +1582,7 @@ " \n", " @property\n", " def shape(self):\n", - " \"\"\"\n", + " \"\"\"\n", " Returns the shape of this tensor.\n", "\n", " Returns:\n", @@ -1762,9 +1658,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| hide\n", @@ -1886,21 +1780,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" } }, "nbformat": 4, diff --git a/nbs/01_operators.ipynb b/nbs/01_operators.ipynb index 2809365..dd51577 100644 --- a/nbs/01_operators.ipynb +++ b/nbs/01_operators.ipynb @@ -14,9 +14,7 @@ "cell_type": "code", "execution_count": null, "id": "06cf77c9-d5dc-473d-9624-6679191a3c6a", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| default_exp operators" @@ -26,9 +24,7 @@ "cell_type": "code", "execution_count": null, "id": "5485133d-9a30-4362-af02-d6724482f459", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ 
-51,9 +47,7 @@ "cell_type": "code", "execution_count": null, "id": "bb4e9796-2da9-4668-ae19-cf547e25ddd1", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -73,9 +67,7 @@ "cell_type": "code", "execution_count": null, "id": "3ebbb12f-03ea-4d3e-beb4-e15c0eb7fc88", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -98,9 +90,7 @@ "cell_type": "code", "execution_count": null, "id": "67e4faab-b6f8-4332-ba80-06564920e53c", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -121,9 +111,7 @@ "cell_type": "code", "execution_count": null, "id": "64a28132-02d4-4b27-bc07-5897f88a9cd2", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -152,21 +140,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" } }, "nbformat": 4, diff --git a/nbs/index.ipynb b/nbs/index.ipynb index 462889c..8c48402 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -64,8 +64,10 @@ "If you plan to develop Minima yourself, or want to be on the cutting\n", "edge, you can use an editable install.\n", "\n", - " git clone https://github.com/yourusername/minima\n", - " pip install -e \"minima[dev]\"\n" + "``` bash\n", + "git clone https://github.com/m0saan/minima\n", + "pip install .\n", + "``` " ] }, {