
Commit

added Softmax, LSUV and fixed some typos :3
m0saan committed Jun 22, 2023
1 parent 43569ca commit 25a6f53
Showing 5 changed files with 31 additions and 18 deletions.
2 changes: 2 additions & 0 deletions minima/_modidx.py
@@ -268,6 +268,8 @@
'minima.nn.Residual.forward': ('nn.html#residual.forward', 'minima/nn.py'),
'minima.nn.Sequential': ('nn.html#sequential', 'minima/nn.py'),
'minima.nn.Sequential.__init__': ('nn.html#sequential.__init__', 'minima/nn.py'),
'minima.nn.Sequential.__iter__': ('nn.html#sequential.__iter__', 'minima/nn.py'),
'minima.nn.Sequential.__next__': ('nn.html#sequential.__next__', 'minima/nn.py'),
'minima.nn.Sequential.forward': ('nn.html#sequential.forward', 'minima/nn.py'),
'minima.nn.Softmax': ('nn.html#softmax', 'minima/nn.py'),
'minima.nn.Softmax.forward': ('nn.html#softmax.forward', 'minima/nn.py'),
1 change: 1 addition & 0 deletions minima/init.py
@@ -7,6 +7,7 @@
# %% ../nbs/02_init.ipynb 2
import math
import minima as mi
from functools import partial

# %% ../nbs/02_init.ipynb 4
def rand(
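The LSUV initializer named in the commit message (Layer-Sequential Unit-Variance, Mishkin & Matas, 2015) lives further down in `minima/init.py`; only its new `functools.partial` import is visible in this hunk. As a generic illustration of the technique rather than Minima's API, here is a NumPy sketch for a stack of linear+ReLU layers (the paper's orthonormal pre-initialization step is omitted for brevity):

```python
import numpy as np

def lsuv_init(weights, biases, x, tol=0.01, max_iters=10):
    """Rescale each layer's weights until its pre-activation variance on a batch is ~1."""
    for W, b in zip(weights, biases):
        for _ in range(max_iters):
            z = x @ W.T + b               # pre-activations of this layer on the batch
            if abs(z.std() - 1.0) < tol:
                break
            W /= z.std()                  # scale weights toward unit variance
        x = np.maximum(x @ W.T + b, 0.0)  # ReLU output feeds the next layer

rng = np.random.default_rng(0)
x = rng.normal(size=(256, 64))
weights = [rng.normal(size=(128, 64)), rng.normal(size=(10, 128))]
biases = [np.zeros(128), np.zeros(10)]
lsuv_init(weights, biases, x)
```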
40 changes: 25 additions & 15 deletions minima/nn.py
@@ -239,7 +239,7 @@ def __call__(self, *args, **kwargs):
# return self.grad_output


# %% ../nbs/03_nn.ipynb 7
# %% ../nbs/03_nn.ipynb 8
class Sequential(Module):
"""
A sequential container in Minima.
@@ -287,8 +287,18 @@ def forward(self, x: Tensor) -> Tensor:
x = module(x)
return x

def __iter__(self):
    self._iter_idx = 0
    return self

def __next__(self):
    if self._iter_idx < len(self.modules):
        res = self.modules[self._iter_idx]
        self._iter_idx += 1
        return res
    raise StopIteration()

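The new `__iter__`/`__next__` pair makes `Sequential` iterable over its sub-modules. A minimal usage sketch, not part of the diff, assuming the `Sequential(*modules)` constructor and the `Linear`/`ReLU` layers shown elsewhere in this file:

```python
from minima import nn

# Hypothetical layer sizes, for illustration only.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
)

for module in model:  # exercises the new __iter__/__next__ protocol
    print(type(module).__name__)  # Linear, ReLU, Linear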
# %% ../nbs/03_nn.ipynb 8

# %% ../nbs/03_nn.ipynb 9
class Linear(Module):
"""
A class representing a fully connected (linear) layer in a neural network.
@@ -357,7 +367,7 @@ def forward(self, X: Tensor) -> Tensor:
out = out + self.bias.broadcast_to(out.shape) if self.bias else out
return out

# %% ../nbs/03_nn.ipynb 13
# %% ../nbs/03_nn.ipynb 14
class Flatten(Module):
"""
A `Flatten` module in Minima.
@@ -383,12 +393,12 @@ def forward(self, X: Tensor) -> Tensor:
return X.reshape((X.shape[0], -1))


# %% ../nbs/03_nn.ipynb 14
# %% ../nbs/03_nn.ipynb 15
class ReLU(Module):
def forward(self, x: Tensor) -> Tensor:
return operators.relu(x)

# %% ../nbs/03_nn.ipynb 24
# %% ../nbs/03_nn.ipynb 25
class CrossEntropyLoss(Module):
"""
Cross-entropy loss module in Minima.
@@ -423,11 +433,11 @@ def forward(self, input: Tensor, target: Tensor) -> Tensor:
Returns:
Tensor: A single tensor that is the average cross-entropy loss.
"""
log_sum_exp_logits = ops.logsumexp(input, axes=(1, )).sum()
log_sum_exp_logits = operators.logsumexp(input, axes=(1, )).sum()
true_class_logits_sum = (input * init.one_hot(input.shape[1], target)).sum()
return (log_sum_exp_logits - true_class_logits_sum) / input.shape[0]

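For reference, the corrected `operators.logsumexp` call makes this the standard logsumexp form of cross-entropy: the mean over the batch of `logsumexp(logits_i) - logits_i[y_i]`. A small NumPy sketch, illustrative only and independent of Minima's `Tensor` type, that mirrors the computation:

```python
import numpy as np

def cross_entropy(logits: np.ndarray, target: np.ndarray) -> float:
    # Numerically stable logsumexp over the class axis.
    m = logits.max(axis=1, keepdims=True)
    log_sum_exp = m.squeeze(1) + np.log(np.exp(logits - m).sum(axis=1))
    # Logit of the true class for each row.
    true_class_logits = logits[np.arange(len(target)), target]
    return float((log_sum_exp - true_class_logits).mean())

logits = np.array([[2.0, 0.5, -1.0], [0.1, 0.2, 3.0]])
target = np.array([0, 2])
print(cross_entropy(logits, target))  # ≈ 0.1755
```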
# %% ../nbs/03_nn.ipynb 25
# %% ../nbs/03_nn.ipynb 26
class Softmax(Module):
"""
Softmax activation module in Minima.
@@ -451,7 +461,7 @@ class Softmax(Module):
```
"""

def forward(self, input: Tensor, target: Tensor) -> Tensor:
def forward(self, input: Tensor) -> Tensor:
"""
Computes the softmax of the input logits.
@@ -464,10 +474,10 @@ def forward(self, input: Tensor, target: Tensor) -> Tensor:
"""


exps = ops.exp(X - mi.autograd.ARRAY_API.max(input))
return exps / ops.summation(exps)
exps = operators.exp(input - mi.autograd.ARRAY_API.max(input))
return exps / operators.summation(exps)

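Subtracting the maximum before exponentiating is the usual overflow guard; since softmax(x) = softmax(x - c) for any constant c, the result is unchanged. An equivalent NumPy sketch, illustrative rather than Minima's implementation:

```python
import numpy as np

def softmax(x: np.ndarray) -> np.ndarray:
    exps = np.exp(x - x.max())  # largest exponent becomes exp(0) = 1
    return exps / exps.sum()

print(softmax(np.array([1000.0, 1001.0, 1002.0])))  # [0.090, 0.245, 0.665], no overflow
```

Note that the diff's version reduces with a global `max` and `summation`; if those default to reducing over every axis, a 2-D batch would be normalized over the whole tensor rather than row by row.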
# %% ../nbs/03_nn.ipynb 34
# %% ../nbs/03_nn.ipynb 35
class LayerNorm1d(Module):
"""
1D Layer normalization module in Minima.
@@ -526,7 +536,7 @@ def forward(self, x: Tensor) -> Tensor:
return self.weight.broadcast_to(x.shape) * x_normed + self.bias.broadcast_to(x.shape)


# %% ../nbs/03_nn.ipynb 37
# %% ../nbs/03_nn.ipynb 38
class BatchNorm1d(Module):
"""
1D Batch normalization module in Minima.
@@ -631,7 +641,7 @@ def forward(self, x: Tensor) -> Tensor:
x_normed = (x - mean.broadcast_to(x.shape)) / (std.broadcast_to(x.shape) + self.eps) ** .5
return self.weight.broadcast_to(x.shape) * x_normed + self.bias.broadcast_to(x.shape)

# %% ../nbs/03_nn.ipynb 38
# %% ../nbs/03_nn.ipynb 39
class Dropout(Module):
"""
Dropout Layer for a Neural Network.
@@ -684,7 +694,7 @@ def forward(self, x: Tensor) -> Tensor:
return x


# %% ../nbs/03_nn.ipynb 39
# %% ../nbs/03_nn.ipynb 40
class Residual(Module):
"""
Residual Layer for a Neural Network.
@@ -729,7 +739,7 @@ def forward(self, x: Tensor) -> Tensor:
"""
return x + self.fn(x)

# %% ../nbs/03_nn.ipynb 40
# %% ../nbs/03_nn.ipynb 41
class Identity(Module):
def forward(self, x):
return x
2 changes: 1 addition & 1 deletion minima/operators.py
@@ -520,7 +520,7 @@ def compute(self, a: NDArray) -> NDArray:
Returns:
The result of applying ReLU to a.
"""
self.out = ARRAY_API.clip(a, a_min=0)
self.out = ARRAY_API.clip(a, a_min=0, a_max=None)
return self.out

def gradient(self, out_grad: Tensor, node: Tensor) -> Tuple[Tensor,]:
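The one-line change in `operators.py` matters because, in the NumPy releases current at the time, `clip` insists on an `a_max` argument (which may be `None`), so `ARRAY_API.clip(a, a_min=0)` raises a `TypeError`. A quick check of the fixed call and an equivalent formulation, assuming `ARRAY_API` is NumPy:

```python
import numpy as np

a = np.array([-2.0, -0.5, 0.0, 1.5])

relu_clip = np.clip(a, a_min=0, a_max=None)  # what the fixed line does: lower bound only
relu_max = np.maximum(a, 0)                  # a common alternative for ReLU

assert np.array_equal(relu_clip, relu_max)   # both give [0., 0., 0., 1.5]
```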
4 changes: 2 additions & 2 deletions minima/optim.py
@@ -145,7 +145,7 @@ class AdaGrad(Optimizer):
def __init__(
self,
params, # The parameters of the model to be optimized.
lr=0.01, # The initial learning rate.
lr=0.001, # The initial learning rate.
wd=0.0, # The weight decay (L2 regularization).
eps=1e-7, # A small constant for numerical stability.
):
@@ -289,7 +289,7 @@ class Adam(Optimizer):
def __init__(
self,
params, # `params` is the list of parameters
lr=0.01, # `lr` is the learning rate $\alpha$
lr=1e-5, # `lr` is the learning rate $\alpha$
beta1=0.9, # The exponential decay rate for the first moment estimates. Default is 0.9.
beta2=0.999, # The exponential decay rate for the second moment estimates. Default is 0.999.
eps=1e-8, # `eps` is $\hat{\epsilon}$ or $\epsilon$ based on `optimized_update`
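Since the default learning rates for `AdaGrad` (0.01 → 0.001) and `Adam` (0.01 → 1e-5) changed, callers that relied on the old values now need to pass `lr` explicitly. A hypothetical sketch, assuming the usual `parameters()` accessor on `minima.nn.Module`:

```python
from minima import nn, optim

# Made-up model; sizes are for illustration only.
model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))

opt_new = optim.Adam(model.parameters())           # picks up the new default lr=1e-5
opt_old = optim.Adam(model.parameters(), lr=0.01)  # reproduces the previous default
```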
