In [61]:
from collections import deque
import math
import random

class Element:
	"simple element with minigrad support"
	def __init__(self, data, _children=[], _op=''):
		self.data = data
		self.grad = 0
		self._backward = lambda: None
		self._prev = _children
		self._op = _op

	def __repr__(self):
		return f" Element(data={self.data}, grad={self.grad})"

	def __add__(self, other):
		out = Element(self.data + other.data, [self, other], "+")
		def _backward():
			self.grad += out.grad
			other.grad += out.grad
		out._backward = _backward
		return out
	
	def __sub__(self, other):
		out = Element(self.data - other.data, [self, other], "-")
		def _backward():
			self.grad += out.grad
			other.grad += out.grad
		out._backward = _backward
		return out

	def __mul__(self, other):
		out = Element(self.data * other.data, [self, other], "*")
		def _backward():
			self.grad += out.grad * other.data
			other.grad += out.grad * self.data
		out._backward = _backward
		return out

	def __truediv__(self, other):
		out = Element(self.data / other.data, [self,other], "/")
		def _backward():
			self.grad += out.grad / other.data  
			other.grad += -out.grad * (self.data / (other.data ** 2)) 
		out._backward = _backward
		return out
	
	def sqrt(self):
		out = Element(math.sqrt(self.data), [self], "sqrt")
		def _backward():
			self.grad += out.grad * (1 /(2* math.sqrt(self.data)))
		out._backward = _backward
		return out

	def relu(self):
		out = Element(self.data, [self], "relu")
		def _backward():
			if self.data >= 0:
				self.grad += 1
			else:
				self.grad += 0
		out._backward = _backward
		if self.data < 0:
			out.data = 0
		return out
			
	def backward(self):
		"reverse grad calculation using Breadth-first search"
		stk = deque()
		self.grad = 1 
		stk.append([self])
		while len(stk) > 0:
			nodes = stk.pop()
			for node in nodes:
				node._backward()
				stk.append(node._prev)
		return


class mTens:
    def __init__(self, vector:list[Element]) -> None:
        self.vector = vector

    def __repr__(self):
        st = ""
        for i in self.vector:
            st += " " + i.__repr__()
        return st

    def __add__(self, other):
        out = self.vector.copy()
        for i,item in enumerate(out):
            out[i] += other[i]
        return mTens(out)
    
    def __mul__(self, other):
        out = Element(0)
        for i, item in enumerate(self.vector):
            out += self.vector[i] * other[i]
        return mTens([out])
    
    def __getitem__(self, idx):
        return self.vector[idx]
    
    def backward(self):
        for i, item in enumerate(self.vector):
            item.backward()
        return

In [62]:
class BatchNorm:
    def __init__(self, n_features, momentum=0.9):
        self.momentum = momentum
        self.gamma = [Element(1.0) for _ in range(n_features)]  
        self.beta = [Element(0.0) for _ in range(n_features)]   
        self.running_mean = [0.0] * n_features
        self.running_var = [1.0] * n_features

    def __call__(self, x: mTens, training = True):
        normed = []
        if training:
            mean = sum([item.data for item in x.vector]) / len(x.vector)
            var = sum([(item.data - mean)**2 for item in x.vector]) / len(x.vector)
            for i, item in enumerate(x.vector):
                self.running_mean[i] = self.momentum * self.running_mean[i] + (1 - self.momentum) * mean
                self.running_var[i] = self.momentum * self.running_var[i] + (1 - self.momentum) * var
                norm = (item - Element(mean)) / Element(math.sqrt(var + 1e7))
                normed.append(norm * self.gamma[i] + self.beta[i])
            return mTens(normed)
        else:
            for i, item in enumerate(x.vector):
                norm = (item - Element(self.running_mean[i])) / Element(math.sqrt(self.running_var[i] + 1e7))
                normed.append(norm * self.gamma[i] + self.beta[i])
            return mTens(normed)
    

class Linear:
    def __init__(self, in_features, out_features):
        self.weights = [[Element(random.random()) for _ in range(in_features)] for _ in range(out_features)]
        self.biases = [Element(0.0) for _ in range(out_features)]
    
    def __call__(self, x: mTens):
        out = []
        for i, bias in enumerate(self.biases):
            wx = Element(0.0)
            for j, elem in enumerate(x.vector):
                wx += self.weights[i][j] * elem
            out.append(wx + bias)
        return mTens(out)
    
class Dropout:
    def __init__(self, p=0.5):
        self.p = p
        self.mask = None
    
    def __call__(self, x: mTens, training=True):
        if training:
            self.mask = [Element(1.0 if random.random() > self.p else 0.0) for _ in x.vector]
            return mTens([item * mask_item for item, mask_item in zip(x.vector, self.mask)])
        else:
            return x
        
class Relu:
    def __init__(self):
        None
        
    def __call__(self, x:mTens):
        for i in x.vector:
            i.relu()
        return x

In [63]:
ten1 = mTens([Element(10), Element(15), Element(-2)])
ten2 = mTens([Element(5), Element(3), Element(13)])


In [64]:
ten3 = ten1 + ten2
ten4 = ten3 * ten2
ten4.backward()

In [65]:
print(ten1)

  Element(data=10, grad=5)  Element(data=15, grad=3)  Element(data=-2, grad=13)


In [66]:
import random

def generate_dataset(num_samples=10):
    dataset = []
    targets = []
    
    for _ in range(num_samples):
        input_vector = [Element(random.uniform(0, 10)) for _ in range(6)]
        x = mTens(input_vector)
        target = sum([item.data for item in x.vector]) + random.uniform(-1, 1)
        dataset.append(x)
        targets.append(Element(target))
    return dataset, targets

dataset, targets = generate_dataset(num_samples=1)
for i in range(len(dataset)):
    print(f"Input {i+1}: {dataset[i]}",f"Target {i+1}: {targets[i]}" )

Input 1:   Element(data=9.712960656645453, grad=0)  Element(data=7.921003550675145, grad=0)  Element(data=2.9412787292554565, grad=0)  Element(data=5.063181618831124, grad=0)  Element(data=9.895737228564357, grad=0)  Element(data=5.114499851178089, grad=0) Target 1:  Element(data=39.74175240368594, grad=0)


In [67]:
from typing import Any


class model:
    def __init__(self) -> None:
        self.train = True
        self.l1 = Linear(6, 6)
        self.a1 = Relu()
        self.l2 = Linear(6, 4)
        self.b1 = BatchNorm(4)
        self.d1 = Dropout()
        self.a2 = Relu()
        self.l3 = Linear(4,1)

    def __call__(self, x, *args: Any, **kwds: Any) -> Any:
        x = self.l1(x)
        x = self.a1(x)
        x = self.l2(x)
        x = self.b1(x, self.train)
        x = self.d1(x, self.train)
        x = self.a2(x)
        x = self.l3(x)
        return x

    def set_training(self, train:bool):
        self.train = train


In [68]:
m1 = model()
output = m1(dataset[0])
output.backward()
print(output)

  Element(data=-0.0004578234683437069, grad=1)


In [69]:
m1.set_training(False)
output = m1(dataset[0])
print(output)

  Element(data=0.036292958976520656, grad=0)


Осталось реализовать матричные операции, лосс и оптимизатор.