In [2]:
from tinygrad.tensor import Tensor

# Build a tensor from a one-element Python list.
a = Tensor([1])

We want to extend `Tensor` so that it supports the GPU in an elegant way.

In [None]:
class Tensor:
    """Minimal tensor wrapper that places its data on a chosen device.

    Args:
        data: The raw values to wrap.
        device: ``"GPU"`` wraps ``data`` in a ``GPUBuffer``; any other value
            (default ``"CPU"``) stores ``data`` unchanged.
    """

    def __init__(self, data, device="CPU"):
        self.data = self._move_data(data, device)

    @staticmethod
    def _move_data(data, device):
        """Return ``data`` in the representation used by ``device``.

        Declared as a static method because it needs no instance state; the
        original definition omitted ``self``, so the call
        ``self._move_data(data, device)`` raised ``TypeError``.
        """
        if device == "GPU":
            # GPUBuffer is resolved at call time, so it only has to exist
            # once a GPU tensor is actually requested.
            return GPUBuffer(data)
        return data

Next, we can define `GPUBuffer` on top of OpenCL (via `pyopencl`).

In [None]:
import pyopencl as cl
class GPUBuffer:
    """Device-side copy of ``data`` held in an OpenCL buffer.

    Args:
        data: Array-like host values; converted to a contiguous float32
            NumPy array before being copied to the device.
        ctx: Optional ``pyopencl.Context`` to allocate in. When omitted, a
            context is created on first use and shared by every later
            ``GPUBuffer`` so that buffers can be used together.

    Attributes set on the instance:
        shape: Shape of the host array that was uploaded.
        dtype: NumPy dtype of the uploaded data (float32).
        cl: The underlying ``pyopencl.Buffer``.
    """

    # Lazily created context shared by all buffers that do not pass `ctx`.
    _ctx = None

    def __init__(self, data, ctx=None) -> None:
        import numpy as np  # local import: the cell only imports pyopencl

        if ctx is None:
            if GPUBuffer._ctx is None:
                # interactive=False avoids a device-selection prompt.
                GPUBuffer._ctx = cl.create_some_context(interactive=False)
            ctx = GPUBuffer._ctx

        host = np.ascontiguousarray(data, dtype=np.float32)
        self.shape = host.shape
        self.dtype = host.dtype
        # cl.Buffer needs a context and flags; with COPY_HOST_PTR the buffer
        # size is taken from `hostbuf` and its contents are copied over.
        flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
        self.cl = cl.Buffer(ctx, flags, hostbuf=host)

In order to register all the operations defined for both the CPU and the GPU, we need to import both modules: `import ops_cpu; import ops_gpu`.

In [None]:
# CPU
class Mul(Function):
  """Elementwise multiplication, CPU version."""

  @staticmethod
  def forward(ctx, x, y):
    """Stash both operands on ``ctx`` for the backward pass and return ``x*y``."""
    ctx.save_for_backward(x, y)
    product = x*y
    return product

  @staticmethod
  def backward(ctx, grad_output):
    """Return the pair of gradients for the two operands saved in ``forward``.

    Each operand's gradient is the other operand times ``grad_output``,
    passed through ``unbroadcast`` with that operand's shape
    (presumably to undo broadcasting -- confirm against ``unbroadcast``).
    """
    lhs, rhs = ctx.saved_tensors
    d_lhs = unbroadcast(rhs*grad_output, lhs.shape)
    d_rhs = unbroadcast(lhs*grad_output, rhs.shape)
    return d_lhs, d_rhs

# Register this implementation under the name 'mul' (no device argument given).
register('mul', Mul)

#GPU
class Mul(Function):
  """Elementwise multiplication, GPU version built on ``binary_op``."""

  @staticmethod
  def forward(ctx, x, y):
    """Stash both operands on ``ctx`` and return ``binary_op`` applied with 'a*b'."""
    ctx.save_for_backward(x, y)
    product = binary_op(ctx, 'a*b', x, y)
    return product

  @staticmethod
  def backward(ctx, grad_output):
    """Return the pair of gradients for the two operands saved in ``forward``.

    Both products are computed first, then each is passed through
    ``unbroadcast`` with the matching operand's shape.
    """
    lhs, rhs = ctx.saved_tensors
    d_lhs = binary_op(ctx, 'a*b', rhs, grad_output)
    d_rhs = binary_op(ctx, 'a*b', lhs, grad_output)
    reduced_lhs = unbroadcast(ctx, d_lhs, lhs.shape)
    reduced_rhs = unbroadcast(ctx, d_rhs, rhs.shape)
    return reduced_lhs, reduced_rhs

# Register this implementation under the name 'mul' for Device.GPU.
register('mul', Mul, device=Device.GPU)

CPU operation is straightforward, but on the GPU, we 