In [2]:
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg

ModuleNotFoundError: No module named 'pycuda'

In [2]:
!pip install pycuda
!pip install scikit-cuda


Collecting pycuda
  Downloading pycuda-2024.1.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2023.1.1-py2.py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting mako (from pycuda)
  Downloading Mako-1.3.0-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2024.1-cp310-cp310-linux_x86_64.whl size=661205 sha256=052e6ab81

In [6]:
# Smoke test for the PyCUDA install: push a small float32 vector through gpuarray.
a_gpu = gpuarray.to_gpu(np.array([0, 1, 2, 3]).astype(np.float32))
# Scale the GPU array, then .get() the result so it can be printed.
a_doubled = (2*a_gpu).get()
# Dot product of the vector with itself (0 + 1 + 4 + 9 = 14 for this input).
a_dot = gpuarray.dot(a_gpu, a_gpu).get()
print(a_doubled)
print(a_gpu)
print(a_dot)

  globals().clear()


*** compiler output in /tmp/tmp3eua3wuv
*** compiler output in /tmp/tmph9g8xmpp
[0. 2. 4. 6.]
[0. 1. 2. 3.]
14.0


In [None]:
import numpy as np

: 

In [12]:
# Demo of Tensor.dot: a (1, 3) row vector times a (3, 1) column vector.
# Requires the cell that defines `Tensor` to have been run first.
a = np.array([1, 2, 3]).reshape(1, 3)

b = np.array([4, 5, 6]).reshape(3, 1)



# Wrap the raw arrays so the operation is recorded for backprop.
x = Tensor(a, _label='a')
y = Tensor(b, _label='b')
print(x)
print(y)
# z holds the (1, 1) product; the label is only used by Tensor.__repr__.
z = x.dot(y); z._label = 'z'
print(z)

[[1 2 3]]
(3, 1)
[[ 8]
 [10]
 [12]]
Label: a; Value: [[1 2 3]]; Grad: 0
Label: b; Value: [[4]
 [5]
 [6]]; Grad: 0
Label: z; Value: [[32]]; Grad: 0


In [13]:
# Seed the output gradient with 1 and push it one step back to z's operands.
# Relies on `z` from the Tensor.dot demo cell.
z.grad = 1
z.backward()

# The parents (x, y) now carry the gradients written by z.backward().
print(z._parent)

(Label: a; Value: [[1 2 3]]; Grad: [[4 5 6]], Label: b; Value: [[4]
 [5]
 [6]]; Grad: [[1]
 [2]
 [3]])


In [2]:
class Tensor:
  """Minimal autograd value: a NumPy payload plus the links needed for one backprop step.

  Each operation returns a new Tensor whose `backward` attribute is a
  zero-argument closure. Calling it reads `out.grad` and writes the resulting
  gradients onto the two operands. Gradients are assigned, not accumulated,
  and `backward` does not recurse into the operands' own `backward`.

  Args:
    value: wrapped array (or scalar).
    _parent: tuple of operand tensors that produced this one; None for a leaf.
    _op: short string naming the producing operation ('+', '.', '@'); None for a leaf.
    grad: initial gradient stored on the tensor (defaults to 0).
    _label: optional name shown by __repr__.
  """
  def __init__(self, value, _parent = None, _op = None, grad = 0, _label = None):
    self.value = value
    self._parent = _parent
    self._op = _op
    # Honor the caller-supplied starting gradient (it used to be dropped).
    self.grad = grad
    self._label = _label

  def __repr__(self):
    return f'Label: {self._label}; Value: {self.value}; Grad: {self.grad}'

  def backward(self):
    """Default backward step for leaf tensors: there is nothing to propagate.

    Operation results shadow this with a per-instance closure.
    """
    return None

  @staticmethod
  def _unbroadcast(grad, shape):
    """Reduce `grad` so it matches an operand of `shape` that NumPy broadcast."""
    grad = np.asarray(grad)
    if grad.ndim < len(shape):
      # A lower-rank seed (e.g. the scalar 1) still broadcasts against `shape`.
      return grad
    lead = grad.ndim - len(shape)
    if lead:
      # Axes that broadcasting prepended to the operand.
      grad = grad.sum(axis=tuple(range(lead)))
    stretched = tuple(ax for ax, size in enumerate(shape)
                      if size == 1 and grad.shape[ax] != 1)
    if stretched:
      # Axes where a size-1 operand dimension was stretched (e.g. a column bias).
      grad = grad.sum(axis=stretched, keepdims=True)
    return grad

  def __add__(self, other):
    """Element-wise (broadcasting) sum; the gradient flows unchanged to both operands."""
    v = self.value + other.value
    out = Tensor(v, (self, other), '+')
    def backward():
      self.grad = Tensor._unbroadcast(out.grad, np.shape(self.value))
      other.grad = Tensor._unbroadcast(out.grad, np.shape(other.value))
    out.backward = backward
    return out

  def dot(self, other):
    """Vector dot product, kept separate from matmul because its backward rule differs.

    The backward step scales each operand's transpose by `out.grad`.
    """
    v = np.dot(self.value, other.value)
    out = Tensor(v, (self, other), '.')
    def backward():
      self.grad = out.grad * other.value.T
      other.grad = out.grad * self.value.T
    out.backward = backward
    return out

  def matmul(self, other):
    """Matrix product `self @ other`, e.g. an (n, d) weight matrix times a (d, 1) input.

    Backward: d/dself = out.grad @ other.T and d/dother = self.T @ out.grad,
    so `out.grad` must be an array shaped like the product.
    """
    v = np.matmul(self.value, other.value)
    out = Tensor(v, (self, other), '@')
    def backward():
      self.grad = np.matmul(out.grad, other.value.T)
      other.grad= np.matmul(self.value.T, out.grad)
    out.backward = backward
    return out







In [None]:
class Dense_Layer:
  """Fully connected layer whose parameters are `Tensor` objects (computes W @ x + b)."""
  def __init__(self, input_size, output_size):
    weight_shape = (output_size, input_size)
    bias_shape = (output_size, 1)
    # Weights are drawn from a standard normal; the bias column starts at zero.
    self.weights = Tensor(np.random.normal(size=weight_shape))
    self.bias = Tensor(np.zeros(bias_shape))

  def forward(self, input):
    """Cache `input` on the layer and return the Tensor for weights @ input + bias."""
    self.input = input
    return self.weights.matmul(input) + self.bias



In [None]:
class Dense(Layer):
  """Fully connected layer on raw NumPy arrays: forward computes W @ x + b.

  Inputs are expected one sample per column, i.e. shape (input_size, batch).
  Both passes print debugging information.
  """
  def __init__(self, input_size, output_size):
    # Weights ~ N(0, 1) with shape (output_size, input_size); bias is a zero column.
    self.weights = np.random.normal(size=(output_size, input_size))
    self.bias = np.zeros((output_size, 1))

  def forward(self, input):
    """Cache `input` for the backward pass and return the pre-activation W @ input + b."""
    self.input = input
    print("forward pass")
    print(f"self.weights shape: {self.weights.shape}")
    print(f"input shape: {input.shape}")
    print(f"input value: {input}")
    # Out-of-place add: an in-place `+=` fails when the product is integer and the bias is float.
    return np.dot(self.weights, input) + self.bias

  def backward(self, output_gradient, learning_rate, input):
    """Apply one gradient-descent step and return the gradient w.r.t. the layer input.

    Args:
      output_gradient: dE/d(pre-activation), shape (output_size, batch).
      learning_rate: step size for the parameter update.
      input: unused; the input cached by `forward` is used instead.
    """
    print("backward pass")
    print(f"output_gradient shape: {output_gradient.shape}")
    print(f"input value: {self.input}")
    weights_gradient = np.matmul(output_gradient, self.input.T)
    # The bias is shared by every sample, so its gradient sums over the batch axis.
    bias_gradient = np.sum(output_gradient, axis=1, keepdims=True)
    # Computed before the update so it uses the weights from the forward pass.
    input_gradient = np.matmul(self.weights.T, output_gradient)
    print(f"weights_gradient value: {weights_gradient}")
    print(f"input_gradient value: {input_gradient}")
    # Rebind instead of `-=`: works for integer parameters with a float step and
    # leaves arrays the caller assigned to the layer untouched.
    self.weights = self.weights - learning_rate * weights_gradient
    self.bias = self.bias - learning_rate * bias_gradient
    return input_gradient




In [22]:
# Demo of Tensor.matmul and its backward step: b is a (3, 3) matrix, a is a (3, 1) column.
a = Tensor(np.array([1, 2, 3]).reshape(3, 1), _label= 'a')
b = Tensor(np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]).reshape(3, 3), _label= 'b')

# c = b @ a; seed its gradient with a column of ones before back-propagating.
c = b.matmul(a) ; c._label = 'c'; c.grad = np.array([1, 1, 1]).reshape(3, 1)

print(c)
c.backward()
# The parents (b, a) now carry the gradients written by c.backward().
print(c._parent)
print(c._op)

Label: c; Value: [[14]
 [14]
 [14]]; Grad: [[1]
 [1]
 [1]]
(Label: b; Value: [[1 2 3]
 [1 2 3]
 [1 2 3]]; Grad: [[1 2 3]
 [1 2 3]
 [1 2 3]], Label: a; Value: [[1]
 [2]
 [3]]; Grad: [[3]
 [6]
 [9]])
@


In [17]:
def lol():
  """Run one forward/backward pass through a 3x3 Dense layer with hand-picked parameters."""
  layer = Dense(3, 3)

  column = np.array([[1], [2], [3]])
  fixed_weights = np.array([[1, 2, 3],
                            [1, 2, 3],
                            [1, 2, 3]])
  zero_bias = np.zeros((3, 1), dtype=int)

  # Replace the random initialisation so the printed gradients can be checked by hand.
  layer.weights, layer.bias = fixed_weights, zero_bias

  print(f"self.weights = {layer.weights}")

  layer.forward(column)

  # Unit upstream gradient and learning rate 1.
  unit_gradient = np.ones((3, 1), dtype=int)
  layer.backward(unit_gradient, 1, column)

lol()

self.weights = [[1 2 3]
 [1 2 3]
 [1 2 3]]
forward pass
self.weights shape: (3, 3)
input shape: (3, 1)
input value: [[1]
 [2]
 [3]]
backward pass
output_gradient shape: (3, 1)
input value: [[1]
 [2]
 [3]]
weights_gradient shape: [[1 2 3]
 [1 2 3]
 [1 2 3]]
input_gradient shape: [[3]
 [6]
 [9]]


In [None]:

class DataLoader:
    """Iterate over (inputs, targets) mini-batches laid out one sample per column.

    Args:
        inputs: array of shape (n_samples, n_features).
        desired_outputs: array whose first axis is n_samples.
        batch_size: number of samples per batch; must be positive.
        shuffle: reshuffle the sample order at the start of every iteration.

    Yields:
        (batch_inputs, batch_outputs), both transposed so that samples run along
        the last axis. This keeps targets aligned with the (features, batch)
        predictions of the layers even when batch_size > 1.

    Raises:
        ValueError: if batch_size is not positive.
    """
    def __init__(self, inputs, desired_outputs, batch_size, shuffle=True):
        if batch_size <= 0:
            # A batch size of 0 would never consume any index and iterate forever.
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        self.inputs = inputs
        self.desired_outputs = desired_outputs
        self.batch_size = batch_size
        self.shuffle = shuffle
        # No epoch in progress yet: a stray next() stops cleanly instead of
        # failing with AttributeError.
        self.n_samples = 0
        self.indices = np.arange(0)

    def __iter__(self):
        # Get the total number of data points
        self.n_samples = self.inputs.shape[0]

        # Fresh index pool for this epoch
        self.indices = np.arange(self.n_samples)

        # Shuffle if required
        if self.shuffle:
            np.random.shuffle(self.indices)

        return self

    def __next__(self):
        # If all data has been seen, stop the iteration
        if len(self.indices) == 0:
            raise StopIteration

        # Take the next batch of indices off the front of the pool
        current_indices = self.indices[:self.batch_size]
        self.indices = self.indices[self.batch_size:]

        # Transpose both arrays so that inputs and targets share the same
        # (…, batch) layout; for batch_size == 1 the targets are unchanged.
        batch_inputs = self.inputs[current_indices].T
        batch_outputs = self.desired_outputs[current_indices].T

        return batch_inputs, batch_outputs



In [None]:
# Example usage: iterate the XOR truth table one sample at a time.
inputs = np.reshape(([0, 0], [0, 1], [1, 0], [1 , 1]), (4, 2)) # 4 samples, 2 features each
#inputs = inputs.T
print(inputs)
#inputs = inputs.T
desired_outputs = np.reshape((0, 1, 1, 0), (4, 1))  # 4 samples, 1 output each (XOR of the two inputs)
batch_size = 1

dataloader = DataLoader(inputs, desired_outputs, batch_size)


# Each batch's inputs arrive transposed, i.e. as a (features, batch) column.
for batch_inputs, batch_outputs in dataloader:
    print("Batch inputs:", batch_inputs)
    print("Batch outputs:", batch_outputs)
    #test = test_layer.forward(batch_inputs.T)
    #print(test)
    # Here you can feed the batch_inputs and batch_outputs to your model

[[0 0]
 [0 1]
 [1 0]
 [1 1]]
Batch inputs: [[0]
 [0]]
Batch outputs: [[0]]
Batch inputs: [[1]
 [0]]
Batch outputs: [[1]]
Batch inputs: [[0]
 [1]]
Batch outputs: [[1]]
Batch inputs: [[1]
 [1]]
Batch outputs: [[0]]


In [None]:
# Build the XOR data set and the loader used for training.
inputs = np.reshape(([0, 0], [0, 1], [1, 0], [1 , 1]), (4, 2)) # 4 samples, 2 features each
#inputs = inputs.T
desired_outputs = np.reshape((0, 1, 1, 0), (4, 1))  # 4 samples, 1 output each (XOR of the two inputs)
batch_size = 1

dataloader = DataLoader(inputs, desired_outputs, batch_size)

In [3]:
def relu(input):
  """Rectified linear unit: element-wise maximum of `input` and 0."""
  floor = 0
  return np.maximum(input, floor)

def relu_prime(input):
  """Derivative of ReLU: 1 where `input` is strictly positive, 0 elsewhere (including at 0)."""
  is_positive = input > 0
  return np.where(is_positive, 1, 0)

def mse(y_hat, y):
  """Mean squared error between prediction `y_hat` and target `y`, averaged over all elements."""
  residual = y_hat - y
  squared = np.power(residual, 2)
  return np.mean(squared)

def mse_prime(y_hat, y):
  """Gradient of `mse` with respect to `y_hat`: 2 * (y_hat - y) / number of elements in `y`."""
  residual = y_hat - y
  doubled = 2 * residual
  return doubled / np.size(y)


class Neural_Net:
  """Sequential stack of layers trained with per-batch gradient descent on MSE.

  Each layer must provide `forward(input)` and
  `backward(output_gradient, learning_rate, input)`.
  """
  def __init__ (self, layers):
    # Stored as given; the caller's sequence is not copied.
    self.layers = layers

  def add_layer (self, layer):
    """Append `layer` to the end of the stack and return the layer list."""
    if not isinstance(self.layers, list):
      # Layers may have been supplied as a tuple, which has no append().
      self.layers = list(self.layers)
    self.layers.append(layer)
    return self.layers


  def forward(self, input):
    """Feed `input` through every layer in order and return the final output."""
    for layer in self.layers:
      input = layer.forward(input)

    return input

  def error(self, prediction, real):
    """Mean squared error between `prediction` and `real`."""
    return mse(prediction, real)

  def backward(self, learning_rate, prediction, real, input):
    """Back-propagate the MSE gradient through the layers, last to first.

    Each layer receives the gradient returned by the layer after it and
    applies its own parameter update. `input` is forwarded unchanged to
    every layer's `backward`.
    """

    output_gradient = mse_prime(prediction, real)
    print(f"gradient of error wrt prediction shape {output_gradient.shape}")

    for layer in reversed(self.layers):
      output_gradient = layer.backward(output_gradient, learning_rate, input)
    return


  def train(self, epochs, learning_rate, data_loader):
    """Train for `epochs` passes over `data_loader`, printing the mean batch error per epoch.

    Args:
      epochs: number of full passes over the data.
      learning_rate: step size handed to every layer's backward().
      data_loader: iterable yielding (input_data, desired_output) pairs.
    """
    for epoch in range(0, epochs):
      total_error = 0
      n_batches = 0
      for input_data, desired_output in data_loader:
        prediction = self.forward(input_data)
        # Accumulate: overwriting here would report only the last batch's error.
        total_error += self.error(prediction, desired_output)
        self.backward(learning_rate, prediction, desired_output, input_data)
        n_batches += 1
      # Each batch error is already a mean, so average over the number of batches.
      error = total_error / n_batches if n_batches else 0.0
      print(f"Error for epoch {epoch}: {error} ")












In [4]:
class Layer:
  """Base class for network layers; subclasses implement forward and backward.

  The methods take `self` (they were declared without it, so the base class
  could not be instantiated) and mirror the signatures used by the subclasses
  in this notebook.
  """
  def __init__(self):
    return

  def forward(self, input):
    """Compute the layer output for `input`; must be overridden."""
    raise NotImplementedError("Layer subclasses must implement forward()")

  def backward(self, output_gradient, learning_rate, input):
    """Propagate `output_gradient` to the layer input; must be overridden."""
    raise NotImplementedError("Layer subclasses must implement backward()")


In [16]:
class Dense(Layer):
  """Fully connected layer on raw NumPy arrays: forward computes W @ x + b.

  Inputs are expected one sample per column, i.e. shape (input_size, batch).
  Both passes print debugging information.
  """
  def __init__(self, input_size, output_size):
    # Weights ~ N(0, 1) with shape (output_size, input_size); bias is a zero column.
    self.weights = np.random.normal(size=(output_size, input_size))
    self.bias = np.zeros((output_size, 1))

  def forward(self, input):
    """Cache `input` for the backward pass and return the pre-activation W @ input + b."""
    self.input = input
    print("forward pass")
    print(f"self.weights shape: {self.weights.shape}")
    print(f"input shape: {input.shape}")
    print(f"input value: {input}")
    # Out-of-place add: an in-place `+=` fails when the product is integer and the bias is float.
    return np.dot(self.weights, input) + self.bias

  def backward(self, output_gradient, learning_rate, input):
    """Apply one gradient-descent step and return the gradient w.r.t. the layer input.

    Args:
      output_gradient: dE/d(pre-activation), shape (output_size, batch).
      learning_rate: step size for the parameter update.
      input: unused; the input cached by `forward` is used instead.
    """
    print("backward pass")
    print(f"output_gradient shape: {output_gradient.shape}")
    print(f"input value: {self.input}")
    weights_gradient = np.matmul(output_gradient, self.input.T)
    # The bias is shared by every sample, so its gradient sums over the batch axis.
    bias_gradient = np.sum(output_gradient, axis=1, keepdims=True)
    # Computed before the update so it uses the weights from the forward pass.
    input_gradient = np.matmul(self.weights.T, output_gradient)
    print(f"weights_gradient value: {weights_gradient}")
    print(f"input_gradient value: {input_gradient}")
    # Rebind instead of `-=`: works for integer parameters with a float step and
    # leaves arrays the caller assigned to the layer untouched.
    self.weights = self.weights - learning_rate * weights_gradient
    self.bias = self.bias - learning_rate * bias_gradient
    return input_gradient




In [None]:
class Activation(Layer):
  """Parameter-free layer that applies an element-wise activation function.

  Args:
    activation: callable applied to the input in the forward pass.
    act_prime: callable returning the activation's derivative at a given input.
  """
  def __init__(self, activation, act_prime):
    self.activation = activation
    self.act_prime = act_prime

  def forward(self, input):
    """Cache the pre-activation `input` and return activation(input)."""
    self.input = input
    return self.activation(input)

  def backward(self, output_gradient, learning_rate, input):
    """Chain rule: derivative at the cached input times the upstream gradient.

    `learning_rate` and `input` are accepted for interface compatibility only;
    the layer has nothing to update.
    """
    # Returning only act_prime(self.input) would discard the upstream gradient.
    return self.act_prime(self.input) * output_gradient


In [None]:
class Relu(Activation):
  """Activation layer wired to `relu` / `relu_prime`, with debug printing in backward."""
  def __init__(self):
    # Lambdas keep the lookup of relu / relu_prime deferred until call time.
    super().__init__(lambda x : relu(x), lambda x : relu_prime(x))

  def backward(self, output_gradient, learning_rate, input):
    """Return relu'(cached input) * output_gradient, element-wise."""
    pre_activation = self.input
    print(f"output_gradient shape of relu layer (grad of error wrt activation value): {output_gradient.shape}")
    print(f"input shape of relu layer (pre-activation_value): {pre_activation.shape}")
    print(f"input value: {pre_activation}")
    local_slope = self.act_prime(pre_activation)
    return np.multiply(local_slope, output_gradient)


In [None]:
# Network for the XOR data: 2 inputs -> 3 hidden units -> 1 output,
# with a ReLU after each dense layer.
layer_list = [
    Dense(2, 3),
    Relu(),
    Dense(3, 1),
    Relu()
]

# Neural_Net keeps a reference to this list; it does not copy it.
network = Neural_Net(layer_list)




In [None]:
def lol():
  """Smoke-test Relu.backward on a single positive pre-activation.

  backward() reads the input cached by forward() and needs the upstream
  gradient as an array, since it prints `output_gradient.shape`. The
  learning rate is irrelevant for an activation layer, so 0 is passed.
  """
  test = Relu()
  pre_activation = np.array([[1.0]])
  grad = np.array([[1.0]])
  test.forward(pre_activation)
  print(test.backward(grad, 0, pre_activation))

In [None]:
# Train for 10000 epochs with learning rate 0.1 on the XOR loader.
# NOTE(review): the layers print on every pass, so this floods the cell output
# (the saved output below was truncated to its last 5000 lines).
network.train(10000, 0.1, dataloader)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 [0.        ]]
output_gradient shape of relu layer (grad of error wrt activation value): (3, 1)
input shape of relu layer (pre-activation_value): (3, 1)
input value: [[-0.14688832]
 [ 2.00365257]
 [-0.41399754]]
backward pass
output_gradient shape: (3, 1)
self.weights.T shape: (2, 3)
input.T shape: (1, 2)
input value: [[1]
 [0]]
forward pass
self.weights shape: (3, 2)
input shape: (2, 1)
input value: [[0]
 [0]]
forward pass
self.weights shape: (1, 3)
input shape: (3, 1)
input value: [[0.        ]
 [0.        ]
 [0.07439818]]
gradient of error wrt prediction shape (1, 1)
output_gradient shape of relu layer (grad of error wrt activation value): (1, 1)
input shape of relu layer (pre-activation_value): (1, 1)
input value: [[1.38777878e-17]]
backward pass
output_gradient shape: (1, 1)
self.weights.T shape: (3, 1)
input.T shape: (1, 3)
input value: [[0.        ]
 [0.        ]
 [0.07439818]]
output_gradient shape of relu layer (

In [None]:
# Hand-made upstream gradient: a (2, 1) column vector, used by a later scratch cell.
dedz = np.array([3, 4]).reshape((-1, 1))

dedz

array([[3],
       [4]])

In [None]:
# Single XOR sample (1, 0) as a (2, 1) column, the layout the network's forward pass expects.
ee = np.reshape([1, 0], (2, 1))
ee

array([[1],
       [0]])

In [None]:

# Prediction of the network for the sample (1, 0); the XOR target for it is 1.
print(network.forward(ee))

forward pass
self.weights shape: (3, 2)
input shape: (2, 1)
input value: [[1]
 [0]]
forward pass
self.weights shape: (1, 3)
input shape: (3, 1)
input value: [[0.        ]
 [2.00365257]
 [0.        ]]
[[1.]]


In [None]:
# NOTE(review): no visible cell assigns a top-level `input`; this relies on
# leftover kernel state (on a fresh kernel it would show the builtin function).
input

array([[ 1],
       [-2],
       [ 3]])

In [None]:
# Apply a fresh Relu layer to the notebook-level `input`; negative entries come back as 0.
test = Relu()
test.forward(input)

#print(test.weights)



array([[1],
       [0],
       [3]])

In [None]:
# NOTE(review): the recorded output does not match Relu applied to `input`, so
# `test` was presumably bound to a different layer when this ran — confirm.
egg = test.forward(input)

print(egg)

[[1.02926184]
 [1.29131245]]


In [None]:
# NOTE(review): the backward() methods defined in this notebook take
# (output_gradient, learning_rate, input); this two-argument call would raise
# TypeError against them, so the recorded output presumably comes from an older
# layer implementation — confirm.
back = test.backward(dedz, 1)

back

array([[-25.86732936],
       [ 45.24942778],
       [-75.12692659]])

In [None]:
# NOTE(review): this cell targets a `Layer` that takes a shape tuple and exposes
# `.weights` / `.derivatives`. The `Layer` class defined in this notebook has
# none of those, so the cell depends on an earlier definition that is no longer
# in the file.
layer = Layer((3, 2))

print(layer.weights)

# Never used below (the next print shows `test`, not `test1`).
test1 = np.array([
    [1, 0],
    [1, 0],
    [1, 0]
])

print(test)
print(input)

test_forward = layer.forward(input)

print(test_forward)

print(layer.derivatives)






[[ 0.          0.        ]
 [ 0.17944571 -0.29869203]
 [-0.7942206  -0.07382915]
 [-0.06265169  1.14773829]]
[[-2.43544411 -1.90185887]
 [ 0.06280698  0.42694943]
 [-0.35160918 -1.0454109 ]]
[[1 2 3]]
[[-1.59695056  2.99686454]]
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [None]:
# Build two layers for a small test network.
# NOTE(review): `Layer((3, 2))` and `.weights` match an earlier `Layer`
# definition, not the one in this notebook — confirm before re-running.
print(input)

layer1 = Layer((3, 2))

print(layer1.weights)

layer2 = Layer((2, 1))

print(layer2.weights)

# A tuple, so Neural_Net.add_layer (which calls .append) cannot extend it.
layers = (layer1, layer2)

print(type(layers))

[[1 2 3]]
[[-0.66403834 -0.11206623]
 [ 2.08157846 -0.03151657]
 [-0.28017265 -0.79555331]]
[[1.09003914]
 [0.97991225]]
<class 'tuple'>


In [None]:
# Wrap the two scratch layers in a Neural_Net and run one forward pass on `input`.
testNet = Neural_Net(layers)

print(testNet.layers[0].weights)

# Rebinds `test`, which earlier cells used for a Relu layer.
test = testNet.forward(input)

print(test)

[[ 0.90582208  0.42139595]
 [ 1.11286853 -0.71953776]
 [ 0.55423551  1.96286882]]
[[-3.59542501]]
