[https://colab.research.google.com/drive/1VpeE6UvEPRz9HmsHh1KS0XxXjYu533EC]

# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
import pandas as pd
from scipy import signal
import keras
import numpy as np
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('expand_frame_repr', False)

from optimizers.sgd import SGD
from optimizers.adam import Adam
# from network import Network
# from layers.conv2d import Conv2D
# from layers.dense import FCLayer
# from layers.activation import ActivationLayer, SoftmaxLayer, tanh, tanh_prime, softmax, softmax_prime, relu, relu_prime, sigmoid, sigmoid_prime
# from layers.flatten import FlattenLayer
from losses import mse, categorical_crossentropy, binary_crossentropy, mae
from layers import Conv2D, FCLayer, ActivationLayer, FlattenLayer, DropoutLayer, GlobalAveragePoolingLayer, \
      RNN, MaxPool2D, LSTM, GRU, Embedding, BatchNorm2D, LayerNorm
from autograd import tanh, relu, sigmoid, softmax, Module, Tensor
from utils import draw_computation_graph

2024-05-24 18:41:37.625029: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-24 18:41:37.645317: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-24 18:41:37.645338: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-24 18:41:37.645908: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-24 18:41:37.649744: I tensorflow/core/platform/cpu_feature_guar

In [15]:
def visualize_test_samples(x_test,y_test,samples,network):
  for test, true in zip(x_test[:samples], y_test[:samples]):
    pred = network.predict([test])[0][0]
    idx = np.argmax(pred)
    idx_true = np.argmax(true)
    plt.title('pred: %s, prob: %.2f, true: %d' % (idx, pred[idx], idx_true))
    plt.imshow(test, cmap='binary')
    plt.show()

In [16]:
def adjust_data(image,is_conv=False):
  image = np.cast['float32'](image)
  if is_conv:
    image = np.expand_dims(image,axis=-1)
  image/=255
  return image

In [17]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [18]:
x_train = adjust_data(x_train,is_conv=True)
y_train = keras.utils.to_categorical(y_train)
x_test = adjust_data(x_test,is_conv=True)
y_test = keras.utils.to_categorical(y_test)

In [19]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 28, 28, 1), (60000, 10), (10000, 28, 28, 1), (10000, 10))

In [20]:
# def create_batches(data, labels, batch_size):
#     num_batches = int(len(data) / batch_size)
#     data_batches = []
#     label_batches = []
    
#     for i in range(num_batches):
#         start = i * batch_size
#         end = min((i + 1) * batch_size, len(data))
#         data_batches.append(data[start:end])
#         label_batches.append(labels[start:end])
    
#     return np.array(data_batches), np.array(label_batches)

In [21]:
# batch_size = 32
# x_train_batches, y_train_batches = create_batches(x_train, y_train, batch_size)
# x_test_batches, y_test_batches = create_batches(x_test, y_test, batch_size)

In [22]:
# x_train_batches.shape, y_train_batches.shape, x_test_batches.shape, y_test_batches.shape

In [23]:
# x_train_batches = adjust_data(x_train_batches,True)
# y_train_batches = keras.utils.to_categorical(y_train_batches)
# x_test_batches = adjust_data(x_test_batches,True)
# y_test_batches = keras.utils.to_categorical(y_test_batches)

In [24]:
# x_train_batches.shape, y_train_batches.shape, x_test_batches.shape, y_test_batches.shape

# SGD

In [25]:
class Model(Module):
    def __init__(self) -> None:
        self.conv = Conv2D(filters=3, kernel_size=5, padding='same', strides=2)
        self.batchnorm = BatchNorm2D()
        self.maxpool = MaxPool2D(pool_size=2, strides=1, padding='same')
        self.flatten = FlattenLayer()
        self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=10)

    def forward(self, inputs: Tensor, training=True) -> Tensor:
        x = self.conv(inputs, training=training)
        x0 = self.batchnorm(x)
        x1 = self.maxpool(x0)
        x2 = self.flatten(x1, training=training)
        x3 = self.linear1(x2, training=training)
        x4 = tanh(x3)
        x5 = self.linear2(x4, training=training)
        x6 = tanh(x5)
        x7 = softmax(x6)
        return x7

optimizer = SGD(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train)
y_train = Tensor(y_train)
i = 0
# print(x_train.shape)
# train_data = x_train.shape[0]
train_data = 1000

for epoch in range(50):
    epoch_loss = 0.0
    # print(f'before: {x_train.shape}')
    for start in range(0, train_data, batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = categorical_crossentropy(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)
    # print(f'after: {x_train.shape}')
    epoch_loss /= (train_data//batch_size + train_data % batch_size)
    print(epoch, epoch_loss)

0 55.05290804531892
1 46.3684569405066
2 42.33992724926197


KeyboardInterrupt: 

# RNN

In [2]:
np.random.seed(42)

def create_dataset(num_sequences, sequence_length):
    return np.array([np.arange(start, start + sequence_length) for start in range(num_sequences)])

# Helper function to create inputs and targets from the sequences
def create_inputs_targets(data):
    X = data[:, :-1]  # all but the last item in each sequence as input
    Y = data[:, -1]  # all but the first item in each sequence as target (for sequence prediction)
    return X, Y

# Helper function to create batches
def create_batches(X, Y, batch_size):
    num_batches = len(X) // batch_size
    X_batches = np.array(np.array_split(X, num_batches))
    Y_batches = np.array(np.array_split(Y, num_batches))
    return X_batches, Y_batches

# Generate dataset
num_sequences = 1000  # The number of sequences you want
sequence_length = 4  # The length of each sequence
batch_size = 32  # The size of each batch

# Create dataset
dataset = create_dataset(num_sequences, sequence_length)

# Split dataset into training and testing sets (80-20 split)
train_size = int(num_sequences * 0.8)
train_set, test_set = dataset[:train_size], dataset[train_size:]

# Create inputs (X) and targets (Y) for training and testing
x_train, y_train = create_inputs_targets(train_set)
x_test, y_test = create_inputs_targets(test_set)
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

print(x_train.shape, y_train.shape)
# Shuffle training data
indices = np.arange(train_size)
np.random.shuffle(indices)

# Apply shuffled indices to create shuffled training data
x_train_shuffled = x_train[indices]
y_train_shuffled = y_train[indices]
# Normalize inputs
# x_train_max = np.max(x_train_shuffled)
# x_train_shuffled = x_train_shuffled / x_train_max
# x_test = x_test / x_train_max  # use the same scale as train set

# print(x_train_shuffled.shape, y_train_shuffled.shape)
# Create batches from the training and testing data
# x_train_batches, y_train_batches = create_batches(x_train_shuffled, y_train_shuffled, batch_size)
# x_test_batches, y_test_batches = create_batches(x_test, y_test, batch_size)


# Example: Print the first training batch
# print("First training batch (x_train, y_train):")
# print(x_train_batches.shape, y_train_batches.shape)
# x_train_batches[0], y_train_batches[0]

(800, 3, 1) (800,)


In [4]:
class Model(Module):
    def __init__(self) -> None:
        super().__init__()
        # self.rnn = RNN(hidden_size=20,output_size=30, return_sequences=False, bidirectional=False, init='glorot_uniform')
        self.add_module('lstm', LSTM(hidden_size=20, return_sequences=True, bidirectional=False))
        # self.lstm = LSTM(hidden_size=20, return_sequences=True, bidirectional=False)
        self.add_module('layernorm', LayerNorm(D=1))
        # self.layernorm = LayerNorm(D=1)
        self.add_module('flatten', FlattenLayer())
        # self.rnn2 = RNN(hidden_size=20,output_size=30, return_sequences=False, init='glorot_uniform')
        # self.flatten = FlattenLayer()
        # self.linear1 = FCLayer(output_dim=20)
        self.add_module('linear2', FCLayer(output_dim=1))
        # self.linear2 = FCLayer(output_dim=1)

    def forward(self, inputs: Tensor) -> Tensor:
        x = self.lstm(inputs)
        # print(f'before layernorm: {x}')
        x1 = self.layernorm(x)
        # print(f'after layernorm: {x1}')
        # x = self.rnn2(x)
        x2 = self.flatten(x1)
        # x3 = self.linear1(x2)
        x4 = self.linear2(x2)
        return x4
    
m = Model()
m(Tensor(np.random.randn(32, 3, 1)))
summary = m.summary()
# print(summary)
pd.DataFrame(summary)

RecursionError: maximum recursion depth exceeded while calling a Python object

In [27]:
class Model(Module):
    def __init__(self) -> None:
        # self.rnn = RNN(hidden_size=20,output_size=30, return_sequences=False, bidirectional=False, init='glorot_uniform')
        self.lstm = LSTM(hidden_size=20, return_sequences=False, bidirectional=False)
        self.layernorm = LayerNorm(D=1)
        # self.rnn2 = RNN(hidden_size=20,output_size=30, return_sequences=False, init='glorot_uniform')
        self.flatten = FlattenLayer()
        # self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=1)

    def forward(self, inputs: Tensor) -> Tensor:
        x = self.lstm(inputs)
        # print(f'before layernorm: {x}')
        x1 = self.layernorm(x)
        # print(f'after layernorm: {x1}')
        # x = self.rnn2(x)
        x2 = self.flatten(x1)
        # x3 = self.linear1(x2)
        x4 = self.linear2(x2)
        return x4

optimizer = Adam(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train_shuffled)
y_train = Tensor(np.expand_dims(y_train_shuffled,axis=-1))
i = 0
for epoch in range(1000):
    epoch_loss = 0.0

    for start in range(0, len(x_train_shuffled), batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = mse(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)
    epoch_loss /= (train_data//batch_size + train_data % batch_size)
    print(epoch, epoch_loss)

0 137619.5544946631
1 137386.89973783083
2 137135.88611734737
3 136859.3391069833
4 136550.53694213482
5 136203.32031139964
6 135813.6163885433
7 135379.3827680809
8 134899.6430867293
9 134373.98859426836
10 133802.4134634241
11 133185.22015668644
12 132522.94535706958
13 131816.30552408777
14 131066.15738187465
15 130273.46866490059
16 129439.29629158911
17 128564.77004913658
18 127651.08028387825
19 126699.46844245035
20 125711.21958827086
21 124687.65628344375
22 123630.13342538937
23 122540.03375383464
24 121418.76385155563
25 120267.75051378376
26 119088.4374084517
27 117882.28196815592
28 116650.75247987888
29 115395.3253438052
30 114117.48247538824
31 112818.70885286172
32 111500.4901721799
33 110164.31061957963
34 108811.65075183015
35 107443.9854718018
36 106062.78208730958
37 104669.49845481224
38 103265.5812194507
39 101852.46411934953
40 100431.56635648423
41 99004.2910496821
42 97572.0237462188
43 96136.13099208778
44 94697.95896755044
45 93258.83217674737
46 91820.0521942

In [28]:
test = np.array([[10,11,12], [11,12,13]])
test = np.expand_dims(test, axis=-1)
test = Tensor(test)
print(test.shape)
out = model(test)
np.round(out.data)

(2, 3, 1)


array([[-121.],
       [-114.]])

In [7]:
def learning_rate_decay(epoch, optimizer):
    if epoch != 0 and epoch % 50 == 0:
        optimizer.learning_rate *= 0.99



epoch 1: loss=0.05099273701819455
epoch 2: loss=13249.740727372679
epoch 3: loss=14749.407963355394
epoch 4: loss=15250.752942708701
epoch 5: loss=15492.481381005673
epoch 6: loss=15622.466162419676
epoch 7: loss=15695.41284485934
epoch 8: loss=15737.109844625535
epoch 9: loss=15761.141082024957
epoch 10: loss=15775.042803035232
epoch 11: loss=15783.09837473718
epoch 12: loss=15787.76982950266
epoch 13: loss=15790.479695590704
epoch 14: loss=15792.051856900975
epoch 15: loss=15792.963996259414
epoch 16: loss=15793.493200779878
epoch 17: loss=15793.800228715167
epoch 18: loss=15793.978352795524
epoch 19: loss=15794.081690347573
epoch 20: loss=15794.141639857253
epoch 21: loss=15794.176417997249
epoch 22: loss=15794.19659337004
epoch 23: loss=15794.208297317833
epoch 24: loss=15794.215086847486
epoch 25: loss=15794.219025468892
epoch 26: loss=15794.221310260786
epoch 27: loss=15794.222635661932
epoch 28: loss=15794.223404521004
epoch 29: loss=15794.22385053153
epoch 30: loss=15794.224109