In [22]:
import numpy as np

# Define the input data (15 days of sales, 1 value per day)
data = np.array([100, 90, 120, 80, 150, 110, 130, 140, 115, 105, 90, 125, 135, 100, 95])
#data = np.arange(100000)

mean = np.mean(data)
std = np.std(data)

# Normalize the array along the columns
data = (data - mean) / std
# Define the number of time steps in the input sequence
num_time_steps = 3

# Define the number of input features (in this case, we only have 1 input feature: sales)
num_input_features = 1

# Define the number of hidden units in the RNN layer
num_hidden_units = 5

# Define the batch size
batch_size = 4

# Initialize the weight matrices and bias vectors for the RNN layer
W_xh = np.random.randn(num_input_features, num_hidden_units)
W_hh = np.random.randn(num_hidden_units, num_hidden_units)
b_h = np.zeros((1, num_hidden_units))
W_hy = np.random.randn(num_hidden_units, num_input_features)
b_y = np.zeros((1, num_input_features))

# Initialize the hidden state (this is the initial state before processing any input)
h_t = np.zeros((batch_size, num_hidden_units))

# Define the learning rate
learning_rate = 0.001

for each in range(1000):
# Loop through the input sequence and update the hidden state at each time step
    for i in range(0, len(data) - num_time_steps, batch_size):
        # Extract the input sequence for the current batch
        x_batch = np.zeros((batch_size, num_time_steps, num_input_features))
        for j in range(batch_size):
            x_batch[j,:,:] = data[i+j:i+j+num_time_steps].reshape((num_time_steps, num_input_features))
        
        # Compute the new hidden states for the current batch using the current inputs and previous hidden states
        h_t = np.zeros((batch_size, num_hidden_units))
        for t in range(num_time_steps):
            x_t = x_batch[:,t,:]
            h_t = np.tanh(np.dot(x_t, W_xh) + np.dot(h_t, W_hh) + b_h)
        
        # Compute the outputs for the current batch at the last time step
        y_pred = np.dot(h_t, W_hy) + b_y
        
        # Compute the target values for the current batch (in this case, we want to predict the next day's sales based on the previous 3 days of sales)
        y_true = data[i+num_time_steps:i+num_time_steps+batch_size].reshape((batch_size, num_input_features))
        
        # Compute the error (mean squared error) for the current batch
        error = 0.5 * np.mean((y_pred - y_true) ** 2)
        
        # Compute the gradients of the output layer for the current batch (using the chain rule)
        grad_y = (y_pred - y_true) / batch_size
        grad_W_hy = np.dot(h_t.T, grad_y)
        grad_b_y = np.sum(grad_y, axis=0, keepdims=True)
        
        # Initialize the gradients of the hidden state for the current batch (this will be used as the initial gradients for backpropagation)
        grad_h = np.zeros((batch_size, num_hidden_units))
        
        # Loop backward through the time steps and compute the gradients for each time step for the current batch
        for t in reversed(range(num_time_steps)):
            x_t = x_batch[:,t,:]
            h_t = np.tanh(np.dot(x_t, W_xh) + np.dot(h_t, W_hh) + b_h)
            
            # Compute the gradients for the output of the RNN layer (using the chain rule)
            grad_output = grad_h + np.dot(grad_y, W_hy.T)
            grad_z = grad_output * (1 - h_t ** 2)
            
            # Compute the gradients for the parameters of the RNN layer (using the chain rule)
            grad_W_xh = np.dot(x_t.T, grad_z)
            grad_W_hh = np.dot(h_t.T, grad_z)
            grad_b_h = np.sum(grad_z, axis=0, keepdims=True)
            
            # Update the gradients for the next time step (using the chain rule)
            grad_h = np.dot(grad_z, W_hh.T)
            
            # Accumulate the gradients for the current batch
            if t == num_time_steps - 1:
                total_grad_W_xh = grad_W_xh
                total_grad_W_hh = grad_W_hh
                total_grad_b_h = grad_b_h
                total_grad_W_hy = grad_W_hy
                total_grad_b_y = grad_b_y
            else:
                total_grad_W_xh += grad_W_xh
                total_grad_W_hh += grad_W_hh
                total_grad_b_h += grad_b_h
                total_grad_W_hy += grad_W_hy
                total_grad_b_y += grad_b_y

        # Update the parameters of the RNN layer for the current batch using the computed gradients and the learning rate
        W_xh -= learning_rate * total_grad_W_xh
        W_hh -= learning_rate * total_grad_W_hh
        b_h -= learning_rate * total_grad_b_h
        W_hy -= learning_rate * total_grad_W_hy
        b_y -= learning_rate * total_grad_b_y
        print("Error:", error)

Error: 2.35990344297781
Error: 0.40523367655577924
Error: 0.5164650787731605
Error: 2.338884540507108
Error: 0.404796329885643
Error: 0.5059882978081105
Error: 2.318103098913752
Error: 0.404409681339409
Error: 0.4957531223047647
Error: 2.2975587962323636
Error: 0.4040717075998556
Error: 0.4857542946173853
Error: 2.2772511643770676
Error: 0.40378046313300314
Error: 0.4759867011434186
Error: 2.2571795980212372
Error: 0.4035340767563984
Error: 0.46644536623064264
Error: 2.2373433631047766
Error: 0.40333074839010363
Error: 0.45712544647152653
Error: 2.217741604972959
Error: 0.40316874597865365
Error: 0.4480222253509852
Error: 2.1983733561527736
Error: 0.4030464025731878
Error: 0.4391311082172887
Error: 2.1792375437743083
Error: 0.4029621135638063
Error: 0.43044761754907995
Error: 2.1603329966459266
Error: 0.40291433405295446
Error: 0.421967388494268
Error: 2.141658451992972
Error: 0.40290157636130647
Error: 0.41368616465906094
Error: 2.123212561870389
Error: 0.4029224076582452
Error: 0.405

In [5]:
for each in range(0, len(data) - num_time_steps, batch_size):
    print(each)

0
4
8


In [23]:
y_pred

array([[ 0.45723794],
       [ 0.01784452],
       [-0.22004888],
       [-0.04935979]])

In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, SimpleRNN, Dense
from tensorflow.keras.models import Model

# Define the input shape and parameters of the RNN layer
num_time_steps = 3
num_input_features = 1
num_hidden_units = 5

# Define the input layer
input_layer = Input(shape=(num_time_steps, num_input_features))

# Define the RNN layer
rnn_layer = SimpleRNN(units=num_hidden_units, activation='tanh')

# Define the output layer
output_layer = Dense(units=1)

# Connect the input layer to the RNN layer
rnn_output = rnn_layer(input_layer)

# Connect the RNN layer to the output layer
output = output_layer(rnn_output)

# Define the model
model = Model(inputs=input_layer, outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Generate some random data
x = np.random.randn(100, num_time_steps, num_input_features)
y = np.random.randn(100, 1)

# Train the model
model.fit(x, y, epochs=10)


2023-02-15 12:40:04.862053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-15 12:40:05.665679: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-02-15 12:40:05.665723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
2023-02-15 12:40:06.732099: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:

Epoch 1/10


upport.
2023-02-15 12:40:07.409166: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1700] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.
2023-02-15 12:40:07.409184: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-02-15 12:40:07.409350: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9382 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6
2023-02-15 12:40:09.009551: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-02-15 12:40:09.381720: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x56288

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc40df51540>