In [24]:
"""
Original linear model: y = b + wx (Too simple to use)
Sigmoid function model: Suppose we have three sigmoid function

"""
# Viewers for previous three days: X = [25, 20, 21]
# There are weight for no. of sigmoid and no. of features. So Wij represents weight for xj for i-th sigmoid
# Bias: different bias for different functions

"""
r1 = b1 + w11x1 + w12x2 + w13x3
r2 = b2 + w21x1 + w22x2 + w23x3
r3 = b3 + w31x1 + w32x2 + w33x3

Where 
result vector: R = [r1, r2, r3]
weight matrix W = [[w11, w12, w13], [w21, w22, w23], [w31, w32, w33]]
input vector X = [x1, x2, x3]
bias vector B = [b1, b2, b3]
"""

import numpy as np

#### --------------------------------------- Define Model -------------------------------------- ####
# Define the sigmoid function
def sigmoid(r):
    return 1 / (1 + np.exp(-r))

# Set a random seed for reproducibility
np.random.seed(42)

# Define the input vector
original_x = np.array([33, 28, 32])

def model(X, W, B, C, B2):
    # Compute the result
    R = np.dot(W, X) + B

    # Apply the sigmoid function to each element of R
    sigmoid_R = sigmoid(R)

    # Scale by coefficients
    sigmoid_R_scaled = C * sigmoid_R

    # Compute the final output
    Y = B2 + sigmoid_R_scaled
    
    return Y

def deep_learn(original_data, layers, learning_rate=0.01):
    # Initialize weights, biases, and coefficients
    W = 2 * np.random.rand(3, 3) - 1
    B = 50 + 10 * np.random.rand(3)
    C = np.random.rand(3) * 100 + 0.5
    B2 = 0.1 * np.random.rand(3)
    
    for layer in range(layers):
        # Compute output
        output = model(original_data, W, B, C, B2)
        
        # Assume the watched amount for next day is [53, 46, 12]
        real_watch = np.array([53, 46, 12])
        
        # Compute the loss
        loss = loss_function(real_watch, output)

        # Compute gradients
        error = output - real_watch  # (predicted - actual)
        d_sigmoid = sigmoid(output) * (1 - sigmoid(output))  # derivative of sigmoid

        # Gradients
        dC = sigmoid(output)
        dB2 = np.ones_like(B2)
        dW = np.outer(error * d_sigmoid, original_data)  # gradients for W
        dB = error * d_sigmoid  # gradients for B

        # Update parameters
        W -= learning_rate * dW
        B -= learning_rate * dB
        C -= learning_rate * dC
        B2 -= learning_rate * dB2
        
        if layer % 5 == 0:  # Print loss every 50 layers
            print(f"Layer {layer}: Loss = {loss}")

    return output

def loss_function(real_values: list, predict_values: list):
    real = np.array(real_values)
    predict = np.array(predict_values)

    # Compute the absolute errors
    absolute_errors = np.abs(real - predict)

    # Sum the absolute errors
    total_error = np.sum(absolute_errors)
    
    # Calculate MAE by dividing by the number of items
    mae = total_error / len(real)

    return mae

# Execute the training
final_output = deep_learn(original_x, layers=500, learning_rate=0.01)
print("Final Result is:", final_output)

Layer 0: Loss = 20.577748631067788
Layer 5: Loss = 20.54441529456309
Layer 10: Loss = 20.511081958070886
Layer 15: Loss = 20.477748621592486
Layer 20: Loss = 20.444415285129345
Layer 25: Loss = 20.411081948683062
Layer 30: Loss = 20.377748612255406
Layer 35: Loss = 20.34441527584835
Layer 40: Loss = 20.311081939464046
Layer 45: Loss = 20.277748603104893
Layer 50: Loss = 20.24441526677354
Layer 55: Loss = 20.21108193047289
Layer 60: Loss = 20.177748594206193
Layer 65: Loss = 20.14441525797702
Layer 70: Loss = 20.111081921789303
Layer 75: Loss = 20.077748585647402
Layer 80: Loss = 20.044415249556142
Layer 85: Loss = 20.011081913520844
Layer 90: Loss = 19.9777485775474
Layer 95: Loss = 19.9444152416423
Layer 100: Loss = 19.91108190581274
Layer 105: Loss = 19.87774857006666
Layer 110: Loss = 19.84441523441284
Layer 115: Loss = 19.81108189886098
Layer 120: Loss = 19.777748563421802
Layer 125: Loss = 19.744415228107155
Layer 130: Loss = 19.711081892930125
Layer 135: Loss = 19.6777485579052
L