In [195]:
import pandas as pd

df = pd.concat(pd.read_excel("Folds5x2_pp.xlsx", sheet_name=None), ignore_index=True)

train_dataset = df.sample(frac=0.8, random_state=0)
test_dataset = df.drop(train_dataset.index)

def normalize(df):
    for key in df.keys():
        df[key] = df[key] / df[key].max()
    return df.to_numpy()


train_features = normalize(train_dataset.drop(["PE"], axis=1))
test_features = normalize(test_dataset.drop(["PE"], axis=1))
train_labels = train_dataset["PE"].values.reshape(1, -1)[0]
test_labels = test_dataset["PE"].values.reshape(1, -1)[0]

In [196]:
import numpy as np

input_size = len(train_features[0])

# DAMN NETWORK
w1 = np.random.randint(2, 5, (input_size, 3))
b1 = np.random.randint(2, 5, (3,))
w2 = np.random.randint(2, 5, (3, 1))
b2 = np.random.randint(2, 5, (1,))

In [197]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def d_sigmoid(z):
    return sigmoid(z)*(1-sigmoid(z))

def forward(x):
    x = x.dot(w1)
    x = x + b1
    x = sigmoid(x)
    x = x.dot(w2)
    x = x + b2
    return x

def sqrt_mean_squared_error(x, y):
    return np.square(x - y).sum() / len(x)

In [198]:
# TRAINING DATA
# function to create a list containing mini-batches
def create_mini_batches(X, y, batch_size):
    mini_batches = []
    data = np.hstack((X, y))
    np.random.shuffle(data)
    n_minibatches = data.shape[0] // batch_size
    i = 0
  
    for i in range(n_minibatches + 1):
        mini_batch = data[i * batch_size:(i + 1)*batch_size, :]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))
    if data.shape[0] % batch_size != 0:
        mini_batch = data[i * batch_size:data.shape[0]]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))
    return mini_batches

def forward(x):
    # FORWARD PROPAGATION
    # Z1(3,m) = ((W1(4,3).T(3,4) * X(4,m))(3,m) + b(3,m))
    z1 = w1.T.dot(x.T).T + b1
    # A1(3,m) = A(Z1(3,m))
    a1 = sigmoid(z1)
    # Z2(1,1) = ((W2(3,m).T(m,3) * A1(3,m))(2,1) + b(1,1))
    z2 = (w2.T.dot(a1.T) + b2)
    # A2(1,m) = A(Z2(1,m))
    a2 = sigmoid(z2)
    return a2

# create batches that contains 50 (X,y) each
mini_batches = create_mini_batches(train_features, train_labels.reshape(len(train_labels), 1), 50) 

In [199]:
# THIS ENTIRE PROCESS COUNT AS AN EPOCH

# LEARNING_RATE
λ = 0.01

# GRADIENT TO BE
dJdB1 = [np.zeros(b.shape) for b in w1]
dJdW1 = [np.zeros(w.shape) for w in w1]
dJdB2 = [np.zeros(b.shape) for b in w2]
dJdW2 = [np.zeros(w.shape) for w in w2]

# mini_batch = zip(train_features, train_labels)
for x, y in mini_batches[0:-2]:
    # CALLED m IN SHAPES COMMENTS
    batch_size = len(x)
    
    # FORWARD PROPAGATION
    # Z1(3,m) = ((W1(4,3).T(3,4) * X(4,m))(3,m) + b(3,m))
    z1 = w1.T.dot(x.T).T + b1
    # A1(3,m) = A(Z1(3,m))
    a1 = sigmoid(z1)
    # Z2(1,1) = ((W2(3,m).T(m,3) * A1(3,m))(2,1) + b(1,1))
    z2 = (w2.T.dot(a1.T) + b2)
    # A2(1,m) = A(Z2(1,m))
    a2 = sigmoid(z2)
    
    # BACKWARD PROPAGATION
    # HERE WE DO MATHS, NO LUCK, HIGHLY SUGGEST https://www.youtube.com/watch?v=tIeHLnjs5U8
    # dJdW2(3,m) = dZ2/dW2 * dA2/dZ2 * dJ/dA2 WITH J OUR COST FUNCTION (sqrt_mean_squared_error)
    dJdA2 = (2 * (a2 - y.T))
    dZ2dW2 = a1
    dA2dZ2 = d_sigmoid(z2)
    # TODO EDITED dJdA2, every calculus using it should be remade
    dJdW2 = dA2dZ2.dot(dZ2dW2).T.dot(dJdA2.T)
    dZ2dB2 = 1
    dJdB2 = dA2dZ2.T * dJdA2 * 1
    
    # dJdW1(4,3) = dJdZ1 * dZ1dW1
    # HERE THE HARD PART IS ABOUT dJ/A1, which have impact on cost function thru Z2 -> A2 -> J
    # (1, m)
    dA2dZ2 = d_sigmoid(z2)
    # (1, m) = (1,m) * (m, 1)
    dJdZ2 = dA2dZ2 * dJdA2.T
    # (3, m) = (3, m) * (3, m)
    dJdZ1 = (w2.dot(dJdZ2)).T * d_sigmoid(z1)
    # (3, 4) = (3, m) * (m, 4)
    dJdW1 = dJdZ1.T.dot(x).T / batch_size
    dJdB1 = (dJdZ1 / batch_size).sum(axis=0)
    
    w2 = w2 + dJdW2
    b2 = b2 + dJdB2
    w1 = w1 + dJdW1
    b1 = b1 + dJdB1
    
    print(w2)
#     print(dJdW1)
    

# d = len(train_features)
# w1 = [w - λ/d * dw for w, dw in zip(w1, dJdW1)]
# w2 = [w - λ/d * dw for w, dw in zip(w2, dJdW2)]
# b1 = [b - λ/d * dw for b, dw in zip(b1, dJdB1)]
# b2 = [b - λ/d * dw for b, dw in zip(b2, dJdB2)]
# print(w1)

ValueError: shapes (3,1) and (50,1) not aligned: 1 (dim 1) != 50 (dim 0)

In [None]:
w2