### Import

In [1]:
import numpy as np
import pandas as pd

### Fetch the data from the Internet and save it as csv
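The following cell only needs to be run once, to download the dataset and save it as `data.csv`; it is left commented out so that re-running the notebook does not fetch the data again.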

In [2]:
# from ucimlrepo import fetch_ucirepo 

# # fetch dataset 
# breast_cancer_wisconsin_original = fetch_ucirepo(id=15) 
  
# # data (as pandas dataframes) 
# X = breast_cancer_wisconsin_original.data.features 
# y = breast_cancer_wisconsin_original.data.targets 

# data = pd.concat([y, X], axis=1)
# data.to_csv("data.csv", index=False)

### Read the data into X and y, and replace the class labels B and M (2 and 4) with 0 and 1

Note that some records contain missing values. Since the dataset is large enough, we simply drop those records.

In [2]:
# Load the saved dataset and discard every record that has a missing value.
# Chaining (instead of inplace=True) keeps the cell idempotent on re-run.
data = pd.read_csv("data.csv").dropna()

# Features: every column except the label column.
X = data.drop("Class", axis=1)

# Labels: map class 2 -> 0 and class 4 -> 1.
# A new Series is built rather than calling replace(inplace=True) on a column
# taken from `data`; whether that in-place call also rewrites `data` depends on
# the pandas version / Copy-on-Write mode, so `data` is deliberately left as-is.
y = data["Class"].replace({2: 0, 4: 1})

print(X.shape, y.shape)

(683, 9) (683,)


In [290]:
# Shape check: a (3, 1) column vector times a (1, 3) row vector
# yields a (3, 3) matrix (outer product).
a = np.array([[1], [2], [3]])   # column vector
b = np.array([[4, 5, 6]])       # row vector
print(a.shape, b.shape, a.dot(b).shape, sep="\n")

(3, 1)
(1, 3)
(3, 3)


In [662]:
from numpy import random
import matplotlib.pyplot as plt


class Layer:
    """Fully connected layer with a bias unit and an element-wise activation.

    The layer processes one sample at a time. ``weights`` has shape
    ``(input_size + 1, output_size)``; row 0 holds the bias weights and the
    remaining rows hold the weights of the real inputs.

    Supported activations: ``"linear"`` and ``"sigmoid"``.
    """

    num_of_layers = 0  # number of successfully constructed Layer instances

    def __init__(self, input_size, output_size, activation="linear"):
        """Create a layer with weights drawn uniformly from [-0.5, 0.5).

        Args:
            input_size: number of inputs (the bias unit is added internally).
            output_size: number of outputs.
            activation: ``"linear"`` or ``"sigmoid"``.

        Raises:
            ValueError: if ``activation`` is not a supported name.
        """
        self.weights = random.rand(input_size+1, output_size)-0.5   # weights[0, x] is the bias weight
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation
        if activation == "linear":
            self.activation_function = lambda x: x
            # Derivative of the identity is 1 everywhere.
            self.d_activation_function = lambda x: np.ones_like(x, dtype=float)
        elif activation == "sigmoid":
            self.activation_function = lambda x: 1/(1+np.exp(-x))
            # Derivative w.r.t. the pre-activation x: s(x) * (1 - s(x)).
            self.d_activation_function = lambda x: self.activation_function(x) * (1 - self.activation_function(x))
        else:
            raise ValueError("Wrong activation function")
        # Values cached by forward_propagation for use in backward_propagation.
        self.input = None            # last input, with the bias 1 prepended
        self.pre_activation = None   # last weighted sum, before the activation
        self.output = None           # last activated output
        Layer.num_of_layers += 1

    def forward_propagation(self, input):   # input should be an array
        """Compute the layer output for one sample and cache the intermediates.

        Args:
            input: array-like holding ``input_size`` values.

        Returns:
            1-D array of length ``output_size``.

        Raises:
            ValueError: if ``input`` does not contain ``input_size`` values.
        """
        features = np.asarray(input, dtype=float).reshape(-1)
        if features.size != self.input_size:
            raise ValueError(
                f"Expected {self.input_size} input values, got {features.size}"
            )
        input = np.concatenate(([1.0], features))    # add bias = 1
        self.input = input
        self.pre_activation = input.dot(self.weights)
        output = self.activation_function(self.pre_activation)
        self.output = output
        return output

    def backward_propagation(self, dl_dy, learning_rate = 0.005):
        """Apply one gradient-descent step and return the gradient w.r.t. the input.

        Uses the sample cached by the most recent ``forward_propagation`` call.

        Args:
            dl_dy: gradient of the loss w.r.t. this layer's output; any
                array-like with ``output_size`` values (e.g. shape
                ``(output_size, 1)``).
            learning_rate: step size of the weight update.

        Returns:
            Array of shape ``(input_size, 1)``: gradient of the loss w.r.t.
            the layer's (bias-free) input, computed with the pre-update weights.

        Raises:
            RuntimeError: if called before ``forward_propagation``.
            ValueError: if ``dl_dy`` does not contain ``output_size`` values.
        """
        if self.input is None or self.pre_activation is None:
            raise RuntimeError("forward_propagation must be called before backward_propagation")

        upstream = np.asarray(dl_dy, dtype=float).reshape(-1, 1)
        if upstream.shape[0] != self.output_size:
            raise ValueError(
                f"Expected {self.output_size} gradient values, got {upstream.shape[0]}"
            )

        # Chain rule: dL/dz = dL/dy * f'(z), with f' evaluated at the stored
        # pre-activation z (not at the incoming gradient).
        slope = np.asarray(self.d_activation_function(self.pre_activation), dtype=float).reshape(-1, 1)
        delta = upstream * slope                           # (output_size, 1)

        # dz/dw is the bias-augmented input, so dL/dw is an outer product.
        dl_dw = self.input.reshape(-1, 1).dot(delta.T)     # (input_size + 1, output_size)

        # Gradient for the previous layer; the bias row is skipped because the
        # bias is not an output of the previous layer. Must use pre-update weights.
        dl_dx = (self.weights[1:]).dot(delta)              # (input_size, 1)

        self.weights -= learning_rate * dl_dw
        return dl_dx
        

class Model:
    """Sequential stack of layers trained one sample at a time on squared error.

    Each layer must provide ``forward_propagation(input)`` and
    ``backward_propagation(dl_dy, learning_rate)``, plus ``input_size`` and
    ``output_size`` attributes for ``show_layers``.
    """

    def __init__(self):
        """Create a model with no layers."""
        self.layers = []

    def add(self, layer):
        """Append ``layer`` as the new last layer of the model."""
        self.layers.append(layer)

    def show_layers(self):
        """Print the input and output size of every layer, in order."""
        for i, l in enumerate(self.layers):
            print(f"Layer {i+1}: ", end="")
            print(f"input size: {l.input_size}, output size: {l.output_size}")

    def predict(self, input):
        """Run one sample through all layers and return the last layer's output."""
        prev_output = input
        for layer in self.layers:
            prev_output = layer.forward_propagation(prev_output)
        return prev_output

    def one_cycle(self, input, expected_output, learning_rate = 0.01):
        """Run one pass of per-sample gradient descent over the given samples.

        For every sample the prediction is computed, then the gradient of the
        squared error is propagated backwards through the layers, each of which
        updates its own weights.

        Args:
            input: sequence of samples.
            expected_output: sequence of targets, one per sample.
            learning_rate: step size handed to every layer.

        Raises:
            ValueError: if ``input`` and ``expected_output`` differ in length.
        """
        if len(input) != len(expected_output):
            raise ValueError("Input and Expected output have different length")

        for X, y in zip(input, expected_output):
            pred = self.predict(X)
            error = (y - pred)
            # d(squared error)/d(prediction) = -2 * (y - pred).
            # Shaped as a column (n_outputs, 1) so that it also has the right
            # layout when the last layer has more than one output.
            dl_dy = np.asarray(-2 * error, dtype=float).reshape(-1, 1)
            for layer in reversed(self.layers):
                dl_dy = layer.backward_propagation(dl_dy, learning_rate)

    def mean_squared_error(self, input, expected_output):
        """Return the squared prediction error averaged over the samples.

        The result has the shape of ``(y - prediction) ** 2`` for one sample.

        Raises:
            ValueError: if the two sequences differ in length or are empty.
        """
        if len(input) != len(expected_output):
            raise ValueError("Input and Expected output have different length")
        if len(input) == 0:
            raise ValueError("Cannot compute the mean squared error of zero samples")
        squared_error = 0
        for X, y in zip(input, expected_output):
            squared_error += (y - self.predict(X))**2
        mse = squared_error / len(input)
        return mse





# Three layers are instantiated (each draws its own random weights), but only
# the last, linear one is attached to the model; the two sigmoid layers stay unused.
layer1 = Layer(input_size=5, output_size=2, activation="sigmoid")
layer2 = Layer(input_size=3, output_size=2, activation="sigmoid")
layer3 = Layer(input_size=5, output_size=1, activation="linear")

model = Model()
model.add(layer3)
model.show_layers()

# Toy regression task: five samples with five features each; the target is the
# fixed linear combination 3*x0 + 4*x1 + 6*x2 + 2*x3 + 1*x4 + 5.
input = np.array([
    [5, 8, 3, 2, 1],
    [4, 2, 1, 5, 6],
    [6, 1, 6, 8, 9],
    [7, 4, 3, 2, 1],
    [3, 6, 7, 8, 8],
])
expected_output = [sum(row * [3, 4, 6, 2, 1]) + 5 for row in input]
print(expected_output, end="\n\n")

# 50 passes over the samples, reporting the mean squared error after each pass.
for i in range(50):
    model.one_cycle(input, expected_output, learning_rate=0.005)
    print(model.mean_squared_error(input, expected_output))

# Compare the trained model's prediction with the target for every sample.
for i in range(input.shape[0]):
    print(f"Input: {input[i]} Expected output: {expected_output[i]} prediction: {model.predict(input[i])}")

Layer 1: input size: 5, output size: 1
[75, 47, 88, 65, 104]


[62.75884073]

[58.32810896]

[38.92528753]

[29.48505494]

[25.65344473]

[24.09576733]

[23.04546562]

[21.76327566]

[20.06256641]

[18.01281503]

[15.77282181]

[13.5056347]

[11.34166834]

[9.36837807]

[7.63315083]

[6.15159876]

[4.91698482]

[3.90869248]

[3.09892899]

[2.45754686]

[1.95520391]

[1.56520718]

[1.26439664]

[1.03338078]

[0.85637123]

[0.7207995]

[0.61684116]

[0.53692827]

[0.47529661]

[0.4275911]

[0.39053697]

[0.36167526]

[0.33915595]

[0.3215798]

[0.30787999]

[0.29723476]

[0.28900386]

[0.28268279]

[0.27786985]

[0.27424255]

[0.27154044]

[0.26955262]

[0.2681083]

[0.26706956]

[0.26632566]

[0.26578843]

[0.26538844]

[0.26507177]

[0.26479727]

[0.26453428]
Input: [5 8 3 2 1] Expected output: 75 prediction: [75.6081181]
Input: [4 2 1 5 6] Expected output: 47 prediction: [47.12128986]
Input: [6 1 6 8 9] Expected output: 88 prediction: [88.76106693]
Input: [7 4 3 2 1] Expected output: 