In [1]:
import pandas as pd # to import csv
import numpy as np  # to compute every numerical operation
import numba as nba # to compute quickly
from sklearn import preprocessing   # to normalise data

In [2]:
# logistic sigmoid: squashes the raw outputs into the range 0..1
# vectorised = you can pass an array as the argument
@nba.vectorize(nopython=True)
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^(-x)) of x."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [3]:
# read the csv into a DataFrame and keep its raw values as a numpy.array
house_prices_df = pd.read_csv("housepricedata.csv")
house_prices_arr = house_prices_df.values

# split the columns: everything but the last one is network input,
# the last one contains the expected outputs
all_input_neurons_unnorm = house_prices_arr[:, :-1]
expected_outputs = house_prices_arr[:, -1]

# rescale the inputs with sklearn's MinMaxScaler instead of doing it by hand
scaler = preprocessing.MinMaxScaler()
all_input_neurons = scaler.fit_transform(all_input_neurons_unnorm)

# one row per input sample, one column per input neuron
nr_of_input_samples, nr_of_input_neurons = all_input_neurons.shape
nr_of_input_neurons, nr_of_input_samples

(10, 1460)

In [4]:
# arbitrary choice of 6 neurons per hidden layer
# (the same width is used for both hidden layers below)
NR_OF_HIDDEN_NEURS = 6

In [5]:
# layer 1: input neurons -> hidden neurons 1
# initial weights and biases are uniform random numbers from [0, 1)
weights1 = np.random.random(size=(nr_of_input_neurons, NR_OF_HIDDEN_NEURS))
biases1 = np.random.random(size=NR_OF_HIDDEN_NEURS)
weights1.shape, biases1.shape

((10, 6), (6,))

In [6]:
# layer 2: hidden neurons 1 -> hidden neurons 2
# initial weights and biases are uniform random numbers from [0, 1)
weights2 = np.random.random(size=(NR_OF_HIDDEN_NEURS, NR_OF_HIDDEN_NEURS))
biases2 = np.random.random(size=NR_OF_HIDDEN_NEURS)
weights2.shape, biases2.shape

((6, 6), (6,))

In [7]:
# layer 3: hidden neurons 2 -> the 2 output neurons
# initial weights and biases are uniform random numbers from [0, 1)
weights3 = np.random.random(size=(NR_OF_HIDDEN_NEURS, 2))
biases3 = np.random.random(size=2)
weights3.shape, biases3.shape

((6, 2), (2,))

In [8]:
# forward step for one layer: turns the current neuron values into the
# values of the next neuron set, using that layer's weights and biases
def think(input, weights, biases):
    """Return sigmoid(input . weights + biases) for one layer."""
    weighted_sum = np.dot(input, weights)
    return sigmoid(weighted_sum + biases)

In [9]:
# forward pass: push every input sample through the three layers in turn;
# the values coming out of the last layer are the predictions
hidden_1 = think(input=all_input_neurons, weights=weights1, biases=biases1)
hidden_2 = think(input=hidden_1, weights=weights2, biases=biases2)
predictions = think(input=hidden_2, weights=weights3, biases=biases3)

In [20]:
# calculate the cost of every provided output
# as a comparison with the expected values
@nba.njit
def get_costs(prediction_pairs, expected_outputs):
    """Return the squared-error cost of each prediction pair.

    Parameters
    ----------
    prediction_pairs : 2-D array with at least two columns
        Row i holds the predicted probability of class 0 in column 0
        and of class 1 in column 1.
    expected_outputs : 1-D array
        Expected label of each row; must be as long as prediction_pairs.
        Integer labels are turned into target probabilities by parity
        (even -> class 0, odd -> class 1), so they are expected to be 0 or 1.

    Returns
    -------
    1-D float array with one cost per row: the sum of the squared
    differences between the target and the predicted probabilities.

    Raises
    ------
    ValueError
        If the two arguments differ in length, or prediction_pairs has
        fewer than two columns.
    """
    nr_of_samples = len(prediction_pairs)

    # numba does not bounds-check array indexing by default, so a size
    # mismatch would silently read garbage memory instead of raising
    if len(expected_outputs) != nr_of_samples:
        raise ValueError("prediction_pairs and expected_outputs must have the same length")
    if prediction_pairs.shape[1] < 2:
        raise ValueError("prediction_pairs must have at least two columns")

    costs_array = np.zeros(nr_of_samples)
    for i in range(nr_of_samples):
        # one-hot target built from the label's parity:
        # (-1)**label is +1 for label 0 and -1 for label 1
        expected_probability_of_0 = 0.5 * (1 + (-1)**(expected_outputs[i]))
        expected_probability_of_1 = 0.5 * (1 + (-1)**(expected_outputs[i]+1))
        predicted_probability_of_0 = prediction_pairs[i,0]
        predicted_probability_of_1 = prediction_pairs[i,1]
        costs_array[i] += (expected_probability_of_0 - predicted_probability_of_0)**2
        costs_array[i] += (expected_probability_of_1 - predicted_probability_of_1)**2
    return costs_array
    
def cost(prediction_pairs, expected_outputs):
    """Return the mean of the per-sample costs computed by get_costs()."""
    per_sample_costs = get_costs(prediction_pairs, expected_outputs)
    return per_sample_costs.mean()

### This is what it looks like now:

In [32]:
# per-sample cost of the network's current predictions
costs_of_predictions = get_costs(predictions, expected_outputs)

# side-by-side table: expected label, predicted percentages, cost
predictions_in_percent = np.round(predictions * 100, 1)
summary_columns = [
    pd.DataFrame(expected_outputs, columns=["Reality"]),
    pd.DataFrame(predictions_in_percent, columns=["%0", "%1"]),
    pd.DataFrame(costs_of_predictions, columns=["Cost"]),
]
summary = pd.concat(summary_columns, axis="columns")

average_cost = cost(predictions, expected_outputs)
print("Average cost:", round(average_cost, 4))
summary

Average cost: 0.9594


Unnamed: 0,Reality,%0,%1,Cost
0,1,99.0,96.9,0.981915
1,1,99.0,96.9,0.981800
2,1,99.0,96.9,0.981966
3,0,99.0,96.8,0.937554
4,1,99.1,96.9,0.982169
...,...,...,...,...
1455,1,99.0,96.9,0.981942
1456,1,99.0,96.9,0.981900
1457,1,99.1,96.9,0.982067
1458,0,99.0,96.8,0.936270


The predictions are pretty much complete garbage at this point.
That shouldn't be surprising, since we basically rolled a D100 for the weights and biases. :P

### Let's get some test data to check if the cost function makes sense:

In [35]:
# random predictions drawn from the possible values 0%, 25%, 50%, 75%, 100%
random_predictions = np.random.randint(low=0, high=5, size=(len(predictions), 2)) / 4
costs_of_predictions = get_costs(random_predictions, expected_outputs)

# side-by-side table: expected label, random percentages, cost
random_predictions_in_percent = np.round(random_predictions * 100, 1)
summary_columns = [
    pd.DataFrame(expected_outputs, columns=["Reality"]),
    pd.DataFrame(random_predictions_in_percent, columns=["%0", "%1"]),
    pd.DataFrame(costs_of_predictions, columns=["Cost"]),
]
summary = pd.concat(summary_columns, axis="columns")

average_cost = cost(random_predictions, expected_outputs)
print("Average cost:", round(average_cost, 4))
summary

Average cost: 0.7557


Unnamed: 0,Reality,%0,%1,Cost
0,1,0.0,25.0,0.5625
1,1,25.0,50.0,0.3125
2,1,0.0,50.0,0.2500
3,0,0.0,25.0,1.0625
4,1,75.0,100.0,0.5625
...,...,...,...,...
1455,1,75.0,25.0,1.1250
1456,1,25.0,0.0,1.0625
1457,1,25.0,50.0,0.3125
1458,0,25.0,0.0,0.5625


## It seems that the cost() function is working fine.