In [1]:
import pandas as pd # to import csv
import numpy as np  # to compute every numerical operation
import numba as nba # to compute quickly
from sklearn import preprocessing   # to normalise data

In [2]:
# logistic (sigmoid) activation, used to map raw neuron sums to values between 0 and 1
# compiled by numba as a vectorised function, so it accepts scalars and whole arrays alike
@nba.vectorize(nopython=True)
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of x."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [3]:
# read the csv into a DataFrame and keep a plain numpy copy of its values
house_prices_df = pd.read_csv("housepricedata.csv")
house_prices_arr = house_prices_df.to_numpy()

# every column except the last one holds the inputs, one sample per row;
# the last column holds the expected output for that sample
all_input_neurons_unnorm = house_prices_arr[:, :-1]
expected_outputs = house_prices_arr[:, -1]

# rescale the inputs with sklearn's MinMaxScaler instead of doing it by hand
all_input_neurons = preprocessing.MinMaxScaler().fit_transform(
    all_input_neurons_unnorm
)

# rows = samples, columns = input neurons
nr_of_input_samples, nr_of_input_neurons = all_input_neurons.shape
nr_of_input_neurons, nr_of_input_samples

(10, 1460)

In [4]:
# number of neurons in each hidden layer (arbitrary choice of 6)
NR_OF_HIDDEN_NEURS = 6

# The weights, biases and test predictions further down are drawn from
# NumPy's global random generator. Seeding it here, before the first draw,
# makes "Restart Kernel -> Run All" produce the same numbers every time.
SEED = 42
np.random.seed(SEED)

In [5]:
# layer 1: input neurons -> first set of hidden neurons
# weights and biases both start out as uniform random draws from [0, 1)
weights1 = np.random.random(size=(nr_of_input_neurons, NR_OF_HIDDEN_NEURS))
biases1 = np.random.random(size=NR_OF_HIDDEN_NEURS)
weights1.shape, biases1.shape

((10, 6), (6,))

In [6]:
# layer 2: first set of hidden neurons -> second set of hidden neurons
# weights and biases both start out as uniform random draws from [0, 1)
weights2 = np.random.random(size=(NR_OF_HIDDEN_NEURS, NR_OF_HIDDEN_NEURS))
biases2 = np.random.random(size=NR_OF_HIDDEN_NEURS)
weights2.shape, biases2.shape

((6, 6), (6,))

In [7]:
# layer 3: second set of hidden neurons -> the 2 output neurons
# weights and biases both start out as uniform random draws from [0, 1)
weights3 = np.random.random(size=(NR_OF_HIDDEN_NEURS, 2))
biases3 = np.random.random(size=2)
weights3.shape, biases3.shape

((6, 2), (2,))

In [8]:
# forward step for a single layer:
# turns the current neuron values into the values of the next neuron set
def think(input, weights, biases):
    """Return sigmoid(input . weights + biases).

    `input` holds the current neuron values, `weights` and `biases` are the
    parameters of the connection to the next neuron set.
    """
    weighted_sum = np.dot(input, weights)
    return sigmoid(weighted_sum + biases)

In [9]:
# forward pass through the whole network, one layer at a time:
# inputs -> hidden 1 -> hidden 2 -> outputs (the predictions)
hidden_1 = think(input=all_input_neurons, weights=weights1, biases=biases1)
hidden_2 = think(input=hidden_1, weights=weights2, biases=biases2)
predictions = think(input=hidden_2, weights=weights3, biases=biases3)

In [10]:
# squared-error cost of every two-value prediction,
# measured against the target pair derived from the expected output
@nba.njit
def costs(prediction_tuples, expected_outputs):
    """Return an array with one cost per row of `prediction_tuples`.

    For row i the target pair is built from `expected_outputs[i]`:
    an expected output of 0 gives the target (1, 0), an expected output
    of 1 gives (0, 1). The cost is the sum of the squared differences
    between the two predicted values and that target pair.
    """
    n_samples = len(prediction_tuples)
    per_sample_cost = np.zeros(n_samples)
    for row in range(n_samples):
        label = expected_outputs[row]
        # (-1)**label is +1 for label 0 and -1 for label 1,
        # so exactly one of the two targets ends up as 1 and the other as 0
        target0 = 0.5 * (1 + (-1)**label)
        target1 = 0.5 * (1 + (-1)**(label + 1))
        miss0 = prediction_tuples[row, 0] - target0
        miss1 = prediction_tuples[row, 1] - target1
        per_sample_cost[row] = miss0**2 + miss1**2
    return per_sample_cost

### This is what it looks like now:

In [11]:
# score the network's current predictions
costs_of_predictions = costs(predictions, expected_outputs)

# build one table: expected output, predicted percentages, cost
expected_outputs_df = pd.DataFrame(expected_outputs, columns=["Reality"])
predictions_df = pd.DataFrame((predictions * 100).round(1), columns=["%0", "%1"])
costs_df = pd.DataFrame(costs_of_predictions, columns=["Cost"])

summary = pd.concat(
    [expected_outputs_df, predictions_df, costs_df],
    axis="columns",
)
summary

Unnamed: 0,Reality,%0,%1,Cost
0,1,96.4,96.2,0.930712
1,1,96.4,96.2,0.930579
2,1,96.4,96.2,0.930848
3,0,96.4,96.2,0.926742
4,1,96.4,96.2,0.931210
...,...,...,...,...
1455,1,96.4,96.2,0.930750
1456,1,96.4,96.2,0.930770
1457,1,96.4,96.2,0.930957
1458,0,96.3,96.1,0.925793


Predictions are pretty much complete garbage at this point.
That shouldn't be surprising, since we basically rolled a D100 for the weights and biases. :P

### Let's get some test data to check if the cost function makes sense:

In [12]:
# made-up predictions: each of the two outputs is one of 0%, 25%, 50%, 75%, 100%
random_predictions = np.random.randint(0, 5, size=(len(predictions), 2)) / 4
costs_of_predictions = costs(random_predictions, expected_outputs)

# build one table: expected output, predicted percentages, cost
expected_outputs_df = pd.DataFrame(expected_outputs, columns=["Reality"])
predictions_df = pd.DataFrame((random_predictions * 100).round(1), columns=["%0", "%1"])
costs_df = pd.DataFrame(costs_of_predictions, columns=["Cost"])

summary = pd.concat(
    [expected_outputs_df, predictions_df, costs_df],
    axis="columns",
)
summary

Unnamed: 0,Reality,%0,%1,Cost
0,1,100.0,25.0,1.5625
1,1,0.0,25.0,0.5625
2,1,75.0,100.0,0.5625
3,0,0.0,25.0,1.0625
4,1,50.0,75.0,0.3125
...,...,...,...,...
1455,1,50.0,50.0,0.5000
1456,1,0.0,50.0,0.2500
1457,1,50.0,0.0,1.2500
1458,0,50.0,75.0,0.8125


## Seems that the cost function is working fine.