In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
df_data = pd.read_csv("cracow_apartments.csv", sep=",")

In [3]:
df_data.head()

Unnamed: 0,distance_to_city_center,rooms,size,price
0,2.4,1.0,19.35,191.565
1,2.4,2.0,13.08,221.568
2,5.0,1.0,24.66,185.936
3,1.9,1.0,24.82,275.502
4,1.9,1.0,25.39,241.205


In [4]:
def init(n):
    return {"w": np.zeros(n), "b": 0.0}

In [5]:
def predict(x, parameters):
    # Prediction initial value
    prediction = 0
#     print(x)
#     print(parameters)
    
    # Adding multiplication of each feature with it's weight
    for weight, feature in zip(parameters["w"], x):
        prediction += weight * feature
        
    # Adding bias
    prediction += parameters["b"]
        
    return prediction

In [27]:
features = ["size"]
target = ["price"]
X, y = df_data[features].values, df_data[target].values
n = len(features)
model_parameters = init(n)
predictions =[predict(x, model_parameters) for x in X]

In [28]:
x_array = np.array(X)
x_array.flatten()

array([ 19.35,  13.08,  24.66,  24.82,  25.39,  25.54,  25.62,  26.04,
        26.26,  26.26,  26.6 ,  26.87,  27.85,  27.86,  27.93,  28.32,
        29.  ,  29.91,  30.83,  31.68,  32.02,  32.22,  34.32,  34.32,
        34.78,  35.01,  37.11,  37.41,  38.7 ,  29.  ,  45.15,  48.17,
        48.52,  49.42,  49.44,  50.91,  51.9 ,  52.4 ,  52.7 ,  53.61,
        53.83,  53.86,  54.18,  54.9 ,  54.92,  54.97,  55.1 ,  67.35,
        67.73,  68.6 ,  70.1 ,  70.16,  70.81,  71.81,  72.02,  74.36,
        75.59,  75.92,  83.  ,  85.09,  86.86,  87.91,  96.21,  99.  ,
       100.88, 106.96,  30.83,  36.95,  39.59,  40.69,  41.25,  41.31,
        41.94,  34.18,  34.3 ,  34.4 ])

In [29]:
y.flatten()

array([191.565, 221.568, 185.936, 275.502, 241.205, 193.052, 231.861,
       151.757, 172.003, 175.952, 234.08 , 185.967, 185.025, 261.884,
       235.8  , 357.511, 287.1  , 297.605, 292.882, 234.432, 232.113,
       209.43 , 272.844, 223.08 , 290.739, 224.064, 241.215, 299.28 ,
       255.42 , 287.   , 234.735, 349.233, 272.227, 325.777, 360.418,
       459.993, 321.98 , 393.   , 375.852, 427.023, 339.129, 350.9  ,
       409.836, 334.89 , 389.932, 417.   , 413.25 , 451.245, 504.589,
       418.46 , 455.65 , 614.809, 485.261, 495.489, 555.   , 714.183,
       521.571, 529.753, 680.6  , 680.   , 464.18 , 483.505, 692.712,
       782.1  , 700.   , 808.144, 292.885, 229.09 , 285.048, 287.6  ,
       284.625, 325.   , 373.088, 177.702, 264.11 , 242.52 ])

In [30]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [31]:
# x_axis = np.mgrid[5:7:0.1]
# y_axis = np.mgrid[35:50:0.2]
x_axis = np.linspace(0, 10, num=100)
y_axis = np.linspace(0, 40, num=100)

In [32]:
print(model_parameters['w'][0])

print(model_parameters['b'])

0.0
0.0


In [33]:
def mse(predictions, targets):
    # Retrieving number of samples in dataset
    samples_num = len(predictions)
    
    # Summing square differences between predicted and expected values
    accumulated_error = 0.0
    for prediction, target in zip(predictions, targets):
        accumulated_error += (prediction - target)**2
        
    # Calculating mean and dividing by 2
    mae_error = (1.0 / (2*samples_num)) * accumulated_error
    
    return mae_error

In [34]:
item_array = []
for y_item in np.nditer(y_axis):
    x_item_array = []
    for x_item in np.nditer(x_axis):
        loop_parameters = {'b': y_item, 'w': np.array([x_item])}
        predictions = [predict(x, loop_parameters) for x in X]
        mse_val = mse(predictions, y)
#         print(mse_val[0])
        x_item_array.append(mse_val[0])
    
    item_array.append(x_item_array)

In [51]:
def train2(X, y, model_parameters, learning_rate=0.00001, iterations=12000):
    # Make prediction for every data sample
    predictions = [predict(x, model_parameters) for x in X]

    # Calculate initial cost for model - MSE
    initial_error = mse(predictions, y)
    
    trains_w.append(model_parameters['w'][0])
    trains_b.append(model_parameters['b'])
    trains_mse.append(initial_error[0])
    
    print("Initial state:")
    print(" - error: {}".format(initial_error))
    print(" - parameters: {}".format(model_parameters))
    
    for i in range(iterations):
        # Sum up partial gradients for every data sample, for every parameter in model
        accumulated_grad_w0 = 0
        accumulated_grad_b = 0   
        for x, y_target in zip(X, y):
            accumulated_grad_w0 += (predict(x, model_parameters) - y_target)*x[0]
            accumulated_grad_b += (predict(x, model_parameters) - y_target)
            
        # Calculate mean of gradient
        w_grad = (1.0/len(X)) * accumulated_grad_w0
        b_grad = (1.0/len(X)) * accumulated_grad_b
        
        # Update parameters by small part of averaged gradient
        model_parameters["w"][0] = model_parameters["w"][0] - learning_rate * w_grad
        model_parameters["b"] = model_parameters["b"] - learning_rate * b_grad
        
        if i % 4000 == 0:
            print("\nIteration {}:".format(i))
            print(" - error: {}".format(mse([predict(x, model_parameters) for x in X], y)))
            print(" - parameters: {}".format(model_parameters))
        
        if i % 100 == 0 or (i > 1 and i < 100):
            trains_w.append(model_parameters['w'][0])
            trains_b.append(model_parameters['b'][0])
            trains_mse.append(mse([predict(x, model_parameters) for x in X], y)[0])
            
        
        if i % 20 == 0:
            data_w.append(model_parameters['w'][0])
            data_b.append(model_parameters['b'][0])
            
            
    print("\nFinal state:")
    print(" - error: {}".format(mse([predict(x, model_parameters) for x in X], y)))
    print(" - parameters: {}".format(model_parameters))

In [52]:
trains_w = []
trains_b = []
trains_mse = []

data_w = []
data_b = []

model_parameters = init(n)
train2(X, y, model_parameters)

Initial state:
 - error: [75870.4884482]
 - parameters: {'w': array([0.]), 'b': 0.0}

Iteration 0:
 - error: [71785.89050732]
 - parameters: {'w': array([0.20350039]), 'b': array([0.00356868])}

Iteration 4000:
 - error: [1795.02828104]
 - parameters: {'w': array([7.27337638]), 'b': array([0.3805391])}

Iteration 8000:
 - error: [1793.42251814]
 - parameters: {'w': array([7.26901236]), 'b': array([0.63393854])}

Final state:
 - error: [1791.83896096]
 - parameters: {'w': array([7.26467917]), 'b': array([0.88554832])}


In [53]:
z_values = np.array(item_array)

In [54]:
fig = go.Figure(data=[go.Surface(z=z_values, x=x_axis, y=y_axis),go.Scatter3d(x=trains_w, y=trains_b, z=trains_mse, mode='markers')])

In [55]:
fig.show()

In [56]:
xx = np.linspace(0, 120, 2)
train_frame_data = []

# go.Frame(data=[go.Scatter(x=[1, 2], y=[1, 2])]),
#             go.Frame(data=[go.Scatter(x=[1, 4], y=[1, 4])]),
#             go.Frame(data=[go.Scatter(x=[3, 4], y=[3, 4])]

frame_cnt = 0
# [go.Frame(data=[go.Scatter(x=[1, 2], y=[1, 2])]),
#             go.Frame(data=[go.Scatter(x=[1, 4], y=[1, 4])]),
#             go.Frame(data=[go.Scatter(x=[3, 4], y=[3, 4])],
#                      layout=go.Layout(title_text="End Title"))]
for w, b in np.nditer([data_w, data_b]):
    one_frame = {"data": {}, "name": str(frame_cnt)}
    y_value_array = []
    x_value_array = []
    for x_value in np.nditer(xx):
        y_value = x_value * w + b
        
        y_value_array.append(y_value)
        x_value_array.append(x_value)
        
#     print(x_value_array)
#     print(y_value_array)
    train_frame_data.append(go.Frame(data=[go.Scatter(x=np.array(x_value_array), y=np.array(y_value_array), mode="lines")]))
    
    
print(train_frame_data)

[Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([3.56867974e-03, 2.44236159e+01])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([5.75891221e-02, 3.91963997e+02])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([8.87785201e-02, 6.00397794e+02])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([1.07020275e-01, 7.18601278e+02])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([1.17919323e-01, 7.85634603e+02])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([1.24654259e-01, 8.23649019e+02])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatter', 'x': array([  0., 120.]), 'y': array([1.29027683e-01, 8.45206635e+02])}]
}), Frame({
    'data': [{'mode': 'lines', 'type': 'scatte

In [57]:
len(train_frame_data)

600

In [61]:

fig2 = go.Figure(
    data=[go.Scatter(x=x_array.flatten(), y=y.flatten(), mode='markers'),
          go.Scatter(x=x_array.flatten(), y=y.flatten(), mode='markers')],
    layout=go.Layout(
        updatemenus=[dict(type="buttons",
                          buttons=[dict(label="Play",
                                        method="animate",
                                        args=[None])])]),
    frames=train_frame_data
)

fig2.show()