In [1]:

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:

# Load the housing dataset from the CSV file under data/ (path is relative to
# the notebook's working directory).
housing_data_df = pd.read_csv('data/kc_house_data.csv')
# Show the first rows as a quick sanity check of the columns and values.
housing_data_df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [3]:

# Model inputs and target as plain NumPy arrays:
#   size_log   - natural log of the 'sqft_lot' column
#   bedrooms   - the 'bedrooms' column (independent copy of the frame's data)
#   target_col - the 'price' column the model is fitted against
size_log = np.log(housing_data_df['sqft_lot'].to_numpy())
bedrooms = housing_data_df['bedrooms'].to_numpy(copy=True)
target_col = housing_data_df['price'].to_numpy()

In [4]:

def linear_regression(params, feature1=None, feature2=None):
    """Evaluate the two-feature linear model.

    Computes ``params[0] + feature1 * params[1] + feature2 * params[2]``.

    Parameters
    ----------
    params : indexable of three numbers
        ``[intercept, slope_1, slope_2]``.
    feature1 : array-like or scalar, optional
        First predictor. When omitted, the notebook-level ``size_log`` is used.
    feature2 : array-like or scalar, optional
        Second predictor. When omitted, the notebook-level ``bedrooms`` is used.

    Returns
    -------
    The model predictions, with whatever type the arithmetic on the inputs
    produces.
    """
    # Resolve the defaults at call time rather than in the signature: defaults
    # written in the signature are evaluated once, when the ``def`` cell runs,
    # so re-running the feature cell would leave this function silently using
    # the old arrays.
    if feature1 is None:
        feature1 = size_log
    if feature2 is None:
        feature2 = bedrooms
    return params[0] + feature1*params[1] + feature2*params[2]

In [5]:

def loss_function(params, targets=None, feature1=None, feature2=None):
    """Mean absolute error of the linear model against ``targets``.

    Parameters
    ----------
    params : indexable of three numbers
        ``[intercept, slope_1, slope_2]`` passed to ``linear_regression``.
    targets : array-like, optional
        Observed values. When omitted, the notebook-level ``target_col`` is used.
    feature1 : array-like, optional
        First predictor. When omitted, the notebook-level ``size_log`` is used.
    feature2 : array-like, optional
        Second predictor. When omitted, the notebook-level ``bedrooms`` is used.

    Returns
    -------
    The value returned by ``tf.losses.mae(targets, predictions)``.
    """
    # Look the data up when the function is called, not when it is defined, so
    # that re-running the feature cell is picked up without re-running this one.
    if targets is None:
        targets = target_col
    if feature1 is None:
        feature1 = size_log
    if feature2 is None:
        feature2 = bedrooms

    predictions = linear_regression(params, feature1, feature2)
    return tf.losses.mae(targets, predictions)

In [6]:

def print_results(params):
    """Print the current loss and the three model coefficients on one line.

    ``params`` must support integer indexing, and each element (as well as the
    value returned by ``loss_function``) must expose ``.numpy()``. Returns None.
    """
    template = 'loss: {:0.3f}, intercept: {:0.3f}, slope_1: {:0.3f}, slope_2: {:0.3f}'

    # Loss is evaluated first, then the coefficients in index order.
    current_loss = loss_function(params).numpy()
    intercept = params[0].numpy()
    slope_1 = params[1].numpy()
    slope_2 = params[2].numpy()

    print(template.format(current_loss, intercept, slope_1, slope_2))

In [7]:

def optimisation(params, num_opt=100, opt=None):
    """Fit ``params`` in place by repeatedly minimising ``loss_function``.

    Parameters
    ----------
    params : tf.Variable
        Trainable ``[intercept, slope_1, slope_2]`` vector; updated in place.
    num_opt : int, default 100
        Number of optimiser steps to take.
    opt : optimizer, optional
        Object exposing ``minimize(loss, var_list=...)``. When omitted, a fresh
        ``tf.optimizers.Adam(0.9)`` is created for this call.

    Returns
    -------
    The same ``params`` object, after the final step, so the caller can bind
    the fitted coefficients to a name.
    """
    # An optimiser instance written as a default in the signature is built once,
    # when the ``def`` cell runs, and then shared by every call - so its internal
    # state would leak from one fit into the next. Build it per call instead.
    if opt is None:
        opt = tf.optimizers.Adam(0.9)

    for _ in range(num_opt):
        # One optimiser step on the current loss, then report progress.
        opt.minimize(lambda: loss_function(params), var_list=[params])
        print_results(params)

    return params

In [8]:

if __name__ == '__main__':
    # Starting guess for the three coefficients, in the order
    # [intercept, slope_1, slope_2]; held in a tf.Variable so the optimiser
    # can update it in place.
    params = tf.Variable([0.1, 0.05, 0.02])
    
    # Run the fit. Progress is printed after every optimiser step, and the
    # fitted coefficients are available in `params` afterwards.
    linreg = optimisation(params)

loss: 540075.500, intercept: 1.000, slope_1: 0.950, slope_2: 0.920
loss: 540063.250, intercept: 1.900, slope_1: 1.850, slope_2: 1.820
loss: 540051.562, intercept: 2.800, slope_1: 2.750, slope_2: 2.720
loss: 540039.375, intercept: 3.700, slope_1: 3.650, slope_2: 3.620
loss: 540027.438, intercept: 4.600, slope_1: 4.550, slope_2: 4.520
loss: 540015.438, intercept: 5.500, slope_1: 5.450, slope_2: 5.420
loss: 540003.312, intercept: 6.400, slope_1: 6.350, slope_2: 6.320
loss: 539991.375, intercept: 7.300, slope_1: 7.250, slope_2: 7.220
loss: 539979.250, intercept: 8.200, slope_1: 8.150, slope_2: 8.120
loss: 539967.312, intercept: 9.100, slope_1: 9.050, slope_2: 9.020
loss: 539955.188, intercept: 10.000, slope_1: 9.950, slope_2: 9.920
loss: 539943.188, intercept: 10.900, slope_1: 10.850, slope_2: 10.820
loss: 539931.188, intercept: 11.800, slope_1: 11.750, slope_2: 11.720
loss: 539919.250, intercept: 12.700, slope_1: 12.650, slope_2: 12.620
loss: 539907.125, intercept: 13.600, slope_1: 13.550