In [1]:
import turicreate

In [2]:
# Load the housing dataset from the local 'home_data.sframe' file into an SFrame.
data = turicreate.SFrame('home_data.sframe')
# Last expression of the cell: displays the number of rows that were loaded.
len(data)

21613

## Splitting the data into training and test sets

In [3]:
# Hold out a test set. The fixed seed keeps the split reproducible across runs;
# with fraction 0.8 the first returned frame receives roughly 80% of the rows.
train_data, test_data = data.random_split(0.8, seed=0)

In [4]:
# Number of rows that ended up in the training split.
len(train_data)

17384

In [5]:
# Number of rows that ended up in the held-out test split.
len(test_data)

4229

# Build a generic simple linear regression function 

In [9]:
def simple_linear_regression(input_features, output):
    """Fit ``output ~= intercept + slope * input_features`` by closed-form least squares.

    Both arguments must be equal-length, array-like columns that support
    ``len()``, element-wise ``*`` and ``.sum()`` (for example a turicreate
    SArray, a numpy array or a pandas Series).

    Args:
        input_features: The single explanatory variable (x values).
        output: The target variable (y values).

    Returns:
        A tuple ``(intercept, slope)`` of the fitted line.

    Raises:
        ValueError: If the input is empty, or if every x value is identical
            (the least-squares line is then undefined because the
            denominator of the closed-form solution is zero).
    """
    n = len(input_features)
    if n == 0:
        raise ValueError("Cannot fit a regression line to empty input.")

    # The four sums the closed-form solution needs; nothing else is computed.
    sum_x = input_features.sum()
    sum_y = output.sum()
    sum_xx = (input_features * input_features).sum()
    sum_xy = (input_features * output).sum()

    # Equals n^2 times the variance of x, so it is zero exactly when x is constant.
    denominator = (n * sum_xx) - (sum_x * sum_x)
    if denominator == 0:
        raise ValueError(
            "Cannot fit a regression line when all input_features values are identical."
        )

    intercept = ((sum_y * sum_xx) - (sum_x * sum_xy)) / denominator
    slope = ((n * sum_xy) - (sum_x * sum_y)) / denominator

    return intercept, slope

In [10]:
# Sanity check on toy data that lies exactly on the line y = 1 + 1*x (x = 0..4):
# the fit is expected to recover intercept 1 and slope 1.
test_feature = turicreate.SArray(range(5))
test_output = turicreate.SArray(1 + 1*test_feature)
test_intercept, test_slope = simple_linear_regression(test_feature, test_output)
print("Intercept: " + str(test_intercept))
print("Slope: " + str(test_slope))

Intercept: 1.0
Slope: 1.0


In [12]:
# Fit the single-feature model on the training split: price as a linear
# function of the 'sqft_living' column.
sqft_intercept, sqft_slope = simple_linear_regression(
    train_data['sqft_living'],
    train_data['price'],
)

print("Intercept: " + str(sqft_intercept))
print("Slope: " + str(sqft_slope))

Intercept: -47116.076574939834
Slope: 281.95883856769746


# Predicting Values

In [13]:
def get_regression_predictions(input_feature, intercept, slope):
    """Evaluate the fitted line ``intercept + slope * input_feature``.

    Args:
        input_feature: A scalar or an array-like column of x values.
        intercept: Intercept of the fitted line.
        slope: Slope of the fitted line.

    Returns:
        The predicted output, with the same scalar/array shape as
        ``input_feature``.
    """
    scaled = slope * input_feature
    return intercept + scaled

In [15]:
# quiz: predicted price of a 2650 sq ft house under the sqft model
my_house_sqft = 2650
estimated_price = get_regression_predictions(my_house_sqft, sqft_intercept, sqft_slope)
print(
    "The estimated price for a house with %d squarefeet is $%.2f"
    % (my_house_sqft, estimated_price)
)

The estimated price for a house with 2650 squarefeet is $700074.85


# Residual Sum of Squares

In [18]:
def get_residual_sum_of_squares(input_feature, output, intercept, slope):
    """Return the residual sum of squares (RSS) of a line on the given data.

    Args:
        input_feature: Array-like column of x values.
        output: Array-like column of observed y values.
        intercept: Intercept of the line being evaluated.
        slope: Slope of the line being evaluated.

    Returns:
        The sum over all rows of ``(output - prediction) ** 2``.
    """
    fitted = get_regression_predictions(input_feature, intercept, slope)
    errors = output - fitted
    # Element-wise square first; the reduction to a single number happens last.
    squared_errors = errors * errors
    return squared_errors.sum()

In [19]:
# for testing: evaluates the RSS on the toy feature/output using the
# intercept and slope that were fitted from that same toy data.
print(get_residual_sum_of_squares(test_feature, test_output, test_intercept, test_slope)) # should be 0.0

0.0


In [20]:
# quiz: RSS of the sqft model, measured on the training split it was fitted on
rss_prices_on_sqft = get_residual_sum_of_squares(
    train_data['sqft_living'],
    train_data['price'],
    sqft_intercept,
    sqft_slope,
)
print('The RSS of predicting Prices based on Square Feet is : ' + str(rss_prices_on_sqft))

The RSS of predicting Prices based on Square Feet is : 1201918356321967.5


# Predict the square footage given the price

In [24]:
def inverse_regression_predictions(output, intercept, slope):
    """Invert the fitted line to estimate the input that yields ``output``.

    Solves ``output = intercept + slope * input_feature`` for
    ``input_feature``.

    Args:
        output: A scalar or array-like column of y values.
        intercept: Intercept of the fitted line.
        slope: Slope of the fitted line.

    Returns:
        ``(output - intercept) / slope``.
    """
    offset_from_intercept = output - intercept
    return offset_from_intercept / slope

In [25]:
# quiz: estimated living area of an $800,000 house under the sqft model
my_house_price = 800000
estimated_squarefeet = inverse_regression_predictions(my_house_price, sqft_intercept, sqft_slope)
print(
    "The estimated squarefeet for a house worth $%.2f is %d"
    % (my_house_price, estimated_squarefeet)
)

The estimated squarefeet for a house worth $800000.00 is 3004


# New Model: estimate prices from bedrooms

In [None]:
# Fit a second single-feature model on the training split: price as a linear
# function of the 'bedrooms' column.
bedroom_intercept, bedroom_slope = simple_linear_regression(train_data['bedrooms'], train_data['price'])

# Report the parameters of the bedroom model (not those of the earlier sqft model).
print("Intercept: " + str(bedroom_intercept))
print("Slope: " + str(bedroom_slope))