#### Import the Turi Create library and load the dataset

In [1]:
import turicreate

In [28]:
# Load the house-sales SFrame from a directory path relative to the working directory.
sales = turicreate.SFrame('home_data.sframe/')
# Bare trailing expression: rendered as this cell's output.
sales

id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront
7129300520,2014-10-13 00:00:00+00:00,221900.0,3.0,1.0,1180.0,5650.0,1.0,0
6414100192,2014-12-09 00:00:00+00:00,538000.0,3.0,2.25,2570.0,7242.0,2.0,0
5631500400,2015-02-25 00:00:00+00:00,180000.0,2.0,1.0,770.0,10000.0,1.0,0
2487200875,2014-12-09 00:00:00+00:00,604000.0,4.0,3.0,1960.0,5000.0,1.0,0
1954400510,2015-02-18 00:00:00+00:00,510000.0,3.0,2.0,1680.0,8080.0,1.0,0
7237550310,2014-05-12 00:00:00+00:00,1225000.0,4.0,4.5,5420.0,101930.0,1.0,0
1321400060,2014-06-27 00:00:00+00:00,257500.0,3.0,2.25,1715.0,6819.0,2.0,0
2008000270,2015-01-15 00:00:00+00:00,291850.0,3.0,1.5,1060.0,9711.0,1.0,0
2414600126,2015-04-15 00:00:00+00:00,229500.0,3.0,1.0,1780.0,7470.0,1.0,0
3793500160,2015-03-12 00:00:00+00:00,323000.0,3.0,2.5,1890.0,6560.0,2.0,0

view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat
0,3,7.0,1180.0,0.0,1955.0,0.0,98178,47.51123398
0,3,7.0,2170.0,400.0,1951.0,1991.0,98125,47.72102274
0,3,6.0,770.0,0.0,1933.0,0.0,98028,47.73792661
0,5,7.0,1050.0,910.0,1965.0,0.0,98136,47.52082
0,3,8.0,1680.0,0.0,1987.0,0.0,98074,47.61681228
0,3,11.0,3890.0,1530.0,2001.0,0.0,98053,47.65611835
0,3,7.0,1715.0,0.0,1995.0,0.0,98003,47.30972002
0,3,7.0,1060.0,0.0,1963.0,0.0,98198,47.40949984
0,3,7.0,1050.0,730.0,1960.0,0.0,98146,47.51229381
0,3,7.0,1890.0,0.0,2003.0,0.0,98038,47.36840673

long,sqft_living15,sqft_lot15
-122.25677536,1340.0,5650.0
-122.3188624,1690.0,7639.0
-122.23319601,2720.0,8062.0
-122.39318505,1360.0,5000.0
-122.04490059,1800.0,7503.0
-122.00528655,4760.0,101930.0
-122.32704857,2238.0,6819.0
-122.31457273,1650.0,9711.0
-122.33659507,1780.0,8113.0
-122.0308176,2390.0,7570.0


#### Split the dataset into training and test sets

In [5]:
# Hold out part of the data for evaluation; the explicit seed keeps the split repeatable.
train_data, test_data = sales.random_split(0.8, seed=0)

#### Define a simple linear regression function that returns the intercept and slope

In [8]:
def simple_linear_regression(input_feature, output):
    """Fit the least-squares line ``output ~ intercept + slope * input_feature``.

    The coefficients come from the closed-form solution expressed with four
    sums over the data (sum of x, sum of y, sum of x*y, sum of x*x).

    Parameters
    ----------
    input_feature : array-like
        Feature values. Must support ``len()``, ``.sum()`` and element-wise
        ``*`` (the notebook passes SFrame columns / SArrays).
    output : array-like
        Target values, supporting the same operations as ``input_feature``.

    Returns
    -------
    tuple
        ``(intercept, slope)`` of the fitted line.

    Raises
    ------
    ValueError
        If ``input_feature`` is empty, or if the slope denominator is exactly
        zero (e.g. every feature value is identical), in which case no unique
        line exists.
    """
    num_houses = len(input_feature)
    if num_houses == 0:
        raise ValueError("simple_linear_regression requires at least one observation")

    sum_input_features = input_feature.sum()
    sum_output = output.sum()
    sum_product = (input_feature * output).sum()
    sum_sq_input_features = (input_feature * input_feature).sum()

    # 1.0 keeps the reciprocal a float even where `/` on ints would truncate.
    inv_n = 1.0 / num_houses
    numerator = sum_product - inv_n * (sum_input_features * sum_output)
    denominator = sum_sq_input_features - inv_n * (sum_input_features * sum_input_features)
    if denominator == 0:
        # Without this guard the division either raises an opaque
        # ZeroDivisionError or silently yields nan/inf, depending on the
        # numeric type returned by .sum().
        raise ValueError("input_feature has zero variance; the slope is undefined")

    slope = numerator / denominator
    intercept = (sum_output / num_houses) - (slope * (sum_input_features / num_houses))
    return (intercept, slope)

#### Intercept & slope when Input feature = 'sqft_living' & Output = 'price'

In [16]:
# Fit once and unpack both coefficients: simple_linear_regression returns
# (intercept, slope), so a second call just to index the other element is
# redundant work over the whole training column.
squarfeet_intercept, squarfeet_slope = simple_linear_regression(train_data['sqft_living'], train_data['price'])
print(squarfeet_intercept)
print(squarfeet_slope)

-47116.07657494
281.9588385676974


#### Function for getting the predicted outputs

In [18]:
def get_regression_predictions(input_feature, intercept, slope):
    """Evaluate the fitted line at ``input_feature``.

    Returns ``intercept + slope * input_feature``. ``input_feature`` may be a
    scalar or any object supporting multiplication by and addition to a
    number, in which case the result has the corresponding type.
    """
    return intercept + slope * input_feature

#### Prediction Example

In [19]:
# Predicted price from the sqft_living model for a single house size.
my_house_sqft = 2650
get_regression_predictions(
    my_house_sqft,
    intercept=squarfeet_intercept,
    slope=squarfeet_slope,
)

700074.8456294581

#### Function for getting the RSS

In [21]:
def get_residual_sum_of_squares(input_feature, output, intercept, slope):
    """Return the residual sum of squares of a fitted line on the given data.

    The residual for each observation is ``output`` minus the line's
    prediction ``intercept + slope * input_feature``; the squared residuals
    are summed with the array's own ``.sum()``.
    """
    residuals = output - (intercept + (slope * input_feature))
    return (residuals * residuals).sum()

#### Testing the regression and RSS functions on data that lies exactly on a line

In [27]:
# Sanity check on synthetic data lying exactly on y = 1 + 1*x: the fit should
# recover intercept 1 and slope 1, and the residual sum of squares should be 0.
test_feature = turicreate.SArray(range(5))
test_output = turicreate.SArray(1 + 1*test_feature)
(test_intercept, test_slope) = simple_linear_regression(test_feature, test_output)
test_rss = get_residual_sum_of_squares(test_feature, test_output, test_intercept, test_slope)
print("Intercept: " + str(test_intercept))
print("Slope: " + str(test_slope))
print(test_rss)

# Fail loudly on a fresh run instead of relying on someone reading the printout.
assert abs(test_intercept - 1.0) < 1e-9, "intercept should be 1 for y = 1 + x"
assert abs(test_slope - 1.0) < 1e-9, "slope should be 1 for y = 1 + x"
assert abs(test_rss) < 1e-9, "RSS should be 0 for a perfect fit"

Intercept: 1.0
Slope: 1.0
0.0


#### Getting the RSS of the 'sqft_living' model on the training data

In [29]:
# Training-set RSS of the sqft_living model.
get_residual_sum_of_squares(
    train_data['sqft_living'],
    train_data['price'],
    intercept=squarfeet_intercept,
    slope=squarfeet_slope,
)

1201918356321966.8

#### Function for estimating the input from a given output

In [31]:
def inverse_regression_predictions(output, intercept, slope):
    """Invert the fitted line: return the input whose prediction equals ``output``.

    Solves ``output = intercept + slope * input`` for ``input``, i.e.
    ``(output - intercept) / slope``.
    """
    return (output - intercept) / slope

#### Example

In [33]:
# Estimated living area, under the sqft_living model, for a house at this price.
my_house_price = 800000
estimated_squarefeet = inverse_regression_predictions(
    my_house_price,
    intercept=squarfeet_intercept,
    slope=squarfeet_slope,
)
estimated_squarefeet

3004.396247615945

#### Using Number of Bedrooms as a feature

In [36]:
# Fit price against the 'bedrooms' column on the training set.
bedrooms_intercept, bedrooms_slope = simple_linear_regression(
    train_data['bedrooms'],
    train_data['price'],
)

#### Getting RSS for the above models on the test data

In [37]:
# Test-set RSS of the sqft_living model.
get_residual_sum_of_squares(
    test_data['sqft_living'],
    test_data['price'],
    intercept=squarfeet_intercept,
    slope=squarfeet_slope,
)

275402936247141.47

In [38]:
# Test-set RSS of the bedrooms model.
get_residual_sum_of_squares(
    test_data['bedrooms'],
    test_data['price'],
    intercept=bedrooms_intercept,
    slope=bedrooms_slope,
)

493364582868288.1