### Mount Google Drive to load the data and save the work

In [None]:
# Mount Google Drive inside the Colab runtime at /content/MyDrive/ so that the
# dataset path used when loading the SFrame resolves.
from google.colab import drive
drive.mount('/content/MyDrive/')

In [None]:
#!pip install turicreate
import turicreate

In [None]:
# List the contents of the runtime's current working directory.
!ls

In [None]:
#!7z x '/content/MyDrive/MyDrive/SFRAMES/home_data_small.sframe.zip'

In [None]:
# Load the home-data SFrame from the mounted Drive folder into `sf`.
sf = turicreate.SFrame('/content/MyDrive/MyDrive/SFRAMES/home_data.sframe')

In [None]:
# Preview the first three rows of the loaded data.
sf.head(3)

In [None]:
# Dimensions of the full dataset, before it is split into train and test.
sf.shape

# **Regression Model**

In [None]:
# Split the full dataset into a training part and a test part. 0.8 is the
# fraction handed to random_split; the fixed seed keeps the split identical
# from one run to the next.
train_data, test_data = sf.random_split(fraction=0.8, seed=0)

In [None]:
# Baseline model: regress `price` on three basic attributes of a house.
# validation_set=None is passed explicitly, so create() is not given (and is
# told not to pick) a validation set.
example_features = ['sqft_living', 'bedrooms', 'bathrooms']
example_model = turicreate.linear_regression.create(
    train_data,
    target='price',
    features=example_features,
    validation_set=None,
)

In [None]:
# Fetch the coefficients of the fitted example model and print them.
example_weight_summary = example_model.coefficients
print(example_weight_summary)

# **Making Predictions**

In [None]:
# Predict with the example model on every row of the TRAINING split and show
# the first prediction as a quick check against the expected value.
example_predictions = example_model.predict(train_data)
print(example_predictions[0]) # should be 271789.505878

In [None]:
def get_residual_sum_of_squares(model, data, outcome):
    """Return the residual sum of squares (RSS) of `model` on `data`.

    Parameters:
        model: object exposing a `predict(data)` method.
        data: input forwarded unchanged to `model.predict`.
        outcome: observed target values; must support subtraction with the
            predictions and multiplication with itself, and be iterable.

    Returns:
        The sum of the squared differences between `outcome` and the
        predictions produced by `model` for `data`.
    """
    # Residuals: observed values minus the model's predictions.
    errors = outcome - model.predict(data)
    # Square the residuals, then add them all up.
    squared_errors = errors * errors
    return sum(squared_errors)

In [None]:
# RSS of the example model evaluated on the held-out TEST split (both the
# data and the outcome passed in come from test_data).
rss_example_test = get_residual_sum_of_squares(example_model, test_data, test_data['price'])
# Backward-compatible alias: this value used to be stored only under a name
# ending in "_train" even though it is computed on the test split.
rss_example_train = rss_example_test
print(rss_example_test) # should be 2.7376153833e+14

# **Some new features**


In [None]:
from math import log

# Add the squared number of bedrooms as a new column on both splits, so the
# training and test data end up with the same engineered feature.
for dataset in (train_data, test_data):
    dataset['bedrooms_squared'] = dataset['bedrooms'].apply(lambda x: x**2)

In [None]:
# Create the remaining 3 engineered features on both the TRAIN and TEST data:
#   bed_bath_rooms  - bedrooms multiplied by bathrooms
#   log_sqft_living - natural log of sqft_living
#   lat_plus_long   - latitude added to longitude
for dataset in (train_data, test_data):
    dataset['bed_bath_rooms'] = dataset['bedrooms'] * dataset['bathrooms']
    dataset['log_sqft_living'] = dataset['sqft_living'].apply(lambda x: log(x))
    dataset['lat_plus_long'] = dataset['lat'] + dataset['long']

In [None]:
# Print the mean of each engineered feature on the TEST split, rounded to
# two decimal places, one feature per line.
for feature_name in ('bedrooms_squared', 'bed_bath_rooms', 'log_sqft_living', 'lat_plus_long'):
    feature_values = test_data[feature_name]
    feature_mean = sum(feature_values) / len(feature_values)
    print(feature_name + ' _ ' + str(round(feature_mean, 2)))

# Learning Multiple Models

In [None]:
# Nested feature sets: each list extends the previous one, so model 2 uses
# everything model 1 does plus one interaction term, and model 3 adds the
# remaining engineered columns on top of model 2.
model_1_features = ['sqft_living', 'bedrooms', 'bathrooms', 'lat', 'long']
model_2_features = [*model_1_features, 'bed_bath_rooms']
model_3_features = [
    *model_2_features,
    'bedrooms_squared',
    'log_sqft_living',
    'lat_plus_long',
]

In [None]:
# Learn the three models, one per feature list and in order (1, 2, 3).
# validation_set=None is passed to every create() call.
model_1, model_2, model_3 = [
    turicreate.linear_regression.create(
        train_data,
        target='price',
        features=feature_list,
        validation_set=None,
    )
    for feature_list in (model_1_features, model_2_features, model_3_features)
]

In [None]:
# Label the output, then display the coefficients of model 1.
print('model 1')
model_1.coefficients

In [None]:
# Label the output, then display the coefficients of model 2.
print('model 2')
model_2.coefficients

In [None]:
# Label the output, then display the coefficients of model 3.
print('model 3')
model_3.coefficients

# RSS for each model - Training Set

In [None]:
# RSS of model 1 on the TRAINING split (data and outcome both from train_data).
print("RSS Value of Model - 1: ")
get_residual_sum_of_squares(model_1, train_data, train_data['price'])


In [None]:
# RSS of model 2 on the TRAINING split (data and outcome both from train_data).
print("RSS Value of Model - 2: ")
get_residual_sum_of_squares(model_2, train_data, train_data['price'])

In [None]:
# RSS of model 3 on the TRAINING split (data and outcome both from train_data).
print("RSS Value of Model - 3: ")
get_residual_sum_of_squares(model_3, train_data, train_data['price'])

# RSS for each model - Test Set

In [None]:
# RSS of model 1 on the held-out TEST split (data and outcome both from test_data).
print("RSS Value of Model - 1 Test: ")
get_residual_sum_of_squares(model_1, test_data, test_data['price'])

In [None]:
# RSS of model 2 on the held-out TEST split (data and outcome both from test_data).
print("RSS Value of Model - 2 Test: ")
get_residual_sum_of_squares(model_2, test_data, test_data['price'])

In [None]:
# RSS of model 3 on the held-out TEST split (data and outcome both from test_data).
print("RSS Value of Model - 3 Test: ")
get_residual_sum_of_squares(model_3, test_data, test_data['price'])