## Plan:

* Develop a **model** to predict property value
   * Use drivers identified in explore to build predictive regression models
   * Create and run a baseline model with `sklearn`'s `DummyRegressor` to compare our results to
   * Create and run `Linear Regression`, `LassoLars`, and Polynomial regression models
   * Use the insights from the highest-performing model (with highest test `RMSE`) to confirm our initial hypotheses and insights on the features that are the biggest drivers of property value

In [287]:
from sklearn.preprocessing import PolynomialFeatures

from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression, LassoLars, TweedieRegressor

from sklearn.metrics import mean_squared_error, r2_score

from sklearn.preprocessing import MinMaxScaler

import wrangle as w

import sys
import os
home_directory_path = os.path.expanduser('~')
sys.path.append(home_directory_path +'/utils')

from prepare_utils import split_data

import pandas as pd
pd.set_option('display.float_format', '{:.2f}'.format)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

## Preprocessing

- Encode categorical variables
- Scale continuous variables:
    - MinMax

In [18]:
df = w.prep_zillow()
df

Unnamed: 0,parcelid,id,bathrooms,bedrooms,sqft,fips,latitude,longitude,regionidcity,regionidzip,property_value,transaction_date,age,county
52520,12826780,1187175,2.0,3,1762,6037,33937685,-117996709,14634,96171,522000,2017-09-25,61,LA
52518,17239384,2968375,2.0,4,1612,6111,34300140,-118706327,27110,97116,67205,2017-09-21,52,Ventura
52517,11000655,673515,2.0,2,1286,6037,34245368,-118282383,12447,96284,354621,2017-09-20,76,LA
52508,12410050,2900077,1.0,2,1023,6037,33880374,-118112653,44116,96208,222739,2017-09-19,60,LA
52497,10867042,986237,2.0,3,2141,6037,34150146,-118490857,12447,96426,887003,2017-09-19,62,LA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,12069064,870991,1.0,2,738,6037,34149214,-118239357,45457,96325,218552,2017-01-01,94,LA
4,12095076,781532,3.0,4,2962,6037,34145202,-118179824,47019,96293,773303,2017-01-01,66,LA
3,12177905,2288172,3.0,4,2376,6037,34245180,-118240722,396551,96330,145143,2017-01-01,46,LA
2,14186244,11677,2.0,3,1243,6059,33886168,-117823170,21412,97078,564778,2017-01-01,54,Orange


Encode:
- Categorize bathrooms fro modeling to:
    - 1-2 bathrooms (~300k avg property value)
    - 2.5-3 bathrooms (~515k avg property value)
    - 3.5-4 bathrooms (~800k avg property value)
- Bedrooms could be worth modeling on as a categorical variable
- County doesn't tell us a lot on property value, but I could later explore zipcodes

In [19]:
# df['la_county'] = np.where(df['county'] == 'LA', 1, 0)
# df['ventura_county'] = np.where(df['county'] == 'Ventura', 1, 0)
df['1_2_bathrooms'] = np.where(df['bathrooms'] <= 2, 1, 0)
df['2.5_3_bathrooms'] = np.where(df['bathrooms'].isin([2.5, 3]), 1, 0)
df['3.5_4_bathrooms'] = np.where(df['bathrooms'] >= 3.5, 1, 0)

In [101]:
df.columns

Index(['parcelid', 'id', 'bathrooms', 'bedrooms', 'sqft', 'fips', 'latitude',
       'longitude', 'regionidcity', 'regionidzip', 'property_value',
       'transaction_date', 'age', 'county', '1_2_bathrooms', '2.5_3_bathrooms',
       '3.5_4_bathrooms'],
      dtype='object')

In [102]:
cols_to_models = ['sqft', '1_2_bathrooms', '2.5_3_bathrooms', '3.5_4_bathrooms', 
                  'property_value', 'county']
target = ['property_value']

Split data into train, validate, test.

In [103]:
train, validate, test = split_data(df[cols_to_models], validate_size=.15, test_size=.15, 
                         stratify_col='county', random_state=123)

# drop county column
train = train.iloc[:,:-1]
validate = validate.iloc[:,:-1]
test = test.iloc[:,:-1]

 Split into X and y.

In [258]:
train.iloc[:,:4]

Unnamed: 0,sqft,1_2_bathrooms,2.5_3_bathrooms,3.5_4_bathrooms
35445,2512,0,1,0
25232,2223,0,1,0
20581,2168,0,0,1
27254,954,1,0,0
36098,1229,1,0,0
...,...,...,...,...
22820,2264,0,1,0
40700,1461,0,1,0
45684,1661,1,0,0
46917,2756,0,1,0


In [259]:
# remove target
X_train = train.iloc[:,:4]
X_validate = validate.iloc[:,:4]
X_test = test.iloc[:,:4]

# only add target
y_train = train['property_value']
y_vaildate = validate['property_value']
y_test = test['property_value']

Scale `sqft`

In [360]:
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_validate_scaled = scaler.transform(X_validate)
X_test_scaled = scaler.transform(X_test)

train['sqft_scaled'] = X_train_scaled[:,0]
validate['sqft_scaled'] = X_validate_scaled[:,0]
test['sqft_scaled'] = X_test_scaled[:,0]

**Baseline Model**

In [264]:
dummy = DummyRegressor()

In [265]:
dummy.fit(X_train_scaled, y_train)

In [266]:
train['baseline_pred'] = dummy.predict(X_train_scaled)

In [399]:
# RMSE
mean_squared_error(train['property_value'], train['baseline_pred'], squared=False)

285789.35772279406

In [267]:
print('R^2:', dummy.score(train.iloc[:,[1,2,3,5]], train['property_value']))

R^2: 0.0


---
**Linear Regression Model**

Pooled Regression

In [268]:
lm = LinearRegression()

In [269]:
lm.fit(X_train_scaled, y_train)

In [270]:
train['lm_pred'] = lm.predict(X_train_scaled)

In [271]:
train

Unnamed: 0,sqft,1_2_bathrooms,2.5_3_bathrooms,3.5_4_bathrooms,property_value,sqft_scaled,baseline_pred,lm_pred
35445,2512,0,1,0,767135,0.45,414625.58,551148.57
25232,2223,0,1,0,288645,0.38,414625.58,505468.05
20581,2168,0,0,1,591995,0.37,414625.58,626951.07
27254,954,1,0,0,453470,0.10,414625.58,263539.24
36098,1229,1,0,0,434377,0.16,414625.58,307006.86
...,...,...,...,...,...,...,...,...
22820,2264,0,1,0,415548,0.39,414625.58,511948.68
40700,1461,0,1,0,483708,0.21,414625.58,385023.21
45684,1661,1,0,0,293403,0.26,414625.58,375290.55
46917,2756,0,1,0,888343,0.50,414625.58,589716.21


In [272]:
print('R^2:', lm.score(X_train_scaled, y_train))

R^2: 0.2300268678548748


In [398]:
# RMSE
mean_squared_error(train['property_value'], train['lm_pred'], squared=False)

250774.7683349454

In [388]:
lm.coef_

array([710814.2240168 , -70955.83629653, -29610.35564431, 100566.19194085])

In [389]:
lm.intercept_

262733.9752148463

In [395]:
pd.DataFrame(index=list(X_train.columns) + ['intercept'],
             columns=['coefficients'],
             data=np.append(lm.coef_ * scaler.scale_, lm.intercept_))

Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


---
**LassoLars**

In [326]:
lars = LassoLars(alpha=20.0)

In [327]:
lars.fit(X_train_scaled, y_train)

In [328]:
train['lars_pred'] = lars.predict(X_train_scaled)

In [330]:
print('R^2:', lars.score(X_train_scaled, y_train))

R^2: 0.23002657537917115


In [397]:
# RMSE
mean_squared_error(train['property_value'], train['lars_pred'], squared=False)

250774.81596356825

In [331]:
pd.DataFrame(index=list(X_train.columns) + ['intercept'],
             columns=['coefficients'],
             data=np.append(lars.coef_ * scaler.scale_, lars.intercept_))

Unnamed: 0,coefficients
sqft,157.79
1_2_bathrooms,-41555.92
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130011.3
intercept,233620.66


---
**Tweedie Regressor (GLM)**

In [342]:
glm = TweedieRegressor(power=1, alpha=1)

In [343]:
glm.fit(X_train_scaled, y_train)

In [344]:
train['glm_pred'] = glm.predict(X_train_scaled)

In [345]:
train

Unnamed: 0,sqft,1_2_bathrooms,2.5_3_bathrooms,3.5_4_bathrooms,property_value,sqft_scaled,baseline_pred,lm_pred,lars_pred,glm_pred
35445,2512,0,1,0,767135,0.45,414625.58,551148.57,551091.01,546311.89
25232,2223,0,1,0,288645,0.38,414625.58,505468.05,505490.15,496613.90
20581,2168,0,0,1,591995,0.37,414625.58,626951.07,626823.08,552923.09
27254,954,1,0,0,453470,0.10,414625.58,263539.24,263700.70,287058.50
36098,1229,1,0,0,434377,0.16,414625.58,307006.86,307092.52,314329.83
...,...,...,...,...,...,...,...,...,...,...
22820,2264,0,1,0,415548,0.39,414625.58,511948.68,511959.48,503379.26
40700,1461,0,1,0,483708,0.21,414625.58,385023.21,385255.36,386191.74
45684,1661,1,0,0,293403,0.26,414625.58,375290.55,375257.13,362496.04
46917,2756,0,1,0,888343,0.50,414625.58,589716.21,589591.39,592124.00


In [346]:
print('R^2:', glm.score(X_train_scaled, y_train))

R^2: 0.21670071824016313


In [348]:
glm.coef_

array([ 1.48412144e+00, -1.28068696e-01,  1.25669948e-03,  1.26814100e-01])

In [349]:
glm.intercept_

12.545678754202116

In [350]:
pd.DataFrame(index=list(X_train.columns) + ['intercept'],
             columns=['coefficients'],
             data=np.append(glm.coef_, glm.intercept_))

Unnamed: 0,coefficients
sqft,1.48
1_2_bathrooms,-0.13
2.5_3_bathrooms,0.0
3.5_4_bathrooms,0.13
intercept,12.55


---
**Linear Regression Model**

Group-wise Regression

3 regression models for each bathroom category

In [365]:
train

Unnamed: 0,sqft,1_2_bathrooms,2.5_3_bathrooms,3.5_4_bathrooms,property_value,sqft_scaled,baseline_pred,lm_pred,lars_pred,glm_pred
35445,2512,0,1,0,767135,0.45,414625.58,551148.57,551091.01,546311.89
25232,2223,0,1,0,288645,0.38,414625.58,505468.05,505490.15,496613.90
20581,2168,0,0,1,591995,0.37,414625.58,626951.07,626823.08,552923.09
27254,954,1,0,0,453470,0.10,414625.58,263539.24,263700.70,287058.50
36098,1229,1,0,0,434377,0.16,414625.58,307006.86,307092.52,314329.83
...,...,...,...,...,...,...,...,...,...,...
22820,2264,0,1,0,415548,0.39,414625.58,511948.68,511959.48,503379.26
40700,1461,0,1,0,483708,0.21,414625.58,385023.21,385255.36,386191.74
45684,1661,1,0,0,293403,0.26,414625.58,375290.55,375257.13,362496.04
46917,2756,0,1,0,888343,0.50,414625.58,589716.21,589591.39,592124.00


In [367]:
lm1 = LinearRegression().fit(train[train['1_2_bathrooms']==1][['sqft_scaled']],
                             train[train['1_2_bathrooms']==1]['property_value'])
lm2 = LinearRegression().fit(train[train['2.5_3_bathrooms']==1][['sqft_scaled']],
                             train[train['2.5_3_bathrooms']==1]['property_value'])
lm3 = LinearRegression().fit(train[train['3.5_4_bathrooms']==1][['sqft_scaled']],
                             train[train['3.5_4_bathrooms']==1]['property_value'])

In [369]:
print('LM1 R^2:', lm1.score(train[train['1_2_bathrooms']==1][['sqft_scaled']],
                             train[train['1_2_bathrooms']==1]['property_value']))
print('LM2 R^2:', lm2.score(train[train['2.5_3_bathrooms']==1][['sqft_scaled']],
                             train[train['2.5_3_bathrooms']==1]['property_value']))
print('LM3 R^2:', lm3.score(train[train['3.5_4_bathrooms']==1][['sqft_scaled']],
                             train[train['3.5_4_bathrooms']==1]['property_value']))

LM1 R^2: 0.05140976480820303
LM2 R^2: 0.11097921260550758
LM3 R^2: 0.07243225988416213


In [380]:
pd.DataFrame(index=['sqft'] + ['intercept'],
             columns=['coefficients'],
             data=np.append(lm1.coef_ * scaler.scale_[0], lm1.intercept_))

Unnamed: 0,coefficients
sqft,141.74
intercept,206925.72


In [378]:
lm2.coef_

array([804503.89489941])

In [382]:
pd.DataFrame(index=['sqft'] + ['intercept'],
             columns=['coefficients'],
             data=np.append(lm2.coef_* scaler.scale_[0], lm2.intercept_))

Unnamed: 0,coefficients
sqft,178.9
intercept,195556.69


In [383]:
lm3.coef_

array([662100.5568747])

In [384]:
pd.DataFrame(index=['sqft'] + ['intercept'],
             columns=['coefficients'],
             data=np.append(lm3.coef_ * scaler.scale_[0], lm3.intercept_))

Unnamed: 0,coefficients
sqft,147.23
intercept,392105.89


In [410]:
df[['property_value']]

Unnamed: 0,property_value
52520,522000
52518,67205
52517,354621
52508,222739
52497,887003
...,...
5,218552
4,773303
3,145143
2,564778


In [455]:
def preprocess_zillow_data(df):
    
    stratify_col = ['county']
    target = ['property_value']
    
    df['1_2_bathrooms'] = np.where(df['bathrooms'] <= 2, 1, 0)
    df['2.5_3_bathrooms'] = np.where(df['bathrooms'].isin([2.5, 3]), 1, 0)
    df['3.5_4_bathrooms'] = np.where(df['bathrooms'] >= 3.5, 1, 0)
    
    features = ['sqft','1_2_bathrooms','2.5_3_bathrooms','3.5_4_bathrooms']
    
    train, validate, test = split_data(df[features+target+stratify_col],
                                       validate_size=.15, test_size=.15, 
                                       stratify_col=stratify_col, random_state=123)

    # drop county column
    train = train[features+target]
    validate = validate[features+target]
    test = test[features+target]
    

    
    # remove target
    X_train = train[features]
    X_validate = validate[features]
    X_test = test[features]

    # only add target
    y_train = train[target]
    y_vaildate = validate[target]
    y_test = test[target]
    
    scaler = MinMaxScaler()

    X_train_scaled = scaler.fit_transform(X_train)
    X_validate_scaled = scaler.transform(X_validate)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_validate_scaled, X_test_scaled,\
            y_train, y_vaildate, y_test, scaler

In [456]:
def run_model(X_train, y_train, X, y, model, scaler, features):
    
    train_model = model.fit(X_train, y_train)
    
    train_rmse = mean_squared_error(y_train,
                              model.predict(X_train),
                              squared=False)
    test_rmse = mean_squared_error(y,
                              model.predict(X),
                              squared=False)
    train_r2 = model.score(X_train, y_train)
    test_r2 = model.score(X, y)
    
    if model==DummyRegressor():
        return train_rmse, test_rmse, train_r2, test_r2
    
    display(pd.DataFrame(index=features + ['intercept'],
                         columns=['coefficients'],
                         data=np.append(model.coef_ * scaler.scale_, model.intercept_)))
    
    coeffs = np.append(model.coef_, model.intercept_)
    coeffs_unscaled = np.append(model.coef_ * scaler.scale_, model.intercept_)
    
    print(f'RMSE for {str(model)}: ', rmse)

    print(f'R^2 for {str(model)}: ', r2)
    
    print(f'Coeffs: {coeffs}\nCoeffs unscaled: {coeffs_unscaled}')
    
    return train_rmse, test_rmse, train_r2, test_r2, coeffs, coeffs_unscaled 

In [457]:
X_train, X_validate, X_test,\
    y_train, y_vaildate, y_test, scaler = preprocess_zillow_data(df,     
                                                                 features = ['sqft',
                                                                             '1_2_bathrooms',
                                                                             '2.5_3_bathrooms',
                                                                             '3.5_4_bathrooms'])

In [459]:
run_model(X_train, y_train, X_train,
          y_train, LinearRegression(), scaler, features = ['sqft',
                                                           '1_2_bathrooms',
                                                           '2.5_3_bathrooms',
                                                           '3.5_4_bathrooms'])

Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


RMSE for LinearRegression():  250774.7683349454
R^2 for LinearRegression():  0.2300268678548748
Coeffs: [710814.2240168  -70955.83629653 -29610.35564431 100566.19194085
 262733.97521485]
Coeffs unscaled: [ 1.58064093e+02 -7.09558363e+04 -2.96103556e+04  1.00566192e+05
  2.62733975e+05]


array([ 1.58064093e+02, -7.09558363e+04, -2.96103556e+04,  1.00566192e+05,
        2.62733975e+05])

In [441]:
X_train, X_validate, X_test,\
    y_train, y_vaildate, y_test, scaler = preprocess_zillow_data(df,     
                                                                 features = ['sqft',
                                                                             '1_2_bathrooms',
                                                                             '2.5_3_bathrooms',
                                                                             '3.5_4_bathrooms'])
run_model(X_train, y_train, X_train,
          y_train, LinearRegression(), scaler, features = ['sqft',
                                                           '1_2_bathrooms',
                                                           '2.5_3_bathrooms',
                                                           '3.5_4_bathrooms'])

Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


RMSE for LinearRegression():  250774.7683349454
R^2 for LinearRegression():  0.2300268678548748


(250774.7683349454, 0.2300268678548748)

In [467]:
def run_models(df, features, model_obj,
               scores_df=pd.DataFrame(columns=['model', 'features', 'coeffs',
                                                'coeffs_unscaled', 'train_rmse', 'train_r2',
                                                'test_rmse', 'test_r2'])):
        
    X_train, X_validate, X_test,\
    y_train, y_vaildate, y_test, scaler = preprocess_zillow_data(df,     
                                                                 features = features)
    model_train_scores = run_model(X_train, y_train, X_train,
                                   y_train, model_obj, scaler, features = features)
    model_test_scores = run_model(X_train, y_train, X_validate,
                                  y_vaildate, model_obj, scaler, features = features)
    
    scores = pd.concat([scores_df, pd.DataFrame({'model': [model_obj],
                                                 'features': [features],
                                                 'coeffs': [model_train_scores[2]],
                                                 'coeffs_unscaled': [model_train_scores[3]],
                                                 'train_rmse': [model_train_scores[0]],
                                                 'train_r2': [model_train_scores[1]],
                                                 'test_rmse': [model_test_scores[0]],
                                                 'test_r2': [model_test_scores[1]]})])
    return scores

In [468]:
run_models(df, features = ['sqft', '1_2_bathrooms', '2.5_3_bathrooms', '3.5_4_bathrooms'],
           model_obj = LinearRegression())

Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


RMSE for LinearRegression():  250774.7683349454
R^2 for LinearRegression():  0.2300268678548748
Coeffs: [710814.2240168  -70955.83629653 -29610.35564431 100566.19194085
 262733.97521485]
Coeffs unscaled: [ 1.58064093e+02 -7.09558363e+04 -2.96103556e+04  1.00566192e+05
  2.62733975e+05]


Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


RMSE for LinearRegression():  245332.5950403372
R^2 for LinearRegression():  0.25517616863736114
Coeffs: [710814.2240168  -70955.83629653 -29610.35564431 100566.19194085
 262733.97521485]
Coeffs unscaled: [ 1.58064093e+02 -7.09558363e+04 -2.96103556e+04  1.00566192e+05
  2.62733975e+05]


Unnamed: 0,model,features,coeffs,coeffs_unscaled,train_rmse,train_r2,test_rmse,test_r2
0,LinearRegression(),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710814.2240167982, -70955.83629653123, -29610...","[158.0640925098506, -70955.83629653123, -29610...",250774.77,0.23,245332.6,0.26


In [472]:
scores=pd.DataFrame(columns=['model', 'features', 'coeffs',
                            'coeffs_unscaled', 'train_rmse', 'train_r2',
                            'test_rmse', 'test_r2'])
features_list = [['sqft', '1_2_bathrooms', '2.5_3_bathrooms', '3.5_4_bathrooms'],
                 ['sqft']]

for features in features_list:
    scores = run_models(df, features = features, model_obj = LinearRegression(), scores_df=scores)
        
    for alpha in [0, .5, 1, 2, 5]:
        scores = run_models(df, features = features, model_obj = LassoLars(alpha=alpha), scores_df=scores)

display(scores)

Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


RMSE for LinearRegression():  250774.7683349454
R^2 for LinearRegression():  0.2300268678548748
Coeffs: [710814.2240168  -70955.83629653 -29610.35564431 100566.19194085
 262733.97521485]
Coeffs unscaled: [ 1.58064093e+02 -7.09558363e+04 -2.96103556e+04  1.00566192e+05
  2.62733975e+05]


Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-70955.84
2.5_3_bathrooms,-29610.36
3.5_4_bathrooms,100566.19
intercept,262733.98


RMSE for LinearRegression():  245332.5950403372
R^2 for LinearRegression():  0.25517616863736114
Coeffs: [710814.2240168  -70955.83629653 -29610.35564431 100566.19194085
 262733.97521485]
Coeffs unscaled: [ 1.58064093e+02 -7.09558363e+04 -2.96103556e+04  1.00566192e+05
  2.62733975e+05]


Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-41345.48
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130176.55
intercept,233123.62


RMSE for LassoLars(alpha=0):  250774.7683349454
R^2 for LassoLars(alpha=0):  0.2300268678548748
Coeffs: [710814.2240168  -41345.48065222      0.         130176.54758516
 233123.61957053]
Coeffs unscaled: [ 1.58064093e+02 -4.13454807e+04  0.00000000e+00  1.30176548e+05
  2.33123620e+05]


Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-41345.48
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130176.55
intercept,233123.62


RMSE for LassoLars(alpha=0):  245332.5950403372
R^2 for LassoLars(alpha=0):  0.25517616863736114
Coeffs: [710814.2240168  -41345.48065222      0.         130176.54758516
 233123.61957053]
Coeffs unscaled: [ 1.58064093e+02 -4.13454807e+04  0.00000000e+00  1.30176548e+05
  2.33123620e+05]


Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-41350.74
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130172.42
intercept,233136.05


RMSE for LassoLars(alpha=0.5):  250774.7683647133
R^2 for LassoLars(alpha=0.5):  0.23002686767207747
Coeffs: [710783.23425595 -41350.74154858      0.         130172.41630464
 233136.04559462]
Coeffs unscaled: [ 1.58057201e+02 -4.13507415e+04  0.00000000e+00  1.30172416e+05
  2.33136046e+05]


Unnamed: 0,coefficients
sqft,158.06
1_2_bathrooms,-41350.74
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130172.42
intercept,233136.05


RMSE for LassoLars(alpha=0.5):  245332.68836607886
R^2 for LassoLars(alpha=0.5):  0.2551756019678596
Coeffs: [710783.23425595 -41350.74154858      0.         130172.41630464
 233136.04559462]
Coeffs unscaled: [ 1.58057201e+02 -4.13507415e+04  0.00000000e+00  1.30172416e+05
  2.33136046e+05]


Unnamed: 0,coefficients
sqft,158.05
1_2_bathrooms,-41356.0
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130168.29
intercept,233148.47


RMSE for LassoLars(alpha=1):  250774.76845401697
R^2 for LassoLars(alpha=1):  0.23002686712368559
Coeffs: [710752.2444951  -41356.00244495      0.         130168.28502413
 233148.4716187 ]
Coeffs unscaled: [ 1.58050310e+02 -4.13560024e+04  0.00000000e+00  1.30168285e+05
  2.33148472e+05]


Unnamed: 0,coefficients
sqft,158.05
1_2_bathrooms,-41356.0
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130168.29
intercept,233148.47


RMSE for LassoLars(alpha=1):  245332.78175235257
R^2 for LassoLars(alpha=1):  0.2551750349305941
Coeffs: [710752.2444951  -41356.00244495      0.         130168.28502413
 233148.4716187 ]
Coeffs unscaled: [ 1.58050310e+02 -4.13560024e+04  0.00000000e+00  1.30168285e+05
  2.33148472e+05]


Unnamed: 0,coefficients
sqft,158.04
1_2_bathrooms,-41366.52
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130160.02
intercept,233173.32


RMSE for LassoLars(alpha=2):  250774.76881123165
R^2 for LassoLars(alpha=2):  0.2300268649301178
Coeffs: [710690.26497341 -41366.52423768      0.         130160.0224631
 233173.32366686]
Coeffs unscaled: [ 1.58036528e+02 -4.13665242e+04  0.00000000e+00  1.30160022e+05
  2.33173324e+05]


Unnamed: 0,coefficients
sqft,158.04
1_2_bathrooms,-41366.52
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130160.02
intercept,233173.32


RMSE for LassoLars(alpha=2):  245332.96870649594
R^2 for LassoLars(alpha=2):  0.2551738997527727
Coeffs: [710690.26497341 -41366.52423768      0.         130160.0224631
 233173.32366686]
Coeffs unscaled: [ 1.58036528e+02 -4.13665242e+04  0.00000000e+00  1.30160022e+05
  2.33173324e+05]


Unnamed: 0,coefficients
sqft,158.0
1_2_bathrooms,-41398.09
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130135.23
intercept,233247.88


RMSE for LassoLars(alpha=5):  250774.77131173463
R^2 for LassoLars(alpha=5):  0.2300268495751432
Coeffs: [710504.32640834 -41398.08961588      0.         130135.23478001
 233247.87981136]
Coeffs unscaled: [ 1.57995180e+02 -4.13980896e+04  0.00000000e+00  1.30135235e+05
  2.33247880e+05]


Unnamed: 0,coefficients
sqft,158.0
1_2_bathrooms,-41398.09
2.5_3_bathrooms,0.0
3.5_4_bathrooms,130135.23
intercept,233247.88


RMSE for LassoLars(alpha=5):  245333.53102168866
R^2 for LassoLars(alpha=5):  0.25517048539298015
Coeffs: [710504.32640834 -41398.08961588      0.         130135.23478001
 233247.87981136]
Coeffs unscaled: [ 1.57995180e+02 -4.13980896e+04  0.00000000e+00  1.30135235e+05
  2.33247880e+05]


Unnamed: 0,coefficients
sqft,198.11
intercept,161600.35


RMSE for LinearRegression():  252375.84280886277
R^2 for LinearRegression():  0.22016367708213835
Coeffs: [890896.04557133 161600.35129289]
Coeffs unscaled: [   198.10897166 161600.35129289]


Unnamed: 0,coefficients
sqft,198.11
intercept,161600.35


RMSE for LinearRegression():  246607.18883635805
R^2 for LinearRegression():  0.2474167925020585
Coeffs: [890896.04557133 161600.35129289]
Coeffs unscaled: [   198.10897166 161600.35129289]


Unnamed: 0,coefficients
sqft,198.11
intercept,161600.35


RMSE for LassoLars(alpha=0):  252375.84280886277
R^2 for LassoLars(alpha=0):  0.22016367708213835
Coeffs: [890896.04557133 161600.35129289]
Coeffs unscaled: [   198.10897166 161600.35129289]


Unnamed: 0,coefficients
sqft,198.11
intercept,161600.35


RMSE for LassoLars(alpha=0):  246607.18883635805
R^2 for LassoLars(alpha=0):  0.2474167925020585
Coeffs: [890896.04557133 161600.35129289]
Coeffs unscaled: [   198.10897166 161600.35129289]


Unnamed: 0,coefficients
sqft,198.1
intercept,161606.62


RMSE for LassoLars(alpha=0.5):  252375.8428307242
R^2 for LassoLars(alpha=0.5):  0.22016367694703554
Coeffs: [890873.9763865  161606.61920767]
Coeffs unscaled: [   198.10406413 161606.61920767]


Unnamed: 0,coefficients
sqft,198.1
intercept,161606.62


RMSE for LassoLars(alpha=0.5):  246607.28516788976
R^2 for LassoLars(alpha=0.5):  0.24741620454265945
Coeffs: [890873.9763865  161606.61920767]
Coeffs unscaled: [   198.10406413 161606.61920767]


Unnamed: 0,coefficients
sqft,198.1
intercept,161612.89


RMSE for LassoLars(alpha=1):  252375.8428963085
R^2 for LassoLars(alpha=1):  0.2201636765417273
Coeffs: [890851.90720167 161612.88712245]
Coeffs unscaled: [   198.09915659 161612.88712245]


Unnamed: 0,coefficients
sqft,198.1
intercept,161612.89


RMSE for LassoLars(alpha=1):  246607.38154440717
R^2 for LassoLars(alpha=1):  0.24741561630846043
Coeffs: [890851.90720167 161612.88712245]
Coeffs unscaled: [   198.09915659 161612.88712245]


Unnamed: 0,coefficients
sqft,198.09
intercept,161625.42


RMSE for LassoLars(alpha=2):  252375.8431586456
R^2 for LassoLars(alpha=2):  0.22016367492049438
Coeffs: [890807.768832   161625.42295202]
Coeffs unscaled: [   198.08934152 161625.42295202]


Unnamed: 0,coefficients
sqft,198.09
intercept,161625.42


RMSE for LassoLars(alpha=2):  246607.57443239895
R^2 for LassoLars(alpha=2):  0.24741443901566185
Coeffs: [890807.768832   161625.42295202]
Coeffs unscaled: [   198.08934152 161625.42295202]


Unnamed: 0,coefficients
sqft,198.06
intercept,161663.03


RMSE for LassoLars(alpha=5):  252375.84499500552
R^2 for LassoLars(alpha=5):  0.22016366357186334
Coeffs: [890675.35372301 161663.03044071]
Coeffs unscaled: [   198.05989631 161663.03044071]


Unnamed: 0,coefficients
sqft,198.06
intercept,161663.03


RMSE for LassoLars(alpha=5):  246608.15417602583
R^2 for LassoLars(alpha=5):  0.24741090054206438
Coeffs: [890675.35372301 161663.03044071]
Coeffs unscaled: [   198.05989631 161663.03044071]


Unnamed: 0,model,features,coeffs,coeffs_unscaled,train_rmse,train_r2,test_rmse,test_r2
0,LinearRegression(),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710814.2240167982, -70955.83629653123, -29610...","[158.0640925098506, -70955.83629653123, -29610...",250774.77,0.23,245332.6,0.26
0,LassoLars(alpha=0),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710814.224016795, -41345.48065221762, 0.0, 13...","[158.0640925098499, -41345.48065221762, 0.0, 1...",250774.77,0.23,245332.6,0.26
0,LassoLars(alpha=0.5),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710783.2342559511, -41350.74154858373, 0.0, 1...","[158.0572013021906, -41350.74154858373, 0.0, 1...",250774.77,0.23,245332.69,0.26
0,LassoLars(alpha=1),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710752.2444951049, -41356.00244495021, 0.0, 1...","[158.05031009453077, -41356.00244495021, 0.0, ...",250774.77,0.23,245332.78,0.26
0,LassoLars(alpha=2),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710690.2649734124, -41366.524237683174, 0.0, ...","[158.0365276792111, -41366.524237683174, 0.0, ...",250774.77,0.23,245332.97,0.26
0,LassoLars(alpha=5),"[sqft, 1_2_bathrooms, 2.5_3_bathrooms, 3.5_4_b...","[710504.3264083351, -41398.08961588208, 0.0, 1...","[157.9951804332522, -41398.08961588208, 0.0, 1...",250774.77,0.23,245333.53,0.26
0,LinearRegression(),[sqft],"[890896.0455713295, 161600.35129288855]","[198.10897166362676, 161600.35129288855]",252375.84,0.22,246607.19,0.25
0,LassoLars(alpha=0),[sqft],"[890896.0455713295, 161600.35129288855]","[198.10897166362676, 161600.35129288855]",252375.84,0.22,246607.19,0.25
0,LassoLars(alpha=0.5),[sqft],"[890873.976386498, 161606.61920767097]","[198.10406412864086, 161606.61920767097]",252375.84,0.22,246607.29,0.25
0,LassoLars(alpha=1),[sqft],"[890851.9072016663, 161612.88712245342]","[198.09915659365493, 161612.88712245342]",252375.84,0.22,246607.38,0.25
