#### Import packages and datasets

In [1]:
import pandas as pd
import numpy as np
import sqlite3
from custom_functions import *

In [2]:
conn = sqlite3.connect('../../data/processed/main.db')
query_df = '''SELECT * FROM step3_final_df'''
query_performance = '''SELECT * FROM step3_performance_metrics'''
df_final = pd.read_sql(query_df, conn, index_col='index').reset_index(drop=True)
performance_metrics = pd.read_sql(query_performance, conn, index_col='index').reset_index(drop=True)

#### Show final model statistics

In [3]:
x = list(df_final.drop('SalePrice_log', axis=1).columns)
model, _ = produce_model(df_final, x, 'SalePrice_log')
print(model.summary())

Modeling: SalePrice_log ~ Heating_ElecBB+Heating_FloorWall+Heating_HeatPump+Heating_HotWater+Heating_Radiant+SqFtTotLiving_log+Basement_Finished+Porch_Open+Porch_Closed+Porch_Both
                            OLS Regression Results                            
Dep. Variable:          SalePrice_log   R-squared:                       0.404
Model:                            OLS   Adj. R-squared:                  0.403
Method:                 Least Squares   F-statistic:                     1217.
Date:                Fri, 12 Mar 2021   Prob (F-statistic):               0.00
Time:                        22:16:07   Log-Likelihood:                -9173.4
No. Observations:               17986   AIC:                         1.837e+04
Df Residuals:                   17975   BIC:                         1.845e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                        coef  

#### Quantify impact of coefficients given log-scaled output

In [10]:
results = pd.DataFrame(model.params).reset_index()
results.columns = ['attribute', 'coeff']
results['log_transformed?'] = ['_log' in x for x in results.attribute]

In [11]:
exp_func = lambda x: np.round(10**x, 2)-1
results['% change in SalePrice per *unit* input increase'] = results['coeff'].apply(exp_func)
results.iloc[0, -1] = 'NA'
results.loc[results['log_transformed?']==True, '% change in SalePrice per *unit* input increase'] = 'NA'

In [8]:
logged_inputs_converted = []
for row in range(results.shape[0]):
    if results.iloc[row]['log_transformed?'] == True:
        x = 1.01**results.iloc[row]['coeff']
        logged_inputs_converted.append(x-1)
    else:
        logged_inputs_converted.append('NA')
results['% change in SalePrice per *percent* input increase']  = logged_inputs_converted

Unnamed: 0,attribute,coeff,log_transformed?,% change in SalePrice per *unit* input increase,% change in SalePrice per *percent* input increase
0,Intercept,7.999294,False,,
1,Heating_ElecBB,-0.058457,False,-0.13,
2,Heating_FloorWall,0.07805,False,0.2,
3,Heating_HeatPump,0.112877,False,0.3,
4,Heating_HotWater,0.267768,False,0.85,
5,Heating_Radiant,0.273241,False,0.88,
6,SqFtTotLiving_log,0.710611,True,,0.00709587
7,Basement_Finished,0.071071,False,0.18,
8,Porch_Open,0.033703,False,0.08,
9,Porch_Closed,0.10285,False,0.27,


In [None]:
results

In [9]:
# footage up 10%
(x-1)*10

0.07095872590325403