In [74]:
import pandas as pd
import numpy as np
import sqlite3
from custom_functions import *

In [24]:
conn = sqlite3.connect('../../data/processed/main.db')
query_df = '''SELECT * FROM step3_final_df'''
query_performance = '''SELECT * FROM step3_performance_metrics'''
df_final = pd.read_sql(query_df, conn, index_col='index').reset_index(drop=True)
performance_metrics = pd.read_sql(query_performance, conn, index_col='index').reset_index(drop=True)

In [32]:
x = list(df_final.drop('SalePrice_log', axis=1).columns)
model, _ = produce_model(df_final, x, 'SalePrice_log')
print(model.summary())

Modeling: SalePrice_log ~ Heating_ElecBB+Heating_FloorWall+Heating_HeatPump+Heating_HotWater+Heating_Radiant+SqFtTotLiving_log+Basement_Finished+Porch_Open+Porch_Closed+Porch_Both
                            OLS Regression Results                            
Dep. Variable:          SalePrice_log   R-squared:                       0.404
Model:                            OLS   Adj. R-squared:                  0.403
Method:                 Least Squares   F-statistic:                     1217.
Date:                Fri, 12 Mar 2021   Prob (F-statistic):               0.00
Time:                        17:55:47   Log-Likelihood:                -9173.4
No. Observations:               17986   AIC:                         1.837e+04
Df Residuals:                   17975   BIC:                         1.845e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                        coef  

In [45]:
results = pd.DataFrame(model.params).reset_index()
results.columns = ['attribute', 'coeff']
results['log_transformed?'] = ['_log' in x for x in results.attribute]

Unnamed: 0,attribute,coeff,log_transformed?
0,Intercept,7.999294,False
1,Heating_ElecBB,-0.058457,False
2,Heating_FloorWall,0.07805,False
3,Heating_HeatPump,0.112877,False
4,Heating_HotWater,0.267768,False
5,Heating_Radiant,0.273241,False
6,SqFtTotLiving_log,0.710611,True
7,Basement_Finished,0.071071,False
8,Porch_Open,0.033703,False
9,Porch_Closed,0.10285,False


In [59]:
results.iloc[0]

attribute           Intercept
coeff                 7.99929
log_transformed?        False
Name: 0, dtype: object

In [87]:
coeff_unlogged = []
for row_number in range(results.shape[0]):
    row = results.iloc[row_number]    
    if row['log_transformed?']==True:
        #unlogged = 10**(row.coeff)
        unlogged = np.log(row.coeff)
        coeff_unlogged.append(unlogged)
    else:
        coeff_unlogged.append(row.coeff)
        
results['coeff_unlogged'] = coeff_unlogged
results

Unnamed: 0,attribute,coeff,log_transformed?,coeff_unlogged,coeff_logged,% change in SalePrice
0,Intercept,7.999294,False,7.999294,99837630.0,99837633.4
1,Heating_ElecBB,-0.058457,False,-0.058457,0.8740637,-0.13
2,Heating_FloorWall,0.07805,False,0.07805,1.196879,0.2
3,Heating_HeatPump,0.112877,False,0.112877,1.296813,0.3
4,Heating_HotWater,0.267768,False,0.267768,1.852541,0.85
5,Heating_Radiant,0.273241,False,0.273241,1.876036,0.88
6,SqFtTotLiving_log,0.710611,True,-0.34163,136721.0,136719.99
7,Basement_Finished,0.071071,False,0.071071,1.177798,0.18
8,Porch_Open,0.033703,False,0.033703,1.080694,0.08
9,Porch_Closed,0.10285,False,0.10285,1.267213,0.27


In [107]:
coeff_logged = []
exp_func = lambda x: np.round(10**x, 2)-1
results['% change in SalePrice per unit input increase'] = results['coeff_unlogged'].apply(exp_func)
results.iloc[0, -1] = 'NA'
results.loc[results['log_transformed?']==True, '% change in SalePrice per *unit* input increase'] = 'NA'
results

Unnamed: 0,attribute,coeff,log_transformed?,coeff_unlogged,coeff_logged,% change in SalePrice,% change in SalePrice per unit input increase,% change in SalePrice per *unit* input increase
0,Intercept,7.999294,False,7.999294,99837630.0,,,
1,Heating_ElecBB,-0.058457,False,-0.058457,0.8740637,-0.13,-0.13,
2,Heating_FloorWall,0.07805,False,0.07805,1.196879,0.2,0.2,
3,Heating_HeatPump,0.112877,False,0.112877,1.296813,0.3,0.3,
4,Heating_HotWater,0.267768,False,0.267768,1.852541,0.85,0.85,
5,Heating_Radiant,0.273241,False,0.273241,1.876036,0.88,0.88,
6,SqFtTotLiving_log,0.710611,True,-0.34163,136721.0,-0.54,-0.54,
7,Basement_Finished,0.071071,False,0.071071,1.177798,0.18,0.18,
8,Porch_Open,0.033703,False,0.033703,1.080694,0.08,0.08,
9,Porch_Closed,0.10285,False,0.10285,1.267213,0.27,0.27,


In [127]:
logged_inputs_converted = []
for row in range(results.shape[0]):
    if results.iloc[row]['log_transformed?'] == True:
        x = 1.01**results.iloc[row]['coeff']
        logged_inputs_converted.append(x-1)
    else:
        logged_inputs_converted.append('NA')
        
results['% change in SalePrice per *percent* input increase']  = logged_inputs_converted
results

Unnamed: 0,attribute,coeff,log_transformed?,coeff_unlogged,coeff_logged,% change in SalePrice,% change in SalePrice per unit input increase,% change in SalePrice per *unit* input increase,% change in SalePrice per *percent* input increase
0,Intercept,7.999294,False,7.999294,99837630.0,,,,
1,Heating_ElecBB,-0.058457,False,-0.058457,0.8740637,-0.13,-0.13,,
2,Heating_FloorWall,0.07805,False,0.07805,1.196879,0.2,0.2,,
3,Heating_HeatPump,0.112877,False,0.112877,1.296813,0.3,0.3,,
4,Heating_HotWater,0.267768,False,0.267768,1.852541,0.85,0.85,,
5,Heating_Radiant,0.273241,False,0.273241,1.876036,0.88,0.88,,
6,SqFtTotLiving_log,0.710611,True,-0.34163,136721.0,-0.54,-0.54,,0.00709587
7,Basement_Finished,0.071071,False,0.071071,1.177798,0.18,0.18,,
8,Porch_Open,0.033703,False,0.033703,1.080694,0.08,0.08,,
9,Porch_Closed,0.10285,False,0.10285,1.267213,0.27,0.27,,


In [128]:
# footage up 10%
(x-1)*

0.007095872590325403

In [122]:
1**0.71

1.0

In [104]:
coeff_sqft_log = results.loc[results.attribute=='SqFtTotLiving_log', 'coeff'].values[0]

x = 1.01**coeff_sqft_log
x

1.0070958725903254

In [105]:
x**100

2.028063780510364

In [None]:
coef_effect = [(x - 1)*df.SalePrice.mean() for x in transformed_coef]

In [None]:
import math

transformed_coef = []
for x in df_unscaled_coef.iloc[:,0]:
    y = math.exp(x)
    transformed_coef.append(y)
print(transformed_coef)


coef_effect = [(x - 1)*df.SalePrice.mean() for x in transformed_coef]