In [1]:
import pandas as pd
import pickle
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Load the engineered training data (indexed by `id`) and keep only the
# predictor columns by dropping `SalePrice`.
train = pd.read_csv('../../data/engineered/training.csv', index_col='id')
# Keyword form: passing the axis positionally (`drop('SalePrice', 1)`) was
# deprecated in pandas 1.3 and removed in pandas 2.0.
train = train.drop(columns='SalePrice')

# Fit the polynomial expansion only to recover the names of the generated
# feature columns; these label the coefficients of the model trained on them.
pf = PolynomialFeatures(include_bias=False)
pf.fit(train)
# `get_feature_names` was removed in scikit-learn 1.2 in favour of
# `get_feature_names_out`; fall back to the old method on older installs.
if hasattr(pf, 'get_feature_names_out'):
    pf_feat_cols = list(pf.get_feature_names_out(input_features=train.columns))
else:
    pf_feat_cols = pf.get_feature_names(input_features=train.columns)

In [3]:
# Unpickle the object stored in ../models/lasso.pickle into `lasso_model`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load model files that come from a trusted source.
with open('../models/lasso.pickle', 'rb') as file:
    lasso_model = pickle.load(file)



In [4]:
# Coefficients of the second pipeline step of the lasso search's
# best_estimator_, labelled with the training feature names.
lasso_coefs = pd.Series(
    lasso_model.best_estimator_.steps[1][1].coef_,
    index=train.columns,
)

In [5]:
# Unpickle the object stored in ../models/ridge.pickle into `ridge_model`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load model files that come from a trusted source.
with open('../models/ridge.pickle', 'rb') as file:
    ridge_model = pickle.load(file)

In [6]:
# Coefficients of the second pipeline step of the ridge search's
# best_estimator_, labelled with the training feature names.
ridge_coefs = pd.Series(
    ridge_model.best_estimator_.steps[1][1].coef_,
    index=train.columns,
)

In [7]:
# Unpickle the object stored in ../models/elastic_net.pickle into `elastic_model`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load model files that come from a trusted source.
with open('../models/elastic_net.pickle', 'rb') as file:
    elastic_model = pickle.load(file)

In [8]:
# Coefficients of the second pipeline step of the elastic-net search's
# best_estimator_, labelled with the training feature names.
elastic_coefs = pd.Series(
    elastic_model.best_estimator_.steps[1][1].coef_,
    index=train.columns,
)

In [9]:
# Unpickle the object stored in ../models/elastic_net_2.pickle into `elastic_model_2`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load model files that come from a trusted source.
with open('../models/elastic_net_2.pickle', 'rb') as file:
    elastic_model_2 = pickle.load(file)

In [10]:
# Coefficients of the third pipeline step of the second elastic-net search's
# best_estimator_, labelled with the polynomial feature names.
elastic_coefs_2 = pd.Series(
    elastic_model_2.best_estimator_.steps[2][1].coef_,
    index=pf_feat_cols,
)

In [11]:
# Order the feature labels by absolute coefficient size, largest first.
lasso_index_sorted = (
    lasso_coefs
    .abs()
    .sort_values(ascending=False)
    .index
)
# Show the 20 largest-magnitude lasso coefficients with their original signs.
lasso_coefs.loc[lasso_index_sorted].head(20)

overall_qual        0.114552
gr_liv_area         0.092103
year_built          0.064835
1st_flr_sf          0.058633
overall_cond        0.048163
lot_area            0.041685
2nd_flr_sf          0.029435
bsmtfin_sf_1        0.029306
year_remod/add      0.022409
bsmt_full_bath      0.020793
functional_Typ      0.019287
garage_cars         0.017445
fireplaces          0.016503
exter_cond_TA       0.016302
heating_qc_Ex       0.015599
total_bsmt_sf       0.015149
bsmt_exposure_Gd    0.014663
foundation_PConc    0.013066
screen_porch        0.012112
exter_qual_TA      -0.011716
dtype: float64

In [12]:
# Order the feature labels by absolute coefficient size, largest first.
ridge_index_sorted = (
    ridge_coefs
    .abs()
    .sort_values(ascending=False)
    .index
)
# Show the 20 largest-magnitude ridge coefficients with their original signs.
ridge_coefs.loc[ridge_index_sorted].head(20)

overall_qual                    0.103134
year_built                      0.075526
gr_liv_area                     0.072491
1st_flr_sf                      0.069430
2nd_flr_sf                      0.065710
misc_val                       -0.062107
misc_feature_No Misc Feature   -0.050628
bsmtfin_sf_1                    0.049476
overall_cond                    0.047370
exterior_2nd_VinylSd           -0.044680
garage_finish_Unf              -0.042371
lot_area                        0.041448
exterior_1st_VinylSd            0.038384
garage_finish_RFn              -0.033517
exter_cond_TA                   0.029793
garage_finish_Fin              -0.029366
bsmtfin_type_2_Unf             -0.027117
heating_qc_Ex                   0.023748
year_remod/add                  0.023348
functional_Typ                  0.022260
dtype: float64

In [13]:
# Order the feature labels by absolute coefficient size, largest first.
elast_index_sorted = (
    elastic_coefs
    .abs()
    .sort_values(ascending=False)
    .index
)
# Show the 20 largest-magnitude elastic-net coefficients with their original signs.
elastic_coefs.loc[elast_index_sorted].head(20)

overall_qual        0.118971
gr_liv_area         0.107758
year_built          0.055149
overall_cond        0.045040
1st_flr_sf          0.043640
lot_area            0.040295
bsmtfin_sf_1        0.029270
year_remod/add      0.022777
bsmt_full_bath      0.020846
functional_Typ      0.017492
garage_cars         0.016799
fireplaces          0.016584
total_bsmt_sf       0.014967
foundation_PConc    0.014852
bsmt_exposure_Gd    0.012894
exter_qual_TA      -0.011679
heating_qc_Ex       0.010885
2nd_flr_sf          0.010543
screen_porch        0.010444
paved_drive_Y       0.010149
dtype: float64

In [14]:
# Order the polynomial feature labels by absolute coefficient size, largest first.
elast_index_sorted_2 = (
    elastic_coefs_2
    .abs()
    .sort_values(ascending=False)
    .index
)
# Show the 20 largest-magnitude coefficients of the second elastic-net model
# with their original signs.
elastic_coefs_2.loc[elast_index_sorted_2].head(20)

year_built gr_liv_area                       0.072736
overall_qual lot_area                        0.071470
year_remod/add gr_liv_area                   0.036277
year_built year_remod/add                    0.031364
overall_qual overall_cond                    0.024359
year_built 1st_flr_sf                        0.023730
overall_cond total_bsmt_sf                   0.022256
lot_area gr_liv_area                         0.022008
garage_area heating_qc_Ex                    0.017333
bsmtfin_sf_1^2                               0.017063
misc_val bsmt_exposure_Gd                   -0.014770
garage_cars garage_cond_TA                   0.014521
overall_qual street_Pave                     0.013432
overall_cond fireplaces                      0.010909
central_air_Y functional_Typ                 0.010061
bsmtfin_sf_1 condition_1_Norm                0.009415
overall_qual lot_frontage                    0.009101
garage_area foundation_PConc                 0.008740
year_remod/add 1st_flr_sf   

The columns we changed during feature engineering because of skew are:
>'lot_frontage',  
>'lot_area',  
>'mas_vnr_area',  
>'bsmtfin_sf_1',  
>'bsmtfin_sf_2',  
>'total_bsmt_sf',  
>'1st_flr_sf',  
>'low_qual_fin_sf',  
>'gr_liv_area',  
>'bsmt_half_bath',  
>'kitchen_abvgr',  
>'wood_deck_sf',  
>'open_porch_sf',  
>'enclosed_porch',  
>'3ssn_porch',  
>'screen_porch',  
>'pool_area',  
>'misc_val'