#### External Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings('ignore')

#### Reading in Data

In [2]:
clean_train = pd.read_csv('../data/Model_Benchmarks_train.csv')

#### Features List

In [3]:
features = ['overall_qual', 'total_sf','neighborhood', 'exter_qual', 'bsmt_qual', 'kitchen_qual', 'gr_liv_area',
            'garage_cars', 'garage_finish','fireplace_qu', 'full_bath', 'foundation', 'garage_type','mas_vnr_area']

## Final Model

#### Transform and Scale Data

In [4]:
X = clean_train[features]
y =  clean_train[['saleprice']]

poly = PolynomialFeatures(include_bias = False)
X_poly = poly.fit_transform(X)
X_poly = pd.DataFrame(X_poly, columns = poly.get_feature_names(features))
features = X_poly.columns

ss = StandardScaler()
X_poly_sc =ss.fit_transform(X_poly)

#### Fit Model

In [5]:
ridge = RidgeCV()
ridge.fit(X_poly_sc, y)
y_hat = ridge.predict(X_poly_sc)
R2 = r2_score(y_hat, y)
print('Ridge Final Model Produces an R2 value of {}%.'.format(round((R2 * 100),2)))

Ridge Final Model Produces an R2 value of 90.27%.


- This model explains 90.27% variability of all the data around its mean.

### DataFrame of Strong Coefficients

In [6]:
final_model = pd.DataFrame(ridge.coef_)
final_model = final_model.T
final_model['abs_coefs'] = pd.DataFrame(abs(ridge.coef_)).T
final_model['coefs'] = pd.DataFrame(ridge.coef_).T
final_model.index = X_poly.columns
final_model = final_model.sort_values('abs_coefs', ascending=False)
final_model.drop(0, axis=1, inplace=True)


In [7]:
final_model.head(12)

Unnamed: 0,abs_coefs,coefs
total_sf bsmt_qual,19858.582866,19858.582866
neighborhood^2,18507.833858,-18507.833858
neighborhood,17826.105014,17826.105014
total_sf,13217.091293,-13217.091293
total_sf exter_qual,12891.675565,12891.675565
exter_qual foundation,12784.197491,-12784.197491
total_sf kitchen_qual,12611.544428,12611.544428
full_bath^2,12429.712046,12429.712046
kitchen_qual garage_cars,12379.81084,12379.81084
gr_liv_area,11847.533124,-11847.533124


### Coefs Insights:

- Top 5 coefs total_sf bsmt_qual, neighborhood^2, neighborhood, total_sf, and total_sf exter_qual
- For every one unit increase for these coefs sale prices will increase corresponding to the coef
- Creating the feature interaction of total_sf turned out to be a strong predictor when determining sale price 
- Which neighborhood a house is located in has a strong impact on sale price
- Features dealing with quality are key when predicting sale price
- Final R2 score is 90.27% which explains the variability of all the data around its mean.