In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import cross_val_score

In [2]:
# Load the housing price CSV; the file's first column becomes the row index.
df = pd.read_csv(
    './../data/ames_housing_price_data_v3.csv',
    index_col=0,
)

In [3]:
# Keep the two candidate target columns as standalone Series.
# 'SalePrice_log' is presumably a log transform of 'SalePrice' — confirm
# against whatever produced the CSV.
price, price_log = df['SalePrice'], df['SalePrice_log']

In [12]:
# Feature frame: `df` minus the two sale-price columns and the 'MS_coded' /
# 'sold_datetime' columns.
# NOTE(review): this cell's execution counter (12) is later than the
# get_dummies cell that follows it (6), so `df3` may have been built from an
# earlier version of `df2` — restart the kernel and run all cells to confirm.
df2 = df.drop(columns=['SalePrice', 'MS_coded', 'SalePrice_log', 'sold_datetime'])

In [6]:
# Dummy-encode `df2`. No `columns=` argument is passed, so pandas applies its
# default column selection; the first level of each encoded column is dropped.
df3 = pd.get_dummies(data=df2, drop_first=True)

In [None]:
# Column names earmarked for dummy encoding, in their original order.
# NOTE(review): 'LF_Near_Postive_Feature' differs from
# 'LF_Near_Positive_Feature' by a single letter — confirm both really exist
# as columns before passing this list to pandas.
to_dummify = [
    'Street_paved', 'Alley', 'LandContour', 'Utilities', 'LandSlope',
    'Neighborhood', 'BldgType', 'OverallQual', 'OverallCond',
    'RoofStyle', 'RoofMatl', 'MasVnrType', 'MasVnrArea',
    'ExterQual', 'ExterCond', 'Foundation', 'Heating', 'CentralAir',
    'KitchenQual', 'FireplaceQu',
    'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive',
    'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch',
    'PoolQC', 'Fence', 'MiscFeature', 'MiscVal', 'MoSold',
    'SaleType', 'SaleCondition',
    'HeatingQC_ord', 'LotShape_com', 'MSZoning_com',
    # names carrying the "LF_" prefix
    'LF_Near_NS_RR', 'LF_Near_Positive_Feature', 'LF_Adjacent_Arterial_St',
    'LF_Near_EW_RR', 'LF_Adjacent_Feeder_St', 'LF_Near_Postive_Feature',
    'Heating_com', 'Electrical_com', 'LotConfig_com',
    'LotFrontage_log', 'LotArea_log',
    # names carrying the "ext_" prefix
    'ext_Wood_Siding', 'ext_Hard_Board', 'ext_Metal_Siding',
    'ext_Vinyl_Siding', 'ext_Wood_Shingles', 'ext_Plywood', 'ext_Stucco',
    'ext_Cement_Board', 'ext_Face_Brick', 'ext_Asbestos_Shingles',
    'ext_Common_Brick', 'ext_Imitation_Stucco', 'ext_Other',
    'BsmtCond_ord', 'BsmtQual_ord', 'BsmtExposure_ord',
    # names carrying the "BSMT_" prefix
    'BSMT_GLQ', 'BSMT_ALQ', 'BSMT_BLQ', 'BSMT_LwQ', 'BSMT_Rec',
    'GarageType_com', 'number_floors', 'attic', 'PUD', 'Functional_ord',
]

In [11]:
# Show every column label of the feature frame as a plain Python list.
df2.columns.tolist()

['GrLivArea',
 'LotFrontage',
 'LotArea',
 'Street_paved',
 'Alley',
 'LandContour',
 'Utilities',
 'LandSlope',
 'Neighborhood',
 'BldgType',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'RoofStyle',
 'RoofMatl',
 'MasVnrType',
 'MasVnrArea',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtUnfSF',
 'TotalBsmtSF',
 'Heating',
 'CentralAir',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'KitchenQual',
 'TotRmsAbvGrd',
 'Fireplaces',
 'FireplaceQu',
 'GarageYrBlt',
 'GarageFinish',
 'GarageCars',
 'GarageArea',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'PoolQC',
 'Fence',
 'MiscFeature',
 'MiscVal',
 'MoSold',
 'YrSold',
 'SaleType',
 'SaleCondition',
 'HeatingQC_ord',
 'LotShape_com',
 'MSZoning_com',
 'LF_Near_NS_RR',
 'LF_Near_Positive_Feature',
 'LF_Adjacent_Arterial_St',
 'LF_Ne

In [None]:
# Preview the encoding restricted to the columns listed in `to_dummify`.
# The original cell called pd.get_dummies() with no arguments, which raises
# TypeError because the `data` argument is required.
# pandas raises KeyError if any name in `to_dummify` is not a column of `df2`.
# NOTE(review): intent inferred from the `to_dummify` cell above — confirm
# this is the encoding that was meant before assigning the result to a name.
pd.get_dummies(df2, columns=to_dummify, drop_first=True).head()

In [7]:
# Lasso regressor constructed with normalize=True; every other argument is
# left at its default.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, where this call raises TypeError — confirm the pinned
# version. On newer releases, scale the features first (e.g. a StandardScaler
# step in a Pipeline) and re-tune alpha, because the penalty scale changes.
lasso = Lasso(normalize = True)

In [8]:
# Search grid for the regularisation strength: eleven 'alpha' candidates,
# one per decade from 1e-8 up to 100.
params = dict(
    alpha=[1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100],
)

In [None]:
# 5-fold shuffled splitter with a fixed seed so the folds are reproducible.
# KFold replaces StratifiedKFold here: stratification works on class labels,
# and scikit-learn rejects continuous targets such as `price_log`, while the
# estimator in this notebook is a Lasso regressor. The variable name is kept
# so any later reference to `skfold` still resolves.
# NOTE(review): if the folds were meant to be stratified on a categorical
# column passed explicitly to .split(), restore StratifiedKFold.
skfold = KFold(n_splits=5, shuffle=True, random_state=1)