# Ames Housing Regression Project -- SM

## import 

**packages**

In [None]:
import pandas as pd
import numpy as np


In [None]:
from sklearn.preprocessing import LabelEncoder      # Model Pre-Processing
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error # Model Performance Analysis
from sklearn.metrics import r2_score


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
import statsmodels.api as sm


In [4]:
from catboost import CatBoostRegressor    # Regression Models
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor


In [None]:
from IPython.display import display, HTML # Display Preferences


**raw file**

In [8]:
# Load the raw Ames housing data set (path is relative to the notebook's working directory)
data_raw = pd.read_csv(filepath_or_buffer='data/Ames_Housing_Price_Data.csv')

**adjust display**

In [9]:
# Lift pandas' display limits: None means "no cap", so every column and
# every row of a frame is rendered instead of being truncated.
pd.options.display.max_columns = None
pd.options.display.max_rows = None


In [10]:
# Style sheet for elements with the `output_scroll` class: cap their height
# at 100px and let anything taller scroll inside the box.
# NOTE(review): presumably targets the classic Notebook front end's scrolling
# output area -- confirm the class name applies to the front end in use.
_output_scroll_css = """
<style>
.output_scroll {
    box-sizing: border-box;
    display: block;
    -webkit-box-sizing: border-box;
    -moz-box-sizing: border-box;
    max-height: 100px;
    overflow: auto;
}
</style>
"""
# Last expression of the cell, so the HTML object is rendered as the cell output
HTML(_output_scroll_css)


# EDA

**The Data**

**Processing Insight 1: High dimensionality — with 82 columns, we'll need a regression model that handles high-dimensional data.**

In [11]:
# Size of the raw frame as (observations, columns) -- 2580 rows by 82 columns
(len(data_raw.index), len(data_raw.columns))

(2580, 82)

**Processing Insight 2: With 43 object columns, we'll need to test different encoding methods; some models may require ordinal encoding, where category order matters. We may also want to log-transform some of the numeric columns.**

In [12]:
# Number of columns per dtype, most frequent dtype first.
#   43 object columns       -> need encoding
#   28 int64 + 11 float64   -> candidates for a log transform
data_raw.dtypes.value_counts(sort=True, ascending=False)

object     43
int64      28
float64    11
dtype: int64

In [44]:
# Build (column_name, dtype) pairs ordered by the dtype's string name.
# sorted() is stable, so columns that share a dtype keep their original
# left-to-right order from the frame.
column_types = sorted(
    zip(data_raw.columns, data_raw.dtypes),
    key=lambda pair: str(pair[1]),
)

# One "name: dtype" line per column
for column, dtype in column_types:
    print(f"{column}: {dtype}")


LotFrontage: float64
MasVnrArea: float64
BsmtFinSF1: float64
BsmtFinSF2: float64
BsmtUnfSF: float64
TotalBsmtSF: float64
BsmtFullBath: float64
BsmtHalfBath: float64
GarageYrBlt: float64
GarageCars: float64
GarageArea: float64
Unnamed: 0: int64
PID: int64
GrLivArea: int64
SalePrice: int64
MSSubClass: int64
LotArea: int64
OverallQual: int64
OverallCond: int64
YearBuilt: int64
YearRemodAdd: int64
1stFlrSF: int64
2ndFlrSF: int64
LowQualFinSF: int64
FullBath: int64
HalfBath: int64
BedroomAbvGr: int64
KitchenAbvGr: int64
TotRmsAbvGrd: int64
Fireplaces: int64
WoodDeckSF: int64
OpenPorchSF: int64
EnclosedPorch: int64
3SsnPorch: int64
ScreenPorch: int64
PoolArea: int64
MiscVal: int64
MoSold: int64
YrSold: int64
MSZoning: object
Street: object
Alley: object
LotShape: object
LandContour: object
Utilities: object
LotConfig: object
LandSlope: object
Neighborhood: object
Condition1: object
Condition2: object
BldgType: object
HouseStyle: object
RoofStyle: object
RoofMatl: object
Exterior1st: object
E

In [None]:
# Unnamed: 0 is not supposed to be there per the Data Description; it will be removed


In [None]:
# PID: property ID

In [None]:
# SalePrice: Sale price of the property

In [None]:
# MSSubClass: Identifies the type of dwelling involved in the sale.
# BldgType: Type of dwelling
# HouseStyle: Style of dwelling
# YearBuilt: Original construction date
# YearRemodAdd: Remodel date (same as construction date if no remodeling or additions)

In [None]:
# OverallQual: Rates the overall material and finish of the house
# ExterQual: Evaluates the quality of the material on the exterior 
# HeatingQC: Heating quality and condition
# BsmtFinType1: Rating of basement finished area
# BsmtFinType2: Rating of basement finished area (if multiple types)
# KitchenQual: Kitchen quality
# FireplaceQu: Fireplace quality
# GarageQual: Garage quality


In [None]:
# OverallCond: Rates the overall condition of the house
# ExterCond: Evaluates the present condition of the material on the exterior
# BsmtCond: Evaluates the general condition of the basement
# Functional: Home functionality (Assume typical unless deductions are warranted)
# GarageCond: Garage condition


In [None]:
# LotFrontage: Linear feet of street connected to property
# LotArea: Lot size in square feet
# LotShape: General shape of property
# LandContour: Flatness of the property
# LandSlope: Slope of property
# LotConfig: Lot configuration (inside lot, corner lot, etc)
# Condition1: Proximity to various conditions
# Condition2: Proximity to various conditions (if more than one is present)




In [None]:
# RoofStyle: Type of roof
# RoofMatl: Roof material
# Exterior1st: Exterior covering on house
# Exterior2nd: Exterior covering on house (if more than one material)
# MasVnrType: Masonry veneer type
# Foundation: Type of foundation
# BsmtExposure: Refers to walkout or garden level walls
# BsmtQual: Evaluates the height of the basement
# Fireplaces: Number of fireplaces
    

In [None]:
# MasVnrArea: Masonry veneer area in square feet
# BsmtFinSF1: Type 1 finished square feet
# BsmtFinSF2: Type 2 finished square feet
# BsmtUnfSF: Unfinished square feet of basement area  
# TotalBsmtSF: Total square feet of basement area
# 1stFlrSF: First Floor square feet
# 2ndFlrSF: Second floor square feet
# LowQualFinSF: Low quality finished square feet (all floors)
# GrLivArea: Above grade (ground) living area square feet
# GarageArea: Size of garage in square feet


In [None]:
# BsmtFullBath: Basement full bathrooms
# BsmtHalfBath: Basement half bathrooms
# FullBath: Full bathrooms above grade
# HalfBath: Half baths above grade
# BedroomAbvGr: Bedrooms above grade (does NOT include basement bedrooms)
# KitchenAbvGr: Kitchens above grade
# TotRmsAbvGrd: Total rooms above grade (does not include bathrooms)
# BsmtFullBath: Basement full bathrooms


In [None]:
# GarageType: Garage location
# GarageYrBlt: Year garage was built
# GarageFinish: Interior finish of the garage
# GarageCars: Size of garage in car capacity
    

In [None]:
# PavedDrive: Paved driveway

In [None]:
# WoodDeckSF: Wood deck area in square feet
# OpenPorchSF: Open porch area in square feet
# EnclosedPorch: Enclosed porch area in square feet
# 3SsnPorch: Three season porch area in square feet
# ScreenPorch: Screen porch area in square feet


In [None]:
# PoolArea: Pool area in square feet
# PoolQC: Pool quality


In [None]:
# Fence: Fence quality

In [None]:
# MiscFeature: Miscellaneous feature not covered in other categories

In [None]:
# MiscVal: $Value of miscellaneous feature

In [None]:
# MSZoning: Identifies the general zoning classification of the sale


In [None]:
# Street: Type of road access to property
# Alley:  Type of alley access to property
# Neighborhood: Physical locations within Ames city limits


In [25]:
# Utilities: Type of utilities available
# Heating: Type of heating
# CentralAir: Central air conditioning
# Electrical: Electrical system

In [None]:
# MoSold: Month Sold (MM)
# YrSold: Year Sold (YYYY)


In [None]:
# SaleType: Type of sale
# SaleCondition: Condition of sale