# Kaggle Housing Pricing Tutorial

## Decision Trees 

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# Location of the training CSV, relative to the notebook's working directory.
iowa_file_path = './Datasets/train.csv'

home_data = pd.read_csv(iowa_file_path)

# Prediction target: the SalePrice column.
y = home_data.SalePrice

# Columns used as model inputs.
feature_columns = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']
X = home_data[feature_columns]

# Seed the tree so a fresh-kernel run reproduces the same fit: scikit-learn
# permutes the candidate features at every split, so equally good splits can
# otherwise be resolved differently from one run to the next.
iowa_model = DecisionTreeRegressor(random_state=0)
iowa_model.fit(X, y)

### Prediction for training data

In [None]:
# Predict the first rows of X and show them next to the recorded target values.
head_predictions = iowa_model.predict(X.head())
head_actuals = y.head().tolist()
print(f"First in-sample predictions: {head_predictions}")
print(f"Actual target values for those homes: {head_actuals}")

### Split the data into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the rows for validation; random_state=0 fixes the shuffle
# so the same rows land in each split on every run.
data_splits = train_test_split(X, y, random_state=0)
train_X, val_X, train_y, val_y = data_splits
# Refit the existing tree on the training portion only.
iowa_model.fit(train_X, train_y)


In [None]:
# Predict the first validation rows and show them next to the recorded target values.
val_head_predictions = iowa_model.predict(val_X.head())
val_head_actuals = val_y.head().tolist()
print(f"First validation predictions: {val_head_predictions}")
print(f"Actual target values for those homes: {val_head_actuals}")

**Finding Mean Absolute Error for the validation data**

In [None]:
from sklearn.metrics import mean_absolute_error

# Score the validation predictions with mean absolute error (MAE).
model_predictions = iowa_model.predict(val_X)
val_mae = mean_absolute_error(val_y, model_predictions)
# The label names the metric that is actually computed (MAE, not mean squared error).
print(f"Model validation Mean Absolute Error: {val_mae}")

### Analyzing overfitting by changing maximum number of leaf nodes in the Decision Tree

In [None]:
def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):
    """Return the validation mean absolute error of a size-limited decision tree.

    A ``DecisionTreeRegressor`` limited to ``max_leaf_nodes`` leaves and seeded
    with ``random_state=0`` is fitted on ``train_X``/``train_y``. Its
    predictions for ``val_X`` are then scored against ``val_y`` with
    ``mean_absolute_error``.
    """
    capped_tree = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes, random_state=0)
    capped_tree.fit(train_X, train_y)
    return mean_absolute_error(val_y, capped_tree.predict(val_X))

# Candidate limits on the number of leaves, from a very coarse tree to a very fine one.
leaf_nodes = [5, 25, 50, 100, 250, 500]

# Validation MAE for each candidate, in the same order as `leaf_nodes`.
model_maes = [
    get_mae(candidate, train_X, val_X, train_y, val_y)
    for candidate in leaf_nodes
]

print(model_maes)

### Random Forest 

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Build and train the seeded forest in one expression; fit() returns the estimator itself.
rf_model = RandomForestRegressor(random_state=1).fit(train_X, train_y)
rf_model  # keep the fitted estimator as the cell's displayed value

In [None]:
# Score the forest's validation predictions with mean absolute error.
rf_predictions = rf_model.predict(val_X)
rf_val_mae = mean_absolute_error(val_y, rf_predictions)
print(rf_val_mae)