In [1]:
#Import some basic packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
sns.set_style("whitegrid")

In [2]:
#Let's import the data and instanciate a train/test split with the seed everyrone is using.

housing = pd.read_csv('Housing_Data_Cleaned_03052025.csv')

from sklearn.model_selection import train_test_split
housing_train,housing_test = train_test_split(housing, test_size=.2, random_state=216, shuffle=True, stratify=housing['Destroyed'])

In [3]:
#Let's import all the functions from sklearn that we're using.

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [14]:
preprocessor = ColumnTransformer(
    transformers=[
        ("Structure Type", OneHotEncoder(), ['* Structure Type']),
        ("Roof Construction", OneHotEncoder(), ['* Roof Construction']),
        ("Eaves", OneHotEncoder(), ['* Eaves']),
        ("Vent Screen", OneHotEncoder(), ['* Vent Screen']),
        ("Exterior Siding", OneHotEncoder(), ['* Exterior Siding']),
        ("Window Pane", OneHotEncoder(), ['* Window Pane']),
        ("Elevated", OneHotEncoder(), ['* Deck/Porch Elevated']),
        ("Patio Cover/Carport Attached", OneHotEncoder(), ['* Patio Cover/Carport Attached to Structure']),
        ("Fence Attached to Structure", OneHotEncoder(), ['* Fence Attached to Structure']),
        ("Longitude", StandardScaler(), ['Longitude']),
        ("Latitude", StandardScaler(), ['Latitude'])
        ])

ETModel = Pipeline([('Preprocess', preprocessor),('Extra Trees', ExtraTreesClassifier(n_estimators=500, max_depth=40))])

ETModel.fit(housing_train[['* Structure Type', '* Roof Construction', '* Eaves', '* Vent Screen',
       '* Exterior Siding', '* Window Pane', '* Deck/Porch On Grade',
       '* Deck/Porch Elevated', '* Patio Cover/Carport Attached to Structure',
       '* Fence Attached to Structure', 'Latitude', 'Longitude']],housing_train['Destroyed'])

preds = ETModel.predict(housing_test[['* Structure Type', '* Roof Construction', '* Eaves', '* Vent Screen',
       '* Exterior Siding', '* Window Pane', '* Deck/Porch On Grade',
       '* Deck/Porch Elevated', '* Patio Cover/Carport Attached to Structure',
       '* Fence Attached to Structure', 'Latitude', 'Longitude']])

In [15]:
print('The confusion matrix is \n', confusion_matrix(housing_test['Destroyed'].values, preds))
print('The accuracy score is', accuracy_score(housing_test['Destroyed'].values, preds), '\n')
print('The precision score is', precision_score(housing_test['Destroyed'].values, preds), '\n')
print('The recall score is', precision_score(housing_test['Destroyed'].values, preds), '\n')



The confusion matrix is 
 [[4579  536]
 [ 694 2749]]
The accuracy score is 0.8562748305678897 

The precision score is 0.836834094368341 

The recall score is 0.836834094368341 



In [27]:
preprocessor = ColumnTransformer(
    transformers=[
        ("Structure Type", OneHotEncoder(), ['* Structure Type']),
        ("Roof Construction", OneHotEncoder(), ['* Roof Construction']),
        ("Eaves", OneHotEncoder(), ['* Eaves']),
        ("Vent Screen", OneHotEncoder(), ['* Vent Screen']),
        ("Exterior Siding", OneHotEncoder(), ['* Exterior Siding']),
        ("Window Pane", OneHotEncoder(), ['* Window Pane']),
        ("Elevated", OneHotEncoder(), ['* Deck/Porch Elevated']),
        ("Patio Cover/Carport Attached", OneHotEncoder(), ['* Patio Cover/Carport Attached to Structure']),
        ("Fence Attached to Structure", OneHotEncoder(), ['* Fence Attached to Structure']),
        ("Longitude", StandardScaler(), ['Longitude']),
        ("Latitude", StandardScaler(), ['Latitude'])
        ])

RFModel = Pipeline([('Preprocess', preprocessor),('Extra Trees', RandomForestClassifier(n_estimators=4000, min_samples_leaf = 20, max_depth=200, random_state=216))])

RFModel.fit(housing_train[['* Structure Type', '* Roof Construction', '* Eaves', '* Vent Screen',
       '* Exterior Siding', '* Window Pane', '* Deck/Porch On Grade',
       '* Deck/Porch Elevated', '* Patio Cover/Carport Attached to Structure',
       '* Fence Attached to Structure', 'Latitude', 'Longitude']],housing_train['Destroyed'])

preds = RFModel.predict(housing_test[['* Structure Type', '* Roof Construction', '* Eaves', '* Vent Screen',
       '* Exterior Siding', '* Window Pane', '* Deck/Porch On Grade',
       '* Deck/Porch Elevated', '* Patio Cover/Carport Attached to Structure',
       '* Fence Attached to Structure', 'Latitude', 'Longitude']])

In [28]:
print('The confusion matrix is \n', confusion_matrix(housing_test['Destroyed'].values, preds))
print('The accuracy score is', accuracy_score(housing_test['Destroyed'].values, preds), '\n')
print('The precision score is', precision_score(housing_test['Destroyed'].values, preds), '\n')
print('The recall score is', precision_score(housing_test['Destroyed'].values, preds), '\n')

The confusion matrix is 
 [[4886  229]
 [ 945 2498]]
The accuracy score is 0.8628184155176443 

The precision score is 0.916024935826916 

The recall score is 0.916024935826916 

