In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow
tensorflow.keras.__version__

'2.5.0'

In [4]:
df = pd.read_csv('Data/Clean_Real_Estate_With_Crime.csv')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df.drop(df[df['Average School Rating'] == 'No Schools'].index, inplace=True)
df.astype({'Average School Rating': 'category', 'ZIP OR POSTAL CODE':'object','LATITUDE':'object','LONGITUDE':'object', 'BATHS':'int64'}).dtypes

Unnamed: 0                                                                                       int64
ADDRESS                                                                                         object
CITY                                                                                            object
STATE OR PROVINCE                                                                               object
ZIP OR POSTAL CODE                                                                              object
PRICE                                                                                            int64
BEDS                                                                                             int64
BATHS                                                                                            int64
SQUARE FEET                                                                                      int64
LOT SIZE                                                                 

In [5]:
for col in ['ZIP OR POSTAL CODE', 'CITY']:
    df[col]=df[col].astype('category')

In [33]:
df_new = df.loc[:,['BEDS','CITY', 'ZIP OR POSTAL CODE', 'BATHS','PRICE', 
              'SQUARE FEET', 'LOT SIZE', 'YEAR BUILT', 'DAYS ON MARKET','LATITUDE','LONGITUDE', 'Average School Rating','Crime per Capita (1000s)']]

In [34]:
new_df = pd.get_dummies(df_new)

In [35]:
#step 1: choose columsn of interest
#step 2: use get_dummies
#step 3: follow this now voice_recognition
X = new_df.drop(["PRICE"],axis=1)
y = np.log(new_df.PRICE.values)
print(X.shape, y.shape)

(1260, 157) (1260,)


In [37]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [38]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42)

In [39]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [41]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, input_dim=157))
model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=2, activation='softmax'))
model.add(Dense(units=1))

In [42]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse'])

In [43]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               15800     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 101       
Total params: 26,001
Trainable params: 26,001
Non-trainable params: 0
_________________________________________________________________


In [44]:
model.fit(
    X_train_scaled,
    y_train,
    
    validation_split=.15,

    epochs=80,
    shuffle=True,
    verbose=2
)

Epoch 1/80
26/26 - 0s - loss: 174.3182 - mse: 174.3182 - val_loss: 119.5063 - val_mse: 119.5063
Epoch 2/80
26/26 - 0s - loss: 50.3752 - mse: 50.3752 - val_loss: 1.8071 - val_mse: 1.8071
Epoch 3/80
26/26 - 0s - loss: 3.1347 - mse: 3.1347 - val_loss: 1.4726 - val_mse: 1.4726
Epoch 4/80
26/26 - 0s - loss: 1.0590 - mse: 1.0590 - val_loss: 0.8021 - val_mse: 0.8021
Epoch 5/80
26/26 - 0s - loss: 0.6595 - mse: 0.6595 - val_loss: 0.6657 - val_mse: 0.6657
Epoch 6/80
26/26 - 0s - loss: 0.5349 - mse: 0.5349 - val_loss: 0.5388 - val_mse: 0.5388
Epoch 7/80
26/26 - 0s - loss: 0.4632 - mse: 0.4632 - val_loss: 0.4750 - val_mse: 0.4750
Epoch 8/80
26/26 - 0s - loss: 0.4173 - mse: 0.4173 - val_loss: 0.4494 - val_mse: 0.4494
Epoch 9/80
26/26 - 0s - loss: 0.3774 - mse: 0.3774 - val_loss: 0.3963 - val_mse: 0.3963
Epoch 10/80
26/26 - 0s - loss: 0.3516 - mse: 0.3516 - val_loss: 0.3866 - val_mse: 0.3866
Epoch 11/80
26/26 - 0s - loss: 0.3312 - mse: 0.3312 - val_loss: 0.3612 - val_mse: 0.3612
Epoch 12/80
26/26 - 

<tensorflow.python.keras.callbacks.History at 0x7f9bd6a4c9a0>

## Quantify our Trained Model

In [45]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.1611 - mse: 0.1611
Normal Neural Network - Loss: 0.1610599011182785, Accuracy: 0.1610599011182785


In [46]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [47]:
rf = RandomForestRegressor()
rf.fit(X_train,y_train)

RandomForestRegressor()

In [48]:
rf.score(X_train,y_train)

0.9817408405997372

In [49]:
rf.score(X_test,y_test)

0.9038660618290706

In [50]:
np.exp(rf.predict(X_test)[2])

748377.8334491701

In [51]:
X_test.iloc[2]

BEDS                              3.000000
BATHS                             1.500000
SQUARE FEET                    1440.000000
LOT SIZE                       6500.000000
YEAR BUILT                     1949.000000
DAYS ON MARKET                   11.000000
LATITUDE                         33.836051
LONGITUDE                      -117.890319
Crime per Capita (1000s)          5.244735
CITY_Aliso Viejo                  0.000000
CITY_Anaheim                      1.000000
CITY_Brea                         0.000000
CITY_Buena Park                   0.000000
CITY_Costa Mesa                   0.000000
CITY_Cypress                      0.000000
CITY_Dana Point                   0.000000
CITY_Fountain Valley              0.000000
CITY_Fullerton                    0.000000
CITY_Garden Grove                 0.000000
CITY_Huntington Beach             0.000000
CITY_Irvine                       0.000000
CITY_La Habra                     0.000000
CITY_La Palma                     0.000000
CITY_Laguna