In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow
tensorflow.keras.__version__

'2.5.0'

In [4]:
df = pd.read_csv('Data/Clean_Real_Estate_With_Crime.csv')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df.drop(df[df['Average School Rating'] == 'No Schools'].index, inplace=True)
df.astype({'Average School Rating': 'category', 'ZIP OR POSTAL CODE':'object','LATITUDE':'object','LONGITUDE':'object', 'BATHS':'int64'}).dtypes

Unnamed: 0                                                                                       int64
ADDRESS                                                                                         object
CITY                                                                                            object
STATE OR PROVINCE                                                                               object
ZIP OR POSTAL CODE                                                                              object
PRICE                                                                                            int64
BEDS                                                                                             int64
BATHS                                                                                            int64
SQUARE FEET                                                                                      int64
LOT SIZE                                                                 

In [5]:
for col in ['ZIP OR POSTAL CODE', 'CITY']:
    df[col]=df[col].astype('category')

In [6]:
df_new = df.loc[:,['CITY', 'ZIP OR POSTAL CODE', 'BATHS','PRICE', 
              'SQUARE FEET', 'LOT SIZE', 'YEAR BUILT', 'DAYS ON MARKET','LATITUDE','LONGITUDE', 'Average School Rating','Crime per Capita (1000s)']]

In [7]:
new_df = pd.get_dummies(df_new)

In [8]:
#step 1: choose columsn of interest
#step 2: use get_dummies
#step 3: follow this now voice_recognition
X = new_df.drop(["PRICE"],axis=1)
y = np.log(new_df.PRICE.values)
print(X.shape, y.shape)

(1260, 156) (1260,)


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [10]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42)

In [11]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [13]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, input_dim=156))
model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=2, activation='softmax'))
model.add(Dense(units=1))

In [14]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse'])

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               15700     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 25,901
Trainable params: 25,901
Non-trainable params: 0
_________________________________________________________________


In [16]:
model.fit(
    X_train_scaled,
    y_train,
    
    validation_split=.15,

    epochs=80,
    shuffle=True,
    verbose=2
)

Epoch 1/80
26/26 - 1s - loss: 175.4551 - mse: 175.4551 - val_loss: 124.7748 - val_mse: 124.7748
Epoch 2/80
26/26 - 0s - loss: 56.7235 - mse: 56.7235 - val_loss: 2.0092 - val_mse: 2.0092
Epoch 3/80
26/26 - 0s - loss: 3.3390 - mse: 3.3390 - val_loss: 1.2788 - val_mse: 1.2788
Epoch 4/80
26/26 - 0s - loss: 1.0193 - mse: 1.0193 - val_loss: 0.7360 - val_mse: 0.7360
Epoch 5/80
26/26 - 0s - loss: 0.5915 - mse: 0.5915 - val_loss: 0.6081 - val_mse: 0.6081
Epoch 6/80
26/26 - 0s - loss: 0.4607 - mse: 0.4607 - val_loss: 0.5069 - val_mse: 0.5069
Epoch 7/80
26/26 - 0s - loss: 0.3979 - mse: 0.3979 - val_loss: 0.4470 - val_mse: 0.4470
Epoch 8/80
26/26 - 0s - loss: 0.3529 - mse: 0.3529 - val_loss: 0.3994 - val_mse: 0.3994
Epoch 9/80
26/26 - 0s - loss: 0.3241 - mse: 0.3241 - val_loss: 0.3815 - val_mse: 0.3815
Epoch 10/80
26/26 - 0s - loss: 0.2980 - mse: 0.2980 - val_loss: 0.3559 - val_mse: 0.3559
Epoch 11/80
26/26 - 0s - loss: 0.2810 - mse: 0.2810 - val_loss: 0.3367 - val_mse: 0.3367
Epoch 12/80
26/26 - 

<tensorflow.python.keras.callbacks.History at 0x7f9bd4c54340>

## Quantify our Trained Model

In [17]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.1436 - mse: 0.1436
Normal Neural Network - Loss: 0.14363379776477814, Accuracy: 0.14363379776477814


In [20]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [21]:
rf = RandomForestRegressor()
rf.fit(X_train,y_train)

RandomForestRegressor()

In [22]:
rf.score(X_train,y_train)

0.9820048583322024

In [23]:
rf.score(X_test,y_test)

0.9096746707426758

In [28]:
np.exp(rf.predict(X_test)[2])

754298.2779062538

In [29]:
X_test.iloc[2]

BATHS                             1.500000
SQUARE FEET                    1440.000000
LOT SIZE                       6500.000000
YEAR BUILT                     1949.000000
DAYS ON MARKET                   11.000000
LATITUDE                         33.836051
LONGITUDE                      -117.890319
Crime per Capita (1000s)          5.244735
CITY_Aliso Viejo                  0.000000
CITY_Anaheim                      1.000000
CITY_Brea                         0.000000
CITY_Buena Park                   0.000000
CITY_Costa Mesa                   0.000000
CITY_Cypress                      0.000000
CITY_Dana Point                   0.000000
CITY_Fountain Valley              0.000000
CITY_Fullerton                    0.000000
CITY_Garden Grove                 0.000000
CITY_Huntington Beach             0.000000
CITY_Irvine                       0.000000
CITY_La Habra                     0.000000
CITY_La Palma                     0.000000
CITY_Laguna Beach                 0.000000
CITY_Laguna