In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow
tensorflow.keras.__version__

'2.5.0'

In [4]:
df = pd.read_csv('Data/Clean_Real_Estate_With_Crime.csv')
pd.set_option('display.max_rows', None)
df.drop(df[df['Average School Rating'] == 'No Schools'].index, inplace=True)
df.astype({'Average School Rating': 'float64', 'ZIP OR POSTAL CODE':'object','LATITUDE':'object','LONGITUDE':'object'}).dtypes

Unnamed: 0                                                                                      int64
ADDRESS                                                                                        object
CITY                                                                                           object
STATE OR PROVINCE                                                                              object
ZIP OR POSTAL CODE                                                                             object
PRICE                                                                                           int64
BEDS                                                                                            int64
BATHS                                                                                         float64
SQUARE FEET                                                                                     int64
LOT SIZE                                                                          

In [5]:
df_new = df.loc[:,['CITY', 'ZIP OR POSTAL CODE', 'BATHS','PRICE', 
              'SQUARE FEET', 'LOT SIZE', 'YEAR BUILT', 'DAYS ON MARKET','LATITUDE','LONGITUDE', 'Average School Rating','Crime per Capita (1000s)']]

In [6]:
df_new.astype({'Average School Rating': 'float64', 'ZIP OR POSTAL CODE':'object','LATITUDE':'object','LONGITUDE':'object'}).dtypes

CITY                         object
ZIP OR POSTAL CODE           object
BATHS                       float64
PRICE                         int64
SQUARE FEET                   int64
LOT SIZE                      int64
YEAR BUILT                    int64
DAYS ON MARKET                int64
LATITUDE                     object
LONGITUDE                    object
Average School Rating       float64
Crime per Capita (1000s)    float64
dtype: object

In [9]:
new_df = pd.get_dummies(df_new)

## Data Pre-Processing

In [13]:
#step 1: choose columsn of interest
#step 2: use get_dummies
#step 3: follow this now voice_recognition
X = new_df.drop(["PRICE"],axis=1)
y = new_df.PRICE.values
print(X.shape, y.shape)

(1260, 81) (1260,)


In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

In [16]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [17]:
X_train.shape

(945, 81)

In [None]:
# # Step 1: Label-encode data set
# label_encoder = LabelEncoder()
# label_encoder.fit(y_train)
# encoded_y_train = label_encoder.transform(y_train)
# encoded_y_test = label_encoder.transform(y_test)

In [None]:
# Step 2: Convert encoded labels to one-hot-encoding
# y_train_categorical = to_categorical(encoded_y_train)
# y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [34]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=81))
model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=2, activation='softmax'))
model.add(Dense(units=1))

In [35]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse'])

In [36]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               8200      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 101       
Total params: 18,401
Trainable params: 18,401
Non-trainable params: 0
_________________________________________________________________


In [37]:
model.fit(
    X_train_scaled,
    y_train,
    
    validation_split=.15,

    epochs=17,
    shuffle=True,
    verbose=2
)

Epoch 1/17
26/26 - 3s - loss: 33912428429312.0000 - mse: 33912428429312.0000 - val_loss: 7254149431296.0000 - val_mse: 7254149431296.0000
Epoch 2/17
26/26 - 0s - loss: 33912394874880.0000 - mse: 33912394874880.0000 - val_loss: 7254108012544.0000 - val_mse: 7254108012544.0000
Epoch 3/17
26/26 - 0s - loss: 33912283725824.0000 - mse: 33912283725824.0000 - val_loss: 7253972746240.0000 - val_mse: 7253972746240.0000
Epoch 4/17
26/26 - 0s - loss: 33911946084352.0000 - mse: 33911946084352.0000 - val_loss: 7253650309120.0000 - val_mse: 7253650309120.0000
Epoch 5/17
26/26 - 0s - loss: 33911281287168.0000 - mse: 33911281287168.0000 - val_loss: 7253020639232.0000 - val_mse: 7253020639232.0000
Epoch 6/17
26/26 - 0s - loss: 33910075424768.0000 - mse: 33910075424768.0000 - val_loss: 7252031307776.0000 - val_mse: 7252031307776.0000
Epoch 7/17
26/26 - 1s - loss: 33908244611072.0000 - mse: 33908244611072.0000 - val_loss: 7250578505728.0000 - val_mse: 7250578505728.0000
Epoch 8/17
26/26 - 0s - loss: 3390

<tensorflow.python.keras.callbacks.History at 0x7f88af41ddf0>

## Quantify our Trained Model

In [38]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 14296837783552.0000 - mse: 14296837783552.0000
Normal Neural Network - Loss: 14296837783552.0, Accuracy: 14296837783552.0


## Make Predictions

In [39]:
prediction_labels = model.predict_classes(X_test_scaled)
print(prediction_labels)
#prediction_labels = label_encoder.inverse_transform(encoded_predictions)



[[1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]


In [45]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 

In [43]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [47]:
rf = RandomForestRegressor()
rf.fit(X_train,y_train)

RandomForestRegressor()

In [48]:
rf.score(X_train,y_train)

0.9292825931886022

In [49]:
rf.score(X_test,y_test)

0.41906246316425877