In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow
tensorflow.keras.__version__

'2.5.0'

In [5]:
df = pd.read_csv('Data/Clean_Real_Estate_With_Crime.csv')
pd.set_option('display.max_rows', None)
df.drop(df[df['Average School Rating'] == 'No Schools'].index, inplace=True)
df.astype({'Average School Rating': 'float128', 'ZIP OR POSTAL CODE':'object'}).dtypes

Unnamed: 0                                                                                       int64
ADDRESS                                                                                         object
CITY                                                                                            object
STATE OR PROVINCE                                                                               object
ZIP OR POSTAL CODE                                                                              object
PRICE                                                                                            int64
BEDS                                                                                             int64
BATHS                                                                                          float64
SQUARE FEET                                                                                      int64
LOT SIZE                                                                 

## Data Pre-Processing

In [5]:
X = voice.drop("label", axis=1)
y = voice["label"]
print(X.shape, y.shape)

(3168, 20) (3168,)


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

In [10]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [12]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [23]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=20))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [24]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [25]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               2100      
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 202       
Total params: 12,402
Trainable params: 12,402
Non-trainable params: 0
_________________________________________________________________


In [26]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    
    validation_split=.15,

    epochs=17,
    shuffle=True,
    verbose=2
)

Epoch 1/17
64/64 - 0s - loss: 0.4957 - accuracy: 0.8078 - val_loss: 0.3357 - val_accuracy: 0.8964
Epoch 2/17
64/64 - 0s - loss: 0.2620 - accuracy: 0.9188 - val_loss: 0.1691 - val_accuracy: 0.9664
Epoch 3/17
64/64 - 0s - loss: 0.1477 - accuracy: 0.9594 - val_loss: 0.1077 - val_accuracy: 0.9636
Epoch 4/17
64/64 - 0s - loss: 0.1123 - accuracy: 0.9638 - val_loss: 0.0901 - val_accuracy: 0.9692
Epoch 5/17
64/64 - 0s - loss: 0.0952 - accuracy: 0.9688 - val_loss: 0.0844 - val_accuracy: 0.9692
Epoch 6/17
64/64 - 0s - loss: 0.0909 - accuracy: 0.9718 - val_loss: 0.0819 - val_accuracy: 0.9776
Epoch 7/17
64/64 - 0s - loss: 0.0862 - accuracy: 0.9737 - val_loss: 0.0814 - val_accuracy: 0.9804
Epoch 8/17
64/64 - 0s - loss: 0.0873 - accuracy: 0.9728 - val_loss: 0.0828 - val_accuracy: 0.9692
Epoch 9/17
64/64 - 0s - loss: 0.0797 - accuracy: 0.9737 - val_loss: 0.0970 - val_accuracy: 0.9664
Epoch 10/17
64/64 - 0s - loss: 0.0891 - accuracy: 0.9713 - val_loss: 0.1139 - val_accuracy: 0.9580
Epoch 11/17
64/64 -

<tensorflow.python.keras.callbacks.History at 0x1412d95c0>

## Quantify our Trained Model

In [27]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

25/25 - 0s - loss: 0.0625 - accuracy: 0.9773
Normal Neural Network - Loss: 0.06252730637788773, Accuracy: 0.9772727489471436


## Make Predictions

In [28]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [29]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['female' 'female' 'female' 'female' 'female']
Actual Labels: ['female', 'female', 'female', 'female', 'female']


In [30]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [31]:
rf = RandomForestClassifier()
rf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [32]:
rf.score(X_train,y_train)

1.0

In [33]:
rf.score(X_test,y_test)

0.9797979797979798