# ELEC 598 : Deep Learning Applications 
# Project 1 - Predicting Housing Prices 
### By: Matthew Stein and Ethan Tsao 
### February 10, 2020 

In [69]:
from keras import models
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing 

Data is imported via pandas, and processed via numpy. Working with Keras, all data is processed as arrays in numpy. To simplify model learning, feature-wise normalization is performed. Note that feature-wise normalization is calc'ed using the mean and std of the training set. 

The test set was chosen to be 15% of the overall data set. 

In [73]:
data = pd.read_excel('data/resd_data.xlsx')
length = data.shape[0]

# Remove all rows with null points in dataset 
updated_data = data.dropna(how='any',axis=0) 


# Remove all rows with "0" for the Postal Code, "0" for Year Built, "0" for Lot Square Footage, and "0" for List Price
updated_data = updated_data[updated_data.field_PostalCode != 0 ]
updated_data = updated_data[updated_data.YearBuilt != 0]
updated_data = updated_data[updated_data.LotSquareFootage != 0]
updated_data = updated_data[updated_data.ListPrice != 0]

updated_data['ListPrice'] = updated_data['ListPrice']/1000 

# Update Index column 

print(updated_data) 

      SquareFootageStructure  LotSquareFootage  YearBuilt  Bedrooms  \
0                     1549.0            5825.0     1974.0       3.0   
1                     1196.0            7900.0     1981.0       3.0   
3                     3884.0           16013.0     1978.0       3.0   
6                     1164.0            6611.0     1961.0       3.0   
12                    2766.0            7331.0     1986.0       4.0   
...                      ...               ...        ...       ...   
9989                  2300.0            6547.0     1924.0       4.0   
9990                  1545.0            2178.0     1967.0       2.0   
9991                  1404.0            7841.0     1964.0       2.0   
9993                   900.0            6000.0     1924.0       2.0   
9999                  2865.0            7841.0     2005.0       4.0   

      BathsTotal  field_StoriesTotal  field_PostalCode  ListPrice  
0            2.0                 1.0           92624.0      3.300  
1          

In [74]:
train_data = updated_data.loc[:(6370*.85),:'field_PostalCode'].values.tolist()
train_data = np.asarray(train_data) 
train_targets = updated_data.loc[:(6370*.85),'ListPrice'].values.tolist()
train_targets = np.asarray(train_targets) 

test_data = updated_data.loc[(6370*.85 +1 ):,:'field_PostalCode'].values.tolist()
test_data = np.asarray(test_data) 
test_targets = updated_data.loc[(6370*.85 + 1):,'ListPrice'].values.tolist()
test_targets = np.asarray(test_targets)

In [75]:
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
test_data -= mean
test_data /= std

## Model Selection 

As is standard for neural networks - and regression models, in particular - a Sequential model was chosen. The model has 3 hidden layers, with 64 

In [86]:
keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)


def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu',
                           input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

NameError: name 'keras' is not defined

## Training and Validation 

In [85]:
k=4
num_val_samples = len(train_data) // k
num_epochs = 100
all_mae_histories = []


for i in range(k):
    print('processing fold #', i)
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]

    print(train_data[:i * num_val_samples].shape)
    
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
        train_data[(i + 1) * num_val_samples:]],
        axis=0)
    
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
        train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=32, verbose=1)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=1) 
    #mae_history = history.history['val_mean_absolute_error']
    #all_mae_histories.append(mae_history)

processing fold # 0
(0, 7)
Train on 1723 samples, validate on 1722 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
processing fold # 1
(1722, 7)
Train on 1723 samples, validate on 1722 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100

KeyboardInterrupt: 

## Results Discussion 