In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import distutils as _distutils
import tensorflow as tf
import tensorflow.compat.v1 as tf
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Import and read the cleaned_state_market_tracker.csv.
import pandas as pd 
market_df = pd.read_csv("cleaned_state_market_tracker.csv")
market_df.head()

Unnamed: 0,table_id,state,state_code,property_type,property_type_id,median_sale_price,median_list_price,median_ppsf,median_list_ppsf,homes_sold,pending_sales,new_listings,inventory,months_of_supply,avg_sale_to_list,sold_above_list,parent_metro_region,year,month,rei_id
0,1,Alabama,AL,Multi-Family (2-4 Unit),4,61900,179000.0,27.0,95.0,4,3.0,3.0,15.0,3.8,0.900837,0.0,South Region,2014,3,1
1,1,Alabama,AL,Condo/Co-op,3,363000,353700.0,388.0,185.0,281,230.0,286.0,902.0,3.2,0.983281,0.135231,South Region,2023,7,2
2,1,Alabama,AL,Townhouse,13,130500,186400.0,83.0,95.0,116,61.0,120.0,430.0,3.7,0.962387,0.12069,South Region,2015,4,3
3,1,Alabama,AL,All Residential,-1,191000,192400.0,97.0,98.0,5181,3033.0,5544.0,20941.0,4.0,0.971602,0.182397,South Region,2017,6,4
4,1,Alabama,AL,Single Family Residential,6,138400,162100.0,72.0,86.0,2042,724.0,2815.0,13234.0,6.5,0.950388,0.15475,South Region,2013,2,5


In [3]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
market_df.drop(columns = ['table_id','property_type_id', 'state_code', 'median_ppsf', "median_list_ppsf", "sold_above_list", "year", "months_of_supply", "state", "parent_metro_region"], inplace=True)
market_df

Unnamed: 0,property_type,median_sale_price,median_list_price,homes_sold,pending_sales,new_listings,inventory,avg_sale_to_list,month,rei_id
0,Multi-Family (2-4 Unit),61900,179000.0,4,3.0,3.0,15.0,0.900837,3,1
1,Condo/Co-op,363000,353700.0,281,230.0,286.0,902.0,0.983281,7,2
2,Townhouse,130500,186400.0,116,61.0,120.0,430.0,0.962387,4,3
3,All Residential,191000,192400.0,5181,3033.0,5544.0,20941.0,0.971602,6,4
4,Single Family Residential,138400,162100.0,2042,724.0,2815.0,13234.0,0.950388,2,5
...,...,...,...,...,...,...,...,...,...,...
35418,Multi-Family (2-4 Unit),153500,244900.0,9,8.0,14.0,33.0,0.947979,10,35419
35419,Multi-Family (2-4 Unit),141600,231700.0,5,9.0,12.0,33.0,0.941061,5,35420
35420,Single Family Residential,243700,249200.0,1509,1505.0,1928.0,4627.0,0.977123,4,35421
35421,Townhouse,73800,143500.0,19,6.0,15.0,99.0,0.944210,10,35422


In [4]:
null_market_df = market_df.dropna(how='any', axis= 0)
null_market_df

Unnamed: 0,property_type,median_sale_price,median_list_price,homes_sold,pending_sales,new_listings,inventory,avg_sale_to_list,month,rei_id
0,Multi-Family (2-4 Unit),61900,179000.0,4,3.0,3.0,15.0,0.900837,3,1
1,Condo/Co-op,363000,353700.0,281,230.0,286.0,902.0,0.983281,7,2
2,Townhouse,130500,186400.0,116,61.0,120.0,430.0,0.962387,4,3
3,All Residential,191000,192400.0,5181,3033.0,5544.0,20941.0,0.971602,6,4
4,Single Family Residential,138400,162100.0,2042,724.0,2815.0,13234.0,0.950388,2,5
...,...,...,...,...,...,...,...,...,...,...
35418,Multi-Family (2-4 Unit),153500,244900.0,9,8.0,14.0,33.0,0.947979,10,35419
35419,Multi-Family (2-4 Unit),141600,231700.0,5,9.0,12.0,33.0,0.941061,5,35420
35420,Single Family Residential,243700,249200.0,1509,1505.0,1928.0,4627.0,0.977123,4,35421
35421,Townhouse,73800,143500.0,19,6.0,15.0,99.0,0.944210,10,35422


In [5]:
# Determine the number of unique values in each column.
category_values = null_market_df.nunique()
category_values

property_type            5
median_sale_price     6061
median_list_price     5553
homes_sold            8834
pending_sales         7719
new_listings          9472
inventory            14159
avg_sale_to_list     33875
month                   12
rei_id               34012
dtype: int64

In [6]:
# Split our preprocessed data into our features and target arrays
y = null_market_df["property_type"].astype('category').cat.codes
X = null_market_df.drop(["property_type"], axis=1)

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=55)
y

0        2
1        1
2        4
3        0
4        3
        ..
35418    2
35419    2
35420    3
35421    4
35422    1
Length: 34012, dtype: int8

In [7]:
# Create a StandardScaler instances
scaler = MinMaxScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])

nn_model = tf.keras.models.Sequential()

hidden_nodes_layer1 = 4
hidden_nodes_layer2 = 4
hidden_nodes_layer3= 1 

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='sigmoid'))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='sigmoid'))

# Output layer
nn_model.add(tf.keras.layers.Dense(units=1, activation= "sigmoid"))

# Check the structure of the model
nn_model.summary()

print(number_input_features)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


9


In [9]:
# Compile the model
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [10]:
# Train the model
fit_model = nn_model.fit(X_train_scaled,y_train, epochs=50)

Epoch 1/50


[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 854us/step - accuracy: 0.2044 - loss: 0.1489
Epoch 2/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 912us/step - accuracy: 0.1968 - loss: -3.8418
Epoch 3/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 912us/step - accuracy: 0.1992 - loss: -8.0370
Epoch 4/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 864us/step - accuracy: 0.1964 - loss: -11.9461
Epoch 5/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.1967 - loss: -15.8046
Epoch 6/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.1989 - loss: -19.3694
Epoch 7/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 998us/step - accuracy: 0.1958 - loss: -22.9364
Epoch 8/50
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 902us/step - accuracy: 0.1972 - loss: -26.4234
Epoch 9/50
[1m798/798[0m 

In [11]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

266/266 - 0s - 1ms/step - accuracy: 0.2019 - loss: -1.7904e+02
Loss: -179.04246520996094, Accuracy: 0.2019287347793579
