## Dependencies 

In [10]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import pandas as pd
import tensorflow as tf
from config import password


In [2]:
# Database dependencies
from urllib.parse import quote_plus

import psycopg2
import sqlalchemy
from sqlalchemy import create_engine

# Connection settings for each known database, keyed by a short alias.
# The password is imported from `config` instead of being written in this notebook.
DATABASES = {
    'finalproject': {
        'POSTGRES_ADDRESS': 'finalproject.c969jxlkzz2v.us-east-2.rds.amazonaws.com',
        'POSTGRES_PORT': '5432',
        'POSTGRES_USERNAME': "postgres",
        'POSTGRES_PASSWORD': password,
        'POSTGRES_DBNAME': 'housingdata',
    },
}

# Choose the database to use
db = DATABASES['finalproject']

# Build the SQLAlchemy connection URL.
# The username and password are URL-encoded first: characters such as '@', '/',
# ':' or '%' inside a credential would otherwise be read as URL delimiters and
# yield a malformed (or silently wrong) connection string.
postgres_str = (
    'postgresql+psycopg2://{username}:{password}@{ipaddress}:{port}/{dbname}'
    .format(
        username=quote_plus(str(db['POSTGRES_USERNAME'])),
        password=quote_plus(str(db['POSTGRES_PASSWORD'])),
        ipaddress=db['POSTGRES_ADDRESS'],
        port=db['POSTGRES_PORT'],
        dbname=db['POSTGRES_DBNAME'],
    )
)

# Create a database engine instance. The name `connection` is kept because the
# loading cells below pass it to `pd.read_sql_table`.
connection = create_engine(postgres_str)

## Load the datasets

In [6]:
# Read the `city_data` table into a DataFrame and preview its first five rows.
city_df = pd.read_sql_table(table_name='city_data', con=connection)
city_df.head(5)

Unnamed: 0,zipcode,city
0,61563,Saint David
1,61564,South Pekin
2,61565,Sparland
3,61567,Topeka
4,61568,Tremont


In [4]:
# Read the `acs_data` table into a DataFrame and preview its first five rows.
acs_df = pd.read_sql_table(table_name='acs_data', con=connection)
acs_df.head(5)

Unnamed: 0,zipcode,total_pop,male,female,med_age,race_w,race_aa,race_nat,race_as,race_api,race_oth,eth_hisp,eth_nonhisp
0,52761,30619,15205,15414,37.8,28856,1504,255,493,11,603,5480,25139
1,60002,24066,12076,11990,43.3,22792,869,89,630,31,402,1662,22404
2,60004,50915,24193,26722,43.0,43996,738,79,5458,116,1597,4013,46902
3,60005,29539,15040,14499,43.8,24937,892,143,2961,0,1089,3163,26376
4,60007,33420,16504,16916,43.8,28444,532,245,3680,24,1500,4020,29400


In [7]:
# Read the `crime_data` table into a DataFrame and preview its first five rows.
crime_df = pd.read_sql_table(table_name='crime_data', con=connection)
crime_df.head(5)

Unnamed: 0,city,vio_crime,murder_mansl,rape,robbery,agg_assault,prop_crime,burglary,lar_theft,car_theft,arson
0,Lincoln,52.0,0.0,5.0,2.0,45.0,253.0,38.0,204.0,11.0,2.0
1,Lincolnwood,15.0,0.0,0.0,5.0,10.0,380.0,40.0,317.0,23.0,3.0
2,Lindenhurst,13.0,1.0,8.0,1.0,3.0,58.0,6.0,48.0,4.0,0.0
3,Lisle,16.0,0.0,4.0,5.0,7.0,106.0,20.0,74.0,12.0,0.0
4,Litchfield,5.0,0.0,2.0,1.0,2.0,217.0,26.0,188.0,3.0,3.0


In [8]:
# Read the `income_data` table into a DataFrame and preview its first five rows.
income_df = pd.read_sql_table(table_name='income_data', con=connection)
income_df.head(5)

Unnamed: 0,zipcode,median_income
0,60974,47857
1,61001,65240
2,61006,59559
3,61007,52829
4,61008,58709


In [9]:
# Read the `zhvi_data` table into a DataFrame and preview its first five rows.
zhvi_df = pd.read_sql_table(table_name='zhvi_data', con=connection)
zhvi_df.head(5)

Unnamed: 0,zipcode,zhvi
0,61531,84009.17
1,61532,46023.33
2,61533,117554.17
3,61534,111049.92
4,61535,180972.58


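## Data preprocessing

> **TODO (review):** no cell in this section builds `housing_df`, which the split cell below reads. Restore or add the preprocessing cells that create it; otherwise the notebook cannot run from a fresh kernel.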

## Split and standardize the data

In [14]:
# Split the preprocessed data into the target array (`zhvi`) and the feature array.
y = housing_df['zhvi'].values
# Name the axis explicitly: the positional form `drop(labels, 1)` was deprecated
# in pandas 1.3 and raises a TypeError in pandas 2.x.
X = housing_df.drop(columns=['zhvi']).values

# Split the preprocessed data into a training and testing dataset.
# The fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [16]:
# Standardize the features. The scaler is fit on the training split only and
# that same fitted scaler is then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Transform each split with the fitted scaler.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

## Build, train, and predict with the model

In [None]:
# Network dimensions: one input per feature column, then two hidden layers.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

# Assemble the network in one go: two ReLU hidden layers followed by a single
# linear output unit (a regression output).
nn = tf.keras.models.Sequential([
    # First hidden layer
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation="relu",
    ),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Check the structure of the model
nn.summary()

In [12]:
# Compile the model: Adam optimizer, mean-squared-error loss, and MSE tracked
# as a reported metric as well.
nn.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["MSE"],
)

In [None]:
# Train the model
fit_model = nn.fit(X_train,y_train, epochs=100) #it could be higher

In [None]:
# The model creates predicted y values based on X values
y_pred = fit_model.predict(X)