In [None]:
import importlib
import utils 
import const

import pandas as pd
import numpy as np

import matplotlib.pylab as plt
from pandas.plotting import scatter_matrix
import seaborn as sns; sns.set()

from sklearn.pipeline import Pipeline

%matplotlib inline

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

In [None]:
# Re-import the local helper module so that edits made to it since the kernel
# started are picked up without a restart.
importlib.reload(utils)

# PrepareData exposes the prepared frame through its `.df` attribute.
data_source = utils.PrepareData()
df = data_source.df

# Preview the first rows as the cell output.
df.head()


In [None]:

# Reload the local transformer module before pulling names out of it, so the
# classes used below come from the freshly reloaded module.
import my_transformers
importlib.reload(my_transformers)
from my_transformers import (
    DropColumns,
    YearTransformer,
    ColumnToDateFormat,
    Drop33Rooms,
    DummyTransform,
)

# Columns removed by the 'clean' step.
dropped_columns = ['id', 'date', 'price', 'yr_renovated', 'bathrooms']

# Columns handed to the first DummyTransform step; 'zipcode' gets its own step.
dummy_columns = ['bedrooms', 'floors', 'waterfront', 'view', 'condition', 'grade']

# Steps are applied in the order listed here.
pipeline_steps = [
    ('yr_built_transformer', YearTransformer(column='yr_built')),
    ('33_bedrooms_row_drop', Drop33Rooms()),
    ('clean', DropColumns(columns=dropped_columns)),
    ('to_dummies', DummyTransform(columns=dummy_columns)),
    ('zipcode_to_dummies', DummyTransform(columns=['zipcode'])),
]

transform_pipeline = Pipeline(pipeline_steps)

In [None]:
# Run the raw frame through every step of the transformation pipeline.
# NOTE(review): `transform` is called without a prior `fit`; this presumably
# relies on the custom transformers being stateless -- confirm, since newer
# scikit-learn releases may warn or fail on an unfitted Pipeline.
ndf = transform_pipeline.transform(df)
# Preview the first rows of the transformed frame as the cell output.
ndf.head()

In [None]:
# Number of columns in the transformed frame (second entry of its shape);
# this count still includes the 'price_bin' target column.
col_num = ndf.shape[1]
col_num

In [None]:
from sklearn.model_selection import train_test_split

# Full feature matrix and target vector, kept for code that wants all rows.
target_column = 'price_bin'
data_X = ndf.drop(columns=[target_column])
data_y = ndf[target_column]

# Hold-out split; size and seed come from the shared `const` module so the
# partition is the same on every run.
split_parts = train_test_split(
    data_X,
    data_y,
    test_size=const.TEST_SIZE,
    random_state=const.RANDOM_STATE,
)
train_X, test_X, train_y, test_y = split_parts


In [None]:
# Feed-forward classifier over the transformed features.
#
# The input width is taken from the training matrix itself rather than from a
# hand-computed offset of `col_num`: `train_X` is `ndf` without the target
# column, so its column count is by construction what `model.fit` will see.
n_features = train_X.shape[1]

model = Sequential([
    Dense(units=124, input_shape=(n_features,), activation='relu'),
    Dropout(0.5),
    Dense(units=256, activation='relu'),
    Dropout(0.5),
    Dense(units=128, activation='relu'),
    Dropout(0.2),
    # Hidden layer: ReLU, like its neighbours. A softmax here would normalise
    # the 64 activations to sum to 1 and choke the signal passed downstream;
    # softmax belongs only on the output layer.
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=32, activation='relu'),
    # Two output units with softmax: one probability per class, matching the
    # sparse categorical cross-entropy loss the model is compiled with.
    Dense(units=2, activation='softmax')
])

In [None]:
# Adam with a small fixed step size.
adam_optimizer = Adam(learning_rate=0.0001)

# The sparse loss variant takes integer class labels directly (no one-hot
# encoding of the target is needed).
model.compile(
    optimizer=adam_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training settings gathered in one place. `validation_split` reserves a fifth
# of the training rows for per-epoch validation metrics; the separate
# test_X/test_y hold-out is not used in this cell.
fit_options = dict(
    validation_split=0.2,
    batch_size=50,
    epochs=15,
    shuffle=True,
    verbose=2,
)

history = model.fit(x=train_X, y=train_y, **fit_options)

In [None]:
# List which metric series were recorded during training.
recorded_metrics = history.history.keys()
print(recorded_metrics)

In [None]:
# Summarize training history: accuracy per epoch.
# The second curve comes from the `validation_split` portion of the training
# data, not from the held-out test set, so it is labelled 'validation'.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='train')
ax.plot(history.history['val_accuracy'], label='validation')
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()


In [None]:
# Summarize training history: loss per epoch.
# The second curve comes from the `validation_split` portion of the training
# data, not from the held-out test set, so it is labelled 'validation'.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Persist the trained model under the relative path 'models/model_zip_cat_dl'.
# NOTE(review): the path has no file extension; which on-disk format this
# produces (and whether it is accepted at all) depends on the installed
# Keras/TensorFlow version -- confirm before upgrading.
model.save('models/model_zip_cat_dl')