In [8]:

import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, TimeDistributed, RepeatVector, Dense, BatchNormalization
import numpy as np

import random


class CharacterTable(object):
    """Given a set of characters:
    + Encode them to a one hot integer representation
    + Decode the one hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """
    def __init__(self, chars):
        """Initialize character table.
        # Arguments
            chars: Characters that can appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, C, num_rows):
        """One hot encode given string C.
        # Arguments
            num_rows: Number of rows in the returned one hot encoding. This is
                used to keep the # of rows for each data the same.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[x] for x in x)


maxlen = 25 #input max len
output_len = 10 # output max len
# All the possible symbols in the input 
chars = '0123456789+$-,/.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuwxyv '
ctable = CharacterTable(chars)


import pandas as pd
df = pd.read_csv("sale_prices.csv")
df = df.fillna(0)
from sklearn.utils import shuffle
df = shuffle(df)


raw_prices = df["Price on site"].tolist()
  
int_min_price = [int(price) for price in df["Min price"]]
int_max_price = [int(price) for price in df["Max price"]]

split_at = len(int_min_price) - len(int_min_price) // 10
    
#val data
raw_prices_val = []
parced_min_prices_val = []
parced_max_prices_val = []

for idx in range(split_at,len(raw_prices)-1):
    raw_price = str(raw_prices[idx])
    
    parsed_min_price = str(int_min_price[idx])
    parsed_min_price+=' '*(output_len - len(str(int_min_price[idx])))
    parced_min_prices_val.append(parsed_min_price)
    
    parsed_max_price = str(int_max_price[idx])
    parsed_max_price+=' '*(output_len - len(str(int_max_price[idx])))
    parced_max_prices_val.append(parsed_max_price)
    
    raw_prices_val.append(raw_price)



x_min_val = np.zeros((len(raw_prices_val), maxlen, len(chars)), dtype=np.bool)
y_min_val = np.zeros((len(parced_max_prices_val), output_len, len(chars)), dtype=np.bool)
for i, sentence in enumerate(raw_prices_val):
    x_min_val[i] = ctable.encode(str(sentence), maxlen)
for i, sentence in enumerate(parced_max_prices_val):
    y_min_val[i] = ctable.encode(str(sentence), output_len)

from tensorflow.keras.models import load_model


In [9]:

min_price_model = load_model('./wandb/run-sales_min/model-best.h5')
print('Min price prediction ----------------------------------------')
for ind in range(len(x_min_val)):
    rowx, rowy = x_min_val[np.array([ind])], y_min_val[np.array([ind])]
    preds = min_price_model.predict_classes(rowx, verbose=0)
    q = ctable.decode(rowx[0])
    correct = ctable.decode(rowy[0])
    guess = ctable.decode(preds[0], calc_argmax=False)
    print('Q', q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print('☑', end=' ')
    else:
        print('☒', end=' ')
    print('G',guess, end='\n')

Min price prediction ----------------------------------------
Q $1,000,000                T 1000000    ☑ G 1000000   
Q $199,000                  T 199000     ☑ G 199000    
Q $2,750,000                T 2750000    ☑ G 2750000   
Q $1,095,000                T 1095000    ☑ G 1095000   
Q $3,539,900+               T 0          ☑ G 0         
Q $68,500                   T 68500      ☑ G 68500     
Q $95,000                   T 95000      ☑ G 95000     
Q $1,958,395                T 1958395    ☑ G 1958395   
Q $2,500,000                T 2500000    ☑ G 2500000   
Q $1,175,000                T 1175000    ☑ G 1175000   
Q $185,000                  T 185000     ☑ G 185000    
Q $1,000,000                T 1000000    ☑ G 1000000   
Q $55,000                   T 55000      ☑ G 55000     
Q $1,499,900                T 1499900    ☑ G 1499900   
Q $2,000,000                T 2000000    ☑ G 2000000   
Q $1,999,999                T 1999999    ☑ G 1999999   
Q $159,999                  T 159999     ☑

In [10]:
max_price_model = load_model('./wandb/run-sales_max/model-best.h5')
print('Max price prediction ----------------------------------------')
for ind in range(len(x_max_val)):
    rowx, rowy = x_max_val[np.array([ind])], y_max_val[np.array([ind])]
    preds = max_price_model.predict_classes(rowx, verbose=0)
    q = ctable.decode(rowx[0])
    correct = ctable.decode(rowy[0])
    guess = ctable.decode(preds[0], calc_argmax=False)
    print('Q', q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print('☑', end=' ')
    else:
        print('☒', end=' ')
    print('G',guess, end='\n')

Max price prediction ----------------------------------------
Q $2,745,000                T 2745000    ☑ G 2745000   
Q Price Unavailable         T 0          ☑ G 0         
Q $1,295,000                T 1295000    ☑ G 1295000   
Q $650,000                  T 650000     ☑ G 650000    
Q $4,999                    T 4999       ☑ G 4999      
Q $2,129,000                T 2129000    ☑ G 2129000   
Q $588,000                  T 588000     ☑ G 588000    
Q $1,599,999                T 1599999    ☑ G 1599999   
Q $365,000 - $450,000       T 450000     ☑ G 450000    
Q $1,500,000                T 1500000    ☑ G 1500000   
Q $2,000,000                T 2000000    ☑ G 2000000   
Q $4,875,000                T 4875000    ☑ G 4875000   
Q $199,000                  T 199000     ☑ G 199000    
Q $389,000                  T 389000     ☑ G 389000    
Q $294,888                  T 294888     ☑ G 294888    
Q $996,904                  T 996904     ☑ G 996904    
Q $380,000                  T 380000     ☑