In [0]:
import numpy as np
import pandas as pd
from datetime import timedelta
from datetime import date
from datetime import datetime
import time
from datascience import *
from scipy.spatial import distance

from IPython.display import display
from IPython.display import HTML

import ipywidgets as widgets
from ipywidgets import IntSlider

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets



In [0]:
# Constants

# Directory prefix for output files (not referenced by the cells visible here).
output_directory = "output/"

# Labels of the raw date and price columns in the condo sales data.
COL_PURCHASE_DATE = 'PURCHASE DATE'
COL_SOLD_DATE = 'SOLD DATE'
COL_PURCHASE_PRICE = 'PURCHASE PRICE'
COL_SOLD_PRICE = 'SOLD PRICE'

# Label of the column holding the price change between purchase and sale, in percent.
COL_PRICE_PERCENT = 'PRICE CHANGE %'

# Columns for standard units
# (these are the columns used as coordinates when measuring distance between sales).
COL_PURCHASE_DATE_SU = 'PURCHASE DATE SU'
COL_PURCHASE_PRICE_SU = 'PURCHASE PRICE SU'
COL_SOLD_DATE_SU = 'SOLD DATE SU'

# Load Model

In [0]:
df = pd.read_csv('condo_sales.csv')

# The date columns do not come out of read_csv as datetimes, so convert them explicitly.
# errors='coerce' turns any value that cannot be parsed into NaT instead of raising.
df = df.assign(**{
    COL_PURCHASE_DATE: pd.to_datetime(df[COL_PURCHASE_DATE], errors='coerce'),
    COL_SOLD_DATE: pd.to_datetime(df[COL_SOLD_DATE], errors='coerce'),
})

# Wrap the frame in a datascience Table, which the rest of the notebook works with.
condo_sales = Table.from_df(df)

# Price Predictor

Enter the **purchase price and date** of an apartment to see its current price and comparable sales.

In [0]:

# Columns used to calculate the distance between two points, where a point is a
# property that was purchased and later sold. The purchase date, the purchase price
# and the sold date, each expressed in standard units, were picked as the columns
# that matter for the distance.
distance_columns = [
    COL_PURCHASE_DATE_SU,
    COL_PURCHASE_PRICE_SU,
    COL_SOLD_DATE_SU,
]


# Purchase and sold dates of every sale as numeric timestamps; their mean and standard
# deviation are used later to express a new date in standard units.
purchase_dates_timestamps = [purchased_on.timestamp() for purchased_on in condo_sales.column(COL_PURCHASE_DATE)]
sold_dates_timestamps = [sold_on.timestamp() for sold_on in condo_sales.column(COL_SOLD_DATE)]



def all_distances(training, new_point):
    """Compute the distance from every row of ``training`` to ``new_point``.

    Only the columns named in ``distance_columns`` take part in the calculation, and
    ``new_point`` must be a row of attributes for those same columns. Distances are
    computed with ``scipy.spatial.distance.cdist`` using its default metric.

    Returns a flat array holding one distance per training row.
    """
    feature_rows = training.select(distance_columns).to_array().tolist()
    # cdist returns a (rows x 1) matrix because there is a single target point.
    pairwise = distance.cdist(feature_rows, [new_point])
    return pairwise.flatten()

def table_with_distances(training, new_point):
    """Return ``training`` with an extra 'Distance' column.

    The column holds, for each row, its distance from ``new_point`` as computed by
    ``all_distances``.
    """
    distances = all_distances(training, new_point)
    return training.with_column('Distance', distances)

def closest(training, new_point, k):
    """Return the ``k`` rows of ``training`` nearest to ``new_point``.

    The result is the distance-augmented table (it includes the 'Distance' column),
    sorted by 'Distance' and cut down to its first ``k`` rows.
    """
    ranked = table_with_distances(training, new_point).sort('Distance')
    first_k = np.arange(k)
    return ranked.take(first_k)

def estimate(training, purchase_point, k):
    """Estimate a percentage price change based on nearest neighbours.

    Looks up the ``k`` rows of ``training`` closest to ``purchase_point`` and averages
    their ``COL_PRICE_PERCENT`` values.

    training: table of sales to search; it must contain the ``distance_columns``
        and the ``COL_PRICE_PERCENT`` column.
    purchase_point: row of attributes in ``distance_columns`` order.
    k: number of neighbours to average over.

    Returns the mean price change (in percent) of the ``k`` nearest sales.
    """
    # Search the table that was passed in, not the module-level condo_sales.
    close_points = closest(training, purchase_point, k)
    return np.mean(close_points.column(COL_PRICE_PERCENT))


def predict_sale_price(purchase_price, purchase_date: str, k: int = 10):
    """Predict the current value based on the purchase price and purchase date.

    The purchase is expressed in standard units using the mean and standard deviation
    of the ``condo_sales`` data, with "now" used as the sold date. The average
    percentage price change of the ``k`` nearest historical sales is then applied to
    ``purchase_price``. The estimated value and the comparable sales are rendered as
    notebook output; nothing is returned.

    Parameters
    ----------
    purchase_price : number
        Price paid for the apartment.
    purchase_date : str
        Purchase date formatted as 'YYYY-MM-DD'.
    k : int, optional
        Number of nearest comparable sales to average over and to display (default 10).

    Raises
    ------
    ValueError
        If ``purchase_date`` does not match the '%Y-%m-%d' format.
    """
    # Convert via pd.Timestamp so the value is derived the same way as the training
    # timestamps, which call .timestamp() on the table's date values (presumably naive
    # pandas Timestamps, which are interpreted as UTC -- confirm). time.mktime would
    # interpret the date in the machine's local timezone instead.
    purchase_date_timestamp = pd.Timestamp(datetime.strptime(purchase_date, '%Y-%m-%d')).timestamp()

    # Set sold date to now
    sold_date = datetime.now().timestamp()

    # Calculate all values in standard units: (value - mean) / standard deviation.
    purchase_prices = condo_sales.column(COL_PURCHASE_PRICE)
    purchase_date_su = (purchase_date_timestamp - np.mean(purchase_dates_timestamps)) / np.std(purchase_dates_timestamps)
    purchase_price_su = (purchase_price - np.mean(purchase_prices)) / np.std(purchase_prices)
    sold_date_su = (sold_date - np.mean(sold_dates_timestamps)) / np.std(sold_dates_timestamps)

    # Create a one-row table so the target point has the same layout as the training rows.
    target_row = Table(distance_columns).with_row([purchase_date_su, purchase_price_su, sold_date_su])
    target_point = target_row.row(0)

    price_change_percent = estimate(condo_sales, target_point, k)

    # The price change is a percentage, hence the division by 100.
    price = purchase_price * (1 + price_change_percent / 100)

    display(HTML("<H1>Current Value: ${:,.0f} </H1>".format(price)))

    display(HTML("<H1>Compareable sales</H1>"))
    # Hide the standard-unit helper columns; they are not meaningful to the reader.
    closest(condo_sales, target_point, k).drop(distance_columns).show()

# Inputs for the prediction. The trailing `#@param` annotations look like notebook
# form-field markers (presumably Colab forms -- confirm), so keep each assignment on
# its own line with a literal value.
purchase_price = 685000 #@param {type:"integer"}
purchase_date = '2012-11-09' #@param {type:"date"}

# Render the estimated current value and the comparable sales for the inputs above.
predict_sale_price(purchase_price=purchase_price, purchase_date=purchase_date)


FULL ADDRESS,PURCHASE DATE,PURCHASE PRICE,SOLD DATE,SOLD PRICE,PRICE CHANGE,PERIOD,DAILY PRICE CHANGE,PRICE CHANGE %,Distance
"88 GREENWICH STREET, 1007",2012-09-21 00:00:00,713700,2018-10-11 00:00:00,875000,161300,2211,72.9534,22.6005,0.0698598
"171 EAST 84TH STREET, 12H",2012-09-28 00:00:00,737000,2018-08-10 00:00:00,961000,224000,2142,104.575,30.3935,0.111348
"99 JOHN STREET, 422",2012-11-16 00:00:00,675000,2018-06-05 00:00:00,873000,198000,2027,97.6813,29.3333,0.140513
"162 WEST 56TH STREET, 602",2013-02-04 00:00:00,575000,2018-09-26 00:00:00,730000,155000,2060,75.2427,26.9565,0.144141
"15 BROAD STREET, 2222",2012-11-27 00:00:00,685000,2018-05-31 00:00:00,927000,242000,2011,120.338,35.3285,0.144594
"116 CENTRAL PARK SOUTH, 10G",2012-06-21 00:00:00,617500,2018-09-17 00:00:00,730000,112500,2279,49.3638,18.2186,0.156283
"21 LUDLOW STREET, 5B",2012-11-14 00:00:00,605858,2018-06-07 00:00:00,980000,374142,2031,184.216,61.7541,0.158444
"21 LUDLOW STREET, 3B",2013-03-22 00:00:00,595000,2018-09-06 00:00:00,893750,298750,1994,149.824,50.2101,0.164553
"55 WEST 84TH STREET, 4B",2013-05-01 00:00:00,697500,2018-09-27 00:00:00,835000,137500,1975,69.6203,19.7133,0.165877
"99 JOHN STREET, 2008",2012-06-05 00:00:00,690000,2018-07-27 00:00:00,938000,248000,2243,110.566,35.942,0.17414
