In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import src.NearNear_v3 as nn
# Turn on IPython's autoreload extension (mode 2) so that edits to imported
# modules such as src.NearNear_v3 are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

#### Test `compute_distances` function used in Model
 - Use toy dataset with items in SF and ATX (roughly)
 - Pop off a single row in Austin as a 'test case'
 - Expect distance matrix to include 
     - zeros for ATX-ATX and SF-SF 
     - ~1475mi for SF-ATX cases

In [2]:
# Load the toy dataset used by every test section below and display it in full
# (it is only a handful of rows, so showing the whole frame is intentional).
# NOTE(review): the rendered output shows an 'Unnamed: 0' header; if that is a
# real column (the CSV was saved with its index), consider index_col=0 - confirm.
df = pd.read_csv('data/test_data.csv')
df

Unnamed: 0,property_latitude,property_longitude,property_imprating,sale_amount
0,38,-122,A,10
1,38,-122,A,10
2,38,-122,A,10
3,38,-122,B,20
4,38,-122,B,20
5,38,-122,B,20
6,30,-98,C,40
7,30,-98,B,30
8,30,-98,C,30


In [3]:
# Hold out the last row (label 8, part of the ATX group) as the single test
# case; rows labelled 0-7 form the training set. `.loc` slices by label and
# includes both endpoints.
predictors = ['property_latitude', 'property_longitude', 'property_imprating']
train_rows, test_rows = slice(None, 7), slice(8, None)
X_train, y_train = df.loc[train_rows, predictors], df.loc[train_rows, 'sale_amount']
X_test, y_test = df.loc[test_rows, predictors], df.loc[test_rows, 'sale_amount']

In [4]:
# Extract the coordinate columns as plain arrays and compute the distances
# between the held-out test row and every training row.
lat_train = X_train['property_latitude'].values
lon_train = X_train['property_longitude'].values
lat_test = X_test['property_latitude'].values
lon_test = X_test['property_longitude'].values
distances = nn.compute_distances(lat_train,lon_train,lat_test,lon_test)

# Presumably indexed [test row][training row]: the first training row is an SF
# property and the last training row is an ATX property, matching the labels
# printed below.
sf_to_atx = distances[0][0]
atx_to_atx = distances[0][-1]
print('Distance SF to ATX: {}mi'.format(round(sf_to_atx)))
print('Distance ATX to ATX: {}mi'.format(round(atx_to_atx)))

# Turn the stated expectations into real checks so that Restart & Run All
# fails loudly if compute_distances regresses, instead of relying on someone
# eyeballing the printed numbers.
assert abs(sf_to_atx - 1475) < 15, \
    'Expected roughly 1475mi between SF and ATX, got {}'.format(sf_to_atx)
assert abs(atx_to_atx) < 0.5, \
    'Expected ~0mi between identical ATX coordinates, got {}'.format(atx_to_atx)

Distance SF to ATX: 1477.0mi
Distance ATX to ATX: 0.0mi


#### Test - model uses LatLong to establish comparables
 - Use toy dataset with items in SF and ATX (roughly)
 - Pop off a single row in Austin as a 'test case'
 - Expect the model to return the average value of the remaining items in ATX ((40 + 30) / 2 = 35)

In [5]:
# Rebuild the train/test split for this test section: rows labelled 0-7 train,
# row 8 (an ATX row) is the held-out test case. `.loc` slices by label and
# includes both endpoints.
predictors = ['property_latitude', 'property_longitude', 'property_imprating']
train_rows, test_rows = slice(None, 7), slice(8, None)
X_train, y_train = df.loc[train_rows, predictors], df.loc[train_rows, 'sale_amount']
X_test, y_test = df.loc[test_rows, predictors], df.loc[test_rows, 'sale_amount']

In [6]:
# Build the model with only the latitude/longitude column names configured,
# fit it on the training rows, and predict the held-out ATX row.
nn_model = nn.NearNear(lat='property_latitude',
                        lon='property_longitude')
nn_model.fit(X_train,y_train)
y_pred = nn_model.predict(X_test)
print('Predicted value for our test case is: {}'.format(y_pred[0]))

# The two ATX rows left in the training set have sale_amount 40 and 30, so the
# expected prediction is their mean. Assert it so the notebook acts as a real
# test rather than a printout that must be checked by eye.
assert abs(y_pred[0] - 35) < 1e-9, \
    'Expected prediction of 35 (mean of ATX comparables), got {}'.format(y_pred[0])

Predicted value for our test case is: 35.0


#### Test - model uses LatLong and `comp_cat` parameter to establish comparables
 - Use toy dataset with items in SF and ATX (roughly)
 - Pop off a single ATX row with **C** for `property_imprating`
 - Expect model to use other ATX row with **C** for `property_imprating` and predict **40**

In [7]:
# Rebuild the train/test split for this test section: rows labelled 0-7 train,
# row 8 (an ATX row rated C) is the held-out test case. `.loc` slices by label
# and includes both endpoints.
predictors = ['property_latitude', 'property_longitude', 'property_imprating']
train_rows, test_rows = slice(None, 7), slice(8, None)
X_train, y_train = df.loc[train_rows, predictors], df.loc[train_rows, 'sale_amount']
X_test, y_test = df.loc[test_rows, predictors], df.loc[test_rows, 'sale_amount']

In [8]:
# Same flow as the lat/lon-only test, but additionally pass the
# `property_imprating` column as `comp_cat`.
nn_model = nn.NearNear(lat='property_latitude',
                        lon='property_longitude', comp_cat='property_imprating')
nn_model.fit(X_train,y_train)
y_pred = nn_model.predict(X_test)
print('Predicted value for our test case is: {}'.format(y_pred[0]))

# The held-out row is rated C; the only other ATX training row rated C has
# sale_amount 40, so that is the expected prediction. Assert it so a regression
# fails the notebook under Restart & Run All.
assert abs(y_pred[0] - 40) < 1e-9, \
    'Expected prediction of 40 (the matching C-rated ATX comparable), got {}'.format(y_pred[0])

Predicted value for our test case is: 40.0
