In [None]:
import datetime
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from shapely.geometry import Point
from geopandas import GeoDataFrame
import os
# Render floats with five decimal places whenever pandas displays them.
pd.set_option("display.float_format", '{:.5f}'.format)

import seaborn as sns
# pandas display settings used throughout the notebook, applied in one pass.
for option_name, option_value in (
    ("display.max_colwidth", 0),
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
import sys
import logging

# Send notebook output and IPython's own log records to a per-run log file.
log_dir = "./logs"
# open() below raises FileNotFoundError when the directory does not exist yet.
os.makedirs(log_dir, exist_ok=True)
# str(datetime.now()) contains spaces and colons, which are not valid in file
# names on every platform; use a filename-safe timestamp instead.
run_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
nblog = open(os.path.join(log_dir, "rent_" + run_stamp + ".log"), "a+")
# NOTE(review): `.echo` is presumably the ipykernel OutStream hook that mirrors
# everything written to the stream -- confirm against the kernel in use.
sys.stdout.echo = nblog
sys.stderr.echo = nblog

ipython_log = get_ipython().log
if ipython_log.handlers:
    # Redirect the first existing handler to the log file.
    ipython_log.handlers[0].stream = nblog
else:
    # No handler configured: attach one instead of failing on handlers[0].
    ipython_log.addHandler(logging.StreamHandler(nblog))
ipython_log.setLevel(logging.INFO)

%autosave 5

In [None]:
%%time
# Load the localness table; pass nrows=10 to read_csv for a quick trial run.
df_localness = pd.read_csv('./data/food_localness_oct.csv', header=0)

# Location id = longitude text immediately followed by latitude text.
# NOTE(review): the two strings are joined without a separator, so different
# (lon, lat) pairs can produce the same id text -- confirm this is acceptable
# before relying on the id as a unique key.
lon_text = df_localness['customer_lon'].astype(str)
lat_text = df_localness['customer_lat'].astype(str)
df_localness['customer_loc_id'] = lon_text + lat_text

In [None]:
# Build a GeoDataFrame of rental listings (EPSG:3857) with price per unit floor size.

df_rent = pd.read_csv('./data/units_for_rent.csv', sep=',', header=0, thousands=',')
df_rent['price_persqt'] = df_rent['price'] / df_rent['floorsize']
df_rent = df_rent[['longitude', 'latitude', 'price_persqt']]

# A zero or missing floorsize makes price_persqt inf/NaN and a missing coordinate
# makes a NaN point; either would poison the distance-weighted averages computed
# later, so such rows are removed here and the number removed is reported.
usable_rows = df_rent.replace([np.inf, -np.inf], np.nan).notna().all(axis=1)
n_dropped = int((~usable_rows).sum())
if n_dropped:
    print('dropped', n_dropped, 'rentals with missing coordinates or undefined price_persqt')
df_rent = df_rent.loc[usable_rows].reset_index(drop=True)

# Vectorised point construction instead of one Point object per Python loop step.
geometry = gpd.points_from_xy(df_rent['longitude'], df_rent['latitude'])
gdf_rent = GeoDataFrame(df_rent, crs="EPSG:4326", geometry=geometry)
# Reproject so that planar (Euclidean) distances can be used by the KD-tree search.
gdf_rent = gdf_rent.to_crs(epsg=3857)
print(gdf_rent.shape[0])

In [None]:
%%time
# Build a GeoDataFrame with one row per distinct customer location.

customer_cols = ['customer_loc_id', 'customer_lon', 'customer_lat']
df_unique_customers = df_localness.loc[:, customer_cols].drop_duplicates()
print(len(df_unique_customers))
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_unique_customers['customer_lon'], df_unique_customers['customer_lat'])
]
# NOTE(review): geopandas also provides rename_geometry(); confirm that a plain
# column rename leaves the frame usable as intended in the installed version.
gdf_customer_loc = (
    GeoDataFrame(df_unique_customers, crs="EPSG:4326", geometry=geometry)
    .to_crs(epsg=3857)  # convert customer locations to 3857
    .rename(columns={"geometry": "customer_geometry"})
)

In [None]:
import numpy as np
from scipy.spatial import cKDTree

def ckdnearest_n(gdA, gdB, nearest_n, max_distance=2000, power=2):
    """Estimate a rent for every customer location by inverse-distance weighting.

    For each row of ``gdA`` the (at most) ``nearest_n`` rentals of ``gdB`` lying
    within ``max_distance`` are looked up with a KD-tree, and their
    ``price_persqt`` values are averaged with weights ``1 / distance**power``.

    Parameters
    ----------
    gdA : frame with a ``customer_geometry`` column of point objects exposing
        ``.x`` and ``.y``. It is modified in place: an ``approxi_rent`` column
        is added (or overwritten).
    gdB : frame with a ``geometry`` column of point objects exposing ``.x`` and
        ``.y`` and a numeric ``price_persqt`` column.
    nearest_n : int
        Maximum number of neighbouring rentals used per customer location.
    max_distance : float, default 2000
        Search radius, in the units of the point coordinates. Distances are
        plain Euclidean, so both frames are expected to share a projected CRS
        (the notebook converts both to EPSG:3857 beforehand).
    power : float, default 2
        Exponent applied to the distance in the inverse-distance weights.

    Returns
    -------
    The same ``gdA`` object, with the ``approxi_rent`` column filled in.
    Locations with no rental inside ``max_distance`` get ``0``.

    Notes
    -----
    * A rental at distance zero would get an infinite weight and turn the
      average into NaN; in that case the mean price of the coincident rentals
      is used instead.
    * ``nearest_n=1`` is supported (the KD-tree returns 1-D arrays then).
    * Progress is printed for every 1000th customer location.
    """
    # customer location coordinates, one (x, y) row each
    nA = np.array(
        list(gdA.customer_geometry.apply(lambda pt: (pt.x, pt.y))), dtype=float
    ).reshape(-1, 2)
    if len(nA) == 0:
        # Nothing to query; still provide the output column.
        gdA['approxi_rent'] = []
        return gdA

    # rent location coordinates
    nB = np.array(list(gdB.geometry.apply(lambda pt: (pt.x, pt.y))))
    # Positional price lookup, built once instead of once per customer.
    prices = np.asarray(gdB['price_persqt'], dtype=float)

    btree = cKDTree(nB)
    # Nearest rentals by Euclidean distance; neighbours that do not exist within
    # max_distance come back with an infinite distance.
    dist, idx = btree.query(nA, k=nearest_n, distance_upper_bound=max_distance)
    # With k=1 the results are 1-D; normalise to one row per customer location.
    dist = np.asarray(dist, dtype=float).reshape(len(nA), -1)
    idx = np.asarray(idx).reshape(len(nA), -1)

    allrent = []
    n_locations = len(nA)
    for i in range(n_locations):  # each loop for one customer location
        if i % 1000 == 0:
            print(datetime.datetime.now(), i, '/', n_locations)

        within = np.isfinite(dist[i])
        if not within.any():
            # No rental inside the search radius.
            allrent.append(0)
            continue

        distances = dist[i][within]
        weights = prices[idx[i][within]]

        coincident = distances == 0
        if coincident.any():
            # Exact hit: 1 / 0**power is infinite, so use the price(s) found there.
            rent_onelocation = weights[coincident].mean()
        else:
            # Inverse distance weights and the resulting weighted average.
            id_weights = 1 / distances**power
            rent_onelocation = np.sum(id_weights * weights) / np.sum(id_weights)
        allrent.append(rent_onelocation)

    gdA['approxi_rent'] = allrent
    return gdA

In [None]:
%%time
import traceback
# Estimate the rent at every unique customer location from its 20 nearest rentals.
try:
    unique_customerlocs_withrent = ckdnearest_n(gdf_customer_loc, gdf_rent, 20)
except Exception:
    # Keep printing the traceback explicitly (it is written to sys.stderr), but
    # re-raise afterwards: swallowing the error would let the following cells run
    # against an undefined result, or a stale one left over from an earlier run.
    traceback.print_exc()
    raise

In [None]:
# Number of customer locations in the result, followed by a preview of the first rows.
print(len(unique_customerlocs_withrent))
unique_customerlocs_withrent.head()

In [None]:
# Write only the location id and its estimated rent to disk, without the index.
rent_estimates = unique_customerlocs_withrent[['customer_loc_id', 'approxi_rent']]
rent_estimates.to_csv('./data/estimated_rent_customerlocations_oct.csv', index=False)