In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import pysal
from osgeo import gdal
import copy
import libpysal as lps
import scipy
from itertools import combinations

In [2]:
# bring in data
# Raw strings keep every Windows backslash literal; the original relied on
# unrecognised escapes such as "\p" and "\G", which Python flags as invalid
# escape sequences (a warning today, an error in a future release).
base = r"J:\Shared drives\TMD_TSA\Data\Parking\WebScraped_ParkingCost\required_inputs"

# parking costs
rates = pd.read_csv(base + r"\parking_cost_fullrec_NAP_F16.csv")

# spatial points; rows without a geometry are dropped
points = gpd.read_file(base + r"\GeocodedParkingLots\DKedits_parking_cost_fullrec_NAP.shp")
points = points.dropna(subset=["geometry"])

# join cost to points (default inner join: only lots whose URL appears in both tables are kept)
point_cols = ['IN_SingleL', 'geometry', 'USER_month', 'USER_lot_u']
rate_cols = ['IN_SingleLine', 'USER_lot_url', 'MR', 'DR', 'HR']
lots = points[point_cols].merge(rates[rate_cols],
                                left_on='USER_lot_u', right_on='USER_lot_url')

In [3]:
# bring in relevant TAZs
# Raw strings keep the Windows backslashes literal; the original "\S", "\T",
# "\D" and "\G" were invalid escape sequences that only worked by accident.
base2 = r"J:\Shared drives\TMD_TSA\Data\GIS Data\TAZ"
alltazs = gpd.read_file(base2 + r"\candidate_CTPS_TAZ_STATEWIDE_2019_wgs84.shp")
# Disabled town / id filter, kept for reference:
#alltazs = alltazs[(alltazs['town'].isin(["BOSTON","CAMBRIDGE","SOMERVILLE","BROOKLINE","NEWTON"])) & (
    #alltazs['id'] < 200000) & 
    #~(alltazs['id'].isin([397,398,399]))][["id","town","geometry"]]

In [4]:
def estimate_parking_costs(mr,dr,hr,mdr,dhr,mhr,elots):
    """Fill missing monthly/daily/hourly rates using sample-wide mean rate ratios.

    Lots with no rate in any of the three rate columns are dropped, the
    remaining lots are reprojected to EPSG:4326, and two estimation passes are
    run: each pass derives a rate from a neighbouring rate category (monthly
    from daily, daily from hourly, hourly from monthly) and uses that estimate
    only where no value is available yet.

    Parameters
    ----------
    mr, dr, hr : str
        Names of the monthly, daily and hourly rate columns in ``elots``.
    mdr, dhr, mhr : str
        Names for the ratio columns that are created (monthly/daily,
        daily/hourly and their product).
    elots : GeoDataFrame
        Lots to process; must provide ``to_crs`` and the three rate columns.
        It is not modified.

    Returns
    -------
    GeoDataFrame
        The filtered, reprojected lots with the ratio columns, ``Est_<rate>``,
        ``<rate>_wEst``, ``Est_<rate>2`` and ``<rate>_wEst2`` columns added.
    """
    # filter out customer only parking (no rates for any category).
    # Uses the frame and column names that were passed in rather than the
    # notebook-level `lots` and hard-coded 'MR'/'DR'/'HR'.
    has_any_rate = elots[[mr, dr, hr]].notna().any(axis=1)
    estmonth = elots[has_any_rate].to_crs("EPSG:4326")

    # 1. Calculate ratios. A zero denominator yields NaN (not None) so the
    #    ratio columns stay float and .mean() simply skips them.
    estmonth[mdr] = np.where(estmonth[dr] == 0, np.nan, estmonth[mr]/estmonth[dr])
    estmonth[dhr] = np.where(estmonth[hr] == 0, np.nan, estmonth[dr]/estmonth[hr])
    estmonth[mhr] = estmonth[dhr]*estmonth[mdr]

    # The ratio columns do not change after this point, so their means are
    # computed once and reused by both passes.
    mean_mdr = estmonth[mdr].mean()
    mean_dhr = estmonth[dhr].mean()
    mean_mhr = estmonth[mhr].mean()

    # 2. Multiply sample rates by means of ratios
    estmonth['Est_'+mr] = estmonth[dr] * mean_mdr
    estmonth['Est_'+dr] = estmonth[hr] * mean_dhr
    estmonth['Est_'+hr] = estmonth[mr] / mean_mhr

    # 3. Create a new field containing sample data if exists, otherwise use estimated value
    for tp in [mr, dr, hr]:
        estmonth[tp+'_wEst'] = np.where(estmonth[tp].isna(),estmonth['Est_'+tp],estmonth[tp])

    # 4. Second pass: estimate again from the gap-filled columns, which can
    #    fill values that had no usable source rate in the first pass.
    estmonth['Est_'+mr+'2'] = estmonth[dr+'_wEst'] * mean_mdr
    estmonth['Est_'+dr+'2'] = estmonth[hr+'_wEst'] * mean_dhr
    estmonth['Est_'+hr+'2'] = estmonth[mr+'_wEst'] / mean_mhr

    for tp in [mr, dr, hr]:
        estmonth[tp+'_wEst2'] = np.where(estmonth[tp+'_wEst'].isna(),estmonth['Est_'+tp+'2'],estmonth[tp+'_wEst'])

    return estmonth

In [5]:
# Run the rate gap-filling on the joined lots; the ratio column names are chosen here.
estmonth = estimate_parking_costs(
    mr="MR",
    dr="DR",
    hr="HR",
    mdr="Monthly_to_Daily",
    dhr="Daily_to_Hourly",
    mhr="Monthly_to_Hourly",
    elots=lots,
)

In [None]:
# Save the gap-filled lots as a flat table and as a spatial layer.
# Raw strings keep the Windows backslashes literal; the original paths relied
# on invalid escape sequences ("\S", "\T", "\e", ...) being passed through.
estmonth.to_csv(r"J:\Shared drives\TMD_TSA\Data\Parking\WebScraped_ParkingCost\estmonth_update.csv")
estmonth.to_file(r"J:\Shared drives\TMD_TSA\Data\Parking\WebScraped_ParkingCost\estmonth_update.geojson")

In [14]:
# Import the results of Local Moran's I.
# NOTE: this replaces the `estmonth` produced by estimate_parking_costs with
# the layer read from disk; the stored "index" column is dropped on load.
estmonth = gpd.read_file(
    "J:\\Shared drives\\TMD_TSA\\Data\\Parking\\WebScraped_ParkingCost\\tazs_avg_rates2010_barf.geojson"
).drop(columns=["index"])

# Disabled alternative that attached the COType_* columns via a nearest spatial join:
#estmonthLM = gpd.read_file("J:\Shared drives\TMD_TSA\Data\Parking\WebScraped_ParkingCost\\required_inputs\estmonth_April14_HR_DR_MR_LM.geojson")
#estmonth = estmonth.to_crs(estmonthLM.crs)

#estmonth = estmonth.drop(columns=["index_right"])
#estmonth = estmonth.sjoin_nearest(estmonthLM[["COType_HR","COType_DR","COType_MR","geometry"]], how="left")

# Keep only the first row for any repeated index label so later label-based
# filtering is unambiguous. Original note: lots 1 and 13 are very close to
# each other (see index_right) - presumably the duplicates came from the
# nearest join above; confirm whether this step is still needed.
is_repeat = estmonth.index.duplicated(keep='first')
estmonth = estmonth.loc[~is_repeat]

In [15]:
# Build the lot-by-TAZ euclidean distance matrix.
# Both layers are reprojected to EPSG:26986 first (Mass State Plane, meters,
# per the original note) so that distances are meaningful.
rdg83 = alltazs.to_crs("EPSG:26986").set_index("id")  # TAZ ids become the matrix column names
estmonth83 = estmonth.to_crs("EPSG:26986")            # lot index becomes the matrix row labels

# One row per lot, one column per TAZ.
# NOTE(review): rdg83 holds the TAZ geometries as loaded, so this measures the
# distance from each lot centroid to the TAZ geometry; the original comment
# spoke of TAZ centroids - confirm which is intended.
lot_points = estmonth83.centroid.geometry
eucdist = lot_points.apply(rdg83.distance)

In [16]:
# Distances in miles (dividing by 1609.34).
eucdistmi = eucdist.div(1609.34)
numlot = eucdistmi.shape[0]

# For every TAZ column keep only the 16 closest lots: anything farther than
# the 16th-smallest distance is blanked out, and the labels of the retained
# lots are recorded in `lpt` (ties at the cut-off distance are all retained).
lpt = {}
for col in eucdistmi.columns:
    # despite its name, big8 is the largest of the 16 smallest distances
    big8 = max(eucdistmi[col].nsmallest(16))
    too_far = eucdistmi[col] > big8
    eucdistmi.loc[too_far, col] = np.nan
    lpt[col] = eucdistmi.loc[eucdistmi[col] <= big8, col].index

# Replace every remaining distance with 1 so all retained lots weigh the same.
eucdistmi = eucdistmi.mask(eucdistmi.notna(), 1)

In [17]:
# TAZ ids in the prediction area: the five listed towns, ids below 200000.
in_pred_towns = alltazs['town'].isin(["BOSTON","CAMBRIDGE","SOMERVILLE",
                                      "BROOKLINE","NEWTON"])
below_id_cap = alltazs['id'] < 200000
tazids = alltazs.loc[in_pred_towns & below_id_cap, "id"].tolist()

# Index labels of the lots that are NOT flagged HL or LH, per time period;
# flagged lots are left out of the weighted average.
outlier_types = ["LH", "HL"]
hr_in = estmonth.loc[~estmonth['COType_HR'].isin(outlier_types)].reset_index()['index']
mr_in = estmonth.loc[~estmonth['COType_MR'].isin(outlier_types)].reset_index()['index']
dr_in = estmonth.loc[~estmonth['COType_DR'].isin(outlier_types)].reset_index()['index']

In [19]:
# calculate weighted average of the nearest-lot rates for every TAZ
#
# Fix over the earlier version: the average used to divide by TotalNN, the
# weight of ALL retained lots, although HL/LH lots (and lots without a rate)
# were left out of the numerator. Those lots therefore counted as a rate of 0
# and pulled the averages down. Each period now divides by the weight of the
# lots that actually contributed to its sum.

# 1. rates of the lots that are not outliers in each period
hr = estmonth["Hourly_Rate_wEst2"].filter(items = hr_in, axis=0)
dr = estmonth["Daily_Rate_wEst2"].filter(items = dr_in, axis=0)
mr = estmonth["Monthly_Rate_wEst2"].filter(items = mr_in, axis=0)

# 2. weights restricted to the same lots
W_hr = eucdistmi.filter(items = hr_in, axis=0)
W_dr = eucdistmi.filter(items = dr_in, axis=0)
W_mr = eucdistmi.filter(items = mr_in, axis=0)

# 3. weight x rate (NaN wherever the lot is not a retained neighbour or has no rate)
xWhr = W_hr.multiply(hr, axis="index")
xWdr = W_dr.multiply(dr, axis="index")
xWmr = W_mr.multiply(mr, axis="index")

# 4. numerators: sum weighted rates by TAZ
xW_hr_taz = xWhr.sum().rename("HRSum16")
xW_dr_taz = xWdr.sum().rename("DRSum16")
xW_mr_taz = xWmr.sum().rename("MRSum16")

# 5. denominators: sum, by TAZ, of the weights that entered each numerator
W_hr_taz = W_hr.where(xWhr.notna()).sum().rename("HRNN16")
W_dr_taz = W_dr.where(xWdr.notna()).sum().rename("DRNN16")
W_mr_taz = W_mr.where(xWmr.notna()).sum().rename("MRNN16")

# total weight of all retained lots by TAZ (kept for reference / compatibility)
W_taz = eucdistmi.sum()
W_taz.name = "TotalNN"

# join weighted rates sums by taz and sum weights by taz together
wAvg = pd.merge(W_taz,xW_hr_taz, left_index=True, right_index=True)
wAvg = pd.merge(wAvg,xW_dr_taz, left_index=True, right_index=True)
wAvg = pd.merge(wAvg,xW_mr_taz, left_index=True, right_index=True)

# set weighted rate sums to 0 where TAZ not in prediction area
in_area = wAvg.index.isin(tazids)
for period in ["HR", "DR", "MR"]:
    wAvg[period+"Sum16"] = np.where(in_area, wAvg[period+"Sum16"], 0)

# averages: 0 outside the prediction area; NaN inside it when no retained lot
# contributed a rate (0/0), rather than a misleading 0
for period, contributing in [("HR", W_hr_taz), ("DR", W_dr_taz), ("MR", W_mr_taz)]:
    wAvg["NN_Average_"+period] = (wAvg[period+"Sum16"] / contributing).where(in_area, 0)

# per-period denominators, appended after the existing columns so the
# original column order is unchanged
wAvg["HRNN16"] = W_hr_taz
wAvg["DRNN16"] = W_dr_taz
wAvg["MRNN16"] = W_mr_taz

In [20]:
# Attach the per-TAZ averages to the TAZ layer, matching on the TAZ id index of both frames.
tazs_avg_rates = rdg83.merge(wAvg, left_index=True, right_index=True)

In [21]:
# Save the TAZ-level averages as a spatial layer and as a geometry-free table.
# Raw strings keep the Windows backslashes literal; the original first path
# mixed escaped and unescaped backslashes ("\S", "\D", "\P", "\W" are invalid
# escape sequences).
tazs_avg_rates.to_file(r"J:\Shared drives\TMD_TSA\Data\Parking\WebScraped_ParkingCost\tazs_avg_rates_updated.geojson")
tazs_avg_rates.drop("geometry",axis=1).to_csv(r"J:\Shared drives\TMD_TSA\Data\Parking\WebScraped_ParkingCost\tazs_avg_rates_updated.csv")