In [1]:
import os

import geopandas as gpd
import requests

from blue_conduit_spatial.utilities import *

In [2]:
# Maps the column names found in the raw file to the descriptive names used in this notebook.
# Keys are at most 10 characters long (presumably the shapefile field-name limit truncated
# them -- TODO confirm); numbered suffixes such as `_1` / `_2` appear where truncated names
# would otherwise collide (e.g. 'Homestead' vs 'Homestea_1'). Columns whose name is already
# complete map to themselves.
# NOTE(review): 'B_househod_no_elderly' looks like a typo for "household"; left as-is
# because other code may already depend on that column name.
col_name_dictionary = {'pid': 'pid', 'Property Z': 'Property Zip Code', 'Owner Type': 'Owner Type',
                       'Owner Stat': 'Owner State', 'Homestead': 'Homestead', 'Homestea_1': 'Homestead Percent',
                       'HomeSEV': 'HomeSEV', 'Land Value': 'Land Value', 'Land Impro': 'Land Improvements Value',
                       'Residentia': 'Residential Building Value', 'Resident_1': 'Residential Building Style',
                       'Commercial': 'Commercial Building Value', 'Building S': 'Building Storeys',
                       'Parcel Acr': 'Parcel Acres', 'Rental': 'Rental', 'Use Type': 'Use Type',
                       'Prop Class': 'Prop Class', 'Old Prop c': 'Old Prop class', 'Year Built': 'Year Built',
                       'USPS Vacan': 'USPS Vacancy', 'Zoning': 'Zoning', 'Future Lan': 'Future Landuse',
                       'DRAFT Zone': 'DRAFT Zone', 'Housing Co': 'Housing Condition 2012',
                       'Housing _1': 'Housing Condition 2014', 'Commerci_1': 'Commercial Condition 2013',
                       'Latitude': 'Latitude', 'Longitude': 'Longitude', 'Hydrant Ty': 'Hydrant Type',
                       'Ward': 'Ward', 'PRECINCT': 'PRECINCT', 'CENTRACT': 'CENTRACT', 'CENBLOCK': 'CENBLOCK',
                       'SL_Type': 'SL_Type', 'SL_Type2': 'SL_Type2', 'SL_Lead': 'SL_Lead', 'Ed_July': 'Ed_July',
                       'Ed_March': 'Ed_March', 'Last_Test': 'Last_Test', 'Max_Lead': 'Max_Lead',
                       'Med_Lead': 'Med_Lead', 'Num_Tests': 'Num_Tests', 'Res_Test': 'Res_Test',
                       'Sen_Test': 'Sen_Test', 'SL_private': 'SL_private_inspection',
                       'B_median_a': 'B_median_age_all_women', 'B_median_1': 'B_median_age_all_men', 
                       'B_median_2': 'B_median_age_all', 'B_median_3': 'B_median_age_all_women_white',
                       'B_median_4': 'B_median_age_all_men_white', 'B_median_5': 'B_median_age_all_white',
                       'B_median_6': 'B_median_age_all_women_black', 'B_median_7': 'B_median_age_all_men_black',
                       'B_median_8': 'B_median_age_all_black', 'B_total_bl': 'B_total_black_pop',
                       'B_total_wh': 'B_total_white_pop', 'B_married_': 'B_married_couples',
                       'B_single_w': 'B_single_women', 'B_marrie_1': 'B_married_couples_white',
                       'B_single_1': 'B_single_women_white', 'B_marrie_2': 'B_married_couples_black',
                       'B_single_2': 'B_single_women_black', 'B_marrie_3': 'B_married_couples_w_children',
                       'B_single_m': 'B_single_mothers_w_children', 'B_househol': 'B_households_w_elderly',
                       'B_househod': 'B_househod_no_elderly', 'B_aggregat': 'B_aggregate_income',
                       'B_speak_sp': 'B_speak_spanish', 'B_speak_on': 'B_speak_only_english',
                       'B_no_engli': 'B_no_english', 'B_hispanic': 'B_hispanic_household',
                       'B_imputed_': 'B_imputed_rent', 'B_impute_1': 'B_imputed_value',
                       'known_priv': 'known_private_sl', 'known_publ': 'known_public_sl', 'hydrovac': 'hydrovac',
                       'sl_priva_1': 'sl_private_type', 'sl_public_': 'sl_public_type', 'created_at': 'created_at',
                       'source': 'source', 'hv_visit': 'hv_visit', 'sl_visit': 'sl_visit', 'replaced': 'replaced',
                       'dangerous': 'dangerous', 'geometry': 'geometry'}
# Read the raw dataset at ../data/raw/flint_sl_materials/ and restore the full
# column names in a single chained expression.
sl_df = gpd.read_file('../data/raw/flint_sl_materials/').rename(columns=col_name_dictionary)

In [3]:
# Keep only rows that have both coordinates. A missing latitude would otherwise
# turn into the text 'nan' inside `longlatstr` and into NaN in the distance inputs.
# `.copy()` makes the filtered frame independent, so the column assignment below
# does not write into a slice of the original frame (SettingWithCopyWarning).
sl_df = sl_df.loc[sl_df['Longitude'].notna() & sl_df['Latitude'].notna()].copy()

# "longitude,latitude" text per row, rounded to 6 decimals -- the same order the
# routing URLs in this notebook are built with.
sl_df['longlatstr'] = (
    sl_df['Longitude'].round(6).astype(str) + ',' + sl_df['Latitude'].round(6).astype(str)
)

# Drop rows without a `dangerous` value. reset_index() is called without
# drop=True on purpose: as before, the previous index is kept as a column.
sl_df = sl_df.loc[sl_df['dangerous'].notna()].reset_index()

In [4]:
%%time
from sklearn.metrics.pairwise import euclidean_distances, haversine_distances
N_have = 10000
# Radius of the earth in km; scales the haversine output to kilometres.
EARTH_RADIUS_KM = 6371

# Coordinates in radians, latitude first, as passed to haversine_distances below.
sl_df_coord = sl_df[['Latitude', 'Longitude']].apply(np.radians)

# Release a matrix left over from a previous run of this cell before allocating
# a new one. Only "name not defined yet" is an expected failure here, so nothing
# else is swallowed.
try:
    del have_dists
except NameError:
    pass

have_dists = haversine_distances(sl_df_coord[:N_have])

# Reduce first, then scale: multiplying the whole matrix for every statistic
# would allocate a full-size temporary each time.
print(f"Haversine Max: {have_dists.max() * EARTH_RADIUS_KM:0.4f} km; Min: {have_dists.min() * EARTH_RADIUS_KM:0.4f}")
print(f"Haversine: Mean={have_dists.mean() * EARTH_RADIUS_KM:0.4f} km; SD={have_dists.std() * EARTH_RADIUS_KM:0.4f} km")

print(f"Size: {have_dists.nbytes*1e-9:0.2f} GB")

# Save out haversine distances
# Index label -> pid for every row of sl_df, paired column-wise instead of
# building one row object per entry with .iloc.
idx2pid = dict(zip(sl_df_coord.index, sl_df['pid']))
np.savez("../data/processed/haversine_dists.npz", haversine_distances = have_dists,
        idx2pid = idx2pid, allow_pickle=True)

Haversine Max: 19.0465 km; Min: 0.0000
Haversine: Mean=4.6467 km; SD=2.3708 km
Size: 0.80 GB
CPU times: user 19.7 s, sys: 2.44 s, total: 22.2 s
Wall time: 27 s


In [5]:
# Address of the routing server queried for road distances (the port is added
# where the request URL is built). Set the ROUTING_SERVER_IP environment variable
# to point the notebook at another server without editing this cell.
ip = os.environ.get("ROUTING_SERVER_IP", "54.144.55.140")


#calculate_street_distance(0, [1, 2])

In [79]:
class RoadDistanceMatrix:
    """N x N table of road travel times between the first N rows of a dataframe.

    Every entry starts at 1e5. `fit` overwrites the entries of each row whose
    baseline distance is below a limit with the durations returned by the
    routing server's table endpoint.

    Attributes:
        road_dist_arr: float64 array of shape (N, N) holding the durations.
        N: number of rows/columns of `road_dist_arr`.
        lat_long_df: the 'Latitude' and 'Longitude' columns of `df`
            (cut down to the first N rows by `fit`).
        pids: the 'pid' column of `df`, kept as a one-column dataframe.
        limit: the limit passed to the last `fit` call, or None before `fit`.
        max_query: largest number of destination points sent in one request.
    """

    def __init__(self, N, df):
        self.road_dist_arr = self._create_road_dist_array(N)
        self.N = self.road_dist_arr.shape[0]
        self.lat_long_df = df[['Latitude', 'Longitude']]
        self.pids = df[['pid']]

        # Defined up front so that save() before fit() does not fail with AttributeError.
        self.limit = None

        # Set up error tracking for largest number of points queried
        self.max_query = 0

    def fit(self, base_dists, ip, limit=0.5):
        """Fits road distance matrix.

        Args:
            base_dists: square baseline distance matrix; it is multiplied by
                6371 (earth radius in km) and cut down to N x N before use.
            ip: address of the routing server.
            limit: pairs whose scaled baseline distance is below this value are queried.
        """
        have_dists_adj = self._convert_subset_baseline_dists(base_dists, N_road=self.N)
        self.lat_long_df = self.lat_long_df.iloc[: self.N]
        # Recorded before the network-bound loop so a partially filled matrix
        # can still be saved together with the limit it was built with.
        self.limit = limit
        self.populate_road_dist_matrix(have_dists_adj, ip, limit)

    def save(self, filepath):
        """Writes the matrix, the index -> pid mapping and the limit to `filepath` with np.savez."""
        # Pair index labels with pids column-wise; zip stops at the shorter of the
        # two, i.e. at the rows currently held in lat_long_df.
        idx2pid = dict(zip(self.lat_long_df.index, self.pids['pid'].to_numpy()))
        np.savez(filepath, road_distances = self.road_dist_arr, idx2pid = idx2pid, limit=self.limit, allow_pickle=True)

    def populate_road_dist_matrix(self, base_dists, ip, limit=0.5):
        """Populates the road distance array, one request per row.

        For row i, every column whose `base_dists` value is below `limit` is sent
        to the routing server and the returned durations are written into that row.
        Rows without any such column are left untouched.
        """
        for i in range(self.N):
            if i % 100 == 0:
                print(f"Finished Row {i}")
            idx = np.flatnonzero(base_dists[i] < limit)
            n_query = len(idx)
            if n_query == 0:
                continue
            self.max_query = max(self.max_query, n_query)
            self.road_dist_arr[i, idx] = RoadDistanceMatrix.calculate_street_distance(
                i, idx, ip_add=ip, df=self.lat_long_df
            )

    def _create_road_dist_array(self, N_road=10):
        """Returns an (N_road, N_road) float64 array filled with 1e5."""
        return np.full((N_road, N_road), 1e5, dtype='float64')

    def _convert_subset_baseline_dists(self, dists, N_road=10):
        """Converts haversine distance matrix to be in km and subset to same size as road array"""
        return dists[:N_road, :N_road] * 6371 # 6371 is radius of earth in km

    @staticmethod
    def create_long_lat_string(df, j):
        """Helper method to build the "longitude,latitude" string (6 decimals) for positional row j."""
        row = df.iloc[j]
        return str(row['Longitude'].round(6)) + ',' + str(row['Latitude'].round(6))

    @staticmethod
    def _build_table_url(origin, destinations, ip_add, how):
        """Builds the table-endpoint URL with `origin` as the only source point."""
        return f"http://{ip_add}:5000/table/v1/{how}/{origin};{';'.join(destinations)}?sources=0"

    @staticmethod
    def _durations_from_response(output):
        """Extracts the origin -> destination durations from a decoded table response.

        Raises:
            KeyError: if the response has no 'durations' entry; the message carries
                the server's 'message' field (or the whole response) for diagnosis.
        """
        if 'durations' not in output:
            detail = output.get('message', output) if isinstance(output, dict) else output
            raise KeyError(f"'durations' missing from routing response: {detail}")
        # Row 0 belongs to the origin; its first entry is the origin itself.
        # dtype=float turns any None in the list into NaN.
        return np.array(output['durations'][0][1:], dtype=float)

    @staticmethod
    def calculate_street_distance(i, j_list, ip_add, df, how='walking', timeout=60):
        """Returns street distance time in seconds from row i to every row in j_list.

        Args:
            i: positional index of the origin row in `df`.
            j_list: positional indices of the destination rows.
            ip_add: address of the routing server.
            df: dataframe with 'Latitude' and 'Longitude' columns.
            how: routing profile placed in the URL.
            timeout: seconds to wait for the server before requests raises.

        Returns:
            1-D float array with one duration per entry of `j_list`.
        """
        # Nothing to ask for: skip the request instead of sending a URL with an
        # empty destination list.
        if len(j_list) == 0:
            return np.array([], dtype=float)

        origin = RoadDistanceMatrix.create_long_lat_string(df, i)
        # Separate loop name: the origin index `i` is never rebound.
        destinations = [RoadDistanceMatrix.create_long_lat_string(df, j) for j in j_list]

        url = RoadDistanceMatrix._build_table_url(origin, destinations, ip_add, how)
        r = requests.get(url, timeout=timeout)
        return RoadDistanceMatrix._durations_from_response(r.json())

In [6]:
# NOTE(review): this import rebinds the name RoadDistanceMatrix. When the cells run
# top to bottom it replaces the class defined in the cell above, so the cells
# below use the packaged implementation.
from blue_conduit_spatial.distance_matrix import RoadDistanceMatrix

In [7]:
%%time
# Small trial run: a 20-row RoadDistanceMatrix built from sl_df and fitted against
# the haversine matrix with limit=0.1. In the class shown in this notebook the
# limit is compared against baseline distances scaled to km; presumably the
# imported implementation behaves the same -- TODO confirm.
rdmat = RoadDistanceMatrix(N=20, df=sl_df)
rdmat.fit(have_dists, ip=ip, limit=0.1)
#rdmat.save("../data/processed/road_dists.npz")

#RoadDistanceMatrix.calculate_street_distance(0, [1,2,3], ip_add=ip, df=sl_df[['Latitude', 'Longitude']], how='walking')

Finished Row 0
CPU times: user 293 ms, sys: 20.9 ms, total: 314 ms
Wall time: 1.33 s


In [8]:
# Largest number of points sent in a single request during fit (as tracked by
# the RoadDistanceMatrix class shown in this notebook).
rdmat.max_query

7

In [31]:
%%time

# Fills road_dist_arr row by row: for each origin i, the columns whose new_y value
# (scaled by 6371) is below `limit` are queried and written into row i.
# NOTE(review): N_road, new_y, road_dist_arr and calculate_street_distance are not
# defined in any visible cell, so this cell depends on leftover kernel state.
limit = 0.5
idx_list = []  # NOTE(review): never appended to or read in this cell
for i in range(N_road):
    if i % 100 == 0:
        print(i)
    idx = np.argwhere(new_y[i] * 6371 < limit).flatten()
    if len(idx) > 0:
        x = calculate_street_distance(i, idx)
    else:
        x = []
    road_dist_arr[i][idx] = x

print(f"Road dist. matrix size: {road_dist_arr.nbytes*1e-9:0.2f} GB.")
road_dist_arr[0]

0
Road dist. matrix size: 0.00 GB.
CPU times: user 122 ms, sys: 742 µs, total: 123 ms
Wall time: 612 ms


array([ 0. , 46.7, 46.9, 49. ,  0. , 49.8, 46.1, 44.2, 44. , 44.1])

### DEPRECATED

In [143]:
%%time
import math
ip = "54.144.55.140"
def street_distance(i, j, ip_add=ip, df=sl_df[['Latitude', 'Longitude']], how='walking', timeout=60):
    """Returns the street travel-time table (seconds) for row i and rows i+1 .. j-1.

    No source restriction is placed on the request, so the server's full
    `durations` table for all submitted points is returned unchanged.

    Args:
        i: positional index of the first point in `df`.
        j: exclusive upper bound of the positional range of the other points.
        ip_add: address of the routing server (default captured when the function is defined).
        df: dataframe with 'Latitude' and 'Longitude' columns (default captured
            when the function is defined).
        how: routing profile placed in the URL.
        timeout: seconds to wait for the server before requests raises.

    Raises:
        KeyError: if the response has no 'durations' entry; the message carries
            the server's 'message' field (or the whole response).
    """
    def ll_str(k):
        # "longitude,latitude" for positional row k, rounded to 6 decimals
        row = df.iloc[k]
        return str(row['Longitude'].round(6)) + ',' + str(row['Latitude'].round(6))

    origin = ll_str(i)
    # Own loop name: the origin index `i` is never rebound while the list is built.
    destinations = ';'.join(ll_str(k) for k in range(i + 1, j))

    url = f"http://{ip_add}:5000/table/v1/{how}/{origin};{destinations}"
    r = requests.get(url, timeout=timeout)
    output = r.json()

    if 'durations' not in output:
        detail = output.get('message', output) if isinstance(output, dict) else output
        raise KeyError(f"'durations' missing from routing response: {detail}")
    return output['durations']
#n = street_distance(0, 100)

KeyError: 'durations'

In [140]:
# NOTE(review): in the visible cells `n` is only assigned by the commented-out call
# at the end of the previous cell, so this fails on a fresh kernel unless that
# call is re-enabled.
np.array(n).shape

(1, 10000)

In [96]:
%%time
N = 100
# One row per origin; each row is filled from a single street_distance call.
road_dist_array = np.zeros((N, N))
for i in range(N):
    road_dist_array[i] = street_distance(i, N-1)

CPU times: user 8.23 s, sys: 0 ns, total: 8.23 s
Wall time: 14 s


In [98]:
# Show the table filled by the loop in the previous cell.
road_dist_array

array([[  0. ,   0. ,  46.7, ..., 345.8, 377.2, 372.8],
       [  0. ,  35.9,   0. , ..., 363.5, 394.9, 396.4],
       [  0. ,  43.6,  73.4, ..., 350.8, 382.2, 354.7],
       ...,
       [  0. , 365.1, 362. , ...,  66.7,   0. ,  80.7],
       [  0. , 376.4, 375.3, ..., 116.6,  75. ,   0. ],
       [  0. , 362.4, 359.3, ...,  91.7,  94.3,  74.3]])

In [5]:
%%time
N = 100
limit = 0.5
# For each of the first N rows, find the columns whose haversine distance
# (scaled by 6371 to km) is below `limit`. Nothing is stored per row, so only
# the indices of the last row remain in `idx` after the loop.
for i in range(N):
    idx = np.flatnonzero(have_dists[i] * 6371 < limit)

CPU times: user 14.6 ms, sys: 688 µs, total: 15.3 ms
Wall time: 11.7 ms


In [17]:
%%time
# Coordinates are "longitude,latitude" strings -- longitude first, the same order
# the helper functions in this notebook build.
latlonglist = ["-83.738798,43.018999"]

# Points are separated by ';' in the request path.
longstr = ';'.join(latlonglist)

ip_add = "54.144.55.140"
init_lat_long = "-83.703084,43.018999"
how = "driving" #"walking" or "driving"
url = f"http://{ip_add}:5000/table/v1/{how}/{init_lat_long};{longstr}"
print(url)
r = requests.get(url)
output = r.json()
# output is in seconds
output['durations']

http://54.144.55.140:5000/table/v1/driving/-83.703084,43.018999;-83.738798,43.018999
CPU times: user 9.79 ms, sys: 0 ns, total: 9.79 ms
Wall time: 69.1 ms


[[0, 520.9], [534.3, 0]]

In [6]:
# Count of (row, column) positions in have_dists whose distance in km is below
# `limit`. have_dists is the pairwise matrix of one point set, so this presumably
# counts each pair in both orders plus the diagonal. `limit` comes from another cell.
np.argwhere(have_dists * 6371 < limit).shape

(2727084, 2)

In [7]:
N_TEST = 10000
# 1e5 on and below the diagonal, 0 above it.
x = np.tril(np.full((N_TEST, N_TEST), 1e5))
# Adding the mask lifts the diagonal and lower triangle far above any distance
# threshold, leaving only the upper-triangle entries able to pass one.
y = x + have_dists

In [164]:
# Show the masked distance matrix built in the previous cell.
y

array([[1.00000000e+05, 2.38114699e-05, 3.18948539e-05, ...,
        4.42230932e-04, 4.41770459e-04, 3.88888651e-04],
       [1.00000000e+05, 1.00000000e+05, 5.47877364e-05, ...,
        4.63505730e-04, 4.61225399e-04, 4.10839840e-04],
       [1.00000000e+05, 1.00000000e+05, 1.00000000e+05, ...,
        4.10544135e-04, 4.11049448e-04, 3.57024383e-04],
       ...,
       [1.00000000e+05, 1.00000000e+05, 1.00000000e+05, ...,
        1.00000000e+05, 6.71969822e-05, 6.03603816e-05],
       [1.00000000e+05, 1.00000000e+05, 1.00000000e+05, ...,
        1.00000000e+05, 1.00000000e+05, 1.05360468e-04],
       [1.00000000e+05, 1.00000000e+05, 1.00000000e+05, ...,
        1.00000000e+05, 1.00000000e+05, 1.00000000e+05]])

In [9]:
# (row, column) positions of the masked matrix y whose distance in km is below `limit`.
# NOTE(review): an assignment displays nothing, so the shape shown under this cell
# presumably came from an earlier version of it.
big_idx = np.argwhere(y * 6371 < limit)

(1358542, 2)

In [77]:
# Smallest per-row count of strictly positive entries in road_dist_arr.
# NOTE(review): road_dist_arr is not defined in any visible cell.
(road_dist_arr > 0).sum(axis=1).min()

0

In [78]:
# Memory used by road_dist_arr's data, in bytes.
road_dist_arr.nbytes

800

dtype('float64')