# Joining our addresses to nearest nodes 
Notebook by Donna

I am trying to match all of our addresses of origins and destinations to the nearest node in the SF street network


In [1]:
%%capture
!pip install osmnx

In [2]:
import numpy as np
import pandas as pd
import osmnx as ox, networkx as nx
ox.config(log_console=True, use_cache=True)

# 1. set osmnx to look at San Francisco only

In [3]:
G = ox.graph_from_place('San Francisco, California, USA', network_type='drive')

# 2. Import all of our addresses that need to be noded
Origins are drugstores (CVS, Walgreens)
Destinations are hospitals and other health centers

They are in two list with addresses and lat/long. It is easiest to find the shortest path between origin and destination if we convert the addresses to nearest node. 

In [4]:
#import origins (drugstores)
url = 'https://raw.githubusercontent.com/ryglaws/255_Final/master/4-16_drugstore.csv'
origin = pd.read_csv(url, error_bad_lines=False)
origin= pd.DataFrame(origin)
origin.head()

Unnamed: 0.1,Unnamed: 0,facility_name,facility_type,location,zip,contact,geocode,point,latitude,longitude,altitude
0,0,CVS,Drug Store,"731 Market St, San Francisco, CA 94103",94103,+1 415-243-0273,"CVS Pharmacy, 731, Market Street, Union Square...","(37.7868371, -122.4041386, 0.0)",37.786837,-122.404139,0.0
1,1,CVS,Drug Store,"351 California St, San Francisco, CA 94104",94104,+1 415-243-0273,"351, California Street, Financial District, Sa...","(37.7928462, -122.40090691922816, 0.0)",37.792846,-122.400907,0.0
2,2,CVS,Drug Store,"400 Sutter St, San Francisco, CA 94108",94108,+1 415-398-2175,"400, Sutter Street, Union Square, San Francisc...","(37.789496, -122.407127, 0.0)",37.789496,-122.407127,0.0
3,3,CVS,Drug Store,"3600 Geary Blvd, San Francisco, CA 94118",94118,+1 415-668-6083,"3600, Geary Boulevard, Jordan Park, San Franci...","(37.78114096774194, -122.45804891935485, 0.0)",37.781141,-122.458049,0.0
4,4,CVS,Drug Store,"701 Portola Dr, San Francisco, CA 94127",94127,+1 415-504-6043,"CVS Pharmacy, 701, Portola Drive, Forest Hill,...","(37.744190950000004, -122.45325431912494, 0.0)",37.744191,-122.453254,0.0


In [5]:
#import destinations (hospitals and health centers)
url = 'https://raw.githubusercontent.com/ryglaws/255_Final/master/4-16_healthcare.csv'
dest = pd.read_csv(url, error_bad_lines=False)
dest = pd.DataFrame(dest)
dest.head()

Unnamed: 0.1,Unnamed: 0,facility_name,facility_type,location,zip,contact,geocode,point,latitude,longitude,altitude
0,88,California Pacific Med Ctr-davies Campus,General Acute Care Hospital,"45 Castro St, San Francisco, CA 94114",94114,Unknown,"CPMC Davies Campus, 45, Castro Street, Duboce ...","(37.768256949999994, -122.43462122694451, 0.0)",37.768257,-122.434621,0.0
1,89,On Lok Senior Health Services-mission Center,Community Clinic,"1333 Bush St, San Francisco, CA 94109",94109,Unknown,"1333, Bush Street, Nob Hill, San Francisco, Sa...","(37.788752836734695, -122.41923185714285, 0.0)",37.788753,-122.419232,0.0
2,92,Special Programs For Youth,Community Health Network,"375 Woodside Ave, San Francisco, CA 94127",94127,Unknown,San Francisco Superior Court - Juvenile Justic...,"(37.74606115, -122.45228484733647, 0.0)",37.746061,-122.452285,0.0
3,93,Mission Neighborhood Health Ctr.-excelsior Clinic,Community Clinic,"4434 Mission St, San Francisco, CA 94112",94112,Unknown,"4434, Mission Street, Excelsior, San Francisco...","(37.726757, -122.43335016666667, 0.0)",37.726757,-122.43335,0.0
4,95,Laguna Honda Hospital And Rehabilitation Center,General Acute Care Hospital,"375 Laguna Honda Blvd, San Francisco, CA 94116",94116,Unknown,Laguna Honda Hospital And Rehabilitation Cente...,"(37.7494814, -122.45729651523612, 0.0)",37.749481,-122.457297,0.0


# 3. Isolate the lat longs of the origins and destinations

In [6]:
#Origin data
orig_df = pd.DataFrame({'place': origin['facility_name'], 'location': origin['location'],'x': origin['longitude'], 'y': origin['latitude']})
orig_df

Unnamed: 0,place,location,x,y
0,CVS,"731 Market St, San Francisco, CA 94103",-122.404139,37.786837
1,CVS,"351 California St, San Francisco, CA 94104",-122.400907,37.792846
2,CVS,"400 Sutter St, San Francisco, CA 94108",-122.407127,37.789496
3,CVS,"3600 Geary Blvd, San Francisco, CA 94118",-122.458049,37.781141
4,CVS,"701 Portola Dr, San Francisco, CA 94127",-122.453254,37.744191
...,...,...,...,...
78,Walgreens,"2050 Irving St, San Francisco, CA 94122",-122.480004,37.763636
79,Walgreens,"825 Market St, San Francisco, CA 94103",-122.411346,37.781354
80,Walgreens,"33 Drumm St, San Francisco, CA 94111",-122.396690,37.794103
81,Walgreens,"2262 Market St, San Francisco, CA 94114",-122.432402,37.764965


In [7]:
orig_df.shape

(83, 4)

In [8]:
s_orig_df = orig_df.sort_values('location').drop_duplicates('location')
s_orig_df.shape

(75, 4)

In [9]:
orig_df = s_orig_df

In [10]:
#Destination data
dest_df = pd.DataFrame({'place': dest['facility_name'], 'location': dest['location'],'x': dest['longitude'], 'y': dest['latitude']})
dest_df

Unnamed: 0,place,location,x,y
0,California Pacific Med Ctr-davies Campus,"45 Castro St, San Francisco, CA 94114",-122.434621,37.768257
1,On Lok Senior Health Services-mission Center,"1333 Bush St, San Francisco, CA 94109",-122.419232,37.788753
2,Special Programs For Youth,"375 Woodside Ave, San Francisco, CA 94127",-122.452285,37.746061
3,Mission Neighborhood Health Ctr.-excelsior Clinic,"4434 Mission St, San Francisco, CA 94112",-122.433350,37.726757
4,Laguna Honda Hospital And Rehabilitation Center,"375 Laguna Honda Blvd, San Francisco, CA 94116",-122.457297,37.749481
...,...,...,...,...
56,Ucsf Medical Center,"505 Parnassus Ave, San Francisco, CA 94143",-122.457403,37.763070
57,North East Medical Services - Noriega,"1450 Noriega St, San Francisco, CA 94122",-122.479578,37.754248
58,On Lok Senior Health Services - Bush St,"1000 Montgomery St, San Francisco, CA 94133",-122.403813,37.798362
59,Richmond Maxi-center,"3626 Balboa St, San Francisco, CA 94121",-122.497905,37.775817


In [11]:
dest_df.shape

(61, 4)

In [15]:
s_dest_df = dest_df.sort_values('location').drop_duplicates('location')
s_dest_df.shape

(54, 4)

In [17]:
dest_df=s_dest_df
dest_df

Unnamed: 0,place,location,x,y
58,On Lok Senior Health Services - Bush St,"1000 Montgomery St, San Francisco, CA 94133",-122.403813,37.798362
25,General Medical Clinic At Sfgh,"1001 Potrero Ave, San Francisco, CA 94110",-122.40379,37.754548
43,Adult Immunization & Travel Clinic,"101 Grove St, San Francisco, CA 94102",-122.418425,37.778175
10,Potrero Hill Health Center,"1050 Wisconsin St, San Francisco, CA 94107",-122.39883,37.754023
20,Native American Health Center,"1089 Mission St, San Francisco, CA 94103",-122.410563,37.779391
52,Maxine Hall Health Center,"1301 Pierce St, San Francisco, CA 94115",-122.435945,37.782046
1,On Lok Senior Health Services-mission Center,"1333 Bush St, San Francisco, CA 94109",-122.419232,37.788753
21,Larkin Street Medical Clinic,"134 Golden Gate Ave, San Francisco, CA 94102",-122.412855,37.782076
36,Ocean Park Health Center,"1351 24th Ave, San Francisco, CA 94122",-122.482714,37.762392
48,On Lok Senior Health Services - Powell,"1441 Powell St, San Francisco, CA 94133",-122.410661,37.799309


# 4. Associate the latlongs with nearest nodes

In [18]:
def nearest_node(row):
    return ox.get_nearest_node(G, (row.y, row.x), method='euclidean')

orig_df['orig_node'] = orig_df.apply(nearest_node, axis=1)  # axis=1 means do row-wise calculations (not column-wise)

orig_df.head(83)

Unnamed: 0,place,location,x,y,orig_node
12,CVS,"1 Jefferson St, San Francisco, CA 94133",-122.412700,37.808456,2071244791
6,CVS,"1059 Hyde St, San Francisco, CA 94109",-122.417413,37.790559,65295336
5,CVS,"1101 Market St, San Francisco, CA 94103",-122.412434,37.780237,4061806118
41,Walgreens,"1175 Columbus Ave, San Francisco, CA 94133",-122.417094,37.804969,65296818
76,Walgreens,"1189 Potrero Ave, San Francisco, CA 94110",-122.406254,37.753238,65308820
...,...,...,...,...,...
36,Walgreens,"790 Van Ness Ave, San Francisco, CA 94102",-122.420555,37.782810,258758552
10,CVS,"799 Beach St, San Francisco, CA 94109",-122.421576,37.806437,65297259
33,Walgreens,"825 Market St, San Francisco, CA 94103",-122.411346,37.781354,763026749
32,Walgreens,"88 Spear St, San Francisco, CA 94105",-122.392628,37.791392,65303495


In [19]:
dest_df['dest_node'] = dest_df.apply(nearest_node, axis=1)

dest_df.sort_values('location')

Unnamed: 0,place,location,x,y,dest_node
58,On Lok Senior Health Services - Bush St,"1000 Montgomery St, San Francisco, CA 94133",-122.403813,37.798362,65305848
25,General Medical Clinic At Sfgh,"1001 Potrero Ave, San Francisco, CA 94110",-122.40379,37.754548,65340189
43,Adult Immunization & Travel Clinic,"101 Grove St, San Francisco, CA 94102",-122.418425,37.778175,65324327
10,Potrero Hill Health Center,"1050 Wisconsin St, San Francisco, CA 94107",-122.39883,37.754023,65318149
20,Native American Health Center,"1089 Mission St, San Francisco, CA 94103",-122.410563,37.779391,65303696
52,Maxine Hall Health Center,"1301 Pierce St, San Francisco, CA 94115",-122.435945,37.782046,5745956392
1,On Lok Senior Health Services-mission Center,"1333 Bush St, San Francisco, CA 94109",-122.419232,37.788753,65319961
21,Larkin Street Medical Clinic,"134 Golden Gate Ave, San Francisco, CA 94102",-122.412855,37.782076,65308264
36,Ocean Park Health Center,"1351 24th Ave, San Francisco, CA 94122",-122.482714,37.762392,6353186530
48,On Lok Senior Health Services - Powell,"1441 Powell St, San Francisco, CA 94133",-122.410661,37.799309,65332853


# 5. Make a new dataframe of every combination of origin and destination node combo


In [20]:
orig_list = []
dest_list = []

for o in orig_df.orig_node:
    for d in dest_df.dest_node:
        orig_list.append(o)
        dest_list.append(d)

all_ods = pd.DataFrame({'orig_node': orig_list, 'dest_node': dest_list})

all_ods

Unnamed: 0,orig_node,dest_node
0,2071244791,65305848
1,2071244791,65340189
2,2071244791,65324327
3,2071244791,65318149
4,2071244791,65303696
...,...,...
4045,65327580,65325277
4046,65327580,65337399
4047,65327580,258973940
4048,65327580,65319958


In [21]:
def distance(row):
    try:
        dist = nx.shortest_path_length(G, source=row.orig_node, target=row.dest_node, weight="length")
    except:
        dist = np.nan
    return dist

all_ods['dist'] = all_ods.apply(distance, axis=1)
all_ods

Unnamed: 0,orig_node,dest_node,dist
0,2071244791,65305848,1679.589
1,2071244791,65340189,7128.046
2,2071244791,65324327,4296.428
3,2071244791,65318149,7162.003
4,2071244791,65303696,3764.030
...,...,...,...
4045,65327580,65325277,3637.442
4046,65327580,65337399,10235.463
4047,65327580,258973940,90.091
4048,65327580,65319958,9109.683


In [22]:
sum(all_ods['dist'].isnull().values.ravel())

54

In [23]:
#this is means to sort the dataframe by distance, then drop the duplicate o_nodes
short_ods = all_ods.sort_values('dist').drop_duplicates('orig_node')

In [24]:
print('Number of colums in Dataframe : ', len(short_ods.columns))
print('Number of rows in Dataframe : ', len(short_ods.index))


Number of colums in Dataframe :  3
Number of rows in Dataframe :  74


In [25]:
pd.options.display.max_rows

60

In [26]:
pd.set_option('display.max_rows', None)

In [27]:
print(short_ods)()

       orig_node   dest_node      dist
2407    65338198    65338198     0.000
330     65319961    65319961     0.000
2741    65314390    65314390     0.000
2961    65296327    65296327     0.000
3016    65307322    65327929    50.890
4047    65327580   258973940    90.091
552     65299327  5610843322   122.475
3118    65352429    65328705   135.923
1700    65299863    65302002   170.903
2463  4023087068    65295401   171.138
106     65295336    65319958   212.191
3828   258758552    65327154   244.320
2683  1996007301    65289010   262.867
673    259138852    65301542   264.301
115   4061806118    65308264   274.353
982     65288290    65288284   282.956
622     65332806    65333814   292.999
3217   258911598    65338198   293.444
3545   258958632    65343803   312.210
434   4061762661    65324327   331.178
2901    65314658    65361978   334.551
742     65303533    65328705   346.274
3916   763026749    65333814   352.284
269     65308820   261510697   353.941
1917    65359046    65288

TypeError: 'NoneType' object is not callable

In [28]:
#merge names of the origins
merged_o = pd.merge(left=orig_df, right=short_ods, left_on='orig_node', right_on='orig_node')
merged_o.shape

(75, 7)

In [29]:
merged_ods = pd.merge(left=merged_o, right=dest_df, left_on='dest_node', right_on='dest_node')
merged_ods.shape

(75, 11)

In [31]:
for col in merged_ods: 
    print(col) 

place_x
location_x
x_x
y_x
orig_node
dest_node
dist
place_y
location_y
x_y
y_y


In [34]:
short_ods = pd.DataFrame({'orig_node': merged_ods['orig_node'], 
                          'dest_node': merged_ods['dest_node'],
                          'dist': merged_ods['dist'],
                          'origin': merged_ods['place_x'], 
                          'o_loc': merged_ods['location_x'],
                          'dest': merged_ods['place_y'], 
                          'd_loc': merged_ods['location_y']})
short_ods

Unnamed: 0,orig_node,dest_node,dist,origin,o_loc,dest,d_loc
0,2071244791,65332853,1096.901,CVS,"1 Jefferson St, San Francisco, CA 94133",On Lok Senior Health Services - Powell,"1441 Powell St, San Francisco, CA 94133"
1,65296818,65332853,917.145,Walgreens,"1175 Columbus Ave, San Francisco, CA 94133",On Lok Senior Health Services - Powell,"1441 Powell St, San Francisco, CA 94133"
2,65297259,65332853,1417.046,CVS,"799 Beach St, San Francisco, CA 94109",On Lok Senior Health Services - Powell,"1441 Powell St, San Francisco, CA 94133"
3,65295336,65319958,212.191,CVS,"1059 Hyde St, San Francisco, CA 94109",St. Francis Memorial Hospital,"900 Hyde St, San Francisco, CA 94109"
4,4061806118,65308264,274.353,CVS,"1101 Market St, San Francisco, CA 94103",Larkin Street Medical Clinic,"134 Golden Gate Ave, San Francisco, CA 94102"
5,65308820,261510697,353.941,Walgreens,"1189 Potrero Ave, San Francisco, CA 94110",Family Health Center At Sfgh,"995 Potrero Ave, San Francisco, CA 94110"
6,6356743334,65333152,1044.236,Walgreens,"1201 Taraval St, San Francisco, CA 94116",North East Medical Services-taraval,"2308 Taraval St, San Francisco, CA 94116"
7,261499008,65333152,3002.26,CVS,"233 Winston Dr, San Francisco, CA 94132",North East Medical Services-taraval,"2308 Taraval St, San Francisco, CA 94116"
8,65350552,65333152,660.48,Walgreens,"3001 Taraval St, San Francisco, CA 94116",North East Medical Services-taraval,"2308 Taraval St, San Francisco, CA 94116"
9,65319961,65319961,0.0,Walgreens,"1300 Bush St, San Francisco, CA 94109",On Lok Senior Health Services-mission Center,"1333 Bush St, San Francisco, CA 94109"


In [36]:
short_ods.shape

(75, 7)

In [35]:
short_ods.to_csv('short_ods.csv')