### This notebook is used to find the mid - GPS points for all teh blocks

In [1]:
import pandas as pd
import sys
import googlemaps
sys.path.append('../')
from common import get_train, swap_streets, get_test, load_midpoint_id

### Load in the train-test lookup (street intersection --> gps coordinates)

In [2]:
df = pd.read_feather('../ref_data/gmaps_df_tt.feather')

In [3]:
df.head()

Unnamed: 0,Street1,Street2,Search_Addr,address_tags,lat,lng,neighborhood,related_addr,street_name,street_no,zipcode
0,Mission Street,25th Street,"Mission Street and 25th Street, San Francisco",,37.750648,-122.418316,Mission District,"[['2900 Mission St, San Francisco, CA 94110, U...",Mission Street,2900,94110
1,Polk Street,Ellis Street,"Polk Street and Ellis Street, San Francisco","bus_station,establishment,point_of_interest,tr...",37.78404,-122.419381,Tenderloin,"[['801 Polk St, San Francisco, CA 94109, USA',...",Polk Street,801,94109
2,Van Ness Avenue,Geary Boulevard,"Van Ness Avenue and Geary Boulevard, San Franc...","bus_station,establishment,point_of_interest,tr...",37.785696,-122.421327,Tenderloin,"[['1060 Geary Blvd, San Francisco, CA 94109, U...",Geary Boulevard,1060,94109
3,Van Ness Avenue,Bush Street,"Van Ness Avenue and Bush Street, San Francisco","establishment,general_contractor,local_governm...",37.788499,-122.421892,Nob Hill,"[['1400 Van Ness Ave, San Francisco, CA 94109,...",Van Ness Avenue,1400,94109
4,Van Ness Avenue,Daniel Burnham Court,"Van Ness Avenue and Daniel Burnham Court, San ...","bus_station,establishment,point_of_interest,tr...",37.787084,-122.421757,Western Addition,"[['1207-1227 Van Ness Ave, San Francisco, CA 9...",Van Ness Avenue,1207-1227,94109


### Calculate the midpoint GPS point 

In [5]:
def calc_midgps(input_train, df):
    train = input_train.copy()
    keep_cols = ['Street1','Street2','lat','lng']
    train = train.merge(df[keep_cols], how='left', left_on=['Clean_Street','Clean_From'], right_on =['Street1','Street2'])
    train.drop(columns=['Street1','Street2'], inplace=True)
    train.rename({
        'lat': 'lat_from',
        'lng': 'lng_from',
    }, axis='columns', inplace=True)

    train = train.merge(df[keep_cols], how='left', left_on=['Clean_Street','Clean_To'], right_on =['Street1','Street2'])
    train.drop(columns=['Street1','Street2'], inplace=True)
    train.rename({
        'lat': 'lat_to',
        'lng': 'lng_to',
    }, axis='columns', inplace=True)

    train['lat_mid'] = train['lat_from']/2. + train['lat_to']/2. 
    train['lng_mid'] = train['lng_from']/2. + train['lng_to']/2.
    return train

#midgps2add made below
def add_mid_addr(input_train, midgps2add):
    tmp = input_train.copy()
    tmp['mid_joinkey'] = tmp['lat_mid'].astype(str) + ',' + tmp['lng_mid'].astype(str)
    tmp['mid_address'] = tmp['mid_joinkey'].map(midgps2add)
    tmp.drop(columns='mid_joinkey', inplace=True)
    return tmp


def gmap_search_gps(gps_tuples):
    gps2addr = {}
    with open('/Users/timlee/Dropbox/keys/google_api_key.txt','r') as f:
        gmap_api_key = f.read()

    gmaps = googlemaps.Client( key = gmap_api_key)
    for i, tup in enumerate(gps_tuples):
        print(i)
        geocode_result = gmaps.reverse_geocode((tup[0],tup[1]))
        gps2addr[str(tup[0])+','+str(tup[1])] = geocode_result
    
    print("complete")
    return gps2addr


def gpsaddr_extract_json(one_addr):
    one_addr_details = {}
    one_addr_details['related_addr']  = []
    for item in one_addr:
        for subitem in item['address_components']:
            if subitem['types'] == ['street_number']:
                one_addr_details['street_no'] = subitem['long_name']
            if subitem['types'] == ['route']:
                one_addr_details['street_name'] = subitem['long_name']
            if 'neighborhood' in subitem['types']:
                one_addr_details['neighborhood'] = subitem['short_name']
            if 'postal_code' in subitem['types']:
                one_addr_details['zipcode'] = subitem['short_name']            
        if ('political' not in item['types']) and ('postal_code' not in item['types']):
            one_addr_details['related_addr'].append([item['formatted_address'],item['types']])
    if len(one_addr_details['related_addr']) > 1:
        one_addr_details['address_tags'] = [ x[1] for x in one_addr_details['related_addr'][1:]]
    return one_addr_details




train = get_train()
test = get_test()
print(train.shape, test.shape)
train = swap_streets(train)
test = swap_streets(test)
print(train.shape, test.shape)
train = calc_midgps(train, df)
test = calc_midgps(test, df)
print(train.shape, test.shape)


midblocks = train[['lat_mid', 'lng_mid']].drop_duplicates()
mid_addr = gmap_search_gps(midblocks.values)
from pprint import pprint
out = []
midgps2add = {}
for k, v in mid_addr.items():
    #print('=========================================== ')
    res = gpsaddr_extract_json(v)
    midgps2add[k] ='%s|%s|%s' % (res['street_no'],res['street_name'], res['neighborhood'])
    #print('%s %s' % (res['street_no'],res['street_name']))


train = add_mid_addr(train, midgps2add)
test = add_mid_addr(test, midgps2add)
print(train.shape, test.shape)
train.to_feather('../ref_data/df_train_w_clean_str_gmaps_addr.feather')
test.to_feather('../ref_data/df_test_w_clean_str_gmaps_addr.feather')


Time it took: 0.12674975395202637
Time it took: 0.08107185363769531
(1100, 8) (726, 6)
(1100, 11) (726, 9)
(1100, 17) (726, 15)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
complete
(1100, 18) (726, 16)


In [12]:
test.shape

(1100, 17)

In [9]:
midblocks

Unnamed: 0,lat_mid,lng_mid
0,37.749846,-122.418241
1,37.784273,-122.419429
2,37.785458,-122.421356
3,37.788271,-122.421847
4,37.786863,-122.421637
5,37.786821,-122.418260
6,37.789740,-122.419691
7,37.789530,-122.421295
8,37.785884,-122.418071
9,37.788026,-122.421875


### For copying to excel

In [10]:
# for_print = train[['Clean_Street','Clean_From','Clean_To','lat_from','lng_from','lat_to','lng_from','lat_mid','lng_mid']].drop_duplicates()

# for row in for_print.values:
# #    print(row)
#     print('%s\t%s\t%s\t%f\t%f\t%f\t%f\t%f\t%f' % (row[0],row[1],row[2],row[3],row[4],row[5], row[6], row[7], row[8]))

In [11]:
def gmap_search_gps(gps_tuples):
    gps2addr = {}
    with open('/Users/timlee/Dropbox/keys/google_api_key.txt','r') as f:
        gmap_api_key = f.read()

    gmaps = googlemaps.Client( key = gmap_api_key)
    for i, tup in enumerate(gps_tuples):
        print(i)
        geocode_result = gmaps.reverse_geocode((tup[0],tup[1]))
        gps2addr[str(tup[0])+','+str(tup[1])] = geocode_result
    
    print("complete")
    return gps2addr


def gpsaddr_extract_json(one_addr):
    one_addr_details = {}
    one_addr_details['related_addr']  = []
    for item in one_addr:
        for subitem in item['address_components']:
            if subitem['types'] == ['street_number']:
                one_addr_details['street_no'] = subitem['long_name']
            if subitem['types'] == ['route']:
                one_addr_details['street_name'] = subitem['long_name']
            if 'neighborhood' in subitem['types']:
                one_addr_details['neighborhood'] = subitem['short_name']
            if 'postal_code' in subitem['types']:
                one_addr_details['zipcode'] = subitem['short_name']            
        if ('political' not in item['types']) and ('postal_code' not in item['types']):
            one_addr_details['related_addr'].append([item['formatted_address'],item['types']])
    if len(one_addr_details['related_addr']) > 1:
        one_addr_details['address_tags'] = [ x[1] for x in one_addr_details['related_addr'][1:]]
    return one_addr_details

In [12]:
mid_addr = gmap_search_gps(midblocks.values)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
complete


In [15]:
from pprint import pprint
out = []
midgps2add = {}
for k, v in mid_addr.items():
    #print('=========================================== ')
    res = gpsaddr_extract_json(v)
    midgps2add[k] ='%s|%s|%s' % (res['street_no'],res['street_name'], res['neighborhood'])
    print('%s %s' % (res['street_no'],res['street_name']))

2920-2950 Mission Street
800-834 Polk Street
1038-1050 Van Ness Avenue
1400 Van Ness Avenue
1200 Van Ness Avenue
737 Hyde Street
1450 Pine Street
1540-1554 Pine Street
820-898 Larkin Street
1400 Van Ness Avenue
1130 Larkin Street
200 Larkin Street
3325 23rd Street
3351-3399 23rd Street
3222 23rd Street
3284 23rd Street
1800 Bryant Street
1200 Van Ness Avenue
133-155 Jessie Street
1400 Van Ness Avenue
1827-1849 Van Ness Avenue
1000-1018 Van Ness Avenue
14-22 Kearny Street
88-98 Kearny Street
1736 Polk Street
1826-1830 Polk Street
48 Market Street
2145-2151 Van Ness Avenue
1740-1750 Washington Street
1655A Washington Street
1648 Jackson Street
2211 Polk Street
2244-2298 Polk Street
846-850 Bush Street
1188 Mission Street
1045 Mission Street
731 Van Ness Avenue
690 Van Ness Avenue
234 Van Ness Avenue
524 Van Ness Avenue
29-77 Van Ness Avenue
900-948 Polk Street
480 Eddy Street
342 Jones Street
4 Mark Lane
640 Geary Street
737 Hyde Street
555 Polk Street
943 Geary Street
722 Taylor Street


In [16]:
midgps2add

{'37.7498461,-122.4182409': '2920-2950|Mission Street|Mission District',
 '37.753743,-122.42028245': '3351-3399|23rd Street|Mission District',
 '37.75381245,-122.41917425': '3325|23rd Street|Mission District',
 '37.7538786,-122.4180744': '3284|23rd Street|Mission District',
 '37.75394385,-122.41698505': '3222|23rd Street|Mission District',
 '37.76368,-122.41032555': '1800|Bryant Street|Mission District',
 '37.77369985,-122.41792865': '1500-1546|Mission Street|South of Market',
 '37.7759007,-122.4194987': '29-77|Van Ness Avenue|Civic Center',
 '37.77777955,-122.41980545': '234|Van Ness Avenue|Civic Center',
 '37.7780915,-122.4123757': '1188|Mission Street|South of Market',
 '37.7782425,-122.41820765': '101|Polk Street|Civic Center',
 '37.7783705,-122.4191173': '155|Grove Street|Civic Center',
 '37.7785892,-122.4174497': '101|Polk Street|Civic Center',
 '37.7800448,-122.41691805': '200|Larkin Street|Civic Center',
 '37.7800944,-122.40984825': '1045|Mission Street|South of Market',
 '37.7

In [10]:
points = [(37.749846,-122.418241),
(37.784273,-122.419429),
(37.785458,-122.421356),
(37.788271,-122.421847),
(37.786863,-122.421637),
(37.786821,-122.41826),
(37.78974,-122.419691),
(37.78953,-122.421295),
(37.785884,-122.418071),
(37.788026,-122.421875),
(37.788446,-122.418593),
(37.780045,-122.416918),
(37.753812,-122.419174),
(37.753743,-122.420282),
(37.753944,-122.416985),
(37.753879,-122.418074),
(37.76368,-122.410326),
(37.786875,-122.421564),
(37.787599,-122.401461),
(37.78873,-122.421941),
(37.792618,-122.422874),
(37.785012,-122.421189),
(37.788207,-122.403552),
(37.788672,-122.403641),
(37.792832,-122.421157),
(37.793711,-122.421328),
(37.789009,-122.402863),
(37.795276,-122.423412),
(37.793165,-122.422102),
(37.793378,-122.420422),
(37.794053,-122.422206),
(37.797144,-122.422025),
(37.797612,-122.422118),
(37.789859,-122.411279),
(37.778092,-122.412376),
(37.780094,-122.409848),
(37.782432,-122.42075),
(37.781504,-122.420553),
(37.77778,-122.419805),
(37.780564,-122.420359),
(37.775901,-122.419499),
(37.781504,-122.420553),
(37.780564,-122.420359),
(37.77778,-122.419805),
(37.785207,-122.41962),
(37.784202,-122.414358),
(37.784407,-122.412714),
(37.79054,-122.404719),
(37.786638,-122.414008),
(37.787132,-122.417662),
(37.781947,-122.418966),
(37.786011,-122.418936),
(37.789284,-122.412005),
(37.785417,-122.417978),
(37.778371,-122.419117),
(37.780527,-122.417001),
(37.786431,-122.415653),
(37.789129,-122.409446),
(37.778243,-122.418208),
(37.783265,-122.414166),
(37.781187,-122.415429),
(37.783345,-122.419244),
(37.778589,-122.41745),
(37.783687,-122.410881),
(37.78648,-122.411456),
(37.789492,-122.410365),
(37.788917,-122.411091),
(37.782853,-122.417454),
(37.789074,-122.413658),
(37.788145,-122.41347),
(37.787538,-122.413346),
(37.787068,-122.41325),
(37.785097,-122.412851),
(37.794455,-122.403121),
(37.790508,-122.402322),
(37.792189,-122.400389),
(37.79567,-122.400488),
(37.795392,-122.408183),
(37.789367,-122.403788),
(37.796579,-122.405243),
(37.796867,-122.403609),
(37.797586,-122.403761),
(37.798051,-122.403851),
(37.793418,-122.407785),
(37.7737,-122.417929),
(37.795107,-122.403252),
(37.795982,-122.403423),
(37.790518,-122.404027),
(37.795569,-122.403042),
(37.793463,-122.400037),
(37.792744,-122.399893),
(37.791802,-122.403428),
(37.79779,-122.405481),
(37.799957,-122.409059),
(37.797005,-122.405439),
(37.780687,-122.419513)]

In [11]:
with open('/Users/timlee/Dropbox/keys/google_api_key.txt','r') as f:
    gmap_api_key = f.read()

gmaps = googlemaps.Client( key = gmap_api_key)
geocode_result = gmaps.reverse_geocode((37.784273,-122.419429))

In [12]:
for tup in points:
    geocode_result = gmaps.reverse_geocode(tup)
    print(geocode_result[0]['formatted_address'])

2920-2950 Mission St, San Francisco, CA 94110, USA
800-834 Polk St, San Francisco, CA 94109, USA
1038-1050 Van Ness Ave, San Francisco, CA 94109, USA
1350-1354 Van Ness Ave, San Francisco, CA 94109, USA
1200 Van Ness Ave, San Francisco, CA 94109, USA
945-955 Larkin St, San Francisco, CA 94109, USA
1450 Pine St, San Francisco, CA 94109, USA
1540-1554 Pine St, San Francisco, CA 94109, USA
820-898 Larkin St, San Francisco, CA 94109, USA
1320-1326 Van Ness Ave, San Francisco, CA 94109, USA
1130 Larkin St, San Francisco, CA 94109, USA
200 Larkin St, San Francisco, CA 94102, USA
3325 23rd St, San Francisco, CA 94110, USA
3351-3399 23rd St, San Francisco, CA 94110, USA
3222 23rd St, San Francisco, CA 94110, USA
3284 23rd St, San Francisco, CA 94110, USA
1800 Bryant St, San Francisco, CA 94110, USA
1200 Van Ness Ave, San Francisco, CA 94109, USA
133-155 Jessie St, San Francisco, CA 94105, USA
1400-1412 Van Ness Ave, San Francisco, CA 94109, USA
1827-1849 Van Ness Ave, San Francisco, CA 94109, 

{'37.7498461,-122.4182409': [{'address_components': [{'long_name': '2920-2950',
     'short_name': '2920-2950',
     'types': ['street_number']},
    {'long_name': 'Mission Street',
     'short_name': 'Mission St',
     'types': ['route']},
    {'long_name': 'Mission District',
     'short_name': 'Mission District',
     'types': ['neighborhood', 'political']},
    {'long_name': 'San Francisco',
     'short_name': 'SF',
     'types': ['locality', 'political']},
    {'long_name': 'San Francisco County',
     'short_name': 'San Francisco County',
     'types': ['administrative_area_level_2', 'political']},
    {'long_name': 'California',
     'short_name': 'CA',
     'types': ['administrative_area_level_1', 'political']},
    {'long_name': 'United States',
     'short_name': 'US',
     'types': ['country', 'political']},
    {'long_name': '94110', 'short_name': '94110', 'types': ['postal_code']}],
   'formatted_address': '2920-2950 Mission St, San Francisco, CA 94110, USA',
   'geometry'

In [17]:
train['mid_joinkey'] = train['lat_mid'].astype(str) + ',' + train['lng_mid'].astype(str)

In [20]:
train['mid_address'] = train['mid_joinkey'].map(midgps2add)

In [21]:
train

Unnamed: 0,Street,From,To,Date,Time,Real.Spots,Street.Length,any_spot,Clean_Street,Clean_From,Clean_To,lat_from,lng_from,lat_to,lng_to,lat_mid,lng_mid,mid_joinkey,mid_address
0,Mission Street,25th Street,26th Street,2014-01-07,16:19,4,179.132970,1,Mission Street,25th Street,26th Street,37.750648,-122.418316,37.749044,-122.418166,37.749846,-122.418241,"37.7498461,-122.4182409",2920-2950|Mission Street
1,Polk Street,Ellis Street,Olive Street,2014-01-18,20:42,0,52.740210,0,Polk Street,Ellis Street,Olive Street,37.784040,-122.419381,37.784507,-122.419478,37.784273,-122.419429,"37.78427335,-122.4194294",800-834|Polk Street
2,Van Ness Avenue,Geary Boulevard,Myrtle Street,2014-01-18,20:39,0,52.517840,0,Van Ness Avenue,Geary Boulevard,Myrtle Street,37.785696,-122.421327,37.785221,-122.421385,37.785458,-122.421356,"37.78545845,-122.4213562",1038-1050|Van Ness Avenue
3,Van Ness Avenue,Bush Street,Fern Street,2014-01-18,20:38,0,52.405315,0,Van Ness Avenue,Bush Street,Fern Street,37.788499,-122.421892,37.788044,-122.421802,37.788271,-122.421847,"37.78827135,-122.421847",1400|Van Ness Avenue
4,Van Ness Avenue,Daniel Burnham Court,Post Street,2014-01-18,20:38,0,52.191193,0,Van Ness Avenue,Daniel Burnham Court,Post Street,37.787084,-122.421757,37.786642,-122.421517,37.786863,-122.421637,"37.78686335,-122.4216371",1200|Van Ness Avenue
5,Larkin Street,Cedar Street,Post Street,2014-01-18,20:20,0,52.324250,0,Larkin Street,Cedar Street,Post Street,37.786592,-122.418214,37.787051,-122.418306,37.786821,-122.418260,"37.78682135,-122.41825985",737|Hyde Street
6,Pine Street,Larkin Street,Polk Street,2014-01-18,20:21,0,146.416550,0,Pine Street,Larkin Street,Polk Street,37.789843,-122.418874,37.789637,-122.420507,37.789740,-122.419691,"37.7897396,-122.4196905",1450|Pine Street
7,Pine Street,Polk Street,Van Ness Avenue,2014-01-18,20:22,0,139.494200,0,Pine Street,Polk Street,Van Ness Avenue,37.789637,-122.420507,37.789423,-122.422083,37.789530,-122.421295,"37.78952955,-122.42129505",1540-1554|Pine Street
8,Larkin Street,Myrtle Street,Geary Street,2014-01-18,20:20,0,52.188343,0,Larkin Street,Myrtle Street,Geary Street,37.785650,-122.418024,37.786119,-122.418118,37.785884,-122.418071,"37.78588445,-122.4180713",820-898|Larkin Street
9,Van Ness Avenue,Van Ness Avenue,Bush Street,2014-01-18,20:37,0,39.027992,0,Van Ness Avenue,Sutter Avenue,Bush Street,37.787554,-122.421858,37.788499,-122.421892,37.788026,-122.421875,"37.7880262,-122.42187495",1400|Van Ness Avenue
