In [None]:
import pandas as pd
import ast
from tqdm import tqdm
from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
import keyring

### Import data

In [None]:
# Directory holding the per-region CSV files. Kept in one place so the
# location only has to be edited once if the data moves.
DATA_DIR = '/Users/seantibbitts/Documents/Data Bootcamp/Homework/online_shopping_analysis/Data'

# One DataFrame per region; the resolved file paths are the same as before.
# Note that the Bakersfield file is named 'bakersfield2.csv'.
bakersfield = pd.read_csv(DATA_DIR + '/bakersfield2.csv')
sacramento = pd.read_csv(DATA_DIR + '/sacramento.csv')
sandiego = pd.read_csv(DATA_DIR + '/sandiego.csv')
redding = pd.read_csv(DATA_DIR + '/redding.csv')

### Concatenate dfs

In [None]:
# Stack the four regional frames into one table. ignore_index=True discards
# the per-file row labels and assigns a fresh 0..n-1 index.
regional_frames = [bakersfield, sacramento, sandiego, redding]
all_reg = pd.concat(regional_frames, ignore_index=True)

In [None]:
def _first_or_none(parsed):
    """Return the first element of ``parsed``, or None when ``parsed`` is empty/falsy."""
    if parsed:
        return parsed[0]
    return None


# 'uncat_attrs' holds Python literals stored as text; parse each one, take its
# first element (if any) and strip surrounding whitespace.
parsed_attrs = all_reg['uncat_attrs'].map(ast.literal_eval)
first_attrs = parsed_attrs.map(_first_or_none)
all_reg['subtitle'] = first_attrs.str.strip()

### Create df of unique locations

In [None]:
# Keep one row per distinct (latitude, longitude) pair; .copy() detaches the
# result from all_reg so columns can be added to it later.
coordinate_columns = ['latitude', 'longitude']
locs = all_reg[coordinate_columns].drop_duplicates().copy()

In [None]:
# Keep only rows in which every column is populated (drop a row as soon as
# any of its values is missing).
locs = locs.dropna(how='any').copy()

In [None]:
# Display (rows, columns) of the unique-location table as the cell output.
locs.shape

### Create query string for each location

In [None]:
# Build the "<latitude>,<longitude>" text used as the query for each location.
latitude_text = locs['latitude'].astype(str)
longitude_text = locs['longitude'].astype(str)
locs['lat_lon'] = latitude_text + ',' + longitude_text

### Get API key from keychain

In [None]:
# Look the Google API key up in the system keyring (service 'Google API Key',
# username 'Google') so the secret never appears in the notebook itself.
api_key = keyring.get_password('Google API Key','Google')

# keyring.get_password returns None when no matching entry exists. Stop here
# with a clear message instead of carrying a missing key into the geocoding
# cells further down.
if api_key is None:
    raise RuntimeError(
        "No Google API key found in the keyring "
        "(service 'Google API Key', username 'Google')."
    )

### Instantiate Google Maps API geolocator object

In [None]:
# geopy GoogleV3 geocoder authenticated with the key loaded into `api_key`.
geolocator = GoogleV3(api_key=api_key)

### Prepare to use progress bar on dataframe iteration

In [None]:
# Register tqdm's progress_* methods on pandas objects; this is what makes
# Series.progress_map available for the reverse-geocoding step.
tqdm.pandas()

### Wrap geolocator in rate-limiting function

In [None]:
# Minimum pause, in seconds, enforced between consecutive geocoding calls.
MIN_DELAY_SECONDS = 0.02

# Rate-limited wrapper around the geolocator's reverse-geocoding method.
geocoder = RateLimiter(geolocator.reverse, min_delay_seconds=MIN_DELAY_SECONDS)

In [None]:
# locs1 = locs.iloc[0:10].copy()

# locs1['reverse'] = locs1['lat_lon'].progress_map(lambda x: geocoder(x, exactly_one = True))

# next((item['long_name'] for item in locs1.loc[0,'reverse'].raw['address_components'] if 'postal_code' in item['types']),False)

# locs1['raw'] = locs1['reverse'].map(lambda x: x.raw if x else None)

# locs1['address_components'] = locs1['raw'].map(lambda x: x.get('address_components') if x else None)

# locs1['zip'] = locs1['address_components'].map(lambda x: next((item['long_name'] for item in x if 'postal_code' in item['types']),False))

# locs1

### Reverse geocode each location

In [None]:
def _reverse_geocode(lat_lon):
    """Reverse geocode one "<latitude>,<longitude>" string, asking for a single result."""
    return geocoder(lat_lon, exactly_one=True)


# One rate-limited lookup per unique location, with a tqdm progress bar.
locs['reverse'] = locs['lat_lon'].progress_map(_reverse_geocode)

### Extract data from raw return values

In [None]:
def _raw_or_none(location):
    """Return ``location.raw``, or None when the lookup produced no (truthy) result."""
    if location:
        return location.raw
    return None


locs['raw'] = locs['reverse'].map(_raw_or_none)

In [None]:
def _address_components_or_none(raw):
    """Return ``raw.get('address_components')``, or None when ``raw`` is empty/missing."""
    if raw:
        return raw.get('address_components')
    return None


locs['address_components'] = locs['raw'].map(_address_components_or_none)

### Return values are lists of dictionaries, so search each one for the right item

In [None]:
def _component_long_name(components, component_type):
    """Return the 'long_name' of the first address component tagged ``component_type``.

    Parameters
    ----------
    components : list of dict or None
        One value from the 'address_components' column. Each dict is read for
        its 'types' and 'long_name' keys.
    component_type : str
        Tag to look for inside each component's 'types' entry.

    Returns
    -------
    The 'long_name' of the first matching component, or None when
    ``components`` is empty/missing or no component carries the tag.
    """
    if not components:
        return None
    # None (rather than False) is the fallback so that "no matching component"
    # and "no geocoding result" share a single missing-value marker; a False
    # here would be written to CSV as the literal text "False".
    return next(
        (item['long_name'] for item in components if component_type in item['types']),
        None,
    )


# (output column, address-component type it is read from)
ADDRESS_COMPONENT_COLUMNS = (
    ('zip', 'postal_code'),
    ('city', 'locality'),
    ('county', 'administrative_area_level_2'),
    ('state', 'administrative_area_level_1'),
    ('country', 'country'),
)

# The same lookup is applied for every output column; only the tag differs.
for column, component_type in ADDRESS_COMPONENT_COLUMNS:
    locs[column] = locs['address_components'].map(
        lambda components, component_type=component_type: _component_long_name(
            components, component_type
        )
    )

### Merge location data back onto data set

In [None]:
# Attach the geocoded fields to every listing. The left join on the coordinate
# pair keeps all rows of all_reg, including those with no match in locs.
join_keys = ['latitude', 'longitude']
location_fields = locs[['latitude','longitude','zip','city','county','state','country']]
all_reg2 = pd.merge(left=all_reg, right=location_fields, how='left', on=join_keys)

In [None]:
# Write the enriched data set to disk without the DataFrame index column.
output_csv_path = '/Users/seantibbitts/Documents/Data Bootcamp/Homework/online_shopping_analysis/Data/craigslist_w_zips.csv'
all_reg2.to_csv(output_csv_path, index=False)