In [1]:
# generic imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# notebook settings
%config IPCompleter.greedy=True
%load_ext autoreload
%autoreload 2 
# precision and plot settings
num_precision = 3  # number of decimals shown by numpy and pandas

# numpy: fixed number of decimals, fixed-point instead of scientific notation
np.set_printoptions(suppress=True, precision=num_precision)

# pandas: never truncate columns, fixed decimals with thousands separators
pd.set_option('display.max_columns', None)
pd.set_option("display.precision", num_precision)
pd.set_option('display.float_format', lambda x: f'{x:,.{num_precision}f}')

# matplotlib: pick the style sheet first, then override individual defaults
plt.style.use('tableau-colorblind10')
plt.rcParams.update({
    'figure.figsize': [10, 6],
    'font.size': 16,
    'legend.fontsize': 'large',
    'figure.titlesize': 'medium',
    'lines.linewidth': 2,
})

In [2]:
import random, time
import googlemaps

In [3]:
# setup dir and import helper functions
import sys, os

# Folder expected to hold the helper module: the `src` directory inside the
# parent of the first sys.path entry.
# NOTE(review): this relies on sys.path[0] pointing at the notebook's folder —
# confirm for the kernel in use.
SRC_DIR = os.path.join(os.path.dirname(sys.path[0]), 'src')

# Register the folder only once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

import helper_funcs as my_funcs

In [4]:
# US, 7 categories
# Load the three iOverlander exports in a fixed order:
# closed, deleted, closed-and-deleted.
closed, deleted, clos_del = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/iOverlander Places -  closed US 7 cats 2020-09-14.csv',
        '../data/iOverlander Places - US deleted 2020-09-14.csv',
        '../data/iOverlander Places - closed and deleted 2020-09-14.csv',
    )
)

In [5]:
# Inspect column names, non-null counts and dtypes of the `closed` frame.
closed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 681 entries, 0 to 680
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   154 non-null    object 
 1   Name                       681 non-null    object 
 2   Category                   681 non-null    object 
 3   Description                681 non-null    object 
 4   Latitude                   681 non-null    float64
 5   Longitude                  681 non-null    float64
 6   Altitude                   670 non-null    float64
 7   Date verified              681 non-null    object 
 8   Open                       681 non-null    object 
 9   Electricity                661 non-null    object 
 10  Wifi                       661 non-null    object 
 11  Kitchen                    661 non-null    object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 661 non-null    object 

In [6]:
# Keep the three loaded frames together in a fixed order:
# closed, deleted, closed-and-deleted.
dfs = [closed, deleted, clos_del]

In [7]:
# are these unique?
# One set of place names per frame, so overlaps can be checked with set algebra.
clos_set, del_set, clos_del_set = (
    set(frame['Name']) for frame in (closed, deleted, clos_del)
)

In [8]:
# Names that occur in both the closed and the deleted export.
clos_set & del_set

{'Albertsons',
 'Cabellas',
 'Camping World',
 'Dead end road',
 'Parking for overnight',
 'Parking lot ',
 'Planet Fitness ',
 'Rest Area',
 'Rest area',
 'Saddle Mountain',
 'Street parking ',
 'Super Walmart ',
 'Walmart',
 'Walmart ',
 'Walmart Supercenter',
 'Walmart camping',
 'Walmart parking lot',
 'walmart'}

In [9]:
# Combined coordinate key of the form '<latitude>_<longitude>' for each row.
closed['latlong'] = closed['Latitude'].astype('str').str.cat(
    closed['Longitude'].astype('str'), sep='_'
)

In [10]:
def foo(df):
    """Add a 'latlong' key column to ``df`` in place and return the same frame.

    The key is the text form of the 'Latitude' and 'Longitude' columns joined
    by an underscore. Because ``df`` itself is returned, the function can be
    used with ``DataFrame.pipe``.
    """
    lat_text = df['Latitude'].astype('str')
    lon_text = df['Longitude'].astype('str')
    df['latlong'] = lat_text + '_' + lon_text
    return df

In [11]:
# Apply foo to every frame. foo returns the frame it was given, so dfs2 holds
# the same objects as dfs, now carrying the 'latlong' column.
dfs2 = [foo(frame) for frame in dfs]

In [12]:
# are these unique?
# Same check as with the names, this time on the coordinate key of the first
# three frames in dfs2.
clos_set, del_set, clos_del_set = (
    set(frame['latlong']) for frame in dfs2[:3]
)

In [13]:
# Coordinate keys present in all three exports at once.
# NOTE(review): an empty result here does not rule out overlaps between just
# two of the sets — check pairwise intersections if that matters.
clos_set.intersection(del_set, clos_del_set)

set()

In [14]:
# get states and zips for them
# setup API
# The key is read from the environment so it never appears in the notebook.
G_MAPS_API = os.getenv("GOOGLE_STATICMAPS_API")
gmaps = googlemaps.Client(key=G_MAPS_API)
# Starting value of the request counter that is handed to the helper.
n_requests = 0

In [15]:
# Labels paired positionally with `dfs`; they are interpolated into the
# output CSV file names.
names = ['closed', 'deleted', 'closed_deleted']

In [16]:
# Sanity check: number of frames that will be processed.
len(dfs)

3

In [17]:
# dfs[0] is the `closed` frame; re-inspect it now that 'latlong' has been added.
dfs[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 681 entries, 0 to 680
Data columns (total 37 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   154 non-null    object 
 1   Name                       681 non-null    object 
 2   Category                   681 non-null    object 
 3   Description                681 non-null    object 
 4   Latitude                   681 non-null    float64
 5   Longitude                  681 non-null    float64
 6   Altitude                   670 non-null    float64
 7   Date verified              681 non-null    object 
 8   Open                       681 non-null    object 
 9   Electricity                661 non-null    object 
 10  Wifi                       661 non-null    object 
 11  Kitchen                    661 non-null    object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 661 non-null    object 

In [None]:
# For each labelled frame, ask the helper for zip codes and states, then save
# each result as its own CSV under ../data/.
# NOTE(review): get_state_zip lives in helper_funcs and is not visible here;
# presumably it issues Google Maps requests per row — confirm before re-running,
# since nothing in this cell skips frames whose CSVs already exist.
# NOTE(review): n_requests is never reassigned in this loop, so every call
# receives the same starting value — verify whether a running total across
# frames is intended.
for name, df in zip(names, dfs):
    zips, states = my_funcs.get_state_zip(df, gmaps, n_requests)
    # Wrap the results in named Series; the name is used as the CSV column header.
    zips_pd = pd.Series(zips, name='zip_codes') 
    states_pd = pd.Series(states, name='states') 
    # One file per result type and per frame label.
    zips_pd.to_csv(f'../data/zip_codes_{name}.csv')
    states_pd.to_csv(f'../data/states_{name}.csv')

Pulling request 0, total API requests so far = 0
waiting for: 1 seconds
