In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline
pd.options.display.float_format = '{:,.2f}'.format
plt.style.use('seaborn-white')
# colorblind safe
plt.style.use('seaborn-colorblind')
plt.style.use('tableau-colorblind10')

# width and precision for f strings
width = 10
precision = 4

# default sizes for plots
# https://matplotlib.org/3.3.0/tutorials/introductory/customizing.html#customizing-with-matplotlibrc-files
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['font.size'] = 16
plt.rcParams['legend.fontsize'] = 'large'
plt.rcParams['figure.titlesize'] = 'medium'
plt.rcParams['lines.linewidth'] = 2

# other settings
pd.options.display.float_format = '{:,.4f}'.format
pd.set_option("display.precision", 3)
np.set_printoptions(precision=3, suppress=True)
%load_ext autoreload
%autoreload 2
pd.set_option('display.max_columns', None)
%config IPCompleter.greedy=True


In [2]:
# raw iOverlander export, one row per place
USdata = pd.read_csv(
    filepath_or_buffer='data/iOverlander Places -  2020-08-25_campgrounds.csv'
)

In [3]:
USdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3883 entries, 0 to 3882
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   1012 non-null   object 
 1   Name                       3883 non-null   object 
 2   Category                   3883 non-null   object 
 3   Description                3882 non-null   object 
 4   Latitude                   3883 non-null   float64
 5   Longitude                  3883 non-null   float64
 6   Altitude                   3780 non-null   float64
 7   Date verified              3883 non-null   object 
 8   Open                       3883 non-null   object 
 9   Electricity                3883 non-null   object 
 10  Wifi                       3883 non-null   object 
 11  Kitchen                    3883 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 3883 non-null   objec

In [4]:
USdata["Category"].unique()

array(['Established Campground'], dtype=object)

In [5]:
# rows whose Location is missing; copied so later edits do not touch USdata
USdata_need_loc = USdata.loc[USdata['Location'].isna()].copy()

In [6]:
USdata_need_loc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2871 entries, 402 to 3882
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   0 non-null      object 
 1   Name                       2871 non-null   object 
 2   Category                   2871 non-null   object 
 3   Description                2871 non-null   object 
 4   Latitude                   2871 non-null   float64
 5   Longitude                  2871 non-null   float64
 6   Altitude                   2798 non-null   float64
 7   Date verified              2871 non-null   object 
 8   Open                       2871 non-null   object 
 9   Electricity                2871 non-null   object 
 10  Wifi                       2871 non-null   object 
 11  Kitchen                    2871 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 2871 non-null   obj

In [7]:
# rows that already carry a Location string; copied so later edits do not touch USdata
USdata_has_loc = USdata.loc[USdata['Location'].notna()].copy()

In [8]:
USdata_has_loc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1012 entries, 0 to 1423
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   1012 non-null   object 
 1   Name                       1012 non-null   object 
 2   Category                   1012 non-null   object 
 3   Description                1011 non-null   object 
 4   Latitude                   1012 non-null   float64
 5   Longitude                  1012 non-null   float64
 6   Altitude                   982 non-null    float64
 7   Date verified              1012 non-null   object 
 8   Open                       1012 non-null   object 
 9   Electricity                1012 non-null   object 
 10  Wifi                       1012 non-null   object 
 11  Kitchen                    1012 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 1012 non-null   objec

In [9]:
USdata_has_loc.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center
0,"Borrego Salton Seaway, Borrego Springs, CA 920...",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exis...",33.2802,-116.1458,0.0,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,An old standby for us. Multiple sites for lar...,62.0954,-145.9805,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,This is a great beach for a first night on the...,44.7288,-124.0555,0.0,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hooku...",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,


In [10]:
# keep only the ones with state abbreviation
# - raw string: '\s' in a plain literal is an invalid Python escape sequence
# - .copy(): later cells add columns to this frame, so it must not be a view-like
#   slice of the previous frame (avoids SettingWithCopyWarning)
has_state_abbrev = USdata_has_loc['Location'].str.contains(r'\s[A-Z]{2}\s', case=True, regex=True)
USdata_has_loc = USdata_has_loc[has_state_abbrev].copy()

In [11]:
# empty zip_code placeholder; .assign builds a new frame instead of writing a
# column into the boolean-filtered slice produced by the previous cell
USdata_has_loc = USdata_has_loc.assign(zip_code='')

In [12]:
# set state
# The capture group holds only the two capital letters, so the stored value has
# no surrounding whitespace; expand=False returns a Series rather than a
# one-column DataFrame.
USdata_has_loc = USdata_has_loc.assign(
    State=USdata_has_loc['Location'].str.extract(r'\s([A-Z]{2})\s', expand=False)
)

In [13]:
USdata_has_loc.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State
0,"Borrego Salton Seaway, Borrego Springs, CA 920...",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exis...",33.2802,-116.1458,0.0,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,An old standby for us. Multiple sites for lar...,62.0954,-145.9805,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,This is a great beach for a first night on the...,44.7288,-124.0555,0.0,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hooku...",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR


In [14]:
# now pull state and zip for rows without it
import os
import random
import time

import googlemaps

# API key is read from the environment (None when the variable is unset)
G_MAPS_API = os.environ.get("GOOGLE_STATICMAPS_API")
gmaps = googlemaps.Client(key=G_MAPS_API)

# running count of API calls, shared by the helper functions below via `global`
n_requests = 0

In [15]:
def _component_short_name(components, wanted_type):
    '''
    return the short_name of the first address component whose 'types' list
    contains wanted_type, or None when no component carries that type
    '''
    for component in components:
        if wanted_type in component.get('types', []):
            return component.get('short_name')
    return None


def get_state_zip(df):
    '''
    reverse-geocodes every row of df and collects its zip code and state

    df must have 'Latitude' and 'Longitude' columns. One request is sent per
    row through the module-level `gmaps` client, and the module-level
    `n_requests` counter is incremented for each request.

    returns (zips, states): two lists with one entry per row, in row order.
    Both entries are '' when the API returns nothing for the row or when the
    result has no administrative_area_level_1 component (e.g. Puerto Rico).
    df itself is not modified.
    '''
    zips = []
    states = []

    global n_requests

    for i, site in df.iterrows():
        print(f'Pulling request {i}, total API requests so far = {n_requests}')

        # "lat,long" string as accepted by reverse_geocode
        latlong = str(site['Latitude']) + ',' + str(site['Longitude'])

        # get geocode data, restricted to postal-code level results
        res_type = 'postal_code' # administrative_area_level_1|
        result = gmaps.reverse_geocode(latlong, result_type=res_type)
        # increase counter
        n_requests += 1

        # defaults for "it didn't work for this row"
        zip_code = ''
        state = ''

        if result:
            components = result[0]['address_components']
            # look components up by their type tag rather than by list position
            # or exact equality of the types list: the order is not fixed
            found_state = _component_short_name(components, 'administrative_area_level_1')
            if found_state:
                state = found_state
                zip_code = _component_short_name(components, 'postal_code') or ''

        # append
        zips.append(zip_code)
        states.append(state)

        # wait a bit before next request
        wait_time = random.randint(1, 3)
        print(f'waiting for: {wait_time} seconds')
        time.sleep(wait_time) # in seconds

    return zips, states

In [None]:
# One reverse-geocode request is issued per row of USdata_need_loc, with a
# 1-3 second pause after each; the two lists are only returned once the whole
# loop has finished.
zips, states = get_state_zip(USdata_need_loc)

Pulling request 402, total API requests so far = 0
waiting for: 1 seconds
Pulling request 403, total API requests so far = 1
waiting for: 3 seconds
Pulling request 921, total API requests so far = 2
waiting for: 1 seconds
Pulling request 922, total API requests so far = 3
waiting for: 2 seconds
Pulling request 935, total API requests so far = 4
waiting for: 1 seconds
Pulling request 936, total API requests so far = 5
waiting for: 3 seconds
Pulling request 937, total API requests so far = 6
waiting for: 2 seconds
Pulling request 938, total API requests so far = 7
waiting for: 2 seconds
Pulling request 939, total API requests so far = 8
waiting for: 3 seconds
Pulling request 940, total API requests so far = 9
waiting for: 2 seconds
Pulling request 942, total API requests so far = 10
waiting for: 2 seconds
Pulling request 943, total API requests so far = 11
waiting for: 1 seconds
Pulling request 944, total API requests so far = 12
waiting for: 3 seconds
Pulling request 945, total API requ

Pulling request 1054, total API requests so far = 109
waiting for: 1 seconds
Pulling request 1055, total API requests so far = 110
waiting for: 2 seconds
Pulling request 1056, total API requests so far = 111
waiting for: 2 seconds
Pulling request 1057, total API requests so far = 112
waiting for: 1 seconds
Pulling request 1059, total API requests so far = 113
waiting for: 1 seconds
Pulling request 1060, total API requests so far = 114
waiting for: 1 seconds
Pulling request 1061, total API requests so far = 115
waiting for: 1 seconds
Pulling request 1062, total API requests so far = 116
waiting for: 2 seconds
Pulling request 1063, total API requests so far = 117
waiting for: 1 seconds
Pulling request 1064, total API requests so far = 118
waiting for: 2 seconds
Pulling request 1065, total API requests so far = 119
waiting for: 3 seconds
Pulling request 1066, total API requests so far = 120
waiting for: 1 seconds
Pulling request 1067, total API requests so far = 121
waiting for: 3 seconds

Pulling request 1198, total API requests so far = 216
waiting for: 1 seconds
Pulling request 1199, total API requests so far = 217
waiting for: 2 seconds
Pulling request 1200, total API requests so far = 218
waiting for: 1 seconds
Pulling request 1201, total API requests so far = 219
waiting for: 3 seconds
Pulling request 1202, total API requests so far = 220
waiting for: 3 seconds
Pulling request 1203, total API requests so far = 221
waiting for: 3 seconds
Pulling request 1204, total API requests so far = 222
waiting for: 1 seconds
Pulling request 1206, total API requests so far = 223
waiting for: 1 seconds
Pulling request 1208, total API requests so far = 224
waiting for: 3 seconds
Pulling request 1209, total API requests so far = 225
waiting for: 1 seconds
Pulling request 1210, total API requests so far = 226
waiting for: 1 seconds
Pulling request 1211, total API requests so far = 227
waiting for: 1 seconds
Pulling request 1212, total API requests so far = 228
waiting for: 3 seconds

Pulling request 1327, total API requests so far = 323
waiting for: 3 seconds
Pulling request 1328, total API requests so far = 324
waiting for: 2 seconds
Pulling request 1329, total API requests so far = 325
waiting for: 3 seconds
Pulling request 1330, total API requests so far = 326
waiting for: 2 seconds
Pulling request 1332, total API requests so far = 327
waiting for: 2 seconds
Pulling request 1333, total API requests so far = 328
waiting for: 2 seconds
Pulling request 1334, total API requests so far = 329
waiting for: 3 seconds
Pulling request 1335, total API requests so far = 330
waiting for: 2 seconds
Pulling request 1336, total API requests so far = 331
waiting for: 2 seconds
Pulling request 1337, total API requests so far = 332
waiting for: 3 seconds
Pulling request 1338, total API requests so far = 333
waiting for: 1 seconds
Pulling request 1339, total API requests so far = 334
waiting for: 1 seconds
Pulling request 1340, total API requests so far = 335
waiting for: 1 seconds

Pulling request 1442, total API requests so far = 430
waiting for: 3 seconds
Pulling request 1443, total API requests so far = 431
waiting for: 3 seconds
Pulling request 1444, total API requests so far = 432
waiting for: 1 seconds
Pulling request 1445, total API requests so far = 433
waiting for: 3 seconds
Pulling request 1446, total API requests so far = 434
waiting for: 3 seconds
Pulling request 1447, total API requests so far = 435
waiting for: 2 seconds
Pulling request 1448, total API requests so far = 436
waiting for: 3 seconds
Pulling request 1449, total API requests so far = 437
waiting for: 2 seconds
Pulling request 1450, total API requests so far = 438
waiting for: 1 seconds
Pulling request 1451, total API requests so far = 439
waiting for: 1 seconds
Pulling request 1452, total API requests so far = 440
waiting for: 1 seconds
Pulling request 1453, total API requests so far = 441
waiting for: 2 seconds
Pulling request 1454, total API requests so far = 442
waiting for: 2 seconds

Pulling request 1549, total API requests so far = 537
waiting for: 2 seconds
Pulling request 1550, total API requests so far = 538
waiting for: 2 seconds
Pulling request 1551, total API requests so far = 539
waiting for: 1 seconds
Pulling request 1552, total API requests so far = 540
waiting for: 1 seconds
Pulling request 1553, total API requests so far = 541
waiting for: 2 seconds
Pulling request 1554, total API requests so far = 542
waiting for: 3 seconds
Pulling request 1555, total API requests so far = 543
waiting for: 1 seconds
Pulling request 1556, total API requests so far = 544
waiting for: 2 seconds
Pulling request 1557, total API requests so far = 545
waiting for: 1 seconds
Pulling request 1558, total API requests so far = 546
waiting for: 1 seconds
Pulling request 1559, total API requests so far = 547
waiting for: 3 seconds
Pulling request 1560, total API requests so far = 548
waiting for: 2 seconds
Pulling request 1561, total API requests so far = 549
waiting for: 1 seconds

In [None]:
# save
# each list becomes a named Series with a fresh 0..n-1 index and is written to its own csv
zips_pd = pd.Series(data=zips, name='zip_codes')
zips_pd.to_csv('data/zip_codes_est_campgrounds.csv')

states_pd = pd.Series(data=states, name='states')
states_pd.to_csv('data/states_est_campgrounds.csv')

In [None]:
# add to df
# .assign works on a copy, so USdata_need_loc itself stays unchanged
USdata_need_loc_with_states = USdata_need_loc.assign(zip_code=zips, State=states)

In [None]:
# merge with data with location
# rows that already had a Location first, then the geocoded rows
dfs = [USdata_has_loc, USdata_need_loc_with_states]
USdata_camgrounds_zip_states_combined = pd.concat(objs=dfs, axis=0)

In [None]:
# check state format
# strip first, then leave unique() as the last expression so the notebook
# actually displays the distinct (cleaned) values
USdata_camgrounds_zip_states_combined['State'] = USdata_camgrounds_zip_states_combined['State'].str.strip()
USdata_camgrounds_zip_states_combined['State'].unique()

In [None]:
# save csv, next we will pull sat images
USdata_camgrounds_zip_states_combined.to_csv(
    path_or_buf='data/USdata_est_campgrounds_zip_states_combined.csv'
)

In [None]:
def download_images(client, df, zoomlevel, max_requests=10, prefix="", out_path="data/"):
    '''
    downloads one satellite image per row of df and saves cropped/rotated copies

    client       -- object exposing static_map(size, zoom, center, scale, maptype, format)
                    that returns an iterable of byte chunks
    df           -- frame with 'Latitude', 'Longitude' and 'Category' columns; the row
                    label is used in the file name
    zoomlevel    -- zoom passed to static_map (1-21)
    max_requests -- stop once the module-level n_requests counter reaches this value
    prefix       -- free text inserted into every file name
    out_path     -- target directory, created if missing

    For every row the 400x400 image is written, cropped to [25:375, 25:375] to
    remove the text, re-saved under the same name, and three extra files
    (_rot90, _rot180, _rot270) are written with the crop rotated by that many
    degrees. Rows whose main file already exists are skipped without a request.
    The module-level n_requests counter is incremented once per request.

    returns True
    '''
    os.makedirs(out_path, exist_ok=True)

    global n_requests

    for i, site in df.iterrows():
        print(f'Pulling image {i}, total API requests so far = {n_requests}')

        # temp save coords
        lat = site['Latitude']
        long = site['Longitude']

        # and tags for site
        cat = site['Category']

        # create filename
        cur_filename = f'satimg_{prefix}_{i}_{cat}_{zoomlevel}_{lat}_{long}.png'
        print(cur_filename)
        cur_path = os.path.join(out_path, cur_filename)

        # if it already exists, skip to next
        if os.path.exists(cur_path):
            continue

        # check the budget BEFORE issuing the request, so no request is spent
        # on an image that would be thrown away
        if n_requests >= max_requests:
            print("API requests quota exceeded!")
            break

        # get the image
        satimg = client.static_map(size = (400, 400), # pixels
           zoom = zoomlevel, # 1-21
           center = (lat, long),
           scale = 1, # default is 1, 2 returns 2x pixels for high res displays
           maptype = "satellite",
           format = "png"
          )

        # if it didn't work, exit
        if satimg is None:
            print("API requests quota exceeded!")
            break
        # increase counter otherwise
        n_requests += 1

        # save the current image; the context manager closes the file even if
        # streaming a chunk fails
        with open(cur_path, 'wb') as f:
            for chunk in satimg:
                if chunk:
                    f.write(chunk)

        # open it to crop the text off
        img = plt.imread(cur_path)
        cropped = img[25:375, 25:375]
        # and resave
        plt.imsave(cur_path, cropped)

        # and rotate and save that version; k counts quarter turns, so it is
        # derived from the angle (90 -> 1, 180 -> 2, 270 -> 3)
        for degrees in (90, 180, 270):
            cropped_rotated = np.rot90(cropped, k=degrees // 90)
            cropped_rot_filename = f'satimg_{prefix}_{i}_{cat}_{zoomlevel}_{lat}_{long}_rot{degrees}.png'
            plt.imsave(os.path.join(out_path, cropped_rot_filename), cropped_rotated)

        # wait a bit before next request
        wait_time = random.randint(1, 5)
        print(f'waiting for: {wait_time} seconds')
        time.sleep(wait_time) # in seconds

        # display samples every now and then
        if i % 100 == 0:
            img = plt.imread(cur_path)
            plt.imshow(img)
            plt.title(f'image {i}')
            plt.show()
            time.sleep(2)
    return True

In [None]:
# cap handed to download_images (max_requests=...) in the download loop below
max_requests = 10_000

In [None]:
# distinct Category values, used to split the downloads into one folder per category
# NOTE(review): USdata_cleaned is not defined in any cell visible above —
# confirm where it is created, otherwise this fails on a fresh kernel
categories = list(USdata_cleaned['Category'].unique())
categories

In [None]:
# download the images category by category, each into data/sites_CO/<category>/
# NOTE(review): USdata_CO is not defined in any cell visible above — presumably
# the Colorado subset of the combined table; confirm where it is created
for category in categories:
    print(category)
    # rows of the current category only
    tempdf = USdata_CO[USdata_CO['Category'] == category]
    #tempdf.info()
    download_images(gmaps, tempdf, max_requests=max_requests, zoomlevel=17, prefix="CO", out_path=f"data/sites_CO/{category}/")