In [234]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline
pd.options.display.float_format = '{:,.2f}'.format
plt.style.use('seaborn-white')
# colorblind safe
plt.style.use('seaborn-colorblind')
plt.style.use('tableau-colorblind10')

# width and precision for f strings
width = 10
precision = 4

# default sizes for plots
# https://matplotlib.org/3.3.0/tutorials/introductory/customizing.html#customizing-with-matplotlibrc-files
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['font.size'] = 16
plt.rcParams['legend.fontsize'] = 'large'
plt.rcParams['figure.titlesize'] = 'medium'
plt.rcParams['lines.linewidth'] = 2

# other settings
pd.options.display.float_format = '{:,.4f}'.format
pd.set_option("display.precision", 3)
np.set_printoptions(precision=3, suppress=True)
%load_ext autoreload
%autoreload 2
pd.set_option('display.max_columns', None)
%config IPCompleter.greedy=True

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [235]:
# setup dir and import helper functions
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'src'))
import helper_funcs as my_funcs
import re

In [236]:
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

In [237]:
names = states

In [238]:
len(names)

51

In [239]:
# filter US data for this - this is what I'm calling "all US data"
# forgot that I only pulled closed from all 50 states
USdata = pd.read_csv('../data/USdata_all_zip_states_combined_cleaned_NEW.csv')

In [240]:
list(USdata['Category'].unique())

['Established Campground',
 'Wild Camping',
 'Informal Campsite',
 'Showers',
 'Water',
 'Short-term Parking',
 'Eco-Friendly']

In [241]:
USdata['State'].unique()

array(['CA', 'UT', 'AK', 'OR', 'WA', 'WV', 'MD', 'MT', 'AZ', 'SE', 'NV',
       'CO', 'NM', 'TX', 'TN', 'KY', 'FL', 'ME', 'NC', 'VA', 'IL', 'SD',
       'NE', 'OH', 'WY', 'AR', 'MI', 'ID', 'MS', 'OK', 'GA', 'AL', 'LA',
       'SC', 'NY', 'VT', 'KS', 'MO', 'RV', 'US', 'NW', 'PA', 'WI', 'MA',
       'MN', 'NJ', 'SW', 'ND', 'IN', 'HI', 'IA', 'AB', 'NH', 'YT', 'DE',
       'CT', 'BC', 'RI', 'N.L.', 'NB', nan, 'DC', 'ON', 'QC', 'S.L.P.',
       'Ver.', 'Nay.', 'NS', 'B.C.', 'Pue.'], dtype=object)

In [242]:
All_USdata = USdata[USdata['State'].isin(names)]

In [243]:
All_USdata['State'].unique()

array(['CA', 'UT', 'AK', 'OR', 'WA', 'WV', 'MD', 'MT', 'AZ', 'NV', 'CO',
       'NM', 'TX', 'TN', 'KY', 'FL', 'ME', 'NC', 'VA', 'IL', 'SD', 'NE',
       'OH', 'WY', 'AR', 'MI', 'ID', 'MS', 'OK', 'GA', 'AL', 'LA', 'SC',
       'NY', 'VT', 'KS', 'MO', 'PA', 'WI', 'MA', 'MN', 'NJ', 'ND', 'IN',
       'HI', 'IA', 'NH', 'DE', 'CT', 'RI', 'DC'], dtype=object)

In [244]:
All_USdata.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11786 entries, 0 to 12411
Data columns (total 38 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   2212 non-null   object 
 1   Name                       11786 non-null  object 
 2   Category                   11786 non-null  object 
 3   Description                11785 non-null  object 
 4   Latitude                   11786 non-null  float64
 5   Longitude                  11786 non-null  float64
 6   Altitude                   11490 non-null  float64
 7   Date verified              11786 non-null  object 
 8   Open                       11786 non-null  object 
 9   Electricity                11007 non-null  object 
 10  Wifi                       11007 non-null  object 
 11  Kitchen                    11007 non-null  object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 11007 non-null  obj

In [245]:
All_USdata.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.2802,-116.1458,0.0,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.0954,-145.9805,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.7288,-124.0555,0.0,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR


In [246]:
# look at potential binary columns for wild & est first
All_USdata_est_wild = All_USdata[All_USdata['Category'].isin(['Established Campground','Wild Camping'])]

In [247]:
All_USdata_est_wild.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8325 entries, 0 to 12410
Data columns (total 38 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   1720 non-null   object 
 1   Name                       8325 non-null   object 
 2   Category                   8325 non-null   object 
 3   Description                8324 non-null   object 
 4   Latitude                   8325 non-null   float64
 5   Longitude                  8325 non-null   float64
 6   Altitude                   8108 non-null   float64
 7   Date verified              8325 non-null   object 
 8   Open                       8325 non-null   object 
 9   Electricity                8325 non-null   object 
 10  Wifi                       8325 non-null   object 
 11  Kitchen                    8325 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 8325 non-null   obje

In [248]:
All_USdata_est_wild['Toilets'].unique()

array(['Pit Toilets', 'Unknown', 'Running Water', 'No', 'Yes'],
      dtype=object)

In [249]:
All_USdata_est_wild.loc[:,['Category', 'Description']].groupby(['Category']).agg('count')

Unnamed: 0_level_0,Description
Category,Unnamed: 1_level_1
Established Campground,3713
Wild Camping,4611


In [250]:
cat_counts = All_USdata_est_wild.loc[:,['Category', 'Description']].groupby(['Category']).agg(
    description_pct = ('Description','count'))
cat_counts
total = cat_counts['description_pct'].sum()
description_pct = cat_counts.groupby('description_pct').apply(lambda x: 100 * x / total)
description_pct

Unnamed: 0_level_0,description_pct
Category,Unnamed: 1_level_1
Established Campground,44.606
Wild Camping,55.394


In [251]:
All_USdata_est_wild.loc[:,['Category', 'Description', 'Toilets']].groupby(['Toilets', 'Category']).agg('count')

Unnamed: 0_level_0,Unnamed: 1_level_0,Description
Toilets,Category,Unnamed: 2_level_1
No,Established Campground,255
No,Wild Camping,3960
Pit Toilets,Established Campground,1317
Pit Toilets,Wild Camping,394
Running Water,Established Campground,2002
Running Water,Wild Camping,165
Unknown,Established Campground,88
Unknown,Wild Camping,80
Yes,Established Campground,51
Yes,Wild Camping,12


In [252]:
All_USdata_est_wild.loc[:,['Category', 'Description', 'Electricity']].groupby(['Electricity', 'Category']).agg('count')

Unnamed: 0_level_0,Unnamed: 1_level_0,Description
Electricity,Category,Unnamed: 2_level_1
No,Established Campground,1662
No,Wild Camping,4520
Unknown,Established Campground,104
Unknown,Wild Camping,47
Yes,Established Campground,50
Yes - At Sites,Established Campground,1713
Yes - At Sites,Wild Camping,21
Yes - Not at Sites,Established Campground,184
Yes - Not at Sites,Wild Camping,23


In [253]:
All_USdata_est_wild.loc[:,['Category', 'Description', 'Water']].groupby(['Water', 'Category']).agg('count')

Unnamed: 0_level_0,Unnamed: 1_level_0,Description
Water,Category,Unnamed: 2_level_1
Natural Source,Established Campground,337
Natural Source,Wild Camping,792
No,Established Campground,617
No,Wild Camping,3491
Non-Potable,Established Campground,161
Non-Potable,Wild Camping,64
Potable,Established Campground,2125
Potable,Wild Camping,71
Unknown,Established Campground,166
Unknown,Wild Camping,118


In [254]:
# for water, toilets, electricity, make binary cols for each - these seem interesting and don't split with wild/est
All_USdata_est_wild['water_binary'] = 0
yes = ['Potable', 'Yes']
# df['color'] = np.where(df['Set']=='Z', 'green', 'red')
All_USdata_est_wild['water_binary'][All_USdata_est_wild['Water'].isin(yes)] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(~key, value, inplace=True)


In [255]:
All_USdata_est_wild.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.2802,-116.1458,0.0,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA,0
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT,0
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.0954,-145.9805,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK,0
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.7288,-124.0555,0.0,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1


In [256]:
All_USdata_est_wild['toilets_binary'] = 0
yes = ['Pit Toilets', 'Yes', 'Running Water']
# df['color'] = np.where(df['Set']=='Z', 'green', 'red')
All_USdata_est_wild['toilets_binary'][All_USdata_est_wild['Toilets'].isin(yes)] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(~key, value, inplace=True)


In [257]:
All_USdata_est_wild.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8325 entries, 0 to 12410
Data columns (total 40 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   1720 non-null   object 
 1   Name                       8325 non-null   object 
 2   Category                   8325 non-null   object 
 3   Description                8324 non-null   object 
 4   Latitude                   8325 non-null   float64
 5   Longitude                  8325 non-null   float64
 6   Altitude                   8108 non-null   float64
 7   Date verified              8325 non-null   object 
 8   Open                       8325 non-null   object 
 9   Electricity                8325 non-null   object 
 10  Wifi                       8325 non-null   object 
 11  Kitchen                    8325 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 8325 non-null   obje

In [258]:
All_USdata_est_wild

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.2802,-116.1458,0.0000,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA,0,1
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0000,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT,0,1
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.0954,-145.9805,0.0000,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK,0,0
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.7288,-124.0555,0.0000,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12403,"Unnamed Road, Fallon, NV 89406, USA",hidden cave BLM site,Wild Camping,Parking lot for hidden cave. about 1 mile from highway 50 (turn right if coming from the east). no noise. cell reception from nearby Fallon. The 1 mile graded dirt road starts near the petroglyph site which is on highway 50 (about 6 miles east before Fallon) . it is ok even after rain. some parts leveled. many more locations near by. we were here with 1 more truck.,39.4090,-118.6291,1207.0000,2020-05-21 00:00:00 UTC,Yes,No,Yes - Fast,No,,No,No,No,Pit Toilets,Yes,Unknown,Yes,,,,,,,,,,,,,,,,,,NV,0,1
12404,"2560 E Lucky Ln, Flagstaff, AZ 86004, USA",Lucky Lane street parking,Wild Camping,"Flagstaff is extremely unfriendly to RVs and travelers. I managed to stay here for awhile last summer. If you park across the street from the Cracker Barrel and Super 8 Motel, you can get free wifi from the motel. This is street parking along a fence with the 40 Westbound Freeway about 15 feet away. A lot of people park here. Potable water: there is a small spigot near the trash area behind Cracker Barrel. The water is delicious! Up the street at the Conoco station is a dump station.",35.1957,-111.6174,2091.3766,2020-08-08 00:00:00 UTC,Yes,No,Yes - Unknown,No,,Yes,No,Potable,No,Yes,No,Yes,,,,,,,,,,,,,,,,,,AZ,1,0
12408,"Mexican Hat Rock Rd, Mexican Hat, UT 84531, USA",Between San Juan river and Mexican Hat,Wild Camping,"A lot near the road along San Juan river, really quiet, there are several places to park, doesn't work if rains",37.1757,-109.8456,0.0000,2020-05-20 00:00:00 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,,UT,0,0
12409,"818-898 E Locust St, San Antonio, TX 78212, USA",Neighborhood Parking,Wild Camping,Super quiet and low profile place to park for the night next to the river around the corner from Viva Tacoland and El Gloria - there were even portapotties that were super clean!,29.4439,-98.4824,0.0000,2020-03-12 00:00:00 UTC,Yes,No,No,No,,No,No,No,No,Unknown,No,Yes,,,,,,,,,,,,,,,,,,TX,0,0


In [259]:
# # make lat long col to match with image filenames {prefix}_{i}_{cat}_{zoomlevel}_{lat}_{long}.png'
# All_USdata_est_wild['filename_end'] = All_USdata_est_wild['Category'].astype(str) + '_17_' + \
#                                     All_USdata_est_wild['Latitude'].astype(str) + '_' + \
#                                     All_USdata_est_wild['Longitude'].astype(str) + '.png'

In [260]:
# pd.set_option('max_colwidth', None)
# All_USdata_est_wild['filename_end'].head()

In [261]:
directory = '/Users/pault/Desktop/github/CampsitePredict/data/symlink_data/only_unaugmented'

In [262]:
# get list of images in os.walk order
filelist = []
for root_path, dirs, files in os.walk(directory, followlinks=False):
    for file in files:
        if file.endswith(".png"):
            # only keep original files not augmented
            if not re.search('rot[0-9]{2,3}.png$', file):
                # parse out part of filename
                filelist.append(file[:-4])

In [263]:
file_df = pd.DataFrame(filelist)

In [137]:
file_df.to_csv('../data/file_df_for_land.csv', index=False)

In [None]:
str.replace('__')

In [32]:
filelist[:10]

['satimg_CO__352_Wild Camping_17_38.98102_-107.32651',
 'satimg_ID_7863_Wild Camping_17_43.149667_-111.052531',
 'satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999',
 'satimg_CO__216_Wild Camping_17_39.337122_-107.660378',
 'satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278',
 'satimg_MI_6491_Wild Camping_17_46.76277_-85.02438000000002',
 'satimg_UT_2806_Wild Camping_17_37.84661_-111.428193',
 'satimg_AK_12112_Wild Camping_17_63.887170999999995_-149.348656',
 'satimg_OR_6000_Wild Camping_17_44.413897_-120.495699',
 'satimg_PA_2971_Wild Camping_17_41.430395_-78.883376']

In [264]:
# make index to reorder the df the same
image_file_df = pd.DataFrame(filelist, columns =['filename']) 

In [174]:
# image_file_df.to_csv('../data/example_to_split.csv')

In [265]:
def func(f, n=6):
    result =['.'.join([j[:n] for j in i.split('.')]) for i in f['filename'].split('_')[-2:]]
    return {'lat_from_file':result[0], 'long_from_file':result[1]}

In [266]:
latlongs = image_file_df[['filename']].apply(func, axis = 1, result_type = 'expand')

In [267]:
image_file_df = pd.concat([image_file_df, latlongs], axis=1)

In [38]:
# split = image_file_df['filename'].str.rsplit('_', 2, expand=True)

In [39]:
# split = split.rename(columns={0: "file_start", 1: "lat", 2: "long"})

In [40]:
# image_file_df['lat_from_file'] = split.iloc[:,0].astype('str')
# image_file_df['long_from_file'] = split.iloc[:,1].astype('str')
# image_file_df['lat_from_file']
# image_file_df['long_from_file'] float( '%.3f'%(x) )df
# image_file_df['lat_from_file'].apply(lambda x: '%.3f'%(x))

In [268]:
image_file_df['order'] = image_file_df.index

In [269]:
len(image_file_df['filename'].unique())

7855

In [270]:
len(image_file_df['filename'].unique())

7855

In [271]:
All_USdata_est_wild[All_USdata_est_wild.duplicated()]

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary
2630,,Lebanon Hills Regional Park Campground,Established Campground,Established campground. Very busy during the summer. Plan ahead if possible. You might get one night at this time of year. Seems typical for places like this near a big metropolis. Pros: It's close in. Cons: It's close in. Plan ahead and enjoy.,44.7789,-93.1639,,2019-07-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Unknown,Potable,Yes,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,55123,MN,1,1
2938,,Greensport RV Park and Campground,Established Campground,"Welcome to Greensport RV Park located at Historic Greensport on Neely Henry Lake in Ashville, AL. Bring your RV or Camper to enjoy fishing, boating, camping, picnicking, and swimming. All sites have full hook-ups, 30/50 amp electric, water, and sewer. We have 32 sites completed at this time and will continue adding amenities as we develop. Come check out what our peaceful retreat has to offer you and your family!",33.8483,-86.0784,,2019-09-30 00:00:00 UTC,Yes,Yes - At Sites,Yes - Average,Unknown,,Unknown,Hot,Yes,Running Water,Yes,No,Yes,Unknown,,,,,,,,,,,,,,,,35953,AL,1,1
3231,,Mother Neff State Park,Established Campground,"Nice little park named after Isabella Neff, mother of Pat Neff, her youngest son, who became governor of Texas in 1921. She donated the first 6 acres for the park upon her death.",31.3327,-97.4643,,2019-09-22 00:00:00 UTC,Yes,Yes,Unknown,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,76557,TX,1,1


In [272]:
All_USdata_est_wild.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [273]:
All_USdata_est_wild.shape

(8322, 40)

In [274]:
All_USdata_est_wild['latlong_test'] = All_USdata_est_wild['Latitude'].astype('str') + '_' + All_USdata_est_wild['Longitude'].astype('str')
# 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [275]:
image_file_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7855 entries, 0 to 7854
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   filename        7855 non-null   object
 1   lat_from_file   7855 non-null   object
 2   long_from_file  7855 non-null   object
 3   order           7855 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 245.6+ KB


In [276]:
image_file_df2 = image_file_df.copy()
image_file_df2['latlong_test'] = image_file_df2['lat_from_file'].astype('str') + '_' + image_file_df2['long_from_file'].astype('str')

In [317]:
image_file_df2.head()

Unnamed: 0,filename,lat_from_file,long_from_file,order,latlong_test
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,38.98102,-107.32651,0,38.98102_-107.32651
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,43.149667,-111.052531,1,43.149667_-111.052531
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,35.2375,-102.83496,2,35.2375_-102.834960
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,39.337122,-107.660378,3,39.337122_-107.660378
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,34.169239,-110.794278,4,34.169239_-110.794278


In [278]:
image_file_df2 = image_file_df.copy()
image_file_df2['latlong_test'] = image_file_df2['lat_from_file'].astype('str') + '_' + image_file_df2['long_from_file'].astype('str')

Unnamed: 0,filename,lat_from_file,long_from_file,order,latlong_test
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,38.98102,-107.32651,0,38.98102_-107.32651
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,43.149667,-111.052531,1,43.149667_-111.052531
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,35.2375,-102.83496,2,35.2375_-102.834960
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,39.337122,-107.660378,3,39.337122_-107.660378
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,34.169239,-110.794278,4,34.169239_-110.794278


In [169]:
duplicated = All_USdata_est_wild[All_USdata_est_wild['latlong_test'].duplicated()]
duplicated.to_csv('../data/duplicated_sites_latlong.csv')

In [171]:
duplicated['']

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,latlong_test
783,"911 Hemlock St, Seward, AK 99664, USA",Williwaw Forest Service Campground,Established Campground,"Nice Forest Service campground, sites are very...",60.1332,-149.4298,-1.0,2017-06-22 00:00:00 UTC,Yes,No,No,No,,No,No,Non-Potable,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,AK,0,1,60.133196_-149.429799
947,,Poe Paddy State Park,Established Campground,$15 campsites here with quite decent bathrooms...,40.8341,-77.4174,,2016-07-29 12:52:45 UTC,Yes,Yes - Not at Sites,No,No,,No,No,Natural Source,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,16882.0,PA,0,1,40.834145_-77.417449
1177,,Copper Mountain RV Park,Established Campground,Location has been updated. \r\n\r\n10 min sout...,32.6882,-113.9551,111.0,2018-03-20 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,No,Yes,Unknown,,,,,,,,,,,,,,,,85356.0,AZ,1,1,32.688244_-113.955072
1178,,The Scenic Road RV Park,Established Campground,"Private park in Quartzite, AZ. Clean park. On ...",32.6882,-113.9551,111.0,2018-02-26 00:00:00 UTC,Yes,Yes - At Sites,Yes - Average,No,,No,Hot,Non-Potable,Running Water,Yes,No,Yes,Unknown,,,,,,,,,,,,,,,,85356.0,AZ,0,1,32.688244_-113.955072
1832,,Moose River Plains FREE camp site #64,Established Campground,This is a good site right next to a little riv...,43.6847,-74.7036,606.7074,2018-10-16 00:00:00 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,13360.0,NY,0,1,43.684708_-74.703609
1833,,Moose River Plains FREE camp site #62,Established Campground,This is a big open spot but it’s up a steep hill,43.6847,-74.7036,606.7074,2018-10-16 00:00:00 UTC,Yes,No,No,No,,No,No,Natural Source,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,13360.0,NY,0,1,43.684708_-74.703609
2352,,Delaware State Forest,Established Campground,Very quiet and secluded feel. Only 5 sites al...,41.1152,-75.2196,,2019-05-22 00:00:00 UTC,Yes,No,No,No,,No,No,No,Yes,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,18302.0,PA,0,1,41.11518_-75.21961
3080,,Dos Rios RV Park,Established Campground,On the Llano River with RV and tent sites rive...,30.651,-99.25,470.0,2019-11-01 00:00:00 UTC,Yes,Yes,Yes - Unknown,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,76856.0,TX,1,1,30.65103_-99.25
3363,,Home Stay Campground,Established Campground,"A great little spot just a few minutes off 26,...",33.4485,-80.7045,48.2821,2020-05-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Yes,Potable,Yes,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,29115.0,SC,1,1,33.44855_-80.70454000000002
5506,,Challis NF dispersed camping,Wild Camping,"Another spot. Close to the 21 byway, but still...",44.2563,-115.0453,0.0,2018-09-30 00:00:00 UTC,Yes,No,No,No,,No,No,No,No,Yes,Yes,Yes,,,,,,,,,,,,,,,,,83278.0,ID,0,0,44.256325_-115.045303


In [298]:
# merge duplicated with images df
please = image_file_df2.merge(All_USdata_est_wild, how='outer', 
                                                  on='latlong_test')

In [301]:
please.to_csv('../data/hope.csv')

In [187]:
duplicated_df.to_csv('../data/duplicated_df.csv')

In [203]:
dup_files_to_delete = duplicated_df.copy()

In [204]:
dup_files_to_delete['filename'] = dup_files_to_delete['filename']#  + '.png'

In [205]:
pd.set_option('max_colwidth', None)
dup_files_to_delete_list = set(dup_files_to_delete['filename'].to_list())

In [222]:
len(dup_files_to_delete_list)

20

In [213]:
dup_files_to_delete_list

{'satimg_AK_782_Established Campground_17_60.133196_-149.429799',
 'satimg_AK_783_Established Campground_17_60.133196_-149.429799',
 'satimg_AZ_1189_Established Campground_17_32.688244_-113.955072',
 'satimg_AZ_1190_Established Campground_17_32.688244_-113.955072',
 'satimg_AZ_944_Established Campground_17_32.688244_-113.955072',
 'satimg_ID_5505_Wild Camping_17_44.256325_-115.045303',
 'satimg_ID_5506_Wild Camping_17_44.256325_-115.045303',
 'satimg_NE_8115_Wild Camping_17_40.68755_-99.38306',
 'satimg_NE_8116_Wild Camping_17_40.68755_-99.38306',
 'satimg_NY_1831_Established Campground_17_43.684708_-74.703609',
 'satimg_NY_1832_Established Campground_17_43.684708_-74.703609',
 'satimg_NY_1833_Established Campground_17_43.684708_-74.703609',
 'satimg_PA_2351_Established Campground_17_41.11518_-75.21961',
 'satimg_PA_2352_Established Campground_17_41.11518_-75.21961',
 'satimg_PA_946_Established Campground_17_40.834145_-77.417449',
 'satimg_PA_947_Established Campground_17_40.834145_-77

In [223]:
# add other possibilities
dup_files_to_delete_list_new = []
for filename in dup_files_to_delete_list:
    dup_files_to_delete_list_new.append(filename)
    dup_files_to_delete_list_new.append(filename + '_rot90')
    dup_files_to_delete_list_new.append(filename + '_rot180')
    dup_files_to_delete_list_new.append(filename + '_rot270')

In [225]:
dup_files_to_delete_list_new = set(dup_files_to_delete_list_new)

In [215]:
test.replace('.png', '')

'satimg_ID_5505_Wild Camping_17_44.256325_-115.045303'

In [212]:
test = dellist[0]
test2 = test.split('_')[:6]
'_'.join(test2)

'satimg_ID_5505_Wild Camping_17_44.256325'

In [231]:
# just remove these images...
directory = '/Users/pault/Desktop/github/CampsitePredict/data/sat_images'
dellist = []
for root_path, dirs, files in os.walk(directory, followlinks=False):
    for file in files:
        # just get the first part of the file (to match rot things etc)
        if file.endswith(".png"):
            test = file.replace('.png', '')
#             print(test)
            if test in dup_files_to_delete_list_new:
                print(os.path.join(root_path, file))
#                 os.remove(os.path.join(root_path, file))
                dellist.append(file)

In [221]:
len(dellist)

18

In [157]:
All_USdata_est_wild[All_USdata_est_wild['latlong_test'].duplicated()].count()

Location                      2
Name                         13
Category                     13
Description                  13
Latitude                     13
Longitude                    13
Altitude                      9
Date verified                13
Open                         13
Electricity                  13
Wifi                         13
Kitchen                      13
Parking                       0
Restaurant                   13
Showers                      13
Water                        13
Toilets                      13
Big rig friendly             13
Tent friendly                13
Pet friendly                 13
Sanitation dump station       9
Outdoor gear                  0
Groceries                     0
Artesian goods                0
Bakery                        0
Rarity in this area           0
Repairs vehicles              0
Repairs motorcycles           0
Repairs bicycles              0
Sells parts                   0
Recycles batteries            0
Recycles

In [154]:
len(All_USdata_est_wild['latlong_test'].unique())

8309

In [158]:
All_USdata_est_wild.shape

(8322, 41)

In [159]:
All_USdata_est_wild.shape[0] - len(All_USdata_est_wild['latlong_test'].unique())

13

In [292]:
All_USdata_est_wild_no_dup = All_USdata_est_wild[~All_USdata_est_wild['latlong_test'].duplicated()]

In [293]:
All_USdata_est_wild_no_dup.shape

(8309, 41)

In [281]:
image_file_df.shape

(7855, 4)

In [282]:
image_file_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7855 entries, 0 to 7854
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   filename        7855 non-null   object
 1   lat_from_file   7855 non-null   object
 2   long_from_file  7855 non-null   object
 3   order           7855 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 245.6+ KB


In [283]:
def func2(col, n=5):
    result = ['.'.join([j[:n] for j in x.split('.')]) for x in col.astype('str')]
#     ['.'.join([j[:n] for j in i.split('.')]) for i in f['filename'].split('_')[-2:]]
    return result

In [44]:
# [x.split('.') for x in All_USdata_est_wild['Latitude'].astype('str')]

In [45]:
# ['.'.join([j[:6] for j in x.split('.')]) for x in All_USdata_est_wild['Latitude'].astype('str')]

In [46]:
# All_USdata_est_wild['Latitude'].split('.')
# ['.'.join([All_USdata_est_wild[:n] for j in i.split('.')])

In [47]:
# All_USdata_est_wild['Latitude'].apply(func2)

In [305]:
All_USdata_est_wild_no_dup['lat_from_df'] = ['.'.join([j[:5] for j in x.split('.')]) for x in All_USdata_est_wild_no_dup['Latitude'].astype('str')]
All_USdata_est_wild_no_dup['long_from_df'] = ['.'.join([j[:5] for j in x.split('.')]) for x in All_USdata_est_wild_no_dup['Longitude'].astype('str')]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [62]:
# All_USdata_est_wild['lat_from_df'] = ['.'.join([j[:6] for j in x.split('.')]) for x in All_USdata_est_wild['Latitude'].astype('str')]
# All_USdata_est_wild['long_from_df'] = ['.'.join([j[:6] for j in x.split('.')]) for x in All_USdata_est_wild['Longitude'].astype('str')]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [307]:
All_USdata_est_wild_no_dup.drop('latlong_test', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [308]:
All_USdata_est_wild_no_dup.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8309 entries, 0 to 12410
Data columns (total 42 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   1718 non-null   object 
 1   Name                       8309 non-null   object 
 2   Category                   8309 non-null   object 
 3   Description                8308 non-null   object 
 4   Latitude                   8309 non-null   float64
 5   Longitude                  8309 non-null   float64
 6   Altitude                   8099 non-null   float64
 7   Date verified              8309 non-null   object 
 8   Open                       8309 non-null   object 
 9   Electricity                8309 non-null   object 
 10  Wifi                       8309 non-null   object 
 11  Kitchen                    8309 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 8309 non-null   obje

In [93]:
type(image_file_df.iloc[0,:][4])
# image_file_df['lat_from_file'].unique()

str

In [98]:
lats = list(image_file_df['lat_from_file'].values)
lats = list(image_file_df['lat_from_file'].to_list)

In [103]:
lats[0]

array(['38.98102', '38.98102'], dtype=object)

In [84]:
image_file_df.groupby('lat_from_file').agg({'filename':'count'})

ValueError: Grouper for 'lat_from_file' not 1-dimensional

In [66]:
All_USdata_est_wild.loc[All_USdata_est_wild['Latitude']==38.46495] 

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,lat_from_df,long_from_df
9479,,I-5/ fishing place,Wild Camping,It’s a large area big enough to park for the n...,38.465,-121.5031,0.0,2020-03-08 00:00:00 UTC,Yes,No,No,No,,Unknown,No,No,No,No,No,Yes,,,,,,,,,,,,,,,,,95832,CA,0,0,38.46495,-121.50308


In [61]:
image_file_df.iloc[12]

filename          satimg_CA_5739_Wild Camping_17_38.46495_-121.5...
lat_from_file                                              38.46495
long_from_file                                           -121.50308
order                                                            12
Name: 12, dtype: object

In [302]:
image_df2

NameError: name 'image_df2' is not defined

In [311]:
All_USdata_est_wild_no_dup

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,lat_from_df,long_from_df
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.2802,-116.1458,0.0000,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA,0,1,33.280188,-116.145787
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0000,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT,0,1,37.001290,-112.035587
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.0954,-145.9805,0.0000,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK,0,0,62.095368,-145.980492
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.7288,-124.0555,0.0000,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,44.728808,-124.055513
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,43.330797,-124.370728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12403,"Unnamed Road, Fallon, NV 89406, USA",hidden cave BLM site,Wild Camping,Parking lot for hidden cave. about 1 mile from highway 50 (turn right if coming from the east). no noise. cell reception from nearby Fallon. The 1 mile graded dirt road starts near the petroglyph site which is on highway 50 (about 6 miles east before Fallon) . it is ok even after rain. some parts leveled. many more locations near by. we were here with 1 more truck.,39.4090,-118.6291,1207.0000,2020-05-21 00:00:00 UTC,Yes,No,Yes - Fast,No,,No,No,No,Pit Toilets,Yes,Unknown,Yes,,,,,,,,,,,,,,,,,,NV,0,1,39.409003,-118.629126
12404,"2560 E Lucky Ln, Flagstaff, AZ 86004, USA",Lucky Lane street parking,Wild Camping,"Flagstaff is extremely unfriendly to RVs and travelers. I managed to stay here for awhile last summer. If you park across the street from the Cracker Barrel and Super 8 Motel, you can get free wifi from the motel. This is street parking along a fence with the 40 Westbound Freeway about 15 feet away. A lot of people park here. Potable water: there is a small spigot near the trash area behind Cracker Barrel. The water is delicious! Up the street at the Conoco station is a dump station.",35.1957,-111.6174,2091.3766,2020-08-08 00:00:00 UTC,Yes,No,Yes - Unknown,No,,Yes,No,Potable,No,Yes,No,Yes,,,,,,,,,,,,,,,,,,AZ,1,0,35.195679,-111.61735
12408,"Mexican Hat Rock Rd, Mexican Hat, UT 84531, USA",Between San Juan river and Mexican Hat,Wild Camping,"A lot near the road along San Juan river, really quiet, there are several places to park, doesn't work if rains",37.1757,-109.8456,0.0000,2020-05-20 00:00:00 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,,UT,0,0,37.175709,-109.84558
12409,"818-898 E Locust St, San Antonio, TX 78212, USA",Neighborhood Parking,Wild Camping,Super quiet and low profile place to park for the night next to the river around the corner from Viva Tacoland and El Gloria - there were even portapotties that were super clean!,29.4439,-98.4824,0.0000,2020-03-12 00:00:00 UTC,Yes,No,No,No,,No,No,No,No,Unknown,No,Yes,,,,,,,,,,,,,,,,,,TX,0,0,29.443920,-98.48244


In [318]:
All_USdata_est_wild_no_dup2 = All_USdata_est_wild_no_dup.copy()
All_USdata_est_wild_no_dup2['latlong_test'] = All_USdata_est_wild_no_dup2['lat_from_df'].astype('str') + '_' + All_USdata_est_wild_no_dup2['long_from_df'].astype('str')

In [None]:
image_file_df2 = image_file_df.copy()
image_file_df2['latlong_test'] = image_file_df2['lat_from_file'].astype('str') + '_' + image_file_df2['long_from_file'].astype('str')

In [322]:
image_file_df3 = image_file_df2.drop(['lat_from_file', 'long_from_file'], axis=1)

In [323]:
image_file_df3

Unnamed: 0,filename,order,latlong_test
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,0,38.98102_-107.32651
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,1,43.149667_-111.052531
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,2,35.2375_-102.834960
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,3,39.337122_-107.660378
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,4,34.169239_-110.794278
...,...,...,...
7850,satimg_UT_1580_Established Campground_17_38.482453_-109.741828,7850,38.482453_-109.741828
7851,satimg_FL_3132_Established Campground_17_25.849862_-80.989081,7851,25.849862_-80.989081
7852,satimg_TN_3372_Established Campground_17_35.613972_-88.040368,7852,35.613972_-88.040368
7853,satimg_SD_2626_Established Campground_17_44.361324_-97.13078,7853,44.361324_-97.13078


In [319]:
All_USdata_est_wild_no_dup2

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,lat_from_df,long_from_df,latlong_test
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.2802,-116.1458,0.0000,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA,0,1,33.280188,-116.145787,33.280188_-116.145787
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.0013,-112.0356,0.0000,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT,0,1,37.001290,-112.035587,37.001290_-112.035587
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.0954,-145.9805,0.0000,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK,0,0,62.095368,-145.980492,62.095368_-145.980492
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.7288,-124.0555,0.0000,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,44.728808,-124.055513,44.728808_-124.055513
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.3308,-124.3707,2.6656,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,43.330797,-124.370728,43.330797_-124.370728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12403,"Unnamed Road, Fallon, NV 89406, USA",hidden cave BLM site,Wild Camping,Parking lot for hidden cave. about 1 mile from highway 50 (turn right if coming from the east). no noise. cell reception from nearby Fallon. The 1 mile graded dirt road starts near the petroglyph site which is on highway 50 (about 6 miles east before Fallon) . it is ok even after rain. some parts leveled. many more locations near by. we were here with 1 more truck.,39.4090,-118.6291,1207.0000,2020-05-21 00:00:00 UTC,Yes,No,Yes - Fast,No,,No,No,No,Pit Toilets,Yes,Unknown,Yes,,,,,,,,,,,,,,,,,,NV,0,1,39.409003,-118.629126,39.409003_-118.629126
12404,"2560 E Lucky Ln, Flagstaff, AZ 86004, USA",Lucky Lane street parking,Wild Camping,"Flagstaff is extremely unfriendly to RVs and travelers. I managed to stay here for awhile last summer. If you park across the street from the Cracker Barrel and Super 8 Motel, you can get free wifi from the motel. This is street parking along a fence with the 40 Westbound Freeway about 15 feet away. A lot of people park here. Potable water: there is a small spigot near the trash area behind Cracker Barrel. The water is delicious! Up the street at the Conoco station is a dump station.",35.1957,-111.6174,2091.3766,2020-08-08 00:00:00 UTC,Yes,No,Yes - Unknown,No,,Yes,No,Potable,No,Yes,No,Yes,,,,,,,,,,,,,,,,,,AZ,1,0,35.195679,-111.61735,35.195679_-111.61735
12408,"Mexican Hat Rock Rd, Mexican Hat, UT 84531, USA",Between San Juan river and Mexican Hat,Wild Camping,"A lot near the road along San Juan river, really quiet, there are several places to park, doesn't work if rains",37.1757,-109.8456,0.0000,2020-05-20 00:00:00 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,,UT,0,0,37.175709,-109.84558,37.175709_-109.84558
12409,"818-898 E Locust St, San Antonio, TX 78212, USA",Neighborhood Parking,Wild Camping,Super quiet and low profile place to park for the night next to the river around the corner from Viva Tacoland and El Gloria - there were even portapotties that were super clean!,29.4439,-98.4824,0.0000,2020-03-12 00:00:00 UTC,Yes,No,No,No,,No,No,No,No,Unknown,No,Yes,,,,,,,,,,,,,,,,,,TX,0,0,29.443920,-98.48244,29.443920_-98.48244


In [328]:
# join with df of data, keep all images, hopefully they have data in the df
image_file_df_final_with_df = image_file_df3.merge(All_USdata_est_wild_no_dup2, how='outer', 
                                                  on = 'latlong_test')

In [313]:
# join with df of data, keep all images, hopefully they have data in the df
image_file_df_final_with_df = image_file_df.merge(All_USdata_est_wild_no_dup, how='outer', 
                                                  left_on=['lat_from_file', 'long_from_file'],
                                                 right_on=['lat_from_df', 'long_from_df'])

In [329]:
image_file_df_final_with_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9632 entries, 0 to 9631
Data columns (total 45 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   filename                   7855 non-null   object 
 1   order                      7855 non-null   float64
 2   latlong_test               9632 non-null   object 
 3   Location                   1718 non-null   object 
 4   Name                       8313 non-null   object 
 5   Category                   8313 non-null   object 
 6   Description                8312 non-null   object 
 7   Latitude                   8313 non-null   float64
 8   Longitude                  8313 non-null   float64
 9   Altitude                   8099 non-null   float64
 10  Date verified              8313 non-null   object 
 11  Open                       8313 non-null   object 
 12  Electricity                8313 non-null   object 
 13  Wifi                       8313 non-null   objec

In [315]:
image_file_df_final_with_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9632 entries, 0 to 9631
Data columns (total 46 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   filename                   7855 non-null   object 
 1   lat_from_file              7855 non-null   object 
 2   long_from_file             7855 non-null   object 
 3   order                      7855 non-null   float64
 4   Location                   1718 non-null   object 
 5   Name                       8313 non-null   object 
 6   Category                   8313 non-null   object 
 7   Description                8312 non-null   object 
 8   Latitude                   8313 non-null   float64
 9   Longitude                  8313 non-null   float64
 10  Altitude                   8099 non-null   float64
 11  Date verified              8313 non-null   object 
 12  Open                       8313 non-null   object 
 13  Electricity                8313 non-null   objec

In [330]:
image_file_df_final_with_df.to_csv('../data/again.csv')