In [94]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline
pd.options.display.float_format = '{:,.10f}'.format
plt.style.use('seaborn-white')
# colorblind safe
plt.style.use('seaborn-colorblind')
plt.style.use('tableau-colorblind10')

# width and precision for f strings
width = 10
precision = 4

# default sizes for plots
# https://matplotlib.org/3.3.0/tutorials/introductory/customizing.html#customizing-with-matplotlibrc-files
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['font.size'] = 16
plt.rcParams['legend.fontsize'] = 'large'
plt.rcParams['figure.titlesize'] = 'medium'
plt.rcParams['lines.linewidth'] = 2

# other settings
pd.set_option("display.precision", 3)
np.set_printoptions(precision=3, suppress=True)
%load_ext autoreload
%autoreload 2
pd.set_option('display.max_columns', None)
%config IPCompleter.greedy=True

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [95]:
# setup dir and import helper functions
import sys, os
import re

# helper_funcs is looked up in the 'src' directory next to the parent of
# sys.path[0]. Only append the path when it is not already present so that
# re-running this cell does not keep growing sys.path with duplicates.
_src_dir = os.path.join(os.path.dirname(sys.path[0]), 'src')
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

import helper_funcs as my_funcs

In [96]:
# Load the image-filename table and the campsite attribute table.
# Only the "NEW" files are used: the older files (image_file_df2.csv and
# All_USdata_est_wild_no_dup3.csv) used to be read first and were then
# immediately overwritten by these two reads, so those dead loads were removed.
image_names = pd.read_csv('../data/image_file_dfNEW.csv')
alldata_df = pd.read_csv('../data/All_USdata_est_wild_no_dupNEW.csv')

In [97]:
# Discard the 'Unnamed: 0' column (presumably the index saved by an earlier to_csv).
image_names = image_names.drop(columns='Unnamed: 0')

In [98]:
# Discard the 'Unnamed: 0' column (presumably the index saved by an earlier to_csv).
alldata_df = alldata_df.drop(columns='Unnamed: 0')

In [99]:
# (rows, columns) of the image-filename table.
image_names.shape

(7855, 5)

In [100]:
# (rows, columns) of the campsite attribute table.
alldata_df.shape

(8309, 43)

In [101]:
# Column dtypes and non-null counts of the image-filename table.
image_names.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7855 entries, 0 to 7854
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   filename        7855 non-null   object 
 1   lat_from_file   7855 non-null   float64
 2   long_from_file  7855 non-null   float64
 3   order           7855 non-null   int64  
 4   latlong_test    7855 non-null   object 
dtypes: float64(2), int64(1), object(2)
memory usage: 307.0+ KB


In [102]:
# Column dtypes and non-null counts of the campsite attribute table.
alldata_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8309 entries, 0 to 8308
Data columns (total 43 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   1718 non-null   object 
 1   Name                       8309 non-null   object 
 2   Category                   8309 non-null   object 
 3   Description                8308 non-null   object 
 4   Latitude                   8309 non-null   float64
 5   Longitude                  8309 non-null   float64
 6   Altitude                   8099 non-null   float64
 7   Date verified              8309 non-null   object 
 8   Open                       8309 non-null   object 
 9   Electricity                8309 non-null   object 
 10  Wifi                       8309 non-null   object 
 11  Kitchen                    8309 non-null   object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 8309 non-null   objec

In [103]:
# Preview the first rows of the campsite attribute table.
alldata_df.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,latlong_test,lat_from_df,long_from_df
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.280188,-116.145788,0.0,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA,0,1,33.2801_-116.1457,33.2801,-116.1457
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.00129,-112.035588,0.0,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT,0,1,37.0012_-112.0355,37.0012,-112.0355
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.095368,-145.980492,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK,0,0,62.0953_-145.9804,62.0953,-145.9804
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.728808,-124.055514,0.0,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,44.7288_-124.0555,44.7288,-124.0555
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.330797,-124.370728,2.6655788422,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,43.3307_-124.3707,43.3307,-124.3707


In [104]:
# Spot-check a single image record.
# Remove the column-width cap first so the long filename is not truncated.
pd.set_option('display.max_colwidth', None)
image_names.loc[79]

filename          satimg_CA_4982_Wild Camping_17_33.75881500000001_-118.14552
lat_from_file                                                   33.7588000000
long_from_file                                                -118.1455000000
order                                                                      79
latlong_test                                                33.7588_-118.1455
Name: 79, dtype: object

In [106]:
# Look up the attribute row whose join key matches the spot-checked image.
# The key uses the 4-decimal 'lat_long' format of the latlong_test column; the
# earlier lookup with the un-truncated coordinates ('33.758815_-118.14552') was
# evaluated but never displayed (only the last expression of a cell is shown),
# so it was removed along with the commented-out experiment.
alldata_df.loc[alldata_df['latlong_test'] == '33.7588_-118.1455']

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,latlong_test,lat_from_df,long_from_df
6199,,Back street by fitness business.,Wild Camping,This location is street parking by the shore and next to a fitness business. Quite a lot of cars park here and there is a 2 hour limit between 9 am and 6 pm. No parking on Thursday mornings from 6 am to 8 am. There is a nice shoreline park to enjoy. It was quiet from approximately 10 pm to 6:30 am.,33.758815,-118.14552,5.3437419171,2020-02-04 08:51:05 UTC,Yes,No,No,Yes,,Unknown,Unknown,No,No,No,No,Yes,,,,,,,,,,,,,,,,,90803.0,CA,0,0,33.7588_-118.1455,33.7588,-118.1455


In [107]:
# Preview the first rows of the image-filename table.
image_names.head()

Unnamed: 0,filename,lat_from_file,long_from_file,order,latlong_test
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,38.981,-107.3265,0,38.9810_-107.3265
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,43.1496,-111.0525,1,43.1496_-111.0525
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,35.2375,-102.8349,2,35.2375_-102.8349
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,39.3371,-107.6603,3,39.3371_-107.6603
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,34.1692,-110.7942,4,34.1692_-110.7942


In [114]:
# Left-join the campsite attributes onto the image table via the shared
# 'latlong_test' key: every image row is kept, and images without a matching
# attribute row get NaN in the attribute columns.
image_file_df_final_with_df = pd.merge(
    image_names,
    alldata_df,
    on='latlong_test',
    how='left',
)

In [115]:
# Column dtypes and non-null counts of the merged table (null counts reveal unmatched images).
image_file_df_final_with_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7863 entries, 0 to 7862
Data columns (total 47 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   filename                   7863 non-null   object 
 1   lat_from_file              7863 non-null   float64
 2   long_from_file             7863 non-null   float64
 3   order                      7863 non-null   int64  
 4   latlong_test               7863 non-null   object 
 5   Location                   1650 non-null   object 
 6   Name                       7851 non-null   object 
 7   Category                   7851 non-null   object 
 8   Description                7850 non-null   object 
 9   Latitude                   7851 non-null   float64
 10  Longitude                  7851 non-null   float64
 11  Altitude                   7665 non-null   float64
 12  Date verified              7851 non-null   object 
 13  Open                       7851 non-null   objec

In [117]:
# Some records could not be matched, so load them here to be fixed;
# the 'Unnamed: 0' column is discarded right after reading.
unmatched = (
    pd.read_csv('../data/All_USdata_est_wild_unmatched.csv')
    .drop(columns='Unnamed: 0')
)

In [118]:
# Preview the first rows of the unmatched-records table.
unmatched.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,lat_from_df,long_from_df,lat_3digits,long_3digits,latlong_test
0,"Borrego Salton Seaway, Borrego Springs, CA 92004, USA",Arroyo Salado Camping,Established Campground,"Free with two vault toilets, nothing else exists Campsite fires are required to be in some sort of metal container. Perhaps ~10 sites for car camping.",33.280188,-116.145788,0.0,2020-02-16 14:20:45 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,CA,0,1,33.2801,-116.1457,33.28,-116.145,33.280_-116.145
1,"Stateline Campground Rd, Kanab, UT 84741, USA",State Line Campground,Established Campground,Cute free BLM camping in near TH baths.,37.00129,-112.035588,0.0,2019-09-29 09:53:41 UTC,Yes,No,No,No,,No,No,No,Pit Toilets,No,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,UT,0,1,37.0012,-112.0355,37.001,-112.035,37.001_-112.035
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,"An old standby for us. Multiple sites for large or small RV's and tents along a windy creek. Hot showers cost .25/minute, wifi close to office.",62.095368,-145.980492,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK,0,0,62.0953,-145.9804,62.095,-145.98,62.095_-145.980
3,"Beverly Beach State Park, Newport, OR 97365, USA",Beverly Beach State Park,Established Campground,"This is a great beach for a first night on the Oregon coast. It is a very nice State Park campground with great beach access under a highway 101 overpass. A Beautiful old growth loop-system campground with full hookups, tent sites (good selection), hot showers, clean bathrooms, but no wifi available. It is not free (which is hard to do in Oregon) but the price was $17 for a tent site. Be sure to check the pay period dates though as we were there in a ""Discovery"" period ($17 was between Oct 1-Apr 30).",44.728808,-124.055514,0.0,2020-02-07 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,44.7288,-124.0555,44.728,-124.055,44.728_-124.055
4,"Cottell Ln, Coos Bay, OR 97420, USA",Sunset Bay State Park,Established Campground,"Another night, another campground. Full hookups, hot showers, no wifi Price: $15/tent site",43.330797,-124.370728,2.665578842,2020-06-27 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,OR,1,1,43.3307,-124.3707,43.33,-124.37,43.330_-124.370


In [135]:
# Re-inspect dtypes and non-null counts of the merged table.
image_file_df_final_with_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7863 entries, 0 to 7862
Data columns (total 47 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   filename                   7863 non-null   object 
 1   lat_from_file              7863 non-null   float64
 2   long_from_file             7863 non-null   float64
 3   order                      7863 non-null   int64  
 4   latlong_test               7863 non-null   object 
 5   Location                   1650 non-null   object 
 6   Name                       7851 non-null   object 
 7   Category                   7851 non-null   object 
 8   Description                7850 non-null   object 
 9   Latitude                   7851 non-null   float64
 10  Longitude                  7851 non-null   float64
 11  Altitude                   7665 non-null   float64
 12  Date verified              7851 non-null   object 
 13  Open                       7851 non-null   objec

In [113]:
# Persist the merged table to disk (to_csv writes the index as an extra first column by default).
# NOTE(review): this writes ..._NEW_outer.csv, while the following cell reads
# ..._NEW.csv — confirm which file is the intended hand-off.
image_file_df_final_with_df.to_csv('../data/image_file_df_final_with_df_NEW_outer.csv')

In [203]:
# Build the set of images to use: start from the saved merged table and
# discard the 'Unnamed: 0' column right after reading.
image_file_df_final_with_df = (
    pd.read_csv('../data/image_file_df_final_with_df_NEW.csv')
    .drop(columns='Unnamed: 0')
)

In [204]:
# Preview the first rows of the reloaded merged table.
image_file_df_final_with_df.head()

Unnamed: 0,filename,lat_from_file,long_from_file,order,latlong_test,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State,water_binary,toilets_binary,lat_from_df,long_from_df
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,38.981,-107.3265,0,38.9810_-107.3265,,Above Paonia State Park,Wild Camping,"It’s not at 0m altitude, but I cannot find out the right altitude at this time. Hopefully somebody can add it.\r\n(MOD NOTE: 2.123m according to Google Earth) \r\n\r\nWe ended up at this BLM spot for one night after a long day of driving. There’s a shelter with a picnic table underneath and a fire pit next to it. Very nice view over the valley and beautiful wild flowers, some little wild animals including little snakes and a lot of birds including hummingbirds. In the morning one woman came there to walk her dogs. Besides that, there were no other people.",38.98102,-107.32651,0.0,2020-06-03 00:00:00 UTC,Yes,No,No,No,,No,No,No,No,No,Yes,Yes,,,,,,,,,,,,,,,,,81434.0,CO,0.0,0.0,38.981,-107.3265
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,43.1496,-111.0525,1,43.1496_-111.0525,,Camping around the lake,Wild Camping,"This is a recreational area and there are signs for a max 5 day stay. There is no service whatsoever but oh, it’s worth it!! Right on the lake, big rig friendly for certain spots. Lots of people riding side-by-sides and dirt bikes on the weekend. Boats and seadoos on the lake but somewhat quiet thru the week days. It’s free.",43.149667,-111.052531,1724.1839256287,2020-06-03 00:00:00 UTC,Yes,No,No,No,,No,No,Natural Source,No,Yes,Yes,Yes,,,,,,,,,,,,,,,,,83428.0,ID,0.0,0.0,43.1496,-111.0525
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,35.2375,-102.8349,2,35.2375_-102.8349,,Texas picnic area,Wild Camping,"Good spot to pull over for the night. Located directly off I-40 . Same as a rest area without amenities. Does have grills and palapas. Plenty of parking for any size rig. Noisy at night, you are next to the interstate.",35.2375,-102.834961,1212.2605075836,2020-05-25 06:33:53 UTC,Yes,No,No,No,,No,No,No,No,Yes,Yes,Yes,,,,,,,,,,,,,,,,,79001.0,TX,0.0,0.0,35.2375,-102.8349
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,39.3371,-107.6603,3,39.3371_-107.6603,,Cow pond and beyond,Wild Camping,"Decided to camp here after locating the site that is located just up the road. I drove past this spot on the way in and it's a lot better than the other. there are a few spots where people have had firerings, plenty of room for tents or campers. got the entire place to myself and I would definitely stay here again. No signs about private property or no camping and it is in the NF. I decided to take the site off to the right as it is tucked back more from the road view. cheers!",39.337122,-107.660378,2274.7775878906,2019-07-29 00:00:00 UTC,Yes,No,No,Yes,,No,No,No,No,Yes,Yes,Yes,,,,,,,,,,,,,,,,,81624.0,CO,0.0,0.0,39.3371,-107.6603
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,34.1692,-110.7942,4,34.1692_-110.7942,,Road 103,Wild Camping,Nice camp spots along dirt roads accessible with 2WD (if not wet) in the National Forest.,34.169239,-110.794278,1993.3764648438,2020-04-17 00:00:00 UTC,Yes,No,No,No,,No,No,No,No,No,Yes,Yes,,,,,,,,,,,,,,,,,85554.0,AZ,0.0,0.0,34.1692,-110.7942


In [205]:
# Remove every row whose filename occurs more than once; keep=False flags all
# copies of a repeated name, so none of them survives.
_repeated_name = image_file_df_final_with_df['filename'].duplicated(keep=False)
image_file_df_final_with_df = image_file_df_final_with_df.loc[~_repeated_name]

In [207]:
# Keep only rows that have a State value; .copy() detaches the result from the
# frame it was sliced from.
_has_state = image_file_df_final_with_df['State'].notna()
image_file_df_final_with_df = image_file_df_final_with_df.loc[_has_state].copy()

In [208]:
# Independent copy of the filename column, restricted to rows with a State value.
_state_known = image_file_df_final_with_df['State'].notna()
filenames = image_file_df_final_with_df.loc[_state_known, 'filename'].copy()

In [209]:
# Display the selected filenames (pandas abbreviates the long Series).
filenames

0                   satimg_CO__352_Wild Camping_17_38.98102_-107.32651
1                 satimg_ID_7863_Wild Camping_17_43.149667_-111.052531
2           satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999
3                 satimg_CO__216_Wild Camping_17_39.337122_-107.660378
4        satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278
                                     ...                              
7858    satimg_UT_1580_Established Campground_17_38.482453_-109.741828
7859     satimg_FL_3132_Established Campground_17_25.849862_-80.989081
7860     satimg_TN_3372_Established Campground_17_35.613972_-88.040368
7861      satimg_SD_2626_Established Campground_17_44.361324_-97.13078
7862        satimg_TN_1296_Established Campground_17_36.55846_-87.9052
Name: filename, Length: 7835, dtype: object

In [210]:
# filenames = filenames + '.png'

In [211]:
# Number of distinct filenames; compare with len(filenames) to spot repeats.
len(filenames.unique())

7835

In [212]:
# Distinct filenames as a Python set.
filenames_set = set(filenames)

In [213]:
# Total number of filenames, repeats included.
len(filenames)

7835

In [214]:
# (rows, columns) of the filtered merged table.
image_file_df_final_with_df.shape

(7835, 47)

In [215]:
# Collect every filename that occurs more than once (all copies are listed).
# The earlier check looked each name up in a set built from these same names,
# so it always produced an empty list and could not reveal duplicates.
dups = filenames[filenames.duplicated(keep=False)].to_list()

In [216]:
# Show the duplicate-filename list (empty means no duplicates were found).
dups

[]

In [224]:
# Append the '.png' extension to every filename.
# Names that already end in '.png' are left untouched so that re-running this
# cell does not produce '...png.png'. Missing values stay missing, as before.
_already_png = filenames.str.endswith('.png', na=False)
filenames = filenames.where(_already_png, filenames + '.png')

In [225]:
# Display the filenames after the '.png' extension was added.
filenames

0                   satimg_CO__352_Wild Camping_17_38.98102_-107.32651.png
1                 satimg_ID_7863_Wild Camping_17_43.149667_-111.052531.png
2           satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999.png
3                 satimg_CO__216_Wild Camping_17_39.337122_-107.660378.png
4        satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278.png
                                       ...                                
7858    satimg_UT_1580_Established Campground_17_38.482453_-109.741828.png
7859     satimg_FL_3132_Established Campground_17_25.849862_-80.989081.png
7860     satimg_TN_3372_Established Campground_17_35.613972_-88.040368.png
7861      satimg_SD_2626_Established Campground_17_44.361324_-97.13078.png
7862        satimg_TN_1296_Established Campground_17_36.55846_-87.9052.png
Name: filename, Length: 7835, dtype: object

In [226]:
# Rebuild the set of distinct filenames, now carrying the '.png' extension.
filenames_set = set(filenames)

In [227]:
# Display the full set of filenames (large output; set ordering is arbitrary).
filenames_set

{'satimg_CA_1897_Wild Camping_17_37.376989_-122.404186.png',
 'satimg_UT_6641_Wild Camping_17_37.515638_-113.325156.png',
 'satimg_MT_11069_Wild Camping_17_44.593843_-111.12836399999999.png',
 'satimg_UT_6162_Wild Camping_17_38.468694_-109.369134.png',
 'satimg_AZ_7992_Wild Camping_17_32.292006_-111.347752.png',
 'satimg_CO__461_Wild Camping_17_39.67635_-105.61886.png',
 'satimg_OR_4963_Wild Camping_17_45.85902_-123.76363899999998.png',
 'satimg_TX_4277_Wild Camping_17_27.485357_-97.270055.png',
 'satimg_NV_6987_Wild Camping_17_38.403347_-114.629466.png',
 'satimg_AZ_4968_Wild Camping_17_34.977198_-111.78027.png',
 'satimg_CO__186_Wild Camping_17_37.452499_-108.22276399999998.png',
 'satimg_CA_2441_Established Campground_17_40.551501_-120.813424.png',
 'satimg_WI_3593_Established Campground_17_46.607297_-91.891711.png',
 'satimg_AK_469_Established Campground_17_60.52074200000001_-150.388491.png',
 'satimg_NY_8474_Wild Camping_17_40.814578999999995_-73.961617.png',
 'satimg_TN_3372_Esta

In [217]:
# image_file_df_final_with_df.to_csv('../data/image_file_df_final_with_df_NO_DUPS.csv')

In [232]:
# Locations used when building the symlinked image folder.
# NOTE(review): these are absolute, machine-specific paths; adjust _DATA_ROOT
# when running on another machine.
_DATA_ROOT = '/Users/pault/Desktop/github/CampsitePredict/data/'

# where the satellite images are read from
directory = _DATA_ROOT + 'sat_images/'
# parent folder for the symlink tree, and the name of the folder created in it
destination = _DATA_ROOT + 'symlink_data/'
dest_dir_name = 'unique_wild_est_for_aligned_model'
# one entry per image class
class_dirs = [
    'Wild Camping',
    'Established Campground',
]

In [235]:
# Create the symlinks for the selected image files via the project helper and
# keep its return value; the helper prints how many symlinks were created.
# NOTE(review): make_symlinks lives in helper_funcs (not shown here) —
# presumably it links files from `directory` into destination/dest_dir_name,
# grouped by the entries of class_dirs; confirm against the helper.
file_dict = my_funcs.make_symlinks(directory, destination, dest_dir_name, class_dirs, filenames_set)

7835 files were created as symlinks.
