In [1]:
# generic imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# notebook settings
%config IPCompleter.greedy=True
%load_ext autoreload
%autoreload 2 
# precision and plot settings
num_precision = 3
np.set_printoptions(precision=num_precision, suppress=True)
pd.set_option('display.float_format', lambda x: f'{x:,.{num_precision}f}')
pd.set_option("display.precision", num_precision)
pd.set_option('display.max_columns', None)

plt.style.use('tableau-colorblind10')
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['font.size'] = 16
plt.rcParams['legend.fontsize'] = 'large'
plt.rcParams['figure.titlesize'] = 'medium'
plt.rcParams['lines.linewidth'] = 2

In [11]:
# setup dir and import helper functions
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'src'))
import helper_funcs as my_funcs

In [4]:
# subset the new data we have to see what the numbers and class balances look like
USdata_with_est_campgrounds = pd.read_csv('../data/USdata_est_campgrounds_zip_states_combined_cleaned.csv')
USdata_with_est_campgrounds.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis=1, inplace=True)

KeyError: "['Unnamed: 0.1'] not found in axis"

In [7]:
USdata_with_est_campgrounds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12412 entries, 0 to 12411
Data columns (total 38 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Location                   2241 non-null   object 
 1   Name                       12412 non-null  object 
 2   Category                   12412 non-null  object 
 3   Description                12411 non-null  object 
 4   Latitude                   12412 non-null  float64
 5   Longitude                  12412 non-null  float64
 6   Altitude                   12103 non-null  float64
 7   Date verified              12412 non-null  object 
 8   Open                       12412 non-null  object 
 9   Electricity                11609 non-null  object 
 10  Wifi                       11609 non-null  object 
 11  Kitchen                    11609 non-null  object 
 12  Parking                    0 non-null      float64
 13  Restaurant                 11609 non-null  obj

In [9]:
# names = ['CO', 'CA', 'AZ', 'OR', 'UT', 'WA']
names = ['TX', 'FL', 'AK', 'MT', 'NM', 'ID', 'WY', 'NV', 'NY']

In [8]:
USdata_with_est_campgrounds.groupby('State').agg('count')['Description'].sort_values(ascending=False).head(20)

State
CA    1698
AZ     886
OR     729
CO     707
WA     692
UT     651
TX     576
FL     465
AK     452
MT     446
NM     426
ID     383
WY     298
NV     285
NY     197
VA     161
NC     157
TN     152
MI     144
SD     144
Name: Description, dtype: int64

In [11]:
USdata_with_est_campgrounds_other_states = USdata_with_est_campgrounds[USdata_with_est_campgrounds['State'].isin(names)].copy()

In [12]:
USdata_with_est_campgrounds_other_states.groupby('State').agg('count')['Description'].sort_values(ascending=False).head(20)

State
TX    576
FL    465
AK    452
MT    446
NM    426
ID    383
WY    298
NV    285
NY    197
Name: Description, dtype: int64

In [78]:
USdata_with_est_campgrounds_other_states.head()

Unnamed: 0,Location,Name,Category,Description,Latitude,Longitude,Altitude,Date verified,Open,Electricity,Wifi,Kitchen,Parking,Restaurant,Showers,Water,Toilets,Big rig friendly,Tent friendly,Pet friendly,Sanitation dump station,Outdoor gear,Groceries,Artesian goods,Bakery,Rarity in this area,Repairs vehicles,Repairs motorcycles,Repairs bicycles,Sells parts,Recycles batteries,Recycles oil,Bio fuel,Electric vehicle charging,Composting sawdust,Recycling center,zip_code,State
2,"Glenn Hwy, Glennallen, AK 99588, USA",Tolsona River RV Park and Campground,Established Campground,An old standby for us. Multiple sites for lar...,62.0954,-145.9805,0.0,2016-05-27 11:38:27 UTC,Yes,Unknown,Unknown,Unknown,,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,AK
15,"12878 East Rebarchek Avenue, Palmer, AK 99645,...",Palmer State Fairgrounds,Established Campground,"Gated fairgrounds, full hook ups",61.578,-149.1368,61.7309,2014-09-04 17:21:15 UTC,Yes,Yes - At Sites,Yes - Average,No,,No,No,Potable,Running Water,Yes,No,No,Unknown,,,,,,,,,,,,,,,,,AK
16,"Unnamed Road, Helmville, MT 59843, USA",Browns Lake,Established Campground,Located on Browns lake. Sate camp site with fe...,46.9514,-113.0107,,2014-08-18 00:00:00 UTC,Yes,No,No,No,,No,No,Natural Source,Pit Toilets,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,MT
26,"300 Sierra Ln, Verdi, NV 89439, USA",Gold Ranch RV,Established Campground,Best little RV park in the Reno/Sparks area. C...,39.5017,-120.0004,1531.1493,2018-02-18 12:15:32 UTC,Yes,Yes - At Sites,Unknown,Unknown,,Yes,Unknown,Yes,Unknown,Unknown,Unknown,Unknown,Unknown,,,,,,,,,,,,,,,,,NV
30,"NM-9, Columbus, NM 88029, USA",Pancho Villa State Park,Established Campground,A surprisingly nice and inexpensive campground...,31.827,-107.6429,1229.0,2020-03-03 00:00:00 UTC,Yes,Yes - At Sites,No,No,,No,Hot,Potable,Running Water,Yes,Yes,Yes,Unknown,,,,,,,,,,,,,,,,,NM


In [None]:
for_summary = USdata_with_est_campgrounds_other_states[['State', 'Category', 'Description']]

In [86]:
counts = for_summary.groupby(['State','Category']).agg('count').reset_index()

In [87]:
counts

Unnamed: 0,State,Category,Description
0,AK,Established Campground,136
1,AK,Informal Campsite,65
2,AK,Showers,13
3,AK,Water,22
4,AK,Wild Camping,216
5,FL,Established Campground,179
6,FL,Informal Campsite,148
7,FL,Short-term Parking,4
8,FL,Showers,11
9,FL,Water,18


In [93]:
counts['%'] = round(counts['Description'].div(counts.groupby('State')['Description'].transform('sum'))*100, 2)

In [94]:
counts

Unnamed: 0,State,Category,Description,%
0,AK,Established Campground,136,30.09
1,AK,Informal Campsite,65,14.38
2,AK,Showers,13,2.88
3,AK,Water,22,4.87
4,AK,Wild Camping,216,47.79
5,FL,Established Campground,179,38.49
6,FL,Informal Campsite,148,31.83
7,FL,Short-term Parking,4,0.86
8,FL,Showers,11,2.37
9,FL,Water,18,3.87


In [101]:
# just keep major classes
wild_est = counts[counts['Category'].isin(['Established Campground', 'Wild Camping'])].copy()
#counts[counts['%'] > 20]
wild_est['pct_total'] = round(wild_est['Description'].div(wild_est.groupby('State')['Description'].transform('sum'))*100, 2)

In [107]:
wild_est = wild_est.drop(['%'], axis=1)
wild_est

Unnamed: 0,State,Category,Description,pct_total
0,AK,Established Campground,136,38.64
4,AK,Wild Camping,216,61.36
5,FL,Established Campground,179,63.03
10,FL,Wild Camping,105,36.97
11,ID,Established Campground,119,43.27
15,ID,Wild Camping,156,56.73
17,MT,Established Campground,187,55.16
21,MT,Wild Camping,152,44.84
23,NM,Established Campground,127,41.37
28,NM,Wild Camping,180,58.63


In [108]:
# focus on some
wild_est_focus = wild_est[wild_est['State'].isin(['ID', 'MT', 'NM'])]

In [109]:
wild_est_focus

Unnamed: 0,State,Category,Description,pct_total
11,ID,Established Campground,119,43.27
15,ID,Wild Camping,156,56.73
17,MT,Established Campground,187,55.16
21,MT,Wild Camping,152,44.84
23,NM,Established Campground,127,41.37
28,NM,Wild Camping,180,58.63


In [115]:
wild_est_focus.groupby('Category')['Description'].agg('sum')

Category
Established Campground    433
Wild Camping              488
Name: Description, dtype: int64

In [114]:
wild_est_focus.groupby('Category')['Description'].agg('sum').div(wild_est_focus['Description'].sum())

Category
Established Campground   0.4701
Wild Camping             0.5299
Name: Description, dtype: float64

In [13]:
cat_counts = USdata_with_est_campgrounds_other_states.loc[:,['Category', 'Description']].groupby(['Category']).agg(
    description_count = ('Description','count'))
cat_counts
total = cat_counts['description_count'].sum()
description_pct = cat_counts.groupby('description_count').apply(lambda x: 100 * x / total)
description_pct

Unnamed: 0_level_0,description_count
Category,Unnamed: 1_level_1
Eco-Friendly,0.085
Established Campground,35.0057
Informal Campsite,23.3277
Short-term Parking,0.1984
Showers,1.8991
Water,4.3084
Wild Camping,35.1757


In [15]:
USdata_with_est_campgrounds_other_states_TX = USdata_with_est_campgrounds_other_states[USdata_with_est_campgrounds_other_states['State'] == 'TX']

In [16]:
cat_counts = USdata_with_est_campgrounds_other_states_TX.loc[:,['Category', 'Description']].groupby(['Category']).agg(
    description_count = ('Description','count'))
cat_counts
total = cat_counts['description_count'].sum()
description_pct = cat_counts.groupby('description_count').apply(lambda x: 100 * x / total)
description_pct

Unnamed: 0_level_0,description_count
Category,Unnamed: 1_level_1
Established Campground,42.5347
Informal Campsite,33.8542
Showers,1.0417
Water,4.1667
Wild Camping,18.4028
