# Data Preparation for Find your Destination Travel App 

In [1]:
# Import dependencies to be used for the data cleaning or exploring 
import numpy as np
import pandas as pd
import os 
from bs4 import BeautifulSoup
import requests
from splinter import Browser

Data set for the locations of all US campsites

In [2]:
# Read the campsite CSV from the working directory into a DataFrame
camp_file = "us_campsites.csv"
camp_df = pd.read_csv(filepath_or_buffer=camp_file)

In [3]:
# Data exploration of campsites: the bare expression below makes the notebook
# render the loaded frame as this cell's output
camp_df 

Unnamed: 0,longitude,latitude,code,name,type,phone,dates_open,comments,num_sites,elevation,amenities,state,nearest_town_distance,nearest_town_bearing,city
0,-92.875,42.953,ACKL,Ackley Creek County Park,CP,641.756.3490,,,40,,E DP DW SH L$,IA,1.0,SW,Marble Rock
1,-93.724,41.738,ACOR,Acorn Valley - Saylorville Lake,COE,515.276.0429,early jun-late aug,,70,,E DP DW SH RS PA,IA,11.2,NW,Des Moines
2,-94.643,41.498,ADAI,Adair City Park,CP,641.742.3751,,,12,,E SH,IA,0.2,S,Adair
3,-92.210,43.120,ADOL,Adolph Munson Park,CP,,,,,,NH PT,IA,6.8,NE,New Hampton
4,-92.348,43.090,AIRP,Airport Lake Park,CP,,,primitive - tent,50,,E PT,IA,2.6,NW,New Hampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11403,-95.151,30.665,WOLP,Wolf Creek Park - Lake Livingston,AUTH,936.653.4312,,,100,,WE DP SH,TX,,,
11404,-100.646,36.227,WOLC,Wolf Creek Park - Perryton,CP,806.435.4559,,,94,2665.0,WE DP DW SH,TX,14.8,SE,Perryton
11405,-96.535,30.295,YEGU,Yegua Creek - Lake Somerville,COE,979.596.1622,all year,,80,,WE DP SH RS,TX,3.5,S,Somerville
11406,-97.129,29.305,YOAK,Yoakum City Park,CP,,,concrete - not good for tents,25,,WES DP FT DW SH L$,TX,1.8,NE,Yoakum


In [None]:
# Rank the states by number of campsites: count the non-null entries of every
# column per state, then order the states by the "longitude" count, largest first
camps = camp_df.groupby("state").count()
camps_sort = camps.sort_values(by="longitude", ascending=False)
camps_sort

In [5]:
# Shrink the dataset to the columns we care about: the coordinates, plus the
# campsite name and its state
camp_df = camp_df.loc[:, ["latitude", "longitude", "name", "state"]]
camp_df

Unnamed: 0,latitude,longitude,name,state
0,42.953,-92.875,Ackley Creek County Park,IA
1,41.738,-93.724,Acorn Valley - Saylorville Lake,IA
2,41.498,-94.643,Adair City Park,IA
3,43.120,-92.210,Adolph Munson Park,IA
4,43.090,-92.348,Airport Lake Park,IA
...,...,...,...,...
11403,30.665,-95.151,Wolf Creek Park - Lake Livingston,TX
11404,36.227,-100.646,Wolf Creek Park - Perryton,TX
11405,30.295,-96.535,Yegua Creek - Lake Somerville,TX
11406,29.305,-97.129,Yoakum City Park,TX


In [6]:
# Write the trimmed campsite table to disk: header row kept, index column left out
camp_df.to_csv(path_or_buf="campsite_selected.csv", header=True, index=False)

Data for the value of the dollar per state
Note: I started this analysis here, but the page returns a lot of extra data, so I used scraping software instead to make it easier.

In [7]:
# USA TODAY article on the cost of living / value of a dollar in every state;
# this is the page fetched and parsed in the cells below
url = "https://www.usatoday.com/story/money/economy/2018/05/10/cost-of-living-value-of-dollar-in-every-state/34567549/"

In [8]:
# Retrieve page with the requests module.
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() stops on a 4xx/5xx reply instead of silently
# parsing an error page as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')
print(soup.prettify())

<!DOCTYPE html>
<html class="gnt__njs" lang="en-US">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width,initial-scale=1,minimum-scale=1" name="viewport"/>
  <meta content="#0098FE" name="theme-color"/>
  <title>
   Cost of living: The value of a dollar in every state
  </title>
  <meta content="Cost of living: The purchasing power of a dollar in every state" property="og:title"/>
  <link href="https://amp.usatoday.com/amp/34567549" rel="amphtml"/>
  <meta content="Michael B. Sauter" property="article:author"/>
  <meta content="free" property="article:content_tier"/>
  <meta content="false" property="article:opinion"/>
  <link href="//user.usatoday.com" rel="dns-prefetch"/>
  <link href="https://user.usatoday.com" rel="preconnect"/>
  <link href="//content-static.gannett.com" rel="dns-prefetch"/>
  <link href="//www.gannett-cdn.com" rel="dns-prefetch"/>
  <link href="//securepubads.g.doubleclick.net" rel="dns-prefetch"/>
  <link href="https://securepubads.g.doubleclic

In [9]:
# Collect every <div> with the "gnt_ar_b" class; per the original note, this is
# the container that holds all the states and their corresponding info
results = soup.find_all(name='div', attrs={'class': "gnt_ar_b"})
results

[<div class="gnt_ar_b"><aside aria-label="Video - How much money you need to earn to be considered rich in these cities" class="gnt_em gnt_em__fp gnt_em_vp__tp gnt_em__el"><div aria-live="polite" class="gnt_em_vp_w" data-c-vt="teal" data-g-r="vp_tp" data-t-pl="teal-hero"><button aria-label="Play video" class="gnt_em_vp_a gnt_em_vp__tp_a" data-c-vpattrs='{"muted":true,"xprp":1,"bContent":false,"an":"usatoday","pn":"USA TODAY","vidNum":1,"docking":true}' data-c-vpdata='{"adsEnabled":true,"awsPath":"money","byline":"Buzz60","canonicalURL":"","contentSourceCode":"BZ60","createDate":"2018-02-27T13:17:23.817Z","credit":"Buzz60","duration":86698,"headline":"","hlsURL":"","id":"110885214","image":{"url":"http://www.gannett-cdn.com/-mm-/621b37bf60a76adf00ec82d1926457b31d7ab3b6/c=0-30-580-356&amp;r=1280x720/local/-/media/2018/02/24/USATODAY/usatsports/large-pile-of-hundred-dollar-bills-cash-money-savings-rich_large.jpg","opto":true},"initialPublishDate":"2018-02-27T13:27:17.587Z","keywords":"Sim

In [None]:
# Walk through each matched container, pulling out its paragraph tags and its
# bulleted lists, and print the lists for inspection
for result in results:
    title = result.find_all(name='p', attrs={'class': "gnt_ar_b_p"})
    value = result.find_all(name='ul', attrs={'class': "gnt_ar_b_ul"})
    print(value)

In [23]:
# Build one row per matched container: its paragraph tags go in "Name" and its
# bulleted lists go in "Value".
state_names = []
value = []

# Iterate over `results` directly. The earlier version looped over
# range(len(results)) but called find_all on a `result` variable left over from
# a previous cell, which fails on a fresh kernel and would repeat the same
# element for every row when there is more than one match.
for result in results:
    state_names.append(result.find_all('p', class_="gnt_ar_b_p"))
    value.append(result.find_all('ul', class_="gnt_ar_b_ul"))

value_of_dolar = pd.DataFrame({'Name': state_names, "Value": value})
value_of_dolar

Unnamed: 0,Name,Value
0,[[While a dollar bill looks and feels the same...,"[[[<strong>Value of a dollar:</strong>, $1.15..."
