In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from datetime import datetime
import re

csv_file = 'national_parks.csv'
csv_clean = 'national_parks_clean.csv'

In [2]:
park_df = pd.read_csv(csv_file)
park_df

Unnamed: 0,Name,Location,coordinates,Date established as park[5][10],Area (2019)[11],Recreation visitors (2018)[8],Description
0,Acadia,Maine,44.35°N 68.21°W,26-Feb-19,"49,076.63 acres (198.6 km2)",3537575,Covering most of Mount Desert Island and other...
1,American Samoa,American Samoa,14.25°S 170.68°W,31-Oct-88,"8,256.67 acres (33.4 km2)",28626,The southernmost national park is on three Sam...
2,Arches,Utah,38.68°N 109.57°W,12-Nov-71,"76,678.98 acres (310.3 km2)",1663557,"This site features more than 2,000 natural san..."
3,Badlands,South Dakota,43.75°N 102.50°W,10-Nov-78,"242,755.94 acres (982.4 km2)",1008942,"The Badlands are a collection of buttes, pinna..."
4,Big Bend dagger,Texas,29.25°N 103.25°W,12-Jun-44,"801,163.21 acres (3,242.2 km2)",440091,Named for the prominent bend in the Rio Grande...
...,...,...,...,...,...,...,...
57,Wind Cave,South Dakota,43.57°N 103.48°W,9-Jan-03,"33,970.84 acres (137.5 km2)",656397,Wind Cave is distinctive for its calcite fin f...
58,Wrangell,Alaska,61.00°N 142.00°W,2-Dec-80,"8,323,146.48 acres (33,682.6 km2)",79450,"An over 8 million acres (32,375 km2) plot of m..."
59,Yellowstone double-dagger,"Wyoming, Montana, Idaho",44.60°N 110.50°W,"March 1, 1872","2,219,790.71 acres (8,983.2 km2)",4115000,"Situated on the Yellowstone Caldera, the park ..."
60,Yosemite *,California,37.83°N 119.50°W,"October 1, 1890","761,747.50 acres (3,082.7 km2)",4009436,"Yosemite features sheer granite cliffs, except..."


In [3]:
park_df['coordinates']

0      44.35°N 68.21°W
1     14.25°S 170.68°W
2     38.68°N 109.57°W
3     43.75°N 102.50°W
4     29.25°N 103.25°W
            ...       
57    43.57°N 103.48°W
58    61.00°N 142.00°W
59    44.60°N 110.50°W
60    37.83°N 119.50°W
61    37.30°N 113.05°W
Name: coordinates, Length: 62, dtype: object

In [4]:
park_df['lat'] = 0
park_df['lon'] = 0
park_df['new_name'] = 0
park_df['area, acres'] = 0
park_df['area, km^2'] = 0

In [5]:
re.split(' dagger| ', park_df[:][4:5]['Name'].item())[0]

'Big'

In [6]:
park_df['Area (2019)[11]']

0           49,076.63 acres (198.6 km2)
1             8,256.67 acres (33.4 km2)
2           76,678.98 acres (310.3 km2)
3          242,755.94 acres (982.4 km2)
4        801,163.21 acres (3,242.2 km2)
                    ...                
57          33,970.84 acres (137.5 km2)
58    8,323,146.48 acres (33,682.6 km2)
59     2,219,790.71 acres (8,983.2 km2)
60       761,747.50 acres (3,082.7 km2)
61         147,242.66 acres (595.9 km2)
Name: Area (2019)[11], Length: 62, dtype: object

In [7]:
re.split(' dagger| double-dagger| \*','Wrangell–St. Elias *')[0]

'Wrangell–St. Elias'

In [8]:
park_df['Name'][58:59].item()

'Wrangell'

In [9]:
re.split('°W|°N |°S ', park_df.loc[park_df['Name'] == 'Acadia', 'coordinates'].item())[1]

'68.21'

In [10]:
re.split(' acres|\n|\xa0km2|\)|\(',park_df.loc[park_df['Name'] == 'Acadia', 'Area (2019)[11]'].item())

['49,076.63', ' ', '198.6', '', '']

In [11]:
new_name = []
for name in park_df['Name']:
    print(name)
    acre_split = ' acres|\n|\xa0km2\)|\('
    old_name = park_df.loc[park_df['Name'] == name, 'Name'].item()
    new_name.append(re.split(' dagger| double-dagger| \*', old_name)[0])
    park_df.loc[park_df['Name'] == name, 'lat'] = re.split('°W|°N |°S ',park_df.loc[park_df['Name'] == name, 'coordinates'].item())[0]
    park_df.loc[park_df['Name'] == name, 'lon'] = '-' + re.split('°W|°N |°S ',park_df.loc[park_df['Name'] == name, 'coordinates'].item())[1]
    park_df.loc[park_df['Name'] == name, 'area, acres'] = re.split(acre_split,park_df.loc[park_df['Name'] == name, 'Area (2019)[11]'].item())[0]
    park_df.loc[park_df['Name'] == name, 'area, km^2'] = re.split(acre_split,park_df.loc[park_df['Name'] == name, 'Area (2019)[11]'].item())[2]

Acadia
American Samoa
Arches
Badlands
Big Bend dagger
Biscayne
Black Canyon of the Gunnison
Bryce Canyon
Canyonlands
Capitol Reef
Carlsbad Caverns
Channel Islands
Congaree
Crater Lake
Cuyahoga Valley
Death Valley
Denali dagger
Dry Tortugas dagger
Everglades double-dagger
Gates of the Arctic
Gateway Arch
Glacier double-dagger
Glacier Bay double-dagger
Grand Canyon *
Grand Teton dagger
Great Basin
Great Sand Dunes
Great Smoky Mountains double-dagger
Guadalupe Mountains
Haleakalā dagger
Hawaiʻi Volcanoes double-dagger
Hot Springs
Indiana Dunes
Isle Royale dagger
Joshua Tree dagger
Katmai
Kenai Fjords
Kings Canyon dagger
Kobuk Valley
Lake Clark
Lassen Volcanic
Mammoth Cave double-dagger
Mesa Verde *
Mount Rainier
North Cascades
Olympic double-dagger
Petrified Forest
Pinnacles
Redwood *
Rocky Mountain dagger
Saguaro
Sequoia dagger
Shenandoah
Theodore Roosevelt
Virgin Islands
Voyageurs
White Sands
Wind Cave
Wrangell
Yellowstone double-dagger
Yosemite *
Zion


In [12]:
park_df.loc[park_df['Name'] == 'American Samoa', 'lat'] = '-' + park_df.loc[park_df['Name'] == 'American Samoa', 'lat']

In [13]:
park_df['Area (2019)[11]']

0           49,076.63 acres (198.6 km2)
1             8,256.67 acres (33.4 km2)
2           76,678.98 acres (310.3 km2)
3          242,755.94 acres (982.4 km2)
4        801,163.21 acres (3,242.2 km2)
                    ...                
57          33,970.84 acres (137.5 km2)
58    8,323,146.48 acres (33,682.6 km2)
59     2,219,790.71 acres (8,983.2 km2)
60       761,747.50 acres (3,082.7 km2)
61         147,242.66 acres (595.9 km2)
Name: Area (2019)[11], Length: 62, dtype: object

In [14]:
park_df['lat']

0      44.35
1     -14.25
2      38.68
3      43.75
4      29.25
       ...  
57     43.57
58     61.00
59     44.60
60     37.83
61     37.30
Name: lat, Length: 62, dtype: object

In [15]:
park_df['lon']

0      -68.21
1     -170.68
2     -109.57
3     -102.50
4     -103.25
       ...   
57    -103.48
58    -142.00
59    -110.50
60    -119.50
61    -113.05
Name: lon, Length: 62, dtype: object

In [16]:
new_name

['Acadia',
 'American Samoa',
 'Arches',
 'Badlands',
 'Big Bend',
 'Biscayne',
 'Black Canyon of the Gunnison',
 'Bryce Canyon',
 'Canyonlands',
 'Capitol Reef',
 'Carlsbad Caverns',
 'Channel Islands',
 'Congaree',
 'Crater Lake',
 'Cuyahoga Valley',
 'Death Valley',
 'Denali',
 'Dry Tortugas',
 'Everglades',
 'Gates of the Arctic',
 'Gateway Arch',
 'Glacier',
 'Glacier Bay',
 'Grand Canyon',
 'Grand Teton',
 'Great Basin',
 'Great Sand Dunes',
 'Great Smoky Mountains',
 'Guadalupe Mountains',
 'Haleakalā',
 'Hawaiʻi Volcanoes',
 'Hot Springs',
 'Indiana Dunes',
 'Isle Royale',
 'Joshua Tree',
 'Katmai',
 'Kenai Fjords',
 'Kings Canyon',
 'Kobuk Valley',
 'Lake Clark',
 'Lassen Volcanic',
 'Mammoth Cave',
 'Mesa Verde',
 'Mount Rainier',
 'North Cascades',
 'Olympic',
 'Petrified Forest',
 'Pinnacles',
 'Redwood',
 'Rocky Mountain',
 'Saguaro',
 'Sequoia',
 'Shenandoah',
 'Theodore Roosevelt',
 'Virgin Islands',
 'Voyageurs',
 'White Sands',
 'Wind Cave',
 'Wrangell',
 'Yellowstone'

In [17]:
park_df['Name'] = new_name

In [18]:
park_df['Name']

0             Acadia
1     American Samoa
2             Arches
3           Badlands
4           Big Bend
           ...      
57         Wind Cave
58          Wrangell
59       Yellowstone
60          Yosemite
61              Zion
Name: Name, Length: 62, dtype: object

In [19]:
park_df.columns

Index(['Name', 'Location', 'coordinates', 'Date established as park[5][10]',
       'Area (2019)[11]', 'Recreation visitors (2018)[8]', 'Description',
       'lat', 'lon', 'new_name', 'area, acres', 'area, km^2'],
      dtype='object')

In [20]:
new_cols = ['Name', 'Location', 'coordinates','lat', 'lon', 'Date established as park[5][10]',
       'Area (2019)[11]','area, acres', 'area, km^2', 'Recreation visitors (2018)[8]', 'Description']

In [21]:
park_df = park_df[new_cols]

In [22]:
park_clean_df = park_df.rename(columns = {'Date established as park[5][10]':'Date established as park', 'Area (2019)[11]':'Area, acres', 'Recreation visitors (2018)[8]' : 'Recreation visitors'})

In [23]:
park_clean_df

Unnamed: 0,Name,Location,coordinates,lat,lon,Date established as park,"Area, acres","area, acres","area, km^2",Recreation visitors,Description
0,Acadia,Maine,44.35°N 68.21°W,44.35,-68.21,26-Feb-19,"49,076.63 acres (198.6 km2)",49076.63,198.6,3537575,Covering most of Mount Desert Island and other...
1,American Samoa,American Samoa,14.25°S 170.68°W,-14.25,-170.68,31-Oct-88,"8,256.67 acres (33.4 km2)",8256.67,33.4,28626,The southernmost national park is on three Sam...
2,Arches,Utah,38.68°N 109.57°W,38.68,-109.57,12-Nov-71,"76,678.98 acres (310.3 km2)",76678.98,310.3,1663557,"This site features more than 2,000 natural san..."
3,Badlands,South Dakota,43.75°N 102.50°W,43.75,-102.50,10-Nov-78,"242,755.94 acres (982.4 km2)",242755.94,982.4,1008942,"The Badlands are a collection of buttes, pinna..."
4,Big Bend,Texas,29.25°N 103.25°W,29.25,-103.25,12-Jun-44,"801,163.21 acres (3,242.2 km2)",801163.21,3242.2,440091,Named for the prominent bend in the Rio Grande...
...,...,...,...,...,...,...,...,...,...,...,...
57,Wind Cave,South Dakota,43.57°N 103.48°W,43.57,-103.48,9-Jan-03,"33,970.84 acres (137.5 km2)",33970.84,137.5,656397,Wind Cave is distinctive for its calcite fin f...
58,Wrangell,Alaska,61.00°N 142.00°W,61.00,-142.00,2-Dec-80,"8,323,146.48 acres (33,682.6 km2)",8323146.48,33682.6,79450,"An over 8 million acres (32,375 km2) plot of m..."
59,Yellowstone,"Wyoming, Montana, Idaho",44.60°N 110.50°W,44.60,-110.50,"March 1, 1872","2,219,790.71 acres (8,983.2 km2)",2219790.71,8983.2,4115000,"Situated on the Yellowstone Caldera, the park ..."
60,Yosemite,California,37.83°N 119.50°W,37.83,-119.50,"October 1, 1890","761,747.50 acres (3,082.7 km2)",761747.50,3082.7,4009436,"Yosemite features sheer granite cliffs, except..."


In [24]:
park_clean_df.to_csv(csv_clean, index = False)