# Biodiversity in U.S. National Parks

## North American Birds of Prey

In [114]:
# Importing the necessary modules
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np

## Loading in the data

# `parks.csv`

In [115]:
# Read the park-level table and report its column names and dimensions
parks_data = pd.read_csv("parks.csv")
park_columns = parks_data.columns.tolist()
park_shape = parks_data.shape
print(f"Columns: {park_columns}\n")
print(f"(Rows, Columns): {park_shape}")
parks_data.head()

Columns: ['Park Code', 'Park Name', 'State', 'Acres', 'Latitude', 'Longitude']

(Rows, Columns): (56, 6)


Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [116]:
# Normalise the column labels to snake_case (lower-case, spaces -> underscores)
parks_data.columns = (
    parks_data.columns
    .str.lower()
    .str.replace(" ", "_", regex=False)
)

#### Creating a list of the Southwestern states.

Arizona, California, Colorado, Nevada, New Mexico, Oklahoma, Texas, Utah

In [117]:
# Two-letter postal codes of the Southwestern states, in the order listed above
sw_states_list = [
    "AZ",  # Arizona
    "CA",  # California
    "CO",  # Colorado
    "NV",  # Nevada
    "NM",  # New Mexico
    "OK",  # Oklahoma
    "TX",  # Texas
    "UT",  # Utah
]

In [118]:
# Select the parks located in a Southwestern state, grouped in the same state
# order as `sw_states_list`.
#
# The per-state slices are gathered first and concatenated once. Growing a
# frame with `pd.concat` inside the loop copies all accumulated rows on every
# iteration, and seeding it with an empty all-object frame relies on pandas
# ignoring empty entries when inferring dtypes, which is deprecated.
state_slices = [parks_data[parks_data['state'] == state] for state in sw_states_list]

# `pd.concat` rejects an empty list, so fall back to an empty slice that keeps
# the columns and dtypes of `parks_data`.
sw_data = pd.concat(state_slices) if state_slices else parks_data.iloc[0:0]

sw_data.head()

Unnamed: 0,park_code,park_name,state,acres,latitude,longitude
22,GRCA,Grand Canyon National Park,AZ,1217403,36.06,-112.14
42,PEFO,Petrified Forest National Park,AZ,93533,35.07,-109.78
46,SAGU,Saguaro National Park,AZ,91440,32.25,-110.5
10,CHIS,Channel Islands National Park,CA,249561,34.01,-119.42
31,JOTR,Joshua Tree National Park,CA,789745,33.79,-115.9


In [119]:
# Distinct park names in order of first appearance, kept for filtering later
sw_parks_list = sw_data["park_name"].drop_duplicates().tolist()
park_count = len(sw_parks_list)
print(f"We have {park_count} parks: \n\n{sw_parks_list}")

We have 23 parks: 

['Grand Canyon National Park', 'Petrified Forest National Park', 'Saguaro National Park', 'Channel Islands National Park', 'Joshua Tree National Park', 'Lassen Volcanic National Park', 'Pinnacles National Park', 'Redwood National Park', 'Sequoia and Kings Canyon National Parks', 'Yosemite National Park', 'Black Canyon of the Gunnison National Park', 'Great Sand Dunes National Park and Preserve', 'Mesa Verde National Park', 'Rocky Mountain National Park', 'Great Basin National Park', 'Carlsbad Caverns National Park', 'Big Bend National Park', 'Guadalupe Mountains National Park', 'Arches National Park', 'Bryce Canyon National Park', 'Canyonlands National Park', 'Capitol Reef National Park', 'Zion National Park']


Useful to save a `sw_parks_list` variable for later

Create separate DataFrames for each state $\in$ `sw_states_list`

In [120]:
# Map each state code to a DataFrame holding only that state's parks
d = {
    state: pd.DataFrame(sw_data[sw_data["state"] == state])
    for state in sw_states_list
}

# Report how many parks (rows) each state contributes
for state in sw_states_list:
    print(f"\n{state}:\nShape: {d[state].shape}")


AZ:
Shape: (3, 6)

CA:
Shape: (7, 6)

CO:
Shape: (4, 6)

NV:
Shape: (1, 6)

NM:
Shape: (1, 6)

OK:
Shape: (0, 6)

TX:
Shape: (2, 6)

UT:
Shape: (5, 6)


In [121]:
# Each state's DataFrame is looked up in `d` by its two-letter code.
# Example: the California parks
d["CA"]

Unnamed: 0,park_code,park_name,state,acres,latitude,longitude
10,CHIS,Channel Islands National Park,CA,249561,34.01,-119.42
31,JOTR,Joshua Tree National Park,CA,789745,33.79,-115.9
36,LAVO,Lassen Volcanic National Park,CA,106372,40.49,-121.51
43,PINN,Pinnacles National Park,CA,26606,36.48,-121.16
44,REDW,Redwood National Park,CA,112512,41.3,-124.0
47,SEKI,Sequoia and Kings Canyon National Parks,CA,865952,36.43,-118.68
54,YOSE,Yosemite National Park,CA,761266,37.83,-119.5


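Seven parks isn't enough for a state-level analysis, and California has the most of any state here.
Probably best to keep all the Southwestern parks together rather than splitting by state.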

The maps below give an idea of where these parks are.

<img src="Images/California_Coast.png"/>

<img src="Images/Desert.png"/>

<img src="Images/Area_Baja.png"/>

[AZ, CO, NV, UT](https://earth.google.com/web/search/Zion+National+Park,+Utah,+USA/@35.13603563,-108.8328106,1267.20627283a,2369647.23679662d,35y,0h,0t,0r/data=CigiJgokCanCP9QYRUNAEZ9uI2M12UJAGZSefyJpnlvAIQD8Bzs89FvA)
[CA](https://earth.google.com/web/search/Zion+National+Park,+Utah,+USA/@37.45209159,-120.51900674,-23.85042741a,2315631.43578798d,35y,0h,0t,0r/data=CigiJgokCanCP9QYRUNAEZ9uI2M12UJAGZSefyJpnlvAIQD8Bzs89FvA)
[Baja, TX](https://earth.google.com/web/search/Zion+National+Park,+Utah,+USA/@37.45209159,-120.51900674,-23.85042741a,2315631.43578798d,35y,0h,0t,0r/data=CigiJgokCanCP9QYRUNAEZ9uI2M12UJAGZSefyJpnlvAIQD8Bzs89FvA)

---

# `species.csv`

In [122]:
# Load the species records and summarise their shape, dtypes and cardinality.
#
# `low_memory=False` makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The saved output of this cell contains the tail
# of a pandas warning for this call, most likely the mixed-type DtypeWarning,
# which this option is documented to avoid.
species_data = pd.read_csv('species.csv', low_memory=False)
print(f"(Rows, Columns): {species_data.shape}\n")
print(f"Type of data:\n\n{species_data.dtypes}\n")
print(f"The number of unique values:\n\n{species_data.nunique()}")
species_data.head()

(Rows, Columns): (119248, 14)

Type of data:

Species ID             object
Park Name              object
Category               object
Order                  object
Family                 object
Scientific Name        object
Common Names           object
Record Status          object
Occurrence             object
Nativeness             object
Abundance              object
Seasonality            object
Conservation Status    object
Unnamed: 13            object
dtype: object

The number of unique values:

Species ID             119248
Park Name                  56
Category                   14
Order                     554
Family                   2332
Scientific Name         46022
Common Names            35826
Record Status              54
Occurrence                  7
Nativeness                  5
Abundance                   8
Seasonality                24
Conservation Status        11
Unnamed: 13                 3
dtype: int64


  species_data = pd.read_csv('species.csv')


Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status,Unnamed: 13
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern,
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered,
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,,


In [123]:
# Normalise the column labels to snake_case (lower-case, spaces -> underscores)
species_data.columns = (
    species_data.columns
    .str.lower()
    .str.replace(" ", "_", regex=False)
)
species_columns = species_data.columns.tolist()
print(f"Columns: {species_columns}\n")

Columns: ['species_id', 'park_name', 'category', 'order', 'family', 'scientific_name', 'common_names', 'record_status', 'occurrence', 'nativeness', 'abundance', 'seasonality', 'conservation_status', 'unnamed:_13']



### Isolate Birds

In [124]:
# Keep only the rows whose category is exactly 'Bird'
is_bird = species_data["category"] == 'Bird'
bird_species = species_data[is_bird]
bird_species.head()

Unnamed: 0,species_id,park_name,category,order,family,scientific_name,common_names,record_status,occurrence,nativeness,abundance,seasonality,conservation_status,unnamed:_13
55,ACAD-1055,Acadia National Park,Bird,Accipitriformes,Accipitridae,Accipiter cooperii,Cooper's Hawk,Approved,Present,Native,Uncommon,,Species of Concern,
56,ACAD-1056,Acadia National Park,Bird,Accipitriformes,Accipitridae,Accipiter gentilis,"Eastern Goshawk, Goshawk, Northern Goshawk",Approved,Present,Native,Uncommon,Breeder,,
57,ACAD-1057,Acadia National Park,Bird,Accipitriformes,Accipitridae,Accipiter striatus,"Northern Sharp-Shinned Hawk, Sharp-Shinned Hawk",Approved,Present,Native,Common,Breeder,Species of Concern,
58,ACAD-1058,Acadia National Park,Bird,Accipitriformes,Accipitridae,Aquila chrysaetos,"American Golden Eagle, Golden Eagle",Approved,Present,Native,Occasional,Vagrant,Species of Concern,
59,ACAD-1059,Acadia National Park,Bird,Accipitriformes,Accipitridae,Buteo jamaicensis,"Eastern Red-Tailed Hawk, Red-Tailed Hawk",Approved,Present,Native,Common,Breeder,,


In [125]:
# Dimensions and per-column distinct-value counts of the bird subset
bird_shape = bird_species.shape
bird_unique_counts = bird_species.nunique()
print(f"(Rows, Columns): {bird_shape}\n")
print(f"The number of unique values:\n\n{bird_unique_counts}")

(Rows, Columns): (14601, 14)

The number of unique values:

species_id             14601
park_name                 56
category                   1
order                     24
family                    86
scientific_name         1436
common_names            1551
record_status             10
occurrence                 6
nativeness                 4
abundance                  7
seasonality               23
conservation_status        8
unnamed:_13                1
dtype: int64


### Isolate Southwest States By Park Name

In [126]:
# Reminder of the Southwestern park names used to filter the bird records below
print(sw_parks_list)

['Grand Canyon National Park', 'Petrified Forest National Park', 'Saguaro National Park', 'Channel Islands National Park', 'Joshua Tree National Park', 'Lassen Volcanic National Park', 'Pinnacles National Park', 'Redwood National Park', 'Sequoia and Kings Canyon National Parks', 'Yosemite National Park', 'Black Canyon of the Gunnison National Park', 'Great Sand Dunes National Park and Preserve', 'Mesa Verde National Park', 'Rocky Mountain National Park', 'Great Basin National Park', 'Carlsbad Caverns National Park', 'Big Bend National Park', 'Guadalupe Mountains National Park', 'Arches National Park', 'Bryce Canyon National Park', 'Canyonlands National Park', 'Capitol Reef National Park', 'Zion National Park']


In [127]:
# Keep only the birds recorded in a Southwestern park, grouped in the same
# park order as `sw_parks_list`.
#
# The per-park slices are gathered first and concatenated once. Growing a frame
# with `pd.concat` inside the loop copies all accumulated rows on every
# iteration, and seeding it with an empty all-object frame relies on pandas
# ignoring empty entries when inferring dtypes, which is deprecated.
park_slices = [bird_species[bird_species['park_name'] == park] for park in sw_parks_list]

# `pd.concat` rejects an empty list, so fall back to an empty slice that keeps
# the columns and dtypes of `bird_species`.
sw_birds = pd.concat(park_slices) if park_slices else bird_species.iloc[0:0]

print(f"Shape Of Data: {sw_birds.shape}")
sw_birds.head()

Shape Of Data: (6514, 14)


Unnamed: 0,species_id,park_name,category,order,family,scientific_name,common_names,record_status,occurrence,nativeness,abundance,seasonality,conservation_status,unnamed:_13
42081,GRCA-1106,Grand Canyon National Park,Bird,Accipitriformes,Accipitridae,Accipiter cooperii,Cooper's Hawk,Approved,Present,Native,Common,Breeder,Species of Concern,
42082,GRCA-1107,Grand Canyon National Park,Bird,Accipitriformes,Accipitridae,Accipiter gentilis,Northern Goshawk,Approved,Present,Native,Common,Breeder,,
42083,GRCA-1108,Grand Canyon National Park,Bird,Accipitriformes,Accipitridae,Accipiter striatus,Sharp-Shinned Hawk,Approved,Present,Native,Common,Breeder,Species of Concern,
42084,GRCA-1109,Grand Canyon National Park,Bird,Accipitriformes,Accipitridae,Aquila chrysaetos,Golden Eagle,Approved,Present,Native,Uncommon,Breeder,Species of Concern,
42085,GRCA-1110,Grand Canyon National Park,Bird,Accipitriformes,Accipitridae,Buteo albonotatus,Zone-Tailed Hawk,Approved,Present,Native,Common,Breeder,,


#### Exploring `conservation_status` column

In [128]:
# Distinct values of `conservation_status`; `nunique` ignores NaN, `unique` lists it
status_col = sw_birds["conservation_status"]
print(f"There are {status_col.nunique()} categories:\n\n{status_col.unique()}")

There are 7 categories:

['Species of Concern' nan 'In Recovery' 'Endangered' 'Threatened'
 'Under Review' 'Breeder' 'Resident']


In [129]:
# Number of missing statuses, then the row count for each recorded status
missing_status_count = sw_birds["conservation_status"].isna().sum()
print(f"NaN value count: {missing_status_count}\n")
sw_birds.groupby("conservation_status").size()

NaN value count: 5233



conservation_status
Breeder                  4
Endangered              27
In Recovery             34
Resident                 4
Species of Concern    1163
Threatened              29
Under Review            20
dtype: int64

### `conservation_status`
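- `Breeder`: not a conservation status; it is a `seasonality` value, so these rows are probably misaligned in the source CSV (TODO: confirm)
- `Endangered`: seriously at risk of extinction
- `In Recovery`: formerly `Endangered`, but no longer in danger of extinction throughout all or a significant portion of its range
- `Resident`: not a conservation status; it is a `seasonality` value, so these rows are probably misaligned in the source CSV (TODO: confirm)
- `Species of Concern`: declining or appears to be in need of conservation
- `Threatened`: vulnerable to endangerment in the near future
- `Under Review`: TODO: definition needed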


Need to research [IUCN](https://en.wikipedia.org/wiki/IUCN_Red_List) Red List.
- What do we populate NaN with?

#### `Least Concern`?

---
### `record_status`

In [130]:
# Distinct values of `record_status`
record_status_col = sw_birds["record_status"]
print(f"There are {record_status_col.nunique()} categories:\n\n{record_status_col.unique()}")

There are 10 categories:

['Approved' 'In Review' ' Northern Goshawk' ' Northern Pintail'
 ' Common Poorwill' ' Pigeon Hawk' ' Bushtit' ' American Crow' ' Catbird'
 ' Robin']


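Strange... why is this column populated with common names? The values look shifted over from `common_names`, possibly because of unquoted commas in the CSV (TODO: confirm).
Drop this column!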

---
### `occurrence`

In [112]:
# Distinct values of `occurrence`
occurrence_col = sw_birds["occurrence"]
print(f"There are {occurrence_col.nunique()} categories:\n\n{occurrence_col.unique()}")

There are 6 categories:

['Present' 'Not Present' 'Not Present (False Report)' 'Not Confirmed'
 'Not Present (Historical Report)' nan 'Approved']


Drop this column!

---
### `nativeness`

In [131]:
# Distinct values of `nativeness`
nativeness_col = sw_birds["nativeness"]
print(f"There are {nativeness_col.nunique()} categories:\n\n{nativeness_col.unique()}")

There are 4 categories:

['Native' 'Not Native' 'Unknown' nan 'Present']


Drop this column!

---
### `abundance`

In [132]:
# Distinct values of `abundance`
abundance_col = sw_birds["abundance"]
print(f"There are {abundance_col.nunique()} categories:\n\n{abundance_col.unique()}")

There are 7 categories:

['Common' 'Uncommon' 'Rare' 'Occasional' nan 'Abundant' 'Unknown' 'Native']


In [133]:
# Number of missing abundance values, then the row count for each recorded value
missing_abundance_count = sw_birds["abundance"].isna().sum()
print(f"NaN value count: {missing_abundance_count}\n")
sw_birds.groupby("abundance").size()

NaN value count: 943



abundance
Abundant       178
Common        1222
Native           8
Occasional    1169
Rare          1078
Uncommon      1231
Unknown        685
dtype: int64

This could be useful...

---
### `seasonality`

In [134]:
# Distinct values of `seasonality`
seasonality_col = sw_birds["seasonality"]
print(f"There are {seasonality_col.nunique()} categories:\n\n{seasonality_col.unique()}")

There are 21 categories:

['Breeder' 'Migratory, Winter' 'Vagrant' 'Migratory' 'Breeder, Winter'
 'Resident' nan 'Winter' 'Resident, Winter' 'Summer, Vagrant'
 'Breeder, Resident, Summer' 'Resident, Summer' 'Migratory, Vagrant'
 'Summer' 'Breeder, Resident, Migratory, Summer'
 'Breeder, Resident, Summer, Winter' 'Breeder, Migratory'
 'Migratory, Summer' 'Winter, Vagrant' 'Rare' 'Uncommon' 'Common']


We don't have date/time/season data.
Don't think we can use this

### Will lose columns;
- `species_id`
- `unnamed:_13`

---

# Milestone

---

## Which columns to lose??

In [None]:
# Answer "is it a bird of prey?" by searching each bird's common names for
# raptor keywords and storing the matched keywords in a `bird_of_prey` column.

# Bird of prey list - Is this exhaustive?
birds_of_prey = ["Eagle", "Hawk", "Falcon", "Buzzard", "Harrier", "Kite", "Owl", "Osprey", "Vulture", "Condor", "Kestrel"]

# A single capture group of alternatives, so `findall` returns every keyword hit.
# The search is case-sensitive.
raptor_pattern = '({0})'.format('|'.join(birds_of_prey))

# A missing common name makes `.str.findall` return NaN, and joining NaN raises
# TypeError, so treat missing names as "no keyword found".
keyword_hits = sw_birds['common_names'].fillna('').str.findall(raptor_pattern)

# Join the hits into one string per row; rows without a match get an empty
# string. A keyword that occurs in several common names is repeated.
sw_birds['bird_of_prey'] = keyword_hits.apply('. '.join)
