In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from util import summarize

# Apply one seaborn theme to every figure drawn below.
sns.set_theme(style="whitegrid")

# Read the merged / cleaned data frame stored in `df_merged.feather`.
df = pd.read_feather(path="df_merged.feather")

In [None]:
# One palette for the entire notebook: conservation status -> hex colour.
# Key order matters, because the canonical ordering below is read from it.
CONS_PALETTE = dict(
    [
        ("In Recovery", "#1b9e77"),
        ("Endangered", "#d95f02"),
        ("Species of Concern", "#7570b3"),
        ("Threatened", "#e7298a"),
    ]
)

# Canonical ordering (also used for legends), taken from the palette keys so
# the two constants cannot drift apart.
CONS_ORDER = list(CONS_PALETTE)


# Bird Conservation Concerns

In [None]:
# Bird rows whose conservation status is anything other than 'No Concern',
# ordered from fewest to most observations. reset_index() renumbers the rows
# and retains the previous index as a new column.
cons_bird = (
    df.loc[lambda d: d['category'].eq('Bird') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
# Count plot of cons_bird rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
bird_statuses = cons_bird['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_bird,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in bird_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Bird Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig4_a_birdcount.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_bird, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_bird.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='50 Bird Species with Lowest Observations on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig4_b_bird_obs.png')

# Mammal Conservation Concerns

In [None]:
# Mammal rows whose conservation status is anything other than 'No Concern',
# ordered from fewest to most observations. reset_index() renumbers the rows
# and retains the previous index as a new column.
cons_mammal = (
    df.loc[lambda d: d['category'].eq('Mammal') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
def dedupe_common_names_strict(s):
    """Collapse case-insensitive duplicates in a comma-separated name list.

    The string is split on commas, each fragment is stripped of surrounding
    whitespace, and fragments that are equal when lower-cased are merged into
    one. When several spellings of a name occur, the last one in the input is
    kept. Empty fragments (from stray or trailing commas) are discarded so
    the result never begins with a dangling ", ".

    Parameters
    ----------
    s : str or missing value
        Comma-separated names, or a value for which ``pd.isna`` is true.

    Returns
    -------
    str or missing value
        The surviving names sorted with the default (case-sensitive) string
        ordering and joined with ", ". A missing input is returned unchanged.
    """
    if pd.isna(s):
        return s

    spelling_by_key = {}
    for fragment in s.split(','):
        name = fragment.strip()
        if not name:
            # "a,,b" or "a," would otherwise contribute an empty name.
            continue
        # Later occurrences overwrite earlier ones: the last spelling wins.
        spelling_by_key[name.lower()] = name

    return ', '.join(sorted(spelling_by_key.values()))


In [None]:
# Count plot of cons_mammal rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
mammal_statuses = cons_mammal['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_mammal,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in mammal_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

# Title previously read "Specices"; corrected to "Species".
ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Mammal Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig5_a_mammalcount.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_mammal, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_mammal.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='50 Mammal Species with Lowest Observation Counts on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig5_b_mammal_obs.png')

# Fish Conservation Concerns

In [None]:
# Fish rows whose conservation status is anything other than 'No Concern',
# ordered from fewest to most observations. reset_index() renumbers the rows
# and retains the previous index as a new column.
cons_fish = (
    df.loc[lambda d: d['category'].eq('Fish') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
# Count plot of cons_fish rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
fish_statuses = cons_fish['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_fish,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in fish_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

# Title previously read "Specices"; corrected to "Species".
ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Fish Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig6_a_fishcount.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_fish, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_fish.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='Total Fish Observations on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig6_b_fish_obs.png')

# Amphibian Conservation

In [None]:
# Amphibian rows whose conservation status is anything other than
# 'No Concern', ordered from fewest to most observations. reset_index()
# renumbers the rows and retains the previous index as a new column.
cons_amph = (
    df.loc[lambda d: d['category'].eq('Amphibian') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
# Count plot of cons_amph rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
amph_statuses = cons_amph['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_amph,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in amph_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

# Title previously read "Specices"; corrected to "Species".
ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Amphibian Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig7_a_amph_count.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_amph, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_amph.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='Total Amphibian Observations on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig7_b_amph_obs.png')

# Reptile Conservation

In [None]:
# Reptile rows whose conservation status is anything other than 'No Concern',
# ordered from fewest to most observations. reset_index() renumbers the rows
# and retains the previous index as a new column.
cons_rept = (
    df.loc[lambda d: d['category'].eq('Reptile') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
# Count plot of cons_rept rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
rept_statuses = cons_rept['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_rept,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in rept_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

# Title previously read "Specices"; corrected to "Species".
ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Reptile Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig8_a_reptcount.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_rept, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_rept.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='Total Reptile Observations on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig8_b_rept_obs.png')

# Vascular Plants

In [None]:
# Vascular Plant rows whose conservation status is anything other than
# 'No Concern', ordered from fewest to most observations. reset_index()
# renumbers the rows and retains the previous index as a new column.
cons_vasc = (
    df.loc[lambda d: d['category'].eq('Vascular Plant') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
# Count plot of cons_vasc rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
vasc_statuses = cons_vasc['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_vasc,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in vasc_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

# Title previously read "Specices"; corrected to "Species".
ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Vascular Plant Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig9_a_vasc_count.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_vasc, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_vasc.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='Total Vascular Plant Observations on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig9_b_vasc_obs.png')

# Nonvascular Plants

In [None]:
# Nonvascular Plant rows whose conservation status is anything other than
# 'No Concern', ordered from fewest to most observations. reset_index()
# renumbers the rows and retains the previous index as a new column.
cons_nvasc = (
    df.loc[lambda d: d['category'].eq('Nonvascular Plant') & d['conservation_status'].ne('No Concern')]
    .sort_values('observations')
    .reset_index()
)

In [None]:
# Count plot of cons_nvasc rows per conservation status.
# Only statuses present in the data get an x-axis slot; hue_order and palette
# always use the notebook-wide CONS_ORDER / CONS_PALETTE.
nvasc_statuses = cons_nvasc['conservation_status'].unique()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(
    data=cons_nvasc,
    x='conservation_status',
    hue='conservation_status',
    order=[status for status in CONS_ORDER if status in nvasc_statuses],
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    stat='count',
    dodge=False,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

# Title previously read "Specices"; corrected to "Species".
ax.set(
    xlabel='',
    ylabel='Count',
    title='Count of Nonvascular Plant Species on Conservation Status',
)

fig.tight_layout()
fig.savefig('fig10_a_nvasc_count.png')

In [None]:
# Horizontal bars for the first 50 rows of cons_nvasc, which was built sorted
# ascending by 'observations', so these are the least-observed entries.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=cons_nvasc.iloc[:50],
    y='common_names',
    x='observations',
    hue='conservation_status',
    hue_order=CONS_ORDER,
    palette=CONS_PALETTE,
    dodge=False,
    errorbar=None,
    legend='full',
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
sns.move_legend(
    ax,
    'upper left',
    bbox_to_anchor=(1.02, 1),
    title='Conservation Status',
)

ax.set(
    xlabel='Total Observations',
    ylabel='Common Name',
    title='Total Nonvascular Plant Observations on Conservation Status',
)

fig.tight_layout()
# Saved as the "_b_" panel, matching the other observation figures
# (fig4_b ... fig9_b); the file name previously used "_a_".
# NOTE(review): update any document that embeds the old
# 'fig10_a_nvasc_obs.png' path.
fig.savefig('fig10_b_nvasc_obs.png')