In [58]:
import pandas as pd
from pathlib import Path

# Load the aerial census table from its pickle and preview the first two rows.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling —
# presumably this file is produced by an earlier step of this project; confirm
# before loading a copy obtained from anywhere else.
df_a2_aerial_cencus = pd.read_pickle("../data/pkl/df_a2_aerial_cencus.pkl")
df_a2_aerial_cencus.head(2)

Unnamed: 0,Date,Year,Month,Property,Sector,Block,Species,Total,Category,Remarks,ObjectId,GlobalID,CreationDate,Creator,EditDate,Editor
0,2005-09-01,2005,September,Ol Pejeta,Eastern,,Elephant,40,Mega herbivore,,9513,69a31103-37bd-4189-93c7-0de8643687d5,2021-01-22 12:56:33.973,alfred.kibungei_Olpejeta,2021-01-22 12:56:33.973,alfred.kibungei_Olpejeta
1,2005-09-01,2005,September,Ol Pejeta,Eastern,,Buffalo,243,Mega herbivore,,9514,e8c6e83a-4de6-46b5-8a9e-299fb0a9077f,2021-01-22 12:56:33.973,alfred.kibungei_Olpejeta,2021-01-22 12:56:33.973,alfred.kibungei_Olpejeta


### What this cell does (summary)

- **Filters data** to `Property = "Ol Pejeta"`, then builds one view per **Sector** (`Eastern`, `Western`) plus a combined **All**-sectors view.
- **Excludes species** in a configured list (e.g., `["Kori bustard", "Ostrich"]`).
- Builds **abundance (counts) pivots**: rows = *Species*, columns = *Year*, values = sum of *Total*.
- Adds **Row_Total** and **Column_Total** to the counts pivots.
- Computes **rank tables** per Year from the counts without totals (1 = most abundant; tied species share the same, lowest rank).
- **Saves** each table as **PKL**:
  - Counts: `../data/pkl/a2_df_census_ol_pejeta_{sector}.pkl` (e.g., `eastern`, `western`, `all`)
  - Ranks:  `../data/pkl/a2_df_census_ol_pejeta_{sector}_rank.pkl`
- **Writes to one Excel workbook** `../data/export/excel/a2_df_census_ol_pejeta_by_sector.xlsx`:
  - Counts to sheets: `Eastern`, `Western`, `All`
  - Ranks to sheets:  `Eastern_rank`, `Western_rank`, `All_rank`
- **Re-runs safely**: if the Excel file exists, sheets with the same names are **replaced** (others are kept).

> Requires `openpyxl` for Excel I/O (`pip install openpyxl`).


### Set species to exclude from the census tables

In [59]:
# Species names to leave out of the census tables (matched against "Species").
exclude = [
    "Kori bustard",
    "Ostrich",
    "Northern white rhino",
]

In [60]:
# === Counts + Ranks (one cell) ===============================================
from pathlib import Path
import pandas as pd
import numpy as np

# --- CONFIG ---
property_name = "Ol Pejeta"
sectors = ["Eastern", "Western"]

# Base filter: keep only the rows whose "Property" equals the configured name
df_base = df_a2_aerial_cencus.loc[lambda frame: frame["Property"].eq(property_name)]

# Helper: abundance pivot (Species x Year) without totals (for both counts & ranks)
def make_abundance(dfin: pd.DataFrame) -> pd.DataFrame:
    """Pivot census rows into a Species x Year table of summed counts.

    Parameters
    ----------
    dfin : pd.DataFrame
        Rows with at least the columns "Species", "Year" and "Total".

    Returns
    -------
    pd.DataFrame
        One row per species, one column per year; each cell is the sum of
        "Total" for that pair, with 0 where a species has no rows in a year.
        No total row/column is added. The column axis is named "Year".
    """
    pivot = dfin.pivot_table(
        values="Total",
        index="Species",
        columns="Year",
        aggfunc="sum",
        fill_value=0,
    )
    return pivot.rename_axis(columns="Year")

# Helper: add Row_Total and Column_Total margins to a counts pivot
def add_totals(counts: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``counts`` with a total column and a total row appended.

    Parameters
    ----------
    counts : pd.DataFrame
        Numeric table (here: Species x Year counts). It is not modified.

    Returns
    -------
    pd.DataFrame
        ``counts`` plus a "Row_Total" column (sum across each row) and a
        "Column_Total" row (sum down each column). Because the row is added
        after the column, its "Row_Total" cell holds the grand total.
        The column axis is named "Year".
    """
    # assign() works on a copy, so the caller's frame keeps its shape.
    with_totals = counts.assign(Row_Total=counts.sum(axis=1))
    # Summing after Row_Total exists makes the bottom-right cell the grand total.
    with_totals.loc["Column_Total"] = with_totals.sum(axis=0)
    return with_totals.rename_axis(columns="Year")

# Helper: rank table (rank within each Year; 1 = highest)
def make_ranks(abund: pd.DataFrame) -> pd.DataFrame:
    """Rank the rows of ``abund`` within each column, largest value first.

    Parameters
    ----------
    abund : pd.DataFrame
        Numeric table (here: Species x Year counts without totals).

    Returns
    -------
    pd.DataFrame
        Same shape as ``abund`` with nullable ``Int64`` ranks: 1 marks the
        largest value in a column, and tied values all receive the smallest
        rank of their group. The column axis is named "Year".
    """
    # Rank down each column independently, in descending order of value.
    descending = abund.rank(axis="index", method="min", ascending=False)
    # rank() yields floats; cast to the nullable integer dtype for display.
    year_ranks = descending.astype("Int64")
    year_ranks.columns.name = "Year"
    return year_ranks

# Output locations: pickles and the Excel workbook live in separate folders
pkl_dir, xlsx_dir = Path("../data/pkl"), Path("../data/export/excel")

# Create both folders (and any missing parents); a no-op when they already exist
xlsx_dir.mkdir(parents=True, exist_ok=True)
pkl_dir.mkdir(parents=True, exist_ok=True)

excel_path = xlsx_dir / "a2_df_census_ol_pejeta_by_sector.xlsx"

# Tables built by the loop below, keyed by label
counts_tables, rank_tables = {}, {}

# Build one counts table and one rank table per sector, plus the combined 'All'
for label in [*sectors, "All"]:
    # 'All' applies no sector restriction; any other label keeps only that sector
    df_subset = df_base if label == "All" else df_base[df_base["Sector"].eq(label)]

    # Drop the species listed in `exclude`
    is_excluded = df_subset["Species"].isin(exclude)
    df_clean = df_subset[~is_excluded].copy()

    # Nothing left for this label: report it and move on to the next one
    if df_clean.empty:
        print(f"[WARN] No rows after filtering for: {label}; skipping.")
        continue

    # Species x Year counts, still without totals (input for both outputs)
    abund = make_abundance(df_clean)

    # Counts: add totals, remember the table, persist it
    counts = counts_tables[label] = add_totals(abund)
    counts.to_pickle(pkl_dir / f"a2_df_census_ol_pejeta_{label.lower()}.pkl")

    # Ranks: computed from the totals-free table, remembered and persisted
    ranks = rank_tables[label] = make_ranks(abund)
    ranks.to_pickle(pkl_dir / f"a2_df_census_ol_pejeta_{label.lower()}_rank.pkl")

    print(f"Saved PKLs → {label}: counts + ranks")

# Write/append to the Excel workbook:
# - Counts to sheets: Eastern, Western, All
# - Ranks  to sheets: Eastern_rank, Western_rank, All_rank
# Requires openpyxl:  pip install openpyxl
excel_exists = Path(excel_path).exists()

if not counts_tables and not rank_tables:
    # Every label was skipped above, so there is nothing to write. Opening a new
    # workbook and closing it without a single sheet makes openpyxl fail on
    # save, and reporting "saved" would be misleading — leave the file untouched.
    print(f"[WARN] No tables to write; Excel workbook not updated: {excel_path}")
else:
    # Existing workbook: append, replacing sheets that share a name (others are kept).
    # New workbook: plain write mode — if_sheet_exists is only accepted with mode="a".
    writer_kwargs = (
        {"mode": "a", "if_sheet_exists": "replace"} if excel_exists else {"mode": "w"}
    )
    with pd.ExcelWriter(excel_path, engine="openpyxl", **writer_kwargs) as writer:
        for label, tbl in counts_tables.items():
            tbl.to_excel(writer, sheet_name=label)
        for label, tbl in rank_tables.items():
            tbl.to_excel(writer, sheet_name=f"{label}_rank")
    print(f"✅ Excel saved to: {excel_path}")
# ==============================================================================

Saved PKLs → Eastern: counts + ranks
Saved PKLs → Western: counts + ranks
Saved PKLs → All: counts + ranks
✅ Excel saved to: ../data/export/excel/a2_df_census_ol_pejeta_by_sector.xlsx
