# Chapter 18: Understanding Census Education Data

⚠️ **DO NOT SKIP THIS CELL**

## Run the Next Cell
### Before executing any other cell, you must run the next cell to set up the project folder environment.

In [None]:
from pathlib import Path

# Detect Google Colab: the environment counts as Colab when its Drive
# helper can be imported.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# Resolve the project root: a fixed folder on the mounted Drive when on
# Colab, otherwise the parent of the current working directory.
if not IN_COLAB:
    PROJECT_ROOT = Path.cwd().parent
else:
    drive.mount("/content/drive")
    PROJECT_ROOT = Path("/content/drive/MyDrive/DataScience/census-education-analysis")

# Folder layout used by the rest of the notebook, all derived from PROJECT_ROOT.
OUTPUTS_DIR = PROJECT_ROOT / "outputs"
DATA_DIR = PROJECT_ROOT / "data"
RAW_DIR, STAGING_DIR, PROCESSED_DIR = (
    DATA_DIR / stage for stage in ("raw", "staging", "processed")
)

# Display the resolved root so the reader can confirm it.
PROJECT_ROOT


## Problem 1: What Data Are We Working With, and Where Does It Come From?

## Problem 2: Why Do We Need a State Master File?

## Problem 3: How Do I Identify State-Level Rows in the Location Master?

In [None]:
import pandas as pd

# Read the location master workbook from the raw data folder.
master_path = RAW_DIR.joinpath("location_master", "PC11_TV_DIR.xlsx")
master_df = pd.read_excel(io=master_path)

# Preview the first five rows.
master_df.head(5)

In [None]:
# Show the column labels of the location master.
master_df.columns

## Problem 4: How Do I Filter Only State-Level Records?

In [None]:
# Keep only rows whose district, sub-district and town/village codes are all 0.
state_rows = master_df.loc[
    master_df["District Code"].eq(0)
    & master_df["Sub District Code"].eq(0)
    & master_df["Town-Village Code"].eq(0)
]

In [None]:
# Keep just the code column and the name column of the filtered rows.
states_df = state_rows.loc[:, ["State Code", "Town-Village Name"]]
states_df.head(5)

In [None]:
# Relabel the two columns (positionally) with snake_case names.
states_df = states_df.set_axis(["state_code", "state_name"], axis=1)

## Problem 5: How Do I Save the State Master as a Reusable Asset?

In [None]:
# Create the staging folder for the location master; missing parents are
# created and an already-existing folder is not an error.
STAGING_DIR.joinpath("location_master").mkdir(exist_ok=True, parents=True)

In [None]:
# Write the state master to staging as CSV, leaving out the DataFrame index.
states_csv_path = STAGING_DIR.joinpath("location_master", "states.csv")
states_df.to_csv(path_or_buf=states_csv_path, index=False)

# Display where the file was written.
states_csv_path

## Problem 6: Why Must Raw Education Files Be Renamed Automatically?

## Problem 7: How Do I Load the State Mapping Back into Python?

In [None]:
# Reload the staged state master from the CSV written earlier and preview it.
states = pd.read_csv(states_csv_path)
states.head()

In [None]:
def to_snake(name):
    """Convert a display name into a snake_case identifier.

    The name is lower-cased, every "&" becomes "and", hyphens are treated
    as word separators, and the resulting words are joined with single
    underscores. Leading/trailing whitespace or hyphens are dropped and
    runs of whitespace/hyphens collapse into one underscore, so untidy
    cell values do not produce names such as "_goa_" or "a___b".

    Parameters
    ----------
    name : str
        Name to convert, e.g. "Jammu & Kashmir".

    Returns
    -------
    str
        The snake_case form, e.g. "jammu_and_kashmir". An empty or
        whitespace-only input yields "".
    """
    # Turning hyphens into spaces lets one split() handle both separators
    # and discard empty pieces from repeated or surrounding separators.
    words = name.lower().replace("&", "and").replace("-", " ").split()
    return "_".join(words)

In [None]:
# Map each state code (as text, left-padded with zeros to two characters)
# to the snake_case form of the state name.
state_lookup = {
    str(code).zfill(2): to_snake(label)
    for code, label in zip(states["state_code"], states["state_name"])
}

## Problem 8: How Do I Rename and Stage Education Files Safely?

In [None]:
# Raw education folder and its staging counterpart; the staging folder is
# created if missing (parents included) and left alone if it already exists.
staging_edu_dir = STAGING_DIR.joinpath("education")
raw_edu_dir = RAW_DIR.joinpath("education")
staging_edu_dir.mkdir(exist_ok=True, parents=True)

In [None]:
import shutil

# Copy every raw education workbook into staging, renamed to
# "<state name in snake_case>.xlsx". Files that cannot be matched to a state
# are collected in `skipped_files` instead of being dropped silently.
skipped_files = []

# sorted() makes the processing order deterministic across runs and platforms.
for file_path in sorted(raw_edu_dir.iterdir()):
    # Accept the extension in any letter case; ignore non-workbook entries.
    if file_path.suffix.lower() != ".xlsx":
        continue

    # Names starting with "~$" (as used by Excel lock files) carry the same
    # hyphenated name as the real workbook, so they would resolve to the same
    # state and could overwrite the staged copy.
    if file_path.name.startswith("~$"):
        skipped_files.append(file_path.name)
        continue

    # The state code is the first two characters of the second "-"-separated
    # part of the file name; a name without "-" has no such part.
    name_parts = file_path.name.split("-")
    state_code = name_parts[1][:2] if len(name_parts) > 1 else None
    if state_code not in state_lookup:
        skipped_files.append(file_path.name)
        continue

    new_name = f"{state_lookup[state_code]}.xlsx"
    shutil.copy(file_path, staging_edu_dir / new_name)

# Files that were not staged (an empty list means every workbook was matched).
skipped_files

## End-of-Chapter Direction