# Data Preparation

Focus on school years 2021-2022


In [1]:
# import libraries
import importlib
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import subprocess
import sys

from pathlib import Path

In [2]:
# import other libraries
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so import it explicitly before calling find_spec.
import importlib.util

from helper import load_cde_txt, clean_calschls_safety, clean_safety_by_connectedness

# check if jcds library is installed; install it from GitHub when it is missing
package_name = "jcds"

if importlib.util.find_spec(package_name) is None:
    print(f" '{package_name}' not found. Installing from Github... ")
    subprocess.check_call(
        [
            # Use the kernel's own interpreter so the package lands in the
            # environment this notebook is running in.
            sys.executable,
            "-m",
            "pip",
            "install",
            # pip only clones a repository when the URL carries the "git+"
            # scheme prefix; a bare https://...git URL is treated as an
            # archive download and the install fails.
            "git+https://github.com/junclemente/jcds.git",
        ]
    )
    # Make the import system rescan its finders so the package installed a
    # moment ago can be imported without restarting the kernel.
    importlib.invalidate_caches()
else:
    print(f" '{package_name}' is already installed.")

from jcds import eda as jeda
from jcds import reports as jrep

 'jcds' is already installed.


In [3]:
# Root of the project data tree, relative to the notebook's working directory.
data_folder = Path("../data")
# All untouched source files live under raw/.
raw_data = data_folder / "raw"

# One sub-folder per data source.
ca_doe = raw_data / "ca_doe"
cde = raw_data / "cde"
ca_schls = raw_data / "ca_schls"
civil_rights = raw_data / "civil_rights_data"

# CA Dept of Education

## Adjusted Cohort Graduation Rate and Outcome Data (ACGR)

**Adjusted Cohort Graduation Rate and Outcome Data**
Four-year Adjusted Cohort Graduation Rate (ACGR) and Outcome data reported by race/ethnicity, student group, and gender.  
Source: [https://www.cde.ca.gov/ds/ad/filesacgr.asp](https://www.cde.ca.gov/ds/ad/filesacgr.asp)

**Note:** To protect student privacy, data are suppressed (\*) on the data file if the cell size within a selected student population (cohort students) is 10 or less. Additionally, the “Not Reported” race/ethnicity is suppressed, regardless of actual cell size, if the student population for one or more other race/ethnicity groups is suppressed.

Raw datasets may not be included in the repository due to their size.  
They can be downloaded from here: [raw datasets](https://drive.google.com/drive/folders/1OUvpL1nY1uTu2PmjIHJ7wZmY4YgW4onM?usp=sharing)


In [4]:
# File name of the ACGR extract inside the ca_doe raw-data folder.
dataset = "acgr21.txt"
# Read it with the load_cde_txt helper imported from the project's helper module.
df = load_cde_txt(ca_doe / dataset)
# Last expression of the cell: render the loaded frame.
df

Unnamed: 0,AcademicYear,AggregateLevel,CountyCode,DistrictCode,SchoolCode,CountyName,DistrictName,SchoolName,CharterSchool,DASS,...,SPED Certificate (Count),SPED Certificate (Rate),GED Completer (Count),GED Completer (Rate),Other Transfer (Count),Other Transfer (Rate),Dropout (Count),Dropout (Rate),Still Enrolled (Count),Still Enrolled (Rate)
0,2020-21,C,01,,,Alameda,,,All,All,...,57,0.7,0,0.0,15,0.2,449,5.2,305,3.5
1,2020-21,C,01,,,Alameda,,,All,All,...,97,1.1,5,0.1,29,0.3,849,9.3,472,5.2
2,2020-21,C,01,,,Alameda,,,All,All,...,*,*,*,*,*,*,*,*,*,*
3,2020-21,C,01,,,Alameda,,,All,All,...,18,0.4,1,0.0,2,0.0,112,2.5,47,1.1
4,2020-21,C,01,,,Alameda,,,All,All,...,27,1.6,0,0.0,3,0.2,172,10.1,139,8.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254933,2020-21,T,00,,,State,,,Yes,Yes,...,0,0.0,1,0.1,6,0.8,273,37.6,269,37.1
254934,2020-21,T,00,,,State,,,Yes,Yes,...,2,0.1,28,0.7,62,1.6,2007,52.1,1056,27.4
254935,2020-21,T,00,,,State,,,Yes,Yes,...,0,0.0,0,0.0,2,1.3,33,22.1,55,36.9
254936,2020-21,T,00,,,State,,,Yes,Yes,...,7,0.0,142,0.6,376,1.6,10306,44.2,6911,29.6


| **Column Name**       | **Description**                                                                                                                                                                                                                                                           | **Values Kept** | **Reason for Filter**                                                                                                              |
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| **AggregateLevel**    | Indicates the data aggregation level within the ACGR dataset.<br><br>Possible values:<br>• `T` = State<br>• `C` = County<br>• `D` = District<br>• `S` = School                                                                                                            | `S` (School)    | Focuses analysis on **individual schools**, providing more data points and allowing finer-grained modeling of graduation outcomes. |
| **CharterSchool**     | Indicates whether the record includes data for charter schools.<br><br>Values as they appear in the loaded data:<br>• `All` = All schools combined<br>• `Yes` = Charter only<br>• `No` = Non-charter only | `No` | Excludes **charter schools** to focus on **traditional public high schools**. |
| **DASS**              | Dashboard Alternative School Status (DASS) flag.<br><br>Values as they appear in the loaded data:<br>• `All` = All schools combined<br>• `Yes` = DASS only<br>• `No` = Non-DASS only | `No` | Removes **alternative/continuation programs** so graduation rates reflect typical comprehensive high schools. |
| **ReportingCategory** | Identifies which subgroup the record represents.<br><br>Examples:<br>• Race/Ethnicity (`RB` = African American, `RH` = Hispanic, etc.)<br>• Gender (`GM` = Male, `GF` = Female)<br>• Program groups (`SD` = Students with Disabilities, etc.)<br>• `TA` = Total Aggregate | `TA`            | Keeps **aggregate totals** for each school (not broken down by subgroup) to simplify modeling.                                     |


In [5]:
# filter dataset
# Keep only the rows that match all four conditions:
#   AggregateLevel    == "S"   school-level records
#   CharterSchool     == "No"  non-charter records
#   DASS              == "No"  non-DASS records
#   ReportingCategory == "TA"  total-aggregate rows (no subgroup breakdown)
# Every flag is whitespace-stripped before comparison so that padded values
# cannot slip past one condition while being matched by the others.
df_filtered = df[
    (df["AggregateLevel"].str.strip() == "S")
    & (df["CharterSchool"].str.strip() == "No")
    & (df["DASS"].str.strip() == "No")
    & (df["ReportingCategory"].str.strip() == "TA")
]

# Last expression of the cell: render the filtered frame.
df_filtered

Unnamed: 0,AcademicYear,AggregateLevel,CountyCode,DistrictCode,SchoolCode,CountyName,DistrictName,SchoolName,CharterSchool,DASS,...,SPED Certificate (Count),SPED Certificate (Rate),GED Completer (Count),GED Completer (Rate),Other Transfer (Count),Other Transfer (Rate),Dropout (Count),Dropout (Rate),Still Enrolled (Count),Still Enrolled (Rate)
66594,2020-21,S,01,31609,0131755,Alameda,California School for the Blind (State Special...,California School for the Blind,No,No,...,4,36.4,0,0.0,0,0.0,7,63.6,0,0.0
66654,2020-21,S,01,31617,0131763,Alameda,California School for the Deaf-Fremont (State ...,California School for the Deaf-Fremont,No,No,...,2,5.3,0,0.0,0,0.0,1,2.6,11,28.9
66718,2020-21,S,01,61119,0000001,Alameda,Alameda Unified,"Nonpublic, Nonsectarian Schools",No,No,...,*,*,*,*,*,*,*,*,*,*
66782,2020-21,S,01,61119,0106401,Alameda,Alameda Unified,Alameda Science and Technology Institute,No,No,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
66910,2020-21,S,01,61119,0130229,Alameda,Alameda Unified,Alameda High,No,No,...,11,2.8,0,0.0,1,0.3,9,2.3,4,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254262,2020-21,S,58,72736,0000000,Yuba,Marysville Joint Unified,District Office,No,No,...,0,0.0,0,0.0,0,0.0,30,43.5,0,0.0
254314,2020-21,S,58,72736,0000001,Yuba,Marysville Joint Unified,"Nonpublic, Nonsectarian Schools",No,No,...,*,*,*,*,*,*,*,*,*,*
254426,2020-21,S,58,72736,5830013,Yuba,Marysville Joint Unified,Lindhurst High,No,No,...,0,0.0,0,0.0,2,0.9,14,6.2,13,5.8
254630,2020-21,S,58,72736,5835202,Yuba,Marysville Joint Unified,Marysville High,No,No,...,0,0.0,0,0.0,1,0.5,14,7.0,4,2.0


## Save dataset as pickle file


In [6]:
# Persist the filtered ACGR frame as a pickle at the top of the data folder.
# NOTE(review): the file name says "21_22" while the AcademicYear values shown
# for this frame are 2020-21 — confirm the intended school year.
output_path = data_folder / "acgr21_22_raw.pkl"
df_filtered.to_pickle(output_path)

In [7]:
# Print the jcds summary report (shape, duplicates, dtype breakdown) for the
# filtered frame; show_columns=True also lists the column names per group.
jrep.data_info(df_filtered, show_columns=True)


SHAPE:
There are 1502 rows and 34 columns (2.97 MB).

DUPLICATES:
There are 0 duplicated rows.

COLUMNS/VARIABLES:
Column dType Summary:
 * object: 34
There are 0 numerical (int/float/bool) variables.
 * Columns: []
There are 34 categorical (nominal/ordinal) variables.
 * Columns: ['AcademicYear', 'AggregateLevel', 'CountyCode', 'DistrictCode', 'SchoolCode', 'CountyName', 'DistrictName', 'SchoolName', 'CharterSchool', 'DASS', 'ReportingCategory', 'CohortStudents', 'Regular HS Diploma Graduates (Count)', 'Regular HS Diploma Graduates (Rate)', "Met UC/CSU Grad Req's (Count)", "Met UC/CSU Grad Req's (Rate)", 'Seal of Biliteracy (Count)', 'Seal of Biliteracy (Rate)', 'Golden State Seal Merit Diploma (Count)', 'Golden State Seal Merit Diploma (Rate', 'CHSPE Completer (Count)', 'CHSPE Completer (Rate)', 'Adult Ed. HS Diploma (Count)', 'Adult Ed. HS Diploma (Rate)', 'SPED Certificate (Count)', 'SPED Certificate (Rate)', 'GED Completer (Count)', 'GED Completer (Rate)', 'Other Transfer (Count)

# CA Dept of Education

[Downloadable Data Files](https://www.cde.ca.gov/ds/ad/downloadabledata.asp)  
Downloadable files about California's K–12 educational system by topic area, including enrollment, assessment and accountability, English learners, foster youth, free or reduced-price meal, graduates and dropouts, and staff data.


## Absenteeism

The Absenteeism Downloadable Files page provides access to data about student absenteeism, including chronic absenteeism and absenteeism by reason counts and rates, disaggregated by race/ethnicity, gender, student program group, and grade span.


In [None]:
# Load the chronic absenteeism extract with the project helper.
df_raw = load_cde_txt(cde / "chronicabsenteeism21.txt")

# Keep school-level ("S"), non-charter ("No"), total-aggregate ("TA") rows.
df_chron_abs = df_raw.loc[
    df_raw["Aggregate Level"].str.strip().eq("S")
    & df_raw["Charter School"].str.strip().eq("No")
    & df_raw["Reporting Category"].str.strip().eq("TA")
]
df_chron_abs

Unnamed: 0,Academic Year,Aggregate Level,County Code,District Code,School Code,County Name,District Name,School Name,Charter School,Reporting Category,ChronicAbsenteeismEligibleCumula,ChronicAbsenteeismCount,ChronicAbsenteeismRate
57598,2020-21,S,01,10017,0130419,Alameda,Alameda County Office of Education,Alameda County Community,No,TA,122,103,84.4
57599,2020-21,S,01,10017,0130401,Alameda,Alameda County Office of Education,Alameda County Juvenile Hall/Court,No,TA,107,66,61.7
57621,2020-21,S,01,31609,0131755,Alameda,California School for the Blind (State Special...,California School for the Blind,No,TA,68,6,8.8
57644,2020-21,S,01,31617,0131763,Alameda,California School for the Deaf-Fremont (State ...,California School for the Deaf-Fremont,No,TA,329,38,11.6
58027,2020-21,S,01,61119,6090013,Alameda,Alameda Unified,Edison Elementary,No,TA,460,17,3.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
263006,2020-21,S,58,72751,6056832,Yuba,Wheatland,Lone Tree Elementary,No,TA,354,59,16.7
263008,2020-21,S,58,72751,6056840,Yuba,Wheatland,Wheatland Elementary,No,TA,314,85,27.1
263062,2020-21,S,58,72769,0123570,Yuba,Wheatland Union High,Wheatland Community Day High,No,TA,,,
263063,2020-21,S,58,72769,0133751,Yuba,Wheatland Union High,Edward P. Duplex,No,TA,82,82,100


In [17]:
# Load the absenteeism-by-reason extract with the project helper.
df_raw = load_cde_txt(cde / "absenteeismreason22-v3.txt")

# Keep school-level ("S"), non-charter ("No"), non-DASS ("No"),
# total-aggregate ("TA") rows. Every flag is whitespace-stripped before the
# comparison, matching how the chronic absenteeism filter treats
# "Reporting Category".
df_abs = df_raw[
    (df_raw["Aggregate Level"].str.strip() == "S")
    & (df_raw["Charter School"].str.strip() == "No")
    & (df_raw["DASS"].str.strip() == "No")
    & (df_raw["Reporting Category"].str.strip() == "TA")
]

# Last expression of the cell: render the filtered frame.
df_abs

Unnamed: 0,Academic Year,Aggregate Level,County Code,District Code,School Code,County Name,District Name,School Name,Charter School,DASS,...,Average Days Absent,Total Days Absent,Excused Absences (percent),Unexcused Absences (percent),Out-of-School Suspension Absences (percent),Incomplete Independent Study Absences (percent),Excused Absences (count),Unexcused Absences (count),Out-of-School Suspension Absences (count),Incomplete Independent Study Absences (count)
583,2021-22,S,01,31609,0131755,Alameda,California School for the Blind (State Special...,California School for the Blind,No,No,...,15.5,1041,53.1,46.9,0,0,553,488,0,0
608,2021-22,S,01,31617,0131763,Alameda,California School for the Deaf-Fremont (State ...,California School for the Deaf-Fremont,No,No,...,15.8,4874,56.4,41.8,1.8,0,2750,2038,86,0
628,2021-22,S,01,61119,0000000,Alameda,Alameda Unified,District Office,No,No,...,0,0,0,0,0,0,0,0,0,0
647,2021-22,S,01,61119,0106401,Alameda,Alameda Unified,Alameda Science and Technology Institute,No,No,...,6.9,944,79.2,20.2,0,0.5,748,191,0,5
670,2021-22,S,01,61119,0111765,Alameda,Alameda Unified,Ruby Bridges Elementary,No,No,...,20.2,9495,70.9,28.4,0.2,0.6,6730,2692,16,57
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227659,2021-22,S,58,72751,6056816,Yuba,Wheatland,Bear River,No,No,...,11.9,6766,48.7,43.9,0.2,7.3,3292,2967,14,493
227682,2021-22,S,58,72751,6056832,Yuba,Wheatland,Lone Tree Elementary,No,No,...,9,3365,62.1,29,0.1,8.9,2088,977,2,298
227706,2021-22,S,58,72751,6056840,Yuba,Wheatland,Wheatland Elementary,No,No,...,14.3,4957,53.4,33.5,0,13,2648,1663,1,645
227743,2021-22,S,58,72769,0000000,Yuba,Wheatland Union High,District Office,No,No,...,*,*,*,*,*,*,*,*,*,*


## Public Schools and Districts

The Public Schools and Districts Downloadable Files page provides access to data files containing general information about California's public schools and districts found in the California School Directory.


In [10]:
# Read the public schools and districts workbook. header=5 takes the column
# labels from the sheet row at 0-based index 5; the rows above it are not
# read as data.
df_schooldata = pd.read_excel(cde / "pubschls.xlsx", header=5)

# Last expression of the cell: render the loaded frame.
df_schooldata

Unnamed: 0,CDSCode,NCESDist,NCESSchool,StatusType,County,District,School,Street,StreetAbr,City,...,Virtual,Magnet,YearRoundYN,FederalDFCDistrictID,Latitude,Longitude,AdmFName,AdmLName,LastUpDate,Multilingual
0,01100170000000,0691051,No Data,Active,Alameda,Alameda County Office of Education,No Data,313 West Winton Avenue,313 West Winton Ave.,Hayward,...,No Data,No Data,No Data,No Data,37.658212,-122.09713,Alysse,Castro,2023-03-08,No Data
1,01100170109835,0691051,10546,Closed,Alameda,Alameda County Office of Education,FAME Public Charter,"39899 Balentine Drive, Suite 335","39899 Balentine Dr., Ste. 335",Newark,...,P,N,N,No Data,37.521436,-121.99391,No Data,No Data,2015-09-01,No Data
2,01100170112607,0691051,10947,Active,Alameda,Alameda County Office of Education,Envision Academy for Arts & Technology,1515 Webster Street,1515 Webster St.,Oakland,...,N,N,N,0601614,37.804520,-122.26815,Elizabeth,Raji-Greg,2024-05-20,N
3,01100170114363,0691051,12013,Active,Alameda,Alameda County Office of Education,American Indian Public Charter School II,171 12th Street,171 12th St.,Oakland,...,N,N,N,0601880,37.800368,-122.26548,Marco,Menendez,2025-09-08,N
4,01100170118489,0691051,12283,Closed,Alameda,Alameda County Office of Education,Aspire California College Preparatory Academy,2125 Jefferson Avenue,2125 Jefferson Ave.,Berkeley,...,N,N,N,No Data,37.868991,-122.27844,No Data,No Data,2015-07-01,No Data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18400,58727695830039,0642350,No Data,Closed,Yuba,Wheatland Union High,Wheatland Continuation,PO Box 398,PO Box 398,Wheatland,...,No Data,No Data,N,No Data,No Data,No Data,No Data,No Data,1999-06-24,No Data
18401,58727695830070,0642350,No Data,Closed,Yuba,Wheatland Union High,Wheatland Alternative Education,801 Olive Street,801 Olive St.,Wheatland,...,No Data,No Data,N,No Data,No Data,No Data,No Data,No Data,1999-06-24,No Data
18402,58727695830120,0642350,08437,Closed,Yuba,Wheatland Union High,Academy for Career Education Charter,801 Olive Street,801 Olive St.,Wheatland,...,No Data,No Data,N,No Data,39.010399,-121.42927,No Data,No Data,2013-07-02,No Data
18403,58727695838305,0642350,06930,Active,Yuba,Wheatland Union High,Wheatland Union High,1010 Wheatland Road,1010 Wheatland Rd.,Wheatland,...,N,N,N,No Data,38.998968,-121.45497,Brandon,Moore,2024-09-13,N


## Free or Reduced-Price Meal (Student Poverty)

The Free or Reduced-Price Meal Downloadable Files page provides access to data about students who are eligible for Free or Reduced-Price Meals (FRPM).


In [48]:
# Read the school-level FRPM sheet; header=1 takes the column labels from the
# sheet row at 0-based index 1. The trailing space in the sheet name is part
# of the name being requested.
df_raw = pd.read_excel(
    cde / "frpm2122_v2.xlsx", sheet_name="FRPM School-Level Data ", header=1
)

# The charter flag's header contains embedded line breaks, so the column is
# picked by position (index 11) rather than by retyping its label.
col = df_raw.columns[11]
print(col)

# Positional lookups break silently if the sheet layout changes; fail loudly
# instead of filtering on an unrelated column.
if "Charter" not in str(col):
    raise ValueError(
        f"Expected the charter-school flag at column position 11, found {col!r}"
    )

# Keep non-charter rows: flag equal to "N" after string conversion and trimming.
mask = df_raw[col].astype(str).str.strip().eq("N")

df_frpm = df_raw[mask]

# Last expression of the cell: render the filtered frame.
df_frpm

Charter 
School 
(Y/N)


Unnamed: 0,Academic Year,County Code,District Code,School Code,County Name,District Name,School Name,District Type,School Type,Educational \nOption Type,...,Free Meal \nCount \n(K-12),Percent (%) \nEligible Free \n(K-12),FRPM Count \n(K-12),Percent (%) \nEligible FRPM \n(K-12),Enrollment \n(Ages 5-17),Free Meal \nCount \n(Ages 5-17),Percent (%) \nEligible Free \n(Ages 5-17),FRPM Count \n(Ages 5-17),Percent (%) \nEligible FRPM \n(Ages 5-17),CALPADS Fall 1 \nCertification Status
0,2021-2022,1,10017,130419,Alameda,Alameda County Office of Education,Alameda County Community,County Office of Education (COE),County Community,County Community School,...,45,0.789474,47,0.824561,37,29,0.783784,31,0.837838,Y
1,2021-2022,1,10017,130401,Alameda,Alameda County Office of Education,Alameda County Juvenile Hall/Court,County Office of Education (COE),Juvenile Court Schools,Juvenile Court School,...,64,1.000000,64,1.000000,56,56,1.000000,56,1.000000,Y
14,2021-2022,1,31609,131755,Alameda,California School for the Blind (State Special...,California School for the Blind,State Special Schools,State Special Schools,State Special School,...,62,1.000000,62,1.000000,43,43,1.000000,43,1.000000,Y
15,2021-2022,1,31617,131763,Alameda,California School for the Deaf-Fremont (State ...,California School for the Deaf-Fremont,State Special Schools,State Special Schools,State Special School,...,318,1.000000,318,1.000000,263,263,1.000000,263,1.000000,Y
17,2021-2022,1,61119,130229,Alameda,Alameda Unified,Alameda High,Unified School District,High Schools (Public),Traditional,...,311,0.172013,327,0.180863,1743,293,0.168101,308,0.176707,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10550,2021-2022,58,72751,6056832,Yuba,Wheatland,Lone Tree Elementary,Elementary School District,Elementary Schools (Public),Traditional,...,68,0.193732,119,0.339031,338,66,0.195266,117,0.346154,Y
10552,2021-2022,58,72751,6056840,Yuba,Wheatland,Wheatland Elementary,Elementary School District,Elementary Schools (Public),Traditional,...,178,0.523529,189,0.555882,329,170,0.516717,181,0.550152,Y
10554,2021-2022,58,72769,133751,Yuba,Wheatland Union High,Edward P. Duplex,High School District,Continuation High Schools,Continuation School,...,32,0.711111,45,1.000000,28,16,0.571429,28,1.000000,Y
10556,2021-2022,58,72769,123570,Yuba,Wheatland Union High,Wheatland Community Day High,High School District,District Community Day Schools,Community Day School,...,3,0.600000,4,0.800000,5,3,0.600000,4,0.800000,Y


## CBEDS Data about Schools & Districts

Downloadable data files for information about schools and districts, including estimated number of teacher hires, work visa applications, home-to-school transportation, kindergarten program type, and educational calendar.  
[File Structure](https://www.cde.ca.gov/ds/ad/fscbedsorab19.asp)


In [12]:
# Load the CBEDS schools-and-districts extract with the project helper.
# No filtering is applied in this cell.
df_cbeds = load_cde_txt(cde / "cbedsora21b.txt")

# Last expression of the cell: render the loaded frame.
df_cbeds

Unnamed: 0,Cdscode,CountyName,DistrictName,SchoolName,Description,Level,Section,RowNumber,Value,Year
0,01100170000000,Alameda,Alameda County Office of Education,District Office,Life Science,D,B,10,2.5,2122
1,01100170000000,Alameda,Alameda County Office of Education,District Office,Mathematics,D,B,11,8,2122
2,01100170000000,Alameda,Alameda County Office of Education,District Office,Physical Education,D,B,13,2.5,2122
3,01100170000000,Alameda,Alameda County Office of Education,District Office,Physical Science,D,B,14,3.5,2122
4,01100170000000,Alameda,Alameda County Office of Education,District Office,Reading,D,B,15,4.3,2122
...,...,...,...,...,...,...,...,...,...,...
58706,58727695838305,Yuba,Wheatland Union High,Wheatland Union High,Kindergarten None,S,B,4,True,2122
58707,58727695838305,Yuba,Wheatland Union High,Wheatland Union High,Transitional Kindergarten None,S,B,8,True,2122
58708,58727695838305,Yuba,Wheatland Union High,Wheatland Union High,Traditional,S,D,1,True,2122
58709,58727695838305,Yuba,Wheatland Union High,Wheatland Union High,Start Date,S,D,4,20210811,2122


## Staff Data Files

The Staff Downloadable Files page provides access to data about certificated and classified staff demographic information, staff assignments, student/staff ratios, and estimated teacher hires.

### Student / Staff Ratio


In [49]:
# Load the student/staff ratio extract with the project helper.
df_raw = load_cde_txt(cde / "strat2122.txt")

# Keep school-level ("S"), non-charter ("N"), non-DASS ("N") rows.
# There is no "TA" filter here: the columns displayed for this file do not
# include a Reporting Category column.
df_ss_ratio = df_raw.loc[
    df_raw["Aggregate Level"].str.strip().eq("S")
    & df_raw["Charter School"].str.strip().eq("N")
    & df_raw["DASS"].str.strip().eq("N")
]
df_ss_ratio

Unnamed: 0,Academic Year,Aggregate Level,County Code,District Code,School Code,County Name,District Name,School Name,Charter School,DASS,School Grade Span,TOTAL_ENR_N,TCH_FTE_N,ADM_FTE_N,PSV_FTE_N,OTH_FTE_N,STU_TCH_RATIO,STU_ADM_RATIO,STU_PSV_RATIO,STU_OTH_RATIO
556,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,GS_K12,0,0.0,4.5,0.0,1.0,*,*,*,*
571,2021-22,S,01,31609,0131755,Alameda,California School for the Blind (State Special...,California School for the Blind,N,N,GS_K12,62,13.0,5.0,16.0,15.0,4.8,12.4,3.9,4.1
572,2021-22,S,01,31617,0000000,Alameda,California School for the Deaf-Fremont (State ...,District Office,N,N,GS_K12,0,0.0,8.0,12.5,13.0,*,*,*,*
573,2021-22,S,01,31617,0131763,Alameda,California School for the Deaf-Fremont (State ...,California School for the Deaf-Fremont,N,N,GS_K12,318,71.8,7.9,2.0,15.3,4.4,40.3,159,20.7
574,2021-22,S,01,61119,0000000,Alameda,Alameda Unified,District Office,N,N,GS_K12,0,0.0,15.0,24.2,13.7,*,*,*,*
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30235,2021-22,S,58,72751,6056816,Yuba,Wheatland,Bear River,N,N,GS_K12,568,30.0,2.0,3.2,0.0,18.9,284,180.3,*
30236,2021-22,S,58,72751,6056832,Yuba,Wheatland,Lone Tree Elementary,N,N,GS_K6,351,17.0,0.5,2.0,0.0,20.6,*,175.5,*
30237,2021-22,S,58,72751,6056840,Yuba,Wheatland,Wheatland Elementary,N,N,GS_K6,340,17.0,1.0,2.8,0.0,20,340,123.6,*
30239,2021-22,S,58,72769,0000000,Yuba,Wheatland Union High,District Office,N,N,GS_K12,3,0.8,1.0,0.1,0.0,*,3,*,*


### Staff Education


In [50]:
# Load the staff education extract with the project helper.
df_raw = load_cde_txt(cde / "sted2122.txt")

# Keep school-level ("S"), non-charter ("N"), non-DASS ("N") rows.
# NOTE(review): no reporting-category filter is applied; the displayed rows
# are still split by Staff Gender (ALL/GF/GM), so later steps presumably need
# to pick the aggregate rows themselves — confirm.
df_staff_ed = df_raw.loc[
    df_raw["Aggregate Level"].str.strip().eq("S")
    & df_raw["Charter School"].str.strip().eq("N")
    & df_raw["DASS"].str.strip().eq("N")
]
df_staff_ed

Unnamed: 0,Academic Year,Aggregate Level,County Code,District Code,School Code,County Name,District Name,School Name,Charter School,DASS,...,Staff Gender,Total Staff Count,Associate,Baccalaureate,Baccalaureate Plus,Master,Master Plus,Doctorate,Special (Juris Doctor),None
7395,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,...,ALL,5,0,0,0,0,4,1,0,0
7396,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,...,GF,5,0,0,0,0,4,1,0,0
7397,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,...,ALL,6,0,0,0,0,5,1,0,0
7398,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,...,GF,6,0,0,0,0,5,1,0,0
7399,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,...,ALL,1,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360896,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,...,ALL,11,0,2,1,7,0,1,0,0
360897,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,...,GF,9,0,2,1,6,0,0,0,0
360898,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,...,GM,2,0,0,0,1,0,1,0,0
360899,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,...,ALL,1,0,0,1,0,0,0,0,0


### Staff Experience


In [51]:
# Load the staff experience extract with the project helper.
df_raw = load_cde_txt(cde / "stex2122.txt")

# Keep school-level ("S"), non-charter ("N"), non-DASS ("N") rows.
# NOTE(review): the displayed rows are still split by Staff Type and
# Staff Gender, so later steps presumably need to pick the aggregate rows
# themselves — confirm.
df_staff_xp = df_raw.loc[
    df_raw["Aggregate Level"].str.strip().eq("S")
    & df_raw["Charter School"].str.strip().eq("N")
    & df_raw["DASS"].str.strip().eq("N")
]
df_staff_xp

Unnamed: 0,Academic Year,Aggregate Level,County Code,District Code,School Code,County Name,District Name,School Name,Charter School,DASS,Staff Type,School Grade Span,Staff Gender,Total Staff Count,Average Total Years Experience,Average District Years Experience,Experienced,Inexperienced,First Year,Second Year
7395,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,ADM,GS_K12,ALL,5,20.6,9.2,5,0,0,0
7396,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,ADM,GS_K12,GF,5,20.6,9.2,5,0,0,0
7397,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,ALL,GS_K12,ALL,6,20.0,10.2,6,0,0,0
7398,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,ALL,GS_K12,GF,6,20.0,10.2,6,0,0,0
7399,2021-22,S,01,10017,0000000,Alameda,Alameda County Office of Education,District Office,N,N,OTH,GS_K12,ALL,1,17.0,15.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360896,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,ALL,GS_K12,ALL,11,21.6,14.6,11,0,0,0
360897,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,ALL,GS_K12,GF,9,22.8,16.0,9,0,0,0
360898,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,ALL,GS_K12,GM,2,16.5,8.5,2,0,0,0
360899,2021-22,S,58,10587,0000000,Yuba,Yuba County Office of Education,District Office,N,N,OTH,GS_K12,ALL,1,16.0,15.0,1,0,0,0


### Enrollment by School


In [None]:
# Load the enrollment-by-school extract with the project helper.
# NOTE(review): no year filter is applied here; the displayed rows range from
# ACADEMIC_YEAR 2020-21 to 2022-23 — confirm which year(s) downstream steps use.
df_enroll = load_cde_txt(cde / "enr202022-v2.txt")

# Last expression of the cell: render the loaded frame.
df_enroll

Unnamed: 0,ACADEMIC_YEAR,CDS_CODE,COUNTY,DISTRICT,SCHOOL,ENR_TYPE,RACE_ETHNICITY,GENDER,GR_KN,GR_1,...,GR_7,GR_8,UNGR_ELM,GR_9,GR_10,GR_11,GR_12,UNGR_SEC,ENR_TOTAL,ADULT
0,2020-21,01100170112607,ALAMEDA,Alameda County Office of Education,Envision Academy for Arts & Technology,C,0,F,0,0,...,3,0,0,1,0,1,0,0,6,0
1,2020-21,01100170112607,ALAMEDA,Alameda County Office of Education,Envision Academy for Arts & Technology,C,0,M,0,0,...,0,0,0,0,1,1,0,0,3,0
2,2020-21,01100170112607,ALAMEDA,Alameda County Office of Education,Envision Academy for Arts & Technology,C,1,F,0,0,...,0,0,0,1,1,0,1,0,3,0
3,2020-21,01100170112607,ALAMEDA,Alameda County Office of Education,Envision Academy for Arts & Technology,C,2,F,0,0,...,0,0,0,1,0,0,0,0,1,0
4,2020-21,01100170112607,ALAMEDA,Alameda County Office of Education,Envision Academy for Arts & Technology,C,2,M,0,0,...,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
793375,2022-23,58727695838305,YUBA,Wheatland Union High,Wheatland Union High,P,7,F,0,0,...,0,0,0,35,54,60,43,0,192,0
793376,2022-23,58727695838305,YUBA,Wheatland Union High,Wheatland Union High,P,7,M,0,0,...,0,0,0,61,67,61,63,0,252,0
793377,2022-23,58727695838305,YUBA,Wheatland Union High,Wheatland Union High,P,7,X,0,0,...,0,0,0,0,0,1,0,0,1,0
793378,2022-23,58727695838305,YUBA,Wheatland Union High,Wheatland Union High,P,9,F,0,0,...,0,0,0,16,10,14,12,0,52,0


## CA DOE School Climate, Health, and Learning Surveys

### Perception of Safety by Grade Level


In [17]:
# Read the Kidsdata workbook without treating any row as column labels
# (header=None); the raw sheet is handed as-is to the cleaning helper.
df_raw = pd.read_excel(
    ca_schls / "Kidsdata-Perceptions-of-School-Safety--by-Grade-Level--2017.xls",
    header=None,
)

# Reshape the raw sheet with the clean_calschls_safety helper imported from
# the project's helper module.
df_safety = clean_calschls_safety(df_raw)

# Last expression of the cell: render the cleaned frame.
df_safety

Unnamed: 0,geography,geo_type,grade,very_safe_pct,safe_pct,neither_pct,unsafe_pct,very_unsafe_pct,years,level_of_safety_filter
0,California,State,9,0.128,0.420,0.364,0.053,0.035,2017-2019,All
1,California,State,11,0.134,0.403,0.373,0.055,0.036,2017-2019,All
2,Alameda County,County,9,0.132,0.459,0.341,0.044,0.023,2017-2019,All
3,Alameda County,County,11,0.145,0.423,0.351,0.051,0.029,2017-2019,All
4,Amador County,County,9,0.153,0.403,0.374,0.048,0.021,2017-2019,All
...,...,...,...,...,...,...,...,...,...,...
109,Ventura County,County,11,0.162,0.420,0.335,0.050,0.033,2017-2019,All
110,Yolo County,County,9,0.139,0.424,0.371,0.043,0.023,2017-2019,All
111,Yolo County,County,11,0.162,0.437,0.342,0.034,0.025,2017-2019,All
112,Yuba County,County,9,0.075,0.415,0.359,0.097,0.055,2017-2019,All


### Perception of Safety by School Connectedness


In [19]:
# Read the Kidsdata workbook without treating any row as column labels
# (header=None); the raw sheet is handed as-is to the cleaning helper.
df_raw = pd.read_excel(
    ca_schls / "Kidsdata-Perceptions-of-School-Safety--by-Level-of-School-C.xls",
    header=None,
)

# Reshape the raw sheet with the clean_safety_by_connectedness helper imported
# from the project's helper module.
df_connected = clean_safety_by_connectedness(df_raw)

# Last expression of the cell: render the cleaned frame.
df_connected

Unnamed: 0,Geography,Connectedness,Very Safe,Safe,Neither Safe nor Unsafe,Unsafe,Very Unsafe,Safety_Positive
0,California,High,0.268,0.559,0.157,0.011,0.005,0.827
1,California,Medium,0.052,0.334,0.520,0.065,0.028,0.386
2,California,Low,0.069,0.111,0.428,0.196,0.196,0.180
3,Alameda County,High,0.268,0.582,0.138,0.009,0.004,0.850
4,Alameda County,Medium,0.060,0.370,0.494,0.057,0.020,0.430
...,...,...,...,...,...,...,...,...
172,Yolo County,Medium,0.064,0.385,0.475,0.053,0.022,0.449
173,Yolo County,Low,0.083,0.136,0.456,0.163,0.162,0.219
174,Yuba County,High,0.234,0.584,0.160,0.015,0.007,0.818
175,Yuba County,Medium,0.036,0.331,0.498,0.086,0.049,0.367
