In [2]:
import os
import geopandas as gpd
import pandas as pd


# Root folder that holds the project data. Defaults to the original local
# Dropbox path; set the SDAT_DATA_DIR environment variable to run the notebook
# on another machine without editing this cell.
folder_path = os.environ.get(
    "SDAT_DATA_DIR",
    r"C:\Users\cansu\Dropbox\studio2263\sdat_dataUpdates",
)

## Load Files

In [6]:

# Load the district geometries (sds_2022_15pct.geojson) from the project data folder.
raw_sd_22 = gpd.read_file( os.path.join( folder_path , "data/processed/geo/sds_2022_15pct.geojson") )
# NOTE(review): set_crs() returns a new GeoDataFrame unless inplace=True, and the
# result is not assigned here, so raw_sd_22 keeps whatever CRS read_file gave it.
# The head() output shows coordinates that look like lon/lat degrees, so labelling
# them as ESRI:102008 may not be what is wanted -- confirm the intent before
# assigning the result (or switch to to_crs() if a reprojection was meant).
raw_sd_22.set_crs( "ESRI:102008" , allow_override=True )

raw_sd_22.head()

Unnamed: 0,state,district_id,district_name,level,geometry
0,12,1200180,Broward County School District,unified,"MULTIPOLYGON (((-80.11825 25.97524, -80.11777 ..."
1,12,1200240,Charlotte County School District,unified,"MULTIPOLYGON (((-82.20082 26.77289, -82.19748 ..."
2,12,1200330,Collier County School District,unified,"MULTIPOLYGON (((-81.43693 25.80326, -81.43931 ..."
3,12,1200390,Dade County School District,unified,"MULTIPOLYGON (((-80.39924 25.25691, -80.40002 ..."
4,12,1200420,DeSoto County School District,unified,"POLYGON ((-81.56406 27.34064, -81.61056 27.340..."


## Use prepared GeoJSONs to find geometries  
The steps in this section might be different for each state.

### NJ

In [None]:
area = 'trenton' # one region per run: change this and re-run the cells below for each additional region
state_code = '34' # prefix of the NJ district ids built in the next cell

In [51]:
# Load the prepared GeoJSON that lists the districts of the selected region.
geo_area = gpd.read_file(os.path.join(folder_path, 'nj', f"{area}.geojson"))

# Rows appear to fill either the *_E / ELSDLEA columns or the *_U / UNSDLEA
# columns (see SD_TYPE in the output), so take whichever one is populated.
geo_area['DOEID'] = geo_area['NJDOE_ID_E'].fillna(geo_area['NJDOE_ID_U'])
local_lea = geo_area['ELSDLEA'].fillna(geo_area['UNSDLEA'])

# Prefix with state_code rather than a second hard-coded "34". The vectorised
# concatenation leaves a row with no LEA code as NaN instead of raising a
# TypeError inside a per-row lambda; such a row simply matches nothing later.
geo_area['district_id'] = state_code + local_lea

geo_area.head(3)

Unnamed: 0,NJDOE_ID_E,DIST_NAME,ELSDLEA,SD_TYPE,NJDOE_ID_U,UNSDLEA,voucher_name,geometry,DOEID,district_id
0,,Cranbury Township School District,,U,23-0970,3540.0,trenton,"MULTIPOLYGON (((-74.48454 40.33866, -74.48436 ...",23-0970,3403540
1,,East Windsor Regional School District,,U,21-1245,4320.0,trenton,"MULTIPOLYGON (((-74.57212 40.30091, -74.57204 ...",21-1245,3404320
2,05-3650,North Hanover Township School District,11580.0,E,,,trenton,"MULTIPOLYGON (((-74.58947 40.13804, -74.58941 ...",05-3650,3411580


In [52]:
# Keep only the districts of raw_sd_22 that belong to this region and tag them.
in_area = raw_sd_22['district_id'].isin(geo_area['district_id'].unique())
filtered_sd_22 = raw_sd_22[in_area].copy()
filtered_sd_22['voucher_area'] = area

# Report region districts that found no match, so a silent id mismatch
# (for example a formatting difference) does not go unnoticed.
unmatched_ids = sorted(
    set(geo_area['district_id'].dropna()) - set(filtered_sd_22['district_id'])
)
if unmatched_ids:
    print(f"{len(unmatched_ids)} district ids from {area}.geojson are not in raw_sd_22: {unmatched_ids}")

# to_csv / to_file do not create missing folders, so make sure the output
# folder exists before writing.
processed_dir = os.path.join(folder_path, 'nj', 'processed')
os.makedirs(processed_dir, exist_ok=True)

# district_id -> voucher_area lookup for this region.
filtered_sd_22[['district_id', 'voucher_area']].to_csv(
    os.path.join(processed_dir, f"{area}_sds_in_tool.csv"),
    index=False,
)

# Geometries of the region's districts.
filtered_sd_22.to_file(
    os.path.join(processed_dir, f"{area}.geojson"),
    driver="GeoJSON", encoding='utf-8',
)

## Get Data and Join

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

areas = ['newark', 'trenton']

# Read each region's district_id -> voucher_area lookup and concatenate once.
# This avoids growing a frame inside a loop starting from an empty DataFrame
# (pandas deprecates concatenating empty entries), and the comprehension does
# not overwrite the notebook-level `area` variable the way a for-loop would.
voucher_areas = pd.concat(
    [
        pd.read_csv(
            os.path.join(folder_path, 'nj', 'processed', f"{area}_sds_in_tool.csv"),
            dtype=str,  # keep district ids as text
        )
        for area in areas
    ]
)

# Sorted list of every district id that belongs to one of the regions.
included_areas = sorted(voucher_areas['district_id'].unique())
voucher_areas.sample(5)

Unnamed: 0,district_id,voucher_area
42,3403600,newark
4,3417730,trenton
2,3408730,newark
72,3407890,newark
70,3408760,newark


### Open all datasets and join them on the district ID

In [110]:
def _read_processed(filename, reader=pd.read_csv):
    """Read one file from <folder_path>/data/processed.

    `district_id` is always read as text so it stays a string join key.
    `reader` is the pandas reader to use (pd.read_csv by default,
    pd.read_excel for the .xlsx file). Returns the loaded DataFrame.
    """
    return reader(
        os.path.join(folder_path, "data", "processed", filename),
        dtype={'district_id': str},
    )


# frl
frl_district = _read_processed("nces_pct_frl.csv")[['district_id', 'pct_frl']]

# nces
cols = [
    'district_id',
    'district',
    'enroll_24',
    'student_teacher_ratio_24',
    'hs_students_per_guidance_couselor_24',
    'expend_per_pupil_22',
    'pct_native_24',
    'pct_aapi_24',
    'pct_latinx_24',
    'pct_black_24',
    'pct_white_24',
    'pct_hawpi_24',
    'pct_two_plus_24'
]
# .copy() gives an independent frame, so adding a column below does not write
# to a column-subset that pandas flags with SettingWithCopyWarning.
nces_data = _read_processed("nces_data_24.csv")[cols].copy()

# Add pct_bipoc_24 column (everything that is not pct_white_24)
nces_data['pct_bipoc_24'] = 1 - nces_data['pct_white_24']

# crdc
crdc_data = _read_processed("crdc_data_21.csv").drop(columns=['district'])

# edge
edge_data = _read_processed("edge_data_2018_22.csv").drop(columns=['Geography'])

# cross-class friendships
friendship_data = _read_processed("sce_cross_class_friendships_data_22.csv")[['district_id', 'pct_cc_friends']]

# theil's index: build "<category> (<H rounded to 2 decimals>)" labels
theils = _read_processed("theils_segregation_index.csv")
for dim in ('race', 'frl'):
    theils[f'theils_{dim}_cat_desc'] = (
        theils[f'theils_{dim}_cat'] + " (" + theils[f'theils_H_{dim}'].round(2).astype(str) + ")"
    )

# normalized exposure
normalized_exposure = _read_processed("normalized_exposure_22.csv")[
    ['district_id', 'ns_wht_blk', 'ns_wht_hsp', 'ns_wht_asn', 'ns_wht_nam', 'ns_wht_min', 'ns_was_min', 'ns_wht_nwh', 'ns_flu_nfl', 'ns_frl_nfr']
]

# performance: the file names this column without "_cat"; duplicate it under
# the name pattern the other *_cat_desc columns follow.
performance_21 = _read_processed("performance_21.xlsx", reader=pd.read_excel)
performance_21['grad_rate_21_white_nh_cat_desc'] = performance_21['grad_rate_21_white_nh_desc']

# educator diversity
ed_race = _read_processed("educator_diversity.csv")

# achievement trend
ecd_test_trend = _read_processed("achievement_trend_econ_distress_0919.csv").rename(
    columns={'ecd_gcs_mn_coh_eb': 'li_achieve_trend'}
)
ecd_test_trend['li_achieve_trend'] = ecd_test_trend['li_achieve_trend'].round(3)

print("All data is loaded")

All data is loaded


In [111]:
# Start with nces_data and left-join every other table on district_id, so each
# nces district is kept whether or not the other sources cover it.
sd_data = (
    nces_data
    .merge(frl_district, on="district_id", how="left")
    .merge(crdc_data, on="district_id", how="left")
    .merge(edge_data, on="district_id", how="left")
    .merge(performance_21, on="district_id", how="left")
    .merge(ecd_test_trend, on="district_id", how="left")
    .merge(friendship_data, on="district_id", how="left")
    .merge(theils, on="district_id", how="left")
    .merge(normalized_exposure, on="district_id", how="left")
    .merge(ed_race, on="district_id", how="left")
    .merge(voucher_areas, on="district_id", how="left")
    # .merge(qual, on="district_id", how="left")  # Uncomment if needed
)

# A left join never drops rows, so a different row count means one of the
# joined tables repeats a district_id and has duplicated districts.
if len(sd_data) != len(nces_data):
    print(f"WARNING: joins changed the row count from {len(nces_data)} to {len(sd_data)}; "
          "check the joined tables for duplicate district_id values")

# Move the identifying columns to the front. .copy() makes the reordered frame
# independent, so the assignments below do not raise SettingWithCopyWarning.
leading = ['district_id', 'district', 'voucher_area']
cols = leading + [col for col in sd_data.columns if col not in leading]
sd_data = sd_data[cols].copy()

# Round / rescale after all joins
if 'li_achieve_trend' in sd_data.columns:
    sd_data['li_achieve_trend'] = sd_data['li_achieve_trend'].round(4)
# These two columns are divided by 100 and rounded to 3 decimals.
for pct_col in ('pct_poverty', 'pct_employed_civilian'):
    if pct_col in sd_data.columns:
        sd_data[pct_col] = (sd_data[pct_col] / 100).round(3)

sd_data.head(5)

Unnamed: 0,district_id,district,voucher_area,enroll_24,student_teacher_ratio_24,hs_students_per_guidance_couselor_24,expend_per_pupil_22,pct_native_24,pct_aapi_24,pct_latinx_24,pct_black_24,pct_white_24,pct_hawpi_24,pct_two_plus_24,pct_bipoc_24,pct_frl,high_schools,hs_with_ap,pct_hs_with_ap,d_ap_enroll,d_ap_enroll_latinx,d_ap_enroll_native,d_ap_enroll_asian,d_ap_enroll_hawpi,d_ap_enroll_black,d_ap_enroll_white,d_ap_enroll_twoplus,d_hs_enroll,d_hs_enroll_latinx,d_hs_enroll_native,d_hs_enroll_asian,d_hs_enroll_hawpi,d_hs_enroll_black,d_hs_enroll_white,d_hs_enroll_twoplus,pct_ap_enroll_d,pct_latinx_ap_enroll_d,pct_native_ap_enroll_d,pct_asian_ap_enroll_d,pct_hawpi_ap_enroll_d,pct_black_ap_enroll_d,pct_white_ap_enroll_d,pct_twoplus_ap_enroll_d,schools,schools_with_oos,pct_oos_d,pct_latinx_oos_d,pct_native_oos_d,pct_asian_oos_d,pct_hawpi_oos_d,pct_black_oos_d,pct_white_oos_d,pct_twoplus_oos_d,pct_poverty,pct_poverty_moe,pct_employed_civilian,pct_employed_civilian_moe,pop,pop_moe,pct_only_english,pct_only_english_moe,pct_other_language,pct_other_language_moe,pct_spanish_athome,pct_spanish_athome_moe,pct_indo_european_athome,pct_indo_european_athome_moe,pct_aspi_athome,pct_aspi_athome_moe,grad_rate_21,grad_rate_21_native,grad_rate_21_aapi,grad_rate_21_black_nh,grad_rate_21_latinx,grad_rate_21_two_plus,grad_rate_21_white_nh,grad_rate_21_cat,grad_rate_21_native_cat,grad_rate_21_aapi_cat,grad_rate_21_black_nh_cat,grad_rate_21_latinx_cat,grad_rate_21_two_plus_cat,grad_rate_21_white_nh_cat,math_21,math_21_native,math_21_aapi,math_21_black_nh,math_21_latinx,math_21_two_plus,math_21_white_nh,math_21_cat,math_21_native_cat,math_21_aapi_cat,math_21_black_nh_cat,math_21_latinx_cat,math_21_two_plus_cat,math_21_white_nh_cat,read_21,read_21_native,read_21_aapi,read_21_black_nh,read_21_latinx,read_21_two_plus,read_21_white_nh,read_21_cat,read_21_native_cat,read_21_aapi_cat,read_21_black_nh_cat,read_21_latinx_cat,read_21_two_plus_cat,read_21_white_nh_cat,grad_rate_21_cat_desc,grad_rate_2
1_black_nh_cat_desc,grad_rate_21_latinx_cat_desc,grad_rate_21_aapi_cat_desc,grad_rate_21_white_nh_desc,grad_rate_21_two_plus_cat_desc,read_21_cat_desc,read_21_black_nh_cat_desc,read_21_latinx_cat_desc,read_21_aapi_cat_desc,read_21_native_cat_desc,read_21_two_plus_cat_desc,read_21_white_nh_cat_desc,math_21_cat_desc,math_21_black_nh_cat_desc,math_21_latinx_cat_desc,math_21_aapi_cat_desc,math_21_native_cat_desc,math_21_two_plus_cat_desc,math_21_white_nh_cat_desc,grad_rate_21_white_nh_cat_desc,li_achieve_trend,ecd_gcs_mn_coh_eb_se,pct_cc_friends,theils_H_race,theils_H_frl,theils_race_cat,theils_frl_cat,theils_race_cat_desc,theils_frl_cat_desc,ns_wht_blk,ns_wht_hsp,ns_wht_asn,ns_wht_nam,ns_wht_min,ns_was_min,ns_wht_nwh,ns_flu_nfl,ns_frl_nfr,pct_black_educ,pct_asian_educ,pct_latinx_educ,pct_native_educ,pct_white_educ
0,1700105,A-C Central Community Unit School District 262,,328.0,9.51,106.0,17729.0,0.0,0.0,0.021,0.006,0.951,0.0,0.021,0.049,0.507,1.0,1.0,1.0,9.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,119.0,1.0,0.0,0.0,0.0,0.0,118.0,0.0,0.08,0.0,,,,,0.08,,3.0,3.0,0.01,0.0,,,,0.0,0.01,0.0,0.084,7.7,0.849,8.6,325.0,102.0,1.0,0.102,,0.102,,0.102,,0.102,,0.102,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.11,0.026375,0.561,0.089604,0.003298,mid-low,low,mid-low (0.09),low (0.0),0.0,0.0,,,0.0,,0.0,,,,,,,
1,2700106,A.C.G.C. Public School District,,880.0,13.82,92.93,17691.0,0.006,0.001,0.123,0.001,0.831,0.0,0.039,0.169,0.468,1.0,1.0,1.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,403.0,24.0,3.0,1.0,0.0,0.0,360.0,14.0,0.01,0.0,0.0,0.0,,,0.01,0.0,3.0,3.0,0.01,0.0,0.0,0.0,,,0.0,0.09,0.101,2.9,0.883,3.6,1080.0,151.0,0.986,0.018,0.019,0.012,0.004,0.005,0.009,0.009,0.004,0.009,>=90%,,,,Suppressed,,>=90%,high,,,,suppressed,,high,40-49%,Suppressed,Suppressed,,Suppressed,Suppressed,40-49%,mid-low,suppressed,suppressed,,suppressed,suppressed,mid-low,30-39%,Suppressed,Suppressed,,Suppressed,Suppressed,40-49%,mid-low,suppressed,suppressed,,suppressed,suppressed,mid-low,high ( >=90% ),,suppressed,,high ( >=90% ),,mid-low ( 30-39% ),,suppressed,suppressed,suppressed,suppressed,mid-low ( 40-49% ),mid-low ( 40-49% ),,suppressed,suppressed,suppressed,suppressed,mid-low ( 40-49% ),high ( >=90% ),0.008,0.019059,0.76,0.010874,0.004203,low,low,low (0.01),low (0.0),0.0,0.0,0.0,0.0,0.0,,0.0,,,,,,,
2,4500690,Abbeville County School District,,2802.0,13.17,62.85,20782.0,0.003,0.004,0.025,0.327,0.61,0.001,0.03,0.39,0.66,2.0,2.0,1.0,12.0,0.0,0.0,3.0,0.0,2.0,7.0,0.0,894.0,15.0,2.0,4.0,2.0,230.0,595.0,29.0,0.01,0.0,0.0,0.75,0.0,0.01,0.01,0.0,8.0,8.0,0.07,0.16,0.0,0.0,0.0,0.11,0.04,0.1,0.169,6.4,0.767,6.7,3100.0,319.0,0.979,0.022,0.023,0.021,0.01,0.014,0.013,0.016,,0.014,89%,,Suppressed,85-89%,Suppressed,,85-89%,high,,suppressed,high,suppressed,,high,35-39%,Suppressed,Suppressed,25-29%,Suppressed,Suppressed,45-49%,mid-low,suppressed,suppressed,mid-low,suppressed,suppressed,mid-low,40-44%,Suppressed,Suppressed,25-29%,Suppressed,Suppressed,50-54%,mid-low,suppressed,suppressed,mid-low,suppressed,suppressed,mid-high,high ( 89% ),high ( 85-89% ),suppressed,suppressed,high ( 85-89% ),,mid-low ( 40-44% ),mid-low ( 25-29% ),suppressed,suppressed,suppressed,suppressed,mid-high ( 50-54% ),mid-low ( 35-39% ),mid-low ( 25-29% ),suppressed,suppressed,suppressed,suppressed,mid-low ( 45-49% ),high ( 85-89% ),0.05,0.014555,0.369,0.14786,0.383891,mid-low,high,mid-low (0.15),high (0.38),0.236963,0.020292,0.042331,0.007365,0.224838,0.222173,0.223552,0.02944,0.04994,,,,,
3,5500030,Abbotsford School District,,815.0,19.65,177.14,15137.0,0.0,0.007,0.634,0.006,0.337,0.0,0.015,0.663,0.634,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,407.0,207.0,0.0,0.0,0.0,4.0,190.0,4.0,0.0,0.0,,,,0.0,0.0,0.0,2.0,2.0,0.01,0.02,,,,0.1,0.0,0.0,0.202,9.9,0.851,7.7,745.0,137.0,0.456,0.1,0.544,0.1,0.537,0.098,0.005,0.006,,0.026,>=90%,,,,>=80%,Suppressed,>=80%,high,,,,high,suppressed,high,20-29%,,,,<=20%,Suppressed,40-59%,low,,,,low,suppressed,mid-low,20-29%,,,,<=20%,Suppressed,40-59%,low,,,,low,suppressed,mid-low,high ( >=90% ),,high ( >=80% ),,high ( >=80% ),suppressed,low ( 20-29% ),,low ( <=20% ),,,suppressed,mid-low ( 40-59% ),low ( 20-29% ),,low ( <=20% ),,,suppressed,mid-low ( 40-59% ),high ( >=80% ),-0.039,0.020513,0.478,0.016313,0.003469,low,low,low (0.02),low (0.0),0.0,0.0,0.0,,0.0,,0.0,,,,,,,
4,4807380,Abbott Independent School District,,284.0,11.63,101.33,13360.0,0.0,0.0,0.151,0.011,0.813,0.0,0.025,0.187,0.247,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.02,0.02,,,,0.0,0.02,0.0,0.0,34.6,0.86,11.0,170.0,54.0,1.0,0.251,,0.251,,0.251,,0.251,,0.251,>=80%,,,,Suppressed,Suppressed,>=50%,high,,,,suppressed,suppressed,mid-high,40-59%,,,,Suppressed,Suppressed,40-59%,mid-low,,,,suppressed,suppressed,mid-low,60-79%,,,,Suppressed,Suppressed,60-79%,mid-high,,,,suppressed,suppressed,mid-high,high ( >=80% ),,suppressed,,mid-high ( >=50% ),suppressed,mid-high ( 60-79% ),,suppressed,,,suppressed,mid-high ( 60-79% ),mid-low ( 40-59% ),,suppressed,,,suppressed,mid-low ( 40-59% ),mid-high ( >=50% ),0.028,0.031928,,0.0,0.0,low,low,low (0.0),low (0.0),0.0,0.0,,,0.0,,0.0,,,,,,,


In [None]:
# Districts that were assigned to a voucher area by the join above.
voucher_area_sd_data = sd_data[sd_data['voucher_area'].notnull()]

# Columns of the "small" export, in output order.
columns = [
    "voucher_area", "district_id", "district", "enroll_24",
    "pct_black_24", "pct_latinx_24", "pct_aapi_24", "pct_native_24",
    "pct_white_24", "pct_hawpi_24", "pct_two_plus_24", "theils_race_cat", "theils_frl_cat", "theils_race_cat_desc", "theils_frl_cat_desc",
    "pct_only_english", "pct_other_language", "pct_spanish_athome", "pct_aspi_athome", "pct_cc_friends", "pct_poverty",
    "pct_frl", "pct_employed_civilian",
    "grad_rate_21_cat", "grad_rate_21_black_nh_cat", "grad_rate_21_latinx_cat",
    "grad_rate_21_aapi_cat", "grad_rate_21_white_nh_cat", "grad_rate_21_two_plus_cat",
    "grad_rate_21_cat_desc", "grad_rate_21_black_nh_cat_desc", "grad_rate_21_latinx_cat_desc",
    "grad_rate_21_aapi_cat_desc","grad_rate_21_white_nh_cat_desc", "grad_rate_21_two_plus_cat_desc",
    "pct_oos_d", "pct_black_oos_d",
    "pct_latinx_oos_d", "pct_asian_oos_d", "pct_white_oos_d",
    "pct_black_ap_enroll_d", "pct_latinx_ap_enroll_d",
    "pct_asian_ap_enroll_d", "pct_native_ap_enroll_d", "pct_white_ap_enroll_d",
    "read_21_cat", "read_21_black_nh_cat", "read_21_latinx_cat", "read_21_aapi_cat",
    "read_21_native_cat", "read_21_two_plus_cat", "read_21_white_nh_cat",
    "read_21_cat_desc", "read_21_black_nh_cat_desc", "read_21_latinx_cat_desc", "read_21_aapi_cat_desc",
    "read_21_native_cat_desc", "read_21_two_plus_cat_desc", "read_21_white_nh_cat_desc",
    "math_21_cat", "math_21_black_nh_cat", "math_21_latinx_cat", "math_21_aapi_cat",
    "math_21_native_cat", "math_21_two_plus_cat", "math_21_white_nh_cat",
    "math_21_cat_desc", "math_21_black_nh_cat_desc", "math_21_latinx_cat_desc", "math_21_aapi_cat_desc",
    "math_21_native_cat_desc", "math_21_two_plus_cat_desc", "math_21_white_nh_cat_desc",
    "expend_per_pupil_22", "li_achieve_trend", "pct_black_educ",
    "pct_latinx_educ", "pct_asian_educ", "pct_native_educ",
    "pct_white_educ", "student_teacher_ratio_24",
    "hs_students_per_guidance_couselor_24"
]

# Select rows and columns in one .loc call and copy afterwards. Copying before
# the row filter would leave the filtered result flagged as a slice, and the
# assignments below would raise SettingWithCopyWarning.
in_included_areas = voucher_area_sd_data['district_id'].isin(included_areas)
voucher_area_sd_data_small = voucher_area_sd_data.loc[in_included_areas, columns].copy()

# Numeric ids, as displayed in the existing data file loaded in the next section.
# errors="coerce" turns any non-numeric id into NaN without raising.
voucher_area_sd_data_small["district_id"] = pd.to_numeric(voucher_area_sd_data_small["district_id"], errors="coerce")
# Placeholder column, left empty here.
voucher_area_sd_data_small["pct_bipoc_educ"] = np.nan

print( voucher_area_sd_data_small.shape , "rows and cols" )
voucher_area_sd_data_small.head()


(117, 83) rows and cols


Unnamed: 0,voucher_area,district_id,district,enroll_24,pct_black_24,pct_latinx_24,pct_aapi_24,pct_native_24,pct_white_24,pct_hawpi_24,pct_two_plus_24,theils_race_cat,theils_frl_cat,theils_race_cat_desc,theils_frl_cat_desc,pct_only_english,pct_other_language,pct_spanish_athome,pct_aspi_athome,pct_cc_friends,pct_poverty,pct_frl,pct_employed_civilian,grad_rate_21_cat,grad_rate_21_black_nh_cat,grad_rate_21_latinx_cat,grad_rate_21_aapi_cat,grad_rate_21_white_nh_cat,grad_rate_21_two_plus_cat,grad_rate_21_cat_desc,grad_rate_21_black_nh_cat_desc,grad_rate_21_latinx_cat_desc,grad_rate_21_aapi_cat_desc,grad_rate_21_white_nh_cat_desc,grad_rate_21_two_plus_cat_desc,pct_oos_d,pct_black_oos_d,pct_latinx_oos_d,pct_asian_oos_d,pct_white_oos_d,pct_black_ap_enroll_d,pct_latinx_ap_enroll_d,pct_asian_ap_enroll_d,pct_native_ap_enroll_d,pct_white_ap_enroll_d,read_21_cat,read_21_black_nh_cat,read_21_latinx_cat,read_21_aapi_cat,read_21_native_cat,read_21_two_plus_cat,read_21_white_nh_cat,read_21_cat_desc,read_21_black_nh_cat_desc,read_21_latinx_cat_desc,read_21_aapi_cat_desc,read_21_native_cat_desc,read_21_two_plus_cat_desc,read_21_white_nh_cat_desc,math_21_cat,math_21_black_nh_cat,math_21_latinx_cat,math_21_aapi_cat,math_21_native_cat,math_21_two_plus_cat,math_21_white_nh_cat,math_21_cat_desc,math_21_black_nh_cat_desc,math_21_latinx_cat_desc,math_21_aapi_cat_desc,math_21_native_cat_desc,math_21_two_plus_cat_desc,math_21_white_nh_cat_desc,expend_per_pupil_22,li_achieve_trend,pct_black_educ,pct_latinx_educ,pct_asian_educ,pct_native_educ,pct_white_educ,student_teacher_ratio_24,hs_students_per_guidance_couselor_24,pct_bipoc_educ
177,newark,3400750,Allendale Borough School District,905.0,0.009,0.074,0.152,0.0,0.702,0.0,0.063,low,low,low (0.0),low (0.01),0.78,0.22,0.122,0.068,,0.0,0.015,0.869,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23070.0,,,,,,,12.15,,
201,newark,3400870,Alpine Borough School District,169.0,0.083,0.059,0.254,0.0,0.556,0.0,0.047,low,low,low (0.0),low (0.0),,,,,,,0.019,0.842,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,48013.0,,,,,,,8.41,,
789,newark,3401350,Belleville Town School District,5095.0,0.078,0.74,0.061,0.003,0.1,0.004,0.014,low,low,low (0.02),low (0.01),0.464,0.536,0.472,0.009,0.747,0.148,0.482,0.756,high,high,high,high,high,,high ( 94% ),high ( >=90% ),high ( 90-94% ),high ( >=90% ),high ( >=90% ),,0.0,0.0,0.0,0.0,0.01,0.04,0.08,0.13,0.0,0.16,mid-low,mid-low,mid-low,mid-low,suppressed,suppressed,mid-high,mid-low ( 45% ),mid-low ( 40-59% ),mid-low ( 40-44% ),mid-low ( 40-59% ),suppressed,suppressed,mid-high ( 50-59% ),low,low,low,low,suppressed,suppressed,low,low ( 13% ),low ( <=20% ),low ( 10-14% ),low ( <=20% ),suppressed,suppressed,low ( 20-29% ),24718.0,0.005,,,,,,13.25,85.16,
871,newark,3401500,Bergenfield Borough School District,3710.0,0.075,0.569,0.223,0.001,0.097,0.005,0.029,low,low,low (0.01),low (0.01),0.611,0.39,0.23,0.136,0.762,0.104,0.304,0.864,high,high,high,excellent,high,suppressed,high ( 93% ),high ( >=80% ),high ( 90-94% ),excellent ( >=95% ),high ( 80-89% ),suppressed,0.0,0.0,0.0,0.0,0.0,0.32,0.36,0.57,0.5,0.36,mid-high,mid-low,mid-low,mid-high,,mid-high,mid-low,mid-high ( 50-54% ),mid-low ( 40-59% ),mid-low ( 35-39% ),mid-high ( 60-69% ),,mid-high ( >=50% ),mid-low ( 40-59% ),mid-low,low,low,mid-high,,mid-low,low,mid-low ( 25-29% ),low ( <=20% ),low ( 15-19% ),mid-high ( 50-59% ),,mid-low ( <=49% ),low ( 21-39% ),25034.0,0.008,,,,,,12.67,97.69,
1054,newark,3401830,Bloomfield Township School District,6438.0,0.208,0.471,0.061,0.002,0.233,0.009,0.015,low,mid-low,low (0.04),mid-low (0.06),0.717,0.283,0.233,0.011,0.759,0.091,0.385,0.797,high,high,high,high,excellent,suppressed,high ( 93% ),high ( 90-94% ),high ( 90-94% ),high ( >=90% ),excellent ( >=95% ),suppressed,0.0,0.0,0.0,0.0,0.0,0.07,0.1,0.37,0.0,0.21,mid-low,mid-low,mid-low,mid-high,,suppressed,mid-high,mid-low ( 45% ),mid-low ( 30-34% ),mid-low ( 35-39% ),mid-high ( 60-69% ),,suppressed,mid-high ( 55-59% ),low,low,low,mid-low,,suppressed,mid-low,low ( 14% ),low ( <=5% ),low ( 6-9% ),mid-low ( 30-39% ),,suppressed,mid-low ( 25-29% ),24121.0,-0.02,,,,,,11.52,98.86,


### Add data to existing data

In [119]:
# Existing export that the new voucher-area rows get appended to.
existing_csv = r"C:\Git_Repo\sdat\sdat-app\static\data\250910_voucher_sds_quant_data_small.csv"
all_data = pd.read_csv(existing_csv)

# Drop every column whose name starts with "Unnamed".
is_unnamed = all_data.columns.str.contains('^Unnamed')
all_data = all_data.loc[:, ~is_unnamed]

all_data.head(3)

Unnamed: 0,voucher_area,district_id,district,enroll_24,pct_black_24,pct_latinx_24,pct_aapi_24,pct_native_24,pct_white_24,pct_hawpi_24,pct_two_plus_24,theils_race_cat,theils_frl_cat,theils_race_cat_desc,theils_frl_cat_desc,pct_only_english,pct_other_language,pct_spanish_athome,pct_aspi_athome,pct_cc_friends,pct_poverty,pct_frl,pct_employed_civilian,grad_rate_21_cat,grad_rate_21_black_nh_cat,grad_rate_21_latinx_cat,grad_rate_21_aapi_cat,grad_rate_21_white_nh_cat,grad_rate_21_two_plus_cat,grad_rate_21_cat_desc,grad_rate_21_black_nh_cat_desc,grad_rate_21_latinx_cat_desc,grad_rate_21_aapi_cat_desc,grad_rate_21_white_nh_cat_desc,grad_rate_21_two_plus_cat_desc,pct_oos_d,pct_black_oos_d,pct_latinx_oos_d,pct_asian_oos_d,pct_white_oos_d,pct_black_ap_enroll_d,pct_latinx_ap_enroll_d,pct_asian_ap_enroll_d,pct_native_ap_enroll_d,pct_white_ap_enroll_d,read_21_cat,read_21_black_nh_cat,read_21_latinx_cat,read_21_aapi_cat,read_21_native_cat,read_21_two_plus_cat,read_21_white_nh_cat,read_21_cat_desc,read_21_black_nh_cat_desc,read_21_latinx_cat_desc,read_21_aapi_cat_desc,read_21_native_cat_desc,read_21_two_plus_cat_desc,read_21_white_nh_cat_desc,math_21_cat,math_21_black_nh_cat,math_21_latinx_cat,math_21_aapi_cat,math_21_native_cat,math_21_two_plus_cat,math_21_white_nh_cat,math_21_cat_desc,math_21_black_nh_cat_desc,math_21_latinx_cat_desc,math_21_aapi_cat_desc,math_21_native_cat_desc,math_21_two_plus_cat_desc,math_21_white_nh_cat_desc,expend_per_pupil_22,li_achieve_trend,pct_black_educ,pct_latinx_educ,pct_asian_educ,pct_native_educ,pct_white_educ,student_teacher_ratio_24,hs_students_per_guidance_couselor_24,pct_bipoc_educ
0,los angeles,601620,ABC Unified School District,18081.0,0.07,0.455,0.353,0.002,0.048,0.005,0.067,mid-low,mid-low,mid-low (0.13),mid-low (0.11),0.594,0.406,0.222,0.137,0.613,0.089,0.561,0.787,excellent,high,high,excellent,excellent,high,excellent ( 95% ),high ( 90-94% ),high ( 92% ),excellent ( 97% ),excellent ( >=95% ),high ( >=90% ),0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.5,0.5,0.28,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18320.0,0.135,,,,,,23.1,191.48,
1,south shore,2501650,Abington School District,2133.0,0.062,0.154,0.025,0.004,0.725,0.001,0.029,low,low,low (0.01),low (0),0.93,0.07,0.002,,0.851,0.051,,0.865,high,mid-high,high,,high,,high ( 90-94% ),mid-high ( >=50% ),high ( >=80% ),,high ( 90-94% ),,0.01,0.02,0.01,0.0,0.01,0.03,0.06,0.22,,0.14,mid-high,,mid-low,,,,mid-high,mid-high ( 55-59% ),,mid-low ( 40-59% ),,,,mid-high ( 55-59% ),mid-low,,low,,,,mid-low,mid-low ( 25-29% ),,low ( <=20% ),,,,mid-low ( 30-34% ),25930.0,0.007,0.017,0.017,0.0,0.0,0.959,13.44,102.41,
2,los angeles,600001,Acton-Agua Dulce Unified School District,1021.0,0.018,0.573,0.018,0.003,0.364,0.0,0.025,low,low,low (0.01),low (0.01),0.787,0.213,0.189,0.024,0.639,0.074,0.601,0.728,high,,high,,high,,high ( 90-94% ),,high ( 80-89% ),,high ( >=90% ),,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.17,,,,,,,,,,,,,,,,,,,,,,,,,,,,,19495.0,0.062,,,,,,19.83,187.5,


In [None]:
# Test if all columns match, in both directions: this lists every column that
# exists in only one of the two frames, so an empty Index means they line up.
# (Checking the concatenated columns against the new frame alone would miss
# columns that the new frame has but the existing file lacks.)
all_data.columns.symmetric_difference(voucher_area_sd_data_small.columns)

Index([], dtype='object')

In [131]:
# Append the new voucher-area rows to the existing data.
all_data_added = pd.concat([all_data, voucher_area_sd_data_small])

# The output name follows the existing file's pattern with today's date, e.g.
# 250910_voucher_sds_quant_data_small.csv. The date is formatted outside the
# f-string because reusing double quotes inside a double-quoted f-string is a
# SyntaxError before Python 3.12.
date_prefix = pd.Timestamp.now().strftime('%y%m%d')
filename = f"{date_prefix}_voucher_sds_quant_data_small.csv"
all_data_added.to_csv(
    os.path.join(folder_path, 'nj', 'processed', filename),
    index=False
)