In [1]:
import glob 
import csv
import pandas as pd
import numpy as np
from scipy import stats 

import plotly.graph_objects as go
import plotly.express as px

# Show up to 50 columns and 50 rows when a DataFrame is rendered in the notebook.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 50)

In [2]:
# Collect every merged_* file from the cleaned raw-data folder, in alphabetical order.
merged_csvs = sorted(glob.glob('../raw_data_files_clean/merged_*'))

merged_csvs

['../raw_data_files_clean/merged_adm.csv',
 '../raw_data_files_clean/merged_assessment.csv',
 '../raw_data_files_clean/merged_assessment_2022.csv',
 '../raw_data_files_clean/merged_assessment_all_students.csv',
 '../raw_data_files_clean/merged_demographics.csv',
 '../raw_data_files_clean/merged_directory.csv',
 '../raw_data_files_clean/merged_grad.csv']

In [3]:
# Locate the 2022 assessment flat file(s) and the 2022 demographics file(s);
# each name is bound to the list of paths matching its pattern.
flat_file, demog = (
    glob.glob(pattern)
    for pattern in (
        '../data_for_analysis/2022_assessment_flat_file*',
        '../data_for_analysis/2022_demog*',
    )
)
demog

['../data_for_analysis/2022_demog.csv']

In [4]:
def pull_files_and_create_df(raw_file_names):
    """Read each CSV into a module-level DataFrame named ``df_<file name>``.

    ``<file name>`` is the path's final component up to its first dot, so
    ``../raw_data_files_clean/merged_adm.csv`` becomes ``df_merged_adm``.

    Parameters
    ----------
    raw_file_names : iterable of str
        Paths of the CSV files to load (for example the result of ``glob.glob``).

    Returns
    -------
    list of str
        Names of the global variables that were created, sorted in reverse
        alphabetical order. Empty when ``raw_file_names`` is empty.
    """
    # Local import so the function does not depend on the notebook's import cell.
    import os

    df_names = []
    for path in raw_file_names:
        # Take the base name instead of a fixed position in the '/'-split path,
        # so the directory depth and the OS path separator do not matter.
        name = os.path.basename(path).split('.')[0]
        # The frames are deliberately published as globals; later cells refer
        # to them by these generated names.
        globals()[f'df_{name}'] = pd.read_csv(path, low_memory=False)
        df_names.append(f'df_{name}')
    df_names.sort(reverse=True)
    return df_names

In [5]:
# Load every file found by the glob cells into df_<file name> globals.
# Only the last call's return value (the list of names it created) is echoed
# as the cell output.
pull_files_and_create_df(merged_csvs)
pull_files_and_create_df(flat_file)
pull_files_and_create_df(demog)

['df_2022_demog']

In [6]:
# Spot-check: flat-file rows whose system_name is 'i3 Academy'.
df_2022_assessment_flat_file.loc[lambda flat: flat['system_name'].eq('i3 Academy')]

Unnamed: 0,system_code,system_name,school_code,school_name,2022_percent_proficient_ela,2022_percent_proficient_math,2022_proficient_ela,2022_proficient_math,2022_tested_ela,2022_tested_math,site_type,county,is_charter,asian,black_or_african_american,american_indian_alaska_native,native_hawaiian_pacific_islander,white,two_or_more_races,hispanic_latino,year,total_enrollment_2022,econ_disadv,foster,gen_ed,...,military_pct_2022,swd_pct_2022,el_pct_2022,asian_pct_2022,black_or_african_american_pct_2022,american_indian_alaska_native_pct_2022,native_hawaiian_pacific_islander_pct_2022,white_pct_2022,two_or_more_races_pct_2022,hispanic_latino_pct_2022,sum_pct_2022,updated_district,updated_district_code,frl_pct_2022,total_frl_enrollment_2022,black_or_hispanic_pct_2022,is_charter_district,2021_percent_proficient_ela,2021_percent_proficient_math,2021_proficient_ela,2021_proficient_math,2021_tested_ela,2021_tested_math,2021_2022_ela_change,2021_2022_math_change
1281,805,i3 Academy,10,i3 Academy - Elementary School,26.57,7.62,54.9999,16.002,207.0,210.0,Public Charter School Startup,Jefferson,True,*,339,*,*,27,*,13,2022,382.0,239.0,,317.0,...,,0.170157,,,0.887435,,,0.070681,,0.034031,,Birmingham City,114.0,59.81%,423,0.921466,True,25.69,3.66,56.0,8.015985,217.976319,219.015985,0.88,3.96
1282,805,i3 Academy,15,i3 Academy - Middle School,30.93,9.18,30.3114,8.9964,98.0,98.0,Public Charter School Startup,Jefferson,True,*,*,*,*,*,*,*,2022,88.0,53.0,,71.0,...,,0.193182,,,,,,,,,,Birmingham City,114.0,56.38%,94,,True,,,,,,,,


In [7]:
# Lookup table keyed by (system_code, school_code) that assigns each listed
# school an 'updated_district' name and code. One tuple per school.
df_district_charter_mapping = pd.DataFrame(
    [
        (800, 10, 'Mobile County', 49),
        (51, 220, 'Montgomery County', 51),
        (802, 10, 'Montgomery County', 51),
        (802, 15, 'Montgomery County', 51),
        (808, 10, 'Perry County', 53),
        (801, 10, 'Sumter County', 60),
        (803, 10, 'Birmingham City', 114),
        (805, 10, 'Birmingham City', 114),
        (805, 15, 'Birmingham City', 114),
        (810, 10, 'Homewood City', 157),
    ],
    columns=['system_code', 'school_code', 'updated_district', 'updated_district_code'],
)

In [8]:
# School-level directory rows: is_charter holds the text 'True' or 'False',
# school_code is neither 0 nor 1, and an NCES id is present. Exact duplicate
# rows are dropped.
df_directory_schools = (
    df_merged_directory
    .loc[lambda directory: directory['is_charter'].isin(['True', 'False'])
         & directory['school_code'].ne(0)
         & directory['school_code'].ne(1)
         & directory['nces_id'].notna()]
    .drop_duplicates()
)

# Every directory row flagged as a charter (no school_code / nces_id restriction).
df_directory_charter_schools = df_merged_directory.loc[
    lambda directory: directory['is_charter'].eq('True')
]

In [9]:
# List the columns available on the filtered directory frame.
df_directory_schools.columns

Index(['system_code', 'school_code', 'system_name', 'school_name', 'site_type',
       'nces_id', 'is_charter', 'street', 'city', 'state', 'zip_5', 'county',
       'file_year', 'opened_date'],
      dtype='object')

In [10]:
# Spot-check: merged demographics rows whose system_name is 'i3 Academy'.
df_merged_demographics[df_merged_demographics['system_name'].eq('i3 Academy')]

Unnamed: 0,year,system_name,school_name,grade,gender,ethnicity,sub_population,total_student_count,asian,asian_%,black_or_african_american,black_or_african_american_%,american_indian_/_alaska_native,_american_indian_/_alaska_native_%,native_hawaiian_/_pacific_islander,native_hawaiian_/_pacific_islander_%,white,white_%,two_or_more_races,two_or_more_races_%,file_year
71,2022,i3 Academy,i3 Academy - LEA,All Grades,All Gender,All Ethnicity,All SubPopulation,470,*,*,419,89.15,*,*,*,*,40,8.51,*,*,2022
231,2022,i3 Academy,i3 Academy - Elementary School,All Grades,All Gender,All Ethnicity,All SubPopulation,382,*,*,340,89.01,*,*,*,*,34,8.90,*,*,2022
281,2022,i3 Academy,i3 Academy - Middle School,All Grades,All Gender,All Ethnicity,All SubPopulation,88,*,*,*,*,*,*,*,*,*,*,*,*,2022
1570,2021,i3 Academy,i3 Academy - Elementary School,All Grades,All Gender,All Ethnicity,All SubPopulation,412,*,*,370,89.81,*,*,*,*,35,8.50,*,*,2021


In [11]:
# Spot-check: filtered directory rows whose system_name is 'i3 Academy'.
df_directory_schools[df_directory_schools['system_name'].eq('i3 Academy')]

Unnamed: 0,system_code,school_code,system_name,school_name,site_type,nces_id,is_charter,street,city,state,zip_5,county,file_year,opened_date
1603,805,10,i3 Academy,i3 Academy - Elementary School,Public Charter School Startup,10020402476.0,True,55th Place 1st Avenue South,Birmingham,AL,35212,Jefferson,2022,7/1/20
1604,805,15,i3 Academy,i3 Academy - Middle School,Public Charter School Startup,10020402510.0,True,7901 1st Avenue North,Birmingham,AL,35206-,Jefferson,2022,7/1/21
5884,805,10,i3 Academy,i3 Academy - Elementary School,Public Charter School Startup,10020402476.0,True,55th Place 1st Avenue South,Birmingham,AL,35212,Jefferson,2021,7/1/20


In [12]:
# Tag every 2022 demographics row with file_year so the frame can later be
# merged on (system_name, school_name, file_year).
df_2022_demog['file_year'] = 2022

In [13]:
# Outer-join the school directory with the merged demographics on name and year;
# rows present on only one side are kept, with the other side's columns empty.
df_directory_demog_merged = pd.merge(
    df_directory_schools,
    df_merged_demographics,
    how='outer',
    on=['system_name', 'school_name', 'file_year'],
)

df_directory_demog_merged

Unnamed: 0,system_code,school_code,system_name,school_name,site_type,nces_id,is_charter,street,city,state,zip_5,county,file_year,opened_date,year,grade,gender,ethnicity,sub_population,total_student_count,asian,asian_%,black_or_african_american,black_or_african_american_%,american_indian_/_alaska_native,_american_indian_/_alaska_native_%,native_hawaiian_/_pacific_islander,native_hawaiian_/_pacific_islander_%,white,white_%,two_or_more_races,two_or_more_races_%
0,600,9040.0,Al Inst Deaf And Blind,E H Gentry Technical Facility,State Supported School Other Facility,10000901406.0,False,1105 Fort Lashley,Talladega,AL,35160,Talladega,2022,,,,,,,,,,,,,,,,,,,
1,210,4999.0,Alabama Youth Services,Spec Ed Private Services (Child Count Only),Special Education - Private Services,10000202249.0,False,1000 Industrial School Road,Mt Meigs,AL,36057-0066,Montgomery,2022,,,,,,,,,,,,,,,,,,,
2,103,10.0,Alabaster City,Creek View Elementary School,Public Regular School,10019001815.0,False,8568 Highway 17,Maylene,AL,35114,Shelby,2022,7/1/13,2022.0,All Grades,All Gender,All Ethnicity,All SubPopulation,911,11,~,180,19.76,137,15.04,*,*,551,60.48,29,~
3,103,5.0,Alabaster City,Meadow View Elementary School,Public Regular School,10019001460.0,False,2800 Smokey Rd,Alabaster,AL,35007,Shelby,2022,7/1/13,2022.0,All Grades,All Gender,All Ethnicity,All SubPopulation,937,*,*,195,20.81,189,20.17,*,*,518,55.28,24,~
4,103,4999.0,Alabaster City,Spec Ed Private Services (Child Count Only),Special Education - Private Services,10019002216.0,False,"1953 Municipal Way, Suite 200",Alabaster,AL,35007,Autauga,2022,8/1/13,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15089,,,Walker County,Walker County,,,,,,,,,2015,,2015.0,All Grades,All Gender,All Ethnicity,All SubPopulation,7899,*,*,451,5.71,*,*,*,*,7255,91.85,174,~
15090,,,Washington County,Washington County,,,,,,,,,2015,,2015.0,All Grades,All Gender,All Ethnicity,All SubPopulation,3024,*,*,820,27.12,302,9.99,*,*,1855,61.34,40,~
15091,,,Wilcox County,Wilcox County,,,,,,,,,2015,,2015.0,All Grades,All Gender,All Ethnicity,All SubPopulation,1869,*,*,1858,~,*,*,*,*,*,*,*,*
15092,,,Winfield City,Winfield City,,,,,,,,,2015,,2015.0,All Grades,All Gender,All Ethnicity,All SubPopulation,1263,*,*,42,~,*,*,*,*,1179,93.35,31,~


In [14]:
# Strip the ' - LEA' suffix from the two LEA-level school names.
# Presumably this lets these rows match on (system_name, school_name) in the
# entities merge below -- TODO confirm against the directory naming.
# (The leftover 'i3 Academy' filter expression that used to open this cell was
# removed: its result was discarded, so it never displayed anything.)
df_2022_demog['school_name'] = df_2022_demog['school_name'].replace({
    'i3 Academy - LEA': 'i3 Academy',
    'LEAD Academy - LEA': 'LEAD Academy',
})

In [15]:
# Cast both join keys to pandas' string dtype on both frames before merging.
# NOTE: the columns are reassigned on df_merged_directory itself, so an earlier
# cell that compares these codes with integers will see string values if it is
# re-run after this one.
for code_col in ('system_code', 'school_code'):
    df_district_charter_mapping[code_col] = df_district_charter_mapping[code_col].astype('string')
    df_merged_directory[code_col] = df_merged_directory[code_col].astype('string')

# Directory rows, left-joined to the district mapping and outer-joined to the
# 2022 demographics, then limited to the listed site types for file_year 2022.
# 'State Board District' and 'State Supported School Special Education School'
# are not in the list (they were commented out of the original filter).
df_entities = (
    df_merged_directory
    .merge(df_district_charter_mapping, how='left', on=['system_code', 'school_code'])
    .merge(df_2022_demog, how='outer', on=['system_name', 'school_name', 'file_year'])
    .loc[lambda merged: merged['site_type'].isin([
        'Central Office',
        'Public Regular School',
        'Public Alternative School',
        'Public Virtual School',
        'Public Special Education School',
        'Public Magnet School',
        'Public Charter School Startup',
        'Public Charter School - Converted',
        'Regular School with Magnet Program',
        'Special Education School (No Detail)',
    ]) & merged['file_year'].eq(2022)]
)

# Preview the rows whose school_code is the string '0'.
df_entities[df_entities['school_code'].eq('0')]


Unnamed: 0,system_code,school_code,system_name,school_name,site_type,nces_id,is_charter,street,city,state,zip_5,county,file_year,opened_date,updated_district,updated_district_code,asian,black_or_african_american,american_indian_alaska_native,native_hawaiian_pacific_islander,white,two_or_more_races,hispanic_latino,year,total_enrollment_2022,econ_disadv,foster,gen_ed,homeless,migrant,military,swd,el,econ_disadv_pct_2022,foster_pct_2022,gen_ed_pct_2022,homeless_pct_2022,migrant_pct_2022,military_pct_2022,swd_pct_2022,el_pct_2022,asian_pct_2022,black_or_african_american_pct_2022,american_indian_alaska_native_pct_2022,native_hawaiian_pacific_islander_pct_2022,white_pct_2022,two_or_more_races_pct_2022,hispanic_latino_pct_2022,sum_pct_2022
0,999,0,Alabama State Department of Education,Alabama State Department of Education,Central Office,,,"50. N. Ripley Street, Gordon Persons Building",Montgomery,AL,36104,Montgomery,2022,1/1/01,,,11099,235270,6455,854,385045,22524,74561,2022.0,735808.0,351049.0,2754.0,604862.0,7819.0,1292.0,14646.0,130946.0,36956.0,0.477093,0.003743,0.822038,0.010626,0.001756,0.019905,0.177962,0.050225,0.015084,0.319744,0.008773,0.001161,0.523295,0.030611,0.101332,1.0
41,601,0,Alabama Specialized Treatment Centers (ALSTC),Alabama Specialized Treatment Centers (ALSTC),Central Office,,,"50. N. Ripley Street, Gordon Persons Building",Montgomery,AL,36104,Montgomery,2022,8/1/21,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
51,210,0,Alabama Youth Services,Alabama Youth Services,Central Office,100002.0,,1000 Industrial School Road,Mt. Meigs,AL,36057-0066,Montgomery,2022,1/1/01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
61,103,0,Alabaster City,Alabaster City,Central Office,100190.0,,10111 Highway 119,Alabaster,AL,35007,Shelby,2022,7/1/13,,,77,1399,31,*,3396,71,1332,2022.0,6315.0,2201.0,20.0,5464.0,48.0,,128.0,851.0,554.0,0.348535,0.003167,0.865241,0.007601,,0.020269,0.134759,0.087728,0.012193,0.221536,0.004909,,0.537767,0.011243,0.210926,
74,101,0,Albertville City,Albertville City,Central Office,100005.0,,8379 US Highway 431,Albertville,AL,35950-0025,Marshall,2022,,,,24,254,28,*,2168,145,3235,2022.0,5856.0,2716.0,20.0,5045.0,71.0,304.0,120.0,811.0,1488.0,0.463798,0.003415,0.861510,0.012124,0.051913,0.020492,0.138490,0.254098,0.004098,0.043374,0.004781,,0.370219,0.024761,0.552425,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4184,64,0,Walker County,Walker County,Central Office,103450.0,,1710 Alabama Av,Jasper,AL,35501-4966,Walker,2022,,,,*,397,19,*,6240,170,283,2022.0,7118.0,4087.0,59.0,5433.0,91.0,,69.0,1685.0,121.0,0.574178,0.008289,0.763276,0.012784,,0.009694,0.236724,0.016999,,0.055774,0.002669,,0.876651,0.023883,0.039758,
4212,65,0,Washington County,Washington County,Central Office,103480.0,,229 Granade St,Chatom,AL,36518,Washington,2022,,,,*,594,212,*,1655,42,20,2022.0,2529.0,1247.0,12.0,2065.0,19.0,143.0,,464.0,,0.493080,0.004745,0.816528,0.007513,0.056544,,0.183472,,,0.234875,0.083828,,0.654409,0.016607,0.007908,
4236,66,0,Wilcox County,Wilcox County,Central Office,103510.0,,75 Camden Bypass,Camden,AL,36726,Wilcox,2022,1/1/01,,,*,1293,*,*,15,*,*,2022.0,1316.0,1091.0,,1140.0,89.0,,,176.0,,0.829027,,0.866261,0.067629,,,0.133739,,,0.982523,,,0.011398,,,
4250,204,0,Winfield City,Winfield City,Central Office,103540.0,,481 Apple Ave,Winfield,AL,35594,Marion,2022,1/1/01,,,*,60,*,*,1108,23,40,2022.0,1242.0,540.0,,1035.0,,,,207.0,21.0,0.434783,,0.833333,,,,0.166667,0.016908,,0.048309,,,0.892110,0.018519,0.032206,


In [16]:
# Column inventory of the merged entities frame.
df_entities.columns


Index(['system_code', 'school_code', 'system_name', 'school_name', 'site_type',
       'nces_id', 'is_charter', 'street', 'city', 'state', 'zip_5', 'county',
       'file_year', 'opened_date', 'updated_district', 'updated_district_code',
       'asian', 'black_or_african_american', 'american_indian_alaska_native',
       'native_hawaiian_pacific_islander', 'white', 'two_or_more_races',
       'hispanic_latino', 'year', 'total_enrollment_2022', 'econ_disadv',
       'foster', 'gen_ed', 'homeless', 'migrant', 'military', 'swd', 'el',
       'econ_disadv_pct_2022', 'foster_pct_2022', 'gen_ed_pct_2022',
       'homeless_pct_2022', 'migrant_pct_2022', 'military_pct_2022',
       'swd_pct_2022', 'el_pct_2022', 'asian_pct_2022',
       'black_or_african_american_pct_2022',
       'american_indian_alaska_native_pct_2022',
       'native_hawaiian_pacific_islander_pct_2022', 'white_pct_2022',
       'two_or_more_races_pct_2022', 'hispanic_latino_pct_2022',
       'sum_pct_2022'],
      dtype='object')

The entities table should have one row per entity (district or school) with: type (district or school), district code, district name, school code, school name, is_charter, neighboring district (or whatever you called that for charters in our analysis), and maybe the most recent enrollment total and % by race and group (as columns), i.e. a wide layout. I think including just those entities that are currently operating would be fine.

In [17]:
# Keep only the identifying columns, the 2022 enrollment total and the 2022
# percentage columns, in this order.
entity_id_columns = [
    'system_code', 'school_code', 'system_name', 'school_name', 'site_type',
    'is_charter', 'updated_district', 'updated_district_code',
]
entity_pct_columns = [
    'econ_disadv_pct_2022', 'foster_pct_2022', 'gen_ed_pct_2022',
    'homeless_pct_2022', 'migrant_pct_2022', 'military_pct_2022',
    'swd_pct_2022', 'el_pct_2022', 'asian_pct_2022',
    'black_or_african_american_pct_2022',
    'american_indian_alaska_native_pct_2022',
    'native_hawaiian_pacific_islander_pct_2022', 'white_pct_2022',
    'two_or_more_races_pct_2022', 'hispanic_latino_pct_2022',
    'sum_pct_2022',
]
df_entities = df_entities.loc[
    :, entity_id_columns + ['total_enrollment_2022'] + entity_pct_columns
]

In [18]:
# Fields listed for the long metrics layout (presumably the target for the
# reshaping cells that follow -- confirm with the note's author):
# district code, district name, school code, school name, metric name, group, grade, year, value, and n-size
# Display the merged assessment frame that those cells start from.
df_merged_assessment

Unnamed: 0,system_name,school_name,subject,grade,tested,proficient,race,ethnicity,sub_population,grade.1,participation_rate,proficient_rate,file_source,file_year,system_code,school_code,site_type,nces_id,is_charter,street,city,state,zip_5,county,opened_date
0,Alabama State Department of Education,Alabama State Department of Education,Math,All Grades,378006.00,102938,All Race,All Ethnicity,All SubPopulation,All Grades,,27.23,df_assessment_2022,2022,999.0,0.0,Central Office,,,"50. N. Ripley Street, Gordon Persons Building",Montgomery,AL,36104,Montgomery,1/1/01
1,Alabama State Department of Education,Alabama State Department of Education,Math,All Grades,378006.00,102938,All Race,All Ethnicity,All SubPopulation,All Grades,,27.23,df_assessment_2022,2022,999.0,1.0,State Board District,,,3071 Teal Court,Mobile,AL,36695,Baldwin,1/1/00
2,Alabama State Department of Education,Alabama State Department of Education,Math,All Grades,20237.00,2095,All Race,All Ethnicity,Students with Limited English Proficiency,All Grades,,10.35,df_assessment_2022,2022,999.0,0.0,Central Office,,,"50. N. Ripley Street, Gordon Persons Building",Montgomery,AL,36104,Montgomery,1/1/01
3,Alabama State Department of Education,Alabama State Department of Education,Math,All Grades,20237.00,2095,All Race,All Ethnicity,Students with Limited English Proficiency,All Grades,,10.35,df_assessment_2022,2022,999.0,1.0,State Board District,,,3071 Teal Court,Mobile,AL,36695,Baldwin,1/1/00
4,Alabama State Department of Education,Alabama State Department of Education,Math,All Grades,3781.00,409,All Race,All Ethnicity,Homeless,All Grades,,10.82,df_assessment_2022,2022,999.0,0.0,Central Office,,,"50. N. Ripley Street, Gordon Persons Building",Montgomery,AL,36104,Montgomery,1/1/01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5918101,Winston County,Winston County,Reading,Grade 10,*,*,Two or more races,All Ethnicity,General Education Students,Grade 10,*,*,df_assessment_2015,2015,67.0,1.0,Public Board of Education,,,25101 Highway 195,Double Springs,AL,35553,Winston,
5918102,Winston County,Winston County,Reading,Grade 10,*,*,Two or more races,Other Ethnicity,All SubPopulation,Grade 10,*,*,df_assessment_2015,2015,67.0,0.0,Central Office,103580.0,,25101 Highway 195,Double Springs,AL,35553,Winston,
5918103,Winston County,Winston County,Reading,Grade 10,*,*,Two or more races,Other Ethnicity,All SubPopulation,Grade 10,*,*,df_assessment_2015,2015,67.0,1.0,Public Board of Education,,,25101 Highway 195,Double Springs,AL,35553,Winston,
5918104,Winston County,Winston County,Reading,Grade 10,*,*,Two or more races,Other Ethnicity,General Education Students,Grade 10,*,*,df_assessment_2015,2015,67.0,0.0,Central Office,103580.0,,25101 Highway 195,Double Springs,AL,35553,Winston,


In [19]:
# Boolean masks over df_merged_assessment shared by the four slices below.
asmt_is_all_subpop = df_merged_assessment['sub_population'].eq('All SubPopulation')
asmt_is_all_race = df_merged_assessment['race'].eq('All Race')
asmt_ethnicity = df_merged_assessment['ethnicity']

# One slice per specific race; 'group' carries the race label.
asmt_race = (
    df_merged_assessment
    .loc[asmt_is_all_subpop & asmt_ethnicity.eq('Other Ethnicity') & ~asmt_is_all_race]
    .assign(group=lambda rows: rows['race'])
)

# Hispanic/Latino rows across all races; 'group' carries the ethnicity label.
asmt_hispanic = (
    df_merged_assessment
    .loc[asmt_is_all_subpop & asmt_ethnicity.eq('Hispanic/Latino') & asmt_is_all_race]
    .assign(group=lambda rows: rows['ethnicity'])
)

# Specific sub-populations across all races/ethnicities; 'group' carries the
# sub-population label.
asmt_subpop = (
    df_merged_assessment
    .loc[~asmt_is_all_subpop & asmt_ethnicity.eq('All Ethnicity') & asmt_is_all_race]
    .assign(group=lambda rows: rows['sub_population'])
)

# The overall rows (no breakdown on any of the three dimensions).
asmt_all = (
    df_merged_assessment
    .loc[asmt_is_all_subpop & asmt_ethnicity.eq('All Ethnicity') & asmt_is_all_race]
    .assign(group='All Students')
)

In [20]:
# Stack the four assessment slices, label the metric, and reshape to the common
# metrics columns (year, total_n, metric_n, metric_pct).
df_asmt = (
    pd.concat([asmt_race, asmt_subpop, asmt_hispanic, asmt_all])
    .assign(metric='Assessment Proficiency')
    .loc[:, [
        'file_year', 'metric', 'system_code', 'school_code',
        'system_name', 'school_name', 'group', 'subject', 'grade',
        'tested', 'proficient', 'proficient_rate',
    ]]
    .rename(columns={
        'file_year': 'year',
        'tested': 'total_n',
        'proficient': 'metric_n',
        'proficient_rate': 'metric_pct',
    })
)

In [21]:
# Preview the long-format assessment metrics.
df_asmt

Unnamed: 0,year,metric,system_code,school_code,system_name,school_name,group,subject,grade,total_n,metric_n,metric_pct
86,2022,Assessment Proficiency,999.0,0.0,Alabama State Department of Education,Alabama State Department of Education,Asian,Math,All Grades,5942.00,3767,63.40
87,2022,Assessment Proficiency,999.0,1.0,Alabama State Department of Education,Alabama State Department of Education,Asian,Math,All Grades,5942.00,3767,63.40
140,2022,Assessment Proficiency,999.0,0.0,Alabama State Department of Education,Alabama State Department of Education,Black or African American,Math,All Grades,120612.00,12910,10.70
141,2022,Assessment Proficiency,999.0,1.0,Alabama State Department of Education,Alabama State Department of Education,Black or African American,Math,All Grades,120612.00,12910,10.70
194,2022,Assessment Proficiency,999.0,0.0,Alabama State Department of Education,Alabama State Department of Education,American Indian/Alaska Native,Math,All Grades,3579.00,1104,30.85
...,...,...,...,...,...,...,...,...,...,...,...,...
5917851,2015,Assessment Proficiency,67.0,1.0,Winston County,Winston County,All Students,Reading,Grade 07,*,79,41.80
5917918,2015,Assessment Proficiency,67.0,0.0,Winston County,Winston County,All Students,Reading,Grade 08,*,105,42.34
5917919,2015,Assessment Proficiency,67.0,1.0,Winston County,Winston County,All Students,Reading,Grade 08,*,105,42.34
5918022,2015,Assessment Proficiency,67.0,0.0,Winston County,Winston County,All Students,Reading,Grade 10,*,123,64.74


In [22]:
# Split df_merged_grad into the same four group slices used for assessments.
# Each slice takes its 'group' label from its OWN rows. Assigning a column from
# one of the asmt_* frames would align on row index against an unrelated frame
# and produce wrong or missing labels.

# One slice per specific race; 'group' carries the race label.
grad_race = df_merged_grad[(df_merged_grad['sub_population']=='All SubPopulation') &
                    (df_merged_grad['ethnicity']=='Other Ethnicity')  &
                    (df_merged_grad['race']!='All Race')].copy()

grad_race['group']=grad_race['race']

# Hispanic/Latino rows across all races; 'group' carries the ethnicity label.
grad_hispanic = df_merged_grad[(df_merged_grad['sub_population']=='All SubPopulation') &
                    (df_merged_grad['ethnicity']=='Hispanic/Latino')  &
                    (df_merged_grad['race']=='All Race')].copy()

grad_hispanic['group']=grad_hispanic['ethnicity']

# Specific sub-populations across all races/ethnicities.
grad_subpop = df_merged_grad[(df_merged_grad['sub_population']!='All SubPopulation') &
                    (df_merged_grad['ethnicity']=='All Ethnicity')  &
                    (df_merged_grad['race']=='All Race')].copy()

grad_subpop['group']=grad_subpop['sub_population']

# Overall rows. These must come from the graduation frame: the assessment frame
# has no student_count / graduates / ccr_attainment columns, so its rows would
# end up in the Graduation and CCR metrics with empty values.
grad_all = df_merged_grad[(df_merged_grad['sub_population']=='All SubPopulation') &
                    (df_merged_grad['ethnicity']=='All Ethnicity')  &
                    (df_merged_grad['race']=='All Race')].copy()

grad_all['group']= 'All Students'


In [39]:
# Distinct (file_year, subject) combinations present in the sub-population slice.
asmt_subpop.loc[:, ['file_year', 'subject']].drop_duplicates()

Unnamed: 0,file_year,subject
2,2022,Math
474025,2022,ELA
948098,2021,Math
1406021,2021,ELA
1864033,2019,Math
2317220,2019,ELA
2770631,2018,Math
3212989,2018,ELA
3655556,2017,Math
4036699,2017,ELA


In [23]:
# Stack the four graduation slices; graduation and CCR figures sit on the same rows.
df_grad_ccr = pd.concat([grad_race,grad_subpop,grad_hispanic,grad_all])


def _grad_ccr_to_metric(source, metric_name, count_col, pct_col):
    """Reshape the graduation/CCR frame into the common metrics layout.

    Parameters
    ----------
    source : pandas.DataFrame
        Frame holding the identifying columns, 'group', 'grade',
        'student_count', and the two columns named by count_col / pct_col.
    metric_name : str
        Value written to the 'metric' column.
    count_col, pct_col : str
        Source columns that become 'metric_n' and 'metric_pct'.

    Returns
    -------
    pandas.DataFrame
        New frame with columns year, metric, system_code, school_code,
        system_name, school_name, group, subject, grade, total_n, metric_n,
        metric_pct. 'subject' is all-NaN because these metrics have no subject.
    """
    return (
        source
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        .assign(subject=np.nan, metric=metric_name)
        .loc[:, ['file_year', 'metric', 'system_code', 'school_code',
                 'system_name', 'school_name', 'group', 'subject', 'grade',
                 'student_count', count_col, pct_col]]
        .rename(columns={'file_year': 'year',
                         'student_count': 'total_n',
                         count_col: 'metric_n',
                         pct_col: 'metric_pct'})
    )


df_grad = _grad_ccr_to_metric(df_grad_ccr, 'Graduation', 'graduates', 'graduation_%')
df_ccr = _grad_ccr_to_metric(df_grad_ccr, 'CCR', 'ccr_attainment', 'ccr_attainment_%')

df_ccr.columns

Index(['year', 'metric', 'system_code', 'school_code', 'system_name',
       'school_name', 'group', 'subject', 'grade', 'total_n', 'metric_n',
       'metric_pct'],
      dtype='object')

In [40]:
# Display the merged graduation frame (it carries both graduation and CCR columns).
df_merged_grad

Unnamed: 0,year,sub_population,system_name,school_name,grade,gender,race,ethnicity,student_count,graduates,graduation_%,ccr_attainment,ccr_attainment_%,file_source,file_year,system_code,school_code,site_type,nces_id,is_charter,street,city,state,zip_5,county,opened_date,sub_group
0,2022,All SubPopulation,Alabama State Department of Education,Alabama State Department of Education,All Grades,All Gender,All Race,All Ethnicity,51440,46644,90.68,39340,76.48,df_grad_2022,2022,999,0.0,Central Office,,,"50. N. Ripley Street, Gordon Persons Building",Montgomery,AL,36104,Montgomery,1/1/01,All
1,2022,All SubPopulation,Alabama State Department of Education,Alabama State Department of Education,All Grades,All Gender,All Race,All Ethnicity,51440,46644,90.68,39340,76.48,df_grad_2022,2022,999,1.0,State Board District,,,3071 Teal Court,Mobile,AL,36695,Baldwin,1/1/00,All
2,2022,All SubPopulation,Morgan County,Albert P Brewer High School,All Grades,All Gender,All Race,All Ethnicity,161,139,86.34,137,85.09,df_grad_2022,2022,52,5.0,Public Regular School,1.024800e+10,False,59 Eva Rd,Somerville,AL,35670-6423,Morgan,1/1/94,All
3,2022,All SubPopulation,Baldwin County,Baldwin County High School,All Grades,All Gender,All Race,All Ethnicity,219,179,81.74,181,82.65,df_grad_2022,2022,2,5.0,Public Regular School,1.002700e+10,False,One Tiger Dr,Bay Minette,AL,36507-3300,Baldwin,1/1/94,All
4,2022,All SubPopulation,Coosa County,Central High School,All Grades,All Gender,All Race,All Ethnicity,50,*,*,30,60.00,df_grad_2022,2022,19,5.0,Public Regular School,1.009000e+10,False,97 Coosa County Road 75,Rockford,AL,35136-3710,Coosa,1/1/94,All
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6943,2018,Economically Disadvantaged,Tuscaloosa City,Central High School,All Grades,All Gender,All Race,All Ethnicity,126,103,81.75,52,41.27,df_grad_2018,2018,200,160.0,Public Regular School,1.033600e+10,False,905 15th Street,Tuscaloosa,AL,35401,Tuscaloosa,1/1/94,Economically Disadvantaged
6944,2018,Economically Disadvantaged,Tuscumbia City,Deshler High School,All Grades,All Gender,All Race,All Ethnicity,49,*,*,24,48.98,df_grad_2018,2018,201,10.0,Public Regular School,1.034200e+10,False,200 N Commons East,Tuscumbia,AL,35674-1299,Colbert,5/28/97,Economically Disadvantaged
6945,2018,Economically Disadvantaged,Vestavia Hills City,Vestavia Hills High School,All Grades,All Gender,All Race,All Ethnicity,55,*,*,41,74.55,df_grad_2018,2018,202,20.0,Public Regular School,1.034300e+10,False,2235 Lime Rock Rd,Vestavia Hills,AL,35216-3399,Jefferson,,Economically Disadvantaged
6946,2018,Economically Disadvantaged,Winfield City,Winfield High School,All Grades,All Gender,All Race,All Ethnicity,26,*,*,*,*,df_grad_2018,2018,204,20.0,Public Regular School,1.035400e+10,False,232 Pirate Cove,Winfield,AL,35594,Marion,1/1/01,Economically Disadvantaged


In [25]:
# Stack the assessment, CCR and graduation frames, which were all renamed to the
# same column layout, into one long metrics table.
df_metrics = pd.concat([df_asmt,df_ccr,df_grad,])

In [26]:
# Write the two directory views as sheets of a single workbook.
with pd.ExcelWriter('../data_for_analysis/directory.xlsx') as writer:
    for sheet_name, sheet_frame in (
        ('directory_by_charter', df_directory_charter_schools),
        ('directory_demographics', df_directory_demog_merged),
    ):
        sheet_frame.to_excel(writer, sheet_name=sheet_name)

# Write the entities and metrics tables as UTF-8 CSVs without the index column.
for csv_path, csv_frame in (
    ('../data_for_analysis/entities.csv', df_entities),
    ('../data_for_analysis/metrics.csv', df_metrics),
):
    csv_frame.to_csv(csv_path, encoding='utf-8', index=False)

In [32]:
# Sanity check: which metric labels ended up in the combined table.
pd.unique(df_metrics['metric'])

array(['Assessment Proficiency', 'CCR', 'Graduation'], dtype=object)

In [27]:
# district code, district name, school code, school name, metric name, group, grade, year, value, and n-size