## Description 

In this notebook, we will pull Tennessee's raw data files which we have downloaded manually from the Tennessee Department of Education data archives (https://www.tn.gov/education/data/data-downloads.html), convert them to CSV with the naming convention topic+level+year.csv, map and merge the datafiles based on the topic, and export as CSV. 

In [47]:
import glob 
import csv
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 50)

## Pull Data From Folder
https://www.tn.gov/education/data/data-downloads.html

In [48]:
all_raw_xls_files = glob.glob('../raw_data_files/*')
all_raw_xls_files.sort()

all_raw_xls_files[:5]

['../raw_data_files/2017-18 ACT_school_suppressed.xlsx',
 '../raw_data_files/201819_membership.xlsx',
 '../raw_data_files/2020-21_tvaas_school_composite.csv',
 '../raw_data_files/2020-21_tvaas_school_subject_level.csv',
 '../raw_data_files/2022_school_assessment_file_suppressed_upd8-19.xlsx']

In [49]:
def download_rename_and_upload_as_csv(original_file_name, xls_or_csv, clean_file_name, has_xls_sheet): 
    if xls_or_csv == 'csv': 
        og_file = pd.read_csv('../raw_data_files/{}.csv'.format(original_file_name), dtype=str, index_col=None)
    else:
        if has_xls_sheet == False:
            og_file = pd.read_excel('../raw_data_files/{}.xlsx'.format(original_file_name), 
                                    dtype=str, 
                                    index_col=None)
        else:
            og_file = pd.read_excel('../raw_data_files/{}.xlsx'.format(original_file_name),
                                    dtype=str, 
                                    index_col=None,
                                    sheet_name=has_xls_sheet
                                   )
            
    og_file.to_csv('../raw_data_files_clean/{}.csv'.format(clean_file_name), encoding='utf-8', index=False)

In [50]:
##  Pull files and create dataframes 
def pull_files_and_create_df(raw_file_names): 
    df_list=[]
    for i in raw_file_names:
        name = i.split('/')[2].split('.')[0]
        globals()[f'df_{name}'] = pd.read_csv(i, low_memory=False)
        d = df_list.append(f'df_{name}')
    df_list.sort(reverse=True)
    return df_list

In [51]:
## Match file column names 
def match_files(list_of_file_names):  
    
    # change column names to lowercase and replace space with underscores
    for d in list_of_file_names: 
        globals()[d].columns = list((map(lambda x: x.lower().replace(' ', '_'),list(globals()[d]))))
    
    # create matching dataframe 
    df=[]
    for n in range(len(list_of_file_names)):
        name = list_of_file_names[n]
        total_num_cols1 = len(globals()[list_of_file_names[n]].columns)
        set_f1_cols = set((map(lambda x: "'" + x + "'", list(globals()[list_of_file_names[n]]))))
               
        if n+1 == len(list_of_file_names):
            name0 = 'NA'
            total_num_cols0 = 0
            set_f0_cols = []        
        else:
            name0 = list_of_file_names[n+1]
            total_num_cols0 = len(globals()[list_of_file_names[n+1]].columns)
            set_f0_cols = set((map(lambda x: "'" + x + "'", list(globals()[list_of_file_names[n+1]]))))
        dict = {'current_file_name': name,
                'prev_file_name': name0,
                'current_num_cols': total_num_cols1,
                'prev_num_cols': total_num_cols0,
                'current_cols': sorted(set_f1_cols),
                'prev_cols': sorted(set_f0_cols),
                'matching_columns': sorted(set_f1_cols.intersection(set_f0_cols)),
                'num_matching': len(set_f1_cols.intersection(set_f0_cols)),
                'mismatched_columns': sorted(set_f1_cols.difference(set_f0_cols)),
                'num_mismatching': len(set_f1_cols.difference(set_f0_cols))
                }
        df.append(dict)
    df_chx = pd.DataFrame(df)
    return df_chx

In [52]:
def df_to_change_and_merge(df_name, column_list): 
    appended_data = []
    
    df_files = [names for names in df_all_files if names.startswith(df_name)]
    df_files
    
    for d in df_files:
        globals()[d] = globals()[d][column_list]
        globals()[d]['file_source'] = d
        globals()[d]['file_year'] = d.split('_')[-1]
        appended_data.append(globals()[d])
    globals()[df_name] = pd.concat(appended_data).copy()
    globals()[df_name] = pd.merge(globals()[df_name], df_directory_school_2022[['district_number', 'school_number','updated_district_number','school_type','status']], on=['district_number', 'school_number'], how='left')
    globals()[df_name].to_csv('../raw_data_files_clean/merged_{}.csv'.format(df_name[3:]), encoding='utf-8', index=False)

In [53]:
'df_tvaas_composite_subject_school_2021'[3:]

'tvaas_composite_subject_school_2021'

In [54]:
# ## This cell is pretty manual because the file names are not consistent. 
# ## However, I can see the utility of creating lists and loops for 
# ## file names and file types to make it less manual. 


# download_rename_and_upload_as_csv('data_2015_school_base', 
#                                   'xlsx',
#                                   'assessment_school_2015', 
#                                   False)

In [55]:
cleaned_csv_files = glob.glob('../raw_data_files_clean/*.csv')
cleaned_csv_files.sort()
cleaned_csv_files[:5]

['../raw_data_files_clean/act_school_2017.csv',
 '../raw_data_files_clean/act_school_2018.csv',
 '../raw_data_files_clean/act_school_2019.csv',
 '../raw_data_files_clean/act_school_2020.csv',
 '../raw_data_files_clean/act_school_2021.csv']

In [56]:
%%time
df_all_files = pull_files_and_create_df(cleaned_csv_files)

CPU times: user 19.9 s, sys: 7.24 s, total: 27.1 s
Wall time: 30.5 s


In [57]:
df_all_files[:10]

['df_tvaas_composite_subject_school_2022',
 'df_tvaas_composite_subject_school_2021',
 'df_tvaas_composite_subject_school_2019',
 'df_tvaas_composite_subject_school_2018',
 'df_tvaas_composite_subject_school_2017',
 'df_tvaas_composite_school_2022',
 'df_tvaas_composite_school_2021',
 'df_tvaas_composite_school_2019',
 'df_tvaas_composite_school_2018',
 'df_tvaas_composite_school_2017']

In [58]:
df_file_name_specs_check = match_files(df_all_files)

In [59]:
# with pd.ExcelWriter('../data_for_analysis/file_specs.xlsx') as writer:  
#     df_file_name_specs_check.to_excel(writer, sheet_name='file_columns_comparisons')


In [60]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', None)
df_file_name_specs_check #.head()

Unnamed: 0,current_file_name,prev_file_name,current_num_cols,prev_num_cols,current_cols,prev_cols,matching_columns,num_matching,mismatched_columns,num_mismatching
0,df_tvaas_composite_subject_school_2022,df_tvaas_composite_subject_school_2021,13,15,"['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'phi_folder', 'rr_folder', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']",13,[],0
1,df_tvaas_composite_subject_school_2021,df_tvaas_composite_subject_school_2019,15,13,"['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'phi_folder', 'rr_folder', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']",13,"['phi_folder', 'rr_folder']",2
2,df_tvaas_composite_subject_school_2019,df_tvaas_composite_subject_school_2018,13,13,"['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']",13,[],0
3,df_tvaas_composite_subject_school_2018,df_tvaas_composite_subject_school_2017,13,13,"['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']",13,[],0
4,df_tvaas_composite_subject_school_2017,df_tvaas_composite_school_2022,13,10,"['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']","['district_number', 'school_number']",2,"['district', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'standard_error', 'subject', 'test', 'year']",11
5,df_tvaas_composite_school_2022,df_tvaas_composite_school_2021,10,8,"['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number']",8,"['science_composite', 'social_studies_composite']",2
6,df_tvaas_composite_school_2021,df_tvaas_composite_school_2019,8,9,"['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'social_studies_composite']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number']",8,[],0
7,df_tvaas_composite_school_2019,df_tvaas_composite_school_2018,9,10,"['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'social_studies_composite']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'social_studies_composite']",9,[],0
8,df_tvaas_composite_school_2018,df_tvaas_composite_school_2017,10,10,"['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']","['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']",10,[],0
9,df_tvaas_composite_school_2017,df_profile_school_2021,10,22,"['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number', 'science_composite', 'social_studies_composite']","['african_american_pct', 'asian_pct', 'black_hispanic_native_american_pct', 'district_id', 'district_name', 'economically_disadvantaged_pct', 'female_pct', 'foster_pct', 'hawaiian_pacisld_pct', 'hispanic_pct', 'homeless_pct', 'limited_english_proficient_pct', 'male_pct', 'migrant_pct', 'military_pct', 'multirace_pct', 'native_american_pct', 'school_id', 'school_name', 'students_with_disabilities_pct', 'total', 'white_pct']","['district_name', 'school_name']",2,"['district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_number', 'science_composite', 'social_studies_composite']",8


In [61]:
df_directory_school_2022 = df_directory_school_2022[['_district_no','_district','_school_no','_school','_school_type'
                                                     ,'_address', '_city', '_state','_zipcode','_nces_school_number','_status'
                                                     ]]
df_directory_school_2022.columns = ['district_number','district_name','school_number', 'school_name', 'school_type'
                                   ,'address', 'city', 'state','zip','nces_school_number','status']
# df_directory_school_2022['updated_district_number'] = df_directory_school_2022['district_number'].copy()
# need to change to MNPS : 8020, 8015, 8090, 8005


In [62]:
## Manually changing some of the charters authorized outside of MNPS but within Nashville
df_directory_school_2022['updated_district_number'] = (
    np.where((df_directory_school_2022['school_number']==8020) & (df_directory_school_2022['district_number']==986) | # Changed to 987: KIPP Antioch College Prep Middle
             (df_directory_school_2022['school_number']==8015) & (df_directory_school_2022['district_number']==986) | # Changed to 987: KIPP Antioch College Prep Elementary
             (df_directory_school_2022['school_number']==8030) & (df_directory_school_2022['district_number']==987) | # Nashville Collegiate Prep
             (df_directory_school_2022['school_number']==8020) & (df_directory_school_2022['district_number']==987) | # KIPP Antioch College Prep Middle
             (df_directory_school_2022['school_number']==8015) & (df_directory_school_2022['district_number']==987) | # KIPP Antioch College Prep Elementary
             (df_directory_school_2022['school_number']==8090) & (df_directory_school_2022['district_number']==985) | # Neely's Bend: A LEAD Public School
             (df_directory_school_2022['school_number']==8145) & (df_directory_school_2022['district_number']==985) | # Inactive: Rocketship Partners Community Prep
             (df_directory_school_2022['school_number']==8005) & (df_directory_school_2022['district_number']==985),  # Brick Church: A LEAD Public School
             190,
             df_directory_school_2022['district_number'])
)

In [63]:
charter_school_list = df_directory_school_2022.loc[(df_directory_school_2022['school_type']=='Public Charter') & (df_directory_school_2022['updated_district_number']==190)]

In [64]:
charter_school_list.to_csv('../data_for_analysis/charter_school_list.csv')

In [65]:
c = ['district', 'district_number', 'grade', 'growth_measure', 'index', 'level', 'number_of_students', 'school', 'school_number', 'standard_error', 'subject', 'test', 'year']
df_to_change_and_merge('df_tvaas_composite_subject_school',c)
df_tvaas_composite_subject_school.head()

Unnamed: 0,district,district_number,grade,growth_measure,index,level,number_of_students,school,school_number,standard_error,subject,test,year,file_source,file_year,updated_district_number,school_type,status
0,Anderson County,10,3.0,-2.2,-0.59,Level 3,23,Briceville Elementary,10,3.6,English Language Arts,Grades 3-8,2022,df_tvaas_composite_subject_school_2022,2022,10,Public,A
1,Anderson County,10,4.0,11.4,3.72,Level 5,17,Briceville Elementary,10,3.1,English Language Arts,Grades 3-8,2022,df_tvaas_composite_subject_school_2022,2022,10,Public,A
2,Anderson County,10,5.0,-9.6,-3.16,Level 1,17,Briceville Elementary,10,3.0,English Language Arts,Grades 3-8,2022,df_tvaas_composite_subject_school_2022,2022,10,Public,A
3,Anderson County,10,3.0,-8.1,-1.73,Level 2,22,Briceville Elementary,10,4.7,Math,Grades 3-8,2022,df_tvaas_composite_subject_school_2022,2022,10,Public,A
4,Anderson County,10,4.0,2.3,0.82,Level 3,16,Briceville Elementary,10,2.8,Math,Grades 3-8,2022,df_tvaas_composite_subject_school_2022,2022,10,Public,A


In [66]:
c = ['district_name', 'district_number', 'literacy_and_numeracy_composite', 'literacy_composite', 'numeracy_composite', 'overall_composite', 'school_name', 'school_number']
df_to_change_and_merge('df_tvaas_composite_school',c)
# df_tvaas_composite_school.head()

In [67]:
df_chronic_absenteeism_school_2022.columns

Index(['system', 'system_name', 'school', 'school_name', 'student_group',
       'grade_band', 'n_students', 'n_chronically_absent',
       'pct_chronically_absent'],
      dtype='object')

In [68]:
df_chronic_absenteeism_school_2022 = df_chronic_absenteeism_school_2022.rename(columns={'school':'school_number', 'system':'district_number', 'system_name':'district_name'}).copy()
df_chronic_absenteeism_school_2021 = df_chronic_absenteeism_school_2021.rename(columns={'school':'school_number', 'system':'district_number', 'system_name':'district_name','subgroup':'student_group'}).copy()
df_chronic_absenteeism_school_2020 = df_chronic_absenteeism_school_2020.rename(columns={'school':'school_number', 'system':'district_number', 'system_name':'district_name','subgroup':'student_group'}).copy()
df_chronic_absenteeism_school_2019 = df_chronic_absenteeism_school_2019.rename(columns={'school':'school_number', 'system':'district_number', 'system_name':'district_name','subgroup':'student_group'}).copy()
df_chronic_absenteeism_school_2018 = df_chronic_absenteeism_school_2018.rename(columns={'district':'district_number', 
                                                                                        'system_name':'district_name', 
                                                                                        'school':'school_number',
                                                                                        '#_students':'n_students', 
                                                                                        '#_chronically_absent':'n_chronically_absent',
                                                                                        '%_chronically_absent':'pct_chronically_absent'}).copy()
df_chronic_absenteeism_school_2017['grade_band'] = 'All Grades'
df_chronic_absenteeism_school_2017 = df_chronic_absenteeism_school_2017.rename(columns={'system':'district_number','system_name':'district_name', 'school':'school_number', 'subgroup':'student_group'})
c = ['grade_band', 'n_chronically_absent', 'n_students', 'pct_chronically_absent', 'school_number', 'school_name', 'student_group', 'district_number', 'district_name']
df_to_change_and_merge('df_chronic_absenteeism_school',c)

In [69]:
df_chronic_absenteeism_school_2022

Unnamed: 0,grade_band,n_chronically_absent,n_students,pct_chronically_absent,school_number,school_name,student_group,district_number,district_name,file_source,file_year
0,All Grades,245,1062,23.1,2,Anderson County High School,All Students,10,Anderson County,df_chronic_absenteeism_school_2022,2022
1,All Grades,*,6,*,2,Anderson County High School,American Indian or Alaska Native,10,Anderson County,df_chronic_absenteeism_school_2022,2022
2,All Grades,**,11,**,2,Anderson County High School,Asian,10,Anderson County,df_chronic_absenteeism_school_2022,2022
3,All Grades,3,19,15.8,2,Anderson County High School,Black or African American,10,Anderson County,df_chronic_absenteeism_school_2022,2022
4,All Grades,6,46,13,2,Anderson County High School,Black/Hispanic/Native American,10,Anderson County,df_chronic_absenteeism_school_2022,2022
...,...,...,...,...,...,...,...,...,...,...,...
17743,All Grades,245,397,61.7,8140,Hillcrest High School,Black/Hispanic/Native American,985,Achievement School District,df_chronic_absenteeism_school_2022,2022
17744,All Grades,133,201,66.2,8140,Hillcrest High School,Economically Disadvantaged,985,Achievement School District,df_chronic_absenteeism_school_2022,2022
17745,All Grades,9,28,32.1,8140,Hillcrest High School,English Learners with Transitional 1-4,985,Achievement School District,df_chronic_absenteeism_school_2022,2022
17746,All Grades,6,17,35.3,8140,Hillcrest High School,Hispanic,985,Achievement School District,df_chronic_absenteeism_school_2022,2022


In [70]:
df_census_school_2021 = df_census_school_2021[['district_id','district_name','school_id','school_name','total','asian_pct','african_american_pct','hispanic_pct','hawaiian_pacisld_pct','white_pct']].rename(columns={'district_id':'district_number','school_id':'school_number'}).copy()
df_census_school_2020 = df_census_school_2020.rename(columns={'race':'race_or_ethnicity','district':'district_number','school':'school_number'}).copy()
df_census_school_2019 = df_census_school_2019.rename(columns={'race':'race_or_ethnicity','district':'district_number','school':'school_number'}).copy()
df_census_school_2018 = df_census_school_2018.rename(columns={'race':'race_or_ethnicity','district':'district_number','school':'school_number', 'grade_':'grade'}).copy()
df_census_school_2017 = df_census_school_2017.rename(columns={'race':'race_or_ethnicity','district_id':'district_number','school_id':'school_number'}).copy()
df_census_school_2016 = df_census_school_2016.rename(columns={'district_id':'district_number','school_id':'school_number'}).copy()
df_census_school_2015 = df_census_school_2015.rename(columns={'district_id':'district_number','school_id':'school_number'}).copy()
df_census_school_2014 = df_census_school_2014.rename(columns={'district_id':'district_number','school_id':'school_number'}).copy()
df_census_school_2013 = df_census_school_2013.rename(columns={'district_id':'district_number','school_id':'school_number','schoolname':'school_name'}).copy()

df_census_filter_gender_grades = ['df_census_school_2020', 'df_census_school_2019', 'df_census_school_2018', 'df_census_school_2016']
df_census_filter_gender = ['df_census_school_2017','df_census_school_2015', 'df_census_school_2014', 'df_census_school_2013']

for c in df_census_filter_gender_grades:
    df_pivot = (globals()[c][(globals()[c]['grade']=='All Grades') & ((globals()[c]['gender']=='All Genders')|(globals()[c]['gender']=='M & F'))].pivot(index=['district_number','district_name','school_number','school_name'], columns=['race_or_ethnicity'], values='enrollment').reset_index()).copy()

    df_pivot = df_pivot.rename(columns={'All Race/Ethnic Groups':'total',
                                        'Asian':'asian',
                                        'Black or African American':'african_american',
                                        'Hispanic':'hispanic',
                                        'Hispanic or Latino ethnicity':'hispanic',
                                        'Native Hawaiian or Pacific Islander':'hawaiian_pacisld',
                                        'Native Hawaiian or Other Pacific Islander':'hawaiian_pacisld',
                                        'White':'white'})

    df_pivot['asian_pct'] = (df_pivot['asian'])/(df_pivot['total']) * 100
    df_pivot['african_american_pct'] = (df_pivot['african_american'])/(df_pivot['total']) * 100
    df_pivot['hispanic_pct'] = (df_pivot['hispanic'])/(df_pivot['total']) * 100
    df_pivot['hawaiian_pacisld_pct'] = (df_pivot['hawaiian_pacisld'])/(df_pivot['total']) * 100
    df_pivot['white_pct'] = (df_pivot['white'])/(df_pivot['total']) * 100
    globals()[c] = df_pivot[['district_number','district_name','school_number','school_name','total','asian_pct','african_american_pct','hispanic_pct','hawaiian_pacisld_pct','white_pct']].copy()

for c in df_census_filter_gender:
    df_pivot = globals()[c][(globals()[c]['grade']!='All Grades') & ((globals()[c]['gender']=='All Genders')|(globals()[c]['gender']=='M & F'))].groupby(['district_number','district_name','school_number','school_name','race_or_ethnicity'])[['enrollment']].sum().reset_index()
    df_pivot = (df_pivot.pivot(index=['district_number','district_name','school_number','school_name'], columns=['race_or_ethnicity'], values='enrollment').reset_index()).copy()
    
    df_pivot = df_pivot.rename(columns={'All Race/Ethnic Groups':'total',
                                        'Asian':'asian',
                                        'Black or African American':'african_american',
                                        'Hispanic':'hispanic',
                                        'Hispanic or Latino ethnicity':'hispanic',
                                        'Hispanic/Latino':'hispanic',
                                        'Native Hawaiian or Pacific Islander':'hawaiian_pacisld',
                                        'Native Hawaiian or Other Pacific Islander':'hawaiian_pacisld',
                                        'White':'white'})

    df_pivot['asian_pct'] = (df_pivot['asian'])/(df_pivot['total']) * 100
    df_pivot['african_american_pct'] = (df_pivot['african_american'])/(df_pivot['total']) * 100
    df_pivot['hispanic_pct'] = (df_pivot['hispanic'])/(df_pivot['total']) * 100
    df_pivot['hawaiian_pacisld_pct'] = (df_pivot['hawaiian_pacisld'])/(df_pivot['total']) * 100
    df_pivot['white_pct'] = (df_pivot['white'])/(df_pivot['total']) * 100
    globals()[c] = df_pivot[['district_number','district_name','school_number','school_name','total','asian_pct','african_american_pct','hispanic_pct','hawaiian_pacisld_pct','white_pct']].copy()



In [71]:
c = ['district_number','district_name','school_number','school_name','total','asian_pct','african_american_pct','hispanic_pct','hawaiian_pacisld_pct','white_pct']
df_to_change_and_merge('df_census_school',c)

In [72]:
df_census_school[['file_year','african_american_pct']].drop_duplicates()

Unnamed: 0,file_year,african_american_pct
0,2021,Less than 5%
2,2021,*
5,2021,6
8,2021,5.6
13,2021,7.8
...,...,...
15967,2013,94.117647
15968,2013,96.648045
15969,2013,83.495146
15970,2013,96.507937


In [73]:
df_assessment_school_2019['enrolled'] = np.NaN
df_assessment_school_2018['enrolled'] = np.NaN
df_assessment_school_2017['enrolled'] = np.NaN
df_assessment_school_2016['enrolled'] = np.NaN
df_assessment_school_2015['enrolled'] = np.NaN
df_assessment_school_2014['enrolled'] = np.NaN
df_assessment_school_2017['test'] = np.NaN
df_assessment_school_2016['test'] = np.NaN
df_assessment_school_2015['test'] = np.NaN
df_assessment_school_2014['test'] = np.NaN
df_assessment_school_2017['system_name'] = np.NaN
df_assessment_school_2017['school_name'] = np.NaN
df_assessment_school_2022 = df_assessment_school_2022.rename(columns={'system':'district_number',
                                                                      'system_name':'district_name',
                                                                      'school':'school_number'}).copy()
df_assessment_school_2021 = df_assessment_school_2021.rename(columns={'system':'district_number',
                                                                      'system_name':'district_name',
                                                                      'school':'school_number',
                                                                      'subgroup':'student_group',
                                                                      'pct_on_track':'pct_met_expectations',
                                                                      'pct_mastered':'pct_exceeded_expectations',
                                                                      'pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_school_2019 = df_assessment_school_2019.rename(columns={'system':'district_number','system_name':'district_name','school':'school_number','subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_school_2018 = df_assessment_school_2018.rename(columns={'system':'district_number','system_name':'district_name','school':'school_number','subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_school_2017 = df_assessment_school_2017.rename(columns={'system':'district_number','system_name':'district_name','school':'school_number','subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_school_2016 = df_assessment_school_2016.rename(columns={'district':'district_number','school':'school_number','#_valid_tests':'valid_tests','subgroup':'student_group',
                                          '%_below_(prev._below_basic)':'pct_below', '%_approaching_(prev._basic)':'pct_approaching', 
                                          '%_on_track_(prev._proficient)':'pct_met_expectations','%_mastered_(prev._advanced)':'pct_exceeded_expectations',
                                          '%_on_track_or_mastered_(prev._%_proficient/advanced)':'pct_met_exceeded'}).copy()
df_assessment_school_2015 = df_assessment_school_2015.rename(columns={'system':'district_number','system_name':'district_name','school':'school_number','subgroup':'student_group',
                                          'pct_below_bsc':'pct_below',
                                          'pct_bsc':'pct_approaching', 
                                          'pct_prof':'pct_met_expectations',
                                          'pct_adv':'pct_exceeded_expectations',
                                          'pct_prof_adv':'pct_met_exceeded'}).copy()
df_assessment_school_2014 = df_assessment_school_2014.rename(columns={'system':'district_number','system_name':'district_name','school':'school_number','subgroup':'student_group',
                                          'pct_below_bsc':'pct_below', 
                                          'pct_bsc':'pct_approaching', 
                                          'pct_prof':'pct_met_expectations',
                                          'pct_adv':'pct_exceeded_expectations',
                                          'pct_prof_adv':'pct_met_exceeded'}).copy()
df_assessment_state_2019['enrolled'] = np.NaN
df_assessment_state_2018['enrolled'] = np.NaN
df_assessment_state_2017['enrolled'] = np.NaN
df_assessment_state_2017['test'] = np.NaN
df_assessment_state_2021 = df_assessment_state_2021.rename(columns={'subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_state_2019 = df_assessment_state_2019.rename(columns={'subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_state_2018 = df_assessment_state_2018.rename(columns={'subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()
df_assessment_state_2017 = df_assessment_state_2017.rename(columns={'subgroup':'student_group','pct_on_track':'pct_met_expectations','pct_mastered':'pct_exceeded_expectations','pct_on_mastered':'pct_met_exceeded'}).copy()

In [75]:
state_dfs = [df_assessment_state_2017,
             df_assessment_state_2018, 
             df_assessment_state_2019, 
             df_assessment_state_2021, 
             df_assessment_state_2022]
for d in state_dfs: 
    d['district_number'] = 0
    d['district_name'] = 'State of Tennessee'
    d['school_number'] = 0
    d['school_name'] = 'All Schools'

In [77]:
c = ['year', 'district_number', 'district_name', 'school_number', 'school_name', 'test',
     'subject', 'grade', 'student_group', 'enrolled', 'valid_tests',
     'pct_below','pct_approaching','pct_met_expectations','pct_exceeded_expectations','pct_met_exceeded']
df_to_change_and_merge('df_assessment',c)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  globals()[d]['file_source'] = d
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  globals()[d]['file_year'] = d.split('_')[-1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  globals()[d]['file_source'] = d
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

In [78]:
df_assessment.loc[(df_assessment['school_number']==8015) & (df_assessment['year']==2022) & (df_assessment['district_number']!=df_assessment['updated_district_number'])]

Unnamed: 0,year,district_number,district_name,school_number,school_name,test,subject,grade,student_group,enrolled,valid_tests,pct_below,pct_approaching,pct_met_expectations,pct_exceeded_expectations,pct_met_exceeded,file_source,file_year,updated_district_number,school_type,status
685693,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,ELA,3,All Students,129.0,129.0,15.4,41.9,31.8,10.9,42.6,df_assessment_school_2022,2022,190.0,Public Charter,A
685694,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,ELA,3,Asian,20.0,20.0,15,35,35,15,50,df_assessment_school_2022,2022,190.0,Public Charter,A
685695,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,ELA,3,Black or African American,48.0,48.0,12.4,41.7,31.3,14.6,45.8,df_assessment_school_2022,2022,190.0,Public Charter,A
685696,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,ELA,3,Black/Hispanic/Native American,87.0,87.0,13.8,43.7,33.3,9.2,42.5,df_assessment_school_2022,2022,190.0,Public Charter,A
685697,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,ELA,3,Economically Disadvantaged,34.0,34.0,14.8,52.9,23.5,8.8,32.4,df_assessment_school_2022,2022,190.0,Public Charter,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685796,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,Science,All Grades,Non-English Learners/Transitional 1-4,99.0,99.0,13.1,39.4,42.4,5.1,47.5,df_assessment_school_2022,2022,190.0,Public Charter,A
685797,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,Science,All Grades,Non-Students with Disabilities,116.0,116.0,11.2,42.2,39.7,6.9,46.6,df_assessment_school_2022,2022,190.0,Public Charter,A
685798,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,Science,All Grades,Students with Disabilities,13.0,13.0,**,**,**,**,30.8,df_assessment_school_2022,2022,190.0,Public Charter,A
685799,2022,987,Tennessee Public Charter School Commission,8015,KIPP Antioch College Prep Elementary,TNReady,Science,All Grades,Super Subgroup,111.0,111.0,12.7,44.1,37.8,5.4,43.2,df_assessment_school_2022,2022,190.0,Public Charter,A


In [79]:
df_act_school_2021 = df_act_school_2021.rename(columns={'district':'district_number','school':'school_number'})
df_act_school_2020 = df_act_school_2020.rename(columns={'district':'district_number','school':'school_number'})
df_act_school_2019 = df_act_school_2019.rename(columns={'district':'district_number','school':'school_number'})
df_act_school_2018 = df_act_school_2018.rename(columns={'district':'district_number','school':'school_number'})
df_act_school_2017 = df_act_school_2017.rename(columns={'district':'district_number','school':'school_number'})
c = ['district_number', 'district_name', 'school_number', 'school_name', 'subgroup',
       'valid_tests', 'participation_rate', 'average_english_score',
       'average_math_score', 'average_reading_score', 'average_science_score',
       'average_composite_score', 'percent_scoring_21_or_higher',
       'percent_scoring_below_19']
df_to_change_and_merge('df_act_school',c)

In [80]:
df_profile_school_2021 = df_profile_school_2021.rename(columns={'district_id':'district_number', 'school_id':'school_number'})
df_profile_school_2020 = df_profile_school_2020.rename(columns={'district_id':'district_number', 'school_id':'school_number'})
df_profile_school_2019 = df_profile_school_2019.rename(columns={'district_id':'district_number', 'school_id':'school_number'})
df_profile_school_2018 = df_profile_school_2018.rename(columns={'district_id':'district_number', 'school_id':'school_number'})
df_profile_school_2017 = df_profile_school_2017.rename(columns={'district_id':'district_number', 'school_id':'school_number'})
df_profile_school_2016 = df_profile_school_2016.rename(columns={'district':'district_number', 'school_id':'school_number'})
df_profile_school_2015 = df_profile_school_2015.rename(columns={'district':'district_number', 'school_id':'school_number'})
df_profile_school_2014 = df_profile_school_2014.rename(columns={'district':'district_number', 'school_id':'school_number',
                                                               'free_reduced_pct':'economically_disadvantaged_pct'})
df_profile_school_2014['hawaiian_pacisld_pct'] = np.NaN
df_profile_school_2013 = df_profile_school_2013.rename(columns={'district':'district_number', 'school_no':'school_number',
                                                               'free_reduced_pct':'economically_disadvantaged_pct'})
df_profile_school_2013['hawaiian_pacisld_pct'] = np.NaN

df_profile_school_2012 = df_profile_school_2012.rename(columns={'district':'district_number', 'school':'school_number',
                                                                'free_reduced_(%)':'economically_disadvantaged_pct',
                                                                'african_american_(%)':'african_american_pct', 
                                                                'hispanic_(%)':'hispanic_pct', 
                                                                'asian_(%)':'asian_pct',
                                                                'native_american_(%)':'native_american_pct',
                                                                'white_(%)':'white_pct',
                                                                'male_(%)':'male_pct', 
                                                                'female_(%)':'female_pct'})
df_profile_school_2012['hawaiian_pacisld_pct'] = np.NaN
df_profile_school_2012['limited_english_proficient_pct'] = np.NaN
df_profile_school_2012['students_with_disabilities_pct'] = np.NaN

df_profile_school_2011 = df_profile_school_2011.rename(columns={'district':'district_number', 'school':'school_number',
                                                                'free_reduced_(%)':'economically_disadvantaged_pct',
                                                                'african_american_(%)':'african_american_pct', 
                                                                'hispanic_(%)':'hispanic_pct', 
                                                                'asian_(%)':'asian_pct',
                                                                'native_american_(%)':'native_american_pct',
                                                                'white_(%)':'white_pct',
                                                                'male_(%)':'male_pct', 
                                                                'female_(%)':'female_pct'})
df_profile_school_2011['hawaiian_pacisld_pct'] = np.NaN
df_profile_school_2011['limited_english_proficient_pct'] = np.NaN
df_profile_school_2011['students_with_disabilities_pct'] = np.NaN

df_profile_school_2010 = df_profile_school_2010.rename(columns={'district':'district_number', 'school':'school_number',
                                                                'free_reduced_(%)':'economically_disadvantaged_pct',
                                                                'african_american_(%)':'african_american_pct', 
                                                                'hispanic_(%)':'hispanic_pct', 
                                                                'asian_(%)':'asian_pct',
                                                                'native_american_(%)':'native_american_pct',
                                                                'white_(%)':'white_pct',
                                                                'male_(%)':'male_pct', 
                                                                'female_(%)':'female_pct'})
df_profile_school_2010['hawaiian_pacisld_pct'] = np.NaN
df_profile_school_2010['limited_english_proficient_pct'] = np.NaN
df_profile_school_2010['students_with_disabilities_pct'] = np.NaN


In [81]:
df_profile_district_2010.columns

Index(['year', 'district', 'district_name', 'grades_served',
       'number_of_schools', 'administrators', 'teachers',
       'average_daily_membership', 'total', 'white', 'african_american',
       'hispanic', 'asian', 'native_american', 'male', 'female', 'white_(%)',
       'african_american_(%)', 'hispanic_(%)', 'asian_(%)',
       'native_american_(%)', 'male_(%)', 'female_(%)', 'white_male',
       'african_american_male', 'hispanic_male', 'asian_male',
       'native_american_male', 'white_female', 'african_american_female',
       'hispanic_female', 'asian_female', 'native_american_female',
       'economically_disadvantaged', 'limited_english_proficient_(%)',
       'limited_english_proficient', 'number_exempt_from_reading_assessment',
       'students_with_disabilities', 'students_with_disabilities_(%)',
       'free_eligible', 'reduced_eligible', 'free_reduced_eligible',
       'free_(%)', 'reduced_(%)', 'free_reduced_(%)', 'title_i', 'title_i_(%)',
       'per_pupil_expendit

In [82]:
columns = ['african_american', 'asian', 'female',# 'hawaiian_pacisld_pct',
       'hispanic', 'limited_english_proficient', 'male','economically_disadvantaged',
       'native_american','students_with_disabilities', 'total', 'white']
df = [df_profile_district_2010, df_profile_district_2011,df_profile_district_2012]
for d in df:
    for i in columns: 
        d[i] = d[i].replace('-','0')
        d[i] = d[i].astype('float64')

In [83]:
df_profile_district_2010 = df_profile_district_2010[columns]
df_profile_district_2010 = pd.DataFrame(df_profile_district_2010.sum(axis=0)).transpose()


cols = ['african_american', 'asian',
       'economically_disadvantaged', 'female',
       'hispanic', 'limited_english_proficient', 'male',
       'native_american', 'students_with_disabilities', 'total', 'white']
for i in cols:
    df_profile_district_2010[i+'_pct'] = df_profile_district_2010[i] / df_profile_district_2010['total']
    
df_profile_district_2010['district_number'] = 0
df_profile_district_2010['district_name'] = 'State of Tennessee'
df_profile_district_2010['school_number'] = 0
df_profile_district_2010['school_name'] = 'All Schools'
df_profile_district_2010['hawaiian_pacisld_pct'] = np.NaN

In [84]:
df_profile_district_2011 = df_profile_district_2011[columns]
df_profile_district_2011 = pd.DataFrame(df_profile_district_2011.sum(axis=0)).transpose()


cols = ['african_american', 'asian',
       'economically_disadvantaged', 'female',
       'hispanic', 'limited_english_proficient', 'male',
       'native_american', 'students_with_disabilities', 'total', 'white']
for i in cols:
    df_profile_district_2011[i+'_pct'] = df_profile_district_2011[i] / df_profile_district_2011['total']
    
df_profile_district_2011['district_number'] = 0
df_profile_district_2011['district_name'] = 'State of Tennessee'
df_profile_district_2011['school_number'] = 0
df_profile_district_2011['school_name'] = 'All Schools'
df_profile_district_2011['hawaiian_pacisld_pct'] = np.NaN

In [85]:
df_profile_district_2012 = df_profile_district_2012[columns]
df_profile_district_2012 = pd.DataFrame(df_profile_district_2012.sum(axis=0)).transpose()


cols = ['african_american', 'asian',
       'economically_disadvantaged', 'female',
       'hispanic', 'limited_english_proficient', 'male',
       'native_american', 'students_with_disabilities', 'total', 'white']
for i in cols:
    df_profile_district_2012[i+'_pct'] = df_profile_district_2012[i] / df_profile_district_2012['total']
    
df_profile_district_2012['district_number'] = 0
df_profile_district_2012['district_name'] = 'State of Tennessee'
df_profile_district_2012['school_number'] = 0
df_profile_district_2012['school_name'] = 'All Schools'
df_profile_district_2012['hawaiian_pacisld_pct'] = np.NaN

In [86]:
df_profile_district_2013.columns

Index(['year', 'district', 'district_name', 'grades_served',
       'number_of_schools', 'administrators', 'teachers',
       'average_daily_membership', 'total', 'white', 'african_american',
       'hispanic', 'asian', 'native_american', 'male', 'female', 'white_pct',
       'african_american_pct', 'hispanic_pct', 'asian_pct',
       'native_american_pct', 'male_pct', 'female_pct', 'white_male',
       'african_american_male', 'hispanic_male', 'asian_male',
       'native_american_male', 'white_female', 'african_american_female',
       'hispanic_female', 'asian_female', 'native_american_female',
       'limited_english_proficient_pct', 'limited_english_proficient',
       'number_exempt_from_reading_assessment', 'students_with_disabilities',
       'students_with_disabilities_pct', 'free_eligible', 'reduced_eligible',
       'free_reduced_eligible', 'free_pct', 'reduced_pct', 'free_reduced_pct',
       'title_i', 'title_i_pct', 'per_pupil_expenditures_per_ada',
       'local_funding_

In [87]:
df_profile_district_2013 = df_profile_district_2013.loc[df_profile_district_2013['district']==0].copy()
df_profile_district_2013 = df_profile_district_2013.rename(columns={'district':'district_number','free_reduced_pct':'economically_disadvantaged_pct'})
df_profile_district_2013['school_name'] = 'All Schools'
df_profile_district_2013['school_number'] = 0
df_profile_district_2013['hawaiian_pacisld_pct'] = np.NaN

df_profile_district_2018 = df_profile_district_2018.loc[df_profile_district_2018['district']==0].copy()
df_profile_district_2018 = df_profile_district_2018.rename(columns={'district':'district_number'})
df_profile_district_2018['school_name'] = 'All Schools'
df_profile_district_2018['school_number'] = 0

df_profile_district_2019 = df_profile_district_2019.loc[df_profile_district_2019['district_id']==0].copy()
df_profile_district_2019 = df_profile_district_2019.rename(columns={'district_id':'district_number'})
df_profile_district_2019['school_name'] = 'All Schools'
df_profile_district_2019['school_number'] = 0

df_profile_district_2020 = df_profile_district_2020.loc[df_profile_district_2020['district_id']==0].copy()
df_profile_district_2020 = df_profile_district_2020.rename(columns={'district_id':'district_number'})
df_profile_district_2020['school_name'] = 'All Schools'
df_profile_district_2020['school_number'] = 0

df_profile_district_2021 = df_profile_district_2021.loc[df_profile_district_2021['district_id']==0].copy()
df_profile_district_2021 = df_profile_district_2021.rename(columns={'district_id':'district_number'})
df_profile_district_2021['school_name'] = 'All Schools'
df_profile_district_2021['school_number'] = 0

In [88]:
df_profile_school_2019.loc[df_profile_school_2019['school_number']==1]

Unnamed: 0,school_year,district_number,district_name,school_number,school_name,grades_served,safe_school,average_daily_membership,total,female,female_pct,male,male_pct,economically_disadvantaged,economically_disadvantaged_pct,limited_english_proficient,limited_english_proficient_pct,students_with_disabilities,students_with_disabilities_pct,african_american,african_american_pct,asian,asian_pct,hawaiian_pacisld,hawaiian_pacisld_pct,hispanic,hispanic_pct,native_american,native_american_pct,white,white_pct,african_american_female,african_american_male,asian_female,asian_male,hawaiian_pacisld_female,hawaiian_pacisld_male,hispanic_female,hispanic_male,native_american_female,native_american_male,white_female,white_male
52,2018-19,793,Arlington Municipal School District,1,Arlington High,Grades 9-12,SAFE SCHOOL,2059.0,2071,1025,49.5,1046,50.5,97,4.7,12,0.6,184,8.9,326,15.7,79,3.8,4,0.2,136,6.6,2,0.1,1524,73.6,166,160,45,34,2,2,75,61,2,0,735,789
906,2018-19,190,Metro Nashville Public Schools,1,A. Z. Kelley Elementary,Grades PK-4,SAFE SCHOOL,797.0,795,403,50.7,392,49.3,306,38.5,248,31.2,88,11.1,287,36.1,99,12.5,0,0.0,207,26.0,0,0.0,202,25.4,151,136,49,50,0,0,104,103,0,0,99,103
1214,2018-19,750,Rutherford County Schools,1,Blackman Elementary School,Grades K-5,SAFE SCHOOL,985.0,961,461,48.0,500,52.0,120,12.5,73,7.6,83,8.6,261,27.2,110,11.4,2,0.2,92,9.6,4,0.4,492,51.2,127,134,50,60,2,0,42,50,2,2,238,254


In [89]:
c = ['african_american_pct', 'asian_pct', 'district_number', 'district_name',
       'economically_disadvantaged_pct', 'female_pct', 'hawaiian_pacisld_pct',
       'hispanic_pct', 'limited_english_proficient_pct', 'male_pct',
       'native_american_pct', 'school_number', 'school_name',
       'students_with_disabilities_pct', 'total', 'white_pct']

df_to_change_and_merge('df_profile',c)

In [90]:
df_profile[df_profile['district_number']==0]

Unnamed: 0,african_american_pct,asian_pct,district_number,district_name,economically_disadvantaged_pct,female_pct,hawaiian_pacisld_pct,hispanic_pct,limited_english_proficient_pct,male_pct,native_american_pct,school_number,school_name,students_with_disabilities_pct,total,white_pct,file_source,file_year,updated_district_number,school_type,status
7097,24.1,2.2,0,State of Tennessee,34.7,48.6,0.2,9.7,5.3,51.4,0.3,0,All Schools,13.9,999701.0,63.4,df_profile_school_2017,2017,,,
9063,24.1,2.2,0,State of Tennessee,35.1,48.7,0.2,9.0,5.0,51.3,0.3,0,All Schools,14.0,997893.0,64.2,df_profile_school_2016,2016,,,
11043,24.1337414097111,2.05544376297831,0,State of Tennessee,57.901487,48.663811,0.155739779012182,8.459552,4.592767,51.334984,0.319612970081093,0,All Schools,13.9806324380555,995892.0,64.852112,df_profile_school_2015,2015,,,
13001,24.140984,1.977681,0,State of Tennessee,58.771854,48.638162,,7.788872,4.478382,51.360731,0.328222,0,All Schools,14.202876,993841.0,65.612306,df_profile_school_2014,2014,,,
22075,24.3,2.6,0,State of Tennessee,33.5,48.8,Less than 1%,12.3,7.8,51.2,Less than 1%,0,All Schools,13.5,957423.0,60.1,df_profile_district_2021,2021,,,
22076,23.9,2.5,0,State of Tennessee,30.6,48.8,0.2,11.8,7.7,51.2,0.4,0,All Schools,13.1,980619.0,61.2,df_profile_district_2020,2020,,,
22077,24.0,2.4,0,State of Tennessee,34.9,48.8,0.2,10.9,4.6,51.2,0.4,0,All Schools,13.5,973659.0,62.1,df_profile_district_2019,2019,,,
22078,24.0,2.3,0,State of Tennessee,36.1,48.7,0.2,10.4,4.6,51.3,0.4,0,All Schools,13.6,975222.0,62.7,df_profile_district_2018,2018,,,
22079,24.09701,1.895987,0,State of Tennessee,58.617011,48.630867,,7.285433,4.336143,51.36863,0.295392,0,All Schools,13.684589,993256.0,66.269824,df_profile_district_2013,2013,,,
22080,0.236019,0.017948,0,State of Tennessee,0.583545,0.485476,,0.066423,0.00031,0.514524,0.001878,0,All Schools,0.145143,976855.0,0.677731,df_profile_district_2012,2012,,,


In [91]:
df_profile.loc[df_profile['school_number']==8015]

Unnamed: 0,african_american_pct,asian_pct,district_number,district_name,economically_disadvantaged_pct,female_pct,hawaiian_pacisld_pct,hispanic_pct,limited_english_proficient_pct,male_pct,native_american_pct,school_number,school_name,students_with_disabilities_pct,total,white_pct,file_source,file_year,updated_district_number,school_type,status
1785,Greater than 95%,*,985,Achievement School District,67.1,43.7,*,Less than 5%,*,56.3,*,8015,Humes Preparatory Academy Middle School,11.9,252,Less than 5%,df_profile_school_2021,2021,985.0,Public Charter,A
1804,37.9,10.9,986,Tennessee State Board of Education,28.4,53.6,*,24.6,28.2,46.4,*,8015,KIPP Antioch College Prep Elementary,10.2,422,26.5,df_profile_school_2021,2021,190.0,Public Charter,I
3572,99.3,0.0,985,Achievement School District,61.5,40.7,0.0,0.4,0.0,59.3,0.0,8015,Humes Preparatory Academy Middle School,13.0,270,0.4,df_profile_school_2020,2020,985.0,Public Charter,A
3594,38.6,9.6,986,Tennessee State Board of Education,31.7,55.3,0.0,25.3,21.2,44.7,0.0,8015,KIPP Antioch College Prep Elementary,10.9,293,26.6,df_profile_school_2020,2020,190.0,Public Charter,I
3608,98.7,0.0,985,Achievement School District,72.9,44.1,0.0,0.4,0.0,55.9,0.0,8015,Humes Preparatory Academy Middle School,16.5,236,0.8,df_profile_school_2019,2019,985.0,Public Charter,A
5182,43.8,11.8,986,Tennessee State Board of Education,27.8,52.8,0.0,29.9,22.9,47.2,0.0,8015,KIPP Antioch College Prep Elementary,15.3,144,14.6,df_profile_school_2019,2019,190.0,Public Charter,I
7074,98.8,0.0,985,Achievement School District,82.3,45.3,0.0,1.2,0.8,54.7,0.0,8015,Humes Preparatory Academy Middle School,16.1,254,0.0,df_profile_school_2018,2018,985.0,Public Charter,A
9040,98.7,,985,Achievement School District,77.6,48.2,,0.6,1.6,51.8,,8015,Humes Preparatory Academy Middle School,14.7,313.0,0.6,df_profile_school_2017,2017,985.0,Public Charter,A
11031,98.4,,985,Achievement School District,38,48.9,,1.4,,51.1,,8015,Humes Preparatory Academy Middle School,18.1,370,0.3,df_profile_school_2016,2016,985.0,Public Charter,A
12987,98.2843137254902,,985,Achievement School District,94.362745,53.186275,,1.22549,0.735294,46.813725,,8015,Humes Preparatory Academy - Upper School,19.8529411764706,408.0,0.245098,df_profile_school_2015,2015,985.0,Public Charter,A


In [92]:
df_profile.loc[df_profile['school_name']=='KIPP Antioch College Prep Elementary']

Unnamed: 0,african_american_pct,asian_pct,district_number,district_name,economically_disadvantaged_pct,female_pct,hawaiian_pacisld_pct,hispanic_pct,limited_english_proficient_pct,male_pct,native_american_pct,school_number,school_name,students_with_disabilities_pct,total,white_pct,file_source,file_year,updated_district_number,school_type,status
1804,37.9,10.9,986,Tennessee State Board of Education,28.4,53.6,*,24.6,28.2,46.4,*,8015,KIPP Antioch College Prep Elementary,10.2,422,26.5,df_profile_school_2021,2021,190.0,Public Charter,I
3594,38.6,9.6,986,Tennessee State Board of Education,31.7,55.3,0.0,25.3,21.2,44.7,0.0,8015,KIPP Antioch College Prep Elementary,10.9,293,26.6,df_profile_school_2020,2020,190.0,Public Charter,I
5182,43.8,11.8,986,Tennessee State Board of Education,27.8,52.8,0.0,29.9,22.9,47.2,0.0,8015,KIPP Antioch College Prep Elementary,15.3,144,14.6,df_profile_school_2019,2019,190.0,Public Charter,I
