In [1]:
import glob 
import csv
import pandas as pd
import numpy as np
from scipy import stats 

import plotly.graph_objects as go
import plotly.express as px

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)

What is the Tennessee TCAP (TNReady)? The Tennessee Comprehensive Assessment Program (TCAP) is the long running assessment test program in Tennessee. This program includes the TNReady (Tennessee Ready) which tests overall student progress in English Language Arts and Mathematics as well as Social Studies and Science.

MSAA: The Multi-State Alternate Assessment (MSAA) are ELA and mathematics assessments
designed for students with significant cognitive disabilities in grades 3–8 and grade 11. 

EOC: Each subject-area test is divided into multiple subparts and will be administered during one testing window at the end of the course.


Definitions
• Enrolled counts include the number of valid_tests and non-valid_tests records. Enrolled counts include all
students enrolled to take the test, regardless of test completion.
• valid_tests counts are the number of students that participated in the test.
• Valid test counts are the number of students that completed enough of the test to receive a valid
score.

Frequently Asked Questions

Why would each test/grade have different enrolled numbers?

There are many cases where student enrollment counts in a grade might differ across subjects. For
example, alternative tests are not included in these data, and some students may use general education
tests in some subjects and alternative tests in others.
What is the difference between "**" and "*"?


• The * is used if the number of valid tests is below 10.

• The ** suppression hides percentages (% Below, % Approaching, etc.) below 1% or above 99%.

In [2]:
merged_csvs = glob.glob('../raw_data_files_clean/merged_*')
merged_csvs.sort()

merged_csvs

['../raw_data_files_clean/merged_act_school.csv',
 '../raw_data_files_clean/merged_assessment.csv',
 '../raw_data_files_clean/merged_census_school.csv',
 '../raw_data_files_clean/merged_chronic_absenteeism_school.csv',
 '../raw_data_files_clean/merged_profile.csv',
 '../raw_data_files_clean/merged_tvaas_composite_school.csv',
 '../raw_data_files_clean/merged_tvaas_composite_subject_school.csv']

In [3]:
def pull_files_and_create_df(raw_file_names): 
    df_list=[]
    for i in raw_file_names:
        name = i.split('/')[2].split('.')[0]
        globals()[f'df_{name}'] = pd.read_csv(i, low_memory=False)
        d = df_list.append(f'df_{name}')
    df_list.sort(reverse=True)
    return df_list

In [4]:
pull_files_and_create_df(merged_csvs)

['df_merged_tvaas_composite_subject_school',
 'df_merged_tvaas_composite_school',
 'df_merged_profile',
 'df_merged_chronic_absenteeism_school',
 'df_merged_census_school',
 'df_merged_assessment',
 'df_merged_act_school']

In [5]:
df_merged_assessment.loc[df_merged_assessment['school_number']==8015][['school_name','year','grade']].drop_duplicates()

Unnamed: 0,school_name,year,grade
678829,Humes Preparatory Academy Middle School,2022,6
678837,Humes Preparatory Academy Middle School,2022,7
678846,Humes Preparatory Academy Middle School,2022,8
678856,Humes Preparatory Academy Middle School,2022,All Grades
685693,KIPP Antioch College Prep Elementary,2022,3
685711,KIPP Antioch College Prep Elementary,2022,All Grades
1326155,Humes Preparatory Academy Middle School,2021,6
1326165,Humes Preparatory Academy Middle School,2021,7
1326174,Humes Preparatory Academy Middle School,2021,8
1326184,Humes Preparatory Academy Middle School,2021,All Grades


In [6]:
# KACPE - KIPP Antioch College Prep Elementary - (3rd)
# ELA - 42.6% met or exceeded
# Math - 46.50% met or exceeded
# ** No TVAAS, year 1 of TCAP data

# KACPM - KIPP Antioch College Prep Middle - (5-7th) – percent met or exceeded
# ELA - Whole School - 33.9% ; 5th - 43.1%  ; 6th - 33.6% ; 7th - 25.0%
# Math - Whole School - 32.4% ; 5th - 35.4% ; 6th - 33.6% ; 7th - 28.1%
# Science - Whole School - 30.2% ; 5th - 38.0% ; 6th - 35.9% ; 7th - 16.4%
# Social Studies - Whole School - 29.8% ; 6th - 35.1% ; 7th - 24.4%
# ***TVAAS 5

# NCP - Nashville Collegiate Prep - (3-5th) – percent met or exceeded
# ELA - Whole School: 25.5% ; 3rd - 26.1% ; 4th - 32.5% ; 5th - 12.5%
# Math - Whole School: 20.7% ; 3rd - 31.9% ; 4th - 17.1% ; 5th - 4.3%
# Science - Whole School: 21.4% ; 3rd - 19.1% ; 4th - 28.6% ; 5th - 13.0%
# ***TVAAS 3

In [7]:
# pull_forward_data = (

# ## For enrolled and valid_tests, 422 and 146 were pulled from the 2021 school membership files for 2nd grade. 
# # These students are now in 3rd grade.
# [2022, 986, 'Tennessee State Board of Education', 8015,'KIPP Antioch College Prep Elementary', 
#  'TNReady', 'Math', 'All Grades', 'All Students', 422,
#        146, '*', '*', '*',
#        '*', '46.5', 'pull_forward',
#        2022, 190, 'Public Charter', 'A'],

# [2022, 986, 'Tennessee State Board of Education', 8015,'KIPP Antioch College Prep Elementary', 
#  'TNReady', 'ELA', 'All Grades', 'All Students', 422,
#        146, '*', '*', '*',
#        '*', 42.6, 'pull_forward',
#        2022, 190, 'Public Charter', 'A'],

# ## For enrolled and valid_tests, 408 was derived from the 2021 school membership files where 5th and 6th grade were both 136. 
# # We are making the assumption that the the cohorts stayed at 136, and the new cohort was the same size. 

# [2022, 986, 'Tennessee State Board of Education', 8020,'KIPP Antioch College Prep Middle', 
#  'TNReady', 'Math', 'All Grades', 'All Students', 408,
#        408, '*', '*', '*',
#        '*', 32.4, 'pull_forward',
#        2022, 190, 'Public Charter', 'A'],


# [2022, 986, 'Tennessee State Board of Education', 8020,'KIPP Antioch College Prep Middle', 
#  'TNReady', 'ELA', 'All Grades', 'All Students', 408,
#        408, '*', '*', '*',
#        '*', 33.9, 'pull_forward',
#        2022, 190, 'Public Charter', 'A'],

# ## For enrolled and valid_tests, 333 was derived from the total enrollment in the Commission's website: https://www.tn.gov/tn-public-charter-school-commission/commission-schools/nashville-collegiate-prep.html 
# # We are making the assumption that enrollment is evenly distributed between K, 1, 2, 3, 4, 5, and as such each grade level has 55 students (rounded down from 55.5).
# # Only 3-5 take the assessment so we derive 165 as the number of students valid_tests.

# [2022, 987, 'Tennessee State Board of Education', 8030,'Nashville Collegiate Prep', 
#  'TNReady', 'Math', 'All Grades', 'All Students', 333,
#        165, '*', '*', '*',
#        '*', 20.7, 'pull_forward',
#        2022, 190, 'Public Charter', 'A'],

# [2022, 987, 'Tennessee State Board of Education', 8030,'Nashville Collegiate Prep', 
#  'TNReady', 'ELA', 'All Grades', 'All Students', 333,
#        165, '*', '*', '*',
#        '*', 25.5, 'pull_forward',
#        2022, 190, 'Public Charter', 'A'])


In [8]:
# df_pull_forward = pd.DataFrame(pull_forward_data)
# df_pull_forward.columns = ['year', 'district_number', 'district_name', 'school_number','school_name', 
#  'test', 'subject', 'grade', 'student_group', 'enrolled',
#        'valid_tests', 'pct_below', 'pct_approaching', 'pct_met_expectations',
#        'pct_exceeded_expectations', 'pct_met_exceeded', 'file_source',
#        'file_year', 'updated_district_number', 'school_type', 'status']
# df_pull_forward

In [9]:
# df_merged_assessment = pd.concat([df_merged_assessment,df_pull_forward]).copy()
# df_merged_assessment

In [10]:
# df_merged_assessment.loc[df_merged_assessment['school_number'] == 8030]

## Assessments

In [11]:
df_merged_assessment.loc[df_merged_assessment['district_number']==0]
df_merged_assessment['pct_met_exceeded_adjusted'] = df_merged_assessment['pct_met_exceeded'].replace(['*','**'],
                                              [0,0]).astype('float')

df_merged_assessment['num_met_or_exceeded'] = df_merged_assessment['valid_tests'] * df_merged_assessment['pct_met_exceeded_adjusted']/100 

df_merged_assessment['school_type'] = df_merged_assessment['school_type'].replace('Public Virtual School','Public')
                               
df_merged_assessment.loc[df_merged_assessment['district_number']==0, 'school_type'] = 'State'

In [12]:
df_merged_assessment['num_met_or_exceeded'] = df_merged_assessment['valid_tests'] * df_merged_assessment['pct_met_exceeded_adjusted']/100 

array(['Algebra I', 'Algebra II', 'Biology I', 'ELA', 'English I',
       'English II', 'Geometry', 'Integrated Math I',
       'Integrated Math II', 'Integrated Math III', 'Math', 'Science',
       'Social Studies', 'US History', 'Chemistry', 'English III', 'RLA'],
      dtype=object)

In [13]:
df_merged_assessment['subject_group']='Other'
df_merged_assessment.loc[df_merged_assessment['subject']=='Algebra I', 'subject_group'] = 'Math'
df_merged_assessment.loc[df_merged_assessment['subject']=='Algebra II', 'subject_group'] = 'Math'
df_merged_assessment.loc[df_merged_assessment['subject']=='Integrated Math I', 'subject_group'] = 'Math'
df_merged_assessment.loc[df_merged_assessment['subject']=='Integrated Math II', 'subject_group'] = 'Math'
df_merged_assessment.loc[df_merged_assessment['subject']=='Integrated Math III', 'subject_group'] = 'Math'
df_merged_assessment.loc[df_merged_assessment['subject']=='Geometry', 'subject_group'] = 'Math'
df_merged_assessment.loc[df_merged_assessment['subject']=='Math', 'subject_group'] = 'Math'


df_merged_assessment.loc[df_merged_assessment['subject']=='English I', 'subject_group'] = 'ELA'
df_merged_assessment.loc[df_merged_assessment['subject']=='English II', 'subject_group'] = 'ELA'
df_merged_assessment.loc[df_merged_assessment['subject']=='English III', 'subject_group'] = 'ELA'
df_merged_assessment.loc[df_merged_assessment['subject']=='ELA', 'subject_group'] = 'ELA'


In [64]:
df_merged_assessment.student_group.unique()

array(['All Students', 'American Indian or Alaska Native', 'Asian',
       'Black or African American', 'Black/Hispanic/Native American',
       'Economically Disadvantaged', 'English Learner Transitional 1-4',
       'English Learners', 'English Learners with Transitional 1-4',
       'Female', 'Gifted', 'Hispanic', 'Male', 'Migrant',
       'Native Hawaiian or Other Pacific Islander',
       'Non-Black/Hispanic/Native American',
       'Non-Economically Disadvantaged',
       'Non-English Learners/Transitional 1-4',
       'Non-Students with Disabilities', 'Students with Disabilities',
       'Super Subgroup', 'White', 'English Learners with T1/T2',
       'Non-English Learners', 'Non-English Learners/T1 or T2', 'Black',
       'English Language Learners', 'Hawaiian or Pacific Islander',
       'Native American', 'Non-English Language Learners',
       'Non-English Language Learners/T1 or T2',
       'English Language Learners with T1/T2'], dtype=object)

In [65]:
poc_raw_data = df_merged_assessment.loc[(df_merged_assessment['file_year']==2022) & 
                         ((df_merged_assessment['student_group']=='Black/Hispanic/Native American')|
                          (df_merged_assessment['student_group']=='Non-Black/Hispanic/Native American')|
                          (df_merged_assessment['student_group']=='All Students')) & 
                          (df_merged_assessment['grade']=='All Grades') & 
                            ((df_merged_assessment['subject_group']=='Math')|
                          (df_merged_assessment['subject_group']=='ELA')) & 
                            ((df_merged_assessment['updated_district_number']==190)|
                          (df_merged_assessment['district_number']==0)) 
                        ]



with pd.ExcelWriter('../data_for_analysis/poc_2022.xlsx') as writer:  
    poc_raw_data.to_excel(writer, sheet_name='poc_raw_data')

In [67]:
poc_raw_data.student_group.unique()

array(['All Students', 'Black/Hispanic/Native American',
       'Non-Black/Hispanic/Native American'], dtype=object)

In [16]:
mnps_cohorts = df_merged_assessment.loc[((df_merged_assessment['grade']=='3') |
                                         (df_merged_assessment['grade']=='4') |
                                         (df_merged_assessment['grade']=='5') |
                                         (df_merged_assessment['grade']=='6') |
                                         (df_merged_assessment['grade']=='7') |
                                         (df_merged_assessment['grade']=='8')) &
                                        ((df_merged_assessment['updated_district_number']==190) | 
                                         (df_merged_assessment['district_number']==0)) & 
                                        (df_merged_assessment['pct_met_exceeded']!='*')
                                       ].copy()

In [52]:
df_merged_assessment.loc[
    # ((df_merged_assessment['grade']=='3') |
                                         # (df_merged_assessment['grade']=='4') |
                                         # (df_merged_assessment['grade']=='5') |
                                         # (df_merged_assessment['grade']=='6') |
                                         # (df_merged_assessment['grade']=='7') |
                                         # (df_merged_assessment['grade']=='8')) &
                                        # ((df_merged_assessment['updated_district_number']==190) | 
                                         # (df_merged_assessment['district_number']==0)) & 
                                        # (df_merged_assessment['pct_met_exceeded']!='*')& 
                                        (df_merged_assessment['file_year']==2016)]

Unnamed: 0,year,district_number,district_name,school_number,school_name,test,subject,grade,student_group,enrolled,valid_tests,pct_below,pct_approaching,pct_met_expectations,pct_exceeded_expectations,pct_met_exceeded,file_source,file_year,updated_district_number,school_type,status,pct_met_exceeded_adjusted,num_met_or_exceeded,subject_group
2827469,2016,10,Anderson County,2,Anderson County High School,,Algebra I,9th through 12th,All Students,,237.0,63.3,21.5,13.5,1.7,15.2,df_assessment_school_2016,2016,10.0,Public,A,15.2,36.024,Math
2827470,2016,10,Anderson County,25,Clinton High School,,Algebra I,9th through 12th,All Students,,236.0,66.5,22.5,8.9,2.1,11,df_assessment_school_2016,2016,10.0,Public,A,11.0,25.960,Math
2827471,2016,10,Anderson County,105,Clinch River Community School,,Algebra I,9th through 12th,All Students,,1.0,*,*,*,*,*,df_assessment_school_2016,2016,10.0,Public,A,0.0,0.000,Math
2827472,2016,12,Oak Ridge City,35,Oak Ridge High School,,Algebra I,9th through 12th,All Students,,251.0,**,**,**,**,15.5,df_assessment_school_2016,2016,12.0,Public,A,15.5,38.905,Math
2827473,2016,20,Bedford County,13,Cascade High School,,Algebra I,9th through 12th,All Students,,111.0,48.7,27.9,16.2,7.2,23.4,df_assessment_school_2016,2016,20.0,Public,A,23.4,25.974,Math
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2874938,2016,985,Achievement School District,8065,Martin Luther King Preparatory High School,,US History,9th through 12th,Non-Economically Disadvantaged,,22.0,**,**,**,**,**,df_assessment_school_2016,2016,985.0,Public Charter,A,0.0,0.000,Other
2874939,2016,985,Achievement School District,8065,Martin Luther King Preparatory High School,,US History,9th through 12th,Non-English Language Learners,,131.0,**,**,**,**,3.1,df_assessment_school_2016,2016,985.0,Public Charter,A,3.1,4.061,Other
2874940,2016,985,Achievement School District,8065,Martin Luther King Preparatory High School,,US History,9th through 12th,Non-Students with Disabilities,,127.0,**,**,**,**,3.1,df_assessment_school_2016,2016,985.0,Public Charter,A,3.1,3.937,Other
2874941,2016,985,Achievement School District,8065,Martin Luther King Preparatory High School,,US History,9th through 12th,Students with Disabilities,,6.0,*,*,*,*,*,df_assessment_school_2016,2016,985.0,Public Charter,A,0.0,0.000,Other


In [17]:
mnps_cohorts_grouped = (
    mnps_cohorts[['file_year','subject_group','student_group','school_type', 'grade', 'num_met_or_exceeded','valid_tests']]
    .groupby(by=['file_year','subject_group','student_group','school_type', 'grade'])
    .sum(['num_met_or_exceeded','valid_tests'])
    .reset_index())

mnps_cohorts_grouped['pct_met_or_exceeded'] = mnps_cohorts_grouped['num_met_or_exceeded']/mnps_cohorts_grouped['valid_tests']

In [18]:
mnps_all_grades = df_merged_assessment.loc[(df_merged_assessment['grade']=='All Grades') &  #All Grades\
                                        ((df_merged_assessment['updated_district_number']==190) | 
                                         (df_merged_assessment['district_number']==0)) & 
                                        ((df_merged_assessment['pct_met_exceeded']!='*') #&
                                         #(df_merged_assessment['pct_met_exceeded']!='**')
                                        )
                                     
                                           
                                        ].copy()

In [19]:
mnps_all_grades[['school_name']]

Unnamed: 0,school_name
139,All Schools
140,All Schools
141,All Schools
142,All Schools
143,All Schools
...,...
3633117,Brick Church College Prep
3633118,Brick Church College Prep
3633119,Brick Church College Prep
3633120,Brick Church College Prep


In [20]:
mnps_all_grades.loc[(mnps_all_grades['school_number']==8090) 
                    & (mnps_all_grades['district_number']==985) 
                    & (mnps_all_grades['file_year']==2022)
                    & (mnps_all_grades['student_group']=='All Students')
                ]

Unnamed: 0,year,district_number,district_name,school_number,school_name,test,subject,grade,student_group,enrolled,valid_tests,pct_below,pct_approaching,pct_met_expectations,pct_exceeded_expectations,pct_met_exceeded,file_source,file_year,updated_district_number,school_type,status,pct_met_exceeded_adjusted,num_met_or_exceeded,subject_group
682204,2022,985,Achievement School District,8090,Neely's Bend: A LEAD Public School,EOC,Integrated Math I,All Grades,All Students,11.0,11.0,18.1,18.2,36.4,27.3,63.6,df_assessment_school_2022,2022,190.0,Public Charter,A,63.6,6.996,Math
682476,2022,985,Achievement School District,8090,Neely's Bend: A LEAD Public School,TNReady,ELA,All Grades,All Students,460.0,436.0,**,**,**,**,9.4,df_assessment_school_2022,2022,190.0,Public Charter,A,9.4,40.984,ELA
682563,2022,985,Achievement School District,8090,Neely's Bend: A LEAD Public School,TNReady,Math,All Grades,All Students,449.0,431.0,**,**,**,**,7.9,df_assessment_school_2022,2022,190.0,Public Charter,A,7.9,34.049,Math
682650,2022,985,Achievement School District,8090,Neely's Bend: A LEAD Public School,TNReady,Science,All Grades,All Students,460.0,441.0,**,**,**,**,12.5,df_assessment_school_2022,2022,190.0,Public Charter,A,12.5,55.125,Other
682720,2022,985,Achievement School District,8090,Neely's Bend: A LEAD Public School,TNReady,Social Studies,All Grades,All Students,342.0,327.0,**,**,**,**,17.4,df_assessment_school_2022,2022,190.0,Public Charter,A,17.4,56.898,Other


In [21]:
mnps_all_grades['file_year'].unique()

array([2022, 2021, 2019, 2018, 2017, 2015, 2014])

In [22]:
mnps_all_grades_grouped = (
    mnps_all_grades[['file_year','subject_group','student_group','school_type', 'num_met_or_exceeded','valid_tests']]
    .groupby(by=['file_year','subject_group','student_group','school_type'])
    .sum(['num_met_or_exceeded','valid_tests'])
    .reset_index())

mnps_all_grades_grouped['pct_met_or_exceeded'] = mnps_all_grades_grouped['num_met_or_exceeded']/mnps_all_grades_grouped['valid_tests']

In [23]:
# mnps_all_grades['pct_met_exceeded_adjusted'] = mnps_all_grades['pct_met_exceeded'].replace(['**'],
#                                               [0])

# mnps_all_grades

In [24]:
mnps_all_grades.loc[mnps_all_grades['file_year']==2014][['subject','grade']].drop_duplicates()

Unnamed: 0,subject,grade
3265379,Math,All Grades
3265421,RLA,All Grades
3265463,Science,All Grades
3265505,Social Studies,All Grades
3265547,Algebra I,All Grades
3265579,Algebra II,All Grades
3265614,English I,All Grades
3265641,English II,All Grades
3265670,English III,All Grades
3265709,Biology I,All Grades


In [25]:
mnps_all_grades['file_year'].unique()

array([2022, 2021, 2019, 2018, 2017, 2015, 2014])

In [26]:
yoy_change = mnps_all_grades[['subject_group',
                                  'student_group',
                                  'school_type',
                                  'file_year',
                                  'pct_met_exceeded_adjusted',
                                  'school_number',
                                  'district_number']].copy()
yoy_change_temp_prev = yoy_change.copy()

yoy_change_temp_prev['prev_year'] = yoy_change_temp_prev['file_year']
# yoy_change_temp_prev['file_year'] = yoy_change_temp_prev['file_year']+1 
yoy_change_temp_prev['file_year'] = yoy_change_temp_prev['file_year'].map({2017:2018, 2018:2019, 2019:2021, 2021:2022}) ## field we want to join 

In [27]:
yoy_change = pd.merge(yoy_change, 
                          yoy_change_temp_prev, 
                          left_on=['subject_group','student_group','school_type','file_year','school_number','district_number'],
                          right_on=['subject_group','student_group','school_type','file_year','school_number','district_number'])
yoy_change

Unnamed: 0,subject_group,student_group,school_type,file_year,pct_met_exceeded_adjusted_x,school_number,district_number,pct_met_exceeded_adjusted_y,prev_year
0,Math,All Students,State,2022,20.7,0,0,18.3,2021
1,Math,All Students,State,2022,20.7,0,0,19.0,2021
2,Math,All Students,State,2022,20.7,0,0,23.9,2021
3,Math,All Students,State,2022,20.7,0,0,14.0,2021
4,Math,All Students,State,2022,20.7,0,0,20.3,2021
...,...,...,...,...,...,...,...,...,...
62313,Other,Students with Disabilities,Public Charter,2018,0.0,8090,985,5.3,2017
62314,Other,Super Subgroup,Public Charter,2018,29.5,8090,985,19.4,2017
62315,Other,Super Subgroup,Public Charter,2018,14.3,8090,985,19.4,2017
62316,Other,White,Public Charter,2018,53.3,8090,985,27.7,2017


In [28]:
yoy_change['pct_met_exceeded_adjusted_y'] = yoy_change['pct_met_exceeded_adjusted_y'].astype('float64')
yoy_change['pct_met_exceeded_adjusted_x'] = yoy_change['pct_met_exceeded_adjusted_x'].astype('float64')

yoy_change['yoy_change'] = yoy_change['pct_met_exceeded_adjusted_x'] - yoy_change['pct_met_exceeded_adjusted_y']

In [29]:
average_yoy_change = yoy_change[['subject_group','student_group','school_type','file_year','yoy_change']].groupby(by=['subject_group','student_group','school_type','file_year']).mean('yoy_change').reset_index()
ela_yoy_change = (
    average_yoy_change.loc[(average_yoy_change['subject_group']=='ELA') 
                           & ((average_yoy_change['student_group']=='All Students') 
                              | (average_yoy_change['student_group']=='Black/Hispanic/Native American')
                              | (average_yoy_change['student_group']=='Economically Disadvantaged')
                              | (average_yoy_change['student_group']=='English Learners')
                              | (average_yoy_change['student_group']=='Students with Disabilities'))]
    .sort_values(by=['student_group','school_type','file_year'])
)
math_yoy_change = (
    average_yoy_change.loc[(average_yoy_change['subject_group']=='Math') 
                           & ((average_yoy_change['student_group']=='All Students') 
                              | (average_yoy_change['student_group']=='Black/Hispanic/Native American')
                              | (average_yoy_change['student_group']=='Economically Disadvantaged')
                              | (average_yoy_change['student_group']=='English Learners')
                              | (average_yoy_change['student_group']=='Students with Disabilities'))]
    .sort_values(by=['student_group','school_type','file_year'])
)

In [30]:
mnps_all_grades['pct_met_exceeded_adjusted'] = mnps_all_grades['pct_met_exceeded_adjusted'].astype('float')

In [31]:
ttest_results = stats.ttest_ind(mnps_all_grades.loc[(mnps_all_grades['school_type']=='Public Charter') & (mnps_all_grades['subject_group']=='Math') & (mnps_all_grades['file_year']==2022)]['pct_met_exceeded_adjusted'],
                mnps_all_grades.loc[(mnps_all_grades['school_type']=='Public Charter') & (mnps_all_grades['subject_group']=='Math') & (mnps_all_grades['file_year']==2019)]['pct_met_exceeded_adjusted'])

In [32]:
stype = ['Public Charter', 'Public', 'State'] 
subject = ['Math', 'ELA'] 
ending_year = [2022, 2021] 
student_group = ['All Students',
'Black/Hispanic/Native American',
'Non-Black/Hispanic/Native American',
'Economically Disadvantaged',
'Non-Economically Disadvantaged',
'English Learners with Transitional 1-4',
'Non-English Learners/Transitional 1-4',
'Students with Disabilities',
'Non-Students with Disabilities']
results_df = []
for sg in student_group: 
    for t in stype: 
        for s in subject: 
            for y in ending_year: 
                ttest_results = stats.ttest_ind(mnps_all_grades.loc[(mnps_all_grades['school_type']==t) 
                                                    & (mnps_all_grades['subject_group']==s) 
                                                    & (mnps_all_grades['student_group']==sg) 
                                                    & (mnps_all_grades['grade']=='All Grades') 
                                                    & (mnps_all_grades['file_year']==y)]['pct_met_exceeded_adjusted'],
                                mnps_all_grades.loc[(mnps_all_grades['school_type']==t) 
                                                    & (mnps_all_grades['subject_group']==s) 
                                                    & (mnps_all_grades['student_group']==sg) 
                                                    & (mnps_all_grades['grade']=='All Grades') 
                                                    & (mnps_all_grades['file_year']==2019)]['pct_met_exceeded_adjusted'])
                results = {'school_type':t,
                           'student_group': sg,
                           'subject':s,
                           'starting_year':2019,
                           'ending_year':y,
                           't':ttest_results[0],
                           'p-value':ttest_results[1]
                          }
                results_df.append(results)

results_df = pd.DataFrame(results_df)

In [33]:
results_df

Unnamed: 0,school_type,student_group,subject,starting_year,ending_year,t,p-value
0,Public Charter,All Students,Math,2019,2022,-2.283513,0.024394
1,Public Charter,All Students,Math,2019,2021,-4.817863,0.000005
2,Public Charter,All Students,ELA,2019,2022,-0.893837,0.374025
3,Public Charter,All Students,ELA,2019,2021,-2.666051,0.009263
4,Public,All Students,Math,2019,2022,-2.008518,0.045233
...,...,...,...,...,...,...,...
103,Public,Non-Students with Disabilities,ELA,2019,2021,-2.076753,0.038733
104,State,Non-Students with Disabilities,Math,2019,2022,-1.328165,0.208832
105,State,Non-Students with Disabilities,Math,2019,2021,-2.713530,0.018832
106,State,Non-Students with Disabilities,ELA,2019,2022,0.400300,0.709393


In [34]:
df_merged_missing = df_merged_assessment.loc[(df_merged_assessment['file_year']==2022) & (df_merged_assessment['updated_district_number']==190)].copy()
df_merged_missing['missing_data'] = df_merged_missing['pct_met_exceeded'].map({'*':1, '**':1})
df_merged_missing['less_than_10'] = df_merged_missing['pct_met_exceeded'].map({'*':1})
df_merged_missing['low_n_for_scores'] = df_merged_missing['pct_met_exceeded'].map({'**':1})
df_merged_missing['suppressed_data'] = df_merged_missing['missing_data'] * df_merged_missing['valid_tests']
df_merged_missing['suppressed_less_than_10'] = df_merged_missing['less_than_10'] * df_merged_missing['valid_tests']
df_merged_missing['suppressed_low_n_scores'] = df_merged_missing['low_n_for_scores'] * df_merged_missing['valid_tests']
mnps_missing_grouped = (
    df_merged_missing[['file_year','subject_group','student_group','school_type','suppressed_data','suppressed_less_than_10','suppressed_low_n_scores','valid_tests','enrolled']]
    .groupby(by=['file_year','subject_group','student_group','school_type'])
    .sum(['valid_tests_missing_data','valid_tests','enrolled','suppressed_data','suppressed_less_than_10','suppressed_low_n_scores'])
    .reset_index())

mnps_missing_grouped['suppression_rate'] = mnps_missing_grouped['suppressed_data'] / mnps_missing_grouped['valid_tests']
mnps_missing_grouped['suppression_less_than_10_tested_rate'] = mnps_missing_grouped['suppressed_less_than_10'] / mnps_missing_grouped['valid_tests']
mnps_missing_grouped['suppression_scores_than_1_pct_rate'] = mnps_missing_grouped['suppressed_low_n_scores'] / mnps_missing_grouped['valid_tests']

mnps_missing_grouped['participation_rate'] = mnps_missing_grouped['valid_tests'] / mnps_missing_grouped['enrolled']


In [35]:
mnps_missing_grouped

Unnamed: 0,file_year,subject_group,student_group,school_type,suppressed_data,suppressed_less_than_10,suppressed_low_n_scores,valid_tests,enrolled,suppression_rate,suppression_less_than_10_tested_rate,suppression_scores_than_1_pct_rate,participation_rate
0,2022,ELA,All Students,Public,5036.0,889.0,4147.0,68910.0,74290.0,0.073081,0.012901,0.060180,0.927581
1,2022,ELA,All Students,Public Charter,321.0,185.0,136.0,20146.0,20846.0,0.015934,0.009183,0.006751,0.966420
2,2022,ELA,American Indian or Alaska Native,Public,154.0,154.0,0.0,154.0,162.0,1.000000,1.000000,0.000000,0.950617
3,2022,ELA,American Indian or Alaska Native,Public Charter,36.0,36.0,0.0,36.0,38.0,1.000000,1.000000,0.000000,0.947368
4,2022,ELA,Asian,Public,1015.0,864.0,151.0,2950.0,3078.0,0.344068,0.292881,0.051186,0.958415
...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,2022,Other,Students with Disabilities,Public Charter,1912.0,742.0,1170.0,3362.0,3484.0,0.568709,0.220702,0.348007,0.964983
128,2022,Other,Super Subgroup,Public,12648.0,1499.0,11149.0,74466.0,81760.0,0.169849,0.020130,0.149719,0.910788
129,2022,Other,Super Subgroup,Public Charter,1152.0,300.0,852.0,25348.0,26682.0,0.045447,0.011835,0.033612,0.950004
130,2022,Other,White,Public,2704.0,1265.0,1439.0,23146.0,24214.0,0.116824,0.054653,0.062171,0.955893


In [36]:
with pd.ExcelWriter('../data_for_analysis/asmt.xlsx') as writer:  
    mnps_all_grades_grouped.to_excel(writer, sheet_name='all_grades_and_students')
    mnps_cohorts_grouped.to_excel(writer, sheet_name='grades_3_to_8_and_students')
    ela_yoy_change.to_excel(writer, sheet_name='ela_yoy_change')
    math_yoy_change.to_excel(writer, sheet_name='math_yoy_change')
    results_df.to_excel(writer, sheet_name='t_test_results')
    mnps_missing_grouped.to_excel(writer, sheet_name='missing_results')