In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Load the cleaned, combined dataset (2015-2020 per the file name).
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv('cleaned_combined_2015_2020.csv')

In [3]:
# Preview the first five rows to check the load and the available columns.
df.head()

Unnamed: 0,student_totals_seniors,specific_interactions_1-on-1s_total,specific_interactions_group_total,specific_interactions_1-on-1s_at_least_one,specific_interactions_group_at_least_one,specific_interactions_parent_meetings,specific_interactions_parent_meetings_at_least_one,total_interactions_met_with_1_plus_times_(group_and_1_1),total_interactions_met_with_3_plus_times_(group_and_1_1),total_interactions_met_with_5_plus_times_(group_and_1_1),...,total_female_rate,enrolled_female_rate,total_male_rate,enrolled_male_rate,total_black_rate,enrolled_black_rate,total_white_rate,enrolled_white_rate,econ_total_rate,econ_enrolled_rate
0,148.0,724.0,644.0,126.0,143.0,55.0,37.0,145.0,138.0,113.0,...,0.455357,0.285714,0.544643,0.348214,0.160714,0.080357,0.741071,0.473214,0.392857,0.223214
1,247.0,837.0,726.0,234.0,231.0,183.0,97.0,235.0,226.0,137.0,...,0.549708,0.397661,0.450292,0.274854,0.315789,0.239766,0.549708,0.368421,0.520468,0.339181
2,136.0,1679.0,799.0,128.0,129.0,179.0,64.0,132.0,124.0,118.0,...,0.522388,0.373134,0.477612,0.223881,0.30597,0.134328,0.604478,0.425373,0.559701,0.291045
3,676.0,298.0,150.0,142.0,128.0,116.0,99.0,218.0,45.0,9.0,...,0.537842,0.452496,0.462158,0.325282,0.352657,0.270531,0.2657,0.238325,0.563607,0.407407
4,96.0,488.0,158.0,81.0,77.0,13.0,12.0,85.0,73.0,61.0,...,0.484848,0.363636,0.515152,0.272727,0.353535,0.212121,0.575758,0.393939,0.434343,0.20202


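Let's start with one-way ANOVA tests, comparing each school's enrollment rates against those of all other schools combined, to identify schools that show a statistically significant difference from the rest of the population. Note that this runs one test per school with no multiple-comparison correction, so a few results below the 0.05 threshold can be expected by chance alone.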

In [4]:
# One-vs-rest one-way ANOVA: for every school, compare its values of each
# variable against the values of all other schools combined.
# NOTE(review): one test is run per school with no multiple-comparison
# correction, so some p < 0.05 results are expected by chance.
school_names = df['school_name'].unique()
# Named `anova_vars` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the notebook session.
anova_vars = ['enrolled_rate']

# Parallel lists with one entry per (variable, school) test.
results = []
schools = []
fstats = []
pvals = []
variables = []
for var in anova_vars:
    for school in school_names:
        # Build the membership mask once and reuse it for both groups.
        is_school = df['school_name'] == school
        group1 = df.loc[~is_school, var]  # every other school
        group2 = df.loc[is_school, var]   # this school only
        result = stats.f_oneway(group1, group2)
        fstats.append(result.statistic)
        pvals.append(result.pvalue)
        results.append(result)
        variables.append(var)
        schools.append(school)

# One row per school; later cells read 'school' and 'pval' from this frame.
anova_results_df = pd.DataFrame({'school': schools, 'variable': variables, 'fstat': fstats, 'pval': pvals})

In [5]:
# Rank schools by p-value, smallest first. Values are rounded to 5 decimals,
# so very small p-values are displayed as 0.0.
(
    anova_results_df
    .loc[:, ['school', 'fstat', 'pval']]
    .sort_values(by='pval')
    .round(5)
    .reset_index(drop=True)
)

Unnamed: 0,school,fstat,pval
0,Armstrong High School,37.50041,0.0
1,Washington and Lee High School,11.17909,0.00107
2,Huguenot High School,10.23145,0.00172
3,Bassett High School,9.05959,0.00312
4,Charlottesville High School,8.75692,0.00364
5,Alexandria City High School,8.4076,0.00436
6,Chatham High School (Virginia),5.55243,0.01988
7,Buckingham County High School,4.55669,0.03458
8,Magna Vista High School,4.19882,0.04237
9,Tunstall High School,3.34716,0.06951


In [6]:
# Schools whose one-vs-rest ANOVA p-value is below 0.05.
# Run this directly after the enrolled_rate ANOVA: later cells reassign
# anova_results_df with results for other variables.
significant_schools = anova_results_df.loc[anova_results_df['pval'].lt(0.05), 'school'].tolist()

In [7]:
# Display the list of schools with p < 0.05 (order follows anova_results_df, not p-value).
significant_schools

['Magna Vista High School',
 'Buckingham County High School',
 'Alexandria City High School',
 'Bassett High School',
 'Charlottesville High School',
 'Washington and Lee High School ',
 'Huguenot High School',
 'Armstrong High School',
 'Chatham High School (Virginia)']

These are the schools whose mean enrollment rate differs significantly (p < 0.05) from that of all other schools combined.

In [8]:
# Schools whose one-vs-rest ANOVA p-value is 0.05 or higher.
# Run this directly after the enrolled_rate ANOVA: later cells reassign
# anova_results_df with results for other variables.
insignificant_schools = anova_results_df.loc[anova_results_df['pval'].ge(0.05), 'school'].tolist()

In [9]:
# Display the list of schools with p >= 0.05 (order follows anova_results_df, not p-value).
insignificant_schools

['Nelson County High School',
 'Northumberland High School',
 'Louisa County High School',
 'Fluvanna County High School',
 'Gretna High School',
 'Dan River High School',
 'Rappahannock High School',
 'Lancaster High School (UVA)',
 'Patrick County High School',
 'George Washington High School',
 'Martinsville High School',
 'Orange County High School',
 'Tunstall High School',
 'William Monroe High School']

These are the schools whose mean enrollment rate does not differ significantly (p >= 0.05) from that of all other schools combined.

In [10]:
# Keep every row (all columns) that belongs to a school in the significant list.
in_significant_group = df['school_name'].isin(significant_schools)
enrollment_rates_significant = df.loc[in_significant_group]

enrollment_rates_significant.head()

Unnamed: 0,student_totals_seniors,specific_interactions_1-on-1s_total,specific_interactions_group_total,specific_interactions_1-on-1s_at_least_one,specific_interactions_group_at_least_one,specific_interactions_parent_meetings,specific_interactions_parent_meetings_at_least_one,total_interactions_met_with_1_plus_times_(group_and_1_1),total_interactions_met_with_3_plus_times_(group_and_1_1),total_interactions_met_with_5_plus_times_(group_and_1_1),...,total_female_rate,enrolled_female_rate,total_male_rate,enrolled_male_rate,total_black_rate,enrolled_black_rate,total_white_rate,enrolled_white_rate,econ_total_rate,econ_enrolled_rate
1,247.0,837.0,726.0,234.0,231.0,183.0,97.0,235.0,226.0,137.0,...,0.549708,0.397661,0.450292,0.274854,0.315789,0.239766,0.549708,0.368421,0.520468,0.339181
2,136.0,1679.0,799.0,128.0,129.0,179.0,64.0,132.0,124.0,118.0,...,0.522388,0.373134,0.477612,0.223881,0.30597,0.134328,0.604478,0.425373,0.559701,0.291045
3,676.0,298.0,150.0,142.0,128.0,116.0,99.0,218.0,45.0,9.0,...,0.537842,0.452496,0.462158,0.325282,0.352657,0.270531,0.2657,0.238325,0.563607,0.407407
8,263.0,801.0,638.0,261.0,246.0,262.0,136.0,261.0,233.0,161.0,...,0.485597,0.345679,0.514403,0.316872,0.17284,0.123457,0.720165,0.477366,0.580247,0.358025
10,224.0,673.0,468.0,172.0,208.0,115.0,73.0,212.0,132.0,80.0,...,0.506438,0.373391,0.493562,0.351931,0.291845,0.184549,0.566524,0.446352,0.399142,0.240343


In [11]:
# Keep every row (all columns) that belongs to a school in the insignificant list.
in_insignificant_group = df['school_name'].isin(insignificant_schools)
enrollment_rates_insignificant = df.loc[in_insignificant_group]

enrollment_rates_insignificant.head()

Unnamed: 0,student_totals_seniors,specific_interactions_1-on-1s_total,specific_interactions_group_total,specific_interactions_1-on-1s_at_least_one,specific_interactions_group_at_least_one,specific_interactions_parent_meetings,specific_interactions_parent_meetings_at_least_one,total_interactions_met_with_1_plus_times_(group_and_1_1),total_interactions_met_with_3_plus_times_(group_and_1_1),total_interactions_met_with_5_plus_times_(group_and_1_1),...,total_female_rate,enrolled_female_rate,total_male_rate,enrolled_male_rate,total_black_rate,enrolled_black_rate,total_white_rate,enrolled_white_rate,econ_total_rate,econ_enrolled_rate
0,148.0,724.0,644.0,126.0,143.0,55.0,37.0,145.0,138.0,113.0,...,0.455357,0.285714,0.544643,0.348214,0.160714,0.080357,0.741071,0.473214,0.392857,0.223214
4,96.0,488.0,158.0,81.0,77.0,13.0,12.0,85.0,73.0,61.0,...,0.484848,0.363636,0.515152,0.272727,0.353535,0.212121,0.575758,0.393939,0.434343,0.20202
5,356.0,1577.0,619.0,256.0,308.0,269.0,110.0,336.0,220.0,150.0,...,0.522337,0.350515,0.477663,0.233677,0.175258,0.099656,0.731959,0.429553,0.360825,0.158076
6,267.0,1606.0,1196.0,267.0,253.0,163.0,91.0,267.0,250.0,210.0,...,0.542125,0.399267,0.457875,0.252747,0.124542,0.069597,0.761905,0.516484,0.205128,0.087912
7,141.0,200.0,345.0,121.0,131.0,34.0,32.0,134.0,108.0,50.0,...,0.522388,0.328358,0.477612,0.268657,0.283582,0.141791,0.69403,0.432836,0.462687,0.208955


Let's run Welch's t-test and a one-way ANOVA to see whether the two groups differ significantly in mean enrollment rate.

In [12]:
# Welch's t-test (equal_var=False, i.e. no equal-variance assumption) on the
# row-level enrollment rates of the two school groups.
t_statistic, p_value = stats.ttest_ind(
    a=enrollment_rates_significant['enrolled_rate'],
    b=enrollment_rates_insignificant['enrolled_rate'],
    equal_var=False,
)

print("Welch's T-test between significant and non-significant schools:")
print("t-statistic:", t_statistic)
print("p-value:", p_value)

Welch's T-test between significant and non-significant schools:
t-statistic: -0.3260987214916366
p-value: 0.7453393402621268


In [13]:
# One-way ANOVA on the same two groups. f_oneway returns an F statistic, not
# a t statistic, so it is stored and reported under that name. This also
# leaves the Welch `t_statistic` from the previous cell intact.
f_statistic, p_value = stats.f_oneway(enrollment_rates_significant['enrolled_rate'],
                                      enrollment_rates_insignificant['enrolled_rate'])

print("ANOVA between significant and non-significant schools:")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

ANOVA between significant and non-significant schools:
t-statistic: 0.1402387861956842
p-value: 0.708627009742409


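Taken as a whole, the two groups don't appear to have a statistically significant difference in mean enrollment rate. That is plausible if the significant group contains schools both well above and well below the population mean, so that they offset each other; the per-school means below let us check this.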

Let's calculate mean enrollment rates to see if we can identify high performing and low performing schools within our statistically significant group. If a school's mean enrollment rate is higher than the population mean, we'll deem it "high significant" to indicate its high performance. If a school's mean enrollment rate is lower than the population mean, we'll deem it "low significant".

In [14]:
# Population mean: the mean of enrolled_rate over every row of df.
print(df['enrolled_rate'].mean())

0.5954041052919031


In [15]:
# Mean enrolled_rate per significant school, highest first.
significant_schools_enrolled_means = (
    df.loc[df['school_name'].isin(significant_schools), ['school_name', 'enrolled_rate']]
    .groupby('school_name')
    .mean()
    .sort_values(by='enrolled_rate', ascending=False)
    .reset_index()
)

In [16]:
# Display the per-school mean enrollment rates for the significant group.
significant_schools_enrolled_means 

Unnamed: 0,school_name,enrolled_rate
0,Bassett High School,0.70951
1,Charlottesville High School,0.707705
2,Alexandria City High School,0.705575
3,Chatham High School (Virginia),0.685834
4,Magna Vista High School,0.674421
5,Buckingham County High School,0.513194
6,Huguenot High School,0.47463
7,Washington and Lee High School,0.469567
8,Armstrong High School,0.383131


In [17]:
# Unweighted mean of the per-school means (each school counts once), so it
# need not equal the row-level mean of the same schools.
print(significant_schools_enrolled_means['enrolled_rate'].mean())

0.5915073829807241


In [18]:
# Mean enrolled_rate per insignificant school, highest first.
insignificant_schools_enrolled_means = (
    df.loc[df['school_name'].isin(insignificant_schools), ['school_name', 'enrolled_rate']]
    .groupby('school_name')
    .mean()
    .sort_values(by='enrolled_rate', ascending=False)
    .reset_index()
)

In [19]:
# Display the per-school mean enrollment rates for the insignificant group.
insignificant_schools_enrolled_means

Unnamed: 0,school_name,enrolled_rate
0,Tunstall High School,0.666169
1,Martinsville High School,0.659048
2,Patrick County High School,0.646662
3,William Monroe High School,0.637295
4,Fluvanna County High School,0.632502
5,Dan River High School,0.62144
6,George Washington High School,0.589217
7,Northumberland High School,0.587534
8,Rappahannock High School,0.576128
9,Lancaster High School (UVA),0.568303


In [20]:
# Unweighted mean of the per-school means (each school counts once), so it
# need not equal the row-level mean of the same schools.
print(insignificant_schools_enrolled_means['enrolled_rate'].mean())

0.5979091410633747


In [21]:
# Significant schools whose mean enrolled_rate is strictly below the
# population mean (the row-level mean of df['enrolled_rate']).
low_significant_schools = significant_schools_enrolled_means.loc[
    significant_schools_enrolled_means['enrolled_rate'].lt(df['enrolled_rate'].mean()),
    'school_name',
].tolist()
                          

In [22]:
# Display the "low significant" schools, ordered by descending mean enrollment rate.
low_significant_schools 

['Buckingham County High School',
 'Huguenot High School',
 'Washington and Lee High School ',
 'Armstrong High School']

These are our "low significant" schools that are performing poorly.

In [23]:
# Significant schools whose mean enrolled_rate is strictly above the
# population mean (the row-level mean of df['enrolled_rate']).
high_significant_schools = significant_schools_enrolled_means.loc[
    significant_schools_enrolled_means['enrolled_rate'].gt(df['enrolled_rate'].mean()),
    'school_name',
].tolist()

In [24]:
# Display the "high significant" schools, ordered by descending mean enrollment rate.
high_significant_schools

['Bassett High School',
 'Charlottesville High School',
 'Alexandria City High School',
 'Chatham High School (Virginia)',
 'Magna Vista High School']

These are our "high significant" schools that are performing well. 

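Let's explore black student enrollment at our statistically significant schools. We'll calculate the difference between the overall rate of black students and the enrollment rate of black students. The lower the difference, the higher the share of black students who have enrolled in college, and vice versa, so a lower difference should indicate a school that's serving black students well in this respect. One caveat: if both rates are measured against all students (which the sample rows suggest, since the female and male total rates sum to 1), this difference also grows with the group's share of the student body, so comparisons between schools with very different demographics should be read cautiously.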

In [25]:
# Gap between the overall black student rate and the enrolled black student rate.
# NOTE(review): if both rates share the same denominator (all students), this
# gap scales with the group's share of the student body - confirm the column definitions.
df['black_rate_difference'] = df['total_black_rate'].sub(df['enrolled_black_rate'])

Let's start with an ANOVA test to see which schools show statistical significance with their black student rate difference. 

In [31]:
# One-vs-rest one-way ANOVA on black_rate_difference: for every school,
# compare its values against the values of all other schools combined.
# This reassigns anova_results_df, replacing the results of any earlier ANOVA.
# NOTE(review): one test is run per school with no multiple-comparison
# correction, so some p < 0.05 results are expected by chance.
school_names = df['school_name'].unique()
# Named `anova_vars` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the notebook session.
anova_vars = ['black_rate_difference']

# Parallel lists with one entry per (variable, school) test.
results = []
schools = []
fstats = []
pvals = []
variables = []
for var in anova_vars:
    for school in school_names:
        # Build the membership mask once and reuse it for both groups.
        is_school = df['school_name'] == school
        group1 = df.loc[~is_school, var]  # every other school
        group2 = df.loc[is_school, var]   # this school only
        result = stats.f_oneway(group1, group2)
        fstats.append(result.statistic)
        pvals.append(result.pvalue)
        results.append(result)
        variables.append(var)
        schools.append(school)

anova_results_df = pd.DataFrame({'school': schools, 'variable': variables, 'fstat': fstats, 'pval': pvals})

In [32]:
# Rank schools by p-value, smallest first. Values are rounded to 5 decimals,
# so very small p-values are displayed as 0.0.
(
    anova_results_df
    .loc[:, ['school', 'fstat', 'pval']]
    .sort_values(by='pval')
    .round(5)
    .reset_index(drop=True)
)

Unnamed: 0,school,fstat,pval
0,Armstrong High School,79.56841,0.0
1,Rappahannock High School,34.9903,0.0
2,Huguenot High School,13.94905,0.00028
3,William Monroe High School,7.36424,0.00751
4,Bassett High School,4.5115,0.03548
5,Tunstall High School,4.36613,0.03852
6,George Washington High School,3.29119,0.07186
7,Fluvanna County High School,3.26733,0.07288
8,Magna Vista High School,3.11728,0.07971
9,Orange County High School,2.73258,0.10063


Unlike the enrollment-rate analysis above, we won't re-split schools into significant and insignificant groups based on black student rate difference. Instead, we'll keep the existing high significant and low significant groups, since they indicate which schools are significant overall. This lets us better understand those schools in general rather than focusing only on black students in this case.

In [26]:
# Population mean of black_rate_difference over every row of df, used as the
# reference point for "below average" / "above average" in the text that follows.
print(df['black_rate_difference'].mean())

0.17691933807899568


In [40]:
# Mean black_rate_difference per high significant school, smallest gap first.
high_sig_schools_black_difference_means = (
    df.loc[df['school_name'].isin(high_significant_schools), ['school_name', 'black_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='black_rate_difference', ascending=True)
    .reset_index()
)

In [41]:
# Display mean black_rate_difference for the high significant schools (ascending).
high_sig_schools_black_difference_means

Unnamed: 0,school_name,black_rate_difference
0,Bassett High School,0.051735
1,Magna Vista High School,0.07234
2,Alexandria City High School,0.089513
3,Chatham High School (Virginia),0.091576
4,Charlottesville High School,0.12126


Although it appears that the high significant schools have below average black rate differences, only Bassett High School shows statistical significance. Bassett appears to be serving black students well. 

In [46]:
# Mean black_rate_difference per low significant school, smallest gap first.
low_sig_schools_black_difference_means = (
    df.loc[df['school_name'].isin(low_significant_schools), ['school_name', 'black_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='black_rate_difference', ascending=True)
    .reset_index()
)

In [47]:
# Display mean black_rate_difference for the low significant schools (ascending).
low_sig_schools_black_difference_means

Unnamed: 0,school_name,black_rate_difference
0,Buckingham County High School,0.186323
1,Washington and Lee High School,0.218654
2,Huguenot High School,0.390001
3,Armstrong High School,0.601367


From our low significant schools, Armstrong and Huguenot show statistical significance. Armstrong and Huguenot appear to be serving black students poorly. 

Let's carry out a similar process to explore white student enrollment with white student rate differences.

In [33]:
# Gap between the overall white student rate and the enrolled white student rate.
# NOTE(review): if both rates share the same denominator (all students), this
# gap scales with the group's share of the student body - confirm the column definitions.
df['white_rate_difference'] = df['total_white_rate'].sub(df['enrolled_white_rate'])

In [34]:
# One-vs-rest one-way ANOVA on white_rate_difference: for every school,
# compare its values against the values of all other schools combined.
# This reassigns anova_results_df, replacing the results of any earlier ANOVA.
# NOTE(review): one test is run per school with no multiple-comparison
# correction, so some p < 0.05 results are expected by chance.
school_names = df['school_name'].unique()
# Named `anova_vars` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the notebook session.
anova_vars = ['white_rate_difference']

# Parallel lists with one entry per (variable, school) test.
results = []
schools = []
fstats = []
pvals = []
variables = []
for var in anova_vars:
    for school in school_names:
        # Build the membership mask once and reuse it for both groups.
        is_school = df['school_name'] == school
        group1 = df.loc[~is_school, var]  # every other school
        group2 = df.loc[is_school, var]   # this school only
        result = stats.f_oneway(group1, group2)
        fstats.append(result.statistic)
        pvals.append(result.pvalue)
        results.append(result)
        variables.append(var)
        schools.append(school)

anova_results_df = pd.DataFrame({'school': schools, 'variable': variables, 'fstat': fstats, 'pval': pvals})

In [36]:
# Rank schools by p-value, smallest first. Values are rounded to 5 decimals,
# so very small p-values lose precision in the display.
(
    anova_results_df
    .loc[:, ['school', 'fstat', 'pval']]
    .sort_values(by='pval')
    .round(5)
    .reset_index(drop=True)
)

Unnamed: 0,school,fstat,pval
0,Huguenot High School,22.22009,1e-05
1,Alexandria City High School,21.24519,1e-05
2,Rappahannock High School,21.19447,1e-05
3,Martinsville High School,12.3473,0.0006
4,Nelson County High School,12.32031,0.00061
5,George Washington High School,10.20304,0.00174
6,Orange County High School,6.49585,0.01192
7,Louisa County High School,6.03753,0.01526
8,Charlottesville High School,5.71806,0.01816
9,Patrick County High School,5.53564,0.02007


In [37]:
# Population mean of white_rate_difference over every row of df, used as the
# reference point for "below average" / "above average" in the text that follows.
print(df['white_rate_difference'].mean())

0.21101721246592814


In [42]:
# Mean white_rate_difference per high significant school, smallest gap first.
high_sig_schools_white_difference_means = (
    df.loc[df['school_name'].isin(high_significant_schools), ['school_name', 'white_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='white_rate_difference', ascending=True)
    .reset_index()
)

In [43]:
# Display mean white_rate_difference for the high significant schools (ascending).
high_sig_schools_white_difference_means

Unnamed: 0,school_name,white_rate_difference
0,Alexandria City High School,0.034607
1,Charlottesville High School,0.114614
2,Magna Vista High School,0.181096
3,Bassett High School,0.184733
4,Chatham High School (Virginia),0.199576


Alexandria City and Charlottesville have below average white rate differences that are statistically significant. These schools seem to be serving white students well. 

In [48]:
# Mean white_rate_difference per low significant school, smallest gap first.
low_sig_schools_white_difference_means = (
    df.loc[df['school_name'].isin(low_significant_schools), ['school_name', 'white_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='white_rate_difference', ascending=True)
    .reset_index()
)

In [49]:
# Display mean white_rate_difference for the low significant schools (ascending).
low_sig_schools_white_difference_means

Unnamed: 0,school_name,white_rate_difference
0,Huguenot High School,0.031162
1,Armstrong High School,0.161215
2,Washington and Lee High School,0.220577
3,Buckingham County High School,0.254097


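Although these are our low significant schools, Huguenot has a below-average white rate difference that is statistically significant. It seems to be serving white students well, though a small difference could also simply reflect a small share of white students at the school, so this should be checked against the school's demographics.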

Let's do this again for female student difference to explore female student enrollment. 

In [52]:
# Gap between the overall female student rate and the enrolled female student rate.
# NOTE(review): if both rates share the same denominator (all students), this
# gap scales with the group's share of the student body - confirm the column definitions.
df['female_rate_difference'] = df['total_female_rate'].sub(df['enrolled_female_rate'])

In [53]:
# One-vs-rest one-way ANOVA on female_rate_difference: for every school,
# compare its values against the values of all other schools combined.
# This reassigns anova_results_df, replacing the results of any earlier ANOVA.
# NOTE(review): one test is run per school with no multiple-comparison
# correction, so some p < 0.05 results are expected by chance.
school_names = df['school_name'].unique()
# Named `anova_vars` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the notebook session.
anova_vars = ['female_rate_difference']

# Parallel lists with one entry per (variable, school) test.
results = []
schools = []
fstats = []
pvals = []
variables = []
for var in anova_vars:
    for school in school_names:
        # Build the membership mask once and reuse it for both groups.
        is_school = df['school_name'] == school
        group1 = df.loc[~is_school, var]  # every other school
        group2 = df.loc[is_school, var]   # this school only
        result = stats.f_oneway(group1, group2)
        fstats.append(result.statistic)
        pvals.append(result.pvalue)
        results.append(result)
        variables.append(var)
        schools.append(school)

anova_results_df = pd.DataFrame({'school': schools, 'variable': variables, 'fstat': fstats, 'pval': pvals})

In [54]:
# Rank schools by p-value, smallest first. Values are rounded to 5 decimals,
# so very small p-values are displayed as 0.0.
(
    anova_results_df
    .loc[:, ['school', 'fstat', 'pval']]
    .sort_values(by='pval')
    .round(5)
    .reset_index(drop=True)
)

Unnamed: 0,school,fstat,pval
0,Armstrong High School,84.6203,0.0
1,Huguenot High School,11.7911,0.00079
2,Tunstall High School,5.4314,0.02125
3,Washington and Lee High School,5.11603,0.02529
4,Alexandria City High School,3.68742,0.05692
5,Bassett High School,3.23218,0.07442
6,Chatham High School (Virginia),3.22673,0.07466
7,Magna Vista High School,2.34249,0.12821
8,William Monroe High School,2.09551,0.15003
9,Charlottesville High School,2.02156,0.15737


In [55]:
# Population mean of female_rate_difference over every row of df, used as the
# reference point for "below average" / "above average" in the text that follows.
print(df['female_rate_difference'].mean())

0.16848707248432795


In [56]:
# Mean female_rate_difference per high significant school, smallest gap first.
high_sig_schools_female_difference_means = (
    df.loc[df['school_name'].isin(high_significant_schools), ['school_name', 'female_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='female_rate_difference', ascending=True)
    .reset_index()
)

In [57]:
# Display mean female_rate_difference for the high significant schools (ascending).
high_sig_schools_female_difference_means

Unnamed: 0,school_name,female_rate_difference
0,Alexandria City High School,0.123361
1,Bassett High School,0.126169
2,Chatham High School (Virginia),0.126204
3,Magna Vista High School,0.132345
4,Charlottesville High School,0.134873


None of our high significant schools show statistical significance in their female rate differences at the 0.05 level.

In [58]:
# Mean female_rate_difference per low significant school, smallest gap first.
low_sig_schools_female_difference_means = (
    df.loc[df['school_name'].isin(low_significant_schools), ['school_name', 'female_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='female_rate_difference', ascending=True)
    .reset_index()
)

In [59]:
# Display mean female_rate_difference for the low significant schools (ascending).
low_sig_schools_female_difference_means

Unnamed: 0,school_name,female_rate_difference
0,Buckingham County High School,0.18877
1,Washington and Lee High School,0.221371
2,Huguenot High School,0.246939
3,Armstrong High School,0.340501


Armstrong, Huguenot and Washington and Lee all show statistical significance with above average female rate differences. These schools appear to be serving female students poorly. 

Let's do this again for economically disadvantaged students. 

In [60]:
# Gap between the overall and the enrolled economically disadvantaged student rates.
# NOTE(review): if both rates share the same denominator (all students), this
# gap scales with the group's share of the student body - confirm the column definitions.
df['econ_rate_difference'] = df['econ_total_rate'].sub(df['econ_enrolled_rate'])

In [61]:
# One-vs-rest one-way ANOVA on econ_rate_difference: for every school,
# compare its values against the values of all other schools combined.
# This reassigns anova_results_df, replacing the results of any earlier ANOVA.
# NOTE(review): one test is run per school with no multiple-comparison
# correction, so some p < 0.05 results are expected by chance.
school_names = df['school_name'].unique()
# Named `anova_vars` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the notebook session.
anova_vars = ['econ_rate_difference']

# Parallel lists with one entry per (variable, school) test.
results = []
schools = []
fstats = []
pvals = []
variables = []
for var in anova_vars:
    for school in school_names:
        # Build the membership mask once and reuse it for both groups.
        is_school = df['school_name'] == school
        group1 = df.loc[~is_school, var]  # every other school
        group2 = df.loc[is_school, var]   # this school only
        result = stats.f_oneway(group1, group2)
        fstats.append(result.statistic)
        pvals.append(result.pvalue)
        results.append(result)
        variables.append(var)
        schools.append(school)

anova_results_df = pd.DataFrame({'school': schools, 'variable': variables, 'fstat': fstats, 'pval': pvals})

In [62]:
# Rank schools by p-value, smallest first. Values are rounded to 5 decimals,
# so very small p-values are displayed as 0.0.
(
    anova_results_df
    .loc[:, ['school', 'fstat', 'pval']]
    .sort_values(by='pval')
    .round(5)
    .reset_index(drop=True)
)

Unnamed: 0,school,fstat,pval
0,Armstrong High School,76.05024,0.0
1,Fluvanna County High School,6.84091,0.00992
2,Huguenot High School,5.99618,0.01561
3,William Monroe High School,4.47387,0.03624
4,Tunstall High School,3.7524,0.0548
5,Washington and Lee High School,3.2103,0.0754
6,Charlottesville High School,3.20207,0.07577
7,Buckingham County High School,2.65246,0.1057
8,Chatham High School (Virginia),2.58136,0.11045
9,Lancaster High School (UVA),2.17661,0.14243


In [63]:
# Population mean of econ_rate_difference over every row of df, used as the
# reference point for "below average" / "above average" in the text that follows.
print(df['econ_rate_difference'].mean())

0.2367504091351579


In [64]:
# Mean econ_rate_difference per high significant school, smallest gap first.
high_sig_schools_econ_difference_means = (
    df.loc[df['school_name'].isin(high_significant_schools), ['school_name', 'econ_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='econ_rate_difference', ascending=True)
    .reset_index()
)

In [65]:
# Display mean econ_rate_difference for the high significant schools (ascending).
high_sig_schools_econ_difference_means

Unnamed: 0,school_name,econ_rate_difference
0,Charlottesville High School,0.165461
1,Chatham High School (Virginia),0.1726
2,Bassett High School,0.190778
3,Magna Vista High School,0.195979
4,Alexandria City High School,0.213758


None of our high significant schools show statistical significance.

In [66]:
# Mean econ_rate_difference per low significant school, smallest gap first.
low_sig_schools_econ_difference_means = (
    df.loc[df['school_name'].isin(low_significant_schools), ['school_name', 'econ_rate_difference']]
    .groupby('school_name')
    .mean()
    .sort_values(by='econ_rate_difference', ascending=True)
    .reset_index()
)

In [67]:
# Display mean econ_rate_difference for the low significant schools (ascending).
low_sig_schools_econ_difference_means

Unnamed: 0,school_name,econ_rate_difference
0,Buckingham County High School,0.301762
1,Washington and Lee High School,0.308129
2,Huguenot High School,0.33334
3,Armstrong High School,0.51824


Armstrong and Huguenot both have above average economically disadvantaged student differences that show statistical significance. These schools seem to be serving economically disadvantaged students poorly. 