In [None]:
# Data-handling libraries used throughout the notebook.
import pandas as pd
import numpy as np
# Load the scores CSV (path is relative to the notebook) into a DataFrame.
data = pd.read_csv("../input/scores.csv")
import matplotlib.pyplot as plt
# Ask the inline plotting backend for 'retina' figure output.
%config InlineBackend.figure_format = 'retina'

**Best/Worst Average SAT scores:**   

The SAT scores in the data show the average math, reading and writing scores for students at a particular public high school in NYC in 2014-2015. When we look at the overall ranges for these scores we see the following:

In [None]:
# Report the highest and lowest school-level average for each SAT section.
# Each entry pairs a score column with the sentence template used to report it.
range_reports = (
    ('Average Score (SAT Math)',
     'The highest average SAT Math score was {} and the lowest was {}.'),
    ('Average Score (SAT Reading)',
     'The highest average SAT Reading score was {} and the lowest was {}.'),
    ('Average Score (SAT Writing)',
     'The highest average SAT Writing score was {} and the lowest was {}.'),
)

for position, (column, sentence) in enumerate(range_reports):
    if position > 0:
        # Spacer line between consecutive reports (none after the last one).
        print('\t')
    highest = int(data[column].max())
    lowest = int(data[column].min())
    print(sentence.format(highest, lowest))


In [None]:
The ranges of average SAT scores are pretty wide. Below, we see which schools sit at
the top and bottom of each of these ranges:

In [None]:
# Name the schools holding the highest and lowest average SAT math score.
# The extremes are computed from the data rather than hard-coded, so the cell
# stays correct if the dataset changes. If several schools tie, the first
# matching row is reported.
math_scores = data['Average Score (SAT Math)']
print("The school with the best overall average SAT math score was {}.".format(
      data.loc[math_scores == math_scores.max(), 'School Name'].values[0]))
print('\t')
print("The school with the worst overall average SAT math score was {}".format(
      data.loc[math_scores == math_scores.min(), 'School Name'].values[0]))

In [None]:
# Name the schools holding the highest and lowest average SAT reading score.
# The extremes are computed from the data rather than hard-coded, so the cell
# stays correct if the dataset changes. If several schools tie, the first
# matching row is reported.
reading_scores = data['Average Score (SAT Reading)']
print("The school with the best overall average SAT reading score was {}.".format(
      data.loc[reading_scores == reading_scores.max(), 'School Name'].values[0]))
print('\t')
print("The school with the worst overall average SAT reading score was {}".format(
      data.loc[reading_scores == reading_scores.min(), 'School Name'].values[0]))

In [None]:
# Name the schools holding the highest and lowest average SAT writing score.
# The extremes are computed from the data rather than hard-coded, so the cell
# stays correct if the dataset changes. If several schools tie, the first
# matching row is reported.
writing_scores = data['Average Score (SAT Writing)']
print("The school with the best overall average SAT writing score was {}.".format(
      data.loc[writing_scores == writing_scores.max(), 'School Name'].values[0]))
print('\t')
print("The school with the worst overall average SAT writing score was {}".format(
      data.loc[writing_scores == writing_scores.min(), 'School Name'].values[0]))

<p>We see that Stuyvesant High School ranks best, across the board, in terms of average SAT scores in 2014-2015.</p>

**Average SAT scores by Borough and Zip Code:** 
<p>Now let us turn our focus to looking at how each borough and, on a smaller scale, zip code region does in terms of SAT score outcomes.</p>

In [None]:
#function to label bars in the barplots
def barlabel(b, i, axes=None):
    """Write each bar's height, as a whole number, inside the bar.

    Parameters
    ----------
    b : iterable of bars
        The bars to annotate (e.g. the container returned by ``Axes.bar``).
        Each bar must provide ``get_height()``, ``get_x()`` and ``get_width()``.
    i : int
        Index, within ``axes``, of the subplot the bars were drawn on.
    axes : sequence of Axes, optional
        Subplots to draw on. When omitted, the notebook-level ``ax`` array is
        used, so existing ``barlabel(bars, i)`` calls keep working.
    """
    if axes is None:
        # Fall back to the axes array created by the calling cell.
        axes = ax
    # Iterate over the bars that were passed in, not a global variable.
    for bar in b:
        height = bar.get_height()
        if np.isnan(height):
            # A bar with no value has nothing to label; int(NaN) would raise.
            continue
        # Centre the label horizontally and place it just below the bar's top.
        axes[i].text(bar.get_x()+bar.get_width()/2., 0.90*height,
            '%d' % int(height), color='white', ha='center', va='bottom')


# One panel per SAT section, each showing the best school-level average score
# found in every borough.
fig, ax = plt.subplots(3, figsize=(8, 12))
width = 0.35  # horizontal offset applied to both the bars and their ticks

for i, subject in enumerate(('Math', 'Reading', 'Writing')):
    # Aggregate once and reuse the result for bar heights and tick labels.
    borough_max = data.groupby('Borough')['Average Score (SAT {})'.format(subject)].max()
    # Take the bar count from the data instead of assuming five boroughs.
    ind = np.arange(len(borough_max))
    positions = ind + width

    bars = ax[i].bar(positions, borough_max)
    ax[i].set_facecolor("white")
    ax[i].set_title("Max average SAT {} Score by Borough (2014-2015)".format(subject), fontsize=18)
    ax[i].set_xlabel('')
    ax[i].set_xticks(positions)
    ax[i].set_xticklabels(labels=borough_max.index)
    # Hide the y tick labels; the value is printed inside each bar instead.
    ax[i].set_yticklabels("")
    barlabel(bars, i)
    # Highlight the second and third bars (skipped if there are fewer bars).
    for bar, colour in zip(list(bars)[1:3], ('#f45c42', '#41f4d9')):
        bar.set_color(colour)

plt.tight_layout();

<p>When we plot the max average SAT scores by borough, we see that the highest score came from a school in Manhattan, which we learned was Stuyvesant HS. The lowest average scores on all 3 parts of the SAT were found in schools based in Brooklyn.</p>
<p>Next, I will take the 120 zip codes we have and, using the first three digits of each zip code, group them into regions. I will then find the maximum average score within each region for each of the three SAT sections.</p>

In [None]:
# Collapse each zip code into a region by masking its last two digits
# (e.g. 10002 -> 100xx). `regex=True` is required: pandas >= 2.0 treats the
# pattern as a literal string by default, which would leave every zip unchanged.
data['Zip Code']=data['Zip Code'].astype('str').str.replace(r'\d{2}$', 'xx', regex=True)

In [None]:
# One panel per SAT section, each showing the best school-level average score
# found in every postal region (value of the 'Zip Code' column).
fig, ax = plt.subplots(3, figsize=(8, 12))
width = 0.35  # horizontal offset applied to both the bars and their ticks

for i, subject in enumerate(('Math', 'Reading', 'Writing')):
    # Aggregate once and reuse the result for bar heights and tick labels.
    region_max = data.groupby('Zip Code')['Average Score (SAT {})'.format(subject)].max()
    # Take the bar count from the data instead of assuming ten regions.
    ind = np.arange(len(region_max))
    positions = ind + width

    bars = ax[i].bar(positions, region_max)
    ax[i].set_facecolor("white")
    ax[i].set_title("Max average SAT {} Score by Postal Region (2014-2015)".format(subject), fontsize=16)
    ax[i].set_xlabel('')
    ax[i].set_xticks(positions)
    ax[i].set_xticklabels(labels=region_max.index)
    # Hide the y tick labels; the value is printed inside each bar instead.
    ax[i].set_yticklabels("")
    barlabel(bars, i)
    # Highlight the second and third bars (skipped if there are fewer bars).
    for bar, colour in zip(list(bars)[1:3], ('#f45c42', '#41f4d9')):
        bar.set_color(colour)

plt.tight_layout();


<p>When we break down the max average SAT scores by postal code region, we see just how poorly the 101xx region has performed on all three sections of the SAT.</p>