# How educator compensation has changed over time

Based on article here: http://nces.ed.gov/pubsearch/pubsinfo.asp?pubid=2016156

* Nonfiscal data: https://nces.ed.gov/ccd/stnfis.asp
* Fiscal data: https://nces.ed.gov/ccd/stfis.asp

In [1]:
import pandas as pd, numpy as np, re

In [2]:
fiscal_links = pd.read_csv("data/2/fiscal_links.csv")
nonfiscal_links = pd.read_csv("data/2/nonfiscal_links.csv")

In [3]:
fiscal_links.head()

Unnamed: 0,url,School year,Excel Link,Excel Link_link,New column,New column_link
0,https://nces.ed.gov/ccd/stfis.asp,(Fiscal Year 2013); (v.1a—Provisional),ZIP (34 KB),https://nces.ed.gov/ccd/data/zip/stfis13_1a.zip,Flat File (21 KB),https://nces.ed.gov/ccd/data/txt/stfis13_1a_la...
1,https://nces.ed.gov/ccd/stfis.asp,(Fiscal Year 2013); (v.1a—Provisional),ZIP (72 KB),https://nces.ed.gov/ccd/data/zip/Stfis13_1a_xl...,Flat File (21 KB),https://nces.ed.gov/ccd/data/txt/stfis13_1a_la...
2,https://nces.ed.gov/ccd/stfis.asp,(Fiscal Year 2012); (v.1a—Provisional),ZIP (34 KB),https://nces.ed.gov/ccd/data/zip/Stfis_1a_txt.zip,Flat File (21 KB),https://nces.ed.gov/ccd/data/txt/stfis121alay.txt
3,https://nces.ed.gov/ccd/stfis.asp,(Fiscal Year 2012); (v.1a—Provisional),ZIP (69 KB),https://nces.ed.gov/ccd/data/zip/Stfis_1a_xls.zip,Flat File (21 KB),https://nces.ed.gov/ccd/data/txt/stfis121alay.txt
4,https://nces.ed.gov/ccd/stfis.asp,(Fiscal Year 2011); (v.1a—Preliminary),ZIP (35 KB),https://nces.ed.gov/ccd/data/zip/stfis111a_txt...,Flat File (21 KB),https://nces.ed.gov/ccd/data/txt/stfis111alay.txt


In [4]:
import urllib2, zipfile, os

In [5]:
def download(zip_url, path):
    response = urllib2.urlopen(zip_url)
    zipcontent= response.read()
    
    with open("tmp.zip", 'w') as f:
        f.write(zipcontent)
        f.close()
    

    zip_ref = zipfile.ZipFile("tmp.zip", 'r')
    zip_ref.extractall(path)
    zip_ref.close()

In [6]:
def import_excel_file(dir_path):
    #print "Looking for excel files in " + dir_path
    excel_files = []
    for filename in os.listdir(dir_path):
        if ".xls" in filename.lower():
            excel_files.append(dir_path + "/" + filename)
            
    if len(excel_files) == 0: 
        return None
    elif len(excel_files) == 1:
        ret_frame = pd.read_excel(excel_files[0])#[["SURVYEAR","STABR","E11","E12"]]
        #ret_frame["STABR"] = ret_frame["STABR"].apply(lambda x: str(x).upper().strip())
        ret_frame.columns = map(lambda x: str(x).strip().upper(), ret_frame.columns)
        return ret_frame
    else:
        throw("Error: Multiple excel files found in " + dir_path)

In [7]:
import re

def clean_fiscal_colname(col_name):
    return  re.sub("FISCAL_YEAR_","",re.sub(r"[^\w]+","_",re.sub(r"[\(\)]","",col_name).strip().upper()))


In [8]:
fiscal_frames = {}
for row, val in fiscal_links.iterrows():
    year = val["School year"]
    zip_link = val["Excel Link_link"]
    path =  "data/2/fiscal/" + str(year)
    fiscal_frames[clean_fiscal_colname(year)] = import_excel_file(path)

In [9]:
def get_frameset(folder_path, links_frame):
    frame_set = {}
    for row, val in links_frame.iterrows():
        try:
            year = val["School year"]
            zip_link = val["Excel Link_link"]
        except:
            print "Error"
            print val
        path =  folder_path + str(year)
        #print path
        #download(zip_link, path)
        frame_set[clean_fiscal_colname(year)] = import_excel_file(path)
        
    return frame_set

In [10]:
fiscal_frames = get_frameset("data/2/fiscal/", fiscal_links)
print "done"
nonfiscal_frames = get_frameset("data/2/nonfiscal/", nonfiscal_links)
print "done 2"

done
done 2


In [11]:
nonfiscal_frames["1994_95_V_1B"]["TOTTCH"]

0        NaN
1      42791
2       7205
3      38132
4      26181
5     225016
6      34894
7      35316
8       6416
9       6110
10    110674
11     77914
12     10240
13     12582
14    110830
15     55496
16     31726
17     30579
18     38784
19     47599
20     15404
21     46565
22     60489
23     80522
24     46958
25     28866
26     56606
27     10079
28     19774
29     13414
30     12109
31     85258
32     19025
33    182273
34     71592
35      7796
36    109085
37     39406
38     26208
39    102988
40     10066
41     39437
42      9985
43     47406
44    234213
45     19524
46      7566
47     72505
48     46439
49     21024
50     54054
51      6754
52       698
53      1826
54       406
55     39933
56      1528
Name: TOTTCH, dtype: float64

In [12]:
def count_cols (frame_list):
    columns = {}
    for k in frame_list:
        for col in frame_list[k].columns:
            if col in columns:
                columns[col].append(k)
            else:
                columns[col] = [k]
    return columns

In [13]:
count_cols(nonfiscal_frames).keys()

['HP08F',
 'HP08M',
 'AMREGDIP',
 'LIBSUP',
 'WH07M',
 'ISTUSUP',
 'GRADE 3 STUDENTS',
 'IAIDES',
 'ISECTCH',
 'AM05M',
 'AMEQUIV',
 'HIPKF',
 'AM04M',
 'OTHSUP',
 'AM04F',
 'HIPKM',
 'HI07M',
 'SEANAME',
 'HI07F',
 'GRADE 4 STUDENTS',
 'TRKG',
 'STFIPS',
 'ITOTTCH',
 'TRUGM',
 'TRUGF',
 'OTHGUI',
 'IUG',
 'HPKGM',
 'HPKGF',
 'HP07',
 'TR11',
 'TR10',
 'TR12',
 'AS03M',
 'TR05F',
 'HI09F',
 'HI09M',
 'TR05M',
 'AS03F',
 'IACHSUP',
 'SECONDARY GUIDANCE COUNSELOR',
 'IG09',
 'TOTAL STUDENTS',
 'ASUGM',
 'BL04M',
 'GRADE 12 STUDENTS',
 'BL04F',
 'BL10M',
 'UNGRADED TEACHERS',
 'TOTTCH',
 'ALL OTHER SUPPORT SERVICES STAFF',
 'WH10M',
 'WH06F',
 'PREKINDERGARTEN TEACHERS',
 'WH03F',
 'AM09F',
 'WH03M',
 'AM09M',
 'ASPK',
 'MEMBER',
 'HI02F',
 'HI02M',
 'AS12',
 'TR12M',
 'TRPKM',
 'TR12F',
 'TRPKF',
 'G12',
 'G11',
 'G10',
 'HP03F',
 'AM11',
 'AM10',
 'AM12',
 'HP03M',
 'WH08F',
 'ILEASUP',
 'WHEQUIV',
 'WH08M',
 'TR06M',
 'TR06F',
 'HIREGDIP',
 'H.S. EQUIVALENCY',
 'ASIN',
 'WHREGDIP',
 'B

In [14]:
def has_col(frame_list, frame, col):
    return frame in count_cols(frame_list)[col]

In [15]:
has_col(fiscal_frames, "2006_V_1B_REVISED", "SURVYEAR")

True

In [16]:
def all_have_cols(frame_list, cols):
    oks = []
    for k in frame_list:
        found_cols = 0
        for col in cols:
            if has_col(frame_list, k, col):
                found_cols += 1
            else:
                print k + " is missing " + col
        if found_cols == len(cols):
            oks.append(k)
        
    return oks

In [17]:
all_have_cols(fiscal_frames, ["STABR","E11","E12"])

1988_V_1B_REVISED is missing E11
1988_V_1B_REVISED is missing E12
1987_V_1B_REVISED is missing E11
1987_V_1B_REVISED is missing E12


['2005_V_1B_REVISED',
 '1991_V_1B_REVISED',
 '1998_V_1B_REVISED',
 '2002_V_1D_REVISED',
 '2004_V_1B_REVISED',
 '2013_V_1A_PROVISIONAL',
 '2010_V_1A_PROVISIONAL',
 '1993_V_1B_REVISED',
 '2008_V_1B_REVISED',
 '1992_V_1B_REVISED',
 '1990_V_1B_REVISED',
 '1996_V_1B_REVISED',
 '1999_V_1B_REVISED',
 '2001_V_1B_REVISED',
 '2003_V_1B_REVISED',
 '2007_V_1B_REVISED',
 '1997_V_1B_REVISED',
 '1995_V_1B_REVISED',
 '2011_V_1A_PRELIMINARY',
 '1989_V_1B_REVISED',
 '2000_V_1B_REVISED',
 '2012_V_1A_PROVISIONAL',
 '1994_V_1B_REVISED',
 '2009_V_1B_REVISED',
 '2006_V_1B_REVISED']

In [18]:
def merge_all_with(frame_list, col):
    first = True
    ret_frame = pd.DataFrame({"STABR": ["CT","NY"]}).set_index("STABR")
    #return ret_frame
    cols = [col]
    cols.append("STABR")
    for k in all_have_cols(frame_list, cols):
        print "Processing frame: " + k
        #print frame_list[k].columns
        try:
            cur_frame = frame_list[k][["STABR",col]]
        except:
            print "Error:", frame_list[k].columns
        #cur_frame[col] = cur_frame[col].apply(lambda x: re.sub(",","",str(x)))
        cur_frame[col] = pd.to_numeric(cur_frame[col], errors="coerce")
        cur_frame[col] = cur_frame[cur_frame[col].notnull()][col]
        cur_frame = cur_frame.set_index("STABR")
        new_col_name = col + "_" + k
        cur_frame.columns = [new_col_name]
        #return cur_frame, ret_frame
        cur_frame[new_col_name]
        #print ret_frame
        if first:
            ret_frame = cur_frame
        else:
            ret_frame = cur_frame.join(ret_frame)
            return ret_frame, cur_frame
        first = False
    return ret_frame

def frames_with_cols(frame_list, col):
    ret_frames = {}
    cols = ["STABR",col]
    for k in all_have_cols(frame_list, cols):
        ret_frames[k] = frame_list[k][cols]
        ret_frames[k].columns = ["STABR", k + "_" + col]
    return ret_frames

def join_frames_on(frame_set, col):
    frames = frames_with_cols(frame_set, col)
    first = True
    ret_frame = False
    for k in frames:
        cur_frame = frames[k]
        cur_frame["STABR"] = cur_frame["STABR"].apply(lambda x: str(x).upper().strip())
        cur_frame = cur_frame.set_index("STABR")
        if first == True:
            ret_frame = cur_frame
            #return ret_frame
        else:
            ret_frame = cur_frame.join(ret_frame)
        first = False
        #print k, cur_frame.index
    return ret_frame

In [19]:
join_frames_on(nonfiscal_frames, "TOTTCH")
#frames_with_cols(nonfiscal_frames, "TOTTCH")

1991_92_V_1C is missing TOTTCH
1994_95_V_1B is missing STABR
1987_88_V_1C is missing TOTTCH
1989_90_V_1C is missing TOTTCH
1990_91_V_1C is missing TOTTCH
1988_89_V_1C is missing TOTTCH
1986_87_V_1C is missing TOTTCH
1993_94_V_1B is missing STABR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,2005_06_V_1B_TOTTCH,1998_99_V_1B_TOTTCH,2009_10_V_1B_TOTTCH,2007_08_V_1B_TOTTCH,2011_12_V_1A_TOTTCH,2000_01_V_1C_TOTTCH,1997_98_V_1C_TOTTCH,2001_02_V_1C_TOTTCH,1999_2000_V_1B_TOTTCH,1992_93_V_1C_TOTTCH,2004_05_V_1F_TOTTCH,2006_07_V_1C_TOTTCH,1995_96_V_1B_TOTTCH,2008_09_V_1C_TOTTCH,2002_03_V_1B_TOTTCH,2013_14_V_1A_TOTTCH,2012_13_V_1A_TOTTCH,2003_04_V_1B_TOTTCH,2010_11_V_1A_TOTTCH,1996_97_V_1C_TOTTCH
STABR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AK,7912,8118,8083.1,7613.0,8087.87,7880,7625.0,8026.0,7838,7282.0,7756.0,7903.0,7379.0,7927.0,8080.0,7898.26,7682.18,7808.0,8170.64,7418.0
AL,57757,47766,47492.0,50420.0,47722.67,48194,45967.0,46785.0,48624,41961.0,51594.0,56134.0,44056.0,47818.0,47115.0,47161.77,51877.22,58070.0,49363.24,45035.0
AR,32997,27953,37240.0,33882.0,33982.96,31947,26931.0,33079.0,31362,26017.0,31234.0,35089.0,26449.0,37162.0,30330.0,34933.04,34131.42,30876.0,34272.8,26681.0
AS,989,764,-1.0,-1.0,-1.0,820,762.0,914.0,801,725.0,945.0,971.0,728.0,-1.0,943.0,-1.0,-1.0,988.0,-1.0,734.0
AZ,51376,42352,51947.23,54032.0,50800.15,44438,41129.0,46015.0,43892,36076.0,48935.0,52625.0,38017.0,54696.0,47101.0,48358.73,48866.42,47507.0,50030.62,40521.0
BI,-1,N,-1.0,-1.0,-1.0,M,,,,,,,,,,,,,,
CA,309222,281784,316298.58,305371.0,268688.93,298021,268535.0,304203.0,287433,218566.0,305969.0,307366.0,230849.0,303647.0,307764.0,259505.79,266254.85,304311.0,260806.3,248818.0
CO,45841,39434,49060.32,47761.0,48077.76,41983,37840.0,44182.0,40772,33419.0,45165.0,46973.0,35388.0,48692.0,45401.0,50157.36,48921.79,44904.0,48542.99,36398.0
CT,39687,38772,43592.83,39304.0,43804.81,41044,37658.0,41773.0,39907,34193.0,38808.0,39115.0,36070.0,48463.0,42296.0,43443.09,43931.33,42370.0,42951.39,36551.0
DC,5481,5187,5854.0,6347.0,6278.06,4949,4388.0,4951.0,4812,6064.0,5387.0,5383.0,5305.0,5321.0,5005.0,5991.43,5925.1,5676.0,5925.33,5288.0


In [20]:
join_frames_on(nonfiscal_frames, "AIDES")


1991_92_V_1C is missing AIDES
1994_95_V_1B is missing STABR
1987_88_V_1C is missing AIDES
1989_90_V_1C is missing AIDES
1990_91_V_1C is missing AIDES
1988_89_V_1C is missing AIDES
1986_87_V_1C is missing AIDES
1993_94_V_1B is missing STABR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,2005_06_V_1B_AIDES,1998_99_V_1B_AIDES,2009_10_V_1B_AIDES,2007_08_V_1B_AIDES,2011_12_V_1A_AIDES,2000_01_V_1C_AIDES,1997_98_V_1C_AIDES,2001_02_V_1C_AIDES,1999_2000_V_1B_AIDES,1992_93_V_1C_AIDES,2004_05_V_1F_AIDES,2006_07_V_1C_AIDES,1995_96_V_1B_AIDES,2008_09_V_1C_AIDES,2002_03_V_1B_AIDES,2013_14_V_1A_AIDES,2012_13_V_1A_AIDES,2003_04_V_1B_AIDES,2010_11_V_1A_AIDES,1996_97_V_1C_AIDES
STABR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AK,2243,2162,2423.8,2317.0,2458.4,2197,1957,2481.0,2221,1775,2200.0,2274.0,1751,2190.0,2328.0,2421.81,2441.73,2118.0,2536.66,1649
AL,6768,6752,6671.24,6514.0,6146.48,6738,7294,6122.0,6270,3636,6458.0,6669.0,6657,6914.0,6169.0,6243.47,6191.0,6240.0,6550.43,7146
AR,7381,3231,7991.0,8008.0,8052.0,6061,3837,6170.0,5526,3436,7196.0,7660.0,3523,7794.0,6217.0,7746.42,7518.7,6623.0,8065.0,3837
AS,41,115,-1.0,-1.0,-1.0,127,107,132.0,121,40,108.0,107.0,15,-1.0,147.0,-1.0,-1.0,116.0,-1.0,16
AZ,14520,11561,14789.91,15362.0,15182.76,12391,10283,13179.0,11631,8741,13713.0,15039.0,9613,15621.0,13650.0,14820.09,14803.9,13438.0,14385.86,10157
BI,-1,N,-1.0,-1.0,-1.0,M,,,,,,,,,,,,,,
CA,67073,59113,67462.5,65846.0,63886.0,63852,59381,72554.0,62753,55098,68118.0,68071.0,56822,68652.0,72242.0,64663.5,62890.0,69201.0,63972.0,57896
CO,10527,8051,15153.16,14322.0,14117.92,9124,6850,10383.0,8842,4662,10269.0,13464.0,5919,15010.0,11008.0,14796.32,15136.92,10216.0,14679.57,6532
CT,12488,9950,14435.29,13741.0,14253.2,10954,8881,11857.0,10544,6592,12689.0,13374.0,7520,13745.0,12076.0,15524.64,13703.06,11567.0,15636.58,7506
DC,1373,384,1541.0,1420.0,1403.79,1154,1011,1508.0,1129,344,1339.0,1353.0,327,1252.0,1537.0,1735.21,1493.5,1269.0,1634.8,499


In [21]:
merge_all_with(nonfiscal_frames, "TOTAL TEACHERS")

2002_03_V_1B is missing TOTAL TEACHERS
2006_07_V_1C is missing TOTAL TEACHERS
1992_93_V_1C is missing TOTAL TEACHERS
2007_08_V_1B is missing TOTAL TEACHERS
1998_99_V_1B is missing TOTAL TEACHERS
1994_95_V_1B is missing TOTAL TEACHERS
1994_95_V_1B is missing STABR
2013_14_V_1A is missing TOTAL TEACHERS
2004_05_V_1F is missing TOTAL TEACHERS
2001_02_V_1C is missing TOTAL TEACHERS
2003_04_V_1B is missing TOTAL TEACHERS
2010_11_V_1A is missing TOTAL TEACHERS
1995_96_V_1B is missing TOTAL TEACHERS
2011_12_V_1A is missing TOTAL TEACHERS
2005_06_V_1B is missing TOTAL TEACHERS
1996_97_V_1C is missing TOTAL TEACHERS
1999_2000_V_1B is missing TOTAL TEACHERS
2012_13_V_1A is missing TOTAL TEACHERS
1997_98_V_1C is missing TOTAL TEACHERS
1993_94_V_1B is missing TOTAL TEACHERS
1993_94_V_1B is missing STABR
2000_01_V_1C is missing TOTAL TEACHERS
2009_10_V_1B is missing TOTAL TEACHERS
2008_09_V_1C is missing TOTAL TEACHERS
Processing frame: 1991_92_V_1C
Processing frame: 1987_88_V_1C


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(       TOTAL TEACHERS_1987_88_V_1C  TOTAL TEACHERS_1991_92_V_1C
 STABR                                                          
 AL                           37716                        40480
 AK                            6113                         7118
 AZ                           30707                        33978
 AR                           25572                        25785
 CA                          195864                       224000
 CO                           31168                        33093
 CT                           35050                        34383
 DE                            5951                         6095
 DC                            6232                         6346
 FL                           95857                       109939
 GA                           62280                        63816
 HI                            7684                         9451
 ID                           10258                        11626
 IL                      

In [22]:
join_frames_on(fiscal_frames, "E11")

1988_V_1B_REVISED is missing E11
1987_V_1B_REVISED is missing E11


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,2006_V_1B_REVISED_E11,2009_V_1B_REVISED_E11,1994_V_1B_REVISED_E11,2012_V_1A_PROVISIONAL_E11,2000_V_1B_REVISED_E11,1989_V_1B_REVISED_E11,2011_V_1A_PRELIMINARY_E11,1995_V_1B_REVISED_E11,1997_V_1B_REVISED_E11,2007_V_1B_REVISED_E11,...,1992_V_1B_REVISED_E11,2008_V_1B_REVISED_E11,1993_V_1B_REVISED_E11,2010_V_1A_PROVISIONAL_E11,2013_V_1A_PROVISIONAL_E11,2004_V_1B_REVISED_E11,2002_V_1D_REVISED_E11,1998_V_1B_REVISED_E11,1991_V_1B_REVISED_E11,2005_V_1B_REVISED_E11
STABR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,353601881,641431359,682495826,516985454,503771388,437679839,313184024,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,353601881,641431359,682495826,516985454,503771388,437679839,R,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,353601881,641431359,682495826,516985454,503771388,A,313184024,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,353601881,641431359,682495826,516985454,503771388,A,R,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,R,641431359,682495826,516985454,503771388,437679839,313184024,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,R,641431359,682495826,516985454,503771388,437679839,R,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,R,641431359,682495826,516985454,503771388,A,313184024,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,338354588,578916138,R,641431359,682495826,516985454,503771388,A,R,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,R,578916138,353601881,641431359,682495826,516985454,503771388,437679839,313184024,531231377
AK,551604528,605309764,370381038,673081514,450803336,312231004,666918708,413383093,431822768,561122474,...,R,578916138,353601881,641431359,682495826,516985454,503771388,437679839,R,531231377


In [23]:
merge_all_with(fiscal_frames, "E12")

1988_V_1B_REVISED is missing E12
1987_V_1B_REVISED is missing E12
Processing frame: 2005_V_1B_REVISED
Processing frame: 1991_V_1B_REVISED


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(       E12_1991_V_1B_REVISED  E12_2005_V_1B_REVISED
 STABR                                              
 AK                  66610661              186133045
 AK                       NaN              186133045
 AL                 264252450              680250673
 AL                       NaN              680250673
 AR                 155362945              367406266
 AR                       NaN              367406266
 AS                   1317746                3418796
 AS                       NaN                3418796
 AZ                 194754955              557575690
 AZ                       NaN              557575690
 CA                2218640074             6678136999
 CA                       NaN             6678136999
 CO                 249389441              519119702
 CO                       NaN              519119702
 CT                 460789724              991740106
 CT                       NaN              991740106
 DC                  43806480               40

In [24]:
all_have_cols(nonfiscal_frames, ["STABR"])

1994_95_V_1B is missing STABR
1993_94_V_1B is missing STABR


['2002_03_V_1B',
 '2006_07_V_1C',
 '1992_93_V_1C',
 '2007_08_V_1B',
 '1998_99_V_1B',
 '1991_92_V_1C',
 '2013_14_V_1A',
 '1987_88_V_1C',
 '2004_05_V_1F',
 '2001_02_V_1C',
 '1989_90_V_1C',
 '2003_04_V_1B',
 '1990_91_V_1C',
 '2010_11_V_1A',
 '1988_89_V_1C',
 '1986_87_V_1C',
 '1995_96_V_1B',
 '2011_12_V_1A',
 '2005_06_V_1B',
 '1996_97_V_1C',
 '1999_2000_V_1B',
 '2012_13_V_1A',
 '1997_98_V_1C',
 '2000_01_V_1C',
 '2009_10_V_1B',
 '2008_09_V_1C']

In [25]:
fiscal_frames.keys()

['2005_V_1B_REVISED',
 '1991_V_1B_REVISED',
 '1998_V_1B_REVISED',
 '2002_V_1D_REVISED',
 '2004_V_1B_REVISED',
 '2013_V_1A_PROVISIONAL',
 '2010_V_1A_PROVISIONAL',
 '1993_V_1B_REVISED',
 '2008_V_1B_REVISED',
 '1992_V_1B_REVISED',
 '1990_V_1B_REVISED',
 '1996_V_1B_REVISED',
 '1999_V_1B_REVISED',
 '2001_V_1B_REVISED',
 '2003_V_1B_REVISED',
 '1988_V_1B_REVISED',
 '2007_V_1B_REVISED',
 '1997_V_1B_REVISED',
 '1995_V_1B_REVISED',
 '2011_V_1A_PRELIMINARY',
 '1989_V_1B_REVISED',
 '1987_V_1B_REVISED',
 '2000_V_1B_REVISED',
 '2012_V_1A_PROVISIONAL',
 '1994_V_1B_REVISED',
 '2009_V_1B_REVISED',
 '2006_V_1B_REVISED']

In [26]:
nonfiscal_frames.keys()

['2002_03_V_1B',
 '2006_07_V_1C',
 '1992_93_V_1C',
 '2007_08_V_1B',
 '1998_99_V_1B',
 '1991_92_V_1C',
 '1994_95_V_1B',
 '2013_14_V_1A',
 '1987_88_V_1C',
 '2004_05_V_1F',
 '2001_02_V_1C',
 '1989_90_V_1C',
 '2003_04_V_1B',
 '1990_91_V_1C',
 '2010_11_V_1A',
 '1988_89_V_1C',
 '1986_87_V_1C',
 '1995_96_V_1B',
 '2011_12_V_1A',
 '2005_06_V_1B',
 '1996_97_V_1C',
 '1999_2000_V_1B',
 '2012_13_V_1A',
 '1997_98_V_1C',
 '1993_94_V_1B',
 '2000_01_V_1C',
 '2009_10_V_1B',
 '2008_09_V_1C']

In [27]:
#fiscal_frames_with_all_cols = all_have_cols(fiscal_frames, ["STABR","E12","E11"])

In [28]:
#nonfiscal_frames_with_all_cols = all_have_cols(nonfiscal_frames, ["STABR","TOTTCH","AIDES"])

In [29]:
#fiscal_frames_with_all_cols

In [30]:
#nonfiscal_frames_with_all_cols

In [31]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [32]:
good_years = {
    "nonfiscal":{
        "2013":{
            "sheet_name":'2013_14_V_1A',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
         "2012":{
            "sheet_name":'2013_14_V_1A',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2011":{
            "sheet_name": '2011_12_V_1A',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2010":{
            "sheet_name": '2010_11_V_1A',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2009":{
            "sheet_name": '2009_10_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2008":{
            "sheet_name": '2008_09_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2007":{
            "sheet_name":  '2007_08_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2006":{
            "sheet_name":   '2006_07_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2005":{
            "sheet_name": '2005_06_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2004":{
            "sheet_name": '2004_05_V_1F',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2003":{
            "sheet_name": '2003_04_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2002":{
            "sheet_name": '2002_03_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2001":{
            "sheet_name":  '2001_02_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "2000":{
            "sheet_name":  '2000_01_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1999":{
            "sheet_name": '1999_2000_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1998":{
            "sheet_name": '1998_99_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1997":{
            "sheet_name": '1997_98_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1996":{
            "sheet_name": '1996_97_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1995":{
            "sheet_name": '1995_96_V_1B',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1994":{
            "sheet_name": '1994_95_V_1B',
            "state_column":"STATE",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1993":{
            "sheet_name": '1993_94_V_1B',
            "state_column":"STATE",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1992":{
            "sheet_name": '1992_93_V_1C',
            "state_column":"STABR",
            "teacher_column":"TOTTCH",
            "aides_column":"AIDES"
        },
        "1991":{
            "sheet_name": '1991_92_V_1C',
            "state_column":"STABR",
            "teacher_column":'TOTAL TEACHERS', 
            "aides_column":'INSTRUCTIONAL AIDES',
        }
    },
    "fiscal":{
        "2013":{
            "sheet_name": '2013_V_1A_PROVISIONAL',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",
        },
        "2012":{
            "sheet_name": '2012_V_1A_PROVISIONAL',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",
        },
        "2011":{
            "sheet_name":'2011_V_1A_PRELIMINARY',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2010":{
            "sheet_name":'2010_V_1A_PROVISIONAL',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2009":{
            "sheet_name":'2009_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2008":{
            "sheet_name":'2008_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2007":{
            "sheet_name":'2007_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2006":{
            "sheet_name":'2006_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2005":{
            "sheet_name":'2005_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2004":{
            "sheet_name":'2004_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2003":{
            "sheet_name":'2003_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2002":{
            "sheet_name":'2002_V_1D_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "2000":{
            "sheet_name":'2000_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        } ,
        "2001":{
            "sheet_name":'2001_V_1B_REVISED',#'2000_V_1A_PROVISIONAL',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1999":{
            "sheet_name":'1999_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1998":{
            "sheet_name":'1998_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1997":{
            "sheet_name":'1997_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1996":{
            "sheet_name":'1996_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1995":{
            "sheet_name":'1995_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1994":{
            "sheet_name":'1994_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1993":{
            "sheet_name":'1993_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1992":{
            "sheet_name":'1992_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        },
        "1991":{
            "sheet_name":'1991_V_1B_REVISED',
            "state_column":"STABR",
            "E11_column":"E11",
            "E12_column":"E12",        
        } 
    }
}

def get_year_nonfiscal(year):
    record = good_years["nonfiscal"][year]
    sheet_name = record["sheet_name"]
    state_column = record["state_column"]
    teacher_column = record["teacher_column"]
    aides_column = record["aides_column"]

    try:
        fr = nonfiscal_frames[sheet_name][[state_column,teacher_column,aides_column]]
        fr[state_column] = fr[state_column].apply(lambda x: str(x).upper().strip())
        fr.columns = "STATE","TEACHERS","AIDES"
    
    except:
        print nonfiscal_frames[sheet_name].columns
    
    #fr["YEAR"] = year
    return fr

def get_year_fiscal(year):
    record = good_years["fiscal"][year]
    sheet_name = record["sheet_name"]
    
    state_column = record["state_column"]
    E11_column = record["E11_column"]
    E12_column = record["E12_column"]

    fr = fiscal_frames[sheet_name][[state_column,E11_column,E12_column]]
    fr[state_column] = fr[state_column].apply(lambda x: str(x).upper().strip())
    fr.columns = "STATE","E11","E12"
    #fr["YEAR"] = year
    return fr

def postal(state_name):
    if state_name.title() in us_state_abbrev:
        return us_state_abbrev[state_name.title()]
    return state_name

def get_year(year):
    left = get_year_fiscal(year).set_index("STATE")
    right = get_year_nonfiscal(year)
    
    # Convert state names to postal abbreviations
    right["STATE"] = right["STATE"].apply(lambda x: postal(x))
    right = right.set_index("STATE")

    
    fr = left.join(right)
    fr["E11"] = pd.to_numeric(fr["E11"], errors="coerce")
    fr = fr[fr["E11"].notnull()]
    fr["year"] = year
    return fr

In [33]:
# nonfiscal_frames['2013_14_V_1A'][["STABR","TOTTCH","AIDES"]]
# get_year_nonfiscal("2012")
# get_year_fiscal("2013")

# get_year("2013")
get_year("1999")
get_year("1998")
get_year("1997")
get_year("1996")
get_year("1995")
get_year("1994")

get_year("1993")

get_year("1992")
get_year("1991")
#ERRORS - Bad state name cols


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,E11,E12,TEACHERS,AIDES,year
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,313184024,66610661,7118.0,1626,1991
AL,1182801607,264252450,40480.0,3543,1991
AR,668686118,155362945,25785.0,4511,1991
AS,8481330,1317746,671.0,12,1991
AZ,1204234613,194754955,33978.0,5489,1991
CA,10002472429,2218640074,224000.0,55290,1991
CO,1254169917,249389441,33093.0,4441,1991
CT,1599856634,460789724,34383.0,5547,1991
DC,239072937,43806480,6346.0,424,1991
DE,244074668,74160745,6095.0,732,1991


In [34]:
def get_state_year(state, year):
    fr = get_year(str(year))
    #fr = fr[fr["STATE"] == state]
    return fr.loc[state]

def get_state (state):
    record = {}
    total_dollars = 0
    total_educators = 0
    for year in range(1991,2014):
        record[year] = get_state_year(state, year)[["E11","E12","TEACHERS","AIDES"]]
        continue
        row = {
            "DOLLARS": fr["E11"],
            "EDUCATORS": fr["TEACHERS"]
        }
        
        row = {
            state: float(fr["E11"]) / float(fr["TEACHERS"])
        }
        
        try:
            #row = (float(fr["E11"]) + float(fr["E12"])) / (float(fr["TEACHERS"]) + float(fr["AIDES"]))
            row = (float(fr["E11"]) + float(fr["E12"]))
        except:
            row = None
            
        record[year] = row
        
    return record

In [35]:
all = {}
for state_name in us_state_abbrev.keys():
    stabr = us_state_abbrev[state_name]
    print "Getting:", stabr

    all[stabr] = get_state(stabr)

Getting: MS


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Getting: OK
Getting: DE
Getting: MN
Getting: IL
Getting: AR
Getting: NM
Getting: IN
Getting: LA
Getting: TX
Getting: WI
Getting: KS
Getting: CT
Getting: CA
Getting: WV
Getting: GA
Getting: ND
Getting: PA
Getting: AK
Getting: MO
Getting: SD
Getting: CO
Getting: NJ
Getting: WA
Getting: NY
Getting: NV
Getting: MD
Getting: ID
Getting: WY
Getting: AZ
Getting: IA
Getting: MI
Getting: UT
Getting: VA
Getting: OR
Getting: MT
Getting: NH
Getting: MA
Getting: SC
Getting: VT
Getting: FL
Getting: HI
Getting: KY
Getting: RI
Getting: NE
Getting: OH
Getting: AL
Getting: NC
Getting: TN
Getting: ME


In [36]:
pd.DataFrame(all).transpose().loc["CT"].to_frame().transpose()

Unnamed: 0,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
CT,E11 1.59986e+09 E12 46078972...,E11 1.81976e+09 E12 40171471...,E11 1.84759e+09 E12 40814507...,E11 1.9495e+09 E12 435277381 ...,E11 2.00439e+09 E12 46315151...,E11 2.05731e+09 E12 46038619...,E11 2.10846e+09 E12 48098065...,E11 2.19351e+09 E12 51691216...,E11 2.30219e+09 E12 55516416...,E11 2.42165e+09 E12 60344863...,...,E11 2880450595 E12 815592839 ...,E11 2986242698 E12 991740106 ...,E11 3107054505 E12 1108456056 ...,E11 3221094679 E12 1141204140 ...,E11 3343802807 E12 1263905104 ...,E11 3444803590 E12 1330076790 ...,E11 3544680769 E12 1371504932 ...,E11 3601405478 E12 1448012365 ...,E11 3597321000 E12 1602150113 ...,E11 3671907651 E12 1633961459 ...


In [37]:
pd.DataFrame(all).transpose()[2011].sum()

E11         212397701170
E12          75131334960
TEACHERS     3.09698e+06
AIDES             708932
dtype: object

In [38]:
def postals():
    ret = []
    for state_name in us_state_abbrev:
        ret.append(us_state_abbrev[state_name])
    return ret
all_postals = postals()

In [39]:
def sum_col(year, states, col):
    total = 0
    for state in states:
        try: 
            total += float(all[state][year][col])
        except:
            print "ERROR Converting to numeric value: " + all[state][year][col] + " in ", state , str(year), " " + col
            total
    return total

def sum_salaries(year, states):
    return sum_col(year, states, "E11")

def sum_benefits(year, states):
    return sum_col(year, states, "E12")

def compensation(year, states):
    return sum_salaries(year, states) + sum_benefits(year, states)


def sum_aides (year, states):
    return sum_col (year,states,"AIDES")
def sum_teachers (year, states):
    return sum_col(year, states,"TEACHERS")
def educators(year, states):
    return sum_teachers(year, states) + sum_aides(year, states)

def per_teacher(year, states):
    return compensation (year, states) / educators(year, states)

In [40]:
educators(2011, postals())

3805916.0800000005

In [41]:
compensation(2011, postals())

287529036130.0

In [42]:
sum_salaries(1991, postals())

90503211216.0

In [43]:
sum_salaries(2011, postals())

212397701170.0

In [44]:
sum_benefits(2011, postals())

75131334960.0

In [45]:
sum_teachers(2011, postals())

3096984.4900000007

In [46]:
per_teacher(2011, postals())

75547.9180534112

In [47]:
per_teacher(1991, postals())

ERROR Converting to numeric value: M in  NV 1991  AIDES


39776.61876593567

In [48]:
per_teacher(1991, ["CT"])

51606.47027297771

In [49]:
per_teacher(2011, ["CT"])

86971.94139103287

In [50]:
def all_years(f, state):
    ret = {}
    values = []
    indexes = []
    for year in range(1991, 2014):
        indexes.append(year)
        values.append(int(f(year, state)))
        ret[year] = f(year, state)
    return indexes, values

In [51]:
from uscpi import UsCpi
def inflater_2011(row):
    #print "inflating", row
    #print "---------"
    year = row["year"]
    old_dollars = row["amount"]
    #print "inflating", old_dollars, " in ", str(year)
    cpi = UsCpi()
    return int(cpi.value_with_inflation(old_dollars, year, 2011))


ct_pay_timeline = all_years(per_teacher, ["CT"])
fr = pd.DataFrame(ct_pay_timeline[1],index=ct_pay_timeline[0]).reset_index()
fr.columns = "year","amount"
fr["2011 dollars"] = fr.apply(inflater_2011, axis=1)
print fr.to_csv(index=False,sep="\t")
#inflater_2011(1991, 100)

year	amount	2011 dollars
1991	51606	85229
1992	54467	87325
1993	55418	86267
1994	56594	85898
1995	56607	83550
1996	57146	81927
1997	55640	77978
1998	55630	76769
1999	56636	76468
2000	58177	75994
2001	59722	75854
2002	62504	78152
2003	65392	79941
2004	71772	85464
2005	76243	87813
2006	80312	89609
2007	82237	89216
2008	74069	77384
2009	82285	86274
2010	83911	86559
2011	86971	86971
2012	88174	86386
2013	89979	86882



In [52]:
us_pay_timeline = all_years(per_teacher, postals())
fr = pd.DataFrame(us_pay_timeline[1],index=ct_pay_timeline[0]).reset_index()
fr.columns = "year","amount"
fr["2011 dollars"] = fr.apply(inflater_2011, axis=1)
print fr.to_csv(index=False,sep="\t")


ERROR Converting to numeric value: M in  NV 1991  AIDES
ERROR Converting to numeric value: M in  NV 1991  AIDES
year	amount	2011 dollars
1991	39776	65691
1992	41111	65912
1993	42349	65923
1994	43275	65683
1995	44895	66264
1996	45491	65217
1997	46347	64954
1998	47086	64978
1999	48019	64834
2000	50595	66090
2001	52816	67082
2002	55317	69165
2003	57531	70331
2004	58718	69920
2005	61026	70287
2006	63459	70805
2007	66658	72315
2008	69749	72870
2009	71899	75385
2010	75238	77612
2011	75547	75547
2012	74650	73136
2013	75780	73171



In [53]:
all_years(sum_salaries, ["CT"])

([1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011,
  2012,
  2013],
 [1599856634,
  1819761594,
  1847591468,
  1949503851,
  2004390769,
  2057311681,
  2108462189,
  2193508290,
  2302192425,
  2421649869,
  2547048019,
  2688388935,
  2783320438,
  2880450595,
  2986242698,
  3107054505,
  3221094679,
  3343802807,
  3444803590,
  3544680769,
  3601405478,
  3597321000,
  3671907651])

In [54]:
all_years(sum_salaries, postals())

([1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011,
  2012,
  2013],
 [90503211216,
  94774036277,
  98843841911,
  103254981696,
  109463220853,
  114349123457,
  120951760602,
  128091305782,
  135350128392,
  144812885542,
  154227585981,
  162273899367,
  168549042165,
  172642709696,
  178764578892,
  186557873104,
  196530065403,
  206268627828,
  212567590435,
  213986790309,
  212397701170,
  210099775227,
  211982586420])

In [55]:
all_years(sum_benefits, ["CT"])

([1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011,
  2012,
  2013],
 [460789724,
  401714711,
  408145079,
  435277381,
  463151513,
  460386196,
  480980653,
  516912161,
  555164168,
  603448631,
  655894792,
  710126273,
  743765353,
  815592839,
  991740106,
  1108456056,
  1141204140,
  1263905104,
  1330076790,
  1371504932,
  1448012365,
  1602150113,
  1633961459])

In [56]:
all_years(educators,["CT"])

([1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011,
  2012,
  2013],
 [39930,
  40785,
  40704,
  42138,
  43590,
  44057,
  46539,
  48722,
  50451,
  51998,
  53630,
  54372,
  53937,
  51497,
  52175,
  52489,
  53045,
  62208,
  58028,
  58587,
  58058,
  58967,
  58967])

In [57]:
all_years(sum_salaries, ["CT"])

([1991,
  1992,
  1993,
  1994,
  1995,
  1996,
  1997,
  1998,
  1999,
  2000,
  2001,
  2002,
  2003,
  2004,
  2005,
  2006,
  2007,
  2008,
  2009,
  2010,
  2011,
  2012,
  2013],
 [1599856634,
  1819761594,
  1847591468,
  1949503851,
  2004390769,
  2057311681,
  2108462189,
  2193508290,
  2302192425,
  2421649869,
  2547048019,
  2688388935,
  2783320438,
  2880450595,
  2986242698,
  3107054505,
  3221094679,
  3343802807,
  3444803590,
  3544680769,
  3601405478,
  3597321000,
  3671907651])

In [58]:
nonfiscal_frames["2011_12_V_1A"].columns

Index([u'SURVYEAR', u'FIPST', u'STABR', u'SEANAME', u'STREET', u'CITY',
       u'STNAME', u'ZIP', u'ZIP4', u'PHONE',
       ...
       u'HIUGM', u'HIUGF', u'BLUGM', u'BLUGF', u'WHUGM', u'WHUGF', u'HPUGM',
       u'HPUGF', u'TRUGM', u'TRUGF'],
      dtype='object', length=256)