In [1]:
# from BLS MASTER
import numpy as np
import pandas as pd

In [2]:
# Load each NAICS sector file and total every 'annual' column per `type_bea20`
# group; the resulting tables are stored in `d1` under 'geo_naics<code>'.
path = 'my_naics_chg/naics_'
codes = ['11', '21', '31', '51', '52', '54']

d1 = {}
for code in codes:
    sector = pd.read_csv(path + code + '.csv')
    by_geo = sector.groupby('type_bea20')

    # one summed Series per column whose name contains 'annual', placed side by side
    yearly_sums = [by_geo[col].sum() for col in sector.filter(regex='annual').columns]
    d1['geo_naics' + code] = pd.concat(yearly_sums, axis=1)

In [4]:
# spot check: column totals of the NAICS 21 table, summed over all type_bea20 groups
d1['geo_naics21'].sum(axis=0)

annual_avg_emplvl_90    470364.0
annual_avg_emplvl_00    372677.0
annual_avg_emplvl_10    560983.0
annual_avg_emplvl_20    453799.0
dtype: float64

In [5]:
# Regional totals: for every sector file, sum each 'annual' column per `Region`
# and store the table in `d2` under 'region_naics<code>'.
d2 = {}
for code in codes:
    sector = pd.read_csv(path + code + '.csv')
    by_region = sector.groupby('Region')

    # one summed Series per 'annual' column, joined column-wise
    yearly_sums = [by_region[col].sum() for col in sector.filter(regex='annual').columns]
    d2['region_naics' + code] = pd.concat(yearly_sums, axis=1)

In [7]:
# Alternative-region totals: for every sector file, sum each 'annual' column per
# `Region_alt` and store the table in `d2_2` under 'region_alt_naics<code>'.
d2_2 = {}
for code in codes:
    sector = pd.read_csv(path + code + '.csv')
    by_region_alt = sector.groupby('Region_alt')

    # one summed Series per 'annual' column, joined column-wise
    yearly_sums = [by_region_alt[col].sum() for col in sector.filter(regex='annual').columns]
    d2_2['region_alt_naics' + code] = pd.concat(yearly_sums, axis=1)

In [8]:
# Divisional totals: for every sector file, sum each 'annual' column per
# `Division` and store the table in `d2_3` under 'division_naics<code>'.
d2_3 = {}
for code in codes:
    sector = pd.read_csv(path + code + '.csv')
    by_division = sector.groupby('Division')

    # one summed Series per 'annual' column, joined column-wise
    yearly_sums = [by_division[col].sum() for col in sector.filter(regex='annual').columns]
    d2_3['division_naics' + code] = pd.concat(yearly_sums, axis=1)

In [9]:
# check if totals are equal
# Each breakdown is compared with the geography totals on its own. The earlier
# form, (geo + region) - (region_alt + division), also prints 0 when two
# breakdowns are short by the same amount (e.g. the same rows missing a Region
# and a Division, which groupby drops by default), so it could hide a mismatch.
others = [(d2, 'region_naics'), (d2_2, 'region_alt_naics'), (d2_3, 'division_naics')]
for code in codes:
    baseline = d1['geo_naics' + code].sum()
    gaps = [(tables[prefix + code].sum() - baseline).abs() for tables, prefix in others]

    # largest absolute gap per column; all zeros means the four breakdowns agree
    print(pd.concat(gaps, axis=1).max(axis=1))

annual_avg_emplvl_90    0.0
annual_avg_emplvl_00    0.0
annual_avg_emplvl_10    0.0
annual_avg_emplvl_20    0.0
dtype: float64
annual_avg_emplvl_90    0.0
annual_avg_emplvl_00    0.0
annual_avg_emplvl_10    0.0
annual_avg_emplvl_20    0.0
dtype: float64
annual_avg_emplvl_90    0.0
annual_avg_emplvl_00    0.0
annual_avg_emplvl_10    0.0
annual_avg_emplvl_20    0.0
dtype: float64
annual_avg_emplvl_90    0.0
annual_avg_emplvl_00    0.0
annual_avg_emplvl_10    0.0
annual_avg_emplvl_20    0.0
dtype: float64
annual_avg_emplvl_90    0.0
annual_avg_emplvl_00    0.0
annual_avg_emplvl_10    0.0
annual_avg_emplvl_20    0.0
dtype: float64
annual_avg_emplvl_90    0.0
annual_avg_emplvl_00    0.0
annual_avg_emplvl_10    0.0
annual_avg_emplvl_20    0.0
dtype: float64


In [14]:
# State totals: sum each 'annual' column per State while carrying the state's
# Region / Division / Region_alt labels along as ordinary columns. Tables are
# stored in `d3` under 'state_naics<code>'.
state_keys = ['State', 'Region', 'Division', 'Region_alt']

d3 = {}
for code in codes:
    sector = pd.read_csv(path + code + '.csv')
    by_state = sector.groupby(state_keys, as_index=False)

    # with as_index=False every piece carries the four key columns plus one summed column
    pieces = [by_state[col].sum() for col in sector.filter(regex='annual').columns]
    combined = pd.concat(pieces, axis=1)

    # the key columns repeat once per piece; keep only the first occurrence of each name
    first_seen = ~combined.columns.duplicated()
    d3['state_naics' + code] = combined.iloc[:, first_seen]

In [17]:
# create total jobs row
# Appends a 'Total' row (column sums) to every table in d1, d2, d2_2 and d2_3.
dicts = [d1, d2, d2_2, d2_3]
for d in dicts:
    for key in d.keys():
        frame = d[key]

        # Re-running this cell must not stack a second 'Total' row: its sum would
        # include the first one and double every figure derived from it.
        if 'Total' in frame.index:
            continue

        total = pd.DataFrame([frame.sum()], index=['Total'], columns=frame.columns)
        d[key] = pd.concat([frame, total])

In [18]:
# create rate of change function
def rate_chg(df, year1, year2, chg):
    """Store the change from column ``year1`` to column ``year2`` in ``df[chg]``.

    The value is ``(year2 - year1) / year1`` rounded to 4 decimals. Where
    ``year1`` equals 0 the raw difference ``year2 - year1`` (rounded to 4
    decimals) is stored instead of a ratio.

    ``df`` is modified in place; nothing is returned.
    """
    start = df[year1]
    delta = df[year2] - start

    relative = (delta / start).round(4)
    absolute = (delta / 1).round(4)

    # np.where picks element-wise, so the inf/NaN produced by dividing by a
    # zero base never reaches the output column.
    df[chg] = np.where(start == 0, absolute, relative)
    
# create geographical and regional national job share and change columns
# Adds to every table in d1, d2, d2_2 and d2_3:
#   emplpct_<yy>      - each row's share of the last row (the appended 'Total' row)
#   pct_chg_<yy>_<yy> - change between consecutive employment-level columns
#   pct_chg_90_20     - change between the 90 and 20 columns
dicts = [d1, d2, d2_2, d2_3]
for d in dicts:
    for key in d.keys():
        frame = d[key]

        # Pick the employment-level columns by name, not by position or by
        # "all columns": on a re-run the derived columns already exist and
        # would otherwise be treated as inputs, overwriting emplpct_* with junk.
        level_cols = list(frame.filter(regex='annual').columns)

        # job share (removing total jobs row); the Total row itself ends up NaN
        total_row = frame.iloc[-1]
        for col in level_cols:
            frame['emplpct_' + col[-2:]] = (frame[col].iloc[:-1] / total_row[col]).round(4)

        # % change between consecutive periods (90-00, 00-10, 10-20);
        # the suffix is the 4th '_'-separated part of the column name
        for start, end in zip(level_cols, level_cols[1:]):
            rate_chg(frame, start, end,
                     'pct_chg_' + start.split('_')[3] + '_' + end.split('_')[3])

        # overall % change 90-20
        rate_chg(frame, 'annual_avg_emplvl_90', 'annual_avg_emplvl_20', 'pct_chg_90_20')

In [19]:
# do same for states, make some tweaks for the different df structure (no totals row here)
# Adds to every table in d3:
#   emplpct_<yy>      - each state's share of the column sum
#   pct_chg_<yy>_<yy> - change between consecutive employment-level columns
#   pct_chg_90_20     - change between the 90 and 20 columns
for key in d3.keys():
    states = d3[key]

    # Pick the employment-level columns by name rather than `columns[4:]`: on a
    # re-run that slice also contains the derived columns, and e.g.
    # 'pct_chg_90_00' (suffix '00') would overwrite 'emplpct_00'.
    level_cols = list(states.filter(regex='annual').columns)

    # job share
    for col in level_cols:
        states['emplpct_' + col[-2:]] = (states[col] / states[col].sum()).round(4)

    # % change between consecutive periods (90-00, 00-10, 10-20);
    # the suffix is the 4th '_'-separated part of the column name
    for start, end in zip(level_cols, level_cols[1:]):
        rate_chg(states, start, end,
                 'pct_chg_' + start.split('_')[3] + '_' + end.split('_')[3])

    # overall % change 90-20
    rate_chg(states, 'annual_avg_emplvl_90', 'annual_avg_emplvl_20', 'pct_chg_90_20')

In [20]:
# export to CSVs
# One file per table: analysis/<prefix>_naics_<code>.csv, where <code> is the
# last two characters of the dictionary key.
exports = [
    ('geo', d1),
    ('region', d2),
    ('region_alt', d2_2),
    ('division', d2_3),
    ('state', d3),
]

for prefix, tables in exports:
    for key in tables.keys():
        table = tables[key]

        # the alternative-region tables are written without their 'Other' row
        if tables is d2_2:
            table = table.drop('Other')

        table.to_csv('analysis/' + prefix + '_naics_' + key[-2:] + '.csv')

In [21]:
# NAICS 21 state table ordered by the pct_chg_00_10 column, largest value first
d3['state_naics21'].sort_values(by='pct_chg_00_10', ascending=False)

Unnamed: 0,State,Region,Division,Region_alt,annual_avg_emplvl_90,annual_avg_emplvl_00,annual_avg_emplvl_10,annual_avg_emplvl_20,emplpct_90,emplpct_00,emplpct_10,emplpct_20,pct_chg_90_00,pct_chg_00_10,pct_chg_10_20,pct_chg_90_20
34,North Dakota,Midwest,West North Central,Midwest,107.0,244.0,7986.0,14180.0,0.0002,0.0007,0.0142,0.0312,1.2804,31.7295,0.7756,131.5234
26,Montana,West,Mountain,Interior Northwest,1218.0,1053.0,4464.0,3176.0,0.0026,0.0028,0.008,0.007,-0.1355,3.2393,-0.2885,1.6076
27,Nebraska,Midwest,West North Central,Midwest,146.0,114.0,459.0,308.0,0.0003,0.0003,0.0008,0.0007,-0.2192,3.0263,-0.329,1.1096
3,Arkansas,South,West South Central,South,2989.0,2233.0,6827.0,2717.0,0.0064,0.006,0.0122,0.006,-0.2529,2.0573,-0.602,-0.091
17,Kentucky,South,East South Central,South,11728.0,5587.0,16794.0,2539.0,0.0249,0.015,0.0299,0.0056,-0.5236,2.0059,-0.8488,-0.7835
5,Colorado,West,Mountain,Southwest,13955.0,9190.0,22515.0,19327.0,0.0297,0.0247,0.0401,0.0426,-0.3415,1.4499,-0.1416,0.385
36,Oklahoma,South,West South Central,South,24832.0,16416.0,38995.0,30281.0,0.0528,0.044,0.0695,0.0667,-0.3389,1.3754,-0.2235,0.2194
50,Wyoming,West,Mountain,Interior Northwest,10323.0,10082.0,23740.0,12132.0,0.0219,0.0271,0.0423,0.0267,-0.0233,1.3547,-0.489,0.1752
44,Utah,West,Mountain,Southwest,4030.0,4376.0,9616.0,8287.0,0.0086,0.0117,0.0171,0.0183,0.0859,1.1974,-0.1382,1.0563
25,Missouri,Midwest,West North Central,Midwest,910.0,1015.0,1865.0,1845.0,0.0019,0.0027,0.0033,0.0041,0.1154,0.8374,-0.0107,1.0275
