In [1]:
# from BLS5
import numpy as np
import pandas as pd

In [2]:
# Read each NAICS sector file and total the columns in positions 8-11 by
# geography type; results are collected in the dictionary d1.
path = 'my_naics_chg/naics_'
codes = ['11', '21', '51', '52', '54']

# classification scheme used as the grouping key; it is the same for every
# sector, so it is set once rather than on every pass through the loop
classification = 'type_kurt20'

# empty lists for processing
shell = []
names = []

for code in codes:
    df = pd.read_csv(path + code + '.csv')

    # calculate totals by geography: one summed Series per column, placed
    # side by side (presumably the annual_avg_emplvl_* columns -- confirm
    # against the CSV layout)
    temp_list = [df.groupby(classification)[col].sum() for col in df.columns[8:12]]
    mynaics = pd.concat(temp_list, axis=1)

    shell.append(mynaics)
    names.append('geo_naics' + code)

# store values in dictionary, built once after every sector is processed
d1 = dict(zip(names, shell))

# NOTE: df is still bound to the last sector's frame (naics_54) at this point

In [3]:
# calculate regional totals and save to dictionary

# clear lists
shell = []
names = []

for code in codes:
    # re-read this sector's file so the totals belong to `code`; without
    # this, every pass would aggregate whatever frame df was last bound to
    df = pd.read_csv(path + code + '.csv')

    # one summed Series per column in positions 8-11, placed side by side
    temp_list = [df.groupby('region')[col].sum() for col in df.columns[8:12]]
    mynaics = pd.concat(temp_list, axis=1)

    shell.append(mynaics)
    names.append('region_naics' + code)

# store values in dictionary, built once after every sector is processed
d2 = dict(zip(names, shell))

In [4]:
# calculate state totals (preserving region) and save to dictionary

# clear lists
shell = []
names = []

for code in codes:
    # re-read this sector's file so the totals belong to `code`; without
    # this, every pass would aggregate whatever frame df was last bound to
    df = pd.read_csv(path + code + '.csv')

    # as_index=False keeps State and region as ordinary columns, so each
    # per-column result carries its own copy of them
    temp_list = [
        df.groupby(['State', 'region'], as_index=False)[col].sum()
        for col in df.columns[8:12]
    ]
    mynaics = pd.concat(temp_list, axis=1)

    # drop duplicate state and region cols, keeping the first occurrence
    mynaics = mynaics.iloc[:, ~mynaics.columns.duplicated()]

    shell.append(mynaics)
    names.append('state_naics' + code)

# store values in dictionary, built once after every sector is processed
d3 = dict(zip(names, shell))

In [5]:
# create rate of change function
def rate_chg(df, year1, year2, chg):
    """Add column ``chg`` to ``df`` holding the change from ``year1`` to ``year2``.

    Rows whose ``year1`` value is nonzero receive the relative change
    ``(year2 - year1) / year1``. Rows whose ``year1`` value is 0 receive the
    raw difference ``year2 - year1`` instead. All values are rounded to
    4 decimals. ``df`` is modified in place and nothing is returned.
    """
    base = df[year1]
    delta = df[year2] - base
    # dividing by 1 leaves the raw difference, used for the zero-base rows
    raw_change = (delta / 1).round(4)
    relative_change = (delta / base).round(4)
    df[chg] = np.where(base == 0, raw_change, relative_change)
    
# add national job-share and percent-change columns to the geography (d1)
# and region (d2) tables
dicts = [d1, d2]
for tables in dicts:
    for table in tables.values():
        # snapshot of the level columns before any derived column is appended
        level_cols = list(table.columns)

        # job share: each row's fraction of the column total, suffixed with
        # the last two characters of the source column name
        for level in level_cols:
            table['emplpct_' + level[-2:]] = (table[level] / table[level].sum()).round(4)

        for i in range(3):
            start, end = level_cols[i], level_cols[i + 1]

            # % change 90-00, 00-10, 10-20; the label is built from the
            # fourth '_'-separated token of each column name
            label = 'pct_chg_' + start.split('_')[3] + '_' + end.split('_')[3]
            rate_chg(table, start, end, label)

            # overall % change 90-20 (rate_chg applies the same formula);
            # refreshed on every pass, so the column is first created right
            # after the first period column
            rate_chg(table, 'annual_avg_emplvl_90', 'annual_avg_emplvl_20', 'pct_chg_90_20')

In [6]:
# same share / change columns for the state tables; the first two columns
# (the State and region keys) are skipped
for table in d3.values():
    # snapshot of the level columns before any derived column is appended
    level_cols = list(table.columns[2:])

    # job share: each row's fraction of the column total
    for level in level_cols:
        table['emplpct_' + level[-2:]] = (table[level] / table[level].sum()).round(4)

    for i in range(3):
        start, end = level_cols[i], level_cols[i + 1]

        # % change 90-00, 00-10, 10-20; the label is built from the fourth
        # '_'-separated token of each column name
        label = 'pct_chg_' + start.split('_')[3] + '_' + end.split('_')[3]
        rate_chg(table, start, end, label)

        # overall % change 90-20 (rate_chg applies the same formula);
        # refreshed on every pass, so the column is first created right
        # after the first period column
        rate_chg(table, 'annual_avg_emplvl_90', 'annual_avg_emplvl_20', 'pct_chg_90_20')

In [7]:
# write every table to analysis/<prefix><code>.csv, where <code> is the last
# two characters of the dictionary key; geography first, then region, state
exports = [
    ('analysis/geo_naics_', d1),
    ('analysis/region_naics_', d2),
    ('analysis/state_naics_', d3),
]
for prefix, tables in exports:
    for name, table in tables.items():
        table.to_csv(prefix + name[-2:] + '.csv')