In [22]:
import numpy as np
import pandas as pd
import json

## Mapping NAICS Codes to ISIC Sectors

naics sectors: https://www.census.gov/programs-surveys/economic-census/year/2022/guidance/understanding-naics.html

isic sectors: https://unstats.un.org/unsd/classifications/Econ/Download/In%20Text/ISIC_Rev_4_publication_English.pdf

In [23]:
naics_sectors = {
    'Agriculture, Forestry, Fishing and Hunting': ['11'],
    'Mining, Quarrying, and Oil and Gas Extraction': ['21'],
    'Utilities': ['22'],
    'Construction': ['23'],
    'Manufacturing': ['31', '32', '33'],
    'Wholesale Trade': ['42'],
    'Retail Trade': ['44','45'],
    'Transportation and Warehousing': ['48', '49'],
    'Information': ['51'],
    'Finance and Insurance': ['52'],
    'Real Estate and Rental and Leasing': ['53'],
    'Professional, Scientific, and Technical Services': ['54'],
    'Management of Companies and Enterprises': ['55'],
    'Administrative and Support and Waste Management and Remediation Services': ['56'],
    'Educational Services': ['61'],
    'Health Care and Social Assistance': ['62'], 
    'Arts, Entertainment, and Recreation': ['71'],
    'Accommodation and Food Services': ['72'],
    'Other Services': ['81'],
    'Public Administration': ['92']
}


In [25]:
# from https://unstats.un.org/unsd/classifications/Econ/Download/In%20Text/ISIC_Rev_4_publication_English.pdf
isic_sectors_df = pd.read_csv('raw_data/ISIC_SECTORS.csv')

isic_sectors = {}

for i, row in isic_sectors_df.iterrows():
    
    subs = [int(x) for x in row[1].split('–')]
    if len(subs) > 1:
        subs = np.arange(subs[0], subs[1] + 1)
    subs = ['0' + str(x) if len(str(x)) < 2 else str(x) for x in subs]

    for s in subs:
        isic_sectors[s] = row[0]

In [27]:
# from https://unstats.un.org/unsd/classifications/Econ/isic
# specifically https://unstats.un.org/unsd/classifications/Econ/tables/ISIC/NAICS2012US-ISIC4/NAICS2012US-ISIC4.txt
naics_isic_df = pd.read_csv('raw_data/NAICS_ISIC.csv', dtype={'NAICS2012Code': str, 'ISIC4Code': str})

In [28]:

def naics_breakdown(naics_isic_df, digits=3):

    naics_isic_df['ISICSec'] = naics_isic_df['ISIC4Code'].map(lambda x: isic_sectors[x[0:2]] )
    naics_isic_df[f'NAICS_{digits}'] = naics_isic_df['NAICS2012Code'].map(lambda x: x[0:digits] if type(x) is str else None)
    pd.set_option('display.max_rows', 200)

    # inter_map shows match list
    inter_map = naics_isic_df[[f'NAICS_{digits}', 'ISICSec', 'NAICS2012Code']].groupby([f'NAICS_{digits}', 'ISICSec']).agg({'count'})
    inter_map['best_match'] = None

    matches = []
    for code in list(np.unique(naics_isic_df[f'NAICS_{digits}'].dropna())):
        best_match = inter_map.loc[code][('NAICS2012Code','count')].map(
            lambda x: x == max(inter_map.loc[code][('NAICS2012Code','count')])
        )
        for m in best_match: matches.append(m)

    naics_isicsec = {}

    for i in inter_map[matches].index:
        if i[0] in naics_isicsec.keys():
            naics_isicsec[i[0]].append(i[1])
        else: naics_isicsec[i[0]] = [i[1]]

    # arbitrarily picks first with tied number of sub sectors
    for key, val in naics_isicsec.items():
        naics_isicsec[key] = val[0]

    return naics_isicsec

In [29]:
naics_walk = {}
for i in range(5): 
    naics_walk = { **naics_walk, **naics_breakdown(naics_isic_df, i)}

naics_3_isicsec = naics_breakdown(naics_isic_df, 3)

with open('naics_isic_map.json', 'w') as f:
    f.write(json.dumps(naics_walk))

## Implementing VW Sectors to ISIC

In [30]:
# compares to "Replication Packet/Converting SIC to NAICS/SIC_NAICS_BEA_allsec.do"
# VW aggregated industries
VW_groups = {
    'Agriculture': [111,112,113,114,115], # excluded
    'Mining': [212],
    'Oil/Gas': [211], # excluded
    'Mining Support': ['212'], # excluded
    'Util': [22], # 221 doesn't exist in NAICS codes in asset expenditures but in VW mapping
    'Const': [23],
    'Wood': [321],
    'Minerals': [327],
    'Primary Metals': [331],
    'Fabricated Metals': [332],
    'Machinery': [333],
    'Computers': [334],
    'Electrical': [335],
    'Vehicles': [3361,3362,3363],
    'Transport': [3364,3365,3366,3367,3368,3369],
    'Furniture': [337],
    'Misc Mfg': [339],
    'Food Mfg': [311,312],
    'Textile': [313,314],
    'Apparel': [315,316],
    'Paper': [322],
    'Printing': [323],
    'Petroleum': [324],
    'Chemical': [325],
    'Plastics': [326],
    'Wholesale Trade': [42],
    'Retail Trade': [44,45],
    'Transit/Warehouse': [48,49],
    'Info': [51],
    'Finance/Insurance': [52],
    'Real Estate': [531],
    'Rental': [532,533], # excluded
    'Prof/Tech': [54],
    'Mgmt': [55],
    'Admin': [561,562], # just 56 in the VW mapping
    'Educ': [61],
    'Health': [62],
    'Arts': [71],
    'Accom': [721],
    'Food Services': [722],
    'Other Services': [81]
}

In [31]:
VW_isicsec = {}
for key, vals in VW_groups.items():
    VW_isicsec[key] = []
    for v in vals:
        code = str(v)
        if len(code) == 3:
            VW_isicsec[key].append(naics_3_isicsec[code])
        elif len(code) == 2:
            matches = pd.Series(naics_3_isicsec.keys())[pd.Series(naics_3_isicsec.keys()).map(lambda x: x[0:2]) == code]
            for match in matches:
                VW_isicsec[key].append(naics_3_isicsec[match])
        elif len(code) == 4:
            VW_isicsec[key].append(naics_3_isicsec[code[0:3]])
    VW_isicsec[key] = list(np.unique(VW_isicsec[key]))

In [32]:
isicsec_VW = {}
for vw, sec in VW_isicsec.items():
    for sec in sec:
        if not sec in isicsec_VW.keys():
            isicsec_VW[sec] = [vw]
        else: isicsec_VW[sec].append(vw)

In [41]:
with open('isic_vw.json', 'w') as f:
    f.write(json.dumps(isicsec_VW))

## Implementing BEA to ISIC Sectors

In [34]:
expenditure_data = './raw_data/assetInvestments.xlsx'

In [35]:
readme = pd.read_excel(expenditure_data, header=14, converters={'BEA CODE': str, '2012 NAICS Codes': str})
readme = readme[['BEA CODE', '2012 NAICS Codes']][readme['BEA CODE'].map(lambda x: not (type(x) is float or x == '--------') )]
bea_naics_map = {}
for i, row in readme.iterrows():
    naics = row[1].split(',')
    for n in naics:
        if '-' in naics[0]:
            temp = naics[0].split('-')
            print(temp)
            naics = [str(x) for x in np.arange(int(temp[0]),int(temp[0][:-1] + temp[1]) + 1)]
    bea_naics_map[row[0]] = [n.strip() for n in naics]

['3361', '3']
['3364', '9']
['44', '5']


In [36]:
display(bea_naics_map)

{'110C': ['111', '112'],
 '113F': ['113', '114', '115'],
 '2110': ['211'],
 '2120': ['212'],
 '2130': ['213'],
 '2200': ['22'],
 '2300': ['23'],
 '3210': ['321'],
 '3270': ['327'],
 '3310': ['331'],
 '3320': ['332'],
 '3330': ['333'],
 '3340': ['334'],
 '3350': ['335'],
 '336M': ['3361', '3362', '3363'],
 '336O': ['3364', '3365', '3366', '3367', '3368', '3369'],
 '3370': ['337'],
 '338A': ['339'],
 '311A': ['311', '312'],
 '313T': ['313', '314'],
 '315A': ['315', '316'],
 '3220': ['322'],
 '3230': ['323'],
 '3240': ['324'],
 '3250': ['325'],
 '3260': ['326'],
 '4200': ['42'],
 '44RT': ['44', '45'],
 '4810': ['481'],
 '4820': ['482'],
 '4830': ['483'],
 '4840': ['484'],
 '4850': ['485'],
 '4860': ['486'],
 '487S': ['487', '488', '492'],
 '4930': ['493'],
 '5110': ['511'],
 '5120': ['512'],
 '5130': ['515', '517'],
 '5140': ['518', '519'],
 '5210': ['521'],
 '5220': ['522'],
 '5230': ['523'],
 '5240': ['524'],
 '5250': ['525'],
 '5310': ['531'],
 '5320': ['532', '533'],
 '5411': ['5411']

In [37]:
with open('naics_isic_map.json', 'r') as f:
    naics_isic = json.loads(f.read())

# i can make a map to have 2,3,4 digits naics codes mapped in the same way i map 3 digits ones
temp_map = {
    # '22': 'D',
    # '23': 'F',
    # '42': 'G',
    # '44': 'G',
    # '45': 'G',
    # '55': 'K',
    # '61': 'P'
}

print(naics_isic)

naics_isic = {**naics_isic, **temp_map}

bea_isic = {}
for key, val in bea_naics_map.items():
    bea_isic[key] = np.unique([naics_isic[v[0:3]] for v in val])[0]

with open('bea_isic_map.json', 'w') as f:
    f.write(json.dumps(bea_isic))

{'': 'C', '1': 'A', '2': 'F', '3': 'C', '4': 'G', '5': 'N', '6': 'Q', '7': 'R', '8': 'S', '9': 'O', '11': 'A', '21': 'B', '22': 'D', '23': 'F', '31': 'C', '32': 'C', '33': 'C', '42': 'G', '44': 'G', '45': 'G', '48': 'H', '49': 'H', '51': 'J', '52': 'K', '53': 'N', '54': 'M', '55': 'K', '56': 'N', '61': 'P', '62': 'Q', '71': 'R', '72': 'I', '81': 'S', '92': 'O', '111': 'A', '112': 'A', '113': 'A', '114': 'A', '115': 'A', '211': 'B', '212': 'B', '213': 'B', '221': 'D', '236': 'F', '237': 'F', '238': 'F', '311': 'C', '312': 'C', '313': 'C', '314': 'C', '315': 'C', '316': 'C', '321': 'C', '322': 'C', '323': 'C', '324': 'C', '325': 'C', '326': 'C', '327': 'C', '331': 'C', '332': 'C', '333': 'C', '334': 'C', '335': 'C', '336': 'C', '337': 'C', '339': 'C', '423': 'G', '424': 'G', '425': 'G', '441': 'G', '442': 'G', '443': 'G', '444': 'G', '445': 'G', '446': 'G', '447': 'G', '448': 'G', '451': 'G', '452': 'G', '453': 'G', '454': 'G', '481': 'H', '482': 'H', '483': 'H', '484': 'H', '485': 'H', 

## Implementing BEA to VW

### Inverting BEA to NAICS to NAICS to BEA

In [39]:
naics_bea_map = {}
for bea, naics_vals in bea_naics_map.items():
    for ind in naics_vals:
        if not ind in naics_bea_map.keys():
            naics_bea_map[ind] = [bea]
        else: naics_bea_map[ind].append(bea)
naics_bea_map

{'111': ['110C'],
 '112': ['110C'],
 '113': ['113F'],
 '114': ['113F'],
 '115': ['113F'],
 '211': ['2110'],
 '212': ['2120'],
 '213': ['2130'],
 '22': ['2200'],
 '23': ['2300'],
 '321': ['3210'],
 '327': ['3270'],
 '331': ['3310'],
 '332': ['3320'],
 '333': ['3330'],
 '334': ['3340'],
 '335': ['3350'],
 '3361': ['336M'],
 '3362': ['336M'],
 '3363': ['336M'],
 '3364': ['336O'],
 '3365': ['336O'],
 '3366': ['336O'],
 '3367': ['336O'],
 '3368': ['336O'],
 '3369': ['336O'],
 '337': ['3370'],
 '339': ['338A'],
 '311': ['311A'],
 '312': ['311A'],
 '313': ['313T'],
 '314': ['313T'],
 '315': ['315A'],
 '316': ['315A'],
 '322': ['3220'],
 '323': ['3230'],
 '324': ['3240'],
 '325': ['3250'],
 '326': ['3260'],
 '42': ['4200'],
 '44': ['44RT'],
 '45': ['44RT'],
 '481': ['4810'],
 '482': ['4820'],
 '483': ['4830'],
 '484': ['4840'],
 '485': ['4850'],
 '486': ['4860'],
 '487': ['487S'],
 '488': ['487S'],
 '492': ['487S'],
 '493': ['4930'],
 '511': ['5110'],
 '512': ['5120'],
 '515': ['5130'],
 '517'

In [40]:
vw_bea_map = {}
for vw, naics_vals in VW_groups.items():
    
    for naics in naics_vals:
        try:
            if vw in vw_bea_map.keys():
                vw_bea_map[vw].append(naics_bea_map[str(naics)][0])
            else: 
                vw_bea_map[vw] = [naics_bea_map[str(naics)][0]]
        except:
            matches = [key for key in naics_bea_map.keys() if str(naics) in key]
            vw_bea_map[vw] = []
            for m in matches:
                for vals in naics_bea_map[m]:
                    vw_bea_map[vw].append(vals)

for key, val in vw_bea_map.items():
    vw_bea_map[key] = list(np.unique(val))


with open('vw_bea_map.json', 'w') as f:
    f.write(json.dumps(vw_bea_map))