In [123]:
import numpy as np
import pandas as pd

NAICS sectors: https://www.census.gov/programs-surveys/economic-census/year/2022/guidance/understanding-naics.html
ISIC sectors: https://unstats.un.org/unsd/classifications/Econ/Download/In%20Text/ISIC_Rev_4_publication_English.pdf

In [124]:
# NAICS sector title -> list of the two-digit code prefixes that make up the sector.
# Most sectors have a single prefix; a few span several (e.g. Manufacturing).
naics_sectors = {
    "Agriculture, Forestry, Fishing and Hunting": ["11"],
    "Mining, Quarrying, and Oil and Gas Extraction": ["21"],
    "Utilities": ["22"],
    "Construction": ["23"],
    "Manufacturing": ["31", "32", "33"],
    "Wholesale Trade": ["42"],
    "Retail Trade": ["44", "45"],
    "Transportation and Warehousing": ["48", "49"],
    "Information": ["51"],
    "Finance and Insurance": ["52"],
    "Real Estate and Rental and Leasing": ["53"],
    "Professional, Scientific, and Technical Services": ["54"],
    "Management of Companies and Enterprises": ["55"],
    "Administrative and Support and Waste Management and Remediation Services": ["56"],
    "Educational Services": ["61"],
    "Health Care and Social Assistance": ["62"],
    "Arts, Entertainment, and Recreation": ["71"],
    "Accommodation and Food Services": ["72"],
    "Other Services": ["81"],
    "Public Administration": ["92"],
}


In [125]:
# Build a lookup from two-digit ISIC division code (zero-padded string, e.g. '01')
# to the sector label found in the first column of ISIC_SECTORS.csv.
# Assumes the second column holds either a single division number or an
# en-dash-separated inclusive range such as '01–03' — TODO confirm against the CSV.
isic_sectors_df = pd.read_csv('ISIC_SECTORS.csv')

isic_sectors = {}

for _, row in isic_sectors_df.iterrows():
    # Positional access must go through .iloc: plain row[0] / row[1] on a
    # label-indexed Series relies on a deprecated positional fallback.
    sector = row.iloc[0]
    divisions = row.iloc[1]

    # str() keeps this working if pandas parsed a lone division number as an int.
    bounds = [int(part) for part in str(divisions).split('–')]
    first = bounds[0]
    last = bounds[1] if len(bounds) > 1 else bounds[0]

    # Expand the inclusive range and zero-pad so keys line up with the first
    # two characters of a string ISIC code.
    for division in range(first, last + 1):
        isic_sectors[str(division).zfill(2)] = sector

In [126]:
# Load the NAICS/ISIC concordance table. Both code columns are read as strings
# so they can be sliced by prefix later and keep any leading zeros.
naics_isic_df = pd.read_csv(
    'NAICS_ISIC.csv',
    dtype={'NAICS2012Code': str, 'ISIC4Code': str},
)

In [127]:
# Tag every concordance row with its ISIC sector (looked up from the first two
# characters of the ISIC code) and with its three-character NAICS prefix.
# Rows whose NAICS code is not a string get NAICS_3 = None and are dropped by
# the groupby below.
naics_isic_df['ISICSec'] = naics_isic_df['ISIC4Code'].map(lambda x: isic_sectors[x[0:2]])
naics_isic_df['NAICS_3'] = naics_isic_df['NAICS2012Code'].map(
    lambda x: x[0:3] if isinstance(x, str) else None
)
pd.set_option('display.max_rows', 200)

# Count concordance rows for every (NAICS_3, ISICSec) pair. Passing a list to
# agg keeps the two-level column index ('NAICS2012Code', 'count').
inter_map = (
    naics_isic_df[['NAICS_3', 'ISICSec', 'NAICS2012Code']]
    .groupby(['NAICS_3', 'ISICSec'])
    .agg(['count'])
)
inter_map['best_match'] = None

# Flag, within each NAICS_3 group, the ISIC sector(s) with the highest count.
# transform('max') broadcasts each group's maximum back onto its own rows, so
# the comparison is aligned on the index and ties are all flagged True.
pair_counts = inter_map[('NAICS2012Code', 'count')]
group_max = pair_counts.groupby(level='NAICS_3').transform('max')

# One boolean per row of inter_map, in index order; later cells use this list
# both as the best_match column and as a row mask.
matches = (pair_counts == group_max).tolist()

In [128]:
# `matches` holds one boolean per row of inter_map (True where the pair has the
# highest count within its NAICS_3 group), so it is stored as the flag column.
inter_map['best_match'] = matches
# Show the full pair-count table together with the best-match flags.
inter_map

Unnamed: 0_level_0,Unnamed: 1_level_0,NAICS2012Code,best_match
Unnamed: 0_level_1,Unnamed: 1_level_1,count,Unnamed: 3_level_1
NAICS_3,ISICSec,Unnamed: 2_level_2,Unnamed: 3_level_2
111,A,48,True
111,C,1,False
112,A,26,True
113,A,5,True
113,C,1,False
114,A,7,True
115,A,10,True
115,M,1,False
211,B,4,True
212,B,29,True


In [129]:
# For every three-digit NAICS prefix, collect the ISIC sector(s) flagged as the
# best match. More than one sector is collected when counts tie.
naics_3_isicsec = {}

for naics_3, isic_sec in inter_map[matches].index:
    naics_3_isicsec.setdefault(naics_3, []).append(isic_sec)

In [130]:
# Resolve ties: where a NAICS prefix has several best-matching ISIC sectors,
# keep only the first one in its list, so each prefix maps to a single sector.
naics_3_isicsec = {
    naics_3: tied_sectors[0] for naics_3, tied_sectors in naics_3_isicsec.items()
}

naics_3_isicsec

{'111': 'A',
 '112': 'A',
 '113': 'A',
 '114': 'A',
 '115': 'A',
 '211': 'B',
 '212': 'B',
 '213': 'B',
 '221': 'D',
 '236': 'F',
 '237': 'F',
 '238': 'F',
 '311': 'C',
 '312': 'C',
 '313': 'C',
 '314': 'C',
 '315': 'C',
 '316': 'C',
 '321': 'C',
 '322': 'C',
 '323': 'C',
 '324': 'C',
 '325': 'C',
 '326': 'C',
 '327': 'C',
 '331': 'C',
 '332': 'C',
 '333': 'C',
 '334': 'C',
 '335': 'C',
 '336': 'C',
 '337': 'C',
 '339': 'C',
 '423': 'G',
 '424': 'G',
 '425': 'G',
 '441': 'G',
 '442': 'G',
 '443': 'G',
 '444': 'G',
 '445': 'G',
 '446': 'G',
 '447': 'G',
 '448': 'G',
 '451': 'G',
 '452': 'G',
 '453': 'G',
 '454': 'G',
 '481': 'H',
 '482': 'H',
 '483': 'H',
 '484': 'H',
 '485': 'H',
 '486': 'H',
 '487': 'H',
 '488': 'H',
 '491': 'H',
 '492': 'H',
 '493': 'H',
 '511': 'J',
 '512': 'J',
 '515': 'J',
 '517': 'J',
 '518': 'J',
 '519': 'J',
 '521': 'K',
 '522': 'K',
 '523': 'K',
 '524': 'K',
 '525': 'K',
 '531': 'L',
 '532': 'N',
 '533': 'N',
 '541': 'M',
 '551': 'K',
 '561': 'N',
 '562': 'E',

In [131]:
# compares to "Replication Packet/Converting SIC to NAICS/SIC_NAICS_BEA_allsec.do"

# VW industry group -> NAICS code prefixes (2-, 3- or 4-digit) that belong to it.
# Every prefix is listed as an int and appears in exactly one group.
VW_groups = {
    'Agriculture': [111, 112, 113, 114, 115],  # excluded
    'Mining': [212],
    'Oil/Gas': [211],  # excluded
    # NAICS 213 is Support Activities for Mining; 212 already belongs to 'Mining'.
    'Mining Support': [213],  # excluded
    'Util': [22],  # 221 doesn't exist in NAICS codes in asset expenditures but in VW mapping
    'Const': [23],
    'Wood': [321],
    'Minerals': [327],
    'Primary Metals': [331],
    'Fabricated Metals': [332],
    'Machinery': [333],
    'Computers': [334],
    'Electrical': [335],
    'Vehicles': [3361, 3362, 3363],
    'Transport': [3364, 3365, 3366, 3367, 3368, 3369],
    'Furniture': [337],
    'Misc Mfg': [339],
    'Food Mfg': [311, 312],
    'Textile': [313, 314],
    'Apparel': [315, 316],
    'Paper': [322],
    'Printing': [323],
    'Petroleum': [324],
    'Chemical': [325],
    'Plastics': [326],
    'Wholesale Trade': [42],
    'Retail Trade': [44, 45],
    'Transit/Warehouse': [48, 49],
    'Info': [51],
    'Finance/Insurance': [52],
    'Real Estate': [531],
    'Rental': [532, 533],  # excluded
    'Prof/Tech': [54],
    'Mgmt': [55],
    'Admin': [561, 562],  # just 56 in the VW mapping
    'Educ': [61],
    'Health': [62],
    'Arts': [71],
    'Accom': [721],
    'Food Services': [722],
    'Other Services': [81]
}

In [132]:
# Map every VW industry group to the sorted, de-duplicated list of ISIC sectors
# reached through its NAICS code prefixes:
#   * 3+ digit codes are looked up by their first three digits (KeyError if the
#     prefix is absent from naics_3_isicsec);
#   * 2-digit codes pull in every three-digit prefix that starts with them.
# Local names deliberately avoid `matches`, which still holds the boolean row
# mask for inter_map built in an earlier cell.
VW_isicsec = {}
for vw_group, naics_codes in VW_groups.items():
    group_sectors = set()
    for naics_code in naics_codes:
        code = str(naics_code)
        if len(code) >= 3:
            group_sectors.add(naics_3_isicsec[code[:3]])
        elif len(code) == 2:
            group_sectors.update(
                isic_sec
                for naics_3, isic_sec in naics_3_isicsec.items()
                if naics_3.startswith(code)
            )
        else:
            # Fail loudly instead of silently dropping a malformed code.
            raise ValueError(
                f"Unsupported NAICS code {naics_code!r} in VW group {vw_group!r}"
            )
    # sorted(set) gives the same ordering as np.unique but plain str elements.
    VW_isicsec[vw_group] = sorted(group_sectors)

In [133]:
# Invert VW_isicsec: for each ISIC sector, list the VW groups mapped to it,
# in the order the groups appear in VW_isicsec.
isicsec_VW = {}
for vw_group, isic_sections in VW_isicsec.items():
    for isic_section in isic_sections:
        isicsec_VW.setdefault(isic_section, []).append(vw_group)

In [134]:
# Display the ISIC sector -> VW industry groups lookup.
isicsec_VW

{'A': ['Agriculture'],
 'B': ['Mining', 'Oil/Gas', 'Mining Support'],
 'D': ['Util'],
 'F': ['Const'],
 'C': ['Wood',
  'Minerals',
  'Primary Metals',
  'Fabricated Metals',
  'Machinery',
  'Computers',
  'Electrical',
  'Vehicles',
  'Transport',
  'Furniture',
  'Misc Mfg',
  'Food Mfg',
  'Textile',
  'Apparel',
  'Paper',
  'Printing',
  'Petroleum',
  'Chemical',
  'Plastics',
  'Other Services'],
 'G': ['Wholesale Trade', 'Retail Trade'],
 'H': ['Transit/Warehouse'],
 'J': ['Info'],
 'K': ['Finance/Insurance', 'Mgmt'],
 'L': ['Real Estate'],
 'N': ['Rental', 'Admin'],
 'M': ['Prof/Tech'],
 'E': ['Admin'],
 'P': ['Educ'],
 'Q': ['Health'],
 'R': ['Arts'],
 'I': ['Accom', 'Food Services'],
 'S': ['Other Services'],
 'T': ['Other Services']}