In [None]:
import numpy as np
import pandas as pd

# This Notebook converts US supply and use tables into symmetric industry-by-industry I-O tables, including Z, A, and L matrices

The US input-output supply and use (SUP) tables are used as input. The code that converts them into symmetric I-O tables is in `IO_lib`.

In [None]:
from IO_lib import derive_IO
from IO_lib import derive_IO_domestic
from IO_lib import derive_import_perc
from IO_lib import derive_IO_no_scrap_adjustment
from IO_lib import derive_IO_domestic_no_scrap_adjustment
import os.path
from openpyxl import load_workbook

In [None]:
def savefile(df, filename, year, folder):
    """Write ``df`` to the Excel file ``<folder><year>-<filename>``.

    Parameters
    ----------
    df : object exposing ``to_excel(path)`` (e.g. a pandas DataFrame)
        Table to write.
    filename : str
        Base file name including its extension, e.g. ``"Z_sum.xlsx"``.
    year : int or str
        Put in front of ``filename``, separated by a hyphen.
    folder : str
        Prefix prepended verbatim to the file name, so a directory must be
        given with its trailing path separator.

    The directory part of the resulting path is created when it is missing,
    so writing into a not-yet-existing output folder no longer fails.
    """
    filename = folder + str(year) + "-" + filename

    # Derive the directory from the final path rather than from `folder`,
    # so a `folder` that is only a file-name prefix is handled correctly.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    df.to_excel(filename)

## First for 71 industry codes for every year 1997 - 2019

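It appears that BEA does not apply a scrap adjustment in its L matrices for the 71-industry summary sectors (published as IxI TR (total requirements) tables), so we follow the same approach for the original (non-domestic) tables below and derive them without a scrap adjustment.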

In [None]:
# Save the industry / commodity / value added / final consumption names of
# the summary tables. Row names come from columns A:B of the 2018 use sheet,
# column names from the two header rows directly above the data.
# NOTE(review): this path starts with "../Data/" whereas the table loops read
# from "../data/" -- confirm the spelling on case-sensitive file systems.
summary_use_file = "../Data/AllTablesIO/IOUse_After_Redefinitions_PRO_1997-2019_Summary.xlsx"
shared_kwargs = dict(sheet_name='2018', header=None, index_col=None, engine='openpyxl')

# Row side of the table: one (code, label) pair per line.
ind_names1 = pd.read_excel(summary_use_file, skiprows=7, nrows=79, usecols="A:B", **shared_kwargs)
ind_names1 = ind_names1.dropna()

# Column side: the two header rows, transposed into one pair per line.
ind_names2 = pd.read_excel(summary_use_file, skiprows=5, nrows=2, usecols="C:CR", **shared_kwargs)
ind_names2 = ind_names2.T.dropna()

name_columns = ['code', 'label']
ind_names2.columns = name_columns
ind_names1.columns = name_columns

# Outer merge on both columns keeps every pair found on either side.
ind_names = pd.merge(ind_names1, ind_names2, how='outer')
ind_names.to_csv("../results/data_out/Data_out_names/ind_sum.csv")

In [None]:
# Domestic tables for the 71-industry summary level: scrap redistributed,
# import percentages calculated, domestic final demand.
#
# For every year the make, use and import sheets are read, and the import
# matrix is scaled by each factor in [1, 0.9, 0.7, 0.5]. Results for factor 1
# go to ../data/Data_out_IO/; the other factors go to the imp_reduced/
# sub-folder with the factor appended to the file name.

path_data = "../data/AllTablesIO/"


for year in range(1997, 2020):
    # '...' placeholder cells are set to 0. `infer_objects()` then restores
    # numeric dtypes for columns that were read as object because of the
    # placeholder; recent pandas versions no longer downcast inside
    # `replace`, which would otherwise leave object arrays for `.to_numpy()`.
    make = pd.read_excel(path_data + "IOMake_After_Redefinitions_1997-2019_Summary.xlsx",
                         sheet_name=str(year), header=5, skiprows=[6], nrows=72,
                         index_col=0, usecols="A, C:BX", engine='openpyxl')
    make = make.replace('...', 0).infer_objects()

    use = pd.read_excel(path_data + "IOUse_After_Redefinitions_PRO_1997-2019_Summary.xlsx",
                        sheet_name=str(year), header=5, skiprows=[6], nrows=83,
                        index_col=0, usecols="A, C:CV", engine='openpyxl')
    use = use.replace('...', 0).infer_objects()

    imp = pd.read_excel(path_data + "ImportMatrices_After_Redefinitions_SUM_1997-2020.xlsx",
                        sheet_name=str(year), header=5, skiprows=[6], nrows=83,
                        index_col=0, usecols="A, C:CV", engine='openpyxl')
    imp = imp.replace('...', 0).infer_objects()
    imp_copy = imp.copy()

    for imp_reducer in [1, 0.9, 0.7, 0.5]:

        # Always scale the unmodified import table, never a previous result.
        imp = imp_copy * imp_reducer

        # The slices are taken anew for every factor (as before), so each
        # call into IO_lib starts from freshly sliced inputs.

        # Make matrix
        # industry x commodity
        V = make.iloc[:71, :71]

        # Use matrix (producer value)
        # Commodity x industry
        U = use.iloc[:72, :71]

        # Import matrix
        # Commodity x industry
        W = imp.iloc[:72, :71]

        # industry total output (Producer prices)
        g = use.iloc[82, :71]

        # commodity total output (Producer value)
        q = make.iloc[71, :71]

        # scrap
        h = make.iloc[:71, 71]

        # final demand
        # Commodity x industry
        fc = use.iloc[:72, 74:94]

        # final demand import
        # Commodity x industry; columns selected by label so they line up
        # with the final demand columns of the use table
        W_fc = imp.iloc[:72, 72:92].loc[:, fc.columns]

        # non-standard imports
        imp_non = use.iloc[72, :71]

        # value added (independent of the import factor, saved once per factor)
        va = use.iloc[76:79, :71]

        Z, A, L, f, p = derive_IO_domestic(
            U.to_numpy(), W.to_numpy(), V.to_numpy(), h.to_numpy(), imp_non.to_numpy(),
            q.to_numpy(), g.to_numpy(), fc.to_numpy(), W_fc.to_numpy(), how="industry")

        # Z, A and L carry one extra 'imports' row below the industry rows.
        io_rows = list(U.columns) + ['imports']
        Z_df = pd.DataFrame(data=Z, index=io_rows, columns=U.columns)
        A_df = pd.DataFrame(data=A, index=io_rows, columns=U.columns)
        L_df = pd.DataFrame(data=L, index=io_rows, columns=U.columns)
        f_df = pd.DataFrame(data=f, index=U.columns, columns=fc.columns)

        # The import share is computed without the last row of U and W.
        import_perc = derive_import_perc(
            U.iloc[:-1, :].to_numpy(), W.iloc[:-1, :].to_numpy(), V.to_numpy(), h.to_numpy(),
            imp_non.to_numpy(), q.to_numpy(), g.to_numpy(), fc.to_numpy(), W_fc.to_numpy(),
            how="industry")

        import_perc_df = pd.DataFrame(data=import_perc, index=U.index[:-1], columns=['import_perc'])

        # (table, file stem) pairs, written in this order.
        outputs = [
            (Z_df, "Z_sum_dom"),
            (A_df, "A_sum_dom"),
            (L_df, "L_sum_dom"),
            (f_df, "f_sum_dom"),
            (va, "va_sum_dom"),
            (import_perc_df, "import_perc_sum"),
        ]

        if imp_reducer == 1:
            out_folder = "../data/Data_out_IO/"
            suffix = ".xlsx"
        else:
            out_folder = "../data/Data_out_IO/imp_reduced/"
            suffix = "_" + str(imp_reducer) + ".xlsx"

        for table, stem in outputs:
            savefile(table, stem + suffix, year, out_folder)

In [None]:
# Original tables (not domestic), derived without scrap adjustment, for the
# 71-industry summary level. One set of Z, A, L, f and va files is written
# per year to ../data/Data_out_IO/.

path_data = "../data/AllTablesIO/"


for year in range(1997, 2020):
    # '...' placeholder cells are set to 0. `infer_objects()` then restores
    # numeric dtypes for columns that were read as object because of the
    # placeholder; recent pandas versions no longer downcast inside
    # `replace`, which would otherwise leave object arrays for `.to_numpy()`.
    make = pd.read_excel(path_data + "IOMake_After_Redefinitions_1997-2019_Summary.xlsx",
                         sheet_name=str(year), header=5, skiprows=[6], nrows=72,
                         index_col=0, usecols="A, C:BX", engine='openpyxl')
    make = make.replace('...', 0).infer_objects()

    use = pd.read_excel(path_data + "IOUse_After_Redefinitions_PRO_1997-2019_Summary.xlsx",
                        sheet_name=str(year), header=5, skiprows=[6], nrows=83,
                        index_col=0, usecols="A, C:CV", engine='openpyxl')
    use = use.replace('...', 0).infer_objects()

    # Make matrix
    # industry x commodity
    V = make.iloc[:71, :73]

    # Use matrix (producer value)
    # Commodity x industry
    U = use.iloc[:73, :71]

    # industry total output (Producer prices)
    g = use.iloc[82, :71]

    # commodity total output (Producer value)
    q = make.iloc[71, :73]

    # final demand
    fc = use.iloc[:73, 74:94]

    # value added
    va = use.iloc[76:79, :71]

    Z, A, L, f = derive_IO_no_scrap_adjustment(
        U.to_numpy(), V.to_numpy(), q.to_numpy(), g.to_numpy(), fc.to_numpy(), how="industry")

    # All result tables are labelled with the industry codes of the use table.
    Z_df = pd.DataFrame(data=Z, index=U.columns, columns=U.columns)
    A_df = pd.DataFrame(data=A, index=U.columns, columns=U.columns)
    L_df = pd.DataFrame(data=L, index=U.columns, columns=U.columns)
    f_df = pd.DataFrame(data=f, index=U.columns, columns=fc.columns)

    # (table, file name) pairs, written in this order.
    outputs = [
        (Z_df, "Z_sum.xlsx"),
        (A_df, "A_sum.xlsx"),
        (L_df, "L_sum.xlsx"),
        (f_df, "f_sum.xlsx"),
        (va, "va_sum.xlsx"),
    ]
    for table, out_name in outputs:
        savefile(table, out_name, year, "../data/Data_out_IO/")

## Now for the detailed 389-industry tables, for 2007 and 2012

I follow p.15-16 of https://www.cbo.gov/sites/default/files/111th-congress-2009-2010/workingpaper/2010-04-io_model_paper_0.pdf in the following way:
- Scrap adjustment for scrap and secondhand goods
- Ignore Rest of world adjustment
- Noncomparable imports are part of value added
- Sectors without commodities (i.e. State and local government educational services (GSLGE), State and local government hospitals and health services (GSLGH), State and local government passenger transit (S00201)) are included in the calculation as normal

In [None]:
# Save the industry / commodity / value added / final consumption names of
# the detailed tables, taken from the 2012 use sheet.
detail_use_file = "../Data/AllTablesIO/IOUse_After_Redefinitions_PRO_2007_2012_Detail.xlsx"
detail_kwargs = dict(sheet_name='2012', header=None, index_col=None, engine='openpyxl')

# Row side: columns A:B hold one (code, label) pair per line.
ind_names1 = pd.read_excel(detail_use_file, skiprows=6, nrows=419, usecols="A:B", **detail_kwargs)
ind_names1 = ind_names1.dropna()
ind_names1.columns = ['code', 'label']

# Column side: the two header rows, transposed. Here the label row comes
# before the code row, hence the reversed column names.
ind_names2 = pd.read_excel(detail_use_file, skiprows=4, nrows=2, usecols="C:PN", **detail_kwargs)
ind_names2 = ind_names2.T.dropna()
ind_names2.columns = ['label', 'code']

# Outer merge on the shared columns keeps every pair found on either side.
ind_names_det = pd.merge(ind_names1, ind_names2, how='outer')
ind_names_det.to_csv("../results/data_out/Data_out_names/ind_det.csv")

In [None]:
# Domestic tables for the detailed level, no scrap adjustment.
#
# For 2007 and 2012 the make, use and import sheets are read, and the import
# matrix is scaled by each factor in [1, 0.9, 0.7, 0.5]. Results for factor 1
# go to ../data/Data_out_IO389/; the other factors go to its imp_reduced/
# sub-folder with the factor appended to the file name.
# NOTE(review): the import workbook is the "Before_Redefinitions" one while
# make and use are "After_Redefinitions" -- confirm this is intended.

path_data = "../data/AllTablesIO/"


for year in (2007, 2012):
    sheet_kwargs = dict(sheet_name=str(year), header=5, nrows=411, index_col=0, engine='openpyxl')

    # Empty cells are treated as zero in all three tables.
    make = pd.read_excel(path_data + "IOMake_After_Redefinitions_2007_2012_Detail.xlsx",
                         usecols="A, C:OR", **sheet_kwargs)
    make = make.fillna(0)

    use = pd.read_excel(path_data + "IOUse_After_Redefinitions_PRO_2007_2012_Detail.xlsx",
                        usecols="A, C:PN", **sheet_kwargs)
    use = use.fillna(0)

    imp = pd.read_excel(path_data + "ImportMatrices_Before_Redefinitions_DET_2007_2012.xlsx",
                        usecols="A, C:PN", **sheet_kwargs)
    imp = imp.fillna(0)

    imp_copy = imp.copy()

    for imp_reducer in [1, 0.9, 0.7, 0.5]:

        # Always scale the unmodified import table, never a previous result.
        imp = imp_copy * imp_reducer

        V = make.iloc[:405, :401]     # make matrix, industry x commodity
        U = use.iloc[:401, :405]      # use matrix (producer value), commodity x industry
        W = imp.iloc[:401, :405]      # import matrix, commodity x industry
        g = use.iloc[410, :405]       # industry total output (producer prices)
        q = make.iloc[405, :401]      # commodity total output (producer value)
        fc = use.iloc[:401, 406:426]  # final demand

        # Value added. Scrap/secondhand output and noncomparable imports are
        # not extracted in this variant.
        va = use.iloc[406:409, :405]

        Z, A, L, f = derive_IO_domestic_no_scrap_adjustment(
            U.to_numpy(), W.to_numpy(), V.to_numpy(), q.to_numpy(), g.to_numpy(), fc.to_numpy(),
            how = "industry")

        # Z, A and L carry one extra 'imports' row below the industry rows.
        industry_cols = U.columns
        rows_with_imports = list(industry_cols) + ['imports']
        Z_df = pd.DataFrame(data=Z, index=rows_with_imports, columns=industry_cols)
        A_df = pd.DataFrame(data=A, index=rows_with_imports, columns=industry_cols)
        L_df = pd.DataFrame(data=L, index=rows_with_imports, columns=industry_cols)
        f_df = pd.DataFrame(data=f, index=industry_cols, columns=fc.columns)

        if imp_reducer == 1:
            out_folder = "../data/Data_out_IO389/"
            out_names = ["Z_det_dom.xlsx", "A_det_dom.xlsx", "L_det_dom.xlsx",
                         "f_det_dom.xlsx", "va_det_dom.xlsx"]
        else:
            out_folder = "../data/Data_out_IO389/imp_reduced/"
            out_names = [stem + str(imp_reducer) + ".xlsx"
                         for stem in ("Z_det_dom_", "A_det_dom_", "L_det_dom_",
                                      "f_det_dom_", "va_det_dom_")]

        # Written in the order Z, A, L, f, va.
        for table, out_name in zip((Z_df, A_df, L_df, f_df, va), out_names):
            savefile(table, out_name, year, out_folder)