In [1]:
import pandas as pd
import numpy as np
import os
from functools import reduce

In [2]:
##Set directories for input and output files
#Built with os.path.join so the separator matches the host OS. The previous raw
#strings contained two literal backslashes, which is only a path separator on Windows.
import_file_path = os.path.join(os.pardir, "Input")
export_file_path = os.path.join(os.pardir, "Output")

# 2. Functions & Industry Aggregations

In [3]:
#Function to compute log-diff and drop original variable
def dlog(df, var, group_col='indnum', time_col='yr', base_year=None):
    """Replace a level column by its within-group log first difference.

    Parameters
    ----------
    df : DataFrame holding `var`, `group_col` and `time_col`; it is not modified.
    var : name of the column to log-difference.
    group_col : column identifying each panel unit.
    time_col : column used to order rows within a unit.
    base_year : if given, the result is set to NaN in that year for every unit;
        otherwise it is set to NaN in each unit's earliest year.

    Returns
    -------
    A copy sorted by (group_col, time_col) in which `var` is replaced by
    `dlog_<var>` (spaces in the name become underscores).
    """
    def _log_diff(levels):
        return np.log(levels).diff()

    out_name = f'dlog_{var}'.replace(' ', '_')
    panel = df.copy().sort_values([group_col, time_col])
    panel[out_name] = panel.groupby(group_col)[var].transform(_log_diff)

    #Rows whose growth rate must be missing: the base year, or each unit's first year
    if base_year is None:
        earliest = panel.groupby(group_col)[time_col].transform('min')
        to_blank = panel[time_col] == earliest
    else:
        to_blank = panel[time_col] == base_year
    panel.loc[to_blank, out_name] = np.nan

    return panel.drop(columns=[var])

In [4]:
#Function that turns growth rates into an index
def _chain_log_index(growth, years):
    """Cumulate one group's log growth rates into a log-level index.

    The year before the first non-missing growth rate is the base (log index 0);
    rows from the first non-missing year onward get the running sum of growth
    rates; every other row is NaN.
    """
    log_index = pd.Series(np.nan, index=growth.index, dtype=float)

    first_valid_idx = growth.first_valid_index()
    if first_valid_idx is None:
        #No growth rate at all for this group: the whole index stays missing
        return log_index

    first_valid_year = years.loc[first_valid_idx]

    #Base year (if present in the data) is normalised to log 0, i.e. level 1
    log_index.loc[years == first_valid_year - 1] = 0.0

    #Running sum in row order; a missing growth rate yields NaN at that row only
    from_first = years >= first_valid_year
    log_index.loc[from_first] = growth.loc[from_first].cumsum()
    return log_index


def index_generation(df, variables, id_var='indnum', time_var='yr'):
    """Convert log growth-rate columns into level indices equal to 1 in the base year.

    Parameters
    ----------
    df : panel DataFrame with `id_var`, `time_var` and every column in `variables`.
        Growth rates are cumulated in row order, so rows should be sorted by
        time within each group. The input is not modified.
    variables : growth-rate column names. A trailing '_g' is stripped to name the
        resulting level column (only the suffix is removed, not every '_g').
    id_var, time_var : panel identifier and time columns.

    Returns
    -------
    A copy of `df` with one level-index column per variable and the growth-rate
    columns dropped.
    """
    df = df.copy()
    for var_g in variables:
        #Assemble the result group by group into a Series aligned on df.index.
        #groupby.apply is avoided: when the applied function returns a Series
        #indexed like its group, a single-group frame makes it return a wide
        #DataFrame, which cannot be assigned to one column.
        log_index = pd.Series(np.nan, index=df.index, dtype=float)
        for _, group in df.groupby(id_var):
            log_index.loc[group.index] = _chain_log_index(group[var_g], group[time_var])

        #Convert to level index
        level_name = var_g[:-len('_g')] if var_g.endswith('_g') else var_g
        df[level_name] = np.exp(log_index)

    return df.drop(columns=variables)

In [5]:
def rebase_indices(df, vars_to_rebase, base_year, id_var='indnum', time_var='yr'):
    """Divide each listed column by its own base-year value within each id.

    Parameters
    ----------
    df : panel DataFrame; it is not modified.
    vars_to_rebase : columns to rescale.
    base_year : value of `time_var` whose observation becomes the denominator.
    id_var, time_var : panel identifier and time columns.

    Returns
    -------
    A new DataFrame (row index rebuilt by the merge) in which every rebased
    column equals 1 in `base_year`. Ids without a base-year row get NaN.
    """
    rebased = df.copy()

    for name in vars_to_rebase:
        base_col = f'{name}_base'

        #One row per id carrying that id's base-year value
        in_base_year = rebased[time_var] == base_year
        lookup = rebased.loc[in_base_year, [id_var, name]].rename(columns={name: base_col})

        #Attach the denominator to every row of the id, divide, then discard it
        rebased = rebased.merge(lookup, how='left', on=id_var)
        rebased[name] = rebased[name].div(rebased.pop(base_col))

    return rebased


In [6]:
#Industry aggregations for 1947-1963
#Each broad industry spans a contiguous run of industry numbers first..last and is
#coded as the two numbers written side by side (e.g. 29..36 -> 2936).
aggregate_groups = {
    first * 100 + last: list(range(first, last + 1))
    for first, last in [(29, 36), (37, 40), (41, 44), (47, 49), (51, 52), (54, 56), (57, 58)]
}

In [7]:
#Define the desired column order
#Full layout used for the 1963-2023 panel
order_1963to2023 = [
    'indnum', 'yr',
    'GO', 'VA', 'CAP', 'LAB', 'II', 'GO_QI',
    'CAPIT', 'CAPSOFT', 'CAPRD', 'CAPART', 'CAPOTH',
    'CAPIT_QI', 'CAPSOFT_QI', 'CAPRD_QI', 'CAPART_QI', 'CAPOTH_QI',
    'LABCOL', 'LABNCOL', 'LABCOL_QI', 'LABNCOL_QI',
    'II_QI',
    'IIEN', 'IIMT', 'IISERV', 'IIEN_QI', 'IIMT_QI', 'IISERV_QI',
    'HRS_QI',
]

#The 1947-2023 layout is the same list without the energy/materials/services detail
order_1947to2023 = [
    column for column in order_1963to2023
    if column not in ('IIEN', 'IIMT', 'IISERV', 'IIEN_QI', 'IIMT_QI', 'IISERV_QI')
]

# 3. Cleaning BEA-BLS Experimental Dataset

In [8]:
#Columns needed from each sheet of the BEA-BLS experimental workbook
experimental_vars = [
    'yr', 'indnum', 'goqi.', 'iiqi.', 'vlcol.', 'vln.', 'vkit.', 'vksoft.', 'vkRD.',
    'vkart.', 'vkoth.', 'qkit.', 'qks.', 'qkrd.', 'qka.', 'qko.', 'hrs',
    'qlindexcol_merge.', 'qlindexn_merge.',
]

#Extract datasheets from BEA-BLS Integrated Industry-Level Production Account (Eldridge et al., 2020)
#Both sheets are read with identical options; only the sheet name differs
df_experimental_1947to1963, df_experimental_1963to2016 = (
    pd.read_excel(
        os.path.join(import_file_path, 'industry-production-account-experimental.xlsx'),
        sheet_name=sheet, skiprows=1, usecols=experimental_vars)
    for sheet in ('1947-1963', '1963-2016')
)

#Quantity indices to be log-differenced
q_indices = [
    'goqi.', 'iiqi.', 'qkit.', 'qks.', 'qkrd.', 'qka.', 'qko.',
    'qlindexcol_merge.', 'qlindexn_merge.', 'hrs',
]

#Generate log-difference for quantity indices in both sheets
#(growth is forced to NaN in each sheet's first year: 1947 and 1963)
for v in q_indices:
    df_experimental_1947to1963 = dlog(df_experimental_1947to1963, v, base_year=1947)
    df_experimental_1963to2016 = dlog(df_experimental_1963to2016, v, base_year=1963)

#Rename variable names for both dataframes
experimental_renaming = {
    #Nominal compensation
    'vkit.': 'CAPIT',
    'vksoft.': 'CAPSOFT',
    'vkRD.': 'CAPRD',
    'vkart.': 'CAPART',
    'vkoth.': 'CAPOTH',
    'vlcol.': 'LABCOL',
    'vln.': 'LABNCOL',
    #Log growth of quantity indices
    'dlog_goqi.': 'GO_QI_g',
    'dlog_iiqi.': 'II_QI_g',
    'dlog_qkit.': 'CAPIT_QI_g',
    'dlog_qks.': 'CAPSOFT_QI_g',
    'dlog_qkrd.': 'CAPRD_QI_g',
    'dlog_qka.': 'CAPART_QI_g',
    'dlog_qko.': 'CAPOTH_QI_g',
    'dlog_qlindexcol_merge.': 'LABCOL_QI_g',
    'dlog_qlindexn_merge.': 'LABNCOL_QI_g',
    'dlog_hrs': 'HRS_QI_g',
}
df_experimental_1947to1963 = df_experimental_1947to1963.rename(columns=experimental_renaming)
df_experimental_1963to2016 = df_experimental_1963to2016.rename(columns=experimental_renaming)

In [9]:
#Restrict BEA-BLS Experimental: nominal variables are kept through 1996 and growth variables through 1997
nom_var     = ['CAPIT','CAPSOFT', 'CAPRD', 'CAPART', 'CAPOTH', 'LABCOL', 'LABNCOL']
growth_var  = ['CAPIT_QI_g','CAPSOFT_QI_g', 'CAPRD_QI_g', 'CAPART_QI_g', 'CAPOTH_QI_g', 'LABCOL_QI_g', 'LABNCOL_QI_g', 'HRS_QI_g','GO_QI_g','II_QI_g']

df_experimental_1963to2016 = df_experimental_1963to2016.copy()

#Blank each set of columns outside its window: nominal variables stop after 1996,
#growth variables stop after 1997; both start in 1963
for last_year, columns in ((1996, nom_var), (1997, growth_var)):
    outside_window = (df_experimental_1963to2016['yr'] < 1963) | (df_experimental_1963to2016['yr'] > last_year)
    df_experimental_1963to2016.loc[outside_window, columns] = np.nan

# 4. Cleaning WK2017 Data

In [10]:
#Identify needed variables
klems_vars = ['year', 'industry', 'gross output', 'capital', 'labor', 'intermediate']

#Extract the required datasheet from US KLEMS, March 2017 (Jorgenson et al., 2017), then
# - rename the panel identifiers to be consistent with df_experimental
# - rename the nominal variables to be consistent with KLEMS literature
# - generate value added as gross output minus intermediate inputs
df_klems = (
    pd.read_excel(os.path.join(import_file_path, 'usa_wk_mar_2017.xlsx'),
                  sheet_name='KLEMdata', skiprows=1, usecols=klems_vars)
    .rename(columns={
        'industry': 'indnum',
        'year': 'yr',
        'gross output': 'GO',
        'capital': 'CAP',
        'labor': 'LAB',
        'intermediate': 'II',
    })
    .assign(VA=lambda d: d["GO"] - d["II"])
)

In [11]:
#For consistency with other datasets, consolidate federal government and state & local government from 2 industries each to 1 industry each
federal_inds     = [62, 63]
state_local_inds = [64, 65]

#Yearly totals of the nominal variables over indnum 62/63, stored under consolidated indnum 62
federal = (
    df_klems.loc[df_klems['indnum'].isin(federal_inds)]
    .groupby('yr', as_index=False)[['GO', 'CAP', 'LAB', 'II', 'VA']]
    .sum()
    .assign(indnum=62)
)

#Yearly totals of the nominal variables over indnum 64/65, stored under consolidated indnum 63
state_local = (
    df_klems.loc[df_klems['indnum'].isin(state_local_inds)]
    .groupby('yr', as_index=False)[['GO', 'CAP', 'LAB', 'II', 'VA']]
    .sum()
    .assign(indnum=63)
)

#Swap the four original government industries for the two consolidated ones
df_klems = pd.concat(
    [df_klems.loc[~df_klems['indnum'].isin(federal_inds + state_local_inds)], federal, state_local],
    ignore_index=True,
)

In [12]:
#Build panel for broad industries starting from 1947-
#Member industries are recoded to their broad-industry code, then nominal values are summed
df_klems_1947to2014 = (
    df_klems
    .assign(indnum=lambda d: d['indnum'].replace(
        {member: code for code, members in aggregate_groups.items() for member in members}))
    .groupby(['yr', 'indnum'], as_index=False)[['GO', 'CAP', 'LAB', 'II', 'VA']]
    .sum()
    .sort_values(['indnum', 'yr'])
    .reset_index(drop=True)
)

#Build panel for finer industries starting 1963-
df_klems_1963to2014 = (
    df_klems.loc[df_klems['yr'] >= 1963]
    .sort_values(['indnum', 'yr'])
    .reset_index(drop=True)
)

# 5. Cleaning BEA-BLS Capital Dataset

In [13]:
#Name of sheets to extract data from
capital_sheets = [
    'Capital_Art_Quantity','Capital_R&D_Quantity','Capital_IT_Quantity','Capital_Other_Quantity','Capital_Software_Quantity',
    'Capital_Art Compensation', 'Capital_R&D Compensation','Capital_IT Compensation','Capital_Other Compensation','Capital_Software Compensation',
    'Labor_Col_Quantity','Labor_NoCol_Quantity','Labor_Col Compensation','Labor_NoCol Compensation',
    'Energy_Quantity','Materials_Quantity','Services_Quantity','Energy Compensation', 
    'Materials Compensation','Service Compensation','Gross Output', 'Gross Output_Quantity', 'Labor Hours_Quantity'
]

#Extract industry-level from BEA-BLS Integrated Industry-Level Production Account (Eldridge et al., 2025)
#The workbook is opened once and every sheet is parsed from that handle, instead of
#re-opening the file for each sheet.
long_data = []
with pd.ExcelFile(os.path.join(import_file_path, 'industry-production-account-capital.xlsx')) as capital_workbook:
    for sheet in capital_sheets:
        #Second spreadsheet row holds the header; fully empty rows are dropped
        df_tmp = pd.read_excel(capital_workbook, sheet_name=sheet, header=1).dropna(how='all')
        #First column is the industry label; remaining columns are melted into a 'year' column
        df_tmp = df_tmp.rename(columns={df_tmp.columns[0]: 'industry_description'})
        df_tmp = df_tmp.melt(id_vars='industry_description', var_name='year', value_name=sheet)
        long_data.append(df_tmp)

#One column per sheet, keyed by industry and year
df_capital_1997to2023 = reduce(lambda l, r: pd.merge(l, r, on=['industry_description','year'], how='outer'), long_data)
df_capital_1997to2023 = df_capital_1997to2023.rename(columns={'year':'yr','industry_description':'Description'})

#Create a sequential `indnum` for each unique Description with existing order
order                           = df_capital_1997to2023['Description'].drop_duplicates().tolist()
mapping                         = {desc: i+1 for i, desc in enumerate(order)}
df_capital_1997to2023['indnum'] = df_capital_1997to2023['Description'].map(mapping).astype('Int64')
#Column labels that are not numeric years become NaN
df_capital_1997to2023['yr']     = pd.to_numeric(df_capital_1997to2023['yr'], errors='coerce')
df_capital_1997to2023           = df_capital_1997to2023.drop(columns='Description')

#Move panel identifiers first
cols                            = ['indnum', 'yr'] + [c for c in df_capital_1997to2023.columns if c not in ['indnum', 'yr']]
df_capital_1997to2023           = df_capital_1997to2023[cols]
df_capital_1997to2023           = df_capital_1997to2023.sort_values(['indnum', 'yr']).reset_index(drop=True)

In [14]:
#Log difference the quantity variables
q_indices = [
    'Gross Output_Quantity',
    'Capital_IT_Quantity', 'Capital_Software_Quantity', 'Capital_R&D_Quantity',
    'Capital_Art_Quantity', 'Capital_Other_Quantity',
    'Labor_Col_Quantity', 'Labor_NoCol_Quantity', 'Labor Hours_Quantity',
    'Energy_Quantity', 'Materials_Quantity', 'Services_Quantity',
]

#Apply dlog to one quantity column after another (growth is forced to NaN in 1997)
df_capital_1997to2023 = reduce(
    lambda frame, column: dlog(frame, column, base_year=1997),
    q_indices,
    df_capital_1997to2023,
)

#Rename df_capital_1997to2023 variables to match df_experimental variable names
capital_dictionary = {
    #Nominal compensation
    'Gross Output': 'GO',
    'Capital_IT Compensation': 'CAPIT',
    'Capital_Software Compensation': 'CAPSOFT',
    'Capital_R&D Compensation': 'CAPRD',
    'Capital_Art Compensation': 'CAPART',
    'Capital_Other Compensation': 'CAPOTH',
    'Labor_Col Compensation': 'LABCOL',
    'Labor_NoCol Compensation': 'LABNCOL',
    'Service Compensation': 'IISERV',
    'Materials Compensation': 'IIMT',
    'Energy Compensation': 'IIEN',
    #Log growth of quantity indices
    'dlog_Gross_Output_Quantity': 'GO_QI_g',
    'dlog_Capital_IT_Quantity': 'CAPIT_QI_g',
    'dlog_Capital_Software_Quantity': 'CAPSOFT_QI_g',
    'dlog_Capital_R&D_Quantity': 'CAPRD_QI_g',
    'dlog_Capital_Art_Quantity': 'CAPART_QI_g',
    'dlog_Capital_Other_Quantity': 'CAPOTH_QI_g',
    'dlog_Labor_Col_Quantity': 'LABCOL_QI_g',
    'dlog_Labor_NoCol_Quantity': 'LABNCOL_QI_g',
    'dlog_Labor_Hours_Quantity': 'HRS_QI_g',
    'dlog_Energy_Quantity': 'IIEN_QI_g',
    'dlog_Materials_Quantity': 'IIMT_QI_g',
    'dlog_Services_Quantity': 'IISERV_QI_g',
}

#Rename variables using capital_dictionary (only the entries that exist as columns)
rename_dict = {
    old: new for old, new in capital_dictionary.items()
    if old in df_capital_1997to2023.columns
}
df_capital_1997to2023      = df_capital_1997to2023.rename(columns=rename_dict)

#Same frame without nominal gross output
df_capital_1997to2023_exGO = df_capital_1997to2023.drop(columns='GO')


In [15]:
#Use BEA-BLS Capital to compute nominal variables GO, II, LAB, CAP (needed for 2015-, both for broad industries and finer industries)
df_capital_nominal = (df_capital_1997to2023[df_capital_1997to2023['yr'] >= 2015].copy())

#Component compensation columns that are summed into each nominal aggregate
nominal_agg_map = {
    'GO' : ['GO'],
    #Fixed: the software component was listed as 'CAP_SOFT', which matches no column
    #(the renamed column is 'CAPSOFT'), so it was silently left out of CAP.
    'CAP': ['CAPIT', 'CAPSOFT', 'CAPRD', 'CAPART', 'CAPOTH'],
    'LAB': ['LABNCOL', 'LABCOL'],
    'II' : ['IISERV', 'IIMT', 'IIEN']
}
for newvar, cols in nominal_agg_map.items():
    #Columns absent from the frame are skipped rather than raising
    cols_present                   = [c for c in cols if c in df_capital_nominal.columns]
    #min_count=1 keeps the aggregate missing when every component is missing
    df_capital_nominal[newvar]     = df_capital_nominal[cols_present].apply(pd.to_numeric, errors='coerce').sum(axis=1, min_count=1)

#Keep nominal variables 2015 or after for 1963 industry classification and generate VA
df_capital_nominal_start1963       = df_capital_nominal[['yr','indnum','GO','II','LAB','CAP']].reset_index(drop=True)
#Fixed: VA is gross output minus intermediate inputs of THIS frame. It was previously
#computed from df_klems, a different dataset that only lines up by row position.
df_capital_nominal_start1963["VA"] = df_capital_nominal_start1963["GO"] - df_capital_nominal_start1963["II"]

#Keep nominal variables 2015 or after for 1947 industry classification
#Member industries are recoded to their broad-industry code (others keep their indnum), then summed
df_capital_nominal_start1947       = (
    df_capital_nominal_start1963.copy()
    .assign(indnum=lambda d: d['indnum'].map({i:new for new, old in aggregate_groups.items() for i in old}).fillna(d['indnum']).astype('Int64'))
    .groupby(['yr','indnum'], as_index=False)[['GO','II','LAB','CAP','VA']].sum())
df_capital_nominal_start1947       = df_capital_nominal_start1947.sort_values(by=['indnum', 'yr']).reset_index(drop=True)

In [16]:
## -- Summary 
##In the cleaning stage, we have prepared 7 relevant dataframes:
#df_experimental_1963to2016   : Quantity indices and compensation for factor components (1963-2016)
#df_capital_1997to2023_exGO   : Quantity indices and compensation for factor components (1997-2023)
#df_klems_1963to2014          : Nominal GO, II, CAP, LAB, VA for 1963 industry aggregations (1963-2014)
#df_capital_nominal_start1963 : Nominal GO, II, CAP, LAB, VA for 1963 industry aggregations (2015-2023)

#df_klems_1947to2014          : Nominal GO, II, CAP, LAB, VA for 1947 industry aggregations (1947-2014)
#df_capital_nominal_start1947 : Nominal GO, II, CAP, LAB, VA for 1947 industry aggregations (2015-2023)
#df_experimental_1947to1963   : Quantity indices and compensation for factor components (1947-1963)

# 6. Merging the Datasets for 1963-2023

In [17]:
#Append df_klems_1963to2014 & df_capital_nominal_start1963 to create 1963-2023 GO, II, CAP, LAB, VA panel,
#ordered by industry then year
df_nom_1963to2023 = (
    pd.concat([df_klems_1963to2014, df_capital_nominal_start1963], ignore_index=True)
    .sort_values(by=['indnum', 'yr'])
    .reset_index(drop=True)
)

In [18]:
#Merging quantity indices and compensation for factor components (1963-1996/97 & 1997/98-2023)
#Give both frames the same set of columns (columns a frame lacks are created as NaN)
all_cols                   = list(set(df_experimental_1963to2016.columns).union(df_capital_1997to2023_exGO.columns))
df_experimental_1963to2016 = df_experimental_1963to2016.reindex(columns=all_cols)
df_capital_1997to2023_exGO = df_capital_1997to2023_exGO.reindex(columns=all_cols)

#Merge both datasets together; shared non-key columns come back as <col>_exp / <col>_cap
df_extended                = df_experimental_1963to2016.merge(
    df_capital_1997to2023_exGO, how="outer", on=["indnum", "yr"], suffixes=("_exp", "_cap"))

#If same column appears twice, then coalesce: experimental value first, capital value as fallback
for col in all_cols:
    exp_col, cap_col = col + "_exp", col + "_cap"
    if exp_col in df_extended and cap_col in df_extended:
        df_extended[col] = df_extended.pop(exp_col).combine_first(df_extended.pop(cap_col))

#Sort by indnum-yr
df_extended                = df_extended.sort_values(by=["indnum", "yr"]).reset_index(drop=True)

#Chain the growth rates into indices
growth_vars = [
    "GO_QI_g",
    "CAPIT_QI_g", "CAPSOFT_QI_g", "CAPRD_QI_g", "CAPART_QI_g", "CAPOTH_QI_g",
    "LABCOL_QI_g", "LABNCOL_QI_g",
    "II_QI_g", "IISERV_QI_g", "IIMT_QI_g", "IIEN_QI_g",
    "HRS_QI_g",
]
df_qindices_1963to2023     = index_generation(df_extended, growth_vars)

In [19]:
#Merge panel data with nominal values and panel data with quantity indices and compensation for factor components
#(inner join: only industry-years present in both panels are kept)
df_1963to2023 = (
    df_nom_1963to2023
    .merge(df_qindices_1963to2023, how='inner', on=['indnum', 'yr'])
    .sort_values(by=['indnum', 'yr'])
    .reset_index(drop=True)
)

#Rebase to year 2000
#NOTE(review): II_QI_g is blanked after 1997 in the experimental data and the capital
#data supplies no II_QI_g, so II_QI looks to be missing in 2000 and would rebase to
#NaN for every year - confirm against the exported sheet.
quantity_vars = [
    "GO_QI",
    "CAPIT_QI", "CAPSOFT_QI", "CAPRD_QI", "CAPART_QI", "CAPOTH_QI",
    "LABCOL_QI", "LABNCOL_QI",
    "II_QI", "IIEN_QI", "IIMT_QI", "IISERV_QI",
    "HRS_QI",
]
df_1963to2023 = rebase_indices(df_1963to2023, vars_to_rebase=quantity_vars, base_year=2000)

#Keep and order the output columns
df_1963to2023 = df_1963to2023[order_1963to2023]

# 7. Merging the Datasets for 1947-2023

In [20]:
#Create a panel dataset for nominal values 1947-2023 (merging KLEMS (1947-2014) and BEA-BLS Capital (2015-2023))
#Give both frames the same set of columns before stacking them
all_cols                     = list(set(df_klems_1947to2014.columns).union(df_capital_nominal_start1947.columns))
df_klems_1947to2014          = df_klems_1947to2014.reindex(columns=all_cols)
df_capital_nominal_start1947 = df_capital_nominal_start1947.reindex(columns=all_cols)

#Append both datasets and sort by panel identifiers (indnum, yr)
df_nom_1947to2023            = (
    pd.concat([df_klems_1947to2014, df_capital_nominal_start1947], ignore_index=True)
    .sort_values(by=['indnum', 'yr'])
    .reset_index(drop=True)
)

#Panel identifiers first, remaining columns in their current order
other_cols                   = [c for c in df_nom_1947to2023.columns if c not in ["indnum", "yr"]]
df_nom_1947to2023            = df_nom_1947to2023[["indnum", "yr"] + other_cols]

In [21]:
#Chain the growth rates into indices
growth_vars = [
    "GO_QI_g", "CAPIT_QI_g", "CAPSOFT_QI_g", "CAPRD_QI_g", "CAPART_QI_g", "CAPOTH_QI_g",
    "LABCOL_QI_g", "LABNCOL_QI_g", "II_QI_g", "HRS_QI_g"
]
df_qindices_1947to2023     = index_generation(df_experimental_1947to1963, growth_vars)

#Merge df_nom_1947to2023 with df_qindices_1947to2023
df_1947to2023   = pd.merge(df_nom_1947to2023,df_qindices_1947to2023,on=["indnum", "yr"],how="outer",suffixes=("_exp63", "_full"))  # keeps all rows from both
df_1947to2023   = df_1947to2023.sort_values(by=["indnum", "yr"]).reset_index(drop=True)
other_cols      = [c for c in df_1947to2023.columns if c not in ["indnum", "yr"]]
df_1947to2023   = df_1947to2023[["indnum", "yr"] + other_cols]

#Rebase to year 2000
quantity_vars = [
    "GO_QI", "CAPIT_QI", "CAPSOFT_QI", "CAPRD_QI", "CAPART_QI", "CAPOTH_QI",
    "LABCOL_QI", "LABNCOL_QI", "II_QI", "HRS_QI"
]
#Fixed: this step previously rebased df_1963to2023 (already rebased in the 1963-2023
#section) and left df_1947to2023 untouched.
#Only variables with at least one non-missing year-2000 value are rebased, because
#dividing by a missing base would blank the whole series; the rest keep their
#chained base and are reported below.
#NOTE(review): the quantity indices here are chained from df_experimental_1947to1963
#only - confirm that base year 2000 is the intended base for this panel.
base_observed   = df_1947to2023.loc[df_1947to2023['yr'] == 2000, quantity_vars].notna().any()
rebasable_vars  = [name for name in quantity_vars if base_observed[name]]
skipped_vars    = [name for name in quantity_vars if not base_observed[name]]
if skipped_vars:
    print(f"Not rebased to 2000 (no year-2000 observation): {skipped_vars}")
df_1947to2023   = rebase_indices(df_1947to2023, vars_to_rebase=rebasable_vars, base_year=2000)

#Reorder variables
df_1947to2023   = df_1947to2023[order_1947to2023]

# 8. Combine 1947-2023 and 1963-2023 Datasets and Export

In [None]:
#Create a variable definition DataFrame (only Variable + Description)
#Each entry pairs a column name with its description, so the two cannot drift apart
var_defs = pd.DataFrame(
    [
        ('indnum',     "Industry identifier"),
        ('yr',         "Year"),
        ('GO',         "Nominal Gross Output"),
        ('VA',         "Nominal Value Added"),
        ('CAP',        "Nominal Capital Compensation"),
        ('LAB',        "Nominal Labor Compensation"),
        ('II',         "Nominal Intermediate Input Compensation"),
        ('GO_QI',      "Output Quantity Index"),
        ('CAPIT',      "Nominal IT Equipment Capital Compensation"),
        ('CAPSOFT',    "Nominal Software Capital Compensation"),
        ('CAPRD',      "Nominal R&D Capital Compensation"),
        ('CAPART',     "Nominal Entertainment Originals Capital Compensation"),
        ('CAPOTH',     "Nominal Other Capital Compensation"),
        ('CAPIT_QI',   "IT Equipment Capital Quantity Index"),
        ('CAPSOFT_QI', "Software Capital Quantity Index"),
        ('CAPRD_QI',   "R&D Capital Quantity Index"),
        ('CAPART_QI',  "Entertainment Originals Quantity Index"),
        ('CAPOTH_QI',  "Other Capital Quantity Index"),
        ('LABCOL',     "Nominal College Labor Compensation"),
        ('LABNCOL',    "Nominal Non-College Labor Compensation"),
        ('LABCOL_QI',  "College Labor Quantity Index"),
        ('LABNCOL_QI', "Non-college Labor Quantity Index"),
        ('II_QI',      "Intermediate Input Quantity Index"),
        ('IIEN',       "Nominal Energy Intermediate Compensation"),
        ('IIMT',       "Nominal Materials Intermediate Compensation"),
        ('IISERV',     "Nominal Services Intermediate Compensation"),
        ('IIEN_QI',    "Energy Intermediate Input Quantity Index"),
        ('IIMT_QI',    "Materials Intermediate Input Quantity Index"),
        ('IISERV_QI',  "Services Intermediate Input Quantity Index"),
        ('HRS_QI',     "Hours Quantity Index"),
    ],
    columns=["Variable", "Description"],
)

#Export Excel with multiple sheets
#The output directory is created when it does not exist yet
os.makedirs(export_file_path, exist_ok=True)
output_file = os.path.join(export_file_path, "cleandata_new.xlsx")

with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    #Sheet 1 is the legend; sheets 2/3 hold the two data panels
    for sheet_label, frame in (
        ("VariableDefinitions", var_defs),
        ("1963to2023", df_1963to2023),
        ("1947to2023", df_1947to2023),
    ):
        frame.to_excel(writer, sheet_name=sheet_label, index=False)