In [1]:
import pandas as pd, os
from pathlib import Path

# Define folder and file paths

In [2]:
# Get root folder
def parent_dir(dir):
    return os.path.abspath(os.path.join(dir, os.pardir))
root_folderpath = Path(parent_dir(parent_dir(os.getcwd())))
print('root_folderpath (this should end with `.../disaggregated_accounts`):')
root_folderpath

root_folderpath (this should end with `.../disaggregated_accounts`):


WindowsPath('c:/Users/ftrikos/Dropbox/Jason-Kilian/Danish Macro/disaggregated_accounts')

In [3]:
# Inputs
data_folderpath = Path(root_folderpath / r'process_data_for_tableau\data')
dom_consumer_spending_filepath = Path(data_folderpath / r"misc\Domestic consumer spending (percent of consumer cell spending).csv") # using redacted version
for_consumer_spending_filepath = Path(data_folderpath / r"vectors_individual\Foreign consumer spending (predicted).csv")
labor_comp_filepath = Path(data_folderpath / r"matrices_individual\Labor compensation.csv")
intermediates_trade_filepath = Path(data_folderpath / r"matrices_individual\Domestic intermediates.csv")
mixed_income_filepath = Path(data_folderpath / r"matrices_individual\Mixed income from non-corporate producers.csv")
addl_consumer_flows_filepath = Path(root_folderpath / r"process_data_for_tableau\output_csv\Additional consumer flows.csv")
addl_producer_flows_filepath = Path(root_folderpath / r"process_data_for_tableau\output_csv\Additional producer flows.csv")
crosswalk_muni_filepath = Path(data_folderpath / r"crosswalk\dk_municipalities_code_to_name.xlsx")
crosswalk_industry_filepath = Path(data_folderpath / r"crosswalk\sector_number_to_name.xlsx")

In [4]:
# Outputs
output_folderpath_csv = Path(root_folderpath / r"src\assets\data\csv")
output_folderpath_dta = Path(root_folderpath / r"src\assets\data\dta")
dom_consumer_spending_output_filestem = r'Domestic consumer spending matrix (predicted)'
for_consumer_spending_output_filestem = r'Foreign consumer spending (predicted)'
labor_comp_output_filestem = r'Labor compensation matrix'
intermediates_trade_output_filestem = r'Intermediates trade matrix'
mixed_income_output_filestem = r'Mixed income matrix'
addl_consumer_flows_output_filestem = r'Additional consumer flows'
addl_producer_flows_output_filestem = r'Additional producer flows'
crosswalk_muni_output_filestem = r'Crosswalk for municipality names'
crosswalk_industry_output_filestem = r'Crosswalk for industry names'

# Common functions

In [5]:
def rename_cols(df):
    df = df.copy()
    for col in df.columns:
        if 'cust_' in col:
            df = df.rename(columns={col:col.replace('cust_', 'consumer_')})
        elif 'spend_' in col:
            df = df.rename(columns={col:col.replace('spend_', 'producer_')})
        elif 'prod_' in col:
            df = df.rename(columns={col:col.replace('prod_', 'producer_')})
    for col in df.columns:       
        if '_sector' in col:
            df = df.rename(columns={col:col.replace('_sector', '_industry')})
    return df

In [6]:
def create_cell(df, cell_type):
    df = df.copy()
    df[f'{cell_type}_cell'] = 'r' + df[f'{cell_type}_muni'].astype('str') + '_' + df[f'{cell_type}_industry']
    return df

In [7]:
def save_df(df, output_folderpath_csv, output_folderpath_dta, output_filestem):
    output_filepath_csv =  Path(output_folderpath_csv / (output_filestem + '.csv'))
    output_filepath_dta =  Path(output_folderpath_dta / (output_filestem + '.dta'))
    if df.shape[0] > 1000000:
        # Save as zip containing csv and dta
        output_filepath_csv_zip = Path(output_folderpath_csv / (output_filestem + '.zip'))
        output_filepath_dta_zip = Path(output_folderpath_dta / (output_filestem + '.zip'))
        df.to_csv(output_filepath_csv_zip, index=False, compression={'method':'zip', 'archive_name':str(output_filepath_csv.name)})
        df.to_stata(output_filepath_dta_zip, write_index=False, compression={'method':'zip', 'archive_name':str(output_filepath_dta.name)})
        print("Saved csv as:", output_filepath_csv_zip)
        print("Saved dta as:", output_filepath_dta_zip)
    else:
        # Save as csv and dta directly
        df.to_csv(output_filepath_csv, index=False)
        df.to_stata(output_filepath_dta, write_index=False)
        print("Saved csv as:", output_filepath_csv)
        print("Saved dta as:", output_filepath_dta)

# Get crosswalk

In [8]:
muni_names_crosswalk = pd.read_excel(crosswalk_muni_filepath)
muni_names_crosswalk

Unnamed: 0,code,name
0,101,København
1,147,Frederiksberg
2,151,Ballerup
3,153,Brøndby
4,155,Dragør
...,...,...
95,846,Mariagerfjord
96,849,Jammerbugt
97,851,Aalborg
98,860,Hjørring


# Individual processing for each file

In [9]:
#1
df = rename_cols(pd.read_csv(dom_consumer_spending_filepath, low_memory=False))
save_df(df, output_folderpath_csv, output_folderpath_dta, dom_consumer_spending_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Domestic consumer spending matrix (predicted).zip
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Domestic consumer spending matrix (predicted).zip


Unnamed: 0,consumer_muni,consumer_industry,consumer_cell,producer_muni,producer_industry,producer_cell,consumer_cell_share
0,101,s01,r101_s01,101,s01,r101_s01,0.264243
1,101,s01,r101_s01,101,s02,r101_s02,0.079690
2,101,s01,r101_s01,101,s03,r101_s03,0.740490
3,101,s01,r101_s01,101,s04,r101_s04,0.436143
4,101,s01,r101_s01,101,s05,r101_s05,0.300539
...,...,...,...,...,...,...,...
4571499,860,s28,r860_s28,860,s17,r860_s17,0.479092
4571500,860,s28,r860_s28,860,s18,r860_s18,0.059426
4571501,860,s28,r860_s28,860,s77,r860_s77,0.288067
4571502,860,s28,r860_s28,860,s88,r860_s88,3.348448


In [10]:
#2
df = rename_cols(pd.read_csv(for_consumer_spending_filepath, low_memory=False))
save_df(df, output_folderpath_csv, output_folderpath_dta, for_consumer_spending_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Foreign consumer spending (predicted).csv
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Foreign consumer spending (predicted).dta


Unnamed: 0,consumer_muni,consumer_industry,consumer_cell,s_foreign_
0,101,s01,101s01,0.152803
1,101,s02,101s02,0.133499
2,101,s03,101s03,0.137363
3,101,s04,101s04,0.131447
4,101,s05,101s05,0.117851
...,...,...,...,...
2739,860,s24,860s24,0.066827
2740,860,s25,860s25,0.113650
2741,860,s26,860s26,0.080460
2742,860,s27,860s27,0.057865


In [11]:
#3
df = rename_cols(pd.read_csv(labor_comp_filepath, low_memory=False))
df = df.reindex(columns=['consumer_muni', 'consumer_industry', 'consumer_cell', 'producer_muni', 'producer_industry', 'producer_cell', 'comp_'])
save_df(df, output_folderpath_csv, output_folderpath_dta, labor_comp_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Labor compensation matrix.zip
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Labor compensation matrix.zip


Unnamed: 0,consumer_muni,consumer_industry,consumer_cell,producer_muni,producer_industry,producer_cell,comp_
0,101,s01,r101_s01,101,s01,r101_s01,2.534114e+09
1,101,s01,r101_s01,101,s02,r101_s02,8.997349e+06
2,101,s01,r101_s01,101,s03,r101_s03,5.806542e+06
3,101,s01,r101_s01,101,s04,r101_s04,1.092552e+07
4,101,s01,r101_s01,101,s05,r101_s05,2.818032e+05
...,...,...,...,...,...,...,...
7260619,860,s28,r860_s28,860,s26,r860_s26,0.000000e+00
7260620,860,s28,r860_s28,860,s27,r860_s27,0.000000e+00
7260621,860,s28,r860_s28,860,s77,r860_s77,0.000000e+00
7260622,860,s28,r860_s28,860,s88,r860_s88,0.000000e+00


In [12]:
#4
df = rename_cols(pd.read_csv(intermediates_trade_filepath, low_memory=False))
df = create_cell(df, 'supplier')
df = create_cell(df, 'user')
df = df.reindex(columns=['supplier_muni', 'supplier_industry', 'supplier_cell', 'user_muni', 'user_industry', 'user_cell', 'trade_'])
save_df(df, output_folderpath_csv, output_folderpath_dta, intermediates_trade_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Intermediates trade matrix.zip
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Intermediates trade matrix.zip


Unnamed: 0,supplier_muni,supplier_industry,supplier_cell,user_muni,user_industry,user_cell,trade_
0,101,s01,r101_s01,101,s01,r101_s01,231172896.0
1,101,s01,r101_s01,101,s02,r101_s02,45590576.0
2,101,s01,r101_s01,101,s03,r101_s03,6943966.0
3,101,s01,r101_s01,101,s04,r101_s04,85335336.0
4,101,s01,r101_s01,101,s05,r101_s05,21419094.0
...,...,...,...,...,...,...,...
7001311,860,s99,r860_s99,860,s26,r860_s26,0.0
7001312,860,s99,r860_s99,860,s27,r860_s27,0.0
7001313,860,s99,r860_s99,860,s77,r860_s77,0.0
7001314,860,s99,r860_s99,860,s88,r860_s88,0.0


In [13]:
#5
df = rename_cols(pd.read_csv(mixed_income_filepath, low_memory=False))
df = df.reindex(columns=['consumer_muni', 'consumer_industry', 'consumer_cell', 'producer_muni', 'producer_industry', 'producer_cell', 'mixedinc_'])
save_df(df, output_folderpath_csv, output_folderpath_dta, mixed_income_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Mixed income matrix.zip
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Mixed income matrix.zip


Unnamed: 0,consumer_muni,consumer_industry,consumer_cell,producer_muni,producer_industry,producer_cell,mixedinc_
0,101,s01,r101_s01,101,s01,r101_s01,145290848.0
1,101,s01,r101_s01,101,s02,r101_s02,0.0
2,101,s01,r101_s01,101,s03,r101_s03,0.0
3,101,s01,r101_s01,101,s04,r101_s04,0.0
4,101,s01,r101_s01,101,s05,r101_s05,0.0
...,...,...,...,...,...,...,...
7260619,860,s28,r860_s28,860,s26,r860_s26,0.0
7260620,860,s28,r860_s28,860,s27,r860_s27,0.0
7260621,860,s28,r860_s28,860,s77,r860_s77,0.0
7260622,860,s28,r860_s28,860,s88,r860_s88,0.0


In [14]:
#6
df = rename_cols(pd.read_csv(addl_consumer_flows_filepath, low_memory=False))
df = df.merge(muni_names_crosswalk, left_on='consumer_muni_name', right_on='name', how='inner', validate='m:1')
df = df.rename(columns={'var_name':'flow', 'code':'consumer_muni'})
df['consumer_industry'] = 's' + df['consumer_industry'].astype('str').str.zfill(2)
df = create_cell(df, 'consumer')
df = df.reindex(columns=['consumer_muni', 'consumer_industry', 'consumer_cell', 'flow', 'value'])
save_df(df, output_folderpath_csv, output_folderpath_dta, addl_consumer_flows_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Additional consumer flows.csv
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Additional consumer flows.dta


Unnamed: 0,consumer_muni,consumer_industry,consumer_cell,flow,value
0,101,s01,r101_s01,Consumer adjustment for pension entitlements r...,1.973664e+08
1,101,s02,r101_s02,Consumer adjustment for pension entitlements r...,3.083405e+08
2,101,s03,r101_s03,Consumer adjustment for pension entitlements r...,1.996905e+08
3,101,s04,r101_s04,Consumer adjustment for pension entitlements r...,1.347487e+09
4,101,s05,r101_s05,Consumer adjustment for pension entitlements r...,2.239006e+08
...,...,...,...,...,...
41155,860,s24,r860_s24,Surplus of owner-occupied housing to consumers,9.011279e+07
41156,860,s25,r860_s25,Surplus of owner-occupied housing to consumers,1.237044e+06
41157,860,s26,r860_s26,Surplus of owner-occupied housing to consumers,4.919812e+07
41158,860,s27,r860_s27,Surplus of owner-occupied housing to consumers,3.249603e+07


In [15]:
#7
df = rename_cols(pd.read_csv(addl_producer_flows_filepath, low_memory=False))
df = df.merge(muni_names_crosswalk, left_on='producer_muni_name', right_on='name', how='inner', validate='m:1')
df = df.rename(columns={'var_name':'flow', 'code':'producer_muni'})
df['producer_industry'] = 's' + df['producer_industry'].astype('str').str.zfill(2)
df = create_cell(df, 'producer')
df = df.reindex(columns=['producer_muni', 'producer_industry', 'producer_cell', 'flow', 'value'])
save_df(df, output_folderpath_csv, output_folderpath_dta, addl_producer_flows_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Additional producer flows.csv
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Additional producer flows.dta


Unnamed: 0,producer_muni,producer_industry,producer_cell,flow,value
0,101,s01,r101_s01,Producer dividends paid,4.729443e+07
1,101,s02,r101_s02,Producer dividends paid,7.684627e+07
2,101,s03,r101_s03,Producer dividends paid,1.788792e+08
3,101,s04,r101_s04,Producer dividends paid,3.584703e+08
4,101,s05,r101_s05,Producer dividends paid,5.034090e+07
...,...,...,...,...,...
15871,860,s26,r860_s26,Producer sales to government,1.267504e+09
15872,860,s27,r860_s27,Producer sales to government,1.637282e+09
15873,860,s77,r860_s77,Producer sales to government,0.000000e+00
15874,860,s88,r860_s88,Producer sales to government,0.000000e+00


In [16]:
#8a
df = rename_cols(pd.read_excel(crosswalk_muni_filepath))
df = df[df['code'] != 'foreign']
save_df(df, output_folderpath_csv, output_folderpath_dta, crosswalk_muni_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Crosswalk for municipality names.csv
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Crosswalk for municipality names.dta


Unnamed: 0,code,name
0,101,København
1,147,Frederiksberg
2,151,Ballerup
3,153,Brøndby
4,155,Dragør
...,...,...
94,840,Rebild
95,846,Mariagerfjord
96,849,Jammerbugt
97,851,Aalborg


In [17]:
#8b
df = rename_cols(pd.read_excel(crosswalk_industry_filepath))
df = df.rename(columns={'sector_number':'code', 'sector_name':'name'})
df['code'] = 's' + df['code'].astype('str').str.zfill(2)
save_df(df, output_folderpath_csv, output_folderpath_dta, crosswalk_industry_output_filestem)
df

Saved csv as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\csv\Crosswalk for industry names.csv
Saved dta as: c:\Users\ftrikos\Dropbox\Jason-Kilian\Danish Macro\disaggregated_accounts\src\assets\data\dta\Crosswalk for industry names.dta


Unnamed: 0,code,name
0,s01,1 - Food away from home
1,s02,2 - Entertainment
2,s03,3 - Groceries
3,s04,"4 - Personal services, pharmacies"
4,s05,"5 - Vehicles, fuel, vehicle repair, public tra..."
5,s06,"6 - Hotels, rental cars"
6,s07,7 - Airlines
7,s08,"8 - Telecommunication, insurance"
8,s09,9 - Online stores
9,s10,10 - Utilities
