In [1]:
import numpy as np
from tqdm import tqdm
from stoclust import Group, Hierarchy
from functools import reduce
import os

In [2]:
# Create the output directory tree used by every np.save call in this notebook.
# makedirs also creates missing parents (the old os.mkdir call raised
# FileNotFoundError when 'scripts' itself did not exist yet), and exist_ok
# makes re-running the cell a no-op.
os.makedirs(os.path.abspath('scripts/data/ID'), exist_ok=True)

# LOAD DATA

First we load the data from the CSV files we stored it in. To start with, we load the names of the variables (sectors, regions, commodities, factors, etc.).

In [3]:
def _load_names(path):
    """Return the second column of the CSV at `path` as an array of strings.

    The first row is skipped as a header. The file is read in binary mode
    (as before) but is now closed as soon as it has been parsed, instead of
    leaving the handle to the garbage collector.
    """
    with open(path, "rb") as handle:
        table = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    return table[:, 1]


# Top-level label sets.
commodities = Group(_load_names("data/H2.csv"))
regions = Group(_load_names("data/H1.csv"))
sectors = Group(_load_names("data/SSET.csv"))
np.save('scripts/data/ID/commodities.npy',commodities.elements)
np.save('scripts/data/ID/regions.npy',regions.elements)


# Factors, with two subgroups declared as subsets of `factors`.
factors = Group(_load_names("data/H6.csv"))
land_factors = Group(_load_names("data/AEZS.csv"),superset=factors)
lab_factors = Group(['UnSkLab','SkLab'],superset=factors)
# 'Land' and 'Labor' are appended after the original factor labels, so they
# sit at indices factors.size and factors.size + 1.
# NOTE(review): each dict value looks like (scale, member indices) -- confirm
# against stoclust's Hierarchy documentation.
factors_hier = Hierarchy(
    factors,
    Group(list(factors)+['Land']+['Labor']),
    {
        factors.size + 0 : (1,land_factors.in_superset),
        factors.size + 1 : (2,lab_factors.in_superset)
    }
)
# Presumably the aggregation in which 'Land' (scale 1) is merged but
# 'Labor' (scale 2) is not -- TODO confirm.
orig_factors = factors_hier.at_scale(1)
orig_factors.clusters.set_super(sectors)
np.save('scripts/data/ID/factors.npy',factors.elements)
np.save('scripts/data/ID/factors_land.npy',land_factors.elements)
np.save('scripts/data/ID/factors_lab.npy',lab_factors.elements)

# Commodity subgroups: `fuels` is declared as a subset of `ergs`, which is
# itself a subset of `commodities`.
ergs = Group(_load_names("data/EC.csv"),superset=commodities)
fuels = Group(_load_names("data/FC.csv"),superset=ergs)
ag = Group(_load_names("data/MLND.csv"),superset=commodities)
# Appended labels: 'Fuels' at commodities.size, 'Agriculture' at +1 and
# 'Energy' at +2. The 'Energy' entry lists the 'Fuels' cluster index together
# with the commodity index of 'ely'.
commodities_hier = Hierarchy(
    commodities,
    Group(list(commodities)+['Fuels']+['Agriculture']+['Energy']),
    {
        commodities.size + 0 : (1,ergs.in_superset[fuels.in_superset]),
        commodities.size + 1 : (1,ag.in_superset),
        commodities.size + 2 : (2,np.array([commodities.size + 0,ergs.in_superset[ergs.ind['ely']]]))
    }
)
np.save('scripts/data/ID/commodities_energy.npy',ergs.elements)
np.save('scripts/data/ID/commodities_fuel.npy',fuels.elements)
np.save('scripts/data/ID/commodities_ag.npy',ag.elements)

covers = Group(_load_names("data/COVS.csv"))
np.save('scripts/data/ID/covers.npy',covers.elements)

# Prefixed sector labels, built once and reused by the groups below.
m_names = ['m_'+c for c in commodities]
d_names = ['d_'+c for c in commodities]
a_names = ['a_'+c for c in commodities]
tmm_names = ['tmm_'+r for r in regions]
tee_names = ['tee_'+r for r in regions]
tssm_names = ['tssm_'+c for c in commodities]
tssd_names = ['tssd_'+c for c in commodities]
# For each region in turn: otp_, wtp_, atp_; followed by the three 'pvst' labels.
margin_names = [prefix+r for r in regions for prefix in ('otp_','wtp_','atp_')]
margin_names = margin_names + ['otp_pvst','wtp_pvst','atp_pvst']
ww_names = ['ww_'+r for r in regions]

# Every group below is declared as a subset of the original `sectors` labels.
imports = Group(m_names, superset=sectors)
activities = Group(a_names, superset=sectors)
domestic = Group(d_names, superset=sectors)
commodity_sectors = Group(m_names + d_names + a_names, superset=sectors)

regional_taxes = Group(tmm_names + tee_names, superset=sectors)
industry_taxes = Group(tssm_names + tssd_names, superset=sectors)
orig_factor_taxes = Group(['tf_'+f for f in orig_factors.clusters], superset=sectors)

margins = Group(margin_names, superset=sectors)
trade = Group(ww_names, superset=sectors)

# All listed sectors except the factor and factor-tax ('tf_') labels.
all_but_factors = Group(
    np.array(
        m_names + d_names + a_names
        + tmm_names + tee_names
        + tssm_names + tssd_names
        + margin_names
        + ww_names
        + ['REGHOUS','PRIV','PRODTAX','DIRTAX','GOVT','CGDS']
    ),
    superset=sectors,
)

In [4]:
# Assemble the expanded sector label list piece by piece; the order of the
# pieces fixes each sector's index in `full_sectors`.
sector_names = []
for prefix in ('m_', 'd_', 'a_'):
    sector_names += [prefix+c for c in commodities]
sector_names += list(factors.elements)
for prefix in ('tmm_', 'tee_'):
    sector_names += [prefix+r for r in regions]
for prefix in ('tssm_', 'tssd_'):
    sector_names += [prefix+c for c in commodities]
sector_names += ['tf_'+f for f in factors]
# For each region in turn: otp_, wtp_, atp_.
sector_names += [prefix+r for r in regions for prefix in ('otp_', 'wtp_', 'atp_')]
sector_names += ['otp_pvst','wtp_pvst','atp_pvst']
sector_names += ['ww_'+r for r in regions]
# 'TRUST' is the one label here that is absent from `all_but_factors`.
sector_names += ['REGHOUS','PRIV','PRODTAX','DIRTAX','GOVT','CGDS','TRUST']

full_sectors = Group(np.array(sector_names))
np.save('scripts/data/ID/sectors.npy',full_sectors.elements)

# Commodity-indexed groups, declared as subsets of the expanded `full_sectors`.
full_imports = Group(['m_'+c for c in commodities], superset=full_sectors)
full_activities = Group(['a_'+c for c in commodities], superset=full_sectors)
full_domestic = Group(['d_'+c for c in commodities], superset=full_sectors)
# Import, domestic and activity sectors together, mirroring `commodity_sectors`.
# The last term used to repeat the 'd_' labels, which duplicated the domestic
# sectors and left the activity ('a_') sectors out of the group.
full_commodity_sectors = Group(['m_'+c for c in commodities]
                            +['d_'+c for c in commodities]
                            +['a_'+c for c in commodities], superset=full_sectors)

# Re-point `factors` at the expanded sector list so that factors.in_superset
# indexes into `full_sectors`.
factors.set_super(full_sectors)

# Label lists shared by the groups below.
full_tmm = ['tmm_'+r for r in regions]
full_tee = ['tee_'+r for r in regions]
full_tssm = ['tssm_'+c for c in commodities]
full_tssd = ['tssd_'+c for c in commodities]
# For each region in turn: otp_, wtp_, atp_; followed by the three 'pvst' labels.
full_margin_names = [prefix+r for r in regions for prefix in ('otp_','wtp_','atp_')]
full_margin_names = full_margin_names + ['otp_pvst','wtp_pvst','atp_pvst']
full_ww = ['ww_'+r for r in regions]

full_regional_taxes = Group(full_tmm + full_tee, superset=full_sectors)
full_industry_taxes = Group(full_tssm + full_tssd, superset=full_sectors)
full_factor_taxes = Group(['tf_'+f for f in factors], superset=full_sectors)

full_margins = Group(full_margin_names, superset=full_sectors)
full_trade = Group(full_ww, superset=full_sectors)

# Same label list, in the same order, as `all_but_factors`, but indexed
# against `full_sectors`.
full_all_but_factors = Group(
    np.array(
        ['m_'+c for c in commodities]
        + ['d_'+c for c in commodities]
        + ['a_'+c for c in commodities]
        + full_tmm + full_tee
        + full_tssm + full_tssd
        + full_margin_names
        + full_ww
        + ['REGHOUS','PRIV','PRODTAX','DIRTAX','GOVT','CGDS']
    ),
    superset=full_sectors,
)

We now load the social accounting matrix. This will require some modification because the SAM as computed by GTAP 8 does not balance across all sectors, but this can be easily remedied by the use of a certain global sector which balances accounts between investors and capital. See (1) for more detail.

In [52]:
# Load one table per region (files are numbered from 1) and stack them along
# a new leading axis.
sam_list = []
for i in tqdm(range(regions.size)):
    # The handle is closed explicitly; it used to be left open after np.loadtxt.
    with open("data/SAM/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    # Keep the numeric body: drop the first column, the last two columns and the last row.
    sam_list.append(raw[:-1,1:-1][:,:-1].astype(float))
sam = np.stack(sam_list,axis=0)

100%|██████████| 134/134 [03:04<00:00,  1.38s/it]


In [6]:
# EVFA and VFM: one table per factor (files numbered from 1), stacked along a
# new leading axis. Every file handle is now closed right after parsing.
EVFA_list = []
for i in tqdm(range(factors.size)):
    with open("data/EVFA/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    # Drop the first column, the last two columns and the last row.
    EVFA_list.append(raw[:-1,1:-1][:,:-1].astype(float))
EVFA = np.stack(EVFA_list,axis=0)

VFM_list = []
for i in tqdm(range(factors.size)):
    with open("data/VFM/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    VFM_list.append(raw[:-1,1:-1][:,:-1].astype(float))
VFM = np.stack(VFM_list,axis=0)

# EVOA is a single table trimmed the same way.
with open("data/EVOA.csv", "rb") as handle:
    EVOA = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)[:-1,1:-1][:,:-1].astype(float)

100%|██████████| 22/22 [00:01<00:00, 19.46it/s]
100%|██████████| 22/22 [00:00<00:00, 57.60it/s]


In [7]:
# Entries of the original SAM's 'CGDS' row at the columns of the aggregated
# factor clusters, one row per region.
cgds_row = sectors.ind['CGDS']
cgds_to_factor_clusters = sam[:, cgds_row, orig_factors.clusters.in_superset]
# NOTE(review): position 3 is hard-coded; presumably it is the 'Capital'
# cluster, since the value is written to the 'Capital' column below -- confirm.
KDEP = cgds_to_factor_clusters[:, 3]

# Zero everywhere except the 'Capital' column, which receives KDEP.
VDEP = np.zeros([regions.size, factors.size])
VDEP[:, factors.ind['Capital']] = KDEP

# Building the SAM

As we mentioned, we will have to harmonize the social accounting matrix by adjusting some terms.

In [25]:
# One zero-filled square matrix over the expanded sector list per region.
n_full_sectors = full_sectors.size
full_sam = np.zeros((regions.size, n_full_sectors, n_full_sectors))

## Things that didn't change

In [26]:
# Copy the block shared by both layouts: the same labels, located by their
# indices in `sectors` (source) and in `full_sectors` (destination).
region_idx = np.arange(regions.size)
src_idx = all_but_factors.in_superset
dst_idx = full_all_but_factors.in_superset

full_sam[np.ix_(region_idx, dst_idx, dst_idx)] = sam[np.ix_(region_idx, src_idx, src_idx)]

## Things that changed: factors

In [27]:
# Index sets reused throughout this cell.
region_idx = np.arange(regions.size)
factor_idx = factors.in_superset
factor_tax_idx = full_factor_taxes.in_superset
activity_idx = full_activities.in_superset

# VFM and EVFA are stacked with one table per factor on axis 0. moveaxis
# sends axis 0 -> 1, 1 -> 2 and 2 -> 0, and the trailing slice drops the last
# entry along what was originally axis 1.
vfm_by_region = np.moveaxis(VFM,[0,1,2],[1,2,0])[:,:,:-1]
wedge_by_region = np.moveaxis(EVFA-VFM,[0,1,2],[1,2,0])[:,:,:-1]
# Per (region, factor): VFM summed over its axis 1, minus EVOA.
vfm_minus_evoa = (np.sum(VFM,axis=1) - EVOA).T

# Factor rows and factor-tax rows, at the activity columns.
full_sam[np.ix_(region_idx, factor_idx, activity_idx)] = vfm_by_region
full_sam[np.ix_(region_idx, factor_tax_idx, activity_idx)] = wedge_by_region

# Row slices taken with one integer index, exactly as in the chained form
# full_sam[:, i, :][...] = ...; the assignments only reach full_sam if each
# slice is a view, which holds when `ind[...]` is a plain integer.
reghous_row = full_sam[:,full_sectors.ind['REGHOUS'],:]
reghous_row[np.ix_(region_idx, factor_idx)] = EVOA.T - VDEP

reghous_row = full_sam[:,full_sectors.ind['REGHOUS'],:]
reghous_row[np.ix_(region_idx, factor_tax_idx)] = np.sum(wedge_by_region,axis=2)

dirtax_row = full_sam[:,full_sectors.ind['DIRTAX'],:]
dirtax_row[np.ix_(region_idx, factor_idx)] = vfm_minus_evoa

cgds_row = full_sam[:,full_sectors.ind['CGDS'],:]
cgds_row[np.ix_(region_idx, factor_idx)] = VDEP

# REGHOUS row, DIRTAX column: the per-region total of the DIRTAX factor entries.
full_sam[:,full_sectors.ind['REGHOUS'],full_sectors.ind['DIRTAX']] = np.sum(vfm_minus_evoa,axis=1)

In [28]:
# Sector indices used in this cell.
reghous = full_sectors.ind['REGHOUS']
cgds = full_sectors.ind['CGDS']

# Per region: the REGHOUS row total minus the PRIV and GOVT entries of the
# REGHOUS column.
# NOTE(review): the row total includes the (REGHOUS, CGDS) entry written
# below, so re-running this cell without rebuilding full_sam changes SAVE.
reghous_row_total = full_sam[:, reghous].sum(axis=1)
SAVE = (
    reghous_row_total
    - full_sam[:, full_sectors.ind['PRIV'], reghous]
    - full_sam[:, full_sectors.ind['GOVT'], reghous]
)

# Positive values go to (CGDS, REGHOUS); negative values go, sign-flipped,
# to (REGHOUS, CGDS).
full_sam[:, cgds, reghous] = (SAVE > 0) * SAVE
full_sam[:, reghous, cgds] = (SAVE < 0) * (-SAVE)

In [29]:
cgds = full_sectors.ind['CGDS']
trade_idx = full_trade.in_superset

# Clear every entry between CGDS and the 'ww_' trade sectors, in both directions.
full_sam[:, cgds, trade_idx] = 0.0
full_sam[:, trade_idx, cgds] = 0.0

# Per-region totals of the CGDS column (cap_in) and of the CGDS row (cap_out).
cap_in = full_sam[:, :, cgds].sum(axis=1)
cap_out = full_sam[:, cgds, :].sum(axis=1)

In [30]:
# Per-region gap between the CGDS row total and the CGDS column total.
FLOW = cap_out - cap_in

trust = full_sectors.ind['TRUST']
cgds = full_sectors.ind['CGDS']
# A positive gap is booked at (TRUST, CGDS); a negative gap is booked,
# sign-flipped, at (CGDS, TRUST).
full_sam[:, trust, cgds] = FLOW * (FLOW > 0)
full_sam[:, cgds, trust] = (FLOW < 0) * (-FLOW)

In [61]:
# Persist the assembled matrix; full_sam was allocated as
# (regions, full_sectors, full_sectors).
np.save('scripts/data/SAM.npy',full_sam)

# Material Accounting

Now we load satellite data.

In [22]:
# One table per element of `ag` (files numbered from 1), stacked on axis 0.
AREA_list = []
for i in tqdm(range(ag.size)):
    # The handle is closed explicitly; it used to be left open after np.loadtxt.
    with open("data/AREA/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    # Drop the first column, the last two columns and the last row.
    AREA_list.append(raw[:-1,1:-1][:,:-1].astype(float))
AREA = np.stack(AREA_list,axis=0)
# Saved with the per-file axis moved to the last position (0 -> 2, 1 -> 0, 2 -> 1).
np.save('scripts/data/AREA.npy',np.moveaxis(AREA,[0,1,2],[2,0,1]))

100%|██████████| 8/8 [00:00<00:00, 141.84it/s]


In [23]:
# One table per element of `ag` (files numbered from 1), stacked on axis 0.
TONS_list = []
for i in tqdm(range(ag.size)):
    # The handle is closed explicitly; it used to be left open after np.loadtxt.
    with open("data/TONS/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    # Drop the first column, the last two columns and the last row.
    TONS_list.append(raw[:-1,1:-1][:,:-1].astype(float))
TONS = np.stack(TONS_list,axis=0)
# Saved with the per-file axis moved to the last position (0 -> 2, 1 -> 0, 2 -> 1).
np.save('scripts/data/TONS.npy',np.moveaxis(TONS,[0,1,2],[2,0,1]))

100%|██████████| 8/8 [00:00<00:00, 197.69it/s]


In [36]:
# One table per element of `covers` (files numbered from 1), stacked on axis 0.
LCOV_list = []
for i in tqdm(range(covers.size)):
    # The handle is closed explicitly; it used to be left open after np.loadtxt.
    with open("data/LCOV/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    # Drop the first column, the last two columns and the last row.
    LCOV_list.append(raw[:-1,1:-1][:,:-1].astype(float))
LCOV = np.stack(LCOV_list,axis=0)
# Saved with the first two axes swapped.
np.save('scripts/data/LCOV.npy',np.moveaxis(LCOV,[0,1,2],[1,0,2]))

100%|██████████| 7/7 [00:00<00:00, 132.20it/s]


In [17]:
# One table per element of `lab_factors` (files numbered from 1), stacked on axis 0.
Q_list = []
for i in tqdm(range(lab_factors.size)):
    # The handle is closed explicitly; it used to be left open after np.loadtxt.
    with open("data/Q/"+str(i+1)+".csv", "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    # Drop the first column, the last two columns and the last row.
    Q_list.append(raw[:-1,1:-1][:,:-1].astype(float))
Q = np.stack(Q_list,axis=0)
# Saved with the axis order reversed (0 <-> 2).
np.save('scripts/data/LABF.npy',np.moveaxis(Q,[0,1,2],[2,1,0]))

100%|██████████| 2/2 [00:00<00:00, 39.49it/s]


In [4]:
# Rows of the energy matrix: import and domestic sectors of each energy
# commodity, plus the 'ww_' trade sectors. Columns: every full sector.
energy_mat_sectors = Group(np.array(['m_'+c for c in ergs]+
                                    ['d_'+c for c in ergs]+
                                    ['ww_'+r for r in regions]),superset=full_sectors)
energy_mat = np.zeros([regions.size,energy_mat_sectors.size,full_sectors.size])


def _load_energy_table(path):
    """Read the CSV at `path` and return its trimmed body as a float array.

    The header row, first column, last two columns and last row are dropped.
    The file handle is closed after parsing, and values are converted to
    float here (as the other loaders in this notebook do) rather than relying
    on implicit string-to-float casting when assigned into `energy_mat`.
    """
    with open(path, "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    return raw[:-1,1:-1][:,:-1].astype(float)


# Tables indexed below as [energy commodity, region].
EDG = _load_energy_table("data/EDG.csv")
EDP = _load_energy_table("data/EDP.csv")
EIG = _load_energy_table("data/EIG.csv")
EIP = _load_energy_table("data/EIP.csv")

for c in tqdm(range(ergs.size)):
    EDF_c = _load_energy_table("data/EDF/"+str(c+1)+".csv")
    EIF_c = _load_energy_table("data/EIF/"+str(c+1)+".csv")
    EXI_c = _load_energy_table("data/EXI/"+str(c+1)+".csv")

    # Row and column positions of this commodity's domestic/import sectors.
    dom_row = energy_mat_sectors.ind['d_'+ergs.elements[c]]
    imp_row = energy_mat_sectors.ind['m_'+ergs.elements[c]]
    imp_col = full_sectors.ind['m_'+ergs.elements[c]]

    # Row d of EDF_c / EIF_c fills the column of activity d, for all regions.
    for d in (range(commodities.size)):
        act_col = full_sectors.ind['a_'+commodities.elements[d]]
        energy_mat[:,dom_row,act_col] = EDF_c[d,:]
        energy_mat[:,imp_row,act_col] = EIF_c[d,:]

    # Column r of EXI_c goes from the domestic row to trade sector r; row r
    # goes from trade sector r to the import column.
    for r in range(regions.size):
        trade_label = 'ww_'+regions.elements[r]
        energy_mat[:,dom_row,full_sectors.ind[trade_label]] = EXI_c[:,r]
        energy_mat[:,energy_mat_sectors.ind[trade_label],imp_col] = EXI_c[r,:]

    # The last row of EDF_c / EIF_c is written to the CGDS column.
    energy_mat[:,dom_row,full_sectors.ind['CGDS']] = EDF_c[-1,:]
    energy_mat[:,imp_row,full_sectors.ind['CGDS']] = EIF_c[-1,:]

    energy_mat[:,dom_row,full_sectors.ind['GOVT']] = EDG[c,:]
    energy_mat[:,dom_row,full_sectors.ind['PRIV']] = EDP[c,:]
    energy_mat[:,imp_row,full_sectors.ind['GOVT']] = EIG[c,:]
    energy_mat[:,imp_row,full_sectors.ind['PRIV']] = EIP[c,:]

np.save('scripts/data/ERG.npy',energy_mat)

100%|██████████| 6/6 [00:00<00:00, 13.03it/s]


In [5]:
# Rows of the carbon matrix: import and domestic sectors of each fuel.
# Columns: every full sector.
carbon_mat_sectors = Group(np.array(['m_'+c for c in fuels]+
                                    ['d_'+c for c in fuels]),superset=full_sectors)
carbon_mat = np.zeros([regions.size,carbon_mat_sectors.size,full_sectors.size])


def _load_carbon_table(path):
    """Read the CSV at `path` and return its trimmed body as a float array.

    The header row, first column, last two columns and last row are dropped.
    The file handle is closed after parsing, and values are converted to
    float here (as the other loaders in this notebook do) rather than relying
    on implicit string-to-float casting when assigned into `carbon_mat`.
    """
    with open(path, "rb") as handle:
        raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
    return raw[:-1,1:-1][:,:-1].astype(float)


# Tables indexed below as [fuel, region].
MDG = _load_carbon_table("data/MDG.csv")
MDP = _load_carbon_table("data/MDP.csv")
MIG = _load_carbon_table("data/MIG.csv")
MIP = _load_carbon_table("data/MIP.csv")

for c in tqdm(range(fuels.size)):
    MDF_c = _load_carbon_table("data/MDF/"+str(c+1)+".csv")
    MIF_c = _load_carbon_table("data/MIF/"+str(c+1)+".csv")

    # Row positions of this fuel's domestic/import sectors.
    dom_row = carbon_mat_sectors.ind['d_'+fuels.elements[c]]
    imp_row = carbon_mat_sectors.ind['m_'+fuels.elements[c]]

    # Row d of MDF_c / MIF_c fills the column of activity d, for all regions.
    for d in (range(commodities.size)):
        act_col = full_sectors.ind['a_'+commodities.elements[d]]
        carbon_mat[:,dom_row,act_col] = MDF_c[d,:]
        carbon_mat[:,imp_row,act_col] = MIF_c[d,:]

    # The last row of MDF_c / MIF_c is written to the CGDS column.
    carbon_mat[:,dom_row,full_sectors.ind['CGDS']] = MDF_c[-1,:]
    carbon_mat[:,imp_row,full_sectors.ind['CGDS']] = MIF_c[-1,:]

    carbon_mat[:,dom_row,full_sectors.ind['GOVT']] = MDG[c,:]
    carbon_mat[:,dom_row,full_sectors.ind['PRIV']] = MDP[c,:]
    carbon_mat[:,imp_row,full_sectors.ind['GOVT']] = MIG[c,:]
    carbon_mat[:,imp_row,full_sectors.ind['PRIV']] = MIP[c,:]

np.save('scripts/data/CO2.npy',carbon_mat)

100%|██████████| 5/5 [00:00<00:00, 31.67it/s]


In [4]:
# Read the table with an explicitly closed handle (it used to be left open),
# then drop the first column, the last two columns and the last row.
with open("data/POP.csv", "rb") as handle:
    POP = np.loadtxt(handle, delimiter=",", skiprows=1,dtype=str)[:-1,1:-1][:,:-1].astype(float)

In [5]:
# Store the transpose of the loaded table.
np.save('scripts/data/POP.npy',POP.T)

# Time Series

Here we load the time-series trade data; this will not be used for our current work but may be of use or interest to the reader.

In [11]:
# Year labels 'Y1995' .. 'Y2009'; each one is also a directory name under data/VTTS.
years = Group(['Y'+str(j) for j in range(1995,2010)])
np.save('scripts/data/ID/years.npy',years.elements)

TSERIES_list = []
for y in tqdm(years):
    # One table per commodity (files numbered from 1) inside this year's directory.
    YARRAY_list = []
    for j in range(commodities.size):
        # The handle is closed explicitly; it used to be left open after np.loadtxt.
        with open("data/VTTS/"+y+"/"+str(j+1)+".csv", "rb") as handle:
            raw = np.loadtxt(handle, delimiter=",", skiprows=1, dtype=str)
        # Drop the first column, the last two columns and the last row.
        YARRAY_list.append(raw[:-1,1:-1][:,:-1].astype(float))
    TSERIES_list.append(np.stack(YARRAY_list))
# Leading axes: year, then commodity.
TSERIES = np.stack(TSERIES_list)

np.save('scripts/data/TSERIES.npy',TSERIES)

100%|██████████| 15/15 [00:15<00:00,  1.05s/it]


## References

(1) "Bound by Chains of Carbon." Luke Bergmann. 2013