In [46]:
import pandas as pd
import numpy as np

# The use-detail file has no header row; these names let later cells refer to
# its fields by label instead of by position.
use_cols = ["commodity", "industry", "year", "trn",
            "producer_rev", "transportation", "railroad", "truck",
            "water", "air", "oilpipe", "gaspipe",
            "wholesale", "retail", "purchaser"]
use = pd.read_csv("ndn0307/IOUseDetail.txt", header=None, names=use_cols)

# All commodity and industry codes come with a space at the end (probably to
# allow several codes per field, but here every field holds a single code).
# Strip trailing whitespace only, rather than blindly dropping the last
# character, so a code that happens to have no trailing space is not truncated
# and applying the clean-up twice is harmless.
for code_col in ("industry", "commodity"):
    use[code_col] = use[code_col].str.rstrip()

In [47]:
# The make-detail file is read without a header row, so the column labels are
# supplied explicitly.
make_cols = ["industry", "commodity", "trn", "final_value"]
make = pd.read_csv(
    "ndn0307/IOMakeDetail.txt",
    names=make_cols,
    header=None,
)


In [48]:
# Group the make table by commodity. A scalar key (not a one-element list) is
# used so that iterating over the groups yields plain labels on every pandas
# version; pandas >= 2.0 yields 1-tuples for a list key.
commodities = make.groupby("commodity")

# Per-commodity column totals. `numeric_only=True` keeps the string code
# columns out of the aggregation (newer pandas no longer drops them silently).
commodity_totals = commodities.sum(numeric_only=True)

In [49]:
# Distinct commodity and industry codes found in the make table.
list_commodities, list_industries = (
    make[field].unique() for field in ("commodity", "industry")
)

In [50]:
c_len = len(list_commodities)
i_len = len(list_industries)

# Industry-by-commodity matrix of make values; (industry, commodity) pairs that
# never occur in `make` stay at zero.
make_neat = pd.DataFrame(np.zeros((i_len, c_len)), columns=list_commodities, index=list_industries)

# `DataFrame.set_value` was removed in pandas 1.0, so cells are written through
# the `.at` scalar accessor instead. The commodity label is read from the rows
# themselves rather than from the group key, because iterating a groupby built
# from a one-element list yields 1-tuples as keys on pandas >= 2.0.
for _, commod in commodities:
    for ind, com, val in zip(commod["industry"], commod["commodity"], commod["final_value"]):
        make_neat.at[ind, com] = val

# Divide every commodity column by that commodity's total, giving each
# industry's share of the commodity.
# NOTE(review): a commodity whose total is zero produces NaN/inf in its column.
share = make_neat.div(commodity_totals["final_value"], axis="columns")

In [51]:
# Now computing revshare
# Assumption: only producer_rev is used, since the costs in the other fields (6-14) do not appear to go to the producer.
# Assumption: commodities are the same throughout, but industries are not necessarily,
# i.e. there is a finite number of kinds of natural resources but there are many differentiated industries.

# Taken before the commodity filter below, so an industry that only uses
# commodities missing from `make` still gets an (all-zero) column.
list_industries_use = use["industry"].unique()

use = use[use["commodity"].isin(list_commodities)]  # Only look at commodities that are in both make and use
i_len_use = len(list_industries_use)

# Scalar key so that group keys are plain labels on every pandas version.
commod_use = use.groupby("commodity")

# Handily enough, matrix multiplication (share x use_neat) will give us revshare.
# This is assuming that the commodities of use and make are bijective.
use_neat = pd.DataFrame(np.zeros((c_len, i_len_use)),
                        index=list_commodities,
                        columns=list_industries_use)

# `DataFrame.set_value` was removed in pandas 1.0; write each cell through `.at`.
# Labels are read from the rows rather than from the group key, which is a
# 1-tuple on pandas >= 2.0 when the groupby was built from a one-element list.
for _, commod in commod_use:
    for com, ind, val in zip(commod["commodity"], commod["industry"], commod["producer_rev"]):
        use_neat.at[com, ind] = val

# (make industries x commodities) . (commodities x use industries)
revshare = share.dot(use_neat)

In [52]:
# Now on CUST
# Pick the one column that is needed before summing, so the string code
# columns are never aggregated (they are no longer dropped silently on newer
# pandas) and no work is wasted on columns that are thrown away.
sales = make.groupby("industry")["final_value"].sum()
purchases = use.groupby("industry")["producer_rev"].sum()


# Cust is defined as the total cash flows from j to i (revshares) as a fraction of total sales of i (sales).
# Each row of revshare is divided by the sales total with the matching row label.
cust = revshare.div(sales, axis='rows')

# Supp is defined as the cost share of input to i as a fraction of output j.
# With the default axis, each column of revshare is divided by the purchases
# total with the matching column label.
supp = revshare.div(purchases)