In [36]:
import numpy as np 
import pandas as pd
import statsmodels.api as sm
from scipy.special import gamma

In [37]:
cz = pd.read_csv('../data/raw/archive/cz_county.csv')
cz = cz.dropna()
cz['LMA/CZ'] = cz['LMA/CZ'].astype(str).str[:-2].astype(int)
idx = cz.groupby(['LMA/CZ'])['Labor Force'].idxmax()
cz = cz.loc[idx].reset_index(drop=True)
cz = cz[['LMA/CZ', 'County Name']].set_index('LMA/CZ')
cz['County Name'] = cz['County Name'].str.replace('"', '')

In [38]:
theta = 3
rho = 0.5
alpha = 7
eta = 1.65

In [39]:
df_employment = pd.read_csv('../data/processed/city_occ_employment/city_occ_e_2019.csv')
df_employment = df_employment.drop(columns=['other'])
df_employment['COMZONE'] = df_employment['COMZONE'].map(cz['County Name'])
df_employment = df_employment.set_index('COMZONE')

rows = df_employment.index
cols = df_employment.columns

df_employment['cz_total'] = df_employment.sum(axis=1)
df_employment.loc['occ_total'] = df_employment.sum(axis=0)
df_employment = df_employment.div(df_employment['cz_total'], axis=0)

omega_k = df_employment.loc['occ_total'].drop('cz_total').to_numpy()
df_employment = df_employment.drop(index='occ_total').drop(columns='cz_total').to_numpy()

df_wages = pd.DataFrame((df_employment ** (1 / theta)) * (omega_k ** (rho / (theta * (1 - rho)))))
df_wages.columns = cols
df_wages.index = rows

In [40]:
w_ck = df_wages.fillna(0).to_numpy()

lambda_k = (w_ck ** theta).sum(axis=0)
Z_ck = gamma(theta - 1 / theta) * (w_ck ** theta) * (lambda_k ** (- rho)) * np.sum((lambda_k ** (1 - rho)) ** ((1 - theta) / theta), axis=0)

# T_ck_top = w_ck
# T_ck_bottom = (
#     gamma((1 - theta) / theta) * 
#     (w_ck ** theta) * 
#     (lambda_k ** (- rho)) *  
#     ((lambda_k ** (1 - rho)).sum(axis=0) ** ((1 - theta) / theta))
# )
# T_ck = pd.DataFrame(np.log(T_ck_top / np.abs(T_ck_bottom)))

T_ck_left = (w_ck ** (- (alpha) / (1 - alpha))) / (Z_ck ** (1 / (1 - alpha)))
T_ck_right = np.repeat(np.sum(w_ck * Z_ck, axis=0).reshape(1, -1), w_ck.shape[0], axis=0) ** (1 / (1 - alpha))
T_ck = np.log(T_ck_left * T_ck_right)
T_ck = pd.DataFrame(np.where(np.isinf(T_ck), np.nan, T_ck))

T_ck.columns = df_wages.columns
T_ck['COMZONE'] = df_wages.index
T_ck = pd.melt(T_ck, id_vars=['COMZONE'], var_name='occupation', value_name='wage')

  T_ck_left = (w_ck ** (- (alpha) / (1 - alpha))) / (Z_ck ** (1 / (1 - alpha)))
  T_ck = np.log(T_ck_left * T_ck_right)


In [41]:
model = sm.OLS.from_formula('wage ~ C(occupation) + C(COMZONE)', data=T_ck)
model = model.fit()
coefficients = model.params
T_ck['residual'] = model.resid

T_k = pd.DataFrame(coefficients[coefficients.index.str.contains('occupation')]).reset_index()
T_k['occupation'] = T_k['index'].str.extract(r'C\(occupation\)\[T\.(.*?)\]')
T_k = T_k.drop(columns=['index']).rename(columns={0: 'coefficient'})
T_k = T_k[['occupation', 'coefficient']]
T_k['coefficient'] = np.exp(T_k['coefficient'])

T_c = pd.DataFrame(coefficients[coefficients.index.str.contains('COMZONE')]).reset_index()
T_c['COMZONE'] = T_c['index'].str.extract(r'C\(COMZONE\)\[T\.(.*?)\]')
T_c = T_c.drop(columns=['index']).rename(columns={0: 'coefficient'})
T_c = T_c[['COMZONE', 'coefficient']]
T_c['coefficient'] = np.exp(T_c['coefficient'])

t_ck = T_ck.pivot(index='COMZONE', columns='occupation', values='residual')
t_ck = t_ck.apply(lambda x: np.exp(x))

# T_c.to_csv('params/T_c.csv', index=False)
# T_k.to_csv('params/T_k.csv', index=False)
# t_ck.to_csv('params/t_ck.csv')