# Inequality measures

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import libpysal as ps
import numpy as np
import geopandas as gpd
import mapclassify as mc
import inequality

In [None]:
# Show libpysal's description of the bundled 'mexico' example dataset.
ps.examples.explain('mexico')

In [None]:
# We use an updated shapefile rather than the built-in libpysal dataset.
# The path is relative to the notebook's working directory.

shp_link = 'data/mexicojoin.shp'
data_table = gpd.read_file(shp_link)  # geopandas reads the shapefile into a GeoDataFrame

In [None]:
# Preview the first rows of the attribute table.
data_table.head()

In [None]:
# List the available columns; the PCGDP<year> and regionalization columns
# used in the cells below are looked up by name in this table.
data_table.columns

In [None]:

years = range(1940, 2010, 10)

# Draw one quintile choropleth per decade, each in its own figure.
for year in years:
    column_name = 'PCGDP%d' % year
    f, ax = plt.subplots(1, figsize=(9, 9))
    data_table.plot(
        column=column_name,
        scheme="quantiles",
        k=5,
        cmap='BuGn',
        linewidth=0.1,
        edgecolor='grey',
        legend=True,
        ax=ax,
    )
    ax.set_axis_off()
    plt.title(str(year))
    plt.show()


In [None]:
# Column names of the PCGDP series, one for each entry in `years`.
vnames = ["PCGDP{:d}".format(year) for year in years]
vnames

In [None]:
# Pull the PCGDP columns out as a 2-D array: one row per record in
# data_table, one column per name in vnames (i.e. per year).
Y = data_table[vnames].values

In [None]:
# Sanity check: (number of records, number of years).
Y.shape

## Theil Inequality

$$
T = \sum_{i=1}^n \left( \frac{y_i}{\sum_{j=1}^n y_j} \ln \left[ n \frac{y_i}{\sum_{j=1}^n y_j} \right] \right)
$$

In [None]:
# Theil's T for the PCGDP array; theil.T is plotted against `years` below,
# so it holds one value per column (year) of Y.
theil = inequality.theil.Theil(Y)

In [None]:
# Overall inequality over time, one point per decade.
plt.plot(years, theil.T)

## Interregional Decomposition

$$
T^t = \sum_{g=1}^{\omega} s_g^t \ln\left(\frac{n}{n_g} s_g^t\right) + \sum_{g=1}^{\omega} s_g^t \sum_{i \in g} s_{i,g}^t \ln\left(n_g s_{i,g}^t\right)
$$

where:

* $\sum_g n_g = n$
* $s_g^t = \sum_{i \in g} y_{i,g}^t / \sum_{i=1}^n y_i^t$
* $s_{i,g}^t = y_{i,g}^t / \sum_{i \in g} y_{i,g}^t$

$$
T^t = BG^t + WG^t
$$

In [None]:
# Categorical map of the regionalization stored in the HANSON98 column.
regime_column = 'HANSON98'
f, ax = plt.subplots(1, figsize=(9, 9))
data_table.plot(
    column=regime_column,
    categorical=True,
    linewidth=0.1,
    edgecolor='white',
    legend=True,
    ax=ax,
)
ax.set_axis_off()
plt.title(regime_column)
plt.show()


In [None]:
# Region label of every record, taken from the HANSON98 column.
regimes = data_table['HANSON98'].values

In [None]:
# Display the region labels.
regimes

In [None]:
# Decompose inequality in Y by the HANSON98 regions; the between-group (bg)
# and within-group (wg) components are plotted against `years` below.
theil_d = inequality.theil.TheilD(Y, regimes)

In [None]:
# One label per record; the length should match the first dimension of Y.
regimes.shape

In [None]:
# (number of records, number of years), for comparison with regimes.shape.
Y.shape

In [None]:
# Between-group component over time.
plt.plot(years, theil_d.bg)

In [None]:
# Display the between-group values that were just plotted.
theil_d.bg

In [None]:
# Within-group component over time.
plt.plot(years, theil_d.wg)

In [None]:
# Overlay total inequality and its two components on one set of axes.
# Each entry is (values, optional format string, legend label).
components = (
    (theil_d.T, (), 'T'),
    (theil_d.wg, ('-o',), 'wg'),
    (theil_d.bg, ('-^',), 'bg'),
)
for values, fmt, label in components:
    plt.plot(years, values, *fmt, label=label)
plt.legend()

## Inference

In [None]:
# Fix NumPy's global seed so the simulation-based results are reproducible.
np.random.seed(12345)
# Simulation-based inference for the decomposition; 999 is presumably the
# number of random permutations (confirm against the TheilDSim signature).
theil_ds = inequality.theil.TheilDSim(Y, regimes, 999)

In [None]:
# p-values for the between-group component (plotted against `years` below).
theil_ds.bg_pvalue

In [None]:
# Same overlay as before, plus the between-group p-values as a dotted
# black line. Each entry is (values, optional format string, legend label).
components = (
    (theil_d.T, (), 'T'),
    (theil_d.wg, ('-o',), 'wg'),
    (theil_d.bg, ('-^',), 'bg'),
    (theil_ds.bg_pvalue, (':k',), 'p-value'),
)
for values, fmt, label in components:
    plt.plot(years, values, *fmt, label=label)
plt.legend()

## Sensitivity to regime definition

In [None]:
# Names of the alternative regionalization columns to compare.
# NOTE: this rebinds `regimes`, which previously held the HANSON98 label
# array; re-run the earlier cells before passing `regimes` to
# TheilD/TheilDSim again.
regimes = ['HANSON03', 'HANSON98', 'ESQUIVEL99', 'INEGI', 'INEGI2']

In [None]:
# Styling shared by every regionalization map.
map_style = dict(categorical=True, linewidth=0.1, edgecolor='white', legend=True)

# One categorical map per regionalization, each in its own figure.
for regime in regimes:
    f, ax = plt.subplots(1, figsize=(9, 9))
    data_table.plot(column=regime, ax=ax, **map_style)
    ax.set_axis_off()
    plt.title(regime)
    plt.show()

In [None]:
# Between-group component under each regionalization (no legend yet).
results = {}
for regime in regimes:
    reg = data_table[regime].values  # region labels for this regionalization
    results[regime] = inequality.theil.TheilD(Y, reg)
    plt.plot(years, results[regime].bg)

In [None]:
# Between-group component under each regionalization, labelled so the
# lines can be told apart in the legend.
results = {}
for regime in regimes:
    decomposition = inequality.theil.TheilD(Y, data_table[regime].values)
    results[regime] = decomposition
    plt.plot(years, decomposition.bg, label=regime)
plt.legend()

In [None]:
# Between-group share of total inequality under each regionalization.
results = {
    name: inequality.theil.TheilD(Y, data_table[name].values)
    for name in regimes
}
for regime, decomposition in results.items():
    plt.plot(years, decomposition.bg / theil.T, label=regime)

plt.legend()