In [None]:
%pylab inline
import pysal as ps
import numpy as np

In [None]:
# Ask PySAL to describe its bundled 'mexico' example dataset, as background
# for the variables analysed below.
ps.examples.explain('mexico')

In [None]:
# We use an updated shapefile rather than the built-in dataset.
# NOTE: the path is relative, so the notebook must be run from a directory
# that contains `data/mexicojoin.shp`.

shp_link = 'data/mexicojoin.shp'
# Tabular view of the shapefile; indexed by column name in the cells below.
data_table = ps.pdio.read_files(shp_link)

In [None]:
# Quick look at the first records to confirm the table loaded.
data_table.head()

In [None]:
# List the available columns; the PCGDP<year> series and the regime columns
# referenced later are looked up by these names.
data_table.columns

In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Geometries used for mapping, read from the same shapefile as `data_table`.
shp = gpd.read_file(shp_link)

# Observation years 1940, 1950, ..., 2000 (the stop value 2010 is exclusive).
years = range(1940, 2010, 10)

# One five-class quantile map per decade of the PCGDP<year> column
# (presumably per-capita GDP -- confirm against the dataset description).
for year in years:
    column_name = 'PCGDP%d' % year
    yi = ps.Quantiles(data_table[column_name], k=5)
    # Attach the class id of every observation so it can be mapped.
    classified = shp.assign(cl=yi.yb)

    f, ax = plt.subplots(1, figsize=(9, 9))
    classified.plot(
        column='cl',
        categorical=True,
        k=5,
        cmap='BuGn',
        linewidth=0.1,
        ax=ax,
        edgecolor='grey',
        legend=True,
    )
    ax.set_axis_off()
    ax.set_title(str(year))
    plt.show()


In [None]:
# Names of the decennial columns, in the same order as `years`.
vnames = ['PCGDP{:d}'.format(year) for year in years]
vnames

In [None]:
# Pull the selected columns out as a plain array: one column per entry of
# `vnames` (i.e. per year), one row per record of the table.
Y = data_table[vnames].values

In [None]:
# Sanity check: the second dimension should equal len(vnames).
Y.shape

## Theil Inequality

$$
T = \sum_{i=1}^n \left( \frac{y_i}{\sum_{j=1}^n y_j} \ln \left[ n \frac{y_i}{\sum_{j=1}^n y_j} \right] \right)
$$

In [None]:
# Theil's T for the columns of Y; `theil.T` is plotted against `years` below,
# so it holds one value per year.
theil = ps.Theil(Y)

In [None]:
# `%pylab inline` star-imports numpy and matplotlib names into the notebook
# namespace. The bare `plot(...)` and `legend()` calls in the cells below rely
# on it, so this cell must run before them.
%pylab inline

In [None]:
# Theil's T by decade. Drawn on an explicit Axes with labelled axes so the
# figure can be read on its own and does not depend on pylab's bare `plot`.
fig, ax = plt.subplots()
ax.plot(years, theil.T)
ax.set_xlabel('Year')
ax.set_ylabel("Theil's T")
ax.set_title("Theil's T by decade")
plt.show()

## Interregional Decomposition

$$
T^t = \sum_{g=1}^{\omega} s_g^t \ln\left(\frac{n}{n_g} s_g^t\right) + \sum_{g=1}^{\omega} s_g^t \sum_{i \in g} s_{i,g}^t \ln\left(n_g s_{i,g}^t\right)
$$

where:

* $\sum_g n_g = n$, where $n_g$ is the number of observations in group $g$ and $\omega$ is the number of groups
* $s_g^t = \sum_{i \in g} y_{i,g}^t / \sum_i^n y_i^t$
* $s_{i,g}^t = y_{i,g}^t / \sum_{i \in g} y_{i,g}^t$

$$
T^t = BG^t + WG^t
$$

In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Geometries for mapping, read from the same shapefile as before.
shp = gpd.read_file(shp_link)

# Categorical map of the regime assignment stored in the HANSON98 column.
regime_column = 'HANSON98'
f, ax = plt.subplots(1, figsize=(9, 9))
shp.plot(
    column=regime_column,
    categorical=True,
    linewidth=0.1,
    ax=ax,
    edgecolor='white',
    legend=True,
)
ax.set_axis_off()
ax.set_title(regime_column)
plt.show()


In [None]:
# Regime label of every record, taken from the HANSON98 column; used as the
# partition for the decomposition below.
# NOTE(review): the name `regimes` is rebound to a list of column names in the
# "Sensitivity to regime definition" section. Re-running the TheilD/TheilDSim
# cells after that point would pass the wrong object.
regimes = data_table['HANSON98'].values

In [None]:
# Inspect the regime labels.
regimes

In [None]:
# Decompose Theil's T by regime. The result exposes `.T`, `.bg` and `.wg`,
# which are plotted against `years` below (the T = BG + WG split above).
theil_d = ps.TheilD(Y, regimes)

In [None]:
# Sanity check: expected to match the number of rows of Y (shown next).
regimes.shape

In [None]:
# Shape of Y, for comparison with regimes.shape above.
Y.shape

In [None]:
# BG component of the decomposition by decade, on an explicit Axes with
# labelled axes rather than pylab's bare `plot`.
fig, ax = plt.subplots()
ax.plot(years, theil_d.bg)
ax.set_xlabel('Year')
ax.set_ylabel('BG component of T')
ax.set_title('Between-group component (HANSON98 regimes)')
plt.show()

In [None]:
# Numeric values behind the BG plot above (one per year).
theil_d.bg

In [None]:
# WG component of the decomposition by decade, on an explicit Axes with
# labelled axes rather than pylab's bare `plot`.
fig, ax = plt.subplots()
ax.plot(years, theil_d.wg)
ax.set_xlabel('Year')
ax.set_ylabel('WG component of T')
ax.set_title('Within-group component (HANSON98 regimes)')
plt.show()

In [None]:
# Total T with its WG and BG components on one labelled Axes, so the three
# series can be compared directly.
fig, ax = plt.subplots()
ax.plot(years, theil_d.T, label='T')
ax.plot(years, theil_d.wg, '-o', label='wg')
ax.plot(years, theil_d.bg, '-^', label='bg')
ax.set_xlabel('Year')
ax.set_ylabel('Theil index')
ax.set_title('Theil decomposition (HANSON98 regimes)')
ax.legend()
plt.show()

## Inference

In [None]:
# Fix NumPy's global seed so the simulation-based results are reproducible.
np.random.seed(12345)
# Simulation-based inference on the decomposition. The third argument (999)
# is presumably the number of random permutations -- confirm against the
# PySAL TheilDSim signature.
# Relies on `regimes` still being the HANSON98 label array defined earlier.
theil_ds = ps.TheilDSim(Y, regimes, 999)

In [None]:
# Simulated p-values for the BG component (plotted against `years` below).
theil_ds.bg_pvalue

In [None]:
# Decomposition series together with the simulated p-value of the BG
# component, on one labelled Axes. The p-value shares the y-axis with the
# index values, exactly as in the original figure.
fig, ax = plt.subplots()
ax.plot(years, theil_d.T, label='T')
ax.plot(years, theil_d.wg, '-o', label='wg')
ax.plot(years, theil_d.bg, '-^', label='bg')
ax.plot(years, theil_ds.bg_pvalue, ':k', label='p-value')
ax.set_xlabel('Year')
ax.set_ylabel('Theil index / p-value')
ax.set_title('Theil decomposition with BG p-value (HANSON98 regimes)')
ax.legend()
plt.show()

## Sensitivity to regime definition

In [None]:
# Column names of the alternative regime definitions compared below.
# NOTE(review): this rebinds `regimes`, which earlier held the HANSON98 label
# array passed to TheilD/TheilDSim. After this cell those earlier cells can no
# longer be re-run correctly without first re-running the cell that builds
# the array.
regimes = ['HANSON03', 'HANSON98', 'ESQUIVEL99', 'INEGI', 'INEGI2']

In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Geometries for mapping, read from the same shapefile as before.
shp = gpd.read_file(shp_link)

# One categorical map per regime definition, titled with its column name.
for regime in regimes:
    f, ax = plt.subplots(1, figsize=(9, 9))
    shp.plot(
        column=regime,
        categorical=True,
        linewidth=0.1,
        ax=ax,
        edgecolor='white',
        legend=True,
    )
    ax.set_axis_off()
    ax.set_title(regime)
    plt.show()

In [None]:
# BG component under every regime definition, drawn on a shared figure
# (no legend in this first pass).
results = {}
for regime in regimes:
    reg = data_table[regime].values
    decomposition = ps.TheilD(Y, reg)
    results[regime] = decomposition
    plt.plot(years, decomposition.bg)

    

In [None]:
# BG component under every regime definition, one labelled line per
# definition, on an explicit Axes with axis labels.
results = {}
fig, ax = plt.subplots()
for regime in regimes:
    reg = data_table[regime].values
    results[regime] = ps.TheilD(Y, reg)
    ax.plot(years, results[regime].bg, label=regime)

ax.set_xlabel('Year')
ax.set_ylabel('BG component of T')
ax.set_title('Between-group component by regime definition')
ax.legend()
plt.show()

    

In [None]:
# Share of total inequality attributable to the BG component: bg / T for each
# year, under every regime definition. `theil.T` is the overall Theil's T
# computed from the same Y, so the ratio is taken year by year.
results = {}
fig, ax = plt.subplots()
for regime in regimes:
    reg = data_table[regime].values
    results[regime] = ps.TheilD(Y, reg)
    ax.plot(years, results[regime].bg / theil.T, label=regime)

ax.set_xlabel('Year')
ax.set_ylabel('BG / T')
ax.set_title('Between-group share of total inequality')
ax.legend()
plt.show()

    