# Exploratory Spatial and Temporal Data Analysis (ESTDA) - Visualization
   

In [None]:
from pysal.lib import io, examples, weights

In [None]:
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

Load the example dataset bundled with **pysal**: nominal per capita incomes observed annually from 1929 to 2009 for the lower 48 US states. The data were downloaded from the [US Bureau of Economic Analysis](https://www.bea.gov).

In [None]:
# Look up the sample file through pysal's example registry, then parse it.
usjoin_path = examples.get_path('usjoin.csv')
pdUS = pd.read_csv(usjoin_path)
pdUS.head()

## Visualization 

* Temporal dynamics

In [None]:
# Keep the "Name" column as a NumPy array so later cells can index it with
# integer orderings.
names = pdUS.loc[:, "Name"].values
names

In [None]:
# The yearly columns of pdUS are addressed by the year written as a string.
years = range(1929, 2010)
year_columns = [str(year) for year in years]
pd_pci = pdUS[year_columns]
pd_pci.head()

In [None]:
# Label each row with the corresponding entry of `names` instead of the
# default integer position.
pd_pci.index = pd.Index(names)
pd_pci.head()

In [None]:
# Swap rows and columns of pd_pci.
# NOTE: the name is rebound to its own transpose, so running this cell a
# second time flips the table back.
pd_pci = pd_pci.transpose()
pd_pci.head()

In [None]:
# Quick look: one line per column of pd_pci, with the legend switched off.
pd_pci.plot.line(legend=None)

In [None]:
# Order the entries of `names` by their 1929 and 2009 values, largest first.
# These name lists are written in the left and right margins of the figure.
order1929 = np.argsort(pdUS["1929"])
order2009 = np.argsort(pdUS["2009"])
names1929 = names[order1929[::-1]]
names2009 = names[order2009[::-1]]
first_last = np.vstack((names1929, names2009))

# Same rcParams object that pylab re-exports; pylab itself is discouraged.
from matplotlib import rcParams
rcParams['figure.figsize'] = 15,10

# DataFrame.as_matrix() was removed in pandas 1.0; .values is the
# version-independent way to get the underlying array.
pci_values = pd_pci.values
pci_max = pci_values.max()
plt.plot(years, pci_values)

# The two name lists are stacked top-down at a fixed vertical spacing; the x
# positions (1915 and 2010.5) lie outside the x-limits set below, so the text
# is drawn beside the plotting area rather than over the lines.
for i, (name_first, name_last) in enumerate(zip(names1929, names2009)):
    label_y = pci_max - 500 - (i*1159)
    plt.text(1915, label_y, name_first, fontsize=12)
    plt.text(2010.5, label_y, name_last, fontsize=12)

plt.xlim((years[0], years[-1]))
plt.ylim((0, pci_max))
plt.ylabel("Per capita income (Nominal dollar)",fontsize=14,color="r")
plt.xlabel('Year',fontsize=12)
plt.title('Absolute Dynamics',fontsize=18)

* Distribution dynamics

In [None]:
# Compare the cross-sectional density of the first and the last year.
# seaborn is already imported as `sns` in the notebook's import cell.
plt.figure(figsize=(8,7))
# Explicit labels guarantee the legend has entries regardless of whether the
# installed seaborn derives a label from the Series name.
sns.kdeplot(pdUS["1929"], color="b", label="1929")
# Matplotlib's single-letter colour codes are lowercase; "R" is rejected by
# current releases.
sns.kdeplot(pdUS["2009"], color="r", label="2009")
plt.xlabel("Per capita income (Nominal dollar)",fontsize=14,color="r")
plt.legend(facecolor="white")

In [None]:
# One density curve per year, drawn with an 81-step "coolwarm" palette
# (one colour per year from 1929 to 2009).
sns.set_palette(sns.color_palette("coolwarm", 2010-1929))
plt.figure(figsize=(10,8))
for year in range(1929, 2010):
    # A row of pd_pci holds that year's value for every state.
    sns.kdeplot(pd_pci.loc[str(year)], legend=False)
plt.xlabel("Per capita income (Nominal dollar)",fontsize=14,color="r")

### Per capita income dynamics in constant dollar 2009 - structural mobility

We need to adjust for price changes over the years to make a valid temporal comparison. First, we acquire the [Historical Consumer Price Index for All Urban Consumers (CPI-U)](https://www.bls.gov/cpi/tables/historical-cpi-u-201709.pdf) from the [US Bureau of Labor Statistics](https://www.bls.gov/home.htm).


In [None]:
# CPI table read from a path relative to the notebook's working directory.
cpi_path = "data/CPI1913-2016.csv"
pd_cpi = pd.read_csv(cpi_path)
pd_cpi.head()

In [None]:
# Index the CPI table by year so rows can be selected with .loc[year], then
# drop the now-redundant "year" column.
# Series.as_matrix() was removed in pandas 1.0; .values is available in every
# pandas release.
# NOTE: this cell consumes the "year" column, so it cannot be re-run without
# first re-running the cell that loads pd_cpi.
pd_cpi.index = pd_cpi["year"].values
pd_cpi = pd_cpi.drop(["year"],axis=1)
pd_cpi.head()

In [None]:
# Keep only the CPI rows whose index label is one of the study years.
pd_cpi2909 = pd_cpi.loc[years, :]
pd_cpi2909

In [None]:
# Ratio of the 2009 CPI row to every year's CPI row; the first column of that
# ratio table becomes a 1-D array with one factor per year.
cpi_2009 = pd_cpi2909.loc[2009]
deflator = (cpi_2009 / pd_cpi2909).T.values[0]
deflator

In [None]:
# Scale each row of pd_pci (one row per year) by that year's deflator.
# Broadcasting a column vector does this directly; it avoids materialising a
# dense diagonal matrix for a matrix product, and it keeps a NaN/inf confined
# to its own cell instead of spreading it down the column through 0 * nan.
real_2909 = deflator[:, np.newaxis] * pd_pci.values
real_2909

In [None]:
# Start from a copy of pdUS so its non-year columns are carried over, then
# overwrite each year column with the matching row of real_2909.
pd_real_2909 = pdUS.copy()
for row, year in enumerate(years, start=years[0] - 1929):
    pd_real_2909[str(year)] = real_2909[row, :]
pd_real_2909.head()

In [None]:
# Write the deflated table to disk for later use (the index is written too,
# since to_csv is called with its defaults).
pd_real_2909.to_csv(path_or_buf="data/US_state_pci_constant09_1929_2009.csv")

In [None]:
# Same rcParams object that pylab re-exports; pylab itself is discouraged.
from matplotlib import rcParams
sns.set_palette(sns.color_palette("Set1", 2010-1929))
rcParams['figure.figsize'] = 15,10

real_max = real_2909.max()
plt.plot(years, real_2909)

# Write the two name lists in the margins, stacked top-down at a fixed
# spacing; x = 1915 and x = 2010.5 lie outside the x-limits set below.
for i, (name_first, name_last) in enumerate(zip(names1929, names2009)):
    label_y = real_max - 700 - (i*1189)
    plt.text(1915, label_y, name_first, fontsize=12)
    plt.text(2010.5, label_y, name_last, fontsize=12)

plt.xlim((years[0], years[-1]))
plt.ylim((0, real_max))
# Only one y-label can be shown; the earlier r"$y_{i,t}$" label was always
# overwritten by this call, so it has been dropped.
plt.ylabel("Per capita income (Constant Dollar 2009)",fontsize=14,color="r")
plt.xlabel('Year',fontsize=12)


Dynamics of the income distribution over time (kernel densities)

In [None]:
# One density curve per row of real_2909, drawn with an 81-step "coolwarm"
# palette.
n_years = 2010 - 1929
sns.set_palette(sns.color_palette("coolwarm", n_years))
plt.figure(figsize=(10,8))
plt.xlabel("Per capita income (Constant Dollar 2009)",fontsize=14,color="r")
for yearly_values in real_2909[:n_years]:
    sns.kdeplot(yearly_values, legend=False)

We can also use [FacetGrid](https://seaborn.pydata.org/generated/seaborn.FacetGrid.html#seaborn.FacetGrid) from seaborn to visualize the cross-sectional densities over time.

In [None]:
# Reshape the wide array (rows follow pd_pci's index, columns follow `names`)
# into long form: one record per (year, state) pair with its value in "pci".
real_wide = pd.DataFrame(real_2909, index=pd_pci.index, columns=names)
pd_real_long = real_wide.stack()
pd_real_long.index.names = ['year', 'state']
pd_real_long = pd_real_long.reset_index(name='pci')

# Transparent axes background so that overlapping facets do not hide each other.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
sns.set_palette(sns.color_palette("coolwarm", 2010-1929))
g = sns.FacetGrid(pd_real_long, row="year", hue="year", aspect=15, height=0.5)

# One filled density per facet, plus a baseline at y = 0.
g.map(sns.kdeplot, "pci", clip_on=False, shade=True, alpha=1, lw=1.5)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


def label(x, color, label):
    """Write `label` in bold at the lower left of the current axes.

    Called through ``FacetGrid.map``; `x` is accepted but not used, and
    `color` is applied to the text.
    """
    current_ax = plt.gca()
    current_ax.text(
        0, .2, label,
        fontweight="bold", color=color,
        ha="left", va="center",
        transform=current_ax.transAxes,  # position given in axes coordinates
    )


g.map(label, "year")

# Negative spacing makes consecutive facets overlap.
g.fig.subplots_adjust(hspace=-.25)

# Strip the decorations that would clutter the overlapping facets.
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

### Relative per capita income dynamics - exchange mobility

In [None]:
# Mean across each row of pd_pci, kept as a column vector of shape (n_rows, 1)
# so it can be divided into the table row by row.
mean = pd_pci.values.mean(axis=1, keepdims=True)

In [None]:
# Express every value relative to its row mean (`mean` is a column vector, so
# the division broadcasts across each row).
rpci = np.true_divide(pd_pci.values, mean)
rpci

In [None]:
# Same rcParams object that pylab re-exports; pylab itself is discouraged.
from matplotlib import rcParams
sns.set_palette(sns.color_palette("Set1", 2010-1929))
rcParams['figure.figsize'] = 15,10

rpci_max = rpci.max()
plt.plot(years, rpci)

# Write the two name lists in the margins, stacked top-down at a fixed
# spacing; x = 1915 and x = 2010.5 lie outside the x-limits set below.
for i, (name_first, name_last) in enumerate(zip(names1929, names2009)):
    label_y = rpci_max - (i*0.042)
    plt.text(1915, label_y, name_first, fontsize=12)
    plt.text(2010.5, label_y, name_last, fontsize=12)

plt.xlim((years[0], years[-1]))
plt.ylim((0, rpci_max))
plt.ylabel("Relative Per capita income (mean-normalized)",fontsize=14,color="r")
plt.xlabel('Year',fontsize=12)

In [None]:
# One density curve per row of rpci, drawn with an 81-step "coolwarm" palette.
n_years = 2010 - 1929
sns.set_palette(sns.color_palette("coolwarm", n_years))
plt.figure(figsize=(10,8))
for yearly_values in rpci[:n_years]:
    sns.kdeplot(yearly_values, legend=False)
plt.xlabel("Relative Per capita income (mean-normalized)",fontsize=14,color="r")

In [None]:
# Reshape the wide array (rows follow pd_pci's index, columns follow `names`)
# into long form: one record per (year, state) pair with its value in "pci".
pd_rpci_long = pd.DataFrame(data=rpci, columns=names, index=pd_pci.index)
pd_rpci_long = pd_rpci_long.stack()
pd_rpci_long.index.names = ['year', 'state']
pd_rpci_long = pd_rpci_long.reset_index(name='pci')

sns.set_palette(sns.color_palette("coolwarm", 2010-1929))
g = sns.FacetGrid(pd_rpci_long, row="year", hue="year", aspect=15, height=0.5)

# One filled density per facet, plus a baseline at y = 0.
g.map(sns.kdeplot, "pci", clip_on=False, shade=True)
g.map(plt.axhline, y=0, lw=2, clip_on=False)

# `label` is the annotation helper defined in the constant-dollar FacetGrid
# cell earlier in this notebook; it is reused here instead of being redefined.
g.map(label, "year")

# Negative spacing makes consecutive facets overlap.
g.fig.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

### Spatial-temporal dynamics visualization

In [None]:
import geopandas as gpd

# Geometries come from the us48 shapefile and incomes from usjoin.csv, both
# resolved through pysal's example registry.
shapes_path = examples.get_path('us48.shp')
income_path = examples.get_path("usjoin.csv")
geo_table = gpd.read_file(shapes_path)
income_table = pd.read_csv(income_path)

# Match the shapefile's STATE_NAME against the CSV's Name column.
complete_table = geo_table.merge(
    income_table,
    left_on='STATE_NAME',
    right_on='Name',
)
complete_table.head()

In [None]:
sns.set(style="white")
# Every 15th year starting at 1929 gives six maps for the 2 x 3 grid.
index_year = range(1929,2010,15)
fig, axes = plt.subplots(nrows=2, ncols=3,figsize = (15,7))
# axes.flat walks the grid row by row, which matches the order of index_year.
for ax, year in zip(axes.flat, index_year):
    year_label = str(year)
    complete_table.plot(ax=ax, column=year_label, cmap='OrRd', scheme='quantiles', legend=True)
    ax.set_title('Per Capita Income %s Quintiles'%year_label)
    ax.axis('off')
    # Reposition the legend box using axes-relative coordinates.
    leg = ax.get_legend()
    leg.set_bbox_to_anchor((0.8, 0.15, 0.16, 0.2))
plt.tight_layout()