# STARS base plots in bqplot

## Loading Data and initial wrangling

In [112]:
# I had to update some jupyter components before installing bqplot
# conda update jupyter_core jupyter_client
# conda install -c conda-forge bqplot
import bqplot as bq
import pandas as pd
import pysal as ps
import bqplot.pyplot as plt
import geopandas as gpd
import numpy as np
from bqplot.interacts import (
    FastIntervalSelector, IndexSelector, BrushIntervalSelector,
    BrushSelector, MultiSelector, LassoSelector, PanZoom, HandDraw
)
from ipywidgets import ToggleButtons, VBox, HTML
import re
from scipy import stats
from bqplot import * # Imports Figure, Map, Mercator, Orthographic, ColorScale, ColorAxis, AlbersUSA, topo_load, Tooltip, Scatter, Lines, etc.

In [113]:
# Locate the 'usjoin.csv' example dataset shipped with PySAL and load it
# as a wide table (the yearly columns are melted into tidy form below).
csv_path = ps.examples.get_path('usjoin.csv')
usjoin = pd.read_csv(csv_path)

In [114]:
# Years covered by the analysis (1929 through 2009 inclusive) and the
# matching column labels, which are the same years written as strings.
years = list(range(1929, 2010))
cols_to_calculate = [str(year_value) for year_value in years]

In [115]:
# Reshape the wide table (one column per year) into tidy/long form:
# one row per (state, year) pair, with the value stored in 'Income'.
us_tidy = usjoin.melt(
    id_vars=['Name', 'STATE_FIPS'],
    value_vars=cols_to_calculate,
    var_name='Year',
    value_name='Income',
)

def calculate_pcr(x):
    """Return every value of ``x`` divided by the mean of ``x``.

    This is the "Per Capita Ratio": a value of 1.0 means the entry equals
    the mean of the values it was passed together with.
    """
    group_mean = np.mean(x)
    return x / group_mean

# Per-year ratio of each state's income to that year's mean income.
# `transform` always returns a result aligned to the original row index, so
# the column assignment does not depend on how `apply` treats group keys in
# the installed pandas version.
us_tidy['pcr'] = us_tidy.groupby('Year').Income.transform(calculate_pcr)

In [111]:
# Preview the first rows of the tidy table, including the new 'pcr' column.
us_tidy.head()

Unnamed: 0,Name,STATE_FIPS,Year,Income,pcr
0,Alabama,1,1929,323,0.525025
1,Arizona,4,1929,600,0.975279
2,Arkansas,5,1929,310,0.503894
3,California,6,1929,991,1.610836
4,Colorado,8,1929,634,1.030545


In [6]:
# Build queen-contiguity spatial weights from PySAL's 'us48.shp' example
# shapefile, then switch them to the 'r' (row-standardized) transform.
shp_path = ps.examples.get_path('us48.shp')
W = ps.queen_from_shapefile(shp_path)
W.transform = 'r'

def calculate_lag_value(x):
    """Return the spatial lag of ``x`` under the notebook-level weights ``W``."""
    lagged = ps.lag_spatial(W, x)
    return lagged

# Read the state geometries and cast the FIPS key to int before joining the
# income table onto them by that key.
us48_map = gpd.read_file(shp_path)
us48_map['STATE_FIPS'] = us48_map['STATE_FIPS'].astype(int)
df_map = us48_map.merge(usjoin, on='STATE_FIPS')

# Spatial lag of every yearly column (apply runs the helper column by column).
yearly_values = df_map[cols_to_calculate]
all_lagged = yearly_values.apply(calculate_lag_value)

## Choropleth Map

In [7]:
# Reference year used by the single-year plots below. The 'Year' column holds
# strings (it was built from the string column labels), hence str(year).
year = 2000
us_aux = us_tidy[us_tidy.Year == str(year)]

In [8]:
# Load the US states TopoJSON through bqplot's topo_load helper; the result is
# indexed like a nested dict in the cells below.
base_json = topo_load('map_data/USStatesMap.json')

In [9]:
# List of indexes (ids) of the json file
# Ids of every geometry in the TopoJSON, kept in file order
x = [geometry['id'] for geometry in base_json['objects']['subunits']['geometries']]

In [10]:
# One income value per geometry id, in the same order as `x`. Ids that do not
# match exactly one row of the selected year (some 'states' in the map have
# no value in the dataset) get 0 instead.
v = []
for geometry_id in x:
    matching_income = us_aux[us_aux.STATE_FIPS == geometry_id].Income.values.astype(int)
    v.append(matching_income.item() if len(matching_income) == 1 else 0)

In [11]:
# Some polygons in the TopoJSON have no 'properties' entry, for example
# base_json['objects']['subunits']['geometries'][35] (it sits between Kentucky
# and Arizona in the file).
#
# For that reason the income value is written only into geometries whose id
# is one of the STATE_FIPS codes present in the dataset; every other geometry
# is left untouched.
known_fips = set(us_aux.STATE_FIPS)
for geometry, income_value in zip(base_json['objects']['subunits']['geometries'], v):
    if geometry['id'] in known_fips:
        geometry['properties']['Income_Value'] = income_value

Converting to a Choropleth map.

In [12]:
# Map projection scale for the US states.
sc_geo = AlbersUSA()

# Following the bqplot example: the 'color' entry of the map styles maps each
# geometry 'id' of the JSON file to the number that drives its colour.
sc_c1 = ColorScale(scheme='YlOrRd')
axis = ColorAxis(scale=sc_c1)

# STATE_FIPS -> income for the selected year.
color_dict = dict(zip(us_aux.STATE_FIPS, us_aux.Income))

# Geometries whose id is not in color_dict fall back to the default colour.
map_styles = {'color': color_dict,
              'scales': {'projection': sc_geo, 'color': sc_c1}, 'colors': {'default_color': 'Grey'}}

# 'Income_Value' is the property written into the geometries earlier.
def_tt = Tooltip(fields=['id', 'name', 'Income_Value'])
choro_map = Map(map_data=base_json, **map_styles, tooltip=def_tt)
choro_map.interactions = {'click': 'select', 'hover': 'tooltip'}
# Last expression of the cell: the figure is displayed as the cell output.
Figure(marks=[choro_map], axes=[axis], title='Choropleth Example for ' + str(year), fig_margin={'top': 0, 'bottom': 0, 'left': 0, 'right': 0})

Figure(axes=[ColorAxis(scale=ColorScale(scheme='YlOrRd'))], fig_margin={'top': 0, 'bottom': 0, 'left': 0, 'rig…

## Scatter plot

### Basic

In [13]:
# Values of the selected year and their spatial lag under W.
scat_Var = df_map[str(year)]
scat_VarLag = ps.lag_spatial(W, df_map[str(year)]).tolist()

# Scales and axes shared by the scatter and the regression line.
sc_x = bq.LinearScale()
sc_y = bq.LinearScale()
ax_x = bq.Axis(label='Original Variable', scale=sc_x)
ax_y = bq.Axis(label='Lagged Variable', scale=sc_y, orientation='vertical')

scatt_plot = Scatter(x = scat_Var, y = scat_VarLag, scales={'x': sc_x, 'y': sc_y}, colors=['blue'])

# Degree-1 fit: np.polyfit returns the highest power first, so b is the slope
# and a is the intercept.
b,a = np.polyfit(scat_Var, scat_VarLag, 1)
# The line is drawn between the smallest and largest x value only.
x_reg = [min(scat_Var), max(scat_Var)]
y_reg = [a + i * b for i in x_reg]

reg_line = Lines(x = x_reg, y = y_reg, scales={'x': sc_x, 'y': sc_y}, colors = ['black'])

# Last expression of the cell: the figure is displayed as the cell output.
bq.Figure(axes=[ax_x, ax_y], marks=[scatt_plot, reg_line], title = 'Scatterplot for '+ str(year))

Figure(axes=[Axis(label='Original Variable', scale=LinearScale()), Axis(label='Lagged Variable', orientation='…

### With dynamic regression from box select

In [14]:
# Marks shown when nothing is brushed: the scatter and the global fit.
default_marks = [scatt_plot, reg_line]
# Rectangular brush defined on the same x/y scales as the scatter plot.
br_sel_scat = BrushSelector(x_scale=sc_x, y_scale=sc_y, marks=default_marks, color='red')
# HTML widget that holds the textual form of the current selection.
db_scat_brush = HTML(value = '[]')

## call back for the selector
def brush_callback_scat(change):
    """Redraw the scatter figure after the brush selection changes.

    With no active selection the figure shows only the scatter and the
    global regression line. Otherwise a red regression line, fitted to the
    points lying strictly inside the brushed rectangle, is drawn on top.

    Parameters
    ----------
    change : dict
        Change notification passed by ``observe``. It is not used: the
        current selection is read from ``br_sel_scat.selected``.
    """
    selection = br_sel_scat.selected
    if selection is None or len(selection) == 0:
        final_plot.marks = [scatt_plot, reg_line]
        return

    # Keep the textual echo of the selection in the HTML widget.
    db_scat_brush.value = str(selection)

    # Read the corners numerically rather than parsing the string form, whose
    # layout differs between nested lists and numpy arrays. Flattening
    # [[x1, y1], [x2, y2]] yields bottom-left then upper-right.
    corners = np.asarray(selection, dtype=float).ravel()
    x_bottom_left, y_bottom_left, x_upper_right, y_upper_right = corners[:4]

    # Give the lagged values the same index as scat_Var so the boolean masks
    # combine row by row.
    lagged = pd.Series(scat_VarLag, index=scat_Var.index)
    conditions = (
        (scat_Var > x_bottom_left) & (scat_Var < x_upper_right)
        & (lagged < y_upper_right) & (lagged > y_bottom_left)
    )

    if conditions.sum() < 2:
        # A straight line cannot be fitted to fewer than two points.
        final_plot.marks = [scatt_plot, reg_line]
        return

    sub_scat_Var = scat_Var.loc[conditions]
    sub_scat_VarLag = lagged.loc[conditions]
    # Degree-1 fit: slope first, then intercept.
    sub_b, sub_a = np.polyfit(sub_scat_Var, sub_scat_VarLag, 1)

    # Draw the sub-sample line across the full x range of the data so it can
    # be compared with the global regression line.
    sub_x_reg = [min(scat_Var), max(scat_Var)]
    sub_y_reg = [sub_a + x_value * sub_b for x_value in sub_x_reg]
    sub_reg_line = Lines(x = sub_x_reg, y = sub_y_reg, scales={'x': sc_x, 'y': sc_y}, colors = ['red'])
    final_plot.marks = [scatt_plot, reg_line, sub_reg_line]
    
# Run the callback whenever the selector's 'brushing' trait changes.
br_sel_scat.observe(brush_callback_scat, names=['brushing'])

# Same axes and marks as the basic scatter, with the brush attached as the
# figure's interaction. The callback swaps final_plot.marks in place.
final_plot = bq.Figure(axes=[ax_x, ax_y], marks = default_marks, title = 'Scatterplot for '+ str(year), interaction = br_sel_scat)
final_plot

Figure(axes=[Axis(label='Original Variable', scale=LinearScale(), side='bottom'), Axis(label='Lagged Variable'…

## Time Path plot

In this plot, we have to select a specific state.

In [85]:
# State whose time path is drawn.
state_selected = 'California'

# Positional row of that state in df_map (list.index raises ValueError if the
# name is not present).
state_row_index = list(df_map['Name']).index(state_selected)

# All yearly values of the state, and the matching row of the lagged table.
Var = df_map[cols_to_calculate].iloc[state_row_index,:]
VarLag = all_lagged.iloc[state_row_index,:]

In [86]:
# Scales shared by the line and the point markers of the time path.
tp_sc_x = LinearScale()
tp_sc_y = LinearScale()

# Line joining the (value, lagged value) pairs in column (year) order.
tp_line = Lines(x = Var, y = VarLag, scales={'x': tp_sc_x, 'y': tp_sc_y})
tp_ax_x = Axis(scale = tp_sc_x, label = 'Original Variable')
tp_ax_y = Axis(scale = tp_sc_y, orientation = 'vertical', label = 'Lagged Variable')

# The same pairs drawn as black markers on top of the line.
tp_scatt = Scatter(x = Var, y = VarLag, scales = {'x': tp_sc_x, 'y': tp_sc_y}, colors = ['black'])
tp_scatt.default_size = 20

# tp_line, tp_scatt and tp_figure are updated in place by the hover callback
# defined in the "Choropleth with Time Path" section.
tp_figure = Figure(marks = [tp_line, tp_scatt], axes = [tp_ax_x, tp_ax_y], title = 'Time Path for ' + state_selected)
tp_figure

Figure(axes=[Axis(label='Original Variable', scale=LinearScale()), Axis(label='Lagged Variable', orientation='…

## Time Series of Global Moran's I

First, we need to calculate the Moran's I value for all years.

In [87]:
# Global Moran's I of every yearly column, in the same order as `years`
morans = [ps.Moran(df_map[year_column], W).I for year_column in cols_to_calculate]

In [88]:
# Scales shared by the line and the point markers of the time series.
ts_sc_x = LinearScale()
ts_sc_y = LinearScale()

# Moran's I per year, drawn both as a line and as black markers.
moran_line = Lines(x = years, y = morans, scales={'x': ts_sc_x, 'y': ts_sc_y})
moran_scatt = Scatter(x = years, y = morans, scales={'x': ts_sc_x, 'y': ts_sc_y}, colors=['black'])
moran_scatt.default_size = 10

ts_ax_x = Axis(scale = ts_sc_x, label='Years')
ts_ax_y = Axis(scale = ts_sc_y, orientation='vertical', label='Moran\'s I')

ts_plot = Figure(marks=[moran_line, moran_scatt], axes=[ts_ax_x, ts_ax_y], title='Moran\'s I Time Series plot')
ts_plot

Figure(axes=[Axis(label='Years', scale=LinearScale()), Axis(label="Moran's I", orientation='vertical', scale=L…

## Density Plot

First, let's estimate the density curve over a grid of values.

In [90]:
# Income values of the selected year.
value_vetor = us_aux.Income.values
# Gaussian kernel density estimate using Silverman's bandwidth rule.
kde1 = stats.gaussian_kde(value_vetor, bw_method = 'silverman')
# Evaluate the density on 10000 evenly spaced points between the smallest and
# the largest observed value.
grid = np.linspace(start = min(value_vetor), stop = max(value_vetor), num = 10000)
dens = kde1.evaluate(grid)

In [91]:
dp_sc_x = LinearScale()
dp_sc_y = LinearScale()

# Interpolation is a property of the Lines mark, not of the Figure: setting it
# on the figure has no effect on how the curve is drawn. 'basis' makes the
# curve smoother.
density_line = Lines(x = grid, y = dens, scales={'x': dp_sc_x, 'y': dp_sc_y}, interpolation = 'basis')
dp_ax_x = Axis(scale = dp_sc_x, label = 'Values')
dp_ax_y = Axis(scale = dp_sc_y, orientation = 'vertical', label = 'Density')

density_plot = Figure(marks = [density_line], axes = [dp_ax_x, dp_ax_y], title = 'Density plot for ' + str(year))
density_plot

Figure(axes=[Axis(label='Values', scale=LinearScale()), Axis(label='Density', orientation='vertical', scale=Li…

## Boxplot (with scatter markers)

In [92]:
# x scale spans one year either side of the selected year so the single box
# sits in the middle; y scale is pinned to the observed income range.
bp_sc_x = LinearScale(min = year - 1, max = year + 1)
bp_sc_y = LinearScale(min = min(us_aux.Income.values).astype(float), max = max(us_aux.Income.values).astype(float))
bp_ax_x = Axis(label='Year of ' + str(year), scale = bp_sc_x)
bp_ax_y = Axis(label='Variable Label', scale = bp_sc_y, orientation='vertical')

# One box at x = year; y is a list holding a single array of incomes.
x_box = [year]
boxes = Boxplot(x = x_box, y = [us_aux.Income.values], scales = {'x': bp_sc_x, 'y': bp_sc_y},
                box_fill_color = 'gray', outlier_fill_color = 'black')

In [93]:
# Boxplot with markers: every observation is drawn as a point at x = year,
# on the same scales as the box.
x_scat_box = [year] * len(us_aux.Income.values)
y_scat_box = us_aux.Income.values.tolist()
scatt_box = Scatter(x = x_scat_box, y = y_scat_box, scales={'x': bp_sc_x, 'y': bp_sc_y}, colors=['black'])
scatt_box.default_size = 12
# Last expression of the cell: the figure is displayed as the cell output.
bq.Figure(axes=[bp_ax_x, bp_ax_y], marks=[boxes, scatt_box], title = 'Boxplot of Income in '+ str(year))

Figure(axes=[Axis(label='Year of 2000', scale=LinearScale(max=2001.0, min=1999.0), side='bottom'), Axis(label=…

# Some interactions

## Choropleth with Time Path

In [94]:
def hover_callback(name, value):
    """Point the time-path figure at the state currently under the cursor.

    Parameters
    ----------
    name : object
        First argument supplied by the map's hover event; unused.
    value : dict
        Hover payload; the state name is read from ``value['data']['name']``.

    Regions that carry no name, or whose name does not appear in
    ``df_map['Name']``, are ignored: the previously shown time path stays in
    place instead of the callback raising.
    """
    hovered_data = (value or {}).get('data') or {}
    state_hovered = hovered_data.get('name')

    state_names = list(df_map['Name'])
    if state_hovered not in state_names:
        return

    state_row_index = state_names.index(state_hovered)
    Var = df_map[cols_to_calculate].iloc[state_row_index,:]
    VarLag = all_lagged.iloc[state_row_index,:]

    # Update the existing marks in place; the figure used as tooltip picks up
    # the new data without being rebuilt.
    tp_line.x = Var
    tp_line.y = VarLag
    tp_scatt.x = Var
    tp_scatt.y = VarLag
    tp_figure.title = 'Time Path for ' + state_hovered
    
# Rebuild the map with the time-path figure as its tooltip and register the
# callback that refreshes that figure on every hover event.
choro_map = Map(map_data = base_json, **map_styles, tooltip=tp_figure)
choro_map.on_hover(hover_callback)

In [95]:
# Display the map; it reuses the colour axis built for the first choropleth.
Figure(marks=[choro_map], axes=[axis], title='Choropleth with TimePath', fig_margin={'top': 0, 'bottom': 0, 'left': 0, 'right': 0})

Figure(axes=[ColorAxis(scale=ColorScale(scheme='YlOrRd'))], fig_margin={'top': 0, 'bottom': 0, 'left': 0, 'rig…