In [None]:
import geopandas as gpd
import libpysal as lps
import seaborn as sns
import numpy as np
import tobler as tob
import matplotlib.pyplot as plt
# Default size (width, height in inches) for every figure in this notebook.
plt.rcParams.update({'figure.figsize': [20, 10]})

In [None]:
# Read the school layer from the shapefile next to this notebook.
schools = gpd.read_file(filename='schools.shp')

In [None]:
# Quick look at the school geometries using the default plot settings.
schools.plot()

In [None]:
# Preview the first five rows of the school attribute table.
schools.head(n=5)

In [None]:
# Color each school by its test score and show the color legend.
schools.plot(
    column='test_score',
    legend=True,
)

In [None]:
# Read the environmental layer from its shapefile.
env = gpd.read_file(filename='env.shp')

In [None]:
# Quick look at the environmental layer geometries using the default plot settings.
env.plot()

In [None]:
# Three-class quantile choropleth of the `escore` column.
env.plot(
    column='escore',
    scheme='Quantiles',
    k=3,
    cmap='Greens',
    edgecolor='grey',
    legend=True,
)

In [None]:
# Read the demographic layer from its shapefile.
dem = gpd.read_file(filename='demo.shp')

In [None]:
# Preview the first five rows of the demographic attribute table.
dem.head(n=5)

In [None]:
# Five-class quantile choropleth of the `pctb` column.
dem.plot(
    column='pctb',
    scheme='Quantiles',
    k=5,
    legend=True,
)

# Overlays

In [None]:
# Draw all three layers on one axes: demographic outlines (blue),
# environmental outlines (green), and the schools (red).
base = dem.boundary.plot(edgecolor='blue')
env.boundary.plot(edgecolor='green', ax=base)
schools.plot(ax=base, color='red')

## Issues: Data Integration and Change of Support

We have three sources of data:

- 40 census block groups with white/black population counts (blue polygons)
- 20 environmental monitoring zones with measurements of environmental quality (green polygons)
- 10 schools with x,y addresses and test scores (red points)

## Objective

Explore the relationship between school test scores and the environmental quality and racial composition of neighborhoods.

## Tasks
1. Construct values of environmental quality for each school
2. Construct racial composition measure for each school
3. Explore associations of interest

### Environmental Quality for the Schools

Two approaches:
    
 1. Take the value for the environmental zone that contains the school (1-1)
 2. Construct a value that represents the environmental quality in the neighborhoods closest to the school (1-many)

#### Approach 1 Spatial Join

In [None]:
# Environmental score choropleth with the schools drawn on top in red.
base = env.plot(legend=True, column='escore')
schools.plot(ax=base, color='red')

In [None]:
# Left spatial join: every school row is kept and gains the attributes of the
# environmental polygon it lies within.
# `predicate=` replaces the `op=` keyword, which GeoPandas deprecated in 0.10
# and removed in 1.0.
schools_escore1 = gpd.sjoin(schools, env, how='left', predicate='within')

In [None]:
# Display the table produced by the spatial join of schools with env.
schools_escore1

In [None]:
# Copy the joined environmental score onto the schools table (aligned on index).
schools['escore1'] = schools_escore1['escore']

In [None]:
# Confirm the new `escore1` column is present.
schools.head(n=5)

#### Approach 2 Areal Interpolation

In [None]:
# Collect the school coordinates into an (n, 2) array with one [x, y] row per school.
x, y = schools.geometry.x, schools.geometry.y
cents = np.column_stack((x, y))
cents

In [None]:
# Build Voronoi cells around the school coordinates. The result is unpacked
# into two objects (presumably the cell polygons and the generating points;
# confirm against the installed libpysal version).
schools_vd, school_cents = lps.cg.voronoi_frames(cents)

In [None]:
# Voronoi cells with the schools drawn on top in red.
base = schools_vd.plot()
schools.plot(color='red', ax=base)

In [None]:
# Compare the extent of the Voronoi cells with the environmental outlines (green).
base = env.boundary.plot(edgecolor='green')
schools_vd.plot(ax=base)
schools.plot(color='red', ax=base)

In [None]:
# Rebuild the Voronoi cells, this time clipped to the merged footprint of the
# environmental layer.
# GeoPandas 1.0 deprecates `unary_union` in favor of `union_all()`; use the new
# method when it exists and fall back to the old attribute on earlier releases.
env_footprint = env.union_all() if hasattr(env, 'union_all') else env.unary_union
schools_vd, school_cents = lps.cg.voronoi_frames(cents, clip=env_footprint)

In [None]:
# Clipped Voronoi cells drawn over the environmental outlines (green), schools in red.
base = env.boundary.plot(edgecolor='green')
schools_vd.plot(ax=base)
schools.plot(color='red', ax=base)

In [None]:
# Environmental score choropleth with the Voronoi cell outlines and schools in red.
base = env.plot(column='escore')
schools_vd.boundary.plot(edgecolor='red', ax=base)
schools.plot(color='red', ax=base)

Estimate the escore for a school using areal interpolation

In [None]:
# Area-weighted interpolation of `escore` (treated as an intensive variable)
# from the environmental polygons onto the school Voronoi cells.
escore = tob.area_weighted.area_interpolate(
    source_df=env,
    target_df=schools_vd,
    intensive_variables=['escore'],
)

In [None]:
# Display the frame returned by the areal interpolation.
escore

In [None]:
# Map the interpolated score per Voronoi cell, with the schools in red.
base = escore.plot(column='escore')
schools.plot(color='red', ax=base)

In [None]:
# Store the interpolated score on the schools table (aligned on index).
schools['escore2'] = escore['escore']

In [None]:
# Confirm the new `escore2` column is present.
schools.head(n=5)

In [None]:
# Compare the two environmental score estimates against each other.
sns.scatterplot(
    x="escore1",
    y="escore2",
    data=schools,
)

In [None]:
# Test score against the spatial-join environmental score.
sns.scatterplot(
    x="escore1",
    y="test_score",
    data=schools,
)

In [None]:
# Test score against the areal-interpolation environmental score.
sns.scatterplot(
    x="escore2",
    y="test_score",
    data=schools,
)

### Racial Composition of the Schools

Two approaches:
    
 1. Take the value for the block group that contains the school (1-1)
 2. Construct a value that represents the racial composition in the neighborhoods closest to the school (1-many)

In [None]:
# Five-class quantile choropleth of `pctb` with the schools drawn in red.
base = dem.plot(
    column='pctb',
    scheme='Quantiles',
    k=5,
    legend=True,
)
schools.plot(color='red', ax=base)

#### Approach 1 spatial join

In [None]:
# Left spatial join: every school row is kept and gains the attributes of the
# demographic polygon it lies within.
# `predicate=` replaces the `op=` keyword, which GeoPandas deprecated in 0.10
# and removed in 1.0.
pctb1 = gpd.sjoin(schools, dem, how='left', predicate='within')

In [None]:
# Preview the first five rows of the joined table.
pctb1.head(n=5)

In [None]:
# Store the joined `pctb` value on both the Voronoi cells and the schools
# (both assignments align on index).
schools_vd['pctb1'] = pctb1['pctb']
schools['pctb1'] = pctb1['pctb']

In [None]:
# Map the spatial-join `pctb1` value per Voronoi cell, with the schools in red.
base = schools_vd.plot(column='pctb1')
schools.plot(color='red', ax=base)

#### Approach 2 areal interpolation

In [None]:
# Area-weighted interpolation from the demographic polygons onto the school
# Voronoi cells: `white`/`black` are passed as extensive variables and
# `pctb`/`pctw` as intensive variables.
results = tob.area_weighted.area_interpolate(
    source_df=dem,
    target_df=schools_vd,
    extensive_variables=['white', 'black'],
    intensive_variables=['pctb', 'pctw'],
)

In [None]:
# Preview the first five rows of the interpolated demographics.
results.head(n=5)

In [None]:
# Share of `black` in the combined `white` + `black` total, computed from the
# interpolated extensive variables.
total_population = results['white'] + results['black']
results['pctb1'] = results['black'] / total_population

In [None]:
# Side-by-side look at the interpolated `pctb` and the derived `pctb1`.
results.loc[:, ['pctb', 'pctb1']].head(n=5)

In [None]:
# Copy both areal-interpolation estimates onto the schools table (aligned on index).
# NOTE(review): `results.pctb` was interpolated as an intensive variable, while
# `results.pctb1` was derived from the extensive counts. If the `e`/`i` suffixes
# are meant to stand for extensive/intensive, these two names look swapped.
# Confirm the intended naming before relying on them.
schools['pctb2e'] = results['pctb']
schools['pctb2i'] = results['pctb1']

In [None]:
# Compare the three racial composition estimates for the first five schools.
schools.loc[:, ['pctb1', 'pctb2i', 'pctb2e']].head(n=5)

In [None]:
# Test score against the `pctb2e` estimate.
sns.scatterplot(
    x="pctb2e",
    y="test_score",
    data=schools,
)

In [None]:
# Test score against the spatial-join `pctb1` estimate.
sns.scatterplot(
    x="pctb1",
    y="test_score",
    data=schools,
)

In [None]:
# Preview the schools table with all derived columns.
schools.head(n=5)

In [None]:
# Copy each school's test score onto its Voronoi cell (aligned on index),
# then map the cells by test score with the schools in red.
schools_vd['test_score'] = schools['test_score']
base = schools_vd.plot(column='test_score')
schools.plot(color='red', ax=base)