# Exploratory analysis with Geopandas

As with almost anything in Python, we'll need to import some libraries.  

In [1]:
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gp
import pyproj
import folium
from ipywidgets import widgets
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual

First we'll need to get our data, which is super easy with Geopandas. In one line of code we can read a Shapefile, a GeoJSON file, or one of several other formats. For more information, please visit the main [Geopandas](http://geopandas.org) site. 

In [2]:
# Read the block-group features (attributes plus geometry) into a GeoDataFrame.
Lincoln_blk_groups = gp.read_file("Lincoln_block_groups.geojson")
# Show the first few rows as a quick check of what was loaded.
Lincoln_blk_groups.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLKGRPCE10,GEOID10,NAMELSAD10,MTFCC10,FUNCSTAT10,ALAND10,AWATER10,...,OV250K2999,OV300K3999,OV400K4999,OV500K7499,OV750K9999,OV1MillUp,MedValHous,Shape_Le_1,Shape_Area,geometry
0,31,109,3002,1,311090030021,Block Group 1,G5030,S,625990.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,123100.0,10154.502977,6734360.0,(POLYGON ((2549906.750222221 389043.3978194296...
1,31,109,100,1,311090001001,Block Group 1,G5030,S,3464631.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,101100.0,26017.761787,37271990.0,"(POLYGON ((2576433.38324766 396432.8895947949,..."
2,31,109,10202,1,311090102021,Block Group 1,G5030,S,133753456.0,2659735.0,...,64.0,64.0,9.0,63.0,2.0,4.0,223500.0,206120.656109,1467521000.0,(POLYGON ((2526566.210097174 380154.5515383796...
3,31,109,2200,6,311090022006,Block Group 6,G5030,S,305126.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,77400.0,7794.218502,3282580.0,(POLYGON ((2552566.554760516 366552.4326609224...
4,31,109,3715,1,311090037151,Block Group 1,G5030,S,2594575.0,0.0,...,298.0,99.0,30.0,40.0,2.0,3.0,264700.0,21138.556873,27913580.0,(POLYGON ((2567001.427869571 343213.0047853467...


It's important to know what your fields mean, so we'll load a metadata file I created that pairs each abbreviated field name with a readable label.

In [10]:
# Field metadata: the code below relies on its 'label' and 'abbreviated' columns.
labels = pd.read_csv("defition.csv")
# Preview the first five entries of the second column (the readable labels).
print(labels.iloc[:5, 1])

0    Feature geometry.
1      State FIPS Code
2     County FIPS Code
3    Census Tract Code
4     Block Group Code
Name: label, dtype: object


Next we're going to do some linear regression to see if we can find any interesting patterns. I've left the choice of fields totally up to you, but **be warned**: not every combination will run or make sense! Non-numeric fields, such as the feature geometry, cannot be used in a regression.

In [11]:
from sklearn.linear_model import LinearRegression
def f(x='Median Age-Total Population', y='Median Value (Dollars)'):
    """Scatter one block-group field against another and overlay a linear fit.

    Parameters
    ----------
    x : str
        Readable label (a value of ``labels['label']``) of the field drawn on
        the horizontal axis and used as the regression input.
    y : str
        Readable label of the field drawn on the vertical axis and used as the
        regression target.

    Raises
    ------
    IndexError
        If either label has no matching row in ``labels``.

    Notes
    -----
    Block groups with a missing value in either field are excluded from both
    the fit and the plot. Fields that are not numeric will still make the
    regression fail.
    """
    # Translate the readable labels into the column names used in the data.
    x_abbr = labels.loc[labels['label'] == x, 'abbreviated'].iloc[0]
    y_abbr = labels.loc[labels['label'] == y, 'abbreviated'].iloc[0]

    x_column = Lincoln_blk_groups[x_abbr]
    y_column = Lincoln_blk_groups[y_abbr]

    # LinearRegression rejects NaN, so keep only rows where both fields exist.
    complete = x_column.notnull() & y_column.notnull()
    # scikit-learn wants 2-D input of shape (n_samples, n_features).
    x_values = x_column[complete].values.reshape(-1, 1)
    y_values = y_column[complete].values.reshape(-1, 1)

    regressor = LinearRegression()
    regressor.fit(x_values, y_values)

    fig, ax = plt.subplots(figsize=(10, 10), dpi=80, facecolor='w', edgecolor='k')
    ax.scatter(x_values, y_values, color='red')
    ax.plot(x_values, regressor.predict(x_values), color='blue')
    ax.set_title(x + " versus " + y + " in Lincoln, NE", fontsize=25)
    ax.set_xlabel(x, fontsize=15)
    ax.set_ylabel(y, fontsize=15)
    plt.show()
# Offer every readable field label (second metadata column) for both arguments.
field_choices = labels.iloc[:, 1]
interact(f, x=field_choices, y=field_choices)

<function __main__.f>

Lastly, we'll create a choropleth map showing the ratio of one field to another. As above, choose your fields wisely. 

In [5]:
def f(x='Total Population', y='Total Population-Male'):
    """Map the ratio of two block-group fields as a five-class choropleth.

    Parameters
    ----------
    x : str
        Readable label (a value of ``labels['label']``) of the field used as
        the numerator.
    y : str
        Readable label of the field used as the denominator.

    Raises
    ------
    IndexError
        If either label has no matching row in ``labels``.

    Notes
    -----
    The ratio is added to a copy of ``Lincoln_blk_groups``, so the shared
    GeoDataFrame is never modified, even when plotting fails.
    """
    # Translate the readable labels into the column names used in the data.
    numerator = labels.loc[labels['label'] == x, 'abbreviated'].iloc[0]
    denominator = labels.loc[labels['label'] == y, 'abbreviated'].iloc[0]
    ratio_column = numerator + " over " + denominator

    ratio = Lincoln_blk_groups[numerator] / Lincoln_blk_groups[denominator]
    # A zero denominator gives +/-inf, which cannot be placed in a class
    # break; mark those block groups as missing instead.
    ratio = ratio.replace([np.inf, -np.inf], np.nan)

    # Work on a copy so the temporary column never touches the shared frame.
    plot_frame = Lincoln_blk_groups.copy()
    plot_frame[ratio_column] = ratio

    fig, ax = plt.subplots(1, figsize=(15, 10))
    ax.set_title(x +" divided by " + y + ' in Lincoln, NE')
    plot_frame.plot(ratio_column, scheme='fisher_jenks', k=5, cmap=plt.cm.Blues, legend=True, ax=ax)
    ax.set_axis_off()
    ax.axis('equal')
    plt.show()
# Let the reader pick numerator and denominator from the readable field labels.
field_choices = labels.iloc[:, 1]
interact(f, x=field_choices, y=field_choices)

<function __main__.f>