Using Python 3 Kernel

In [1]:
%matplotlib inline
from __future__ import print_function, division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import geopandas as gpd
from fiona.crs import from_epsg

In [2]:
#Function for overlay
def spatial_overlays(df1, df2, how='intersection', reproject=True):
    """Perform spatial overlay between two polygons.

    Currently only supports data GeoDataFrames with polygons.
    Implements several methods that are all effectively subsets of
    the union.

    Parameters
    ----------
    df1 : GeoDataFrame with MultiPolygon or Polygon geometry column
    df2 : GeoDataFrame with MultiPolygon or Polygon geometry column
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    reproject : boolean, default True
        If the two frames have different CRS, convert ``df2`` to the CRS
        of ``df1`` before overlaying. When False the geometries are used
        as they are.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported methods.

    Notes
    -----
    The inputs are copied, so the caller's frames are not modified.
    NOTE(review): candidate matches come from the spatial index as row
    positions, but some branches look them up against index labels; this
    assumes both inputs carry a default 0..n-1 index -- TODO confirm.
    """
    from functools import reduce

    # Reject unknown methods up front instead of silently returning None.
    valid_hows = ('intersection', 'union', 'identity',
                  'symmetric_difference', 'difference')
    if how not in valid_hows:
        raise ValueError(
            "`how` was '{}' but is expected to be one of {}".format(how, valid_hows))

    df1 = df1.copy()
    df2 = df2.copy()
    # buffer(0) is applied to both inputs before any overlay operation.
    df1['geometry'] = df1.geometry.buffer(0)
    df2['geometry'] = df2.geometry.buffer(0)
    if df1.crs!=df2.crs and reproject:
        print('Data has different projections.')
        print('Converted data to projection of first GeoPandas DatFrame')
        df2.to_crs(crs=df1.crs, inplace=True)
    if how=='intersection':
        # Spatial Index to create intersections
        spatial_index = df2.sindex
        df1['bbox'] = df1.geometry.apply(lambda x: x.bounds)
        df1['sidx']=df1.bbox.apply(lambda x:list(spatial_index.intersection(x)))
        # Flatten {df1 row -> [df2 candidates]} into one (idx1, idx2) row per pair.
        pairs = df1['sidx'].to_dict()
        nei = []
        for i,j in pairs.items():
            for k in j:
                nei.append([i,k])
        pairs = gpd.GeoDataFrame(nei, columns=['idx1','idx2'], crs=df1.crs)
        pairs = pairs.merge(df1, left_on='idx1', right_index=True)
        pairs = pairs.merge(df2, left_on='idx2', right_index=True, suffixes=['_1','_2'])
        pairs['Intersection'] = pairs.apply(lambda x: (x['geometry_1'].intersection(x['geometry_2'])).buffer(0), axis=1)
        pairs = gpd.GeoDataFrame(pairs, columns=pairs.columns, crs=df1.crs)
        # Keep the attribute columns only; the intersection becomes the geometry.
        cols = pairs.columns.tolist()
        cols.remove('geometry_1')
        cols.remove('geometry_2')
        cols.remove('sidx')
        cols.remove('bbox')
        cols.remove('Intersection')
        dfinter = pairs[cols+['Intersection']].copy()
        dfinter.rename(columns={'Intersection':'geometry'}, inplace=True)
        dfinter = gpd.GeoDataFrame(dfinter, columns=dfinter.columns, crs=pairs.crs)
        # Bounding boxes can overlap where the polygons do not: drop empty results.
        dfinter = dfinter.loc[dfinter.geometry.is_empty==False]
        dfinter.drop(['idx1','idx2'], inplace=True, axis=1)
        return dfinter
    elif how=='difference':
        spatial_index = df2.sindex
        df1['bbox'] = df1.geometry.apply(lambda x: x.bounds)
        df1['sidx']=df1.bbox.apply(lambda x:list(spatial_index.intersection(x)))
        # Subtract every candidate df2 geometry from the df1 geometry in turn.
        df1['new_g'] = df1.apply(lambda x: reduce(lambda x, y: x.difference(y).buffer(0),
                                 [x.geometry]+list(df2.iloc[x.sidx].geometry)) , axis=1)
        df1.geometry = df1.new_g
        df1 = df1.loc[df1.geometry.is_empty==False].copy()
        df1.drop(['bbox', 'sidx', 'new_g'], axis=1, inplace=True)
        return df1
    elif how=='symmetric_difference':
        # Give each frame its own id column and NaN in the other one, so the
        # outer merge below stacks the rows of both frames without matching any.
        df1['idx1'] = df1.index.tolist()
        df2['idx2'] = df2.index.tolist()
        df1['idx2'] = np.nan
        df2['idx1'] = np.nan
        dfsym = df1.merge(df2, on=['idx1','idx2'], how='outer', suffixes=['_1','_2'])
        # Take geometry_1 by default and geometry_2 wherever it is present.
        dfsym['geometry'] = dfsym.geometry_1
        dfsym.loc[dfsym.geometry_2.isnull()==False, 'geometry'] = dfsym.loc[dfsym.geometry_2.isnull()==False, 'geometry_2']
        dfsym.drop(['geometry_1', 'geometry_2'], axis=1, inplace=True)
        dfsym = gpd.GeoDataFrame(dfsym, columns=dfsym.columns, crs=df1.crs)
        spatial_index = dfsym.sindex
        dfsym['bbox'] = dfsym.geometry.apply(lambda x: x.bounds)
        dfsym['sidx'] = dfsym.bbox.apply(lambda x:list(spatial_index.intersection(x)))
        dfsym['idx'] = dfsym.index.values
        # Remove each row's own position from its candidate list (in place),
        # otherwise the reduce below would subtract the polygon from itself.
        dfsym.apply(lambda x: x.sidx.remove(x.idx), axis=1)
        dfsym['new_g'] = dfsym.apply(lambda x: reduce(lambda x, y: x.difference(y).buffer(0),
                         [x.geometry]+list(dfsym.iloc[x.sidx].geometry)) , axis=1)
        dfsym.geometry = dfsym.new_g
        dfsym = dfsym.loc[dfsym.geometry.is_empty==False].copy()
        dfsym.drop(['bbox', 'sidx', 'idx', 'idx1','idx2', 'new_g'], axis=1, inplace=True)
        return dfsym
    elif how=='union':
        # Pass `reproject` on so reproject=False is honoured by the sub-calls too.
        dfinter = spatial_overlays(df1, df2, how='intersection', reproject=reproject)
        dfsym = spatial_overlays(df1, df2, how='symmetric_difference', reproject=reproject)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        dfunion = pd.concat([dfinter, dfsym])
        dfunion.reset_index(inplace=True, drop=True)
        return dfunion
    elif how=='identity':
        dfunion = spatial_overlays(df1, df2, how='union', reproject=reproject)
        cols1 = df1.columns.tolist()
        cols2 = df2.columns.tolist()
        cols1.remove('geometry')
        cols2.remove('geometry')
        # Column names shared by both inputs carry the '_1' suffix in the union.
        cols2 = set(cols2).intersection(set(cols1))
        cols1 = list(set(cols1).difference(set(cols2)))
        cols2 = [col+'_1' for col in cols2]
        # Keep a row once if any df1 attribute is present. A 1-D mask is used
        # because a 2-D boolean array would repeat the row per non-null column.
        keep = dfunion[cols1+cols2].notnull().any(axis=1)
        dfunion = dfunion[keep.values]
        return dfunion

In [3]:
#function for download data from egis3
def download_egis3(url, zipname, folder, shpname):
    """Download a zipped shapefile, unzip it and load it as a GeoDataFrame.

    Parameters
    ----------
    url : str
        Address of the zip archive. ``curl -O`` saves it in the current
        working directory under the last path component of the URL.
    zipname : str
        Name of the downloaded archive; expected to match the file name
        at the end of ``url``.
    folder : str
        Directory the archive is extracted into.
    shpname : str
        Name of the ``.shp`` file inside ``folder``.

    Returns
    -------
    GeoDataFrame
        The layer with its CRS set to EPSG:2229 and then reprojected to
        EPSG:4326.

    Raises
    ------
    subprocess.CalledProcessError
        If ``curl`` or ``unzip`` exits with a non-zero status.
    """
    import subprocess

    # Argument lists avoid shell quoting problems; check_call surfaces a failed
    # download or extraction here rather than as a confusing read error later.
    subprocess.check_call(["curl", "-O", url])
    # -o overwrites existing files so re-running the cell never waits on a prompt.
    subprocess.check_call(["unzip", "-o", zipname, "-d", folder])

    df = gpd.read_file(os.path.join(folder, shpname))

    # The CRS is assigned explicitly (whatever the file declares is replaced)
    # before converting to EPSG:4326.
    df.crs = from_epsg(2229)
    df = df.to_crs(epsg=4326)
    return df

In [75]:
def read_census(zipname, folder, shpname, url=None):
    """Unzip a shapefile archive and load it as a GeoDataFrame.

    Parameters
    ----------
    zipname : str
        Name of the zip archive in the current working directory.
    folder : str
        Directory the archive is extracted into.
    shpname : str
        Name of the ``.shp`` file inside ``folder``.
    url : str, optional
        When given, the archive is first downloaded with ``curl -O``.
        When omitted, the archive must already be on disk. The function no
        longer reads a global ``url`` left over from another cell.

    Returns
    -------
    GeoDataFrame
        The layer as read from disk; no CRS is assigned and no reprojection
        is done.

    Raises
    ------
    subprocess.CalledProcessError
        If ``curl`` or ``unzip`` exits with a non-zero status.
    """
    import subprocess

    if url is not None:
        subprocess.check_call(["curl", "-O", url])
    # -o overwrites existing files so re-running the cell never waits on a prompt.
    subprocess.check_call(["unzip", "-o", zipname, "-d", folder])

    return gpd.read_file(os.path.join(folder, shpname))

In [4]:
#function to check the data quality
def checkdf(dfname):
    """Print a null/unique summary per column and return the first three rows.

    The row count is printed first, then one line for every column except
    the last one (in the frames loaded in this notebook that is the geometry
    column) giving its number of missing values and distinct values.
    """
    row_total = dfname.shape[0]
    print('Total polygon number: {}'.format(row_total))
    print('number of null and unique values in each column:')

    non_null = dfname.count()
    line = '{}: null {}, unique {}'
    for col in dfname.columns[:-1]:
        missing = row_total - non_null[col]
        distinct = len(dfname[col].unique())
        print(line.format(col, missing, distinct))
    return dfname.head(3)

In [5]:
#function to drop redundant columns
def cleandf(dfname, droplist):
    """Drop redundant columns from ``dfname`` in place and return a preview.

    Parameters
    ----------
    dfname : DataFrame or GeoDataFrame
        Frame to clean; it is modified in place.
    droplist : list of str
        Extra column names to remove.

    Returns
    -------
    DataFrame
        The first three rows of the cleaned frame.

    Notes
    -----
    'OBJECTID', 'Shape_area' and 'Shape_len' are always removed when they
    occur anywhere except in the last column position. Names that are not
    present are skipped, so re-running a cell on an already cleaned frame
    does not raise.
    """
    notneedlist = ['OBJECTID', 'Shape_area', 'Shape_len']

    # The last column is never matched against the built-in list.
    to_drop = [col for col in dfname.columns[:-1] if col in notneedlist]
    for col in droplist:
        if col not in to_drop:
            to_drop.append(col)

    # errors='ignore' keeps the call idempotent on re-run.
    dfname.drop(to_drop, axis=1, inplace=True, errors='ignore')

    return dfname.head(3)

## Download data

#### Including 6 geometries:
- Health Districts (HD) – 2012
- Law Enforcement Reporting Districts
- School District Boundaries (2011)
- California State Senate Districts (2011)
- US Congressional Districts
- [ZIP Code Boundaries](https://egis3.lacounty.gov/dataportal/2016/08/11/zip-codes-with-parcel-specific-boundaries/)

In [6]:
# Running list of the variable names (as strings) of the layers loaded below.
dfname_list = []

In [7]:
# Health Districts (HD) – 2012
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2012/"
       "02/HD_20121.zip")
health_districts = download_egis3(
    url,
    zipname='HD_20121.zip',
    folder='health_districts',
    shpname='Health_Districts_2012.shp',
)
dfname_list.append('health_districts')

checkdf(health_districts)

Total polygon number: 26
number of null and unique values in each column:
OBJECTID: null 0, unique 26
HD_NAME: null 0, unique 26
SPA_NAME: null 0, unique 8
Shape_area: null 0, unique 26
Shape_len: null 0, unique 26
HD_2012: null 0, unique 26
SPA_2012: null 0, unique 8


Unnamed: 0,OBJECTID,HD_NAME,SPA_NAME,Shape_area,Shape_len,HD_2012,SPA_2012,geometry
0,1,Alhambra,San Gabriel,1089719000.0,201892.7,3,3,"POLYGON ((-118.0942930008 34.13524499950874, -..."
1,2,Antelope Valley,Antelope Valley,38592380000.0,1484948.0,5,1,POLYGON ((-117.6552358399884 34.39722196554607...
2,3,Bellflower,East,1086595000.0,262018.7,6,7,(POLYGON ((-118.0287288746297 33.8733158131870...


In [8]:
# Drop the two SPA columns; cleandf also removes OBJECTID, Shape_area and Shape_len.
droplist = ['SPA_NAME', 'SPA_2012']
cleandf(health_districts, droplist)

Unnamed: 0,HD_NAME,HD_2012,geometry
0,Alhambra,3,"POLYGON ((-118.0942930008 34.13524499950874, -..."
1,Antelope Valley,5,POLYGON ((-117.6552358399884 34.39722196554607...
2,Bellflower,6,(POLYGON ((-118.0287288746297 33.8733158131870...


In [10]:
# Law Enforcement Reporting Districts
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/LACOUNTY_LAW_ENFORCEMENT_RDs.zip")
law_enforcement = download_egis3(
    url,
    zipname='LACOUNTY_LAW_ENFORCEMENT_RDs.zip',
    folder='law_enforcement',
    shpname='LACOUNTY_LAW_ENFORCEMENT_RDs.shp',
)
dfname_list.append('law_enforcement')

checkdf(law_enforcement)

Total polygon number: 5745
number of null and unique values in each column:
RD: null 0, unique 3667
Name: null 0, unique 519
Layer: null 0, unique 3


Unnamed: 0,RD,Name,Layer,geometry
0,230,Belvedere,Parks Bureau,POLYGON ((-118.1604325432619 34.03712079823092...
1,231,Atlantic Avenue,Parks Bureau,"POLYGON ((-118.1550797745909 34.0257529106941,..."
2,232,City Terrace,Parks Bureau,POLYGON ((-118.1784888821586 34.04781649689102...


In [11]:
# Drop 'Layer'; RD and Name are kept.
droplist = ['Layer']
cleandf(law_enforcement, droplist)

Unnamed: 0,RD,Name,geometry
0,230,Belvedere,POLYGON ((-118.1604325432619 34.03712079823092...
1,231,Atlantic Avenue,"POLYGON ((-118.1550797745909 34.0257529106941,..."
2,232,City Terrace,POLYGON ((-118.1784888821586 34.04781649689102...


In [13]:
# School District Boundaries (2011)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2012/01/"
       "rrcc_school_districts1.zip")
school_districts = download_egis3(
    url,
    zipname='rrcc_school_districts1.zip',
    folder='school_districts',
    shpname='rrcc_school_districts.shp',
)
dfname_list.append('school_districts')

checkdf(school_districts)

Total polygon number: 79
number of null and unique values in each column:
DISTRICT: null 0, unique 78
UNIFIED: null 0, unique 49
HIGH: null 0, unique 9
ELEMENTARY: null 0, unique 31
PH: null 0, unique 78
ADDR: null 0, unique 78
PH2: null 0, unique 20
PH3: null 0, unique 6
STU: null 0, unique 78
HI_ADDR: null 0, unique 9
HI_PH: null 0, unique 9
HI_STU: null 0, unique 9
LABEL: null 0, unique 78


Unnamed: 0,DISTRICT,UNIFIED,HIGH,ELEMENTARY,PH,ADDR,PH2,PH3,STU,HI_ADDR,HI_PH,HI_STU,LABEL,geometry
0,HERMOSA BEACH CITY ELEMENTARY,,,HERMOSA BEACH CITY ELEMENTARY,310 937 5877,1645 VALLEY DR HERMOSA BCH 90254,,,950,,,,HERMOSA BEACH CITY ELEM,POLYGON ((-118.4048577253546 33.87782346394338...
1,CENTINELA VALLEY UNION HIGH/HAWTHORNE ELEMENTARY,,CENTINELA VALLEY UNION HIGH,HAWTHORNE ELEMENTARY,310 676 2276,14120 S HAWTHORNE BL HAWTHORNE 90250,,,8145,14901 S INGLEWOOD AV LAWNDALE 90260,310 263 3200,6220.0,CENTINELA VALLEY UNION HIGH\nHAWTHORNE ELEMENTARY,POLYGON ((-118.3607054443706 33.93093449396304...
2,CENTINELA VALLEY UNION HIGH/LAWNDALE ELEMENTARY,,CENTINELA VALLEY UNION HIGH,LAWNDALE ELEMENTARY,310 973 1300,4161 W 147TH ST LAWNDALE 90260,,,5510,14901 S INGLEWOOD AV LAWNDALE 90260,310 263 3200,6220.0,CENTINELA VALLEY UNION HIGH\nLAWNDALE ELEMENTARY,"POLYGON ((-118.367294562588 33.90558198783078,..."


In [17]:
# Drop every attribute column except DISTRICT.
droplist = ['UNIFIED', 'HIGH', 'ELEMENTARY', 'PH', 'ADDR', 'PH2', 'PH3',
       'STU', 'HI_ADDR', 'HI_PH', 'HI_STU', 'LABEL']
cleandf(school_districts, droplist)

Unnamed: 0,DISTRICT,geometry
0,HERMOSA BEACH CITY ELEMENTARY,POLYGON ((-118.4048577253546 33.87782346394338...
1,CENTINELA VALLEY UNION HIGH/HAWTHORNE ELEMENTARY,POLYGON ((-118.3607054443706 33.93093449396304...
2,CENTINELA VALLEY UNION HIGH/LAWNDALE ELEMENTARY,"POLYGON ((-118.367294562588 33.90558198783078,..."


In [19]:
# California State Senate Districts (2011)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2011/11/"
       "state-senate-2011.zip")
state_senate = download_egis3(
    url,
    zipname='state-senate-2011.zip',
    folder='state_senate',
    shpname='senate.shp',
)
dfname_list.append('state_senate')

checkdf(state_senate)

Total polygon number: 16
number of null and unique values in each column:
DISTRICT: null 0, unique 16
NAME: null 0, unique 16
LABEL: null 0, unique 16


Unnamed: 0,DISTRICT,NAME,LABEL,geometry
0,16,TULKE,Disrict 16,POLYGON ((-117.6671490011134 34.75005899908675...
1,18,LASFE,Disrict 18,POLYGON ((-118.3611510010984 34.19465099908038...
2,20,POMSB,Disrict 20,POLYGON ((-117.7676900009356 34.02350599909427...


In [20]:
# Drop LABEL; DISTRICT and NAME are kept.
droplist = ['LABEL']
cleandf(state_senate, droplist)

Unnamed: 0,DISTRICT,NAME,geometry
0,16,TULKE,POLYGON ((-117.6671490011134 34.75005899908675...
1,18,LASFE,POLYGON ((-118.3611510010984 34.19465099908038...
2,20,POMSB,POLYGON ((-117.7676900009356 34.02350599909427...


In [22]:
# US Congressional Districts
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/RRCC_CONGRESSIONAL_DISTRICTS.zip")
congressional_districts = download_egis3(
    url,
    zipname='RRCC_CONGRESSIONAL_DISTRICTS.zip',
    folder='congressional_districts',
    shpname='RRCC_CONGRESSIONAL_DISTRICTS.shp',
)
dfname_list.append('congressional_districts')

checkdf(congressional_districts)

Total polygon number: 18
number of null and unique values in each column:
DIST_CONG: null 0, unique 18
Shape_area: null 0, unique 18
Shape_len: null 0, unique 18


Unnamed: 0,DIST_CONG,Shape_area,Shape_len,geometry
0,23,5447776000.0,498627.7,POLYGON ((-117.7736888188639 34.82248828448963...
1,25,45987900000.0,1449675.0,(POLYGON ((-117.7668535002114 34.8232750430814...
2,26,153573000.0,64204.94,POLYGON ((-118.7888893009787 34.16821405941698...


In [24]:
# Nothing extra to drop; cleandf still removes Shape_area and Shape_len on its own.
droplist = []
cleandf(congressional_districts, droplist)

Unnamed: 0,DIST_CONG,geometry
0,23,POLYGON ((-117.7736888188639 34.82248828448963...
1,25,(POLYGON ((-117.7668535002114 34.8232750430814...
2,26,POLYGON ((-118.7888893009787 34.16821405941698...


In [63]:
# ZIP Code Boundaries
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2011/01/"
       "CAMS_ZIPCODE_PARCEL_SPECIFIC.zip")
zip_code = download_egis3(
    url,
    zipname='CAMS_ZIPCODE_PARCEL_SPECIFIC.zip',
    folder='zip_code',
    shpname='CAMS_ZIPCODE_PARCEL_SPECIFIC.shp',
)
dfname_list.append('zip_code')

checkdf(zip_code)

Total polygon number: 311
number of null and unique values in each column:
OBJECTID: null 0, unique 311
ZIPCODE: null 0, unique 311
Shape_area: null 0, unique 311
Shape_len: null 0, unique 311


Unnamed: 0,OBJECTID,ZIPCODE,Shape_area,Shape_len,geometry
0,1,90001,95563400.0,48677.278478,POLYGON ((-118.2433683541707 33.98923126419061...
1,2,90002,82752220.0,44972.017035,"POLYGON ((-118.2342946311994 33.9610050725129,..."
2,3,90003,102643100.0,63303.481758,POLYGON ((-118.2828356317466 33.97596170409104...


In [64]:
# Nothing extra to drop; cleandf still removes OBJECTID, Shape_area and Shape_len on its own.
droplist = []
cleandf(zip_code, droplist)

Unnamed: 0,ZIPCODE,geometry
0,90001,POLYGON ((-118.2433683541707 33.98923126419061...
1,90002,"POLYGON ((-118.2342946311994 33.9610050725129,..."
2,90003,POLYGON ((-118.2828356317466 33.97596170409104...


#### Additional geometries:
- Public Use Microdata Areas (PUMA)
- [LAcounty_COMMUNITIES](https://egis3.lacounty.gov/dataportal/2010/10/21/citycommunity-boundaries/)
- [Split 2010 Block Group/City – Community Statistical Area (formerly BASA)](https://egis3.lacounty.gov/dataportal/2016/01/18/split-2010-block-groupcity-basa/)
- [Town Council / Community Group](https://egis3.lacounty.gov/dataportal/2014/12/29/town-council-areas/)
- [Los Angeles County Fire Department Division Boundaries](http://egis3.lacounty.gov/dataportal/2016/04/28/los-angeles-county-fire-department-division-boundaties/)
- [Los Angeles County Fire Department Battalion Boundaries](http://egis3.lacounty.gov/dataportal/2016/04/28/los-angeles-county-fire-department-battalion-boundaries/)

In [77]:
!wget http://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_06_puma10_500k.zip
puma = read_census('cb_2016_06_puma10_500k.zip', 'puma', 'cb_2016_06_puma10_500k.shp')
dfname_list.append('puma')

checkdf(puma)

URL transformed to HTTPS due to an HSTS policy
--2018-01-04 23:17:07--  https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_06_puma10_500k.zip
Resolving www2.census.gov... 2600:1408:24:599::208c, 104.121.83.95
Connecting to www2.census.gov|2600:1408:24:599::208c|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 742516 (725K) [application/zip]
Saving to: 'cb_2016_06_puma10_500k.zip.3'


2018-01-04 23:17:07 (3.65 MB/s) - 'cb_2016_06_puma10_500k.zip.3' saved [742516/742516]

Total polygon number: 265
number of null and unique values in each column:
STATEFP10: null 0, unique 1
PUMACE10: null 0, unique 265
AFFGEOID10: null 0, unique 265
GEOID10: null 0, unique 265
NAME10: null 0, unique 265
LSAD10: null 0, unique 1
ALAND10: null 0, unique 265
AWATER10: null 0, unique 254


Unnamed: 0,STATEFP10,PUMACE10,AFFGEOID10,GEOID10,NAME10,LSAD10,ALAND10,AWATER10,geometry
0,6,1907,7950000US0601907,601907,"Fresno County (East)--Sanger, Reedley & Parlie...",P0,7647644973,117294872,"POLYGON ((-119.745652 36.938998, -119.743741 3..."
1,6,702,7950000US0600702,600702,Butte County (Southeast)--Oroville City & Para...,P0,2918880948,94035819,"POLYGON ((-121.908697 39.30799, -121.907403 39..."
2,6,7307,7950000US0607307,607307,"San Diego County (Central)--Lakeside, Winter G...",P0,632518078,8339612,"POLYGON ((-116.993347 32.892625, -116.993058 3..."


In [32]:
# Show the column names of the PUMA layer.
puma.columns

Index(['STATEFP10', 'PUMACE10', 'AFFGEOID10', 'GEOID10', 'NAME10', 'LSAD10',
       'ALAND10', 'AWATER10', 'geometry'],
      dtype='object')

In [33]:
# Drop every attribute column except PUMACE10 and NAME10.
droplist = ['STATEFP10', 'AFFGEOID10', 'GEOID10', 'LSAD10',
       'ALAND10', 'AWATER10']
cleandf(puma, droplist)

Unnamed: 0,PUMACE10,NAME10,geometry
0,1907,"Fresno County (East)--Sanger, Reedley & Parlie...","POLYGON ((-119.745652 36.938998, -119.743741 3..."
1,702,Butte County (Southeast)--Oroville City & Para...,"POLYGON ((-121.908697 39.30799, -121.907403 39..."
2,7307,"San Diego County (Central)--Lakeside, Winter G...","POLYGON ((-116.993347 32.892625, -116.993058 3..."


In [35]:
# LA County city / community boundaries
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2010/10/"
       "Communities1.zip")
county_communities = download_egis3(
    url,
    zipname='Communities1.zip',
    folder='county_communities',
    shpname='Communities.shp',
)
dfname_list.append('county_communities')

checkdf(county_communities)

Total polygon number: 353
number of null and unique values in each column:
COMMTYPE: null 0, unique 3
NAME: null 0, unique 206
COLOR: null 0, unique 7
PO_NAME: null 0, unique 177
STATNAME: null 0, unique 193
X_CENTER: null 0, unique 353
Y_CENTER: null 0, unique 353
ST_NAME: null 0, unique 88
LABEL_CITY: null 0, unique 89
LABEL_COMM: null 0, unique 143
AREA_SQMI: null 0, unique 322
Shape_area: null 0, unique 353
Shape_len: null 0, unique 353


Unnamed: 0,COMMTYPE,NAME,COLOR,PO_NAME,STATNAME,X_CENTER,Y_CENTER,ST_NAME,LABEL_CITY,LABEL_COMM,AREA_SQMI,Shape_area,Shape_len,geometry
0,Unincorporated,Edwards,Yellow,Edwards,East Antelope Valley,6590507,2111007,Lancaster,Unincorporated,Edwards,93.961,2619470000.0,337914.985138,POLYGON ((-118.1407088589749 34.81827748625132...
1,Unincorporated,Gorman,Yellow,West Antelope Valley,West Antelope Valley,6357449,2089258,Santa Clarita Valley,Unincorporated,Gorman,173.752,4843936000.0,442757.717076,POLYGON ((-118.4662754936974 34.81837563424517...
2,Unincorporated,Del Sur,Yellow,Lancaster,Quartz Hill,6469031,2091460,Lancaster,Unincorporated,Del Sur,135.398,3774671000.0,423904.059503,POLYGON ((-118.4662754936974 34.81837563424517...


In [41]:
# Drop every attribute column except NAME (cleandf removes Shape_area / Shape_len itself).
droplist = ['COMMTYPE', 'COLOR', 'PO_NAME', 'STATNAME', 'X_CENTER',
       'Y_CENTER', 'ST_NAME', 'LABEL_CITY', 'LABEL_COMM', 'AREA_SQMI']
cleandf(county_communities, droplist)

Unnamed: 0,NAME,geometry
0,Edwards,POLYGON ((-118.1407088589749 34.81827748625132...
1,Gorman,POLYGON ((-118.4662754936974 34.81837563424517...
2,Del Sur,POLYGON ((-118.4662754936974 34.81837563424517...


In [47]:
# Split 2010 Block Group/City – Community Statistical Area (formerly BASA)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2015/12/"
       "EGIS_BG10FIPxx_CSA_20170118.zip")
community_stat_area = download_egis3(
    url,
    zipname='EGIS_BG10FIPxx_CSA_20170118.zip',
    folder='community_stat_area',
    shpname='EGIS_BG10FIPxx_CSA_20170118.shp',
)
dfname_list.append('community_stat_area')

checkdf(community_stat_area)

Total polygon number: 7137
number of null and unique values in each column:
OBJECTID: null 0, unique 7137
BG10: null 0, unique 6422
CT10: null 0, unique 2344
FIP10: null 0, unique 89
FIP11: null 0, unique 89
FIP12: null 0, unique 89
FIP13: null 0, unique 89
FIP14: null 0, unique 89
FIP15: null 0, unique 89
FIP16: null 0, unique 89
CITY_TYPE: null 0, unique 2
LCITY: null 0, unique 89
BASA: null 0, unique 257
LABEL: null 0, unique 348
SOURCE: null 0, unique 2
BG10FIP10: null 0, unique 6882
BG10FIP11: null 0, unique 6882
BG10FIP12: null 0, unique 6881
BG10FIP13: null 0, unique 6871
BG10FIP14: null 0, unique 6870
BG10FIP15: null 0, unique 6874
BG10FIP16: null 0, unique 6874
CT10FIP10: null 0, unique 2696
CT10FIP11: null 0, unique 2696
CT10FIP12: null 0, unique 2696
CT10FIP13: null 0, unique 2690
CT10FIP14: null 0, unique 2689
CT10FIP15: null 0, unique 2693
CT10FIP16: null 0, unique 2693
DISTRICT: null 0, unique 5
NOTES: null 0, unique 10
PART: null 0, unique 6
PARTS: null 0, unique 6
BASA_

Unnamed: 0,OBJECTID,BG10,CT10,FIP10,FIP11,FIP12,FIP13,FIP14,FIP15,FIP16,...,PART,PARTS,BASA_ID,MERGED,FIP17,BG10FIP17,CT10FIP17,Shape_STAr,Shape_STLe,geometry
0,1,9001021,900102,99037,99037,99037,99037,99037,99037,99037,...,1,1,9001021_99037_1,,99037,900102199037,90010299037,4743159000.0,321202.859065,POLYGON ((-117.8806128314039 34.76361625108276...
1,2,9800031,980003,99037,99037,99037,99037,99037,99037,99037,...,1,1,9800031_99037_1,,99037,980003199037,98000399037,2079120000.0,273106.909554,POLYGON ((-118.1400751750954 34.82085077349104...
2,3,9012091,901209,99037,99037,99037,99037,99037,99037,99037,...,1,1,9012091_99037_1,,99037,901209199037,90120999037,6420949000.0,567539.493446,POLYGON ((-118.3250820264543 34.75189784834308...


In [48]:
# Keep only BASA and BASA_ID. Shape_STAr / Shape_STLe are listed explicitly because
# cleandf's built-in list only matches 'Shape_area' / 'Shape_len'; OBJECTID is
# removed by cleandf itself.
droplist = ['BG10', 'CT10', 'FIP10', 'FIP11', 'FIP12', 'FIP13', 'FIP14',
       'FIP15', 'FIP16', 'CITY_TYPE', 'LCITY', 'LABEL', 'SOURCE', 'CT10FIP17',
            'DISTRICT', 'NOTES', 'PART', 'PARTS', 'MERGED', 'FIP17',
       'BG10FIP10', 'BG10FIP11', 'BG10FIP12', 'BG10FIP13', 'BG10FIP14',
       'BG10FIP15', 'BG10FIP16', 'CT10FIP10', 'CT10FIP11', 'CT10FIP12',
       'CT10FIP13', 'CT10FIP14', 'CT10FIP15', 'CT10FIP16', 'BG10FIP17', 
       'Shape_STAr', 'Shape_STLe']
cleandf(community_stat_area, droplist)

Unnamed: 0,BASA,BASA_ID,geometry
0,Hi Vista,9001021_99037_1,POLYGON ((-117.8806128314039 34.76361625108276...
1,South Edwards,9800031_99037_1,POLYGON ((-118.1400751750954 34.82085077349104...
2,West Antelope Valley,9012091_99037_1,POLYGON ((-118.3250820264543 34.75189784834308...


In [51]:
# LA County Town Council / Community Group areas
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2014/12/"
       "DRP_TOWN_COUNCIL_AREAS.zip")
town_councils = download_egis3(
    url,
    zipname='DRP_TOWN_COUNCIL_AREAS.zip',
    folder='town_councils',
    shpname='DRP_TOWN_COUNCIL_AREAS.shp',
)
dfname_list.append('town_councils')

checkdf(town_councils)

Total polygon number: 25
number of null and unique values in each column:
OBJECTID: null 0, unique 25
Id: null 0, unique 1
SupDist: null 0, unique 5
CertDate: null 25, unique 1
NAME: null 0, unique 21
SUB_REGION: null 0, unique 6
TYPE: null 0, unique 1
DRP_NOTES: null 0, unique 16
Shape_STAr: null 0, unique 25
Shape_STLe: null 0, unique 25


Unnamed: 0,OBJECTID,Id,SupDist,CertDate,NAME,SUB_REGION,TYPE,DRP_NOTES,Shape_STAr,Shape_STLe,geometry
0,1,0,5,,Juniper Hills Town Council,,,done,1412556000.0,162222.345363,POLYGON ((-117.9228713428042 34.48373342444793...
1,2,0,5,,Leona Valley Town Council,,,CSD referenced in bylaws,416155800.0,159039.502427,POLYGON ((-118.3237207085177 34.63380740048827...
2,3,0,5,,Acton Town Council,,,CSD,3104176000.0,302459.764141,POLYGON ((-118.2066130257172 34.53937158856911...


In [53]:
# Keep only NAME. Shape_STAr / Shape_STLe are not in cleandf's built-in list,
# so they are dropped explicitly; OBJECTID is removed by cleandf itself.
droplist = ['Id', 'SupDist', 'CertDate', 'SUB_REGION', 'TYPE',
       'DRP_NOTES', 'Shape_STAr', 'Shape_STLe']
cleandf(town_councils, droplist)

Unnamed: 0,NAME,geometry
0,Juniper Hills Town Council,POLYGON ((-117.9228713428042 34.48373342444793...
1,Leona Valley Town Council,POLYGON ((-118.3237207085177 34.63380740048827...
2,Acton Town Council,POLYGON ((-118.2066130257172 34.53937158856911...


In [56]:
# Los Angeles County Fire Department Division Boundaries
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/FIRE_DIVISION_BOUNDARIES.zip")
fire_division = download_egis3(
    url,
    zipname='FIRE_DIVISION_BOUNDARIES.zip',
    folder='fire_division',
    shpname='FIRE_DIVISION_BOUNDARIES.shp',
)
dfname_list.append('fire_division')

checkdf(fire_division)

Total polygon number: 9
number of null and unique values in each column:
OBJECTID: null 0, unique 9
DIV: null 0, unique 9
Shape_Leng: null 0, unique 9
Shape_STAr: null 0, unique 9
Shape_STLe: null 0, unique 9


Unnamed: 0,OBJECTID,DIV,Shape_Leng,Shape_STAr,Shape_STLe,geometry
0,1,I,833432.6,4066853000.0,833432.6,(POLYGON ((-118.6060576865897 33.4791081369531...
1,2,II,605593.7,8351965000.0,605593.7,POLYGON ((-117.7457043664994 34.33837148403114...
2,3,III,1470782.0,24914020000.0,1470782.0,(POLYGON ((-118.8804273943463 34.8178403050801...


In [59]:
# These Shape_* spellings are not in cleandf's built-in list, so they are dropped
# explicitly; OBJECTID is removed by cleandf itself.
droplist = ['Shape_Leng', 'Shape_STAr', 'Shape_STLe']
cleandf(fire_division, droplist)

Unnamed: 0,DIV,geometry
0,I,(POLYGON ((-118.6060576865897 33.4791081369531...
1,II,POLYGON ((-117.7457043664994 34.33837148403114...
2,III,(POLYGON ((-118.8804273943463 34.8178403050801...


In [61]:
# Los Angeles County Fire Department Battalion Boundaries
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/FIRE_BATTALION_BOUNDARIES.zip")
fire_battalion = download_egis3(
    url,
    zipname='FIRE_BATTALION_BOUNDARIES.zip',
    folder='fire_battalion',
    shpname='FIRE_BATTALION_BOUNDARIES.shp',
)
dfname_list.append('fire_battalion')

checkdf(fire_battalion)

Total polygon number: 22
number of null and unique values in each column:
OBJECTID: null 0, unique 22
BATTID: null 0, unique 22
Shape_Leng: null 0, unique 22
Shape_STAr: null 0, unique 22
Shape_STLe: null 0, unique 22


Unnamed: 0,OBJECTID,BATTID,Shape_Leng,Shape_STAr,Shape_STLe,geometry
0,1,1,225880.985263,274221000.0,225880.985263,(POLYGON ((-118.3626353408748 34.0063866246520...
1,2,2,453772.402871,3146341000.0,453772.402871,POLYGON ((-117.6587123201178 34.24037857785586...
2,3,3,158516.064622,586945000.0,158516.064622,(POLYGON ((-118.1777734260421 34.0622946700525...


In [62]:
# These Shape_* spellings are not in cleandf's built-in list, so they are dropped
# explicitly; OBJECTID is removed by cleandf itself.
droplist = ['Shape_Leng', 'Shape_STAr', 'Shape_STLe']
cleandf(fire_battalion, droplist)

Unnamed: 0,BATTID,geometry
0,1,(POLYGON ((-118.3626353408748 34.0063866246520...
1,2,POLYGON ((-117.6587123201178 34.24037857785586...
2,3,(POLYGON ((-118.1777734260421 34.0622946700525...


#### Additional geometries:
- [2011 Supervisorial District Boundaries (Official)](http://egis3.lacounty.gov/dataportal/2011/12/06/supervisorial-districts/)
- [LA City Council Districts (2012)](http://egis3.lacounty.gov/dataportal/2012/08/07/la-city-council-districts-2012/)
- [State Assembly Districts (State Legislative District – Lower Chamber)](http://egis3.lacounty.gov/dataportal/2011/11/08/california-state-assembly-districts-2011/)
- [Registrar Recorder Precincts](https://egis3.lacounty.gov/dataportal/2012/12/20/2012-precincts-as-of-march-9th/)

In [66]:
# 2011 Supervisorial District Boundaries (Official)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2011/12/"
       "DPW-Supervisorial-District.zip")
supervisorial_district = download_egis3(
    url,
    zipname='DPW-Supervisorial-District.zip',
    folder='supervisorial_district',
    shpname='sup_dist_2011.shp',
)
dfname_list.append('supervisorial_district')

checkdf(supervisorial_district)

Total polygon number: 5
number of null and unique values in each column:
OBJECTID: null 0, unique 5
SUP_DIST_N: null 0, unique 5
SYMBOL: null 0, unique 1
PERIMETER: null 0, unique 1
AREA_SQ_MI: null 0, unique 5
SHAPE_AREA: null 0, unique 5
SHAPE_LEN: null 0, unique 5


Unnamed: 0,OBJECTID,SUP_DIST_N,SYMBOL,PERIMETER,AREA_SQ_MI,SHAPE_AREA,SHAPE_LEN,geometry
0,1601,1,0,0.0,246.202,6863264000.0,946818.102008,POLYGON ((-118.2845039326605 34.08665464600393...
1,1602,2,0,0.0,161.825,4511450000.0,477092.243642,POLYGON ((-118.3063679014772 34.09079201094933...
2,1603,3,0,0.0,431.197,12021260000.0,759927.656095,"POLYGON ((-118.506688035281 34.33409190867378,..."


In [68]:
# Keep only SUP_DIST_N. The upper-case SHAPE_AREA / SHAPE_LEN do not match cleandf's
# built-in 'Shape_area' / 'Shape_len', so they are listed here; OBJECTID is removed
# by cleandf itself.
droplist = ['SYMBOL', 'PERIMETER', 'AREA_SQ_MI', 'SHAPE_AREA', 'SHAPE_LEN']
cleandf(supervisorial_district, droplist)

Unnamed: 0,SUP_DIST_N,geometry
0,1,POLYGON ((-118.2845039326605 34.08665464600393...
1,2,POLYGON ((-118.3063679014772 34.09079201094933...
2,3,"POLYGON ((-118.506688035281 34.33409190867378,..."


In [70]:
# LA City Council Districts (2012)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2012/08/"
       "CnclDist_July2012.zip")
LAcity_council_dist = download_egis3(
    url,
    zipname='CnclDist_July2012.zip',
    folder='LAcity_council_dist',
    shpname='CnclDist_July2012.shp',
)
dfname_list.append('LAcity_council_dist')

checkdf(LAcity_council_dist)

Total polygon number: 15
number of null and unique values in each column:
AREA: null 0, unique 15
PERIMETER: null 0, unique 15
CDMEMBER: null 0, unique 15
DISTRICT: null 0, unique 15
SQ_MI: null 0, unique 15
SHADESYM: null 0, unique 1
Revised: null 0, unique 1
Comments: null 0, unique 1
SHAPE_Leng: null 0, unique 15
SHAPE_Area: null 0, unique 15


Unnamed: 0,AREA,PERIMETER,CDMEMBER,DISTRICT,SQ_MI,SHADESYM,Revised,Comments,SHAPE_Leng,SHAPE_Area,geometry
0,1508806000.0,345479.674422,Felipe Fuentes,7,54.12,10,07/02/2012,ORD 182168,345539.11529,1508899000.0,POLYGON ((-118.5084667256651 34.33379445567272...
1,1637593000.0,254031.923325,Mitchell Englander,12,58.74,10,07/02/2012,ORD 182168,253975.218622,1637537000.0,POLYGON ((-118.5084667256651 34.33379445567272...
2,757962400.0,191215.747144,Nury Martinez,6,27.19,10,07/02/2012,ORD 182168,191215.747039,757962400.0,POLYGON ((-118.4729681280253 34.23192967092903...


In [72]:
# Drop every attribute column except DISTRICT.
droplist = ['AREA', 'PERIMETER', 'CDMEMBER', 'SQ_MI', 'SHADESYM',
       'Revised', 'Comments', 'SHAPE_Leng', 'SHAPE_Area']
cleandf(LAcity_council_dist, droplist)

Unnamed: 0,DISTRICT,geometry
0,7,POLYGON ((-118.5084667256651 34.33379445567272...
1,12,POLYGON ((-118.5084667256651 34.33379445567272...
2,6,POLYGON ((-118.4729681280253 34.23192967092903...


In [78]:
#State Assembly Districts (State Legislative District – Lower Chamber)
!wget https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_06_sldl_500k.zip
legislative_lower = read_census('cb_2016_06_sldl_500k.zip', 'legislative_lower', 'cb_2016_06_sldl_500k.shp')
dfname_list.append('legislative_lower')

checkdf(legislative_lower)

--2018-01-04 23:24:29--  https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_06_sldl_500k.zip
Resolving www2.census.gov... 2600:1408:24:5aa::208c, 2600:1408:24:599::208c, 104.121.83.95
Connecting to www2.census.gov|2600:1408:24:5aa::208c|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 612466 (598K) [application/zip]
Saving to: 'cb_2016_06_sldl_500k.zip'


2018-01-04 23:24:36 (698 KB/s) - 'cb_2016_06_sldl_500k.zip' saved [612466/612466]

Total polygon number: 80
number of null and unique values in each column:
STATEFP: null 0, unique 1
SLDLST: null 0, unique 80
AFFGEOID: null 0, unique 80
GEOID: null 0, unique 80
NAME: null 0, unique 80
LSAD: null 0, unique 1
LSY: null 0, unique 1
ALAND: null 0, unique 80
AWATER: null 0, unique 80


Unnamed: 0,STATEFP,SLDLST,AFFGEOID,GEOID,NAME,LSAD,LSY,ALAND,AWATER,geometry
0,6,36,620L500US06036,6036,36,L3,2016,7179257039,28222885,"POLYGON ((-118.894474 34.817972, -118.884388 3..."
1,6,57,620L500US06057,6057,57,L3,2016,237746318,2222703,"POLYGON ((-118.099942 33.904934, -118.099517 3..."
2,6,55,620L500US06055,6055,55,L3,2016,410765132,1985765,"POLYGON ((-117.976692 33.910614, -117.976571 3..."


In [79]:
# Registrar Recorder Precincts
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/RRCC_PRECINCTS.zip")
election_precinct = download_egis3(
    url,
    zipname='RRCC_PRECINCTS.zip',
    folder='election_precinct',
    shpname='RRCC_PRECINCTS.shp',
)
dfname_list.append('election_precinct')

checkdf(election_precinct)

Total polygon number: 33601
number of null and unique values in each column:
PRECINCT: null 0, unique 33601
CITY_EST: null 0, unique 7992
SUBCODE: null 0, unique 26
MAP1: null 0, unique 1
DIST_RES: null 0, unique 1
DIST_CONG: null 0, unique 18
DIST_STSEN: null 0, unique 15
DIST_STASS: null 0, unique 24
DIST_SUP: null 0, unique 5
DIST_MCRT: null 0, unique 1
DIST_BEQ: null 0, unique 2
DST_CITY: null 0, unique 89
DIV_CITY: null 0, unique 95
DST_RES2: null 0, unique 1
DIV_RES2: null 0, unique 1
DST_JRC: null 0, unique 15
DIV_JRC: null 0, unique 8
DST_USD: null 0, unique 50
DIV_USD: null 0, unique 8
DST_HSD: null 0, unique 8
DIV_HSD: null 0, unique 6
DST_ESD: null 0, unique 30
DIV_ESD: null 0, unique 6
DST_HOSP: null 0, unique 3
DIV_HOSP: null 0, unique 1
DST_PARK: null 0, unique 5
DIV_PARK: null 0, unique 3
DST_WA: null 0, unique 2
DIV_WA: null 0, unique 1
DST_MWD: null 0, unique 9
DIV_MWD: null 0, unique 8
DST_WR: null 0, unique 2
DIV_WR: null 0, unique 6
DST_WAG: null 0, unique 3
DIV_WAG

Unnamed: 0,PRECINCT,CITY_EST,SUBCODE,MAP1,DIST_RES,DIST_CONG,DIST_STSEN,DIST_STASS,DIST_SUP,DIST_MCRT,...,COMM_CODE,COMM_NAME,AREA_CODE,AREA_NAME,PRC_NAME,CITY,ESTAB,Shape_STAr,Shape_STLe,geometry
0,9001742B,9001742,B,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,453984.737305,2713.125981,POLYGON ((-118.2776159595675 34.05073225085379...
1,9001742C,9001742,C,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,766784.173828,3820.588904,POLYGON ((-118.2828304354189 34.04931735342003...
2,9001742E,9001742,E,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,566946.030273,3362.362112,POLYGON ((-118.2835241269783 34.05237068362484...
