Using Python 3 Kernel

In [1]:
%matplotlib inline
from __future__ import print_function, division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import geopandas as gpd
from fiona.crs import from_epsg
from geopandas.tools import overlay

In [2]:
#Function for overlay
def spatial_overlays(df1, df2, how='intersection', reproject=True):
    """Perform spatial overlay between two polygon GeoDataFrames.

    Currently only supports GeoDataFrames with polygons.
    Implements several methods that are all effectively subsets of
    the union. Both inputs are copied first, so the caller's frames are
    never modified.

    Parameters
    ----------
    df1 : GeoDataFrame with MultiPolygon or Polygon geometry column
    df2 : GeoDataFrame with MultiPolygon or Polygon geometry column
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    reproject : boolean, default True
        When the two frames have different CRS, convert ``df2`` to the
        CRS of ``df1`` before overlaying. When False the frames are
        overlaid as they are.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported methods.

    """
    from functools import reduce

    # Fail fast on a bad method name, before any (expensive) geometry work.
    supported = ('intersection', 'union', 'identity',
                 'symmetric_difference', 'difference')
    if how not in supported:
        raise ValueError(
            "`how` was '{}' but is expected to be one of {}".format(how, supported))

    df1 = df1.copy()
    df2 = df2.copy()
    # NOTE(review): buffer(0) is presumably used to repair invalid polygons
    # before overlaying -- confirm.
    df1['geometry'] = df1.geometry.buffer(0)
    df2['geometry'] = df2.geometry.buffer(0)
    if df1.crs!=df2.crs and reproject:
        print('Data has different projections.')
        print('Converted data to projection of first GeoPandas DatFrame')
        df2.to_crs(crs=df1.crs, inplace=True)
    if how=='intersection':
        # The spatial index reports *positions* of df2 rows, while the merge
        # below matches on index *labels*. Renumbering df2 as 0..n-1 makes the
        # two agree even when df2 arrives with a filtered or custom index.
        df2 = df2.reset_index(drop=True)
        # Spatial Index to create intersections
        spatial_index = df2.sindex
        df1['bbox'] = df1.geometry.apply(lambda x: x.bounds)
        df1['sidx']=df1.bbox.apply(lambda x:list(spatial_index.intersection(x)))
        # Expand {df1 label: [df2 positions]} into one (idx1, idx2) row per
        # candidate pair whose bounding boxes overlap.
        pairs = df1['sidx'].to_dict()
        nei = []
        for i,j in pairs.items():
            for k in j:
                nei.append([i,k])
        pairs = gpd.GeoDataFrame(nei, columns=['idx1','idx2'], crs=df1.crs)
        pairs = pairs.merge(df1, left_on='idx1', right_index=True)
        pairs = pairs.merge(df2, left_on='idx2', right_index=True, suffixes=['_1','_2'])
        pairs['Intersection'] = pairs.apply(lambda x: (x['geometry_1'].intersection(x['geometry_2'])).buffer(0), axis=1)
        pairs = gpd.GeoDataFrame(pairs, columns=pairs.columns, crs=df1.crs)
        # Keep the attribute columns and use the intersection as the geometry.
        cols = pairs.columns.tolist()
        cols.remove('geometry_1')
        cols.remove('geometry_2')
        cols.remove('sidx')
        cols.remove('bbox')
        cols.remove('Intersection')
        dfinter = pairs[cols+['Intersection']].copy()
        dfinter.rename(columns={'Intersection':'geometry'}, inplace=True)
        dfinter = gpd.GeoDataFrame(dfinter, columns=dfinter.columns, crs=pairs.crs)
        # Bounding boxes may overlap where the polygons themselves do not.
        dfinter = dfinter.loc[dfinter.geometry.is_empty==False]
        dfinter = dfinter.drop(['idx1','idx2'], axis=1)
        return dfinter
    elif how=='difference':
        spatial_index = df2.sindex
        df1['bbox'] = df1.geometry.apply(lambda x: x.bounds)
        df1['sidx']=df1.bbox.apply(lambda x:list(spatial_index.intersection(x)))
        # Subtract every candidate df2 geometry from the df1 geometry in turn.
        df1['new_g'] = df1.apply(lambda x: reduce(lambda x, y: x.difference(y).buffer(0), 
                                 [x.geometry]+list(df2.iloc[x.sidx].geometry)) , axis=1)
        df1.geometry = df1.new_g
        df1 = df1.loc[df1.geometry.is_empty==False].copy()
        df1.drop(['bbox', 'sidx', 'new_g'], axis=1, inplace=True)
        return df1
    elif how=='symmetric_difference':
        # Each frame carries its own row id and NaN for the other frame's id;
        # the outer merge on both keys then appears to stack the two frames
        # rather than pair rows up -- NOTE(review): confirm.
        df1['idx1'] = df1.index.tolist()
        df2['idx2'] = df2.index.tolist()
        df1['idx2'] = np.nan
        df2['idx1'] = np.nan
        dfsym = df1.merge(df2, on=['idx1','idx2'], how='outer', suffixes=['_1','_2'])
        # Use geometry_2 wherever it is present, geometry_1 otherwise.
        dfsym['geometry'] = dfsym.geometry_1
        dfsym.loc[dfsym.geometry_2.isnull()==False, 'geometry'] = dfsym.loc[dfsym.geometry_2.isnull()==False, 'geometry_2']
        dfsym.drop(['geometry_1', 'geometry_2'], axis=1, inplace=True)
        dfsym = gpd.GeoDataFrame(dfsym, columns=dfsym.columns, crs=df1.crs)
        spatial_index = dfsym.sindex
        dfsym['bbox'] = dfsym.geometry.apply(lambda x: x.bounds)
        dfsym['sidx'] = dfsym.bbox.apply(lambda x:list(spatial_index.intersection(x)))
        dfsym['idx'] = dfsym.index.values
        # Remove each row's own id from its candidate list so that a geometry
        # is never subtracted from itself (mutates the lists in place).
        dfsym.apply(lambda x: x.sidx.remove(x.idx), axis=1)
        dfsym['new_g'] = dfsym.apply(lambda x: reduce(lambda x, y: x.difference(y).buffer(0), 
                         [x.geometry]+list(dfsym.iloc[x.sidx].geometry)) , axis=1)
        dfsym.geometry = dfsym.new_g
        dfsym = dfsym.loc[dfsym.geometry.is_empty==False].copy()
        dfsym.drop(['bbox', 'sidx', 'idx', 'idx1','idx2', 'new_g'], axis=1, inplace=True)
        return dfsym
    elif how=='union':
        # Pass `reproject` through so a caller's reproject=False is honoured
        # by the nested overlays as well.
        dfinter = spatial_overlays(df1, df2, how='intersection', reproject=reproject)
        dfsym = spatial_overlays(df1, df2, how='symmetric_difference', reproject=reproject)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        dfunion = pd.concat([dfinter, dfsym], ignore_index=True)
        return dfunion
    elif how=='identity':
        dfunion = spatial_overlays(df1, df2, how='union', reproject=reproject)
        cols1 = df1.columns.tolist()
        cols2 = df2.columns.tolist()
        cols1.remove('geometry')
        cols2.remove('geometry')
        # Columns shared by both inputs carry the '_1' suffix after the merge.
        cols2 = set(cols2).intersection(set(cols1))
        cols1 = list(set(cols1).difference(set(cols2)))
        cols2 = [col+'_1' for col in cols2]
        # NOTE(review): this indexes dfunion with a 2-D boolean array; verify
        # that it selects the intended rows (those with df1 attributes set).
        dfunion = dfunion[(dfunion[cols1+cols2].isnull()==False).values]
        return dfunion

In [3]:
#function to download data from egis3
def download_egis3(url, zipname, folder, shpname):
    """Download a zipped shapefile, unpack it and load it as a GeoDataFrame.

    Parameters
    ----------
    url : str
        Download URL of the zip archive.
    zipname : str
        Local file name under which the archive is saved.
    folder : str
        Directory the archive is extracted into.
    shpname : str
        Name of the shapefile inside ``folder``.

    Returns
    -------
    GeoDataFrame
        The shapefile contents, reprojected to EPSG:4326.

    Raises
    ------
    subprocess.CalledProcessError
        If ``curl`` exits with a non-zero status.
    zipfile.BadZipFile
        If the downloaded file is not a valid zip archive.
    """
    import subprocess
    import zipfile

    # An argument list (no shell) keeps URLs and paths containing spaces or
    # shell metacharacters intact. `-o zipname` guarantees the archive lands
    # under the name that is opened below, and check=True surfaces a failed
    # download here instead of as a confusing read error later.
    subprocess.run(["curl", "--fail", "--location", "-o", zipname, url],
                   check=True)

    # zipfile overwrites existing files, so re-running the cell never stops
    # at an interactive "replace?" prompt.
    with zipfile.ZipFile(zipname) as archive:
        archive.extractall(folder)

    df = gpd.read_file(os.path.join(folder, shpname))

    # The CRS is assigned here rather than taken from the file: the data is
    # declared to be EPSG:2229 and then converted to EPSG:4326.
    df.crs = from_epsg(2229)
    df = df.to_crs(epsg=4326)
    return df

In [4]:
#function for reading downloaded census data from Census Bureau
def read_census(zipname, folder, shpname):
    """Unpack an already-downloaded zip archive and load its shapefile.

    Nothing is downloaded: ``zipname`` must already exist locally.

    Parameters
    ----------
    zipname : str
        Path of the local zip archive.
    folder : str
        Directory the archive is extracted into.
    shpname : str
        Name of the shapefile inside ``folder``.

    Returns
    -------
    GeoDataFrame
        The shapefile contents, in whatever CRS the file declares.

    Raises
    ------
    FileNotFoundError
        If ``zipname`` does not exist.
    """
    import zipfile

    # The function has no `url` argument, so no download is attempted here;
    # a missing archive fails immediately with a clear error.
    with zipfile.ZipFile(zipname) as archive:
        archive.extractall(folder)

    return gpd.read_file(os.path.join(folder, shpname))

In [5]:
#function to check the data quality
def checkdf(dfname):
    """Print a per-column data-quality summary and return the first rows.

    For every column except the last one, prints the number of null values
    and the number of distinct values (nulls count as a value).
    NOTE(review): the last column is skipped, presumably because it is the
    geometry column -- confirm for frames where geometry is not last.

    Parameters
    ----------
    dfname : DataFrame or GeoDataFrame
        Frame to summarise.

    Returns
    -------
    DataFrame
        ``dfname.head(3)``.
    """
    tota_poly = dfname.shape[0]
    print ('Total polygon number: {}'.format(tota_poly))    
    print ('number of null and unique values in each column:')

    # Count non-null values once for the whole frame instead of recomputing
    # the full count on every loop iteration.
    non_null = dfname.count()

    for col in dfname.columns[:-1]:
        null_valu = tota_poly - non_null[col]
        uniq_valu = len(dfname[col].unique())
        print ('{}: null {}, unique {}'.format(col, null_valu, uniq_valu))
    return dfname.head(3)

In [6]:
#function to drop redundant columns
def cleandf(dfname, droplist):
    """Drop unneeded columns from ``dfname`` in place.

    Removes the bookkeeping columns 'OBJECTID', 'Shape_area' and
    'Shape_len' when present (the last column is never auto-dropped),
    plus every column named in ``droplist``.

    Parameters
    ----------
    dfname : DataFrame or GeoDataFrame
        Frame to clean; it is modified in place.
    droplist : list of str
        Additional column names to drop.

    Returns
    -------
    DataFrame
        ``dfname.head(3)`` after the columns were removed.

    Raises
    ------
    KeyError
        If ``droplist`` names a column that does not exist. In that case
        no column is dropped.
    """
    notneedlist = ['OBJECTID', 'Shape_area', 'Shape_len']

    # Collect everything first and de-duplicate, so a column listed both in
    # notneedlist and droplist (or twice in droplist) is dropped only once.
    to_drop = []
    for col in dfname.columns[:-1]:
        if col in notneedlist and col not in to_drop:
            to_drop.append(col)
    for col in droplist:
        if col not in to_drop:
            to_drop.append(col)

    # A single drop call is all-or-nothing: a misspelled name raises before
    # the frame is touched.
    dfname.drop(to_drop, axis=1, inplace=True)

    return dfname.head(3)

## Datasets used:

- [Law Enforcement Reporting Districts](https://egis3.lacounty.gov/dataportal/2010/10/05/law-enforcement-reporting-districts/)
- [LAcounty_COMMUNITIES](https://egis3.lacounty.gov/dataportal/2010/10/21/citycommunity-boundaries/)
- [Registrar Recorder Precincts](https://egis3.lacounty.gov/dataportal/2012/12/20/2012-precincts-as-of-march-9th/)
- [Census Block (2010)](http://egis3.lacounty.gov/dataportal/2016/01/26/census_blocks/)

In [7]:
# Names of the datasets loaded so far; each download cell appends to it.
dfname_list = list()

#### 1. Law Enforcement Reporting Districts

In [8]:
# Law Enforcement Reporting Districts: download, load and summarise.
url = (
    "http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
    "ShapefilePackages/LACOUNTY_LAW_ENFORCEMENT_RDs.zip"
)
law_enforcement = download_egis3(
    url=url,
    zipname='LACOUNTY_LAW_ENFORCEMENT_RDs.zip',
    folder='law_enforcement',
    shpname='LACOUNTY_LAW_ENFORCEMENT_RDs.shp',
)
dfname_list.append('law_enforcement')
checkdf(dfname=law_enforcement)

Total polygon number: 5745
number of null and unique values in each column:
RD: null 0, unique 3667
Name: null 0, unique 519
Layer: null 0, unique 3


Unnamed: 0,RD,Name,Layer,geometry
0,230,Belvedere,Parks Bureau,POLYGON ((-118.1604325432619 34.03712079823092...
1,231,Atlantic Avenue,Parks Bureau,"POLYGON ((-118.1550797745909 34.0257529106941,..."
2,232,City Terrace,Parks Bureau,POLYGON ((-118.1784888821586 34.04781649689102...


In [9]:
# Remove the 'Layer' column from the reporting districts (in place).
droplist = ["Layer"]
cleandf(dfname=law_enforcement, droplist=droplist)

Unnamed: 0,RD,Name,geometry
0,230,Belvedere,POLYGON ((-118.1604325432619 34.03712079823092...
1,231,Atlantic Avenue,"POLYGON ((-118.1550797745909 34.0257529106941,..."
2,232,City Terrace,POLYGON ((-118.1784888821586 34.04781649689102...


In [10]:
# Positional rename of the remaining columns (RD, Name, geometry).
law_enforcement.columns = [
    'repo_dist_num',
    'repo_dist_name',
    'geometry',
]

#### 2. LAcounty_COMMUNITIES

In [11]:
# LA County communities: download, load and summarise.
url = (
    "http://egis3.lacounty.gov/dataportal/wp-content/uploads/2010/10/"
    "Communities1.zip"
)
county_communities = download_egis3(
    url=url,
    zipname='Communities1.zip',
    folder='county_communities',
    shpname='Communities.shp',
)
dfname_list.append('county_communities')
checkdf(dfname=county_communities)

Total polygon number: 353
number of null and unique values in each column:
COMMTYPE: null 0, unique 3
NAME: null 0, unique 206
COLOR: null 0, unique 7
PO_NAME: null 0, unique 177
STATNAME: null 0, unique 193
X_CENTER: null 0, unique 353
Y_CENTER: null 0, unique 353
ST_NAME: null 0, unique 88
LABEL_CITY: null 0, unique 89
LABEL_COMM: null 0, unique 143
AREA_SQMI: null 0, unique 322
Shape_area: null 0, unique 353
Shape_len: null 0, unique 353


Unnamed: 0,COMMTYPE,NAME,COLOR,PO_NAME,STATNAME,X_CENTER,Y_CENTER,ST_NAME,LABEL_CITY,LABEL_COMM,AREA_SQMI,Shape_area,Shape_len,geometry
0,Unincorporated,Edwards,Yellow,Edwards,East Antelope Valley,6590507,2111007,Lancaster,Unincorporated,Edwards,93.961,2619470000.0,337914.985138,POLYGON ((-118.1407088589749 34.81827748625132...
1,Unincorporated,Gorman,Yellow,West Antelope Valley,West Antelope Valley,6357449,2089258,Santa Clarita Valley,Unincorporated,Gorman,173.752,4843936000.0,442757.717076,POLYGON ((-118.4662754936974 34.81837563424517...
2,Unincorporated,Del Sur,Yellow,Lancaster,Quartz Hill,6469031,2091460,Lancaster,Unincorporated,Del Sur,135.398,3774671000.0,423904.059503,POLYGON ((-118.4662754936974 34.81837563424517...


In [12]:
# Keep only the community name and geometry; everything else is dropped in place.
droplist = [
    'COMMTYPE',
    'COLOR',
    'PO_NAME',
    'STATNAME',
    'X_CENTER',
    'Y_CENTER',
    'ST_NAME',
    'LABEL_CITY',
    'LABEL_COMM',
    'AREA_SQMI',
]
cleandf(dfname=county_communities, droplist=droplist)

Unnamed: 0,NAME,geometry
0,Edwards,POLYGON ((-118.1407088589749 34.81827748625132...
1,Gorman,POLYGON ((-118.4662754936974 34.81837563424517...
2,Del Sur,POLYGON ((-118.4662754936974 34.81837563424517...


In [13]:
# Positional rename of the remaining columns (NAME, geometry).
county_communities.columns = [
    'coun_comm_name',
    'geometry',
]

In [None]:
# Shapes of the two layers about to be overlaid. The first label now names
# the frame whose shape is actually printed.
print ('law_enforcement {}'.format(law_enforcement.shape))
print ('county_communities {}'.format(county_communities.shape))

In [14]:
# Intersect the reporting districts with the community boundaries.
overlay_shp = spatial_overlays(
    df1=law_enforcement,
    df2=county_communities,
    how='intersection',
)

In [15]:
# Report the (rows, columns) shape of the overlay result.
# Optional visual check, left disabled: overlay_shp.plot()
print('After overlay: {}'.format(overlay_shp.shape))

After overlay: (9599, 4)


####  3. Registrar Recorder Precincts

In [16]:
# Registrar Recorder precincts: download, load and summarise.
url = (
    "http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
    "ShapefilePackages/RRCC_PRECINCTS.zip"
)
election_precinct = download_egis3(
    url=url,
    zipname='RRCC_PRECINCTS.zip',
    folder='election_precinct',
    shpname='RRCC_PRECINCTS.shp',
)
dfname_list.append('election_precinct')
checkdf(dfname=election_precinct)

Total polygon number: 33601
number of null and unique values in each column:
PRECINCT: null 0, unique 33601
CITY_EST: null 0, unique 7992
SUBCODE: null 0, unique 26
MAP1: null 0, unique 1
DIST_RES: null 0, unique 1
DIST_CONG: null 0, unique 18
DIST_STSEN: null 0, unique 15
DIST_STASS: null 0, unique 24
DIST_SUP: null 0, unique 5
DIST_MCRT: null 0, unique 1
DIST_BEQ: null 0, unique 2
DST_CITY: null 0, unique 89
DIV_CITY: null 0, unique 95
DST_RES2: null 0, unique 1
DIV_RES2: null 0, unique 1
DST_JRC: null 0, unique 15
DIV_JRC: null 0, unique 8
DST_USD: null 0, unique 50
DIV_USD: null 0, unique 8
DST_HSD: null 0, unique 8
DIV_HSD: null 0, unique 6
DST_ESD: null 0, unique 30
DIV_ESD: null 0, unique 6
DST_HOSP: null 0, unique 3
DIV_HOSP: null 0, unique 1
DST_PARK: null 0, unique 5
DIV_PARK: null 0, unique 3
DST_WA: null 0, unique 2
DIV_WA: null 0, unique 1
DST_MWD: null 0, unique 9
DIV_MWD: null 0, unique 8
DST_WR: null 0, unique 2
DIV_WR: null 0, unique 6
DST_WAG: null 0, unique 3
DIV_WAG

Unnamed: 0,PRECINCT,CITY_EST,SUBCODE,MAP1,DIST_RES,DIST_CONG,DIST_STSEN,DIST_STASS,DIST_SUP,DIST_MCRT,...,COMM_CODE,COMM_NAME,AREA_CODE,AREA_NAME,PRC_NAME,CITY,ESTAB,Shape_STAr,Shape_STLe,geometry
0,9001742B,9001742,B,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,453984.737305,2713.125981,POLYGON ((-118.2776159595675 34.05073225085379...
1,9001742C,9001742,C,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,766784.173828,3820.588904,POLYGON ((-118.2828304354189 34.04931735342003...
2,9001742E,9001742,E,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,566946.030273,3362.362112,POLYGON ((-118.2835241269783 34.05237068362484...


In [17]:
# Precinct columns to discard. 'Shape_STAr' and 'Shape_STLe' are listed
# explicitly because cleandf's built-in list only covers 'Shape_area' and
# 'Shape_len'.
droplist = [ 'CITY_EST', 'SUBCODE', 'MAP1',
 'DIST_RES', 'DIST_MCRT', 'DIST_BEQ',
 'DST_CITY', 'DIV_CITY', 'DST_RES2', 'DIV_RES2',
 'DST_JRC', 'DIV_JRC', 'DST_USD', 'DIV_USD',
 'DST_HSD', 'DIV_HSD', 'DST_ESD', 'DIV_ESD',
 'DST_HOSP', 'DIV_HOSP', 'DST_PARK', 'DIV_PARK',
 'DST_WA', 'DIV_WA', 'DST_MWD', 'DIV_MWD',
 'DST_WR', 'DIV_WR', 'DST_WAG', 'DIV_WAG',
 'DST_CW', 'DIV_CW', 'DST_IRR', 'DIV_IRR',
 'DST_CS', 'DIV_CS', 'DST_LIB', 'DIV_LIB',
 'DST_RC', 'DIV_RC', 'DST_CAW', 'DIV_CAW',
 'DST_CEM', 'DIV_CEM', 'DST_MOS', 'DIV_MOS',
 'DST_SAN', 'DIV_SAN', 'DST_TRN', 'DIV_TRN',
 'DST_RES3', 'DIV_RES3', 'DST_FIR', 'DIV_FIR',
 'DST_FLD', 'DIV_FLD', 'DST_GARB', 'DIV_GARB',
 'DIST_OLDC', 'DIST_OLDS', 'DIST_OLDA', 'DST_CL', 'DIV_CL',
 'DST_SM', 'DIV_SM', 'DST_RD', 'DIV_RD',
 'DST_MISC1', 'DIV_MISC1', 'DST_MISC2', 'DIV_MISC2',
 'DST_MISC3', 'DIV_MISC3', 'DST_MISC4', 'DIV_MISC4',
 'DST_MISC5', 'DIV_MISC5', 'DST_MISC6', 'DIV_MISC6',
 'DST_ANX1', 'DIV_ANX1', 'DST_ANX2', 'DIV_ANX2',
 'DST_ANX3', 'DIV_ANX3', 'DST_ANX4', 'DIV_ANX4',
 'DST_ANX5', 'DIV_ANX5', 'CT_YEAR1', 'CT_YEAR2', 'CTRACT1',
 'CTRACT2', 'ESTAB', 'Shape_STAr', 'Shape_STLe',
           'DIST_SUP', 'CITY']
# Drops the columns in place and displays the first three remaining rows.
cleandf(election_precinct, droplist)

Unnamed: 0,PRECINCT,DIST_CONG,DIST_STSEN,DIST_STASS,COMM_CODE,COMM_NAME,AREA_CODE,AREA_NAME,PRC_NAME,geometry
0,9001742B,34.0,24.0,53.0,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,POLYGON ((-118.2776159595675 34.05073225085379...
1,9001742C,34.0,24.0,53.0,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,POLYGON ((-118.2828304354189 34.04931735342003...
2,9001742E,34.0,24.0,53.0,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,POLYGON ((-118.2835241269783 34.05237068362484...


In [18]:
# Positional rename of the ten columns left after cleaning
# (PRECINCT, DIST_CONG, DIST_STSEN, DIST_STASS, COMM_CODE, COMM_NAME,
#  AREA_CODE, AREA_NAME, PRC_NAME, geometry).
election_precinct.columns = [
    'precinct_num',
    'congress_dist',
    'senate_dist',
    'assembly_dist',
    'community_num',
    'community_name',
    'area_num',
    'area_name',
    'precinct_name',
    'geometry',
]

In [19]:
# Shapes of both inputs before the precinct overlay.
print(
    'overlay_shp {}'.format(overlay_shp.shape),
    'election_precinct {}'.format(election_precinct.shape),
    sep='\n',
)

overlay_shp (9599, 4)
election_precinct (33601, 10)


In [20]:
# Intersect the running overlay with the election precincts.
# overlay_shp is both input and output, so re-running this cell alone
# applies the precinct overlay a second time.
overlay_shp = spatial_overlays(
    df1=overlay_shp,
    df2=election_precinct,
    how='intersection',
)

In [21]:
# Report the (rows, columns) shape of the overlay result.
# Optional visual check, left disabled: overlay_shp.plot()
print('After overlay: {}'.format(overlay_shp.shape))

After overlay: (98406, 13)


#### 4. Census Blocks

In [22]:
# Census blocks (2010): download, load and summarise.
url = (
    "http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
    "ShapefilePackages/CENSUS_BLOCKS_2010.zip"
)
census_blocks = download_egis3(
    url=url,
    zipname='CENSUS_BLOCKS_2010.zip',
    folder='census_blocks',
    shpname='CENSUS_BLOCKS_2010.shp',
)
dfname_list.append('census_blocks')
checkdf(dfname=census_blocks)

Total polygon number: 109279
number of null and unique values in each column:
CT10: null 0, unique 2344
BG10: null 0, unique 6422
CB10: null 0, unique 1975
CTCB10: null 0, unique 109279
BG10FIP10: null 0, unique 6972
CEN_FIP13: null 0, unique 142
LA_FIP10: null 0, unique 89
CITY: null 0, unique 89
COMM: null 0, unique 232
CITYCOM: null 0, unique 257
ZCTA10: null 0, unique 301
PUMA10: null 0, unique 70
HD_2012: null 0, unique 26
HD_NAME: null 0, unique 26
SPA_2012: null 0, unique 8
SPA_NAME: null 0, unique 8
SUP_DIST: null 0, unique 5
SUP_LABEL: null 0, unique 5
HOUSING10: null 0, unique 639
POP_2010: null 0, unique 1270
CT12: null 0, unique 2345
BG12: null 0, unique 6423
Shape_STAr: null 0, unique 109259
Shape_STLe: null 0, unique 109279


Unnamed: 0,CT10,BG10,CB10,CTCB10,BG10FIP10,CEN_FIP13,LA_FIP10,CITY,COMM,CITYCOM,...,SPA_NAME,SUP_DIST,SUP_LABEL,HOUSING10,POP_2010,CT12,BG12,Shape_STAr,Shape_STLe,geometry
0,650001,6500012,2004,6500012004,650001280000,80000,80000,Torrance,Torrance,City of Torrance,...,South Bay,4,District 4,13,39,650001,6500012,122799.112305,1811.062718,POLYGON ((-118.3196050007964 33.88240599913712...
1,650902,6509021,1045,6509021045,650902180000,80000,80000,Torrance,Torrance,City of Torrance,...,South Bay,4,District 4,0,0,650902,6509021,19197.03418,845.026552,POLYGON ((-118.3154650016074 33.82442799911141...
2,670407,6704071,1015,6704071015,670407159514,59514,59514,Rancho Palos Verdes,Rancho Palos Verdes,City of Rancho Palos Verdes,...,South Bay,4,District 4,1,5,670407,6704071,4159.323242,317.758051,POLYGON ((-118.3899240008091 33.77733799949572...


In [23]:
# Census-block columns that are not carried into the overlay (dropped in place).
droplist = [
    'POP_2010',
    'CT12',
    'BG12',
    'Shape_STAr',
    'Shape_STLe',
    'HOUSING10',
    'SUP_LABEL',
    'SPA_NAME',
    'CEN_FIP13',
    'SPA_2012',
    'CTCB10',
    'BG10FIP10',
    'COMM',
    'CITYCOM',
]
cleandf(dfname=census_blocks, droplist=droplist)

Unnamed: 0,CT10,BG10,CB10,LA_FIP10,CITY,ZCTA10,PUMA10,HD_2012,HD_NAME,SUP_DIST,geometry
0,650001,6500012,2004,80000,Torrance,90504,3761,79,Torrance,4,POLYGON ((-118.3196050007964 33.88240599913712...
1,650902,6509021,1045,80000,Torrance,90501,3761,79,Torrance,4,POLYGON ((-118.3154650016074 33.82442799911141...
2,670407,6704071,1015,59514,Rancho Palos Verdes,90275,3768,31,Harbor,4,POLYGON ((-118.3899240008091 33.77733799949572...


In [24]:
# Positional rename of the eleven columns left after cleaning
# (CT10, BG10, CB10, LA_FIP10, CITY, ZCTA10, PUMA10, HD_2012, HD_NAME,
#  SUP_DIST, geometry).
census_blocks.columns = [
    'census_tract',
    'block_group',
    'census_block',
    'FIP',
    'city',
    'zip_code',
    'PUMA',
    'health_dist_num',
    'health_districts_name',
    'sup_dist',
    'geometry',
]

In [25]:
# Shapes of both inputs before the census-block overlay.
print(
    'overlay_shp {}'.format(overlay_shp.shape),
    'census_blocks {}'.format(census_blocks.shape),
    sep='\n',
)

overlay_shp (98406, 13)
census_blocks (109279, 11)


In [26]:
# Intersect the running overlay with the census blocks.
# overlay_shp is both input and output, so re-running this cell alone
# applies the census-block overlay a second time.
overlay_shp = spatial_overlays(
    df1=overlay_shp,
    df2=census_blocks,
    how='intersection',
)

In [27]:
# Report the (rows, columns) shape of the overlay result.
# Optional visual check, left disabled: overlay_shp.plot()
print('After overlay: {}'.format(overlay_shp.shape))

After overlay: (462901, 23)


# Notebook for reproducing the result ends here.

------------------------------------

- [School District Boundaries (2011)](http://egis3.lacounty.gov/dataportal/2012/01/20/school-district-boundaries-2011/)
(Overlaying this layer is extremely slow, so it is kept out of the reproducible section above.)

In [29]:
# School district boundaries (2011): download, load and summarise.
url = (
    "http://egis3.lacounty.gov/dataportal/wp-content/uploads/2012/01/"
    "rrcc_school_districts1.zip"
)
school_districts = download_egis3(
    url=url,
    zipname='rrcc_school_districts1.zip',
    folder='school_districts',
    shpname='rrcc_school_districts.shp',
)
dfname_list.append('school_districts')
checkdf(dfname=school_districts)

Total polygon number: 79
number of null and unique values in each column:
DISTRICT: null 0, unique 78
UNIFIED: null 0, unique 49
HIGH: null 0, unique 9
ELEMENTARY: null 0, unique 31
PH: null 0, unique 78
ADDR: null 0, unique 78
PH2: null 0, unique 20
PH3: null 0, unique 6
STU: null 0, unique 78
HI_ADDR: null 0, unique 9
HI_PH: null 0, unique 9
HI_STU: null 0, unique 9
LABEL: null 0, unique 78


Unnamed: 0,DISTRICT,UNIFIED,HIGH,ELEMENTARY,PH,ADDR,PH2,PH3,STU,HI_ADDR,HI_PH,HI_STU,LABEL,geometry
0,HERMOSA BEACH CITY ELEMENTARY,,,HERMOSA BEACH CITY ELEMENTARY,310 937 5877,1645 VALLEY DR HERMOSA BCH 90254,,,950,,,,HERMOSA BEACH CITY ELEM,POLYGON ((-118.4048577253546 33.87782346394338...
1,CENTINELA VALLEY UNION HIGH/HAWTHORNE ELEMENTARY,,CENTINELA VALLEY UNION HIGH,HAWTHORNE ELEMENTARY,310 676 2276,14120 S HAWTHORNE BL HAWTHORNE 90250,,,8145,14901 S INGLEWOOD AV LAWNDALE 90260,310 263 3200,6220.0,CENTINELA VALLEY UNION HIGH\nHAWTHORNE ELEMENTARY,POLYGON ((-118.3607054443706 33.93093449396304...
2,CENTINELA VALLEY UNION HIGH/LAWNDALE ELEMENTARY,,CENTINELA VALLEY UNION HIGH,LAWNDALE ELEMENTARY,310 973 1300,4161 W 147TH ST LAWNDALE 90260,,,5510,14901 S INGLEWOOD AV LAWNDALE 90260,310 263 3200,6220.0,CENTINELA VALLEY UNION HIGH\nLAWNDALE ELEMENTARY,"POLYGON ((-118.367294562588 33.90558198783078,..."


In [30]:
# Keep only the district name and geometry; everything else is dropped in place.
droplist = [
    'UNIFIED',
    'HIGH',
    'ELEMENTARY',
    'PH',
    'ADDR',
    'PH2',
    'PH3',
    'STU',
    'HI_ADDR',
    'HI_PH',
    'HI_STU',
    'LABEL',
]
cleandf(dfname=school_districts, droplist=droplist)

Unnamed: 0,DISTRICT,geometry
0,HERMOSA BEACH CITY ELEMENTARY,POLYGON ((-118.4048577253546 33.87782346394338...
1,CENTINELA VALLEY UNION HIGH/HAWTHORNE ELEMENTARY,POLYGON ((-118.3607054443706 33.93093449396304...
2,CENTINELA VALLEY UNION HIGH/LAWNDALE ELEMENTARY,"POLYGON ((-118.367294562588 33.90558198783078,..."


In [31]:
# Positional rename of the remaining columns (DISTRICT, geometry).
school_districts.columns = [
    'scho_dist_name',
    'geometry',
]

In [32]:
# Shapes of both inputs before the school-district overlay.
print(
    'overlay_shp {}'.format(overlay_shp.shape),
    'school_districts {}'.format(school_districts.shape),
    sep='\n',
)

overlay_shp (462901, 23)
school_districts (79, 2)


In [None]:
# Intersect the running overlay with the school districts.
# overlay_shp is both input and output, so re-running this cell alone
# applies the school-district overlay a second time.
overlay_shp = spatial_overlays(
    df1=overlay_shp,
    df2=school_districts,
    how='intersection',
)

In [None]:
# Report the (rows, columns) shape of the overlay result.
# Optional visual check, left disabled: overlay_shp.plot()
print('After overlay: {}'.format(overlay_shp.shape))