In [1]:
%matplotlib inline
from __future__ import print_function, division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import geopandas as gpd
from fiona.crs import from_epsg
from geopandas.tools import overlay

In [2]:
#function to download a shapefile package from egis3 and load it
def download_egis3(url, zipname, folder, shpname):
    """Download a zipped shapefile, extract it and load it as a GeoDataFrame.

    Parameters
    ----------
    url : str
        Download URL of the zip archive. ``curl -O`` stores the file in the
        current working directory under its remote file name.
    zipname : str
        File name of the downloaded zip archive (must match the name that
        ``curl -O`` derives from ``url``).
    folder : str
        Directory the archive is extracted into.
    shpname : str
        Name of the shapefile inside ``folder``.

    Returns
    -------
    GeoDataFrame
        The shapefile contents, with the CRS set to EPSG:2229 and then
        reprojected to EPSG:4326.

    Raises
    ------
    subprocess.CalledProcessError
        If ``curl`` exits with a non-zero status (including HTTP errors,
        because of ``-f``).
    zipfile.BadZipfile
        If the downloaded file is not a valid zip archive.
    """
    # Local imports keep this cell self-contained; both modules are stdlib.
    import subprocess
    import zipfile

    # An argument list (no shell) avoids quoting problems in the URL, and
    # check_call surfaces a failed download instead of silently continuing.
    subprocess.check_call(["curl", "-f", "-O", url])

    # Extract with the standard library rather than the external `unzip`
    # tool; existing files are overwritten, so re-running the cell is safe.
    with zipfile.ZipFile(zipname) as archive:
        archive.extractall(folder)

    df = gpd.read_file(os.path.join(folder, shpname))

    # The CRS is assigned explicitly (overriding whatever was read from the
    # file) before reprojecting.
    # NOTE(review): assumes all egis3 packages are delivered in EPSG:2229 - confirm.
    df.crs = from_epsg(2229)
    df = df.to_crs(epsg=4326)
    return df

In [3]:
#function to check the data quality
def checkdf(dfname, verbose=False):
    """Print a short data-quality report for a GeoDataFrame.

    The report contains the total number of rows (polygons) and the rows
    whose geometry is invalid. With ``verbose=True`` it also lists, for every
    column except the last one, the number of null values and the number of
    unique values.

    Parameters
    ----------
    dfname : GeoDataFrame
        Frame to inspect. It must expose ``is_valid``.
    verbose : bool, optional
        When True, print the per-column null/unique counts. Defaults to
        False, which prints only the totals and the invalid geometries.

    Returns
    -------
    DataFrame
        The first three rows of ``dfname``.
    """
    tota_poly = dfname.shape[0]
    print('Total polygon number: {}'.format(tota_poly))

    print('--------------------')
    if verbose:
        print('number of null and unique values in each column:')
        # Non-null counts are computed once for the whole frame instead of
        # once per column.
        non_null = dfname.count()
        # columns[:-1] skips the last column.
        # NOTE(review): presumably the last column is always `geometry` - confirm.
        for col in dfname.columns[:-1]:
            null_valu = tota_poly - non_null[col]
            # dropna=False counts NaN as a value, like len(series.unique()).
            uniq_valu = dfname[col].nunique(dropna=False)
            print('{}: null {}, unique {}'.format(col, null_valu, uniq_valu))
    print('--------------------')

    invalid_geo = dfname.loc[~dfname.is_valid]
    print('{} invalid geometries:'.format(len(invalid_geo)))
    print(invalid_geo)
    return dfname.head(3)

## Data inspection

### The following 4 datasets have invalid geometries.
When running 'overlay>>intersect' in QGIS, these datasets will throw the error:

#### Input layer A contains invalid geometries (feature xxxx). Unable to complete intersection algorithm.

In [4]:
# Collects the names (as strings) of the datasets loaded in the cells below.
dfname_list = []

In [5]:
# Dataset: Law Enforcement Reporting Districts
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/LACOUNTY_LAW_ENFORCEMENT_RDs.zip")
law_enforcement = download_egis3(
    url=url,
    zipname='LACOUNTY_LAW_ENFORCEMENT_RDs.zip',
    folder='law_enforcement',
    shpname='LACOUNTY_LAW_ENFORCEMENT_RDs.shp',
)
dfname_list.append('law_enforcement')
# Print the quality report; the returned first rows are shown as cell output.
checkdf(law_enforcement)

Total polygon number: 5745
--------------------
--------------------
9 invalid geometries:
        RD                          Name   Layer  \
1445   540                Burbank Police  Patrol   
2150  0569          LAPD Harbor Division  Patrol   
2156  0589          LAPD Harbor Division  Patrol   
3209  1880                Avalon Sheriff  Patrol   
3388  2655              Palmdale Sheriff  Patrol   
3431  2851               Compton Sheriff  Patrol   
3808    12               Pasadena Police  Patrol   
4061   SWA               Monrovia Police  Patrol   
5732  0671  Santa Clarita Valley Sheriff  Patrol   

                                               geometry  
1445  POLYGON ((-118.3476026991103 34.15372648733122...  
2150  POLYGON ((-118.2764069783999 33.71525967187328...  
2156  POLYGON ((-118.2847034958252 33.71354150482826...  
3209  POLYGON ((-118.606764158083 33.03225665206974,...  
3388  POLYGON ((-118.3946920610323 34.61993478965515...  
3431  (POLYGON ((-118.1917674346608 33.9

Unnamed: 0,RD,Name,Layer,geometry
0,230,Belvedere,Parks Bureau,POLYGON ((-118.1604325432619 34.03712079823092...
1,231,Atlantic Avenue,Parks Bureau,"POLYGON ((-118.1550797745909 34.0257529106941,..."
2,232,City Terrace,Parks Bureau,POLYGON ((-118.1784888821586 34.04781649689102...


In [6]:
# Dataset: Census Blocks (2010)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/CENSUS_BLOCKS_2010.zip")
census_blocks = download_egis3(
    url=url,
    zipname='CENSUS_BLOCKS_2010.zip',
    folder='census_blocks',
    shpname='CENSUS_BLOCKS_2010.shp',
)
dfname_list.append('census_blocks')
# Print the quality report; the returned first rows are shown as cell output.
checkdf(census_blocks)

Total polygon number: 109279
--------------------
--------------------
72 invalid geometries:
          CT10     BG10  CB10      CTCB10     BG10FIP10 CEN_FIP13 LA_FIP10  \
1132    800506  8005063  3002  8005063002  800506399037     78960    99037   
6140    262400  2624001  1011  2624001011  262400144000     44000    44000   
6617    403600  4036002  2004  4036002004  403600216742     16742    16742   
7064    401312  4013121  1008  4013121008  401312166070     66070    66070   
8394    800204  8002043  3004  8002043004  137000344000     44000    44000   
10413   910201  9102011  1008  9102011008  910201155156     55156    55156   
13492   670416  6704162  2003  6704162003  670416259514     59514    59514   
13708   920033  9200331  1000  9200331000  920033199037              99037   
13950   901209  9012091  1347  9012091347  901209199037              99037   
17589   502100  5021002  2000  5021002000  502100285292     85292    85292   
23938   920104  9201041  1041  9201041041  92010

Unnamed: 0,CT10,BG10,CB10,CTCB10,BG10FIP10,CEN_FIP13,LA_FIP10,CITY,COMM,CITYCOM,...,SPA_NAME,SUP_DIST,SUP_LABEL,HOUSING10,POP_2010,CT12,BG12,Shape_STAr,Shape_STLe,geometry
0,650001,6500012,2004,6500012004,650001280000,80000,80000,Torrance,Torrance,City of Torrance,...,South Bay,4,District 4,13,39,650001,6500012,122799.112305,1811.062718,POLYGON ((-118.3196050007964 33.88240599913712...
1,650902,6509021,1045,6509021045,650902180000,80000,80000,Torrance,Torrance,City of Torrance,...,South Bay,4,District 4,0,0,650902,6509021,19197.03418,845.026552,POLYGON ((-118.3154650016074 33.82442799911141...
2,670407,6704071,1015,6704071015,670407159514,59514,59514,Rancho Palos Verdes,Rancho Palos Verdes,City of Rancho Palos Verdes,...,South Bay,4,District 4,1,5,670407,6704071,4159.323242,317.758051,POLYGON ((-118.3899240008091 33.77733799949572...


In [7]:
# Dataset: Registrar Recorder Precincts
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/"
       "ShapefilePackages/RRCC_PRECINCTS.zip")
election_precinct = download_egis3(
    url=url,
    zipname='RRCC_PRECINCTS.zip',
    folder='election_precinct',
    shpname='RRCC_PRECINCTS.shp',
)
dfname_list.append('election_precinct')
# Print the quality report; the returned first rows are shown as cell output.
checkdf(election_precinct)

Total polygon number: 33601
--------------------
--------------------
4 invalid geometries:
       PRECINCT CITY_EST SUBCODE  MAP1 DIST_RES  DIST_CONG  DIST_STSEN  \
9503   1040026B  1040026       B   0.0                25.0        21.0   
10091  1300037A  1300037       A   0.0                27.0        25.0   
16672  3750005T  3750005       T   0.0                25.0        21.0   
23649  6220062A  6220062       A   0.0                25.0        21.0   

       DIST_STASS  DIST_SUP DIST_MCRT  \
9503         38.0       5.0             
10091        41.0       1.0             
16672        36.0       5.0             
23649        38.0       5.0             

                             ...                          COMM_CODE  \
9503                         ...                              104.0   
10091                        ...                              130.0   
16672                        ...                              375.0   
23649                        ...               

Unnamed: 0,PRECINCT,CITY_EST,SUBCODE,MAP1,DIST_RES,DIST_CONG,DIST_STSEN,DIST_STASS,DIST_SUP,DIST_MCRT,...,COMM_CODE,COMM_NAME,AREA_CODE,AREA_NAME,PRC_NAME,CITY,ESTAB,Shape_STAr,Shape_STLe,geometry
0,9001742B,9001742,B,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,453984.737305,2713.125981,POLYGON ((-118.2776159595675 34.05073225085379...
1,9001742C,9001742,C,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,766784.173828,3820.588904,POLYGON ((-118.2828304354189 34.04931735342003...
2,9001742E,9001742,E,0.0,,34.0,24.0,53.0,1.0,,...,900.0,LOS ANGELES,809.0,CENTRAL AREA,LOS ANGELES,900,1742,566946.030273,3362.362112,POLYGON ((-118.2835241269783 34.05237068362484...


In [8]:
# Dataset: School District Boundaries (2011)
url = ("http://egis3.lacounty.gov/dataportal/wp-content/uploads/2012/01/"
       "rrcc_school_districts1.zip")
school_districts = download_egis3(
    url=url,
    zipname='rrcc_school_districts1.zip',
    folder='school_districts',
    shpname='rrcc_school_districts.shp',
)
dfname_list.append('school_districts')
# Print the quality report; the returned first rows are shown as cell output.
checkdf(school_districts)

Total polygon number: 79
--------------------
--------------------
1 invalid geometries:
                     DISTRICT                    UNIFIED HIGH ELEMENTARY  \
41  LONG BEACH UNIFIED SCHOOL  LONG BEACH UNIFIED SCHOOL                   

              PH                             ADDR             PH2 PH3    STU  \
41  562 997 8000  1515 HUGHES WY LONG BEACH 90810  562 997 8101po      83536   

   HI_ADDR HI_PH HI_STU                      LABEL  \
41                       LONG BEACH UNIFIED SCHOOL   

                                             geometry  
41  (POLYGON ((-118.60576760701 33.47884487723737,...  


Unnamed: 0,DISTRICT,UNIFIED,HIGH,ELEMENTARY,PH,ADDR,PH2,PH3,STU,HI_ADDR,HI_PH,HI_STU,LABEL,geometry
0,HERMOSA BEACH CITY ELEMENTARY,,,HERMOSA BEACH CITY ELEMENTARY,310 937 5877,1645 VALLEY DR HERMOSA BCH 90254,,,950,,,,HERMOSA BEACH CITY ELEM,POLYGON ((-118.4048577253546 33.87782346394338...
1,CENTINELA VALLEY UNION HIGH/HAWTHORNE ELEMENTARY,,CENTINELA VALLEY UNION HIGH,HAWTHORNE ELEMENTARY,310 676 2276,14120 S HAWTHORNE BL HAWTHORNE 90250,,,8145,14901 S INGLEWOOD AV LAWNDALE 90260,310 263 3200,6220.0,CENTINELA VALLEY UNION HIGH\nHAWTHORNE ELEMENTARY,POLYGON ((-118.3607054443706 33.93093449396304...
2,CENTINELA VALLEY UNION HIGH/LAWNDALE ELEMENTARY,,CENTINELA VALLEY UNION HIGH,LAWNDALE ELEMENTARY,310 973 1300,4161 W 147TH ST LAWNDALE 90260,,,5510,14901 S INGLEWOOD AV LAWNDALE 90260,310 263 3200,6220.0,CENTINELA VALLEY UNION HIGH\nLAWNDALE ELEMENTARY,"POLYGON ((-118.367294562588 33.90558198783078,..."
