# Processing Microsoft's building footprints data by Los Angeles Neighborhood

### Import Python tools

In [3]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [4]:
from pathlib import Path

import geopandas as gpd
import pandas as pd

---

### Convert the Microsoft GeoJSON to a shapefile

In [6]:
# Shell out to GDAL's ogr2ogr to convert the county GeoJSON (output/places/037.geojson)
# into an ESRI Shapefile at output/la/037.shp; -progress prints a progress bar.
# NOTE(review): presumably the output/la/ directory must already exist — confirm.
!ogr2ogr -f "ESRI Shapefile" output/la/037.shp output/places/037.geojson -progress

0...10...20...30...40...50...60...70...80...90...100 - done.


### Read the new data format

In [None]:
# Load the shapefile produced by the ogr2ogr conversion into a GeoDataFrame.
footprints_path = "output/la/037.shp"
src = gpd.read_file(footprints_path)

In [10]:
# Preview the first rows of the building footprints to check the columns and geometry.
src.head()

Unnamed: 0,name,countyfips,geometry
0,Los Angeles,37,"POLYGON ((-117.65550 34.36644, -117.65554 34.3..."
1,Los Angeles,37,"POLYGON ((-117.65366 34.36188, -117.65368 34.3..."
2,Los Angeles,37,"POLYGON ((-117.65889 34.43905, -117.65889 34.4..."
3,Los Angeles,37,"POLYGON ((-117.66029 34.23580, -117.66032 34.2..."
4,Los Angeles,37,"POLYGON ((-117.67558 34.70836, -117.67559 34.7..."


---

### Read L.A. neighborhood boundaries file

In [11]:
# Load the L.A. County boundary polygons from the local GIS data directory.
lahoods_path = "../../data/GIS/la/la_county_gis_neighborhoods/la_county_gis_neighborhoods_1566093928766.geojson"
lahoods = gpd.read_file(lahoods_path)

In [60]:
# URL of the L.A. County neighborhoods (v6) GeoJSON in the boundaries.latimes.com archive.
# NOTE(review): no visible cell reads this variable — the boundaries used in this
# notebook come from a local file; confirm whether it is still needed.
boundaries_api_url = "http://s3-us-west-2.amazonaws.com/boundaries.latimes.com/archive/1.0/boundary-set/la-county-neighborhoods-v6.geojson"

### Clean up the neighborhoods file

In [12]:
# Normalize the boundary file's column names: trim whitespace, lower-case, and
# replace spaces with underscores. The names are taken from `lahoods` itself;
# `ca_counties` is not defined in any visible cell, so referencing it raises
# NameError on a fresh kernel.
lahoods.columns = (
    lahoods.columns.str.strip().str.lower().str.replace(" ", "_", regex=False)
)

In [14]:
# Preview the boundaries to confirm the cleaned, lower-case column names.
lahoods.head()

Unnamed: 0,objectid,cityname_alf,type,name,city_comm_name,sq_miles,jurisdiction,geometry
0,4261,AGOURA HILLS,,,AGOURA HILLS,7.817874,INCORPORATED CITY,"POLYGON ((-118.73256 34.15503, -118.73239 34.1..."
1,4262,ALHAMBRA,,,ALHAMBRA,7.631921,INCORPORATED CITY,"POLYGON ((-118.12110 34.10537, -118.12117 34.1..."
2,4263,ARCADIA,,,ARCADIA,11.109514,INCORPORATED CITY,"POLYGON ((-118.01826 34.17572, -118.01638 34.1..."
3,4264,ARTESIA,,,ARTESIA,1.621596,INCORPORATED CITY,"POLYGON ((-118.08782 33.88035, -118.08781 33.8..."
4,4265,AVALON,,,AVALON,2.886091,INCORPORATED CITY,"POLYGON ((-118.33083 33.35590, -118.33085 33.3..."


---

### Merge the neighborhoods with their buildings

In [None]:
# Spatially join each building footprint to the boundary polygon that fully
# contains it. how="inner" keeps only footprints that fall within a boundary.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and
# removed in 1.0.
la_hoods_buildings = gpd.sjoin(src, lahoods, how="inner", predicate="within")

In [16]:
# Preview the joined frame; overlapping column names get _left/_right suffixes.
la_hoods_buildings.head()

Unnamed: 0,name_left,countyfips,geometry,index_right,objectid,cityname_alf,type,name_right,city_comm_name,sq_miles,jurisdiction
0,Los Angeles,37,"POLYGON ((-117.65550 34.36644, -117.65554 34.3...",88,4350,,AREA,ANTELOPE VALLEY,ANTELOPE VALLEY,1800.48028,UNINCORPORATED AREA
1,Los Angeles,37,"POLYGON ((-117.65366 34.36188, -117.65368 34.3...",88,4350,,AREA,ANTELOPE VALLEY,ANTELOPE VALLEY,1800.48028,UNINCORPORATED AREA
2,Los Angeles,37,"POLYGON ((-117.65889 34.43905, -117.65889 34.4...",88,4350,,AREA,ANTELOPE VALLEY,ANTELOPE VALLEY,1800.48028,UNINCORPORATED AREA
3,Los Angeles,37,"POLYGON ((-117.66029 34.23580, -117.66032 34.2...",88,4350,,AREA,ANTELOPE VALLEY,ANTELOPE VALLEY,1800.48028,UNINCORPORATED AREA
4,Los Angeles,37,"POLYGON ((-117.67558 34.70836, -117.67559 34.7...",88,4350,,AREA,ANTELOPE VALLEY,ANTELOPE VALLEY,1800.48028,UNINCORPORATED AREA


In [17]:
# Remove the join bookkeeping column and the attribute columns not needed downstream.
# Reassigning (rather than inplace=True) together with errors="ignore" keeps this
# cell safe to re-run: a second run no longer raises KeyError for columns that
# have already been dropped.
la_hoods_buildings = la_hoods_buildings.drop(
    columns=["index_right", "cityname_alf", "name_right", "sq_miles", "name_left"],
    errors="ignore",
)

### How many buildings in each hood?

In [29]:
# Convert the upper-case community names (e.g. "ANTELOPE VALLEY") to title case.
titled_names = la_hoods_buildings["city_comm_name"].str.title()
la_hoods_buildings["city_comm_name"] = titled_names

In [33]:
# Convert the upper-case jurisdiction labels (e.g. "UNINCORPORATED AREA") to title case.
titled_jurisdictions = la_hoods_buildings["jurisdiction"].str.title()
la_hoods_buildings["jurisdiction"] = titled_jurisdictions

In [51]:
# Build a slug for each community name: lower-case it, then turn every space
# and every forward slash into a hyphen.
lowercase_names = la_hoods_buildings["city_comm_name"].str.lower()
la_hoods_buildings["city_comm_name_slug"] = lowercase_names.str.replace(
    r"[ /]", "-", regex=True
)

In [52]:
# Inspect the first 20 rows to check the title-cased names and the new slug column.
la_hoods_buildings.head(20)

Unnamed: 0,countyfips,geometry,objectid,type,city_comm_name,jurisdiction,city_comm_name_slug
0,37,"POLYGON ((-117.65550 34.36644, -117.65554 34.3...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
1,37,"POLYGON ((-117.65366 34.36188, -117.65368 34.3...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
2,37,"POLYGON ((-117.65889 34.43905, -117.65889 34.4...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
3,37,"POLYGON ((-117.66029 34.23580, -117.66032 34.2...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
4,37,"POLYGON ((-117.67558 34.70836, -117.67559 34.7...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
5,37,"POLYGON ((-117.68064 34.56549, -117.68074 34.5...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
205,37,"POLYGON ((-117.71746 34.39145, -117.71748 34.3...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
208,37,"POLYGON ((-117.71724 34.39230, -117.71724 34.3...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
221,37,"POLYGON ((-117.71891 34.39370, -117.71898 34.3...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley
279,37,"POLYGON ((-117.72124 34.59179, -117.72124 34.5...",4350,AREA,Antelope Valley,Unincorporated Area,antelope-valley


In [53]:
# Count footprints per community: group on name, jurisdiction and slug, then
# count the non-null `countyfips` values in each group.
hood_groups = la_hoods_buildings.groupby(
    ["city_comm_name", "jurisdiction", "city_comm_name_slug"]
)
la_hoods_buildings_grp = hood_groups["countyfips"].count().reset_index()

In [54]:
# The counted `countyfips` column is really a building tally; name it accordingly.
la_hoods_buildings_grp = la_hoods_buildings_grp.rename(
    columns={"countyfips": "buildings"}
)

In [55]:
# Show the five communities with the most building footprints.
la_hoods_buildings_grp.sort_values("buildings", ascending=False).head()

Unnamed: 0,city_comm_name,jurisdiction,city_comm_name_slug,buildings
76,Los Angeles,Incorporated City,los-angeles,817640
74,Long Beach,Incorporated City,long-beach,122330
112,Santa Clarita,Incorporated City,santa-clarita,57049
70,Lancaster,Incorporated City,lancaster,49020
92,Palmdale,Incorporated City,palmdale,44460


### Slice them up and save neighborhood files

In [57]:
# Write one GeoJSON file of building footprints per community slug.
hoods_output_dir = Path("output/hoods/la")
# Create the target directory up front so the export does not depend on it
# already existing.
hoods_output_dir.mkdir(parents=True, exist_ok=True)

# A single groupby pass replaces re-filtering the whole frame once per slug;
# sort=False keeps the slugs in order of first appearance, as .unique() did.
for hood, hood_buildings in la_hoods_buildings.groupby(
    "city_comm_name_slug", sort=False
):
    hood_buildings.to_file(
        str(hoods_output_dir / f"{hood}_buildings.geojson"), driver="GeoJSON"
    )