# LA building permits: 2013-2019

In [64]:
import pandas as pd
import requests
import matplotlib as mpl
import geopandas as gpd
import descartes
import geojson
import json
import numpy as np
import altair as alt
import altair_latimes as lat
# Display setup: thousands separators for floats, notebook renderer and the
# LA Times chart theme for Altair.
pd.set_option('display.float_format', '{:,}'.format)
alt.renderers.enable('notebook')
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

### Read data (as of 9/9/2019), clean up column headers

In [65]:
# Source: https://data.lacity.org/A-Prosperous-City/Building-and-Safety-Permit-Information/yv23-pmwf

permits = pd.read_csv('/Users/mhustiles/Desktop/data/LA/Building_and_Safety_Permit_Information.csv', low_memory=False)
# Normalise headers to lower-case, underscore-separated names without
# parentheses. regex=False keeps '(' and ')' literal on every pandas version
# (as a regex, a lone '(' is invalid).
permits.columns = (
    permits.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

### Clean up data types, parse dates

In [66]:
# Identifier-like columns are kept as text so they are never treated as numbers.
id_columns = ['project_number', 'address_start', 'address_end', 'license_#',
              'council_district', 'existing_code', 'proposed_code']
permits[id_columns] = permits[id_columns].astype(str)

# Codes that were parsed as floats render as e.g. '90012.0'; drop only a
# trailing '.0' (anchored, so a '.0' inside a value is left alone).
for code_column in ['zip_code', 'assessor_book', 'assessor_page']:
    permits[code_column] = permits[code_column].astype(str).replace(r'\.0$', '', regex=True)

# Build tract identifiers: prefix '06037', then drop the decimal point and
# right-pad with zeros to 11 characters. regex=False keeps '.' literal.
# NOTE(review): missing tracts become the text 'nan' here — confirm acceptable.
permits['census_tract'] = '06037' + permits['census_tract'].astype(str)
permits['nu_census_tract'] = (
    permits['census_tract'].str.replace('.', '', regex=False).str.ljust(11, '0')
)

# Parse dates and derive year / month / weekday parts as strings.
for date_column in ['issue_date', 'status_date']:
    permits[date_column] = pd.to_datetime(permits[date_column], format='%m/%d/%Y')

# dt.day_name() replaces dt.weekday_name, which was removed in pandas 1.0.
permits['year_issued'] = permits['issue_date'].dt.year.astype(str)
permits['month_issued'] = permits['issue_date'].dt.month.astype(str)
permits['weekday_issued'] = permits['issue_date'].dt.day_name().astype(str)
permits['year-status'] = permits['status_date'].dt.year.astype(str)
permits['month-status'] = permits['status_date'].dt.month.astype(str)
permits['weekday-status'] = permits['status_date'].dt.day_name().astype(str)

# Floor area as float, with a short alias used by the analysis cells.
permits['floor_area-l.a._zoning_code_definition'] = permits['floor_area-l.a._zoning_code_definition'].astype(float)
permits['area'] = permits['floor_area-l.a._zoning_code_definition']
permits['location'] = permits['latitude/longitude']

### Convert 'location' into separate latitude/longitude fields

In [67]:
# Strip the surrounding parentheses from the "(lat, lon)" text.
permits['location'] = (
    permits['location']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Split on commas in one vectorised pass and keep the first two parts.
# This avoids rebinding `lat` (the altair_latimes alias imported at the top)
# and keeps the two fields aligned row-for-row.
coordinates = (
    permits['location']
    .str.split(',', expand=True)
    .reindex(columns=[0, 1])
)

# Rows that are missing or lack a comma get NaN in both fields.
incomplete = coordinates[1].isna()
coordinates.loc[incomplete, [0, 1]] = np.nan

# Values stay as text, exactly as split (no whitespace trimming or casting).
permits['latitude'] = coordinates[0]
permits['longitude'] = coordinates[1]

### How many total permits issued?

In [68]:
# Row count of the full permits table.
permits.shape[0]

### Recent permits

In [69]:
# Permits issued in the most recent two years (year_issued is stored as text).
years = ['2018', '2019']
# .copy() so later column assignments do not hit a view of `permits`.
recent_permits = permits[permits['year_issued'].isin(years)].copy()
# New-building permits among the recent ones (filter the recent subset,
# not the full table, so the result matches its name).
recent_permits_building = recent_permits[recent_permits['permit_type'] == 'Bldg-New']

### How many recent permits for just new single- or dual-family homes

In [70]:
# New single- or dual-family home permits.
is_new_home = (
    (permits['permit_type'] == 'Bldg-New')
    & (permits['permit_sub-type'] == '1 or 2 Family Dwelling')
)
# All years: used by the zip-code comparisons further down.
new_home_permits_all = permits[is_new_home]
# Recent years only: align the mask to recent_permits' own rows instead of
# relying on pandas to reindex a mask built on the full table.
new_home_permits_recent = recent_permits[is_new_home.loc[recent_permits.index]]

In [71]:
# How many new-home permits is that?
new_home_permits_all.shape[0]

### Recent permits by census tract

In [72]:
# Number of recent permits (non-null census_tract values) per padded tract id.
(
    recent_permits
    .groupby('nu_census_tract')['census_tract']
    .count()
    .reset_index(name='count')
)

### Filter list of new construction/homes to specific zip codes

In [73]:
platinum_zips = ['90210', '90077', '90272']
bel_air_zip = ['90077']

# Build each membership mask once, then slice the new-home permits with it.
in_platinum = new_home_permits_all['zip_code'].isin(platinum_zips)
in_bel_air = new_home_permits_all['zip_code'].isin(bel_air_zip)

# Platinum Triangle
platinum_permits = new_home_permits_all[in_platinum]
# Just Bel Air
belair_permits = new_home_permits_all[in_bel_air]
# Everything else
rest_permits = new_home_permits_all[~in_platinum]

### How do these zip code zones compare?

In [74]:
# Bel Air: count, mean, median and max square feet per zip code,
# largest median first (top 10).
belair_stats = belair_permits.groupby('zip_code')['area'].agg(['count', 'mean', 'median', 'max'])
belair_stats.round().sort_values(by='median', ascending=False).head(10)

In [75]:
# All the Platinum Triangle zips: count, mean, median and max square feet
# per zip code, largest median first (top 10).
platinum_stats = platinum_permits.groupby('zip_code')['area'].agg(['count', 'mean', 'median', 'max'])
platinum_stats.round().sort_values(by='median', ascending=False).head(10)

In [76]:
# Everything else in the city: count, mean, median and max square feet
# per zip code, largest median first (top 10).
rest_stats = rest_permits.groupby('zip_code')['area'].agg(['count', 'mean', 'median', 'max'])
rest_stats.round().sort_values(by='median', ascending=False).head(10)

### Median area (square feet) of new_home_permits by place

In [77]:
# Platinum Triangle: median square feet across its new-home permits
platinum_permits['area'].median()

In [78]:
# Bel Air: median square feet across its new-home permits
belair_permits['area'].median()

In [79]:
# Rest of the city: median square feet across its new-home permits
rest_permits['area'].median()

### Top 10: new home permits by all zip codes, with count and average, median and max for square feet

In [80]:
# Per-zip size, mean, median and max square feet for all new-home permits,
# ranked by median (top 10).
all_zip_stats = new_home_permits_all.groupby('zip_code')['area'].agg(['size', 'mean', 'median', 'max'])
all_zip_stats.round().sort_values(by='median', ascending=False).head(10)

### Pivot table with median area by year and zip

In [81]:
# Summary statistics of square footage for the Bel Air permits,
# shown as a single row with whole-number values.
area_stats = ['min', 'max', 'mean', 'median']
belair_permits_agg = belair_permits.agg({'area': area_stats})
belair_permits_agg.transpose().round(0)

In [82]:
# Median square footage per (year issued, zip code), largest first.
# Selecting 'area' before aggregating avoids taking the median of every
# column, which raises TypeError on text/date columns in pandas >= 2.0.
belair_permits_grouped_area = (
    belair_permits
    .groupby(['year_issued', 'zip_code'])[['area']]
    .median()
    .round(0)
    .sort_values(by='area', ascending=False)
    .reset_index()
)

In [83]:
# First ten rows of the grouped medians.
belair_permits_grouped_area.iloc[:10]

In [84]:
# Reshape to one row per zip code with a column of median area per year.
area_by_year = belair_permits_grouped_area.pivot(
    index='zip_code',
    columns='year_issued',
    values='area',
)
area_by_year.reset_index()

In [85]:
# Median square footage per (year issued, zip code) in the Platinum Triangle,
# largest first. 'area' is selected before aggregating so text/date columns
# are never passed to median() (a TypeError in pandas >= 2.0).
(
    platinum_permits
    .groupby(['year_issued', 'zip_code'])[['area']]
    .median()
    .round(0)
    .sort_values(by='area', ascending=False)
)

In [86]:
# Bel Air permits ordered from largest to smallest square footage.
listing_columns = ['year_issued', 'address_start', 'street_name', 'area']
belair_permits[listing_columns].sort_values(by='area', ascending=False)

### Lookup one permit record

In [87]:
# Look up a single record by its index label.
# NOTE(review): presumably the row position from the CSV load, so it may
# change when the source file is refreshed — confirm.
permit_label = 485304
belair_permits.loc[permit_label]

### Recent new-home building permits by tract

In [88]:
# Count of recent new-home permits per padded census tract id.
recent_home_permits_cnt_tract = (
    new_home_permits_recent
    .groupby('nu_census_tract')['nu_census_tract']
    .count()
    .reset_index(name='count')
)

### Recent permits by tract

In [89]:
# Count of all recent permits per padded census tract id.
recent_permits_cnt_tract = (
    recent_permits
    .groupby('nu_census_tract')['nu_census_tract']
    .count()
    .reset_index(name='count')
)

In [90]:
# Last five rows of the per-tract counts.
recent_permits_cnt_tract.tail(5)

## Geography

### LA County census tracts

In [91]:
# Load the census tract boundaries from the local GeoJSON file.
tracts_geojson_path = '/Users/mhustiles/Desktop/github/notebooks/permits/input/tracts.geojson'
la_cty_tract = gpd.read_file(tracts_geojson_path)

In [92]:
# Normalise tract column names to lower-case, underscore-separated names
# without parentheses. regex=False keeps '(' and ')' literal on every
# pandas version (as a regex, a lone '(' is invalid).
la_cty_tract.columns = (
    la_cty_tract.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

In [93]:
# Quick visual check that the tract geometries loaded.
la_cty_tract.plot()

In [94]:
# Inspect the tract id column that the permit counts are joined on.
la_cty_tract.loc[:, 'geoid10']

In [95]:
# Attach permit counts to tract geometries; only tracts present on both
# sides are kept (inner join on geoid10 == nu_census_tract).
tract_join = dict(how='inner', left_on='geoid10', right_on='nu_census_tract')
merged_recent = la_cty_tract.merge(recent_permits_cnt_tract, **tract_join)
merged_recent_home = la_cty_tract.merge(recent_home_permits_cnt_tract, **tract_join)

In [96]:
# Write the tract-level counts of all recent permits as GeoJSON.
merged_recent_path = '/Users/mhustiles/Desktop/github/notebooks/permits/output/merged_recent.geojson'
merged_recent.to_file(merged_recent_path, driver='GeoJSON')

In [97]:
# Write the tract-level counts of recent new-home permits as GeoJSON.
merged_recent_home_path = '/Users/mhustiles/Desktop/github/notebooks/permits/output/merged_recent_home.geojson'
merged_recent_home.to_file(merged_recent_home_path, driver='GeoJSON')

In [98]:
# Inspect the prefixed census tract values on the recent permits.
recent_permits.loc[:, 'census_tract']