# LA building permits: 2013-2019

In [1]:
import pandas as pd
import matplotlib as mpl
import geopandas as gpd
import descartes
import geojson
import json
import numpy as np
import altair as alt
import altair_latimes as lat
# Select Altair's 'notebook' renderer for chart output.
alt.renderers.enable('notebook')
# Register the theme exposed by altair_latimes (imported as `lat`) and turn it on.
# NOTE(review): `lat` is the module alias here; avoid reusing the name `lat`
# for data later in the notebook, or this cell cannot be re-run on its own.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
# Render floats in DataFrame output with thousands separators.
pd.options.display.float_format = '{:,}'.format

### Read data (as of 9/9/2019), clean up column headers

In [2]:
# Source: https://data.lacity.org/A-Prosperous-City/Building-and-Safety-Permit-Information/yv23-pmwf
# NOTE(review): the path below is specific to one machine; point it at your
# local copy of the export before running.
permits = pd.read_csv('/Users/mhustiles/Desktop/data/LA/Building_and_Safety_Permit_Information.csv', low_memory=False)

# Normalize headers: trim, lower-case, spaces -> underscores, drop parentheses.
# regex=False makes every replacement a literal one, so '(' and ')' are never
# interpreted as (invalid) regular expressions regardless of pandas version.
permits.columns = (
    permits.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

In [3]:
# List the column names after the header clean-up.
permits.columns

Index(['assessor_book', 'assessor_page', 'assessor_parcel', 'tract', 'block',
       'lot', 'reference_#_old_permit_#', 'pcis_permit_#', 'status',
       'status_date', 'permit_type', 'permit_sub-type', 'permit_category',
       'project_number', 'event_code', 'initiating_office', 'issue_date',
       'address_start', 'address_fraction_start', 'address_end',
       'address_fraction_end', 'street_direction', 'street_name',
       'street_suffix', 'suffix_direction', 'unit_range_start',
       'unit_range_end', 'zip_code', 'work_description', 'valuation',
       'floor_area-l.a._zoning_code_definition',
       '#_of_residential_dwelling_units', '#_of_accessory_dwelling_units',
       '#_of_stories', 'contractor's_business_name', 'contractor_address',
       'contractor_city', 'contractor_state', 'license_type', 'license_#',
       'principal_first_name', 'principal_middle_name', 'principal_last_name',
       'license_expiration_date', 'applicant_first_name',
       'applicant_last_name'

### Clean up data types, parse dates

In [4]:
# Identifier-like columns are kept as text rather than numbers.
# NOTE(review): census_tract, council_district, existing_code, proposed_code and
# latitude/longitude do not appear in the (truncated) column listing shown
# earlier in the notebook; confirm they exist in the export.
text_columns = [
    'project_number', 'address_start', 'address_end', 'license_#',
    'census_tract', 'council_district', 'existing_code', 'proposed_code',
]
for column in text_columns:
    permits[column] = permits[column].astype(str)

# These columns get a trailing ".0" stripped after the cast to text.
# The pattern is a raw string anchored to the end of the value, so only a
# trailing ".0" is removed, never a ".0" in the middle.
for column in ['zip_code', 'assessor_book', 'assessor_page']:
    permits[column] = permits[column].astype(str).replace(r'\.0$', '', regex=True)

# Parse the two date columns (month/day/year) and derive text year, month and
# weekday-name columns from each. `.dt.day_name()` replaces the removed
# `.dt.weekday_name` accessor.
for suffix, date_column in [('issued', 'issue_date'), ('status', 'status_date')]:
    permits[date_column] = pd.to_datetime(permits[date_column], format='%m/%d/%Y')
    permits['year-' + suffix] = permits[date_column].dt.year.astype(str)
    permits['month-' + suffix] = permits[date_column].dt.month.astype(str)
    permits['weekday-' + suffix] = permits[date_column].dt.day_name().astype(str)

# Floor area as float, plus a short alias ('area') used by the analysis below.
permits['floor_area-l.a._zoning_code_definition'] = permits['floor_area-l.a._zoning_code_definition'].astype(float)
permits['area'] = permits['floor_area-l.a._zoning_code_definition']

# Working copy of the coordinate column; it is split into latitude/longitude next.
permits['location'] = permits['latitude/longitude']

### Convert 'location' into separate latitude/longitude fields

In [5]:
# Drop the parentheses around each "(lat, lon)" value. regex=False keeps the
# replacement literal, so '(' and ')' are never parsed as regex syntax.
permits['location'] = (
    permits['location']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Split on the comma once, for the whole column. Rows that are missing or do
# not contain two comma-separated parts get NaN in BOTH output columns.
# This avoids rebinding the name `lat` (the altair_latimes alias) to a list,
# and avoids the bare `except:` around a row-by-row loop.
coordinate_parts = permits['location'].str.split(',')
has_lat_lon = coordinate_parts.str.len() >= 2

# Values stay as text: no numeric conversion and no whitespace trimming is
# applied here, matching what the row loop produced.
permits['latitude'] = coordinate_parts.str[0].where(has_lat_lon)
permits['longitude'] = coordinate_parts.str[1].where(has_lat_lon)

### How many total permits issued?

In [6]:
# Row count of the full permits table.
len(permits)

1037228

### How many permits for just new single- or dual-family homes?

In [60]:
# New-home permits: a new-building permit, of the 1-or-2-family sub-type, whose
# work description mentions "DWELLING".
is_new_building = permits['permit_type'] == 'Bldg-New'
is_one_or_two_family = permits['permit_sub-type'] == '1 or 2 Family Dwelling'
# na=False: rows with no work description count as "no match" instead of
# putting NaN into the boolean mask.
mentions_dwelling = permits['work_description'].str.contains('DWELLING', na=False)

new_home_permits_all = permits[is_new_building & is_one_or_two_family & mentions_dwelling]

In [61]:
# How many new-home permits does the filter keep?
len(new_home_permits_all)

3821

### Filter list of new construction/homes to specific zip codes

In [62]:
# ZIP codes of interest (compared as strings, since zip_code was cast to text).
platinum_zips = ['90210', '90077', '90272']
bel_air_zip = ['90077']

# Build each membership mask once, then slice the new-home permits with it.
in_platinum_triangle = new_home_permits_all['zip_code'].isin(platinum_zips)
in_bel_air = new_home_permits_all['zip_code'].isin(bel_air_zip)

platinum_permits = new_home_permits_all[in_platinum_triangle]   # Platinum Triangle
belair_permits = new_home_permits_all[in_bel_air]               # Bel Air only
rest_permits = new_home_permits_all[~in_platinum_triangle]      # everything else

### How do these zip code zones compare?

In [63]:
# Bel Air: count / mean / median / max of area per zip code, largest median first.
(
    belair_permits
    .groupby('zip_code')['area']
    .agg(['count', 'mean', 'median', 'max'])
    .round()
    .sort_values(by='median', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,count,mean,median,max
zip_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
90077,41,12515.0,8437.0,73934.0


In [64]:
# Platinum Triangle zips: count / mean / median / max of area per zip code,
# largest median first.
(
    platinum_permits
    .groupby('zip_code')['area']
    .agg(['count', 'mean', 'median', 'max'])
    .round()
    .sort_values(by='median', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,count,mean,median,max
zip_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
90077,41,12515.0,8437.0,73934.0
90210,16,11080.0,5815.0,55667.0
90272,107,5412.0,4812.0,17266.0


In [65]:
# Rest of the city: the ten zip codes with the largest median area, with
# count / mean / median / max of area for each.
(
    rest_permits
    .groupby('zip_code')['area']
    .agg(['count', 'mean', 'median', 'max'])
    .round()
    .sort_values(by='median', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,count,mean,median,max
zip_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
90069,35,7085.0,6437.0,21495.0
90049,92,6718.0,6040.0,23030.0
90067,2,5963.0,5963.0,6049.0
91436,65,5216.0,4894.0,13137.0
90212,1,4621.0,4621.0,4621.0
90094,1,4288.0,4288.0,4288.0
90024,24,4816.0,4265.0,9829.0
91423,56,4146.0,4245.0,10248.0
91356,44,3913.0,4185.0,8689.0
90046,64,4045.0,4164.0,15633.0


### Median area (square feet) of new_home_permits by place

In [66]:
# Median area across all Platinum Triangle new-home permits.
platinum_permits['area'].median()

5499.99

In [67]:
# Median area across Bel Air new-home permits.
belair_permits['area'].median()

8437.0

In [68]:
# Median area across new-home permits outside the Platinum Triangle zips.
rest_permits['area'].median()

1627.0

### Top 10: new home permits by all zip codes, with count and average, median and max for square feet

In [69]:
# All zip codes: ten largest by median area.
# Note: 'size' counts every row in the group, including rows with no area
# value, so it can exceed the 'count' used in the per-area tables.
(
    new_home_permits_all
    .groupby('zip_code')['area']
    .agg(['size', 'mean', 'median', 'max'])
    .round()
    .sort_values(by='median', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,size,mean,median,max
zip_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
90077,41,12515.0,8437.0,73934.0
90069,35,7085.0,6437.0,21495.0
90049,92,6718.0,6040.0,23030.0
90067,2,5963.0,5963.0,6049.0
90210,16,11080.0,5815.0,55667.0
91436,65,5216.0,4894.0,13137.0
90272,108,5412.0,4812.0,17266.0
90212,1,4621.0,4621.0,4621.0
90094,1,4288.0,4288.0,4288.0
90024,24,4816.0,4265.0,9829.0


### Area summary and median area by year and zip (Bel Air and Platinum Triangle)

In [70]:
# Min / max / mean / median of area for Bel Air, shown as a single row.
belair_permits_agg = belair_permits[['area']].agg(['min', 'max', 'mean', 'median'])
belair_permits_agg.T.round(0)

Unnamed: 0,min,max,mean,median
area,142.0,73934.0,12515.0,8437.0


In [71]:
# Median area per (year issued, zip code) for Bel Air, largest first.
# 'area' is selected BEFORE aggregating, so the median is computed for that one
# column only; taking the median of every column first is wasteful and fails on
# text columns in recent pandas releases.
belair_permits_grouped_area = (
    belair_permits
    .groupby(['year-issued', 'zip_code'])[['area']]
    .median()
    .round(0)
    .sort_values(by='area', ascending=False)
    .reset_index()
)

In [72]:
# First ten rows of the Bel Air median-area-by-year table.
belair_permits_grouped_area.head(10)

Unnamed: 0,year-issued,zip_code,area
0,2015,90077,18998.0
1,2016,90077,9810.0
2,2013,90077,8437.0
3,2014,90077,8436.0
4,2018,90077,8206.0
5,2017,90077,7444.0
6,2019,90077,5701.0


In [73]:
# Reshape to one row per zip code with one column per issue year.
(
    belair_permits_grouped_area
    .pivot(index='zip_code', columns='year-issued', values='area')
    .reset_index()
)

year-issued,zip_code,2013,2014,2015,2016,2017,2018,2019
0,90077,8437.0,8436.0,18998.0,9810.0,7444.0,8206.0,5701.0


In [74]:
# Median area per (year issued, zip code) across the Platinum Triangle zips,
# largest first. 'area' is selected before aggregating so only that column is
# reduced (aggregating every column first fails on text columns in recent
# pandas releases).
(
    platinum_permits
    .groupby(['year-issued', 'zip_code'])[['area']]
    .median()
    .round(0)
    .sort_values(by='area', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,area
year-issued,zip_code,Unnamed: 2_level_1
2018,90210,28028.0
2015,90077,18998.0
2016,90077,9810.0
2013,90077,8437.0
2014,90077,8436.0
2016,90210,8351.0
2018,90077,8206.0
2017,90077,7444.0
2017,90210,6277.0
2019,90077,5701.0


In [75]:
# Bel Air permits ordered by area, largest first, showing only the year,
# street number, street name and area.
address_columns = ['year-issued', 'address_start', 'street_name', 'area']
belair_permits.sort_values(by='area', ascending=False)[address_columns]

Unnamed: 0,year-issued,address_start,street_name,area
690283,2014,944.0,AIROLE,73934.0
367841,2016,800.0,TORTUOSO,56797.0
600220,2015,800.0,STRADELLA,25832.0
147975,2016,642.0,SAINT CLOUD,21669.0
847239,2015,454.0,CUESTA,21058.0
343720,2015,10701.0,BELLAGIO,20196.0
81261,2016,805.0,NIMES,18584.0
749551,2015,1516.0,STONE CANYON,17800.0
439601,2014,10460.0,REVUELTA,17481.0
965488,2017,10830.0,CHALON,16536.0


In [79]:
# Show every field of one Bel Air permit, looked up by its row label.
# NOTE(review): 485304 is a hard-coded index label from the loaded CSV; it will
# point at a different row (or raise KeyError) if the export changes.
belair_permits.loc[485304]

assessor_book                                                                           4370
assessor_page                                                                            021
assessor_parcel                                                                          020
tract                                                                                BEL-AIR
block                                                                                    NaN
lot                                                                                     LT L
reference_#_old_permit_#                                                           19WL98595
pcis_permit_#                                                              18010-30000-03707
status                                                                                Issued
status_date                                                              2019-05-01 00:00:00
permit_type                                                           

### Median area for new homes in all zip codes

In [None]:
# Median area per zip code for all new-home permits, largest first.
# Uses .median() to match the variable name and the section heading (the
# previous .mean() produced averages), and selects 'area' before aggregating
# so only that column is reduced.
median_area_all = (
    new_home_permits_all
    .groupby('zip_code')[['area']]
    .median()
    .round(0)
    .sort_values(by='area', ascending=False)
)

### Median area for new homes in all zip codes, top 10

In [None]:
# Ten zip codes at the top of the per-zip area table.
median_area_all.head(10)

### Basic descriptives about platinum/bel air zip code permits

In [None]:
# Numeric summaries for both subsets. A cell only renders its LAST expression,
# so the first table is shown explicitly with display() (available by default
# in a notebook kernel); otherwise the Platinum Triangle summary is computed
# and silently thrown away.
display(platinum_permits.describe(include=['number']).round())
belair_permits.describe(include=['number']).round()

In [None]:
# Median area for Bel Air (same figure as computed in the median-by-place section).
belair_permits['area'].median()

In [None]:
# Number of Bel Air permits per permit type, most common first.
# The count column produced by reset_index() is named 0.
permit_types = (
    belair_permits
    .groupby(['permit_type'])
    .size()
    .sort_values(ascending=False)
    .reset_index()
)

In [None]:
# Number of Bel Air permits per permit sub-type, most common first.
# The count column produced by reset_index() is named 0.
permit_sub_types = (
    belair_permits
    .groupby(['permit_sub-type'])
    .size()
    .sort_values(ascending=False)
    .reset_index()
)

In [None]:
# Display the per-type counts.
permit_types

In [None]:
# Display the per-sub-type counts.
permit_sub_types

### How many have been issued?

In [None]:
# Grouped views of the Platinum Triangle permits; each is counted in the cells
# that follow. The selected column only supplies the values being counted.
zip_count = platinum_permits.groupby('zip_code')['year-issued']          # per zip code
year_count = platinum_permits.groupby('year-issued')['zip_code']         # per issue year
license_count = platinum_permits.groupby('license_type')['zip_code']     # per license type

In [None]:
# Platinum Triangle permits per issue year (non-missing zip_code values).
year_count.count()

In [None]:
# Platinum Triangle permits per zip code (non-missing year-issued values).
zip_count.count()

In [None]:
# Platinum Triangle permits per license type (non-missing zip_code values).
license_count.count()