# LA building permits: 2013-2020

In [32]:
import pandas as pd
import geopandas as gpd
import matplotlib
import geojson
import json
import jenkspy
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

### Read data (as of May 4, 2020), clean up column headers

In [61]:
#https://data.lacity.org/A-Prosperous-City/Building-and-Safety-Permit-Information/yv23-pmwf

In [None]:
la_city_url = 'https://data.lacity.org/api/views/yv23-pmwf/rows.csv?accessType=DOWNLOAD'

In [None]:
sf_url = 'https://data.sfgov.org/api/views/i98e-djp9/rows.csv?accessType=DOWNLOAD'

In [None]:
san_url = ''

In [73]:
permits = pd.read_csv(url, low_memory=False, parse_dates=[['issue_date', 'status_date']])

ValueError: 'issue_date' is not in list

In [63]:
permits.columns = permits.columns.str.strip().str.lower()\
.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')

### Clean up data types, parse dates

In [64]:
permits[['project_number', 'address_start', 'address_end', \
         'license_#', 'council_district', 'existing_code', 'proposed_code']] =\
    permits[['project_number', 'address_start', 'address_end', 'license_#', 'council_district', \
             'existing_code', 'proposed_code']].astype(str)

In [65]:
permits['zip_code'] = permits['zip_code'].astype(str).replace('\.0', '', regex=True)
permits['assessor_book'] = permits['assessor_book'].astype(str).replace('\.0', '', regex=True)
permits['assessor_page'] = permits['assessor_page'].astype(str).replace('\.0', '', regex=True)
permits['census_tract'] = '06037' + permits['census_tract'].astype(str)
permits['nu_census_tract'] = permits['census_tract'].str.replace('.','')

In [66]:
permits['issue_date'] = pd.to_datetime(permits['issue_date'], format='%m/%d/%Y')
permits['status_date'] = pd.to_datetime(permits['status_date'], format='%m/%d/%Y')
permits['year_issued'] = permits['issue_date'].dt.year.astype(str)
permits['month_issued'] = permits['issue_date'].dt.month.astype(str)
permits['year_issued'] = permits['issue_date'].dt.year.astype(str)
permits['weekday_issued'] = permits['issue_date'].dt.weekday_name.astype(str)
permits['year-status'] = permits['status_date'].dt.year.astype(str)
permits['month-status'] = permits['status_date'].dt.month.astype(str)
permits['year-status'] = permits['status_date'].dt.year.astype(str)
permits['weekday-status'] = permits['status_date'].dt.weekday_name.astype(str)

In [67]:
permits['floor_area-l.a._zoning_code_definition'] = permits['floor_area-l.a._zoning_code_definition'].astype(float)
permits['area'] = permits['floor_area-l.a._zoning_code_definition'].astype(float)
permits['location'] = permits['latitude/longitude']

### Convert 'location' into separate latitude/longitude fields

In [68]:
permits['location'] = permits.location.str.replace('(', '').str.replace(')', '')

lat = []
lon = []

for row in permits['location']:
    try:
        lat.append(row.split(',')[0])
        lon.append(row.split(',')[1])
    except:
        lat.append(np.NaN)
        lon.append(np.NaN)

permits['latitude'] = lat
permits['longitude'] = lon

### How many total permits issued?

In [69]:
len(permits)

1146006

In [70]:
permits.dtypes

assessor_book                                       object
assessor_page                                       object
assessor_parcel                                     object
tract                                               object
block                                               object
lot                                                 object
reference_#_old_permit_#                            object
pcis_permit_#                                       object
status                                              object
status_date                                 datetime64[ns]
permit_type                                         object
permit_sub-type                                     object
permit_category                                     object
project_number                                      object
event_code                                          object
initiating_office                                   object
issue_date                                  datetime64[n

In [72]:
permits.groupby(['issue_date']).resample('M').size()

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'

### Recent permits

In [None]:
years = ['2018', '2019', '2020']
recent_permits = permits[permits.year_issued.isin(years)]
recent_permits_building = permits[(permits['permit_type'] == 'Bldg-New')]

### Recent permits by tract

---

## Geography

### LA County census tracts

In [None]:
la_cty_tract = gpd.read_file('/Users/mhustiles/Desktop/github/notebooks/permits/input/tracts.geojson')

In [None]:
la_cty_tract.columns = la_cty_tract.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '')

In [None]:
la_cty_tract.plot()

In [None]:
la_cty_tract['geoid10']

In [None]:
merged_recent = la_cty_tract.merge(recent_permits_cnt_tract, left_on='geoid10', right_on='nu_census_tract')
merged_recent_home = la_cty_tract.merge(recent_home_permits_cnt_tract, left_on='geoid10', right_on='nu_census_tract')

In [None]:
merged_recent.to_file('/Users/mhustiles/Desktop/github/notebooks/permits/output/merged_recent.geojson', driver='GeoJSON')

In [None]:
merged_recent_home.to_file('/Users/mhustiles/Desktop/github/notebooks/permits/output/merged_recent_home.geojson', driver='GeoJSON')

In [None]:
recent_permits['census_tract']