In [11]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
from datetime import date

# Data Loading

SF DBI’s development pipeline datasets use four nearly-identical data models. Parcel identificator was called “BLKLOT” or “Block Lot” in older datasets and is called APN (Assessor's Parcel Number) in the most recent datasets. The same parcel id can written with a leading zero in one dataset and without a leading zero in another one. Prior to 2014 Q3, datasets don’t have fields for affordable unit counts (total affordable units and net affordable units).

Some project records don’t include building permit id or use the placeholder “MULTIPLE” instead of actual permit references. Initially, we remove permitless projects from the dataset, than re-add them at a later stage.

In [50]:
def loadData(fileName, label, fmt = 1):
    print(fileName)
    columns = ['UNITS', 'NET_UNITS', 'AFF_UNITS', 'NET_AFF_UNITS', 'NAMEADDR', 'APN', 'BESTSTAT', 'BESTDATE']
    
    converters = { 'APN': lambda x: x[4:], 'BP_APPLNO': lambda x: 'N'+x.replace(',','') }
    if fmt == 2 or fmt == 3:
        converters = { 'BLKLOT': lambda x: x.lstrip('0'), 'BP_APPLNO': lambda x: 'N'+x.replace(',','') }
    if fmt == 4:
        converters = { 'Block Lot': lambda x: x.lstrip('0'), 'DBI Permit': lambda x: 'N'+x.replace(',',''),
                     'Location 1': lambda x: x.split('\n')[0]}
        
    X = pd.read_csv(fileName, sep=',', parse_dates=[], infer_datetime_format=True, quotechar='"', converters=converters)

    if fmt == 2 or fmt == 3:
        X = X.rename(columns={"AFF_UNITS_NET": "NET_AFF_UNITS", "BLKLOT": "APN"})
    if fmt == 3:
        X = X.rename(columns={"UNITSNET": "NET_UNITS"})
    if fmt == 4:
        X = X.rename(columns={"Units": "UNITS", "Net Added Units": "NET_UNITS", "Best Stat": "BESTSTAT", "Best Date": "BESTDATE",  'Location 1': 'NAMEADDR', 'Block Lot': 'APN',
                             'DBI Permit': 'BP_APPLNO'})
        X['NET_AFF_UNITS'] = 0
        X['AFF_UNITS'] = 0
    if fmt == 5:
        X = X.rename(columns={"PROPUSE": "PROJECT_TYPE"})
    #X = X[X['BESTSTAT'] == 'CONSTRUCTION']
    X = X[X['UNITS'] > 0]
    X = X[X['NET_UNITS'] >= 0]
    
    # Filtering records without permits
    M = X[(X['BP_APPLNO'] == 'N') | (X['BP_APPLNO'] == 'NMULTIPLE')][columns+['BP_APPLNO']]
    X = X[(X['BP_APPLNO'] != 'N') & (X['BP_APPLNO'] != 'NMULTIPLE')]
    
    # Filtering duplicated records
    D = X[X.duplicated('BP_APPLNO', keep=False)][columns+['BP_APPLNO']].sort_values('BP_APPLNO')
    X=X[~X.duplicated('BP_APPLNO')]
    
    # Set unique index by Permit ID
    X = X.set_index('BP_APPLNO')
    X.index.names = ['PERMIT_ID']
    X['BESTDATE'] = pd.to_datetime(X["BESTDATE"])
    X[['UNITS', 'NET_UNITS', 'NET_AFF_UNITS', 'AFF_UNITS']] = X[['UNITS', 'NET_UNITS', 'NET_AFF_UNITS', 'AFF_UNITS']].astype(int)
    
    X = X[columns]
    
    return X,M,D

# Helper Indexes

y13q4 = 0
y14q1 = 1
y14q2 = 2
y14q3 = 3
y14q4 = 4
y15q1 = 5
y15q2 = 6
y15q3 = 7
y15q4 = 8
y16q1 = 9
y16q2 = 10
y16q3 = 11
y16q4 = 12
y17q1 = 13

# Files and versions

files = [
    {'label': '2013\'Q4', 'file': 'data/San_Francisco_Development_Pipeline_2013_Quarter_4.csv', 'format': 4},
    {'label': '2014\'Q1', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_1.csv', 'format': 4},
    {'label': '2014\'Q2', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_2.csv', 'format': 4},
    {'label': '2014\'Q3', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_3.csv', 'format': 4},
    {'label': '2014\'Q4', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_4.csv', 'format': 3},
    {'label': '2015\'Q1', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_1.csv', 'format': 2},
    {'label': '2015\'Q2', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_2.csv', 'format': 1},
    {'label': '2015\'Q3', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_3.csv', 'format': 1},
    {'label': '2015\'Q4', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_4.csv', 'format': 1},
    {'label': '2016\'Q1', 'file': 'data/SF_Development_Pipeline_2016_Q1.csv', 'format': 1},
    {'label': '2016\'Q2', 'file': 'data/SF_Development_Pipeline_2016_Q2.csv', 'format': 5},
    {'label': '2016\'Q3', 'file': 'data/SF_Development_Pipeline_2016_Q3.csv', 'format': 1},
    {'label': '2016\'Q4', 'file': 'data/SF_Development_Pipeline_2016_Q4.csv', 'format': 1},
    {'label': '2017\'Q1', 'file': 'data/SF_Development_Pipeline_2017_Q1.csv', 'format': 1}
]

# Loading Data
count = len(files)
labels = []
data = []
missing = []
duplicates = []

for f in files:
    t,m,d = loadData(f['file'], f['label'], f['format'])
    data.append(t)
    missing.append(m)
    duplicates.append(d)
    labels.append(f['label'])

data/San_Francisco_Development_Pipeline_2013_Quarter_4.csv
data/San_Francisco_Development_Pipeline_2014_Quarter_1.csv
data/San_Francisco_Development_Pipeline_2014_Quarter_2.csv
data/San_Francisco_Development_Pipeline_2014_Quarter_3.csv
data/San_Francisco_Development_Pipeline_2014_Quarter_4.csv
data/San_Francisco_Development_Pipeline_2015_Quarter_1.csv
data/San_Francisco_Development_Pipeline_2015_Quarter_2.csv
data/San_Francisco_Development_Pipeline_2015_Quarter_3.csv
data/San_Francisco_Development_Pipeline_2015_Quarter_4.csv
data/SF_Development_Pipeline_2016_Q1.csv
data/SF_Development_Pipeline_2016_Q2.csv
data/SF_Development_Pipeline_2016_Q3.csv
data/SF_Development_Pipeline_2016_Q4.csv
data/SF_Development_Pipeline_2017_Q1.csv


# Displaying duplicated records
Printing out all records with duplicate building permit id for manual inspection.

In [51]:
for i in range(count):
    if (len(duplicates[i]) > 0):
        print()
        print("Duplicated records at {}".format(labels[i]))
        display(duplicates[i])


Duplicated records at 2013'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
265,7.0,7.0,0,0,4132 Third Street,5260002,BP ISSUED,08-Aug-13,N200707055953
812,7.0,7.0,0,0,4132 03rd St,5260003,BP ISSUED,08-Aug-13,N200707055953
188,2.0,1.0,0,0,1076 Hampshire St,4152016,CONSTRUCTION,21-Feb-12,N200709193092
515,2.0,2.0,0,0,1078 Hampshire St,4152046,CONSTRUCTION,21-Feb-12,N200709193092
127,1.0,1.0,0,0,83 Panorama Dr,2821010,BP APPROVED,02-Mar-12,N200711077576
527,1.0,1.0,0,0,83 Panorama Dr,2821023,BP APPROVED,02-Mar-12,N200711077576
299,2.0,1.0,0,0,268 Madison St,5943008,CONSTRUCTION,13-Sep-13,N200711077587
520,1.0,1.0,0,0,268 Madison St,5943051,CONSTRUCTION,13-Sep-13,N200711077587
307,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,25-Apr-13,N200806194898
530,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,25-Apr-13,N200806194898



Duplicated records at 2014'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
657,7.0,7.0,0,0,4132 03rd St,5260003,CONSTRUCTION,26-Feb-14,N200707055953
124,7.0,7.0,0,0,4132 Third Street,5260002,CONSTRUCTION,26-Feb-14,N200707055953
60,2.0,1.0,0,0,1076 Hampshire St,4152016,CONSTRUCTION,21-Feb-12,N200709193092
627,2.0,2.0,0,0,1078 Hampshire St,4152046,CONSTRUCTION,21-Feb-12,N200709193092
756,1.0,1.0,0,0,83 Panorama Dr,2821023,BP APPROVED,02-Mar-12,N200711077576
281,1.0,1.0,0,0,83 Panorama Dr,2821010,BP APPROVED,02-Mar-12,N200711077576
748,1.0,1.0,0,0,268 Madison St,5943051,CONSTRUCTION,08-Jan-14,N200711077587
151,2.0,1.0,0,0,268 Madison St,5943008,CONSTRUCTION,08-Jan-14,N200711077587
758,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,25-Apr-13,N200806194898
159,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,25-Apr-13,N200806194898



Duplicated records at 2014'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
735,7.0,7.0,0,0,4132 03rd St,5260003,CONSTRUCTION,06/30/2014 12:00:00 AM,N200707055953
207,7.0,7.0,0,0,4132 Third Street,5260002,CONSTRUCTION,06/30/2014 12:00:00 AM,N200707055953
741,2.0,2.0,0,0,1078 Hampshire St,4152046,CONSTRUCTION,02/21/2012 12:00:00 AM,N200709193092
208,2.0,1.0,0,0,1076 Hampshire St,4152016,CONSTRUCTION,02/21/2012 12:00:00 AM,N200709193092
9,1.0,1.0,0,0,83 Panorama Dr,2821010,BP ISSUED,04/04/2014 12:00:00 AM,N200711077576
754,1.0,1.0,0,0,83 Panorama Dr,2821023,BP ISSUED,04/04/2014 12:00:00 AM,N200711077576
746,1.0,1.0,0,0,268 Madison St,5943051,CONSTRUCTION,01/08/2014 12:00:00 AM,N200711077587
306,2.0,1.0,0,0,268 Madison St,5943008,CONSTRUCTION,01/08/2014 12:00:00 AM,N200711077587
313,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,04/25/2013 12:00:00 AM,N200806194898
756,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,04/25/2013 12:00:00 AM,N200806194898



Duplicated records at 2014'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
514,7.0,7.0,0,0,4132 03rd St,5260003,CONSTRUCTION,28-Jul-14,N200707055953
236,7.0,7.0,0,0,4132 Third Street,5260002,CONSTRUCTION,28-Jul-14,N200707055953
171,2.0,1.0,0,0,1076 Hampshire St,4152016,CONSTRUCTION,21-Feb-12,N200709193092
654,2.0,2.0,0,0,1078 Hampshire St,4152046,CONSTRUCTION,21-Feb-12,N200709193092
668,1.0,1.0,0,0,83 Panorama Dr,2821023,CONSTRUCTION,23-Sep-14,N200711077576
112,1.0,1.0,0,0,83 Panorama Dr,2821010,CONSTRUCTION,23-Sep-14,N200711077576
659,1.0,1.0,0,0,268 Madison St,5943051,CONSTRUCTION,08-Jan-14,N200711077587
269,2.0,1.0,0,0,268 Madison St,5943008,CONSTRUCTION,08-Jan-14,N200711077587
270,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,25-Apr-13,N200806194898
670,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,25-Apr-13,N200806194898



Duplicated records at 2014'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
411,2,1,0,0,1076 HAMPSHIRE ST,4152016,CONSTRUCTION,02/21/2012,N200709193092
448,2,2,0,0,1078 HAMPSHIRE ST,4152046,CONSTRUCTION,02/21/2012,N200709193092
400,2,2,0,0,447 LINDEN ST,818048,CONSTRUCTION,10/10/2014,N200912304034
409,2,2,0,0,443 LINDEN ST,818049,CONSTRUCTION,10/10/2014,N200912304034
69,98,98,0,0,1239 TURK ST,757027,CONSTRUCTION,03/27/2015,N201207104447
70,98,98,98,98,1100 GOLDEN GATE AV,757025,CONSTRUCTION,03/27/2015,N201207104447
38,191,191,160,160,218 BUCHANAN ST,857001A,CONSTRUCTION,03/19/2015,N201209059006
55,133,133,160,160,55 LAGUNA STREET,857001,CONSTRUCTION,03/19/2015,N201209059006
53,139,139,0,0,555 FULTON ST,794028,CONSTRUCTION,03/31/2015,N201305036062
54,136,136,0,0,746 LAGUNA ST,794015,CONSTRUCTION,05/13/2010,N201305036062



Duplicated records at 2015'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
794,2,2,0,0,1078 HAMPSHIRE ST,4152046,CONSTRUCTION,02/21/2012,N200709193092
863,2,1,0,0,1076 HAMPSHIRE ST,4152016,CONSTRUCTION,02/21/2012,N200709193092
523,2,2,0,0,443 LINDEN ST,818049,CONSTRUCTION,10/10/2014,N200912304034
782,2,2,0,0,447 LINDEN ST,818048,CONSTRUCTION,10/10/2014,N200912304034



Duplicated records at 2015'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
321,8,6,0,0,95 LELAND AV,6250028,BP REINSTATED,03/05/2010,N200704128664
346,8,5,0,0,95 LELAND AV,6250037,BP REINSTATED,03/05/2010,N200704128664
170,59,59,7,7,249 PENNSYLVANIA AV,3999002,BP Filed,05/29/2015,N201505297549
222,16,16,0,0,502 07TH ST,3780001,BP FILED,05/29/2015,N201505297549



Duplicated records at 2016'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
415,2,1,0,0,1948 & 1948A QUESADA AV,5329011,BP FILED,04/11/2016,N201604114413
416,1,1,0,0,1948 QUESADA AV,5329011,BP FILED,04/11/2016,N201604114413


# Fixing duplicates
Re-adding duplicate records with corrected values.

In [52]:
def fix_duplicate(permitId, units, netUnits, affUnits, netAffUnits, addr, apn, beststat, bestdate):
    for i in range(count):
        if len(duplicates[i][duplicates[i]['BP_APPLNO'] == permitId]) > 0:
            data[i].loc[permitId] = [units, netUnits, affUnits, netAffUnits, addr, apn, beststat, bestdate]
        duplicates[i] = duplicates[i][duplicates[i]['BP_APPLNO'] != permitId]

fix_duplicate('N200709193092', 2, 1, 0, 0, '1076-1078 Hampshire St', '4152016', "CONSTRUCTION", date(2012, 2, 21))
fix_duplicate('N200711077587', 2, 1, 0, 0, '268 Madison St', '5943008', "CONSTRUCTION", date(2014, 2, 8))
fix_duplicate('N200707055953', 7, 7, 0, 0, '4132 03rd St', '5260003', "CONSTRUCTION", date(2014, 7, 28))
fix_duplicate('N200912304034', 2, 2, 0, 0, '447 Linden St', '818048', "CONSTRUCTION", date(2014, 10, 10))
fix_duplicate('N201209059006', 191, 191, 0, 0, '218 Buchanan St', '857001', "CONSTRUCTION", date(2014, 7, 31))
fix_duplicate('N200711077576', 1, 1, 0, 0, '83 Panorama Dr', '2821010', "CONSTRUCTION", date(2014, 9, 23))
fix_duplicate('N201207104447', 98, 98, 0, 0, '1100 GOLDEN GATE AV', '757025', "CONSTRUCTION", date(2015, 3, 27))
fix_duplicate('N201305036062', 139, 139, 0, 0, '555 FULTON ST', '794028', "CONSTRUCTION", date(2015, 3, 31))

hasDuplicates = False
for i in range(count):
    if (len(duplicates[i]) > 0):
        print()
        print("Duplicated records at {}".format(labels[i]))
        display(duplicates[i])
        hasDuplicates = True
if not hasDuplicates:
    print("No duplicates present!")


Duplicated records at 2013'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
307,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,25-Apr-13,N200806194898
530,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,25-Apr-13,N200806194898
440,1.0,1.0,0,0,1510 25th St,4224027,BP ISSUED,19-Aug-11,N201009140824
522,1.0,1.0,0,0,1510 25th St,4224158,BP ISSUED,19-Aug-11,N201009140824



Duplicated records at 2014'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
758,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,25-Apr-13,N200806194898
159,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,25-Apr-13,N200806194898
436,1.0,1.0,0,0,1510 25th St,4224027,BP ISSUED,19-Aug-11,N201009140824
750,1.0,1.0,0,0,1510 25th St,4224158,BP ISSUED,19-Aug-11,N201009140824
98,65.0,65.0,0,0,1000 16th St,3834001,BP ISSUED,07-Sep-12,N201112070227
97,385.0,385.0,0,0,1000 16th St,3833001,PL APPROVED,13-Jul-12,N201112070227



Duplicated records at 2014'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
313,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,04/25/2013 12:00:00 AM,N200806194898
756,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,04/25/2013 12:00:00 AM,N200806194898
328,61.0,61.0,0,0,5050 Mission St,6969001,BP APPROVED,03/13/2014 12:00:00 AM,N201006104250
119,61.0,61.0,0,0,5050 Mission St,6969011,BP APPROVED,03/13/2014 12:00:00 AM,N201006104250
426,1.0,1.0,0,0,1510 25th St,4224027,BP ISSUED,08/19/2011 12:00:00 AM,N201009140824
748,1.0,1.0,0,0,1510 25th St,4224158,BP ISSUED,08/19/2011 12:00:00 AM,N201009140824
236,385.0,385.0,0,0,1000 16th St,3833001,BP ISSUED,09/07/2012 12:00:00 AM,N201112070227
237,65.0,65.0,0,0,1000 16th St,3834001,BP ISSUED,09/07/2012 12:00:00 AM,N201112070227
505,69.0,69.0,0,0,388 Fulton St,785029,BP ISSUED,11/26/2013 12:00:00 AM,N201303273113
596,69.0,69.0,0,0,344 Fulton St,785029,BP ISSUED,11/26/2013 12:00:00 AM,N201303273113



Duplicated records at 2014'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
270,1.0,1.0,0,0,138 Alpha St,6208003,BP ISSUED,25-Apr-13,N200806194898
670,1.0,1.0,0,0,138 Alpha St,6208056,BP ISSUED,25-Apr-13,N200806194898
277,61.0,61.0,0,0,5050 Mission St,6969001,BP APPROVED,13-Mar-14,N201006104250
366,61.0,61.0,0,0,5050 Mission St,6969011,BP APPROVED,13-Mar-14,N201006104250
661,1.0,1.0,0,0,1510 25th St,4224158,BP ISSUED,19-Aug-11,N201009140824
332,1.0,1.0,0,0,1510 25th St,4224027,BP ISSUED,19-Aug-11,N201009140824
197,65.0,65.0,0,0,1000 16th St,3834001,BP ISSUED,07-Sep-12,N201112070227
196,385.0,385.0,0,0,1000 16th St,3833001,BP ISSUED,07-Sep-12,N201112070227
495,69.0,69.0,0,0,344 Fulton St,785029,BP ISSUED,26-Nov-13,N201303273113
947,69.0,69.0,0,0,388 Fulton St,785029,BP ISSUED,26-Nov-13,N201303273113



Duplicated records at 2014'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
16,418,418,0,0,57 TEHAMA ST,3736078A,BP FILED,04/30/2014,N201404304554
19,398,398,0,0,41 TEHAMA ST,3736074,BP FILED,11/14/2013,N201404304554



Duplicated records at 2015'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
321,8,6,0,0,95 LELAND AV,6250028,BP REINSTATED,03/05/2010,N200704128664
346,8,5,0,0,95 LELAND AV,6250037,BP REINSTATED,03/05/2010,N200704128664
170,59,59,7,7,249 PENNSYLVANIA AV,3999002,BP Filed,05/29/2015,N201505297549
222,16,16,0,0,502 07TH ST,3780001,BP FILED,05/29/2015,N201505297549



Duplicated records at 2016'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
415,2,1,0,0,1948 & 1948A QUESADA AV,5329011,BP FILED,04/11/2016,N201604114413
416,1,1,0,0,1948 QUESADA AV,5329011,BP FILED,04/11/2016,N201604114413


# Displaying records without Permit ID
Printing all records without Permit ID

In [53]:
for i in range(count):
    if (len(missing[i]) > 0):
        print()
        print("Records without Permit ID at {}".format(labels[i]))
        display(missing[i])


Records without Permit ID at 2013'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
2,165.0,165.0,0,0,1400 Mission St,3507042,PL FILED,08-Apr-09,N
3,7.0,4.0,0,0,231 Ellis St,331001A,PL Filed,28-Feb-13,N
5,170.0,170.0,0,0,8 Washington Street,201012,PL Approved,19-Jun-12,N
7,6.0,6.0,0,0,1020 Broadway,150054,PL Filed,16-Jan-13,N
9,28.0,28.0,0,0,1601 Larkin St,620006,PL Approved,15-Nov-13,N
17,3.0,2.0,0,0,377 Filbert St,105065,PL Filed,21-Aug-13,N
21,170.0,170.0,0,0,168 Eddy St,331010,PL Approved,26-Mar-09,N
28,45.0,45.0,0,0,300 Grant Ave.,287013,PL Approved,06-Oct-11,N
36,26.0,26.0,0,0,2601 Van Ness Av,522002A,PL Filed,28-Aug-13,N
39,2.0,1.0,0,0,2919 Laguna St,531004,PL Approved,22-Oct-09,N



Records without Permit ID at 2014'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
4,2.0,1.0,0,0,451 34th Av,1466009,PL Approved,16-Jul-09,N
11,8619.0,7800.0,0,0,Treasure Island,1939001,PL Approved,15-Mar-11,N
26,8.0,8.0,0,0,49 Julian Av,3547032,PL Approved,26-Jan-12,N
48,185.0,185.0,0,0,706 Mission St,3706093,PL Approved,31-Jul-13,N
56,39.0,39.0,0,0,114 07th Street,3726103,PL Filed,14-Aug-12,N
59,119.0,119.0,0,0,325 Fremont St,3747012,PL Filed,02-May-13,N
62,1700.0,1094.0,0,0,1 Turner Tr,4167004,PL Filed,30-Jun-10,N
66,42.0,42.0,0,0,1174 Folsom St,3730023,PL Filed,29-Jul-13,N
76,398.0,398.0,0,0,41 Tehama St,3736074,PL Approved,14-Nov-13,N
83,13.0,69.0,0,0,935 Folsom St,3753140,PL Filed,19-Apr-10,N



Records without Permit ID at 2014'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,165.0,165.0,0,0,1400 Mission St,3507042,PL FILED,04/08/2009 12:00:00 AM,N
1,7.0,4.0,0,0,231 Ellis St,331001A,PL Filed,02/28/2013 12:00:00 AM,N
6,170.0,170.0,0,0,8 Washington Street,201012,PL Approved,06/19/2012 12:00:00 AM,N
7,28.0,28.0,0,0,1601 Larkin St,620006,PL Approved,11/15/2013 12:00:00 AM,N
12,62.0,62.0,0,0,1800 Van Ness Ave.,619009,PL Approved,10/20/2011 12:00:00 AM,N
18,3.0,2.0,0,0,377 Filbert St,105065,PL Filed,08/21/2013 12:00:00 AM,N
21,6.0,6.0,0,0,1020 Broadway,150054,PL Approved,03/24/2014 12:00:00 AM,N
27,27.0,27.0,0,0,832 Sutter St,281003,PL Approved,02/27/2014 12:00:00 AM,N
30,45.0,45.0,0,0,300 Grant Ave.,287013,PL Approved,10/06/2011 12:00:00 AM,N
35,170.0,170.0,0,0,168 Eddy St,331010,PL Approved,03/26/2009 12:00:00 AM,N



Records without Permit ID at 2014'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
2,165.0,165.0,0,0,1400 Mission St,3507042,PL FILED,08-Apr-09,N
3,7.0,4.0,0,0,231 Ellis St,331001A,PL Filed,28-Feb-13,N
7,24.0,23.0,0,0,740 Washington St,194009,PL Filed,26-Sep-14,N
8,170.0,170.0,0,0,8 Washington Street,201012,PL Approved,19-Jun-12,N
9,28.0,28.0,0,0,1601 Larkin St,620006,PL Approved,15-Nov-13,N
18,3.0,2.0,0,0,377 Filbert St,105065,PL Filed,21-Aug-13,N
21,45.0,45.0,0,0,300 Grant Ave.,287013,PL Approved,06-Oct-11,N
22,6.0,6.0,0,0,1020 Broadway,150054,PL Approved,24-Mar-14,N
28,27.0,27.0,0,0,832 Sutter St,281003,PL Approved,27-Feb-14,N
34,170.0,170.0,0,0,168 Eddy St,331010,PL Approved,26-Mar-09,N



Records without Permit ID at 2014'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,10500,10237,3345,3089,HUNTERS POINT EXPY,4886008,PL APPROVED,08/03/2010,N
1,8898,5677,0,-3221,PARKMERCED,7303001,PL APPROVED,05/25/2011,N
2,8619,7800,2050,1800,TREASURE ISLAND,1939001,PL APPROVED,03/15/2011,N
3,1700,915,1700,915,1654 SUNNYDALE AVE,6310001,PL FILED,04/28/2010,N
4,1600,994,0,0,1 TURNER TR,4167004,PL FILED,06/30/2010,N
5,1500,1500,0,0,PIER 48,9900048,PL FILED,04/23/2013,N
8,676,230,0,0,1390 MARKET ST,813007,PL APPROVED,05/28/2009,N
15,429,429,0,0,150 VAN NESS AVE,814001,PL FILED,04/23/2014,N
20,395,395,0,0,1200 17TH STREET,3949001,PL FILED,04/04/2012,N
22,355,355,0,0,340 FREMONT ST,3748006,BP FILED,08/03/2012,N



Records without Permit ID at 2015'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,53,53,6,6,2435-2445 16TH ST,3965021,PL FILED,03/25/2015,N
8,688,688,58,58,925 MISSION ST,3725093,PL FILED,05/15/2015,N
10,550,550,110,110,1500-1580 MISSION ST,3506002,PL FILED,12/18/2014,N
15,220,220,0,0,1601 MISSION ST,3514043,PL FILED,02/11/2015,N
16,429,429,0,0,150 VAN NESS AVE,814001,PL FILED,04/23/2014,N
19,340,340,0,0,800 INDIANA STREET,4105009,PL FILED,10/10/2013,N
20,316,305,0,0,950 MARKET ST,342001,PL FILED,11/19/2013,N
21,304,304,0,0,1066 MARKET ST,350003,PL FILED,06/18/2014,N
22,292,292,0,0,50 01ST ST,3708006,PL FILED,06/04/2014,N
23,276,276,0,0,1301 16TH STREET,3954016,PL FILED,09/16/2013,N



Records without Permit ID at 2015'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,688,688,58,58,925 MISSION ST,3725093,PL FILED,05/15/2015,N
4,550,550,110,110,1500-1580 MISSION ST,3506007,PL FILED,12/18/2014,N
7,340,340,0,0,800 INDIANA STREET,4105009,PL FILED,10/10/2013,N
11,429,429,0,0,150 VAN NESS AVE,0814001,PL FILED,04/23/2014,N
14,398,398,49,49,41 TEHAMA ST,3736074,PL APPROVED,11/14/2013,N
18,316,316,0,0,950 MARKET ST,0342001,PL FILED,11/19/2013,N
19,312,312,0,0,1270 MISSION ST,3701021,PL FILED,05/28/2015,N
20,304,304,0,0,1066 MARKET ST,0350003,PL FILED,06/18/2014,N
21,292,292,0,0,50 01ST ST,3708006,PL FILED,06/04/2014,N
22,276,276,0,0,1301 16TH STREET,3954016,PL FILED,09/16/2013,N



Records without Permit ID at 2015'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,980,980,0,0,700 INNES ST,4644002A,PL FILED,12/09/2014,N
1,1700,915,1700,915,HOPE SF SUNNYDALE,6310001,PL FILED,04/28/2010,N
2,767,767,0,0,10 SOUTH VAN NESS AV,3506004,PL FILED,04/14/2015,N
3,688,688,58,58,925 MISSION ST,3725093,PL FILED,05/15/2015,N
6,584,584,107,107,1601-1637 MARKET ST / 53 COLTON ST,3505001,PL FILED,07/10/2015,N
9,10500,10237,3345,3089,HUNTERS POINT EXPY,4886008,PL APPROVED,08/03/2010,N
10,1679,1679,252,252,SCHLAGE LOCK,5087003,PL APPROVED,07/22/2014,N
11,1500,1500,0,0,PIER 48 / SEAWALL LOT 337,9900048,PL FILED,04/23/2013,N
12,1100,1100,0,0,PIER 70,4110001,PL FILED,02/10/2015,N
13,1600,994,0,0,HOPE SF POTRERO,4167004,PL FILED,06/30/2010,N



Records without Permit ID at 2015'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,10172,9916,3345,3089,"HUNTERS POINT SHIPYARD, PHASE II",4884025,PL APPROVED,10/27/2015,N
1,4666,4666,284,284,"PARKMERCED, PHASE I",7308001,BP FILED,01/25/2016,N
2,1679,1679,168,168,SCHLAGE LOCK,5087003,PL APPROVED,07/22/2014,N
3,1500,1500,0,0,PIER 48 / SEAWALL LOT 337,9900048,PL FILED,04/23/2013,N
4,1100,1100,0,0,PIER 70,4110001,PL FILED,02/10/2015,N
5,1600,994,0,0,HOPE SF POTRERO,4167004,PL FILED,06/30/2010,N
6,980,980,0,0,700 INNES ST,4644002A,PL FILED,12/09/2014,N
7,1700,915,1700,915,HOPE SF SUNNYDALE,6310001,PL FILED,04/28/2010,N
9,767,767,0,0,10 SOUTH VAN NESS AV,3506004,PL FILED,04/14/2015,N
10,688,688,58,58,5M,3725093,PL APPROVED,01/04/2016,N



Records without Permit ID at 2016'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
21,6,6,0,0,1020 BROADWAY,0150054,PL APPROVED,03/24/2014,N
24,186,186,0,0,1028 MARKET ST,0350002,PL FILED,04/29/2014,N
28,9,9,0,0,1033 POLK ST,0694003,PL FILED,06/17/2015,N
33,2,1,0,0,1036 WISCONSIN ST,4219004,PL APPROVED,10/06/2009,N
49,2,1,0,0,11 GLADYS ST,5710027,PL FILED,04/15/2015,N
67,2,1,0,0,1125 HAMPSHIRE ST,4211021,PL APPROVED,12/04/2015,N
68,164,164,0,0,1125 MARKET ST,3702047,PL FILED,12/18/2013,N
78,4,2,0,0,115 PLYMOUTH AV,7138056,PL FILED,08/15/2014,N
102,395,395,0,0,1200 17TH ST,3949001,PL FILED,04/04/2012,N
104,135,135,0,0,1200 VAN NESS AV,0691005,PL FILED,12/17/2015,N



Records without Permit ID at 2016'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
8,855,855,0,0,10 SOUTH VAN NESS AV,3506004,PL FILED,04/11/2016,N
10,1,1,0,0,100 ALPINE TER,2609066,PL FILED,03/12/2015,N
18,256,256,31,31,1001 VAN NESS AV,0714016,PL FILED,01/08/2015,N
27,186,186,0,0,1028 MARKET ST,0350002,PL FILED,04/29/2014,N
31,9,9,0,0,1033 POLK ST,0694003,PL FILED,06/17/2015,N
39,46,42,0,0,1052-1060 FOLSOM ST AND 190-194 RUSS ST,3731021,PL FILED,06/23/2016,N
50,8,8,0,0,1082 HOWARD ST,3726028,PL FILED,01/21/2016,N
54,2,1,0,0,11 GLADYS ST,5710027,PL FILED,04/15/2015,N
69,2,1,0,0,1125 HAMPSHIRE ST,4211021,PL APPROVED,12/04/2015,N
70,164,164,0,0,1125 MARKET ST,3702047,PL FILED,12/18/2013,N



Records without Permit ID at 2016'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
0,4,4,0,0,3140 16TH ST,3555018,PL APPROVED,07/27/2016,N
2,15,15,0,0,824 HYDE ST,0280017,PL APPROVED,03/24/2016,N
31,96,96,96,96,1296 SHOTWELL ST,6571051,PL FILED,04/04/2016,N
43,220,220,0,0,1601 MISSION ST,3514043,PL APPROVED,04/07/2016,N
52,4,3,0,0,21 ROSEMONT PL,3534020,PL APPROVED,07/23/2014,N
71,37,37,0,0,1245 FOLSOM ST,3756041,PL FILED,03/22/2016,N
84,359,359,0,0,1270 MISSION ST,3701021,PL FILED,06/28/2016,N
91,172,172,28,28,1301 16TH ST,3954016,PL APPROVED,06/28/2016,N
103,2,1,0,0,132 CORBETT AV,2652010,PL APPROVED,07/10/2014,N
104,124,18,0,0,1320 - 1380 LOMBARD ST,0499002D,PL FILED,01/12/2016,N



Records without Permit ID at 2016'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
1,94,94,0,0,955 POST ST,0302021,PL FILED,12/09/2016,N
5,164,164,0,0,1125 MARKET ST,3702047,PL FILED,12/18/2013,N
61,37,37,0,0,1245 FOLSOM ST,3756041,PL FILED,03/22/2016,N
76,94,94,94,94,1296 SHOTWELL ST,6571051,PL APPROVED,12/07/2016,N
84,172,172,28,28,1301 16TH ST,3954016,PL APPROVED,06/28/2016,N
86,231,231,0,0,1333 GOUGH ST / 1481 POST ST,0697037,PL FILED,11/13/2013,N
117,17,2,0,0,140 DUBOCE AV,3502024,PL FILED,09/11/2014,N
129,63,63,8,8,1436 & 1498 POLK ST/ 1567 CALIFORNIA ST,0645014,PL APPROVED,10/16/2015,N
134,4,2,0,0,146 JASPER PL,0103021,PL FILED,06/24/2013,N
138,45,45,0,0,1463 STEVENSON ST,3532013,PL FILED,06/28/2016,N



Records without Permit ID at 2017'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE,BP_APPLNO
7,164,164,0,0,1125 MARKET ST,3702047,PL FILED,12/18/2013,N
58,37,37,0,0,1245 FOLSOM ST,3756041,PL FILED,03/22/2016,N
78,94,94,94,94,1296 SHOTWELL ST,6571051,PL APPROVED,12/07/2016,N
98,44,44,0,0,230 07TH ST,3730004,PL FILED,07/30/2014,N
104,18,16,0,0,231 ELLIS ST,0331001A,PL FILED,02/28/2013,N
113,4,3,0,0,3532 23RD ST,3634009,PL APPROVED,07/08/2016,N
126,172,172,28,28,1301 16TH ST,3954016,PL APPROVED,06/28/2016,N
137,124,18,0,0,1320 - 1380 LOMBARD ST,0499002D,PL FILED,01/12/2016,N
138,4,1,0,0,1331 WASHINGTON ST,0215023,PL FILED,06/30/2016,N
139,231,231,0,0,1333 GOUGH ST / 1481 POST ST,0697037,PL FILED,11/13/2013,N


# Fixing records without Permit ID
Correcting records without Permit ID

In [54]:
def fix_missing(permitId, units, netUnits, affUnits, netAffUnits, addr, apn):
    for i in range(count):
        if len(missing[i][missing[i]['APN'].isin(apn)]) > 0:
            data[i].loc[permitId] = [units, netUnits, affUnits, netAffUnits, addr, apn[0]]
        missing[i] = missing[i][~missing[i]['APN'].isin(apn)]
        
fix_missing("N_CUSTOM_JAMESTOWN", 132, 14, 0, 0, '833-881 Jamestown', ['4991277'])
fix_missing("N_CUSTOM_BROTHERHOOD", 196, 196, 0, 0, '800 BROTHERHOOD WAY', ['7331003', '7331005'])
fix_missing("N_CUSTOM_SHIPYARD", 229, 229, 0, 0, 'HUNTERS POINT SHIPYARD, PHASE I', ['4591C001', '4624031'])

hasMissing = False
for i in range(count):
    if (len(missing[i]) > 0):
        print()
        print("Records without Permit ID at {}".format(labels[i]))
        display(missing[i])
        hasMissing = True
if not hasMissing:
    print("No missing present")

ValueError: cannot set a row with mismatched columns

# Searching for incorrect unit values
Searching projects with unit counts varying from quarter to quarter and projects with net units exceeding total units.

In [55]:
def find_incorrect(column):
    incorrect = {}
    for i in range(count-1):
        X = data[i]
        for j in range(i+1, count):
            X2 = data[j]
        
            X = X[X.index.isin(X2.index)].sort_index()
            X2 = X2[X2.index.isin(X.index)].sort_index()
            Y = X[X[column] != X2[column]]
            Y2 = X2[X[column] != X2[column]]
            if len(Y)>0:
                for index, row in Y.iterrows():
                    a = int(Y.loc[index][column])
                    b = int(Y2.loc[index][column])
                    if index not in incorrect:
                        incorrect[index] = { 'min': min(a, b), 'max': max(a,b)}
                    else:
                        incorrect[index] = { 'min': min(incorrect[index]['min'], a, b), 'max': max(incorrect[index]['max'], a, b)}
    return incorrect


incorrect = find_incorrect('UNITS')
print("Unit number fluctuation")
display(incorrect)

incorrectNet = find_incorrect('NET_UNITS')
print("Net Unit number fluctuation")
display(incorrectNet)

Unit number fluctuation


{'N200507208144': {'max': 25, 'min': 12},
 'N200512281146': {'max': 110, 'min': 107},
 'N200605161774': {'max': 452, 'min': 432},
 'N200607146552': {'max': 26, 'min': 22},
 'N200711077587': {'max': 80, 'min': 1},
 'N200806275522': {'max': 41, 'min': 37},
 'N200807176988': {'max': 2, 'min': 1},
 'N200810013158': {'max': 16, 'min': 12},
 'N200810013162': {'max': 16, 'min': 12},
 'N200810315586': {'max': 156, 'min': 117},
 'N200810315636': {'max': 15, 'min': 13},
 'N200908124639': {'max': 19, 'min': 18},
 'N201009140800': {'max': 320, 'min': 305},
 'N201010052342': {'max': 84, 'min': 72},
 'N201011305795': {'max': 9, 'min': 7},
 'N201012176969': {'max': 2, 'min': 1},
 'N201012237367': {'max': 39, 'min': 23},
 'N201109144613': {'max': 16, 'min': 3},
 'N201112070227': {'max': 470, 'min': 65},
 'N201204168406': {'max': 83, 'min': 50},
 'N201207124717': {'max': 806, 'min': 285},
 'N201208036505': {'max': 384, 'min': 348},
 'N201209059006': {'max': 450, 'min': 191},
 'N201209069080': {'max': 5

Net Unit number fluctuation


{'N200507208144': {'max': 25, 'min': 12},
 'N200512281146': {'max': 107, 'min': 12},
 'N200605161774': {'max': 452, 'min': 432},
 'N200607146552': {'max': 26, 'min': 22},
 'N200704128664': {'max': 6, 'min': 5},
 'N200709193092': {'max': 2, 'min': 1},
 'N200711077587': {'max': 77, 'min': 1},
 'N200806275522': {'max': 41, 'min': 37},
 'N200807176988': {'max': 2, 'min': 1},
 'N200810013158': {'max': 16, 'min': 12},
 'N200810013162': {'max': 16, 'min': 12},
 'N200810315586': {'max': 156, 'min': 117},
 'N200810315636': {'max': 15, 'min': 13},
 'N201009140800': {'max': 320, 'min': 305},
 'N201010052342': {'max': 84, 'min': 72},
 'N201011305795': {'max': 9, 'min': 7},
 'N201012176969': {'max': 2, 'min': 1},
 'N201012237367': {'max': 39, 'min': 23},
 'N201109144613': {'max': 16, 'min': 3},
 'N201112070227': {'max': 470, 'min': 65},
 'N201204168406': {'max': 83, 'min': 50},
 'N201207124717': {'max': 806, 'min': 285},
 'N201208036505': {'max': 384, 'min': 348},
 'N201209059006': {'max': 450, 'mi

In [56]:
def fix_units(permitId, units, column):
    for i in range(count):
        if permitId in data[i].index:
            d = data[i].loc[permitId]
            d[column] = units
            data[i].loc[permitId] = d

for k in incorrect:
    fix_units(k, incorrect[k]['max'], 'UNITS')
for k in incorrectNet:
    fix_units(k, incorrectNet[k]['max'], 'NET_UNITS')    
    
# 201 Folsom
fix_units('N201207124717', 656, 'UNITS')
fix_units('N201207124717', 656, 'NET_UNITS')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


# Detection of completed buildings, p.1
Identifying completed projects as the ones that were in construction in a given quarter and are removed from the pipeline in the following quarter.

In [57]:
def buildStats(data):
    buildings = list(map(lambda x: len(x), completed))
    units = list(map(lambda x: x['UNITS'].values.sum(), completed))
    netUnits = list(map(lambda x: x['NET_UNITS'].values.sum(), completed))
    aff = list(map(lambda x: x['AFF_UNITS'].values.sum(), completed[4:]))
    netAff = list(map(lambda x: x['NET_AFF_UNITS'].values.sum(), completed[4:]))
    return { "buildings": buildings, "units": units, "netUnits": netUnits, "aff": aff, "netAff": netAff }

def printStats(stats):
    print("Buildings: {}".format(stats["buildings"]))
    print("Net Units: {}".format(stats["netUnits"]))
    print("Total Units: {}".format(sum(stats["netUnits"])))
    print("Net Affordable Units: {}".format(stats["netAff"]))
    print("Total Affordable Units: {}".format(sum(stats["netAff"])))

def contains(data, key):
    for i in range(len(data)):
        if key in data[i].index:
            return True
    return False

In [58]:
completed = []
for i in range(len(data)-1):
    X = data[i]
    X2 = data[i+1]
    X = X[~X.index.isin(X2.index)]
    completed.append(X)

stats = buildStats(completed)
printStats(stats)

Buildings: [28, 37, 32, 112, 37, 43, 29, 40, 110, 103, 45, 107, 87]
Net Units: [1889, 628, 736, 3110, 909, 1126, 856, 947, 2118, 1819, 1250, 4013, 1247]
Total Units: 20648
Net Affordable Units: [161, 10, 34, 167, 126, 101, 175, 116, 75]
Total Affordable Units: 965


# Detection of completed buildings, p.2
Correcting for projects that re-appear in the pipeline after being removed.

In [59]:
completed = []
for i in range(len(data)-1):
    X = data[i]
    for j in range(i+1,len(data)):
        X2 = data[j]
        X = X[~X.index.isin(X2.index)]
    completed.append(X)

stats = buildStats(completed)
printStats(stats)

Buildings: [27, 36, 32, 75, 31, 34, 28, 39, 110, 103, 44, 104, 87]
Net Units: [1879, 627, 736, 2006, 744, 578, 855, 931, 2118, 1819, 1171, 3767, 1247]
Total Units: 18478
Net Affordable Units: [1, 10, 34, 167, 126, 101, 175, 23, 75]
Total Affordable Units: 712


# Percent of units in top 10 projects for each quartal
Here we print the top ten projects (by net new units) for each quarter and compare their contribution to the total net number of units completed in the same period. As we see below, in a typical quarter the top ten projects account for 90%+ of citywide housing production.

In [60]:
for i in range(len(labels)-1):
    topUnits = completed[i].sort_values('NET_UNITS', ascending=False).head(10)['NET_UNITS'].values.sum()
    units = completed[i]['NET_UNITS'].values.sum()
    print(labels[i+1])
    print(topUnits/units)

2014'Q1
0.973922299095
2014'Q2
0.912280701754
2014'Q3
0.953804347826
2014'Q4
0.847956131605
2015'Q1
0.961021505376
2015'Q2
0.956747404844
2015'Q3
0.983625730994
2015'Q4
0.924812030075
2016'Q1
0.847025495751
2016'Q2
0.911489829577
2016'Q3
0.949615713066
2016'Q4
0.701088399257
2017'Q1
0.850040096231


# Group Stats by Year

In [61]:
for i in range(3):
    yearCount = int(sum(stats['netUnits'][i*4:i*4+4]))
    print("Year {}:      {}".format(2014 + i, yearCount))
print("Year 2017 (Q1): {}".format(completed[len(completed) - 1]['NET_UNITS'].sum()))

Year 2014:      5248
Year 2015:      3108
Year 2016:      8875
Year 2017 (Q1): 1247


# Result Table (top 10)

In [62]:
for i in range(len(labels)-1):
    print(labels[i+1])
    display(completed[i].sort_values('NET_UNITS', ascending=False).head(10))

2014'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N200607207084,754,754,0,0,1401 Market St,3507041,CONSTRUCTION,2012-02-23
N201104224606,315,315,0,0,185 Channel St,8711023,CONSTRUCTION,2013-12-13
N201207054130,273,273,0,0,1155 04th St,8713001,CONSTRUCTION,2013-12-23
N201203135987,172,172,0,0,1200 04th St,8711017,BP ISSUED,2012-03-26
N201012217106,115,115,0,0,1960-1998 Market St,872005,CONSTRUCTION,2013-12-19
N200506246051,88,88,0,0,333 Fremont St,3747019,CONSTRUCTION,2013-12-12
N200912223711,52,52,0,0,63 West Point Rd,4624004,CONSTRUCTION,2013-04-09
N201109074027,24,24,0,0,1600 Market St,854001,CONSTRUCTION,2013-12-30
N200701051074,19,19,0,0,246 Ritch St,3776092,CONSTRUCTION,2013-10-23
N200607176702,18,18,0,0,1801 Mission St,3548039,BP Filed,2006-07-17


2014'Q2


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201106017202,182,182,0,0,260 05th St,3732008,CONSTRUCTION,2014-03-31
N200210249843,134,134,0,0,555 Market St,3708058,PL APPROVED,2002-12-05
N201111038205,40,40,0,0,1501 15th St,3553054,CONSTRUCTION,2014-02-25
N201110146841,38,38,0,0,1645-1661 Pacific Av,595013,CONSTRUCTION,2014-03-24
N201207164905,36,36,0,0,345 06th St,3753081,BP Filed,2012-07-16
N201012227225,35,35,0,0,495 Cambridge St,5992A060,BP Filed,2010-12-22
N200608290880,35,35,0,0,1080 Sutter St,279011,CONSTRUCTION,2014-01-14
N200602104391,29,29,0,0,793 South Van Ness Av,3591024,PL Filed,2012-08-14
N201006295585,23,23,0,0,42 Harriet St,3731101,PL Approved,2010-12-20
N201202154236,20,20,0,0,1717 17th St,3980007,CONSTRUCTION,2014-03-28


2014'Q3


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201012036075,150,150,0,0,1000 Fourth Street (block 13 East),8711014,CONSTRUCTION,2014-05-09
N201111179162,106,106,0,0,740 Illinois St And 2121 Third St,4045002,CONSTRUCTION,2014-05-23
N200712211199,100,100,0,0,973 Market St,3704069,CONSTRUCTION,2008-08-14
N201012156753,90,90,0,0,101 Golden Gate Av,349001,CONSTRUCTION,2014-06-30
N201207205377,88,88,0,0,2175 Market St,3543011,CONSTRUCTION,2014-06-18
N200212244171,81,71,0,0,1301 Indiana St,4228158,CONSTRUCTION,2014-06-02
N200810063512,55,55,0,0,474 Natoma Street,3725101,CONSTRUCTION,2013-11-25
N201304265571,19,19,0,0,1816 Eddy St,1127064,CONSTRUCTION,2014-02-07
N201208248160,12,12,0,0,2401 16th St,3965001,CONSTRUCTION,2014-06-30
N200706204573,11,11,0,0,3418 26th St,6529012,CONSTRUCTION,2013-07-08


2014'Q4


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N200412211855,312,312,0,0,425 First Street,3765015,CONSTRUCTION,2014-06-09
N201106017208,282,282,0,0,900 Folsom St,3732009,CONSTRUCTION,2014-08-08
N201108233049,273,273,0,0,55 9th St,3701064,CONSTRUCTION,2013-12-09
N201203135986,188,188,0,0,701 Long Bridge St,8711019,BP ISSUED,2012-03-26
N200509284149,188,188,0,0,5800 03rd St,5431A043,PL APPROVED,2012-10-25
N9924080S,132,132,0,0,833-881 Jamestown,4991277,CONSTRUCTION,2007-09-17
N201301168124,114,114,0,0,2558 Mission St,3616007,CONSTRUCTION,2014-09-11
N201105166063,75,75,0,0,235 Broadway,165021,CONSTRUCTION,2014-09-19
N201203015201,71,71,0,0,50 Phelan Av,3180001,CONSTRUCTION,2014-09-05
N200607126309,66,66,0,0,48 Tehama St,3736085,BP Filed,2006-07-12


2015'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201208026344,399,399,0,0,100 VAN NESS AV,814020,CONSTRUCTION,2015-03-30
N201203196326,182,182,0,0,240 05TH ST,3732150,BP ISSUED,2012-08-27
N201205180774,37,37,0,0,25 DOLORES ST,3534069,CONSTRUCTION,2015-01-09
N200810023278,22,22,0,0,2210 MARKET ST,3560001,BP ISSUED,2012-03-14
N201007297765,21,21,0,0,2353 LOMBARD ST,512025,BP FILED,2010-07-29
N200408121427,18,18,0,0,899 VALENCIA ST,3596113,CONSTRUCTION,2015-03-03
N200311180506,13,13,0,0,200 DOLORES ST,3557062,BP FILED,2008-08-19
N201310078679,9,9,1,1,33 NORFOLK ST,3521053A,PL FILED,2014-04-17
N9902819,8,8,0,0,1179 TENNESSEE ST,4172053,CONSTRUCTION,2014-12-18
N200603025880,6,6,0,0,782-786 ANDOVER ST,5825007,CONSTRUCTION,2013-11-08


2015'Q2


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201306280802,220,220,0,0,250 4TH ST,3733008,BP ISSUED,2014-09-12
N201301319232,160,160,10,10,1321 Mission Street,3509043,CONSTRUCTION,2015-03-31
N201306280783,132,132,0,0,144 KING ST,3794024,CONSTRUCTION,2015-06-05
N201410017815,155,17,0,0,33 POWELL ST,330004,CONSTRUCTION,2015-05-28
N200608290916,8,8,0,0,382 RANDOLPH ST,7088051,BP APPROVED,2011-11-28
N201303051501,6,4,0,0,2500 CLEMENT ST,1407017,BP FILED,2013-03-05
N201302210648,3,3,0,0,39 SAN CARLOS ST,3576018,CONSTRUCTION,2015-03-05
N201305026020,3,3,0,0,1100 POTRERO AV,4211001,BP FILED,2013-05-02
N201312043354,24,3,0,0,885 FRANKLIN ST,744020,BP FILED,2013-12-04
N200612139805,3,3,0,0,4411 CABRILLO ST,1688001A,BP FILED,2006-12-13


2015'Q3


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201009140800,320,320,0,0,45 LANSING ST,3749059,CONSTRUCTION,2015-07-14
N201209119428,210,210,0,0,800 Brotherhood Way,7331005,CONSTRUCTION,2012-09-11
N200810315586,156,156,18,18,1415 MISSION ST,3510001,CONSTRUCTION,2015-07-09
N201010143017,67,67,9,9,527 STEVENSON ST,3703012,CONSTRUCTION,2015-07-23
N200907223197,63,63,7,7,101 DONAHUE ST,4591C042,CONSTRUCTION,2015-07-23
N201303122049,15,15,0,0,248 - 252 09TH ST,3518006,CONSTRUCTION,2015-02-09
N200809091137,4,4,0,0,285 OCEAN AV,6951021,PL APPROVED,2009-11-18
N201301258793,3,2,0,0,748 TREAT AV,3612055,CONSTRUCTION,2015-03-26
N200312243005,2,2,0,0,395 ATHENS ST,6022014,CONSTRUCTION,2014-06-11
N201307313252,3,2,0,0,1681 Fulton St,1185026,CONSTRUCTION,2015-04-22


2015'Q4


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201304023626,409,409,0,0,280 BEALE ST,3738004,CONSTRUCTION,2015-09-24
N201212216752,190,190,167,167,1400 MISSION ST,3507039,CONSTRUCTION,2015-08-04
N200903134091,113,113,0,0,429 BEALE ST,3767305,PL APPROVED,2009-05-14
N201412163796,47,47,0,0,1 EARL ST,4591D131,BP ISSUED,2015-09-21
N201509217557,32,32,0,0,400 GROVE ST,0793103,BP ISSUED,2015-10-14
N201501145720,21,21,0,0,47 KIRKWOOD AV,4591C143,BP FILED,2015-01-14
N201412113580,18,18,0,0,89 KIRKWOOD AV,4591C165,BP FILED,2014-12-11
N201501145724,11,11,0,0,576 HUDSON AV,4591C032,BP FILED,2015-01-14
N201501145725,10,10,0,0,536 HUDSON AV,4591C020,BP FILED,2015-01-14
N201105256770,10,10,0,0,140 09TH ST,3509005,CONSTRUCTION,2015-01-29


2016'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N200605161774,452,452,0,0,399 FREMONT ST,3747320,CONSTRUCTION,2016-01-07
N201211073775,271,271,23,23,5800 03RD ST (BUILDING 4),5431A042,CONSTRUCTION,2016-01-14
N201212246822,263,263,0,0,718 LONG BRIDGE ST,8710007,CONSTRUCTION,2016-01-08
N201210051462,182,182,0,0,PARCEL P - MARKET OCTAVIA,0831023,CONSTRUCTION,2014-12-31
N201203276954,170,170,0,0,8 WASHINGTON ST,0201012,PL APPROVED,2012-08-30
N201307051190,162,162,19,19,101 POLK ST,0811002,CONSTRUCTION,2015-12-03
N200512281146,110,107,60,60,101 EXECUTIVE PARK BL,4991600,BP FILED,2010-10-25
N201301238536,78,78,0,0,620 SUTTER ST,0283004A,BP FILED,2013-01-23
N200705010136,74,74,7,7,72 TOWNSEND ST,3789003,CONSTRUCTION,2015-11-04
N200912223671,35,35,15,15,401 INNES AV,4591C095,CONSTRUCTION,2015-12-11


2016'Q2


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201207124717,656,656,0,0,201 FOLSOM ST,3746001,CONSTRUCTION,2016-03-17
N201112070234,393,393,94,94,1006 / 1050 16TH ST & 1380 07TH ST,3833002,CONSTRUCTION,2016-03-30
N201208036505,384,384,0,0,340 FREMONT ST,3748006,BP FILED,2012-08-03
N201209059005,116,116,0,0,55 LAGUNA ST (BLDG 1),870003,CONSTRUCTION,2016-03-28
N201203055396,34,34,0,0,1650 BROADWAY *,570011,CONSTRUCTION,2016-03-21
N201403170944,20,20,0,0,449 14TH ST,3546026,BP FILED,2014-03-17
N201504224344,81,17,0,0,555 POST ST,306020,CONSTRUCTION,2016-02-29
N201407100913,15,15,2,2,5 SHIPLEY ST / 935 FOLSOM ST / 77 FALMOUTH ST,3753314,CONSTRUCTION,2016-01-27
N200408040564,12,12,5,5,1001 17TH ST & 140 PENNSYLVANIA AV,3987010,BP ISSUED,2014-08-06
N201507020526,11,11,0,0,1155 MARKET ST,3702054,CONSTRUCTION,2016-02-23


2016'Q3


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201209059006,450,450,23,23,55 LAGUNA ST (BLDG 2),0857001,CONSTRUCTION,2016-04-29
N201312184508,260,260,31,31,1634 - 1690 PINE ST,0647007,CONSTRUCTION,2016-07-01
N201207104447,98,98,98,98,1100 GOLDEN GATE AV,0757025,CONSTRUCTION,2016-07-01
N201306250465,84,84,11,11,480 POTRERO AV,3973002C,CONSTRUCTION,2016-01-13
N201207124725,80,80,0,0,LUMINA (PLAZA C),3746001,CONSTRUCTION,2016-07-01
N201306260573,41,41,5,5,450 HAYES ST,0808039,CONSTRUCTION,2016-06-29
N200912032516,35,35,5,5,1 FRANKLIN ST,0837003,CONSTRUCTION,2016-06-16
N201406138386,27,27,2,2,832 SUTTER ST,0281003,CONSTRUCTION,2016-01-21
N200507208144,25,25,0,0,468 CLEMENTINA ST,3732071,CONSTRUCTION,2015-10-01
N201312124038,12,12,0,0,520 09TH ST,3526005,CONSTRUCTION,2016-03-22


2016'Q4


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201306250394,560,560,0,0,1 HENRY ADAMS ST,3911001,CONSTRUCTION,2016-04-07
N201112070227,470,470,0,0,POTRERO1010 (BLDG 1 OF 2),3833001,CONSTRUCTION,2016-05-26
N201511092111,313,313,0,0,55 CHUMASERO DR,7330001,BP FILED,2015-11-09
N201510260810,299,299,0,0,1208 JUNIPERO SERRA BL,7326001,BP FILED,2015-10-26
N201510230640,248,248,0,0,455 SERRANO DR,7335001,BP FILED,2015-10-23
N201409116118,202,202,0,0,1095 MARKET ST,3703059,CONSTRUCTION,2016-07-28
N201609208248,174,174,0,0,5 THOMAS MELLON CIRCLE,4991075,BP FILED,2016-09-20
N201609208258,133,133,0,0,5 THOMAS MELLON CIRCLE,4991075,BP FILED,2016-09-20
N201312265046,129,129,0,0,360 BERRY ST,8704004,CONSTRUCTION,2016-08-22
N201609208257,113,113,0,0,5 THOMAS MELLON CIRCLE,4991075,BP FILED,2016-09-20


2017'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BESTSTAT,BESTDATE
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
N201307303137,416,416,62,62,350 08TH ST,3756003,CONSTRUCTION,2016-11-03
N201404042522,200,200,0,0,MISSION BAY BLOCK 7,8711031,CONSTRUCTION,2016-12-08
N201507060668,79,79,0,0,55 LAGUNA ST (SENIOR CENTER),857002,BP ISSUED,2016-05-27
N200711077587,80,77,0,0,268 MADISON ST,5943008,CONSTRUCTION,2014-01-08
N201311222660,77,77,0,0,2101 & 2155 WEBSTER ST,629037,CONSTRUCTION,2016-11-15
N200809252660,69,69,9,9,1450 FRANKLIN ST,671006,CONSTRUCTION,2016-12-30
N201306270646,40,40,0,0,55 LAGUNA ST (AFF SENIOR HOUSING),857002,CONSTRUCTION,2016-11-10
N201412294591,38,38,4,4,980 FOLSOM ST,3732028,BP FILED,2014-12-29
N200806164548,76,37,0,0,226 06TH ST,3731003,CONSTRUCTION,2014-10-20
N200412171712,27,27,0,0,1601 LARKIN ST,620006,CONSTRUCTION,2016-09-20


# Exporting datasets

In [63]:
for i in range(count):
    fname = 'data/PipelineCleaned_'+labels[i].replace('\'','_')+'.csv'
    data[i].to_csv(fname)
for i in range(count-1):
    fname = 'data/PipelineCompleted_'+labels[i+1].replace('\'','_')+'.csv'
    completed[i].to_csv(fname)

# Total in pipeline per quartal

In [49]:
for i in range(count):
    print("{}: {}".format(labels[i], data[i]['NET_UNITS'].values.sum()))

2013'Q4: 6410
2014'Q1: 5106
2014'Q2: 6187
2014'Q3: 7246
2014'Q4: 7793
2015'Q1: 7709
2015'Q2: 8651
2015'Q3: 8990
2015'Q4: 8717
2016'Q1: 8291
2016'Q2: 7960
2016'Q3: 7173
2016'Q4: 6329
2017'Q1: 5680
