In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display

# Data Loading

SF DBI’s development pipeline datasets use four nearly-identical data models. Parcel identificator was called “BLKLOT” or “Block Lot” in older datasets and is called APN (Assessor's Parcel Number) in the most recent datasets. The same parcel id can written with a leading zero in one dataset and without a leading zero in another one. Prior to 2014 Q3, datasets don’t have fields for affordable unit counts (total affordable units and net affordable units).

Some project records don’t include building permit id or use the placeholder “MULTIPLE” instead of actual permit references. Initially, we remove permitless projects from the dataset, than re-add them at a later stage.

In [2]:
def loadData(fileName, label, fmt = 1):
    columns = ['UNITS','NET_UNITS', 'AFF_UNITS', 'NET_AFF_UNITS', 'NAMEADDR', 'APN']
    
    converters = { 'APN': lambda x: x[4:], 'BP_APPLNO': lambda x: 'N'+x.replace(',','') }
    if fmt == 2 or fmt == 3:
        converters = { 'BLKLOT': lambda x: x.lstrip('0'), 'BP_APPLNO': lambda x: 'N'+x.replace(',','') }
    if fmt == 4:
        converters = { 'Block Lot': lambda x: x.lstrip('0'), 'DBI Permit': lambda x: 'N'+x.replace(',',''),
                     'Location 1': lambda x: x.split('\n')[0]}
        
    X = pd.read_csv(fileName, sep=',', parse_dates=[], infer_datetime_format=True, quotechar='"', converters=converters)
    
    if fmt == 2 or fmt == 3:
        X = X.rename(columns={"AFF_UNITS_NET": "NET_AFF_UNITS", "BLKLOT": "APN"})
    if fmt == 3:
        X = X.rename(columns={"UNITSNET": "NET_UNITS"})
    if fmt == 4:
        X = X.rename(columns={"Units": "UNITS", "Net Added Units": "NET_UNITS", "Best Stat": "BESTSTAT", 'Location 1': 'NAMEADDR', 'Block Lot': 'APN',
                             'DBI Permit': 'BP_APPLNO'})
        X['NET_AFF_UNITS'] = 0
        X['AFF_UNITS'] = 0
    if fmt == 5:
        X = X.rename(columns={"PROPUSE": "PROJECT_TYPE"})
    X = X[X['BESTSTAT'] == 'CONSTRUCTION']
    X = X[X['UNITS'] > 0]
    X = X[X['NET_UNITS'] >= 0]
    
    # Filtering records without permits
    M = X[(X['BP_APPLNO'] == 'N') | (X['BP_APPLNO'] == 'NMULTIPLE')][columns+['BP_APPLNO']]
    X = X[(X['BP_APPLNO'] != 'N') & (X['BP_APPLNO'] != 'NMULTIPLE')]
    
    # Filtering duplicated records
    D = X[X.duplicated('BP_APPLNO', keep=False)][columns+['BP_APPLNO']].sort_values('BP_APPLNO')
    X=X[~X.duplicated('BP_APPLNO')]
    
    # Set unique index by Permit ID
    X = X.set_index('BP_APPLNO')
    X.index.names = ['PERMIT_ID']
    
    X[['UNITS', 'NET_UNITS', 'NET_AFF_UNITS', 'AFF_UNITS']] = X[['UNITS', 'NET_UNITS', 'NET_AFF_UNITS', 'AFF_UNITS']].astype(int)
    
    X = X[columns]
    
    return X,M,D

# Helper Indexes

y13q4 = 0
y14q1 = 1
y14q2 = 2
y14q3 = 3
y14q4 = 4
y15q1 = 5
y15q2 = 6
y15q3 = 7
y15q4 = 8
y16q1 = 9
y16q2 = 10
y16q3 = 11
y16q4 = 12
y17q1 = 13

# Files and versions

files = [
    {'label': '2013\'Q4', 'file': 'data/San_Francisco_Development_Pipeline_2013_Quarter_4.csv', 'format': 4},
    {'label': '2014\'Q1', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_1.csv', 'format': 4},
    {'label': '2014\'Q2', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_2.csv', 'format': 4},
    {'label': '2014\'Q3', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_3.csv', 'format': 4},
    {'label': '2014\'Q4', 'file': 'data/San_Francisco_Development_Pipeline_2014_Quarter_4.csv', 'format': 3},
    {'label': '2015\'Q1', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_1.csv', 'format': 2},
    {'label': '2015\'Q2', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_2.csv', 'format': 1},
    {'label': '2015\'Q3', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_3.csv', 'format': 1},
    {'label': '2015\'Q4', 'file': 'data/San_Francisco_Development_Pipeline_2015_Quarter_4.csv', 'format': 1},
    {'label': '2016\'Q1', 'file': 'data/SF_Development_Pipeline_2016_Q1.csv', 'format': 1},
    {'label': '2016\'Q2', 'file': 'data/SF_Development_Pipeline_2016_Q2.csv', 'format': 5},
    {'label': '2016\'Q3', 'file': 'data/SF_Development_Pipeline_2016_Q3.csv', 'format': 1},
    {'label': '2016\'Q4', 'file': 'data/SF_Development_Pipeline_2016_Q4.csv', 'format': 1},
    {'label': '2017\'Q1', 'file': 'data/SF_Development_Pipeline_2017_Q1.csv', 'format': 1}
]

# Loading Data
count = len(files)
labels = []
data = []
missing = []
duplicates = []

for f in files:
    t,m,d = loadData(f['file'], f['label'], f['format'])
    data.append(t)
    missing.append(m)
    duplicates.append(d)
    labels.append(f['label'])

# Displaying duplicated records
Printing out all records with duplicate building permit id for manual inspection.

In [3]:
for i in range(count):
    if (len(duplicates[i]) > 0):
        print()
        print("Duplicated records at {}".format(labels[i]))
        display(duplicates[i])


Duplicated records at 2013'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
188,2.0,1.0,0,0,1076 Hampshire St,4152016,N200709193092
515,2.0,2.0,0,0,1078 Hampshire St,4152046,N200709193092
299,2.0,1.0,0,0,268 Madison St,5943008,N200711077587
520,1.0,1.0,0,0,268 Madison St,5943051,N200711077587



Duplicated records at 2014'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
124,7.0,7.0,0,0,4132 Third Street,5260002,N200707055953
657,7.0,7.0,0,0,4132 03rd St,5260003,N200707055953
60,2.0,1.0,0,0,1076 Hampshire St,4152016,N200709193092
627,2.0,2.0,0,0,1078 Hampshire St,4152046,N200709193092
151,2.0,1.0,0,0,268 Madison St,5943008,N200711077587
748,1.0,1.0,0,0,268 Madison St,5943051,N200711077587



Duplicated records at 2014'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
207,7.0,7.0,0,0,4132 Third Street,5260002,N200707055953
735,7.0,7.0,0,0,4132 03rd St,5260003,N200707055953
208,2.0,1.0,0,0,1076 Hampshire St,4152016,N200709193092
741,2.0,2.0,0,0,1078 Hampshire St,4152046,N200709193092
306,2.0,1.0,0,0,268 Madison St,5943008,N200711077587
746,1.0,1.0,0,0,268 Madison St,5943051,N200711077587
380,2.0,2.0,0,0,447 Linden St,818048,N200912304034
738,2.0,2.0,0,0,443 Linden St,818049,N200912304034
58,133.0,133.0,0,0,55 Laguna Street,857001,N201209059006
591,191.0,191.0,0,0,218 Buchanan St,857001A,N201209059006



Duplicated records at 2014'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
236,7.0,7.0,0,0,4132 Third Street,5260002,N200707055953
514,7.0,7.0,0,0,4132 03rd St,5260003,N200707055953
171,2.0,1.0,0,0,1076 Hampshire St,4152016,N200709193092
654,2.0,2.0,0,0,1078 Hampshire St,4152046,N200709193092
112,1.0,1.0,0,0,83 Panorama Dr,2821010,N200711077576
668,1.0,1.0,0,0,83 Panorama Dr,2821023,N200711077576
269,2.0,1.0,0,0,268 Madison St,5943008,N200711077587
659,1.0,1.0,0,0,268 Madison St,5943051,N200711077587
326,2.0,2.0,0,0,447 Linden St,818048,N200912304034
651,2.0,2.0,0,0,443 Linden St,818049,N200912304034



Duplicated records at 2014'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
411,2,1,0,0,1076 HAMPSHIRE ST,4152016,N200709193092
448,2,2,0,0,1078 HAMPSHIRE ST,4152046,N200709193092
400,2,2,0,0,447 LINDEN ST,818048,N200912304034
409,2,2,0,0,443 LINDEN ST,818049,N200912304034
69,98,98,0,0,1239 TURK ST,757027,N201207104447
70,98,98,98,98,1100 GOLDEN GATE AV,757025,N201207104447
38,191,191,160,160,218 BUCHANAN ST,857001A,N201209059006
55,133,133,160,160,55 LAGUNA STREET,857001,N201209059006
53,139,139,0,0,555 FULTON ST,794028,N201305036062
54,136,136,0,0,746 LAGUNA ST,794015,N201305036062



Duplicated records at 2015'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
794,2,2,0,0,1078 HAMPSHIRE ST,4152046,N200709193092
863,2,1,0,0,1076 HAMPSHIRE ST,4152016,N200709193092
523,2,2,0,0,443 LINDEN ST,818049,N200912304034
782,2,2,0,0,447 LINDEN ST,818048,N200912304034


# Fixing duplicates
Re-adding duplicate records with corrected values.

In [4]:
def fix_duplicate(permitId, units, netUnits, affUnits, netAffUnits, addr, apn):
    for i in range(count):
        if len(duplicates[i][duplicates[i]['BP_APPLNO'] == permitId]) > 0:
            data[i].loc[permitId] = [units, netUnits, affUnits, netAffUnits, addr, apn]
        duplicates[i] = duplicates[i][duplicates[i]['BP_APPLNO'] != permitId]

fix_duplicate('N200709193092', 2, 1, 0, 0, '1076-1078 Hampshire St', '4152016')
fix_duplicate('N200711077587', 2, 1, 0, 0, '268 Madison St', '5943008')
fix_duplicate('N200707055953', 7, 7, 0, 0, '4132 03rd St', '5260003')
fix_duplicate('N200912304034', 2, 2, 0, 0, '447 Linden St', '818048')
fix_duplicate('N201209059006', 191, 191, 0, 0, '218 Buchanan St', '857001')
fix_duplicate('N200711077576', 1, 1, 0, 0, '83 Panorama Dr', '2821010')
fix_duplicate('N201207104447', 98, 98, 0, 0, '1100 GOLDEN GATE AV', '757025')
fix_duplicate('N201305036062', 139, 139, 0, 0, '555 FULTON ST', '794028')

hasDuplicates = False
for i in range(count):
    if (len(duplicates[i]) > 0):
        print()
        print("Duplicated records at {}".format(labels[i]))
        display(duplicates[i])
        hasDuplicates = True
if not hasDuplicates:
    print("No duplicates present!")

No duplicates present!


# Displaying records without Permit ID
Printing all records without Permit ID

In [5]:
for i in range(count):
    if (len(missing[i]) > 0):
        print()
        print("Records without Permit ID at {}".format(labels[i]))
        display(missing[i])


Records without Permit ID at 2015'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
648,132,14,0,0,833-881 Jamestown,4991277,N



Records without Permit ID at 2015'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
48,196,196,0,0,800 BROTHERHOOD WAY,7331003,N



Records without Permit ID at 2015'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
53,229,229,0,0,"HUNTERS POINT SHIPYARD, PHASE I",4591C001,N
111,95,95,0,0,800 BROTHERHOOD,7331005,N



Records without Permit ID at 2016'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
1273,169,169,0,0,HP SHIPYARD PHASE 1 (UNDER CONSTRUCTION),4591C001,N
1296,99,99,0,0,800 BROTHERHOOD WAY,7331005,N



Records without Permit ID at 2016'Q2


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
1178,81,81,0,0,800 BROTHERHOOD WAY (UNDER CONSTRUCTION),7331005,N
1289,185,185,0,0,HP SHIPYARD PHASE 1 (UNDER CONSTRUCTION),4591C001,N



Records without Permit ID at 2016'Q3


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
1138,54,54,0,0,800 BROTHERHOOD WAY (UNDER CONSTRUCTION),7331005,N
1261,167,167,0,0,HP SHIPYARD PHASE 1 (UNDER CONSTRUCTION),4591C001,N



Records without Permit ID at 2016'Q4


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
1179,49,49,0,0,800 BROTHERHOOD WAY (UNDER CONSTRUCTION),7331003,N
1221,110,110,0,0,HUNTERS POINT SHIPYARD (UNDER CONSTRUCTION),4591C001,NMULTIPLE



Records without Permit ID at 2017'Q1


Unnamed: 0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN,BP_APPLNO
1203,36,36,0,0,800 BROTHERHOOD WAY (UNDER CONSTRUCTION),7331003,NMULTIPLE
1337,86,86,0,0,HUNTERS POINT SHIPYARD (UNDER CONSTRUCTION),4591C001,NMULTIPLE
1338,107,107,106,106,HUNTERS VIEW,4624031,NMULTIPLE


# Fixing records without Permit ID
Correcting records without Permit ID

In [6]:
def fix_missing(permitId, units, netUnits, affUnits, netAffUnits, addr, apn):
    for i in range(count):
        if len(missing[i][missing[i]['APN'].isin(apn)]) > 0:
            data[i].loc[permitId] = [units, netUnits, affUnits, netAffUnits, addr, apn[0]]
        missing[i] = missing[i][~missing[i]['APN'].isin(apn)]
        
fix_missing("N_CUSTOM_JAMESTOWN", 132, 14, 0, 0, '833-881 Jamestown', ['4991277'])
fix_missing("N_CUSTOM_BROTHERHOOD", 196, 196, 0, 0, '800 BROTHERHOOD WAY', ['7331003', '7331005'])
fix_missing("N_CUSTOM_SHIPYARD", 229, 229, 0, 0, 'HUNTERS POINT SHIPYARD, PHASE I', ['4591C001', '4624031'])

hasMissing = False
for i in range(count):
    if (len(missing[i]) > 0):
        print()
        print("Records without Permit ID at {}".format(labels[i]))
        display(missing[i])
        hasMissing = True
if not hasMissing:
    print("No missing present")

No missing present


# Searching for incorrect unit values
Searching projects with unit counts varying from quarter to quarter and projects with net units exceeding total units.

In [7]:
def find_incorrect(column):
    incorrect = {}
    for i in range(count-1):
        X = data[i]
        for j in range(i+1, count):
            X2 = data[j]
        
            X = X[X.index.isin(X2.index)].sort_index()
            X2 = X2[X2.index.isin(X.index)].sort_index()
            Y = X[X[column] != X2[column]]
            Y2 = X2[X[column] != X2[column]]
            if len(Y)>0:
                for index, row in Y.iterrows():
                    a = int(Y.loc[index][column])
                    b = int(Y2.loc[index][column])
                    if index not in incorrect:
                        incorrect[index] = { 'min': min(a, b), 'max': max(a,b)}
                    else:
                        incorrect[index] = { 'min': min(incorrect[index]['min'], a, b), 'max': max(incorrect[index]['max'], a, b)}
    return incorrect


incorrect = find_incorrect('UNITS')
print("Unit number fluctuation")
display(incorrect)

incorrectNet = find_incorrect('NET_UNITS')
print("Net Unit number fluctuation")
display(incorrectNet)

Unit number fluctuation


{'N200507208144': {'max': 25, 'min': 12},
 'N200605161774': {'max': 452, 'min': 447},
 'N200711077587': {'max': 80, 'min': 1},
 'N200807176988': {'max': 2, 'min': 1},
 'N200810315586': {'max': 156, 'min': 121},
 'N200810315636': {'max': 15, 'min': 13},
 'N201009140800': {'max': 320, 'min': 305},
 'N201012237367': {'max': 39, 'min': 23},
 'N201112070227': {'max': 470, 'min': 65},
 'N201204168406': {'max': 83, 'min': 50},
 'N201207124717': {'max': 806, 'min': 285},
 'N201209059006': {'max': 450, 'min': 191},
 'N201209069080': {'max': 550, 'min': 540},
 'N201209119428': {'max': 210, 'min': 4},
 'N201211073775': {'max': 271, 'min': 121},
 'N201211093966': {'max': 84, 'min': 81},
 'N201211284953': {'max': 28, 'min': 11},
 'N201212216752': {'max': 190, 'min': 167},
 'N201303273113': {'max': 75, 'min': 69},
 'N201305015894': {'max': 140, 'min': 74},
 'N201306250394': {'max': 560, 'min': 239},
 'N201306250465': {'max': 84, 'min': 77},
 'N201306280783': {'max': 132, 'min': 4},
 'N201307303137':

Net Unit number fluctuation


{'N200507208144': {'max': 25, 'min': 12},
 'N200605161774': {'max': 452, 'min': 447},
 'N200709193092': {'max': 2, 'min': 1},
 'N200711077587': {'max': 77, 'min': 1},
 'N200807176988': {'max': 2, 'min': 1},
 'N200810315586': {'max': 156, 'min': 121},
 'N200810315636': {'max': 15, 'min': 13},
 'N201009140800': {'max': 320, 'min': 305},
 'N201012237367': {'max': 39, 'min': 23},
 'N201112070227': {'max': 470, 'min': 65},
 'N201204168406': {'max': 83, 'min': 50},
 'N201207124717': {'max': 806, 'min': 285},
 'N201209059006': {'max': 450, 'min': 191},
 'N201209069080': {'max': 550, 'min': 540},
 'N201209119428': {'max': 210, 'min': 4},
 'N201211073775': {'max': 271, 'min': 121},
 'N201211093966': {'max': 84, 'min': 81},
 'N201211284953': {'max': 28, 'min': 11},
 'N201212216752': {'max': 190, 'min': 167},
 'N201303273113': {'max': 75, 'min': 69},
 'N201305015894': {'max': 140, 'min': 74},
 'N201306250394': {'max': 560, 'min': 239},
 'N201306250465': {'max': 84, 'min': 77},
 'N201306280783': {

In [8]:
def fix_units(permitId, units, column):
    for i in range(count):
        if permitId in data[i].index:
            d = data[i].loc[permitId]
            d[column] = units
            data[i].loc[permitId] = d

for k in incorrect:
    fix_units(k, incorrect[k]['max'], 'UNITS')
for k in incorrectNet:
    fix_units(k, incorrectNet[k]['max'], 'NET_UNITS')    
    
# 201 Folsom
fix_units('N201207124717', 656, 'UNITS')
fix_units('N201207124717', 656, 'NET_UNITS')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


# Detection of completed buildings, p.1
Identifying completed projects as the ones that were in construction in a given quarter and are removed from the pipeline in the following quarter.

In [9]:
def buildStats(data):
    buildings = list(map(lambda x: len(x), completed))
    units = list(map(lambda x: x['UNITS'].values.sum(), completed))
    netUnits = list(map(lambda x: x['NET_UNITS'].values.sum(), completed))
    aff = list(map(lambda x: x['AFF_UNITS'].values.sum(), completed[4:]))
    netAff = list(map(lambda x: x['NET_AFF_UNITS'].values.sum(), completed[4:]))
    return { "buildings": buildings, "units": units, "netUnits": netUnits, "aff": aff, "netAff": netAff }

def printStats(stats):
    print("Buildings: {}".format(stats["buildings"]))
    print("Net Units: {}".format(stats["netUnits"]))
    print("Total Units: {}".format(sum(stats["netUnits"])))
    print("Net Affordable Units: {}".format(stats["netAff"]))
    print("Total Affordable Units: {}".format(sum(stats["netAff"])))

def contains(data, key):
    for i in range(len(data)):
        if key in data[i].index:
            return True
    return False

In [10]:
completed = []
for i in range(len(data)-1):
    X = data[i]
    X2 = data[i+1]
    X = X[~X.index.isin(X2.index)]
    completed.append(X)

stats = buildStats(completed)
printStats(stats)

Buildings: [18, 15, 28, 48, 19, 17, 16, 24, 71, 27, 34, 43, 50]
Net Units: [1672, 328, 732, 1872, 601, 816, 845, 656, 1674, 1272, 1151, 1933, 1062]
Total Units: 14614
Net Affordable Units: [160, 10, 34, 167, 64, 96, 175, 116, 71]
Total Affordable Units: 893


# Detection of completed buildings, p.2
Correcting for projects that re-appear in the pipeline after being removed.

In [11]:
completed = []
for i in range(len(data)-1):
    X = data[i]
    for j in range(i+1,len(data)):
        X2 = data[j]
        X = X[~X.index.isin(X2.index)]
    completed.append(X)

stats = buildStats(completed)
printStats(stats)

Buildings: [18, 15, 28, 43, 18, 16, 16, 24, 71, 27, 34, 41, 50]
Net Units: [1672, 328, 732, 1466, 485, 337, 845, 656, 1674, 1272, 1151, 1718, 1062]
Total Units: 13398
Net Affordable Units: [0, 10, 34, 167, 64, 96, 175, 23, 71]
Total Affordable Units: 640


# Percent of units in top 10 projects for each quartal
Here we print the top ten projects (by net new units) for each quarter and compare their contribution to the total net number of units completed in the same period. As we see below, in a typical quarter the top ten projects account for 90%+ of citywide housing production.

In [12]:
for i in range(len(labels)-1):
    topUnits = completed[i].sort_values('NET_UNITS', ascending=False).head(10)['NET_UNITS'].values.sum()
    units = completed[i]['NET_UNITS'].values.sum()
    print(labels[i+1])
    print(topUnits/units)

2014'Q1
0.995215311005
2014'Q2
0.984756097561
2014'Q3
0.959016393443
2014'Q4
0.93724420191
2015'Q1
0.983505154639
2015'Q2
0.982195845697
2015'Q3
0.992899408284
2015'Q4
0.977134146341
2016'Q1
0.902031063321
2016'Q2
0.98427672956
2016'Q3
0.966116420504
2016'Q4
0.971478463329
2017'Q1
0.931261770245


# Group Stats by Year

In [13]:
for i in range(3):
    yearCount = int(sum(stats['netUnits'][i*4:i*4+4]))
    print("Year {}:      {}".format(2014 + i, yearCount))
print("Year 2017 (Q1): {}".format(completed[len(completed) - 1]['NET_UNITS'].sum()))

Year 2014:      4198
Year 2015:      2323
Year 2016:      5815
Year 2017 (Q1): 1062


# Result Table (top 10)

In [14]:
for i in range(len(labels)-1):
    print(labels[i+1])
    display(completed[i].sort_values('NET_UNITS', ascending=False).head(10))

2014'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N200607207084,754,754,0,0,1401 Market St,3507041
N201104224606,315,315,0,0,185 Channel St,8711023
N201207054130,273,273,0,0,1155 04th St,8713001
N201012217106,115,115,0,0,1960-1998 Market St,872005
N200506246051,88,88,0,0,333 Fremont St,3747019
N200912223711,52,52,0,0,63 West Point Rd,4624004
N201109074027,24,24,0,0,1600 Market St,854001
N200701051074,19,19,0,0,246 Ritch St,3776092
N200912183521,15,15,0,0,1266 09th Av,1742043
N200711137944,9,9,0,0,3135 24th St,6520036


2014'Q2


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201106017202,182,182,0,0,260 05th St,3732008
N201111038205,40,40,0,0,1501 15th St,3553054
N201110146841,38,38,0,0,1645-1661 Pacific Av,595013
N200608290880,35,35,0,0,1080 Sutter St,279011
N201202154236,20,20,0,0,1717 17th St,3980007
N200711309388,3,3,0,0,1870 Golden Gate Av,1152017
N201005122282,2,2,0,0,35 Lloyd St,1260035
N200505031415,2,1,0,0,530 Sanchez St,3584007
N200901220624,3,1,0,0,253 Parker Av,1086006
N201104013321,1,1,0,0,3210 Jackson St,973031


2014'Q3


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201012036075,150,150,0,0,1000 Fourth Street (block 13 East),8711014
N201111179162,106,106,0,0,740 Illinois St And 2121 Third St,4045002
N200712211199,100,100,0,0,973 Market St,3704069
N201012156753,90,90,0,0,101 Golden Gate Av,349001
N201207205377,88,88,0,0,2175 Market St,3543011
N200212244171,81,71,0,0,1301 Indiana St,4228158
N200810063512,55,55,0,0,474 Natoma Street,3725101
N201304265571,19,19,0,0,1816 Eddy St,1127064
N201208248160,12,12,0,0,2401 16th St,3965001
N200706204573,11,11,0,0,3418 26th St,6529012


2014'Q4


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N200412211855,312,312,0,0,425 First Street,3765015
N201106017208,282,282,0,0,900 Folsom St,3732009
N201108233049,273,273,0,0,55 9th St,3701064
N9924080S,132,132,0,0,833-881 Jamestown,4991277
N201301168124,114,114,0,0,2558 Mission St,3616007
N201105166063,75,75,0,0,235 Broadway,165021
N201203015201,71,71,0,0,50 Phelan Av,3180001
N201208036517,49,49,0,0,8 Octavia St,855011
N201012237367,39,39,0,0,1875 Mission St,3548032
N201106017181,27,27,0,0,2559 Van Ness Av,527002


2015'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201208026344,399,399,0,0,100 VAN NESS AV,814020
N201205180774,37,37,0,0,25 DOLORES ST,3534069
N200408121427,18,18,0,0,899 VALENCIA ST,3596113
N9902819,8,8,0,0,1179 TENNESSEE ST,4172053
N200603025880,6,6,0,0,782-786 ANDOVER ST,5825007
N201308093997,722,2,0,0,515 JOHN MUIR DR,7282005
N200706143970,3,2,0,0,520 28TH AV,1517030
N201304023614,2,2,0,0,850 BATTERY ST,141008
N200905208827,3,2,0,0,251 ARKANSAS ST,4004024
N201401146235,3,1,0,0,1155 CHURCH ST,3650028


2015'Q2


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201301319232,160,160,10,10,1321 Mission Street,3509043
N201306280783,132,132,0,0,144 KING ST,3794024
N201410017815,155,17,0,0,33 POWELL ST,330004
N_CUSTOM_JAMESTOWN,132,14,0,0,833-881 Jamestown,4991277
N201302210648,3,3,0,0,39 SAN CARLOS ST,3576018
N200505273609,1,1,0,0,346 22ND AV,1453033
N200609152421,2,1,0,0,440 29TH ST,6620014
N201211154328,2,1,0,0,383 FAIR OAKS ST,6512016
N200712201032,2,1,0,0,168 CLIPPER ST,6549021
N201308235140,1,1,0,0,1411 HUDSON AV,5258027


2015'Q3


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201009140800,320,320,0,0,45 LANSING ST,3749059
N201209119428,210,210,0,0,800 Brotherhood Way,7331005
N200810315586,156,156,18,18,1415 MISSION ST,3510001
N201010143017,67,67,9,9,527 STEVENSON ST,3703012
N200907223197,63,63,7,7,101 DONAHUE ST,4591C042
N201303122049,15,15,0,0,248 - 252 09TH ST,3518006
N201307313252,3,2,0,0,1681 Fulton St,1185026
N201206293848,2,2,0,0,188 MUSEUM WY,2620119
N201301258793,3,2,0,0,748 TREAT AV,3612055
N200312243005,2,2,0,0,395 ATHENS ST,6022014


2015'Q4


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201304023626,409,409,0,0,280 BEALE ST,3738004
N201212216752,190,190,167,167,1400 MISSION ST,3507039
N201105256770,10,10,0,0,140 09TH ST,3509005
N201202295146,9,9,0,0,421 HUDSON AV,4591C068
N201202295147,9,9,0,0,451 HUDSON AV,4591C045
N201207265893,4,4,0,0,3820 24TH STREET,3651017
N201303071701,6,4,0,0,327 BALBOA ST,1640051
N200507137493,2,2,0,0,1665 PALOU AV,5327038
N200703126040,2,2,0,0,476 LINDEN ST,0818020
N201311071427,2,2,0,0,851 CORBETT AV,2778013


2016'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N200605161774,452,452,0,0,399 FREMONT ST,3747320
N201211073775,271,271,23,23,5800 03RD ST (BUILDING 4),5431A042
N201212246822,263,263,0,0,718 LONG BRIDGE ST,8710007
N201210051462,182,182,0,0,PARCEL P - MARKET OCTAVIA,0831023
N201307051190,162,162,19,19,101 POLK ST,0811002
N200705010136,74,74,7,7,72 TOWNSEND ST,3789003
N200912223671,35,35,15,15,401 INNES AV,4591C095
N200912223673,34,34,0,0,50 JERROLD AV,4591C099
N200907223200,25,25,0,0,201 DONAHUE ST,4591C093
N200912223674,12,12,0,0,200 COLEMAN ST,4591C098


2016'Q2


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201207124717,656,656,0,0,201 FOLSOM ST,3746001
N201112070234,393,393,94,94,1006 / 1050 16TH ST & 1380 07TH ST,3833002
N201209059005,116,116,0,0,55 LAGUNA ST (BLDG 1),870003
N201203055396,34,34,0,0,1650 BROADWAY *,570011
N201504224344,81,17,0,0,555 POST ST,306020
N201407100913,15,15,2,2,5 SHIPLEY ST / 935 FOLSOM ST / 77 FALMOUTH ST,3753314
N201507020526,11,11,0,0,1155 MARKET ST,3702054
N200904065665,6,6,0,0,1701 09TH AV,2040001
N201012307679,2,2,0,0,857 ALABAMA ST,4085026
N200701292823,4,2,0,0,2374 FOLSOM ST,3594009


2016'Q3


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201209059006,450,450,23,23,55 LAGUNA ST (BLDG 2),0857001
N201312184508,260,260,31,31,1634 - 1690 PINE ST,0647007
N201207104447,98,98,98,98,1100 GOLDEN GATE AV,0757025
N201306250465,84,84,11,11,480 POTRERO AV,3973002C
N201207124725,80,80,0,0,LUMINA (PLAZA C),3746001
N201306260573,41,41,5,5,450 HAYES ST,0808039
N200912032516,35,35,5,5,1 FRANKLIN ST,0837003
N201406138386,27,27,2,2,832 SUTTER ST,0281003
N200507208144,25,25,0,0,468 CLEMENTINA ST,3732071
N201312124038,12,12,0,0,520 09TH ST,3526005


2016'Q4


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201306250394,560,560,0,0,1 HENRY ADAMS ST,3911001
N201112070227,470,470,0,0,POTRERO1010 (BLDG 1 OF 2),3833001
N201409116118,202,202,0,0,1095 MARKET ST,3703059
N201312265046,129,129,0,0,360 BERRY ST,8704004
N201401166475,91,91,0,0,2700 ARELIOUS WALKER DR,4884027
N201211093966,84,84,0,0,2655 BUSH ST,1052024
N201311202405,72,72,11,11,346 POTRERO AV,3962008
N201211284953,28,28,11,11,ONE CAPITOL AV,7148040
N201401297354,21,21,0,0,2353 - 2347 LOMBARD ST,512026
N201305217457,12,12,1,1,690 PAGE ST,843016


2017'Q1


Unnamed: 0_level_0,UNITS,NET_UNITS,AFF_UNITS,NET_AFF_UNITS,NAMEADDR,APN
PERMIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
N201307303137,416,416,62,62,350 08TH ST,3756003
N201404042522,200,200,0,0,MISSION BAY BLOCK 7,8711031
N201311222660,77,77,0,0,2101 & 2155 WEBSTER ST,629037
N200711077587,80,77,0,0,268 MADISON ST,5943008
N200809252660,69,69,9,9,1450 FRANKLIN ST,671006
N201306270646,40,40,0,0,55 LAGUNA ST (AFF SENIOR HOUSING),857002
N200806164548,76,37,0,0,226 06TH ST,3731003
N200412171712,27,27,0,0,1601 LARKIN ST,620006
N201412194144,81,24,0,0,690 MARKET ST,311016
N201401247066,22,22,0,0,233 - 237 SHIPLEY ST,3753096


# Exporting datasets

In [15]:
for i in range(count):
    fname = 'data/PipelineCleaned_'+labels[i].replace('\'','_')+'.csv'
    data[i].to_csv(fname)
for i in range(count-1):
    fname = 'data/PipelineCompleted_'+labels[i+1].replace('\'','_')+'.csv'
    completed[i].to_csv(fname)

# Total in pipeline per quartal

In [16]:
for i in range(count):
    print("{}: {}".format(labels[i], data[i]['NET_UNITS'].values.sum()))

2013'Q4: 6410
2014'Q1: 5106
2014'Q2: 6187
2014'Q3: 7246
2014'Q4: 7793
2015'Q1: 7723
2015'Q2: 8651
2015'Q3: 9186
2015'Q4: 9142
2016'Q1: 8716
2016'Q2: 8385
2016'Q3: 7598
2016'Q4: 6754
2017'Q1: 6105
