In [19]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [273]:
def loadData(fileName, fmt = 1):
    """Load one development-pipeline CSV and keep only rows under construction.

    The quarterly files read by this notebook come in several column
    layouts; ``fmt`` selects which one to expect:

    * 1 (the default, and any value not listed below) -- indexed by ``APN``;
      the first four characters of every APN value are dropped.
    * 2 -- indexed by ``BLKLOT`` with leading zeros stripped;
      ``AFF_UNITS_NET`` is renamed to ``NET_AFF_UNITS``.
    * 3 -- same as 2, and additionally ``UNITSNET`` is renamed to
      ``NET_UNITS``.
    * 4 -- indexed by ``Block Lot`` with leading zeros stripped; ``Units``,
      ``Net Added Units``, ``Best Stat`` and ``Location 1`` are renamed to
      ``UNITS``, ``NET_UNITS``, ``BESTSTAT`` and ``NAMEADDR``.

    Parameters
    ----------
    fileName : str
        Path of the CSV file to read.
    fmt : int, optional
        Layout selector described above.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``BESTSTAT`` equals ``'CONSTRUCTION'``, indexed by the
        normalised parcel identifier, with columns renamed as listed above.

    Raises
    ------
    KeyError
        If no ``BESTSTAT`` column exists once the renames have been applied.
    """
    stripZeros = lambda x: x.lstrip('0')

    # fmt -> (index column, index normaliser, column renames)
    layouts = {
        2: ('BLKLOT', stripZeros, {"AFF_UNITS_NET": "NET_AFF_UNITS"}),
        3: ('BLKLOT', stripZeros, {"AFF_UNITS_NET": "NET_AFF_UNITS",
                                   "UNITSNET": "NET_UNITS"}),
        4: ('Block Lot', stripZeros, {"Units": "UNITS",
                                      "Net Added Units": "NET_UNITS",
                                      "Best Stat": "BESTSTAT",
                                      'Location 1': 'NAMEADDR'}),
    }
    # Anything that is not 2, 3 or 4 is treated as the APN layout, which is
    # what the previous if-ladder did as well.
    col, normalise, renames = layouts.get(fmt, ('APN', lambda x: x[4:], {}))

    # No date columns are parsed, so the deprecated `infer_datetime_format`
    # flag (and the empty `parse_dates` list) are not passed.
    X = pd.read_csv(fileName, sep=',', index_col=[col], quotechar='"',
                    converters={col: normalise})
    if renames:
        X = X.rename(columns=renames)

    return X[X['BESTSTAT'] == 'CONSTRUCTION']

# Quarter labels 2013'Q4 .. 2016'Q4 (13 entries).
labels = ["2013'Q4"] + [
    "{}'Q{}".format(year, quarter)
    for year in (2014, 2015, 2016)
    for quarter in (1, 2, 3, 4)
]

# (file, layout) for every snapshot, oldest first.  The layout number is the
# `fmt` argument of loadData; 1 is its default.
pipelineFiles = [
    ('data/San_Francisco_Development_Pipeline_2013_Quarter_4.csv', 4),
    ('data/San_Francisco_Development_Pipeline_2014_Quarter_1.csv', 4),
    ('data/San_Francisco_Development_Pipeline_2014_Quarter_2.csv', 4),
    ('data/San_Francisco_Development_Pipeline_2014_Quarter_3.csv', 4),
    ('data/San_Francisco_Development_Pipeline_2014_Quarter_4.csv', 3),
    ('data/San_Francisco_Development_Pipeline_2015_Quarter_1.csv', 2),
    ('data/San_Francisco_Development_Pipeline_2015_Quarter_2.csv', 1),
    ('data/San_Francisco_Development_Pipeline_2015_Quarter_3.csv', 1),
    ('data/San_Francisco_Development_Pipeline_2015_Quarter_4.csv', 1),
    ('data/SF_Development_Pipeline_2016_Q1.csv', 1),
    ('data/SF_Development_Pipeline_2016_Q2.csv', 1),
    ('data/SF_Development_Pipeline_2016_Q3.csv', 1),
    ('data/SF_Development_Pipeline_2016_Q4.csv', 1),
    ('data/SF_Development_Pipeline_2017_Q1.csv', 1),
]

# One filtered frame per snapshot, in the same order as pipelineFiles.
data = [loadData(path, layout) for path, layout in pipelineFiles]

In [274]:
# Keep only rows whose total and net unit counts are both non-negative.
preprocessed = [
    frame[(frame['UNITS'] >= 0) & (frame['NET_UNITS'] >= 0)]
    # frame[frame['NET_UNITS'] > frame['UNITS']]   (disabled filter)
    for frame in data
]

# For every snapshot except the last, keep the rows that show up in none of
# the later snapshots, matching either on the index or on NAMEADDR.
completed = []
for position, snapshot in enumerate(preprocessed[:-1]):
    remaining = snapshot
    for later in preprocessed[position + 1:]:
        seenAgain = (remaining['NAMEADDR'].isin(later['NAMEADDR'])
                     | remaining.index.isin(later.index))
        remaining = remaining[~seenAgain]
    completed.append(remaining)

# Per-snapshot summaries of the rows kept above.
buildings = [len(frame) for frame in completed]
units = [frame['NET_UNITS'].values.sum() for frame in completed]
units2 = [frame['UNITS'].values.sum() for frame in completed]
# affUnits = list(map(lambda x: x['AFF_UNITS'].values.sum(), completed))
# netAffUnits = list(map(lambda x: x['NET_AFF_UNITS'].values.sum(), completed))

for caption, value in (("Buildings: {}", buildings),
                       ("Net Units: {}", units),
                       ("Total Units: {}", sum(units))):
    print(caption.format(value))
# print("Net Affordable Units: {}".format(netAffUnits))
# print("Total Affordable Units: {}".format(sum(netAffUnits)))

Buildings: [19, 15, 29, 49, 23, 17, 20, 24, 81, 28, 37, 45, 54]
Net Units: [1672.0, 328.0, 732.0, 1342.0, 1020, 192, 767, 345, 1927, 616, 864, 768, 977]
Total Units: 11550.0


In [260]:
# Row counts of the third snapshot: as loaded, then after removing rows that
# reappear in later snapshots.
print(len(data[2]), len(completed[2]), sep='\n')

213
29


In [261]:
# Columns shown in the per-quarter listing below.
cols = ['NET_UNITS', 'UNITS', 'NAMEADDR']

# For each labelled quarter, list the ten rows with the highest NET_UNITS.
for position, quarter in enumerate(labels):
    print(quarter)
    ranked = completed[position].sort_values('NET_UNITS', ascending=False)
    print(ranked[cols].head(10))

2013'Q4
           NET_UNITS  UNITS                                           NAMEADDR
Block Lot                                                                     
3507041        754.0  754.0  1401 Market St\n(37.776308800000002, -122.4178...
8711023        315.0  315.0  185 Channel St\n(37.774546399999998, -122.3899...
8713001        273.0  273.0   1155 04th St\n(37.773560400000001, -122.3914595)
872005         115.0  115.0  1960-1998 Market St\n(37.770026000000001, -122...
3747019         88.0   88.0  333 Fremont St\n(37.787635999999999, -122.3929...
4624004         52.0   52.0  63 West Point Rd\n(37.736734499999997, -122.38...
854001          24.0   24.0  1600 Market St\n(37.773991199999998, -122.4210...
3776092         19.0   19.0     246 Ritch St\n(37.780208000000002, -122.39578)
1742043         15.0   15.0   1266 09th Av\n(37.764675099999998, -122.4662513)
6520036          9.0    9.0     3135 24th St\n(37.752336, -122.41479699999999)
2014'Q1
           NET_UNITS  UNITS         

# Line chart of the per-snapshot NET_UNITS totals held in `units`.
# The figure has to be created here: with this line commented out, `fig` is
# undefined on a fresh kernel and the cell fails with NameError.
fig = plt.figure(figsize=(11,8))
ax1 = fig.add_subplot(111)

quarterPositions = range(len(data)-1)
ax1.plot(quarterPositions, units, label='total')
# ax1.plot(range(len(data)-1), netAffUnits, label='affordable')

# Use the quarter names as tick labels when there is exactly one per point.
if len(labels) == len(quarterPositions):
    ax1.set_xticks(list(quarterPositions))
    ax1.set_xticklabels(labels, rotation=45)
ax1.set_ylabel('Net units')
ax1.legend();

In [267]:
# Share of each quarter's NET_UNITS total contributed by its three largest rows.
# The total is kept in `quarterUnits` rather than `units`, so the module-level
# `units` list built in an earlier cell is not overwritten by a scalar.
for i, quarter in enumerate(labels):
    netUnits = completed[i]['NET_UNITS']
    topUnits = netUnits.nlargest(3).sum()
    quarterUnits = netUnits.sum()
    print(quarter)
    print(topUnits/quarterUnits)

2013'Q4
0.802631578947
2014'Q1
0.792682926829
2014'Q2
0.486338797814
2014'Q3
0.646050670641
2014'Q4
0.711764705882
2015'Q1
0.921875
2015'Q2
0.792698826597
2015'Q3
0.863768115942
2015'Q4
0.580176440062
2016'Q1
0.881493506494
2016'Q2
0.635416666667
2016'Q3
0.59765625
2016'Q4
0.700102354145
