In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Read the cleaned eviction records, then discard the identifier and
# location-label columns this notebook drops up front.
unused_columns = ['Unnamed: 0', 'ID', 'Supervisor District', 'Neighborhood', 'Zipcode', 'Address']
df = pd.read_csv('cleaned_evictions.csv')
df = df.drop(columns=unused_columns)
df.head()

Unnamed: 0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Shape
0,2002-10-09,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,POINT (-122.44458 37.791653)
1,2020-11-12,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
2,2007-02-08,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,POINT (-122.42439 37.76721)
3,2020-05-05,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
4,2020-03-06,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,


In [3]:
# Number of rows that have a missing value in at least one column.
int(df.isna().any(axis=1).sum())

60

In [4]:
# Keep only rows with no missing value in any column.
df = df.dropna(axis=0, how='any')

In [5]:
# Split "POINT (<lon> <lat>)" strings into separate lon / lat columns.
# One anchored regex replaces the earlier slice -> strip -> split chain, which
# only worked when the two numbers were separated by exactly one space.
point_parts = df['Shape'].str.extract(
    r'^\s*POINT\s*\(\s*(?P<lon>-?\d+(?:\.\d+)?)\s+(?P<lat>-?\d+(?:\.\d+)?)\s*\)\s*$'
)
# Fail loudly instead of carrying unparsed coordinates forward.
assert point_parts.notna().all().all(), 'Unparseable Shape value(s) found'
# lon / lat stay as strings, exactly as the original cell produced them.
# Dropping Shape first keeps the column order: ..., lon, lat.
df = df.drop(columns='Shape').assign(lon=point_parts['lon'], lat=point_parts['lat'])

df.head()

Unnamed: 0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,lon,lat
0,2002-10-09,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,-122.44458,37.791653
2,2007-02-08,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,-122.42439,37.76721
5,2019-11-26,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-122.418526,37.788204
6,2019-09-09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,-122.4375,37.77982
7,2019-08-21,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-122.41251,37.800182


In [6]:
# List the column names now that Shape has been replaced by lon / lat.
df.columns

Index(['File Date', 'Non Payment', 'Breach', 'Nuisance', 'Illegal Use',
       'Failure to Sign Renewal', 'Access Denial', 'Unapproved Subtenant',
       'Owner Move In', 'Demolition', 'Capital Improvement',
       'Substantial Rehab', 'Ellis Act WithDrawal', 'Condo Conversion',
       'Roommate Same Unit', 'Other Cause', 'Late Payments',
       'Lead Remediation', 'Development', 'Good Samaritan Ends', 'lon', 'lat'],
      dtype='object')

#### Next step: map every eviction to its census block group

- Dataset: df
- Goal: Use the [FCC Census API](https://geo.fcc.gov/api/census/#!/block/get_block_find) to look up the census block for each eviction location, then keep the first 12 digits of the block FIPS code (the block-group identifier) in the `block` column

In [7]:
import requests

In [10]:
def get_block(lat, lon):
    """Look up the census block FIPS code for a coordinate via the FCC API.

    Parameters
    ----------
    lat, lon : float or str
        Latitude and longitude, sent as the ``lat`` / ``lon`` query parameters.

    Returns
    -------
    str or None
        The ``block_fips`` of the first result whose code starts with
        ``06075`` (San Francisco County). If no result has that prefix, the
        second result's code is returned when there is one (the original
        fallback), otherwise the first result's code. ``None`` when the API
        returns no results at all.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status.
    """
    coords = {'lat': lat, 'lon': lon}
    # A timeout keeps one stalled request from hanging a long batch run.
    response = requests.get(
        'https://geo.fcc.gov/api/census/block/find', params=coords, timeout=30
    )
    response.raise_for_status()
    results = response.json().get('results') or []

    # Prefer a San Francisco County match wherever it appears in the list.
    for result in results:
        if result['block_fips'][:5] == '06075':
            return result['block_fips']

    if not results:
        return None
    # No SF match: previously this indexed results[1] unconditionally and
    # raised IndexError when only one result came back.
    fallback = results[1] if len(results) > 1 else results[0]
    return fallback['block_fips']

In [11]:
# Spot-check the lookup with one known coordinate.
get_block(lat=37.791653, lon=-122.44458)

'060750132003015'

In [14]:
# Row count going into the block lookup.
df.shape[0]

42182

In [15]:
# Look up each distinct (lat, lon) pair once and reuse the answer for every
# row that shares it, instead of issuing one HTTP request per row.
coord_pairs = list(zip(df['lat'], df['lon']))
block_by_coord = {
    pair: get_block(*pair)  # pair is (lat, lon), matching get_block's order
    for pair in dict.fromkeys(coord_pairs)  # de-duplicates, keeps first-seen order
}
df['block'] = [block_by_coord[pair] for pair in coord_pairs]
df.head()

Unnamed: 0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,lon,lat,block
0,2002-10-09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-122.44458,37.791653,60750132003015
2,2007-02-08,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,-122.42439,37.76721,60750202002003
5,2019-11-26,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-122.418526,37.788204,60750120001006
6,2019-09-09,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,-122.4375,37.77982,60750158012019
7,2019-08-21,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-122.41251,37.800182,60750103002005


In [16]:
# A full block FIPS code has 15 characters; the first 12 identify the block
# group (state 2 + county 3 + tract 6 + block group 1). Slicing to a fixed
# width, rather than chopping the last three characters, makes this cell safe
# to re-run: a second pass leaves already-trimmed values unchanged.
df['block'] = df.block.str.slice(stop=12)
df.block.head()

0    060750132003
2    060750202002
5    060750120001
6    060750158012
7    060750103002
Name: block, dtype: object

In [57]:
# Length of the first row's trimmed code. .iloc picks the first row by
# position, so this still works if the row labelled 0 has been dropped.
len(df.block.iloc[0])

12

In [62]:
# Rows whose block code does not begin with the San Francisco County
# prefix "06075".
outer = df[df.block.str[:5].ne("06075")]
outer

Unnamed: 0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,lon,lat,block


In [30]:
# Share of all rows that fall outside San Francisco County.
outer.shape[0] / df.shape[0]

0.0002844815324071879

In [34]:
# Remember the current row count for a later before/after comparison.
prelen = df.shape[0]

In [63]:
# Current row count; subtract prelen to see how many rows were removed.
len(df)

42170

In [65]:
# Preview the first rows, now including the block column.
df.head()

Unnamed: 0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,lon,lat,block
0,2002-10-09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-122.44458,37.791653,60750132003
2,2007-02-08,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,-122.42439,37.76721,60750202002
5,2019-11-26,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-122.418526,37.788204,60750120001
6,2019-09-09,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,-122.4375,37.77982,60750158012
7,2019-08-21,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-122.41251,37.800182,60750103002


In [56]:
# Drop the evictions geocoded outside San Francisco County. The drop had been
# left commented out, so a fresh top-to-bottom run kept those rows even though
# the saved row counts reflect their removal. Filtering on the block prefix
# (rather than dropping by index label) keeps the cell safe to re-run,
# including after the index has been reset.
df = df[df.block.str.slice(stop=5) == "06075"].copy()
list(outer.index)

[801,
 2144,
 6356,
 10629,
 10961,
 12709,
 23795,
 23967,
 27268,
 28255,
 32416,
 38858]

In [66]:
# Move 'block' to the front. Selecting it by name (rather than "whichever
# column is last") means re-running the cell does not rotate the columns again.
df = df[['block'] + [col for col in df.columns if col != 'block']]
df.head(1)

Unnamed: 0,block,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,...,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,lon,lat
0,60750132003,2002-10-09,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,-122.44458,37.791653


In [71]:
# Renumber rows 0..n-1 and discard the old index labels.
df = df.reset_index(drop=True)
df.head(3)

Unnamed: 0,block,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,...,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,lon,lat
0,60750132003,2002-10-09,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,-122.44458,37.791653
1,60750202002,2007-02-08,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,-122.42439,37.76721
2,60750120001,2019-11-26,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-122.418526,37.788204


In [72]:
# Checkpoint the geocoded rows; the row index is written as the first column.
df.to_csv('temp_evictions_w_block.csv', index=True)

In [73]:
# Remove the coordinate columns now that each row carries a block id.
# Reassigning (instead of inplace=True) and ignoring already-missing columns
# lets this cell be re-run without raising KeyError.
df = df.drop(columns=['lat', 'lon'], errors='ignore')
df.head()

Unnamed: 0,block,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,...,Capital Improvement,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends
0,60750132003,2002-10-09,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,60750202002,2007-02-08,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,60750120001,2019-11-26,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,60750158012,2019-09-09,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,60750103002,2019-08-21,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Cumulative Evictions Before 2019 (through 2018-12-31)

In [74]:
# Parse the filing dates, then keep everything filed before 1 January 2019.
df['File Date'] = pd.to_datetime(df['File Date'])
df_cum = df.loc[df['File Date'] < pd.Timestamp('2019-01-01')]
# Sanity check: displays any 2019 filings that slipped through (expected: none).
df_cum.loc[df_cum['File Date'].dt.year.eq(2019)]

Unnamed: 0,block,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,...,Capital Improvement,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends


In [75]:
# Work on a copy so adding the counter column does not touch df.
df_cum = df_cum.copy()
df_cum['Total Evictions'] = 1  # one per row, so its sum is the eviction count

# Indicator columns to total per block, in output order.
count_columns = [
    'Non Payment', 'Breach', 'Nuisance', 'Illegal Use',
    'Failure to Sign Renewal', 'Access Denial', 'Unapproved Subtenant',
    'Owner Move In', 'Demolition', 'Capital Improvement',
    'Substantial Rehab', 'Ellis Act WithDrawal', 'Condo Conversion',
    'Roommate Same Unit', 'Other Cause', 'Late Payments',
    'Lead Remediation', 'Development', 'Good Samaritan Ends',
    'Total Evictions',
]
# Pass the string 'sum' rather than the builtin: pandas >= 2.1 warns that
# builtin callables given to agg will stop being mapped to the groupby sum.
grouped_cum = df_cum.groupby('block').agg(
    {'File Date': list, **{col: 'sum' for col in count_columns}}
)
grouped_cum.head()

Unnamed: 0_level_0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Total Evictions
block,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60750101001,"[2013-03-13 00:00:00, 2013-08-16 00:00:00, 201...",11,51,22,2,16,1,4,0,0,...,0,0,0,0,2,3,0,0,0,97
60750101002,"[2013-07-29 00:00:00, 2016-08-16 00:00:00, 201...",7,53,20,3,0,0,0,19,10,...,1,23,0,3,4,5,0,0,0,152
60750102001,"[2014-12-08 00:00:00, 2012-11-28 00:00:00, 201...",8,32,14,2,0,0,2,21,1,...,0,12,3,3,3,3,0,0,0,111
60750102002,"[1999-07-22 00:00:00, 2016-12-02 00:00:00, 201...",11,22,14,4,0,0,1,26,1,...,0,16,0,0,4,13,0,0,0,112
60750102003,"[2013-04-18 00:00:00, 2012-06-15 00:00:00, 201...",1,2,6,2,0,0,3,10,2,...,0,0,2,0,2,2,0,0,0,32


In [76]:
# Create the output folder if needed so the export works on a fresh checkout.
Path('clean_data').mkdir(parents=True, exist_ok=True)
# The 'File Date' column holds Python lists, so it is left out of the CSV.
grouped_cum.drop(columns=['File Date']).to_csv('clean_data/cumulative_eviction_agg_counts.csv')

# 2018 Evictions

In [77]:
# Select the evictions filed during calendar year 2018.
df_2018 = df.loc[df['File Date'].dt.year.eq(2018)]
df_2018.head(1)

Unnamed: 0,block,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,...,Capital Improvement,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends
7,60750207002,2018-12-19,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
# Work on a copy so adding the counter column does not touch df.
df_2018 = df_2018.copy()
df_2018['Total Evictions'] = 1  # one per row, so its sum is the eviction count

# Indicator columns to total per block, in output order.
count_columns = [
    'Non Payment', 'Breach', 'Nuisance', 'Illegal Use',
    'Failure to Sign Renewal', 'Access Denial', 'Unapproved Subtenant',
    'Owner Move In', 'Demolition', 'Capital Improvement',
    'Substantial Rehab', 'Ellis Act WithDrawal', 'Condo Conversion',
    'Roommate Same Unit', 'Other Cause', 'Late Payments',
    'Lead Remediation', 'Development', 'Good Samaritan Ends',
    'Total Evictions',
]
# Pass the string 'sum' rather than the builtin: pandas >= 2.1 warns that
# builtin callables given to agg will stop being mapped to the groupby sum.
grouped_2018 = df_2018.groupby('block').agg(
    {'File Date': list, **{col: 'sum' for col in count_columns}}
)
grouped_2018.head()

Unnamed: 0_level_0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Total Evictions
block,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60750101001,"[2018-12-21 00:00:00, 2018-11-16 00:00:00, 201...",0,2,1,0,3,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
60750101002,"[2018-06-06 00:00:00, 2018-01-11 00:00:00, 201...",1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
60750102001,"[2018-11-27 00:00:00, 2018-03-23 00:00:00]",1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
60750102002,"[2018-11-27 00:00:00, 2018-05-09 00:00:00, 201...",1,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
60750102003,"[2018-12-24 00:00:00, 2018-08-20 00:00:00, 201...",0,0,2,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,3


In [79]:
# Create the output folder if needed so the export works on a fresh checkout.
Path('clean_data').mkdir(parents=True, exist_ok=True)
# The 'File Date' column holds Python lists, so it is left out of the CSV.
grouped_2018.drop(columns=['File Date']).to_csv('clean_data/2018_eviction_agg_counts.csv')

# All Evictions, Aggregated by Block

In [80]:
df = df.copy()
df['Total Evictions'] = 1  # one per row, so its sum is the eviction count

# Indicator columns to total per block, in output order.
count_columns = [
    'Non Payment', 'Breach', 'Nuisance', 'Illegal Use',
    'Failure to Sign Renewal', 'Access Denial', 'Unapproved Subtenant',
    'Owner Move In', 'Demolition', 'Capital Improvement',
    'Substantial Rehab', 'Ellis Act WithDrawal', 'Condo Conversion',
    'Roommate Same Unit', 'Other Cause', 'Late Payments',
    'Lead Remediation', 'Development', 'Good Samaritan Ends',
    'Total Evictions',
]
# Pass the string 'sum' rather than the builtin: pandas >= 2.1 warns that
# builtin callables given to agg will stop being mapped to the groupby sum.
grouped = df.groupby('block').agg(
    {'File Date': list, **{col: 'sum' for col in count_columns}}
)
grouped.head()

Unnamed: 0_level_0,File Date,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,...,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Total Evictions
block,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60750101001,"[2013-03-13 00:00:00, 2013-08-16 00:00:00, 201...",11,54,27,2,22,1,4,0,0,...,0,0,0,0,3,3,0,0,0,109
60750101002,"[2013-07-29 00:00:00, 2016-08-16 00:00:00, 201...",7,53,25,3,0,0,0,19,10,...,1,23,0,3,4,5,0,0,0,157
60750102001,"[2014-12-08 00:00:00, 2019-01-15 00:00:00, 201...",9,34,16,2,0,0,2,21,1,...,0,12,3,3,3,5,0,0,0,118
60750102002,"[1999-07-22 00:00:00, 2016-12-02 00:00:00, 201...",12,24,19,4,0,0,1,27,1,...,0,16,0,0,4,13,0,0,0,122
60750102003,"[2013-04-18 00:00:00, 2012-06-15 00:00:00, 201...",1,2,8,2,0,0,3,10,2,...,0,0,2,0,2,2,0,0,0,34


In [81]:
# Filing dates for the block with the most evictions. The index holds block
# ids, so a plain `[0]` relied on the deprecated integer-as-position fallback;
# .iloc[0] asks for the first row explicitly.
grouped.sort_values('Total Evictions', ascending=False)['File Date'].iloc[0]

[Timestamp('2012-08-16 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2014-06-05 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2017-05-24 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2011-06-13 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2020-12-31 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2016-04-18 00:00:00'),
 Timestamp('2013-03-29 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2013-06-18 00:00:00'),
 Timestamp('2010-09-24 00:00:00'),
 Timestamp('2012-07-09 00:00:00'),
 Timestamp('2012-08-16 00:00:00'),
 Timestamp('2010-09-

In [82]:
# Create the output folder if needed so the export works on a fresh checkout.
Path('clean_data').mkdir(parents=True, exist_ok=True)
# The 'File Date' column holds Python lists, so it is left out of the CSV.
grouped.drop(columns=['File Date']).to_csv('clean_data/all_evictions_agg_counts.csv')