# Exploring Eviction Data

In [30]:
import pandas as pd
import numpy as np
from collections import Counter

import matplotlib as plt
import gmplot

In [8]:
this_file = open("/Users/edie/Box Sync/GitThings/loveSF/Eviction_Notices.csv", "r")
evictions = pd.read_csv(this_file)

evictions.head()

Unnamed: 0,Eviction ID,Address,City,State,Eviction Notice Source Zipcode,File Date,Non Payment,Breach,Nuisance,Illegal Use,...,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Constraints Date,Supervisor District,Neighborhoods - Analysis Boundaries,Location
0,M162268,2700 Block Of San Bruno Avenue,San Francisco,CA,94134,06/24/2016,False,False,False,False,...,False,False,False,False,False,False,08/22/2021,9.0,Portola,"(37.727050746607, -122.403281820714)"
1,M161957,300 Block Of Park Street,San Francisco,CA,94110,06/06/2016,False,False,False,False,...,False,False,False,False,False,False,,9.0,Bernal Heights,"(37.7364374645373, -122.417989910582)"
2,M162256,200 Block Of Seneca Avenue,San Francisco,CA,94112,06/29/2016,False,False,False,False,...,False,False,False,False,False,False,08/25/2021,11.0,Outer Mission,"(37.7205429316262, -122.443264852669)"
3,M162135,200 Block Of Dolores Street,San Francisco,CA,94103,06/28/2016,False,True,False,False,...,False,False,False,False,False,False,,8.0,Castro/Upper Market,"(37.7652067507312, -122.426591617441)"
4,M161901,1200 Block Of 9th Avenue,San Francisco,CA,94122,06/02/2016,False,False,False,False,...,False,False,False,False,False,False,,5.0,Inner Sunset,"(37.764977785911, -122.4664456379)"


In [10]:
list(evictions)

['Eviction ID',
 'Address',
 'City',
 'State',
 'Eviction Notice Source Zipcode',
 'File Date',
 'Non Payment',
 'Breach',
 'Nuisance',
 'Illegal Use',
 'Failure to Sign Renewal',
 'Access Denial',
 'Unapproved Subtenant',
 'Owner Move In',
 'Demolition',
 'Capital Improvement',
 'Substantial Rehab',
 'Ellis Act WithDrawal',
 'Condo Conversion',
 'Roommate Same Unit',
 'Other Cause',
 'Late Payments',
 'Lead Remediation',
 'Development',
 'Good Samaritan Ends',
 'Constraints Date',
 'Supervisor District',
 'Neighborhoods - Analysis Boundaries',
 'Location']

<font color="navy">
## How many percent of evictions were due to non-payment?
Overall, 5.96%. I feel like this is inaccurate.

In [14]:
100*(len(evictions[evictions["Non Payment"]==True]) * (len(evictions["Non Payment"])**(-1)))

5.961844197138314

<font color="navy">
## Which zip code has most of the evictions?
The top 5 most evicted zip codes are: 94110, 94109, 94117, 94112, 94122.

In [21]:
Counter(evictions["Eviction Notice Source Zipcode"]).most_common()[0:5]

[('94110', 4010),
 ('94109', 2323),
 ('94117', 2302),
 ('94112', 2071),
 ('94122', 2011)]

<font color="navy">
## Are these due to non-payment?
Yes.

In [70]:
top_5 = [94110, 94109, 94117, 94112, 94122]

In [186]:
def getNeighborhoods(zip_code):
    neighborhoods = list(set(evictions[evictions["Eviction Notice Source Zipcode"]==zip_code]["Neighborhoods - Analysis Boundaries"]))
    # START AT 1 DUE TO NAN
    return([str(x) for x in neighborhoods if "nan" not in str(x)])

In [187]:
def getNonPaidPercent(zip_code):
    df = evictions[evictions["Eviction Notice Source Zipcode"]==zip_code]
    if len(df["Non Payment"])==0:
        return(0)
    else:
        return(100*(len(df[df["Non Payment"]==True]) * (len(df["Non Payment"])**(-1))))

In [188]:
def getLatePercent(zip_code):
    df = evictions[evictions["Eviction Notice Source Zipcode"]==zip_code]
    if len(df["Non Payment"])==0:
        return(0)
    else:
        return(100*(len(df[df["Late Payments"]==True]) * (len(df["Late Payments"])**(-1))))

In [189]:
def getDemolition(zip_code):
    df = evictions[evictions["Eviction Notice Source Zipcode"]==zip_code]
    if len(df["Non Payment"])==0:
        return(0)
    else:
        return(100*(len(df[df["Demolition"]==True]) * (len(df["Demolition"])**(-1))))

In [190]:
def getNuisance(zip_code):
    df = evictions[evictions["Eviction Notice Source Zipcode"]==zip_code]
    if len(df["Non Payment"])==0:
        return(0)
    else:
        return(100*(len(df[df["Nuisance"]==True]) * (len(df["Nuisance"])**(-1))))

In [181]:
top5_df = pd.DataFrame({"zip":top_5})
top5_df["nonpaid"] = [getNonPaidPercent(x) for x in top_5]
top5_df["late"] = [getLatePercent(x) for x in top_5]
top5_df["demolition"] = [getDemolition(x) for x in top_5]
top5_df["nuisance"] = [getNuisance(x) for x in top_5]
top5_df["neighborhoods"] = [", ".join(getNeighborhoods(x)) for x in top_5]

In [182]:
top5_df

Unnamed: 0,zip,nonpaid,late,demolition,nuisance,neighborhoods
0,94110,7.284768,3.752759,5.298013,13.024283,"Glen Park, Potrero Hill, Hayes Valley, Mission..."
1,94109,7.6,4.0,0.8,32.8,"Nob Hill, Japantown, Pacific Heights, Hayes Va..."
2,94117,3.272727,3.272727,2.545455,11.636364,"South of Market, Lone Mountain/USF, Hayes Vall..."
3,94112,11.111111,4.700855,5.982906,9.82906,"Sunset/Parkside, West of Twin Peaks, Mission, ..."
4,94122,8.376963,3.664921,5.235602,7.329843,"Nob Hill, Mission, Inner Sunset, Excelsior, Ou..."


<font color="navy">
## How about in general?
Let's check out all zip code rates...

In [197]:
zip_nums = set(evictions[evictions["Eviction Notice Source Zipcode"]>0]["Eviction Notice Source Zipcode"])
wrong = set([9, 94, 941, 9410, 9411, 9412,'941??', "/4132", "9", '9132', '94', '941', '941 1','9410', "9411", "9412",'9413',"9424"])
all_zips = set(zip_nums - wrong)
all_zips = [int(x) for x in all_zips]

In [198]:
all_df = pd.DataFrame({"zip":all_zips})
all_df["nonpaid"] = [getNonPaidPercent(x) for x in all_zips]
all_df["late"] = [getLatePercent(x) for x in all_zips]
all_df["demolition"] = [getDemolition(x) for x in all_zips]
all_df["nuisance"] = [getNuisance(x) for x in all_zips]
all_df["neighborhoods"] = [", ".join(getNeighborhoods(x)) for x in all_zips]

In [202]:
all_df = all_df.drop_duplicates()
all_df.head()

Unnamed: 0,zip,nonpaid,late,demolition,nuisance,neighborhoods
0,94194,0.0,0.0,0.0,0.0,
1,94116,7.207207,5.405405,8.108108,9.009009,"Sunset/Parkside, West of Twin Peaks, Inner Sunset"
2,94158,0.0,0.0,0.0,100.0,Mission Bay
3,94115,4.411765,3.676471,3.676471,15.441176,"Lone Mountain/USF, Japantown, Pacific Heights,..."
4,94118,2.525253,2.525253,2.020202,12.121212,"Seacliff, Inner Richmond, Lone Mountain/USF, M..."


In [203]:
all_df.sort_values(by="nonpaid", ascending=False)

Unnamed: 0,zip,nonpaid,late,demolition,nuisance,neighborhoods
20,94129,100.0,0.0,0.0,0.0,Presidio
30,94105,50.0,0.0,0.0,0.0,"Visitacion Valley, Twin Peaks"
16,94124,20.20202,3.030303,4.040404,15.151515,"South of Market, Potrero Hill, Hayes Valley, R..."
21,94130,16.666667,0.0,0.0,83.333333,Treasure Island
25,94134,11.881188,1.980198,4.950495,16.831683,"McLaren Park, Portola, Excelsior, Visitacion V..."
6,94112,11.111111,4.700855,5.982906,9.82906,"Sunset/Parkside, West of Twin Peaks, Mission, ..."
32,94108,11.111111,1.851852,3.703704,24.074074,"Chinatown, Nob Hill"
18,94122,8.376963,3.664921,5.235602,7.329843,"Nob Hill, Mission, Inner Sunset, Excelsior, Ou..."
31,94107,7.792208,1.298701,5.194805,14.285714,"South of Market, Potrero Hill, Mission Bay, Mi..."
14,94127,7.692308,5.128205,2.564103,5.128205,"Castro/Upper Market, West of Twin Peaks"


<font color="navy">
## Can we visualize San Francisco?
Sure, can. Will do more later.

In [28]:
gmap = gmplot.GoogleMapPlotter.from_geocode("San Francisco")

In [33]:
# CREATES HTML FILE
gmap.draw("mymap.html")