## San Francisco Eviction Notice Analytics
### Import the necessary libraries and the dataset, and have a look at it.

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import requests

In [3]:
# Load the eviction notices CSV; fields in this file are separated by semicolons.
df = pd.read_csv('Eviction_Notices.csv', sep = ';')

In [4]:
# Preview the first five rows to check the columns and value formats.
df.head()

Unnamed: 0,Eviction ID,Address,City,State,Eviction Notice Source Zipcode,File Date,Non Payment,Breach,Nuisance,Illegal Use,...,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends,Constraints Date,Supervisor District,Neighborhoods - Analysis Boundaries,Location,Shape
0,M220102,200 Block Of Powell Street,San Francisco,CA,94102.0,01/20/2022,False,False,True,False,...,False,False,False,False,False,,3.0,Tenderloin,,POINT (-122.40814 37.78689)
1,M211725,1500 Block Of Mason Street,San Francisco,CA,94103.0,11/17/2021,False,False,False,False,...,True,False,False,False,False,,3.0,Nob Hill,,POINT (-122.412 37.797653)
2,M210425,400 Block Of Eddy Street,San Francisco,CA,94102.0,03/25/2021,False,False,True,False,...,False,False,False,False,False,,6.0,Tenderloin,,POINT (-122.41508 37.783627)
3,M040867,900 Block Of Bush Street,San Francisco,CA,94109.0,05/19/2004,True,False,False,False,...,False,False,False,False,False,,3.0,Nob Hill,,POINT (-122.41292 37.78964)
4,M050572,3600 Block Of Vicente Street,San Francisco,CA,94121.0,4.06.2005,True,False,False,False,...,False,False,False,False,False,,4.0,Sunset/Parkside,,POINT (-122.50594 37.737988)


In [5]:
# df.shape

In [6]:
# df.info()

In [7]:
# Keep the neighbourhood name (as a one-column frame) and the contiguous run of
# eviction-reason flag columns, from 'Non Payment' through 'Good Samaritan Ends'.
area = df.loc[:, ['Neighborhoods - Analysis Boundaries']]
reasons = df.loc[:, 'Non Payment':'Good Samaritan Ends']

In [8]:
# Put the neighbourhood column and the reason flags side by side (aligned on the row index).
data = pd.concat(objs=[area, reasons], axis='columns')

In [9]:
# Show the first ten rows of the neighbourhood + reason-flag frame.
data.head(10)

Unnamed: 0,Neighborhoods - Analysis Boundaries,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,Capital Improvement,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends
0,Tenderloin,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,Nob Hill,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False
2,Tenderloin,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,Nob Hill,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,Sunset/Parkside,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,Chinatown,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
7,Sunset/Parkside,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,Nob Hill,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,Noe Valley,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


### Convert the eviction-reason flags from True/False into binary (1/0)

In [10]:
# Cast every eviction-reason flag (all columns after the neighbourhood name)
# to integers so the flags can be summed per neighbourhood.
data = data.astype({flag_name: int for flag_name in data.columns[1:]})


In [11]:
# Check that the reason flags are now 0/1 integers.
data.head()

Unnamed: 0,Neighborhoods - Analysis Boundaries,Non Payment,Breach,Nuisance,Illegal Use,Failure to Sign Renewal,Access Denial,Unapproved Subtenant,Owner Move In,Demolition,Capital Improvement,Substantial Rehab,Ellis Act WithDrawal,Condo Conversion,Roommate Same Unit,Other Cause,Late Payments,Lead Remediation,Development,Good Samaritan Ends
0,Tenderloin,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Nob Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,Tenderloin,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Nob Hill,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Sunset/Parkside,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Group the dataset by neighbourhood so I can summarise each one

In [12]:
# One row per neighbourhood, each column holding the number of notices that
# cited that reason. Rows with a missing neighbourhood are left out because
# groupby drops NaN keys by default.
new_df = data.groupby(by='Neighborhoods - Analysis Boundaries').sum()

In [13]:
# new_df.loc[new_df.index.isin(['Mission', 'Outer Richmond'])]

In [14]:
# Total per neighbourhood = sum of all reason flags in the row. A notice that
# lists several reasons contributes once per reason.
# Any existing 'total_notices' column is excluded first, so re-running this
# cell does not fold the previous total back into the sum.
reason_counts = new_df.drop(columns='total_notices', errors='ignore')
new_df['total_notices'] = reason_counts.sum(axis=1)

In [29]:
# Totals per neighbourhood, smallest first.
new_df['total_notices'].sort_values()

Neighborhoods - Analysis Boundaries
Lincoln Park                         7
McLaren Park                        17
Presidio                            20
Mission Bay                         45
Treasure Island                     83
Seacliff                            84
Japantown                          107
Golden Gate Park                   153
Glen Park                          309
Twin Peaks                         378
Financial District/South Beach     385
Chinatown                          507
Presidio Heights                   515
Visitacion Valley                  523
Portola                            523
Potrero Hill                       743
Western Addition                   750
Outer Mission                      803
West of Twin Peaks                 815
Lone Mountain/USF                  960
Oceanview/Merced/Ingleside         985
North Beach                       1093
Russian Hill                      1140
Inner Richmond                    1146
Inner Sunset                

In [15]:
# Totals ordered from most to fewest notices; reset_index turns the
# neighbourhood index back into an ordinary column.
sorted_val = new_df['total_notices'].sort_values(ascending=False).reset_index()

In [16]:
# Neighbourhood names as an array, in descending order of total notices.
city_names = sorted_val.loc[:, 'Neighborhoods - Analysis Boundaries'].values

In [17]:
# Make the names usable in a URL by turning spaces into '+'.
# NOTE(review): other characters (e.g. the '/' in 'Sunset/Parkside') are not escaped.
city_names_strip = [neighbourhood.replace(' ', '+') for neighbourhood in city_names]

### Using an API, gather the latitude and longitude of each neighbourhood.

In [33]:
# Look up a latitude/longitude for every neighbourhood via geocode.xyz.
# Result: dct maps the '+'-encoded neighbourhood name to [latitude, longitude],
# stored exactly as the API returns them.
dct = {}

for city in city_names_strip:
    url = f'https://geocode.xyz/{city},+San+Francisco?json=1'

    # A timeout stops one stalled request from hanging the whole notebook, and
    # a network or decoding failure skips this neighbourhood instead of
    # aborting the loop.
    try:
        res = requests.get(url, timeout=30)
        payload = res.json()
    except (requests.RequestException, ValueError) as err:
        print(f'{city}: request failed ({err})')
        continue

    # Responses without the expected fields are skipped, but reported rather
    # than swallowed silently.
    try:
        countryname = payload['standard']['countryname']
        lat = payload['latt']
        long = payload['longt']
    except (KeyError, TypeError):
        print(f'{city}: no usable coordinates in response')
        continue

    # Only keep matches the geocoder placed in the United States.
    if countryname == 'United States of America':
        dct[city] = [lat, long]

In [19]:
# Undo the '+' encoding so the keys are plain neighbourhood names again.
new_dct = {encoded.replace('+', ' '): coords for encoded, coords in dct.items()}

In [20]:
# Hard-coded snapshot of neighbourhood -> [latitude, longitude] (both stored as
# strings). Assigning it rebinds new_dct, so the map can be built without
# calling the geocoding API again.
# NOTE(review): several entries share the identical pair
# ('37.68461', '-122.39846'), and 'Lone Mountain/USF' has a longitude of about
# -83.6 while every other entry is near -122 -- these look like geocoder
# misses and should be verified.
new_dct = {
    'Bayview Hunters Point': ['37.66844', '-122.41215'],
    'Bernal Heights': ['37.74239', '-122.41152'],
    'Castro': ['37.76262', '-122.43554'],
    'Chinatown': ['37.79499', '-122.40767'],
    'Excelsior': ['37.72608', '-122.43329'],
    'Golden Gate Park': ['37.76822', '-122.46457'],
    'Haight Ashbury': ['37.77544', '-122.44774'],
    'Hayes Valley': ['37.74476', '-122.42244'],
    'Inner Richmond': ['37.68461', '-122.39846'],
    'Inner Sunset': ['37.68461', '-122.39846'],
    'Lakeshore': ['37.68461', '-122.39846'],
    'Lincoln Park': ['37.58261', '-122.46860'],
    'Lone Mountain/USF': ['36.38545', '-83.60091'],
    'Marina': ['37.55449', '-122.28599'],
    'Mission': ['37.68461', '-122.39846'],
    'Mission Bay': ['37.77137', '-122.38744'],
    'Nob Hill': ['37.68461', '-122.39846'],
    'Outer Mission': ['37.68461', '-122.39846'],
    'Outer Richmond': ['37.68461', '-122.39846'],
    'Portola': ['37.79616', '-122.45475'],
    'Potrero Hill': ['37.75519', '-122.39778'],
    'Presidio': ['37.78900', '-122.44713'],
    'Presidio Heights': ['37.78900', '-122.44713'],
    'Russian Hill': ['37.79843', '-122.41499'],
    'Seacliff': ['37.68802', '-122.49431'],
    'South of Market': ['37.79446', '-122.39486'],
    'Parkside': ['37.56826', '-122.35710'],
    'Treasure Island': ['37.81360', '-122.37070'],
    'Visitacion Valley': ['37.70945', '-122.40476'],
}

In [21]:
# Attach each neighbourhood's notice total to its [lat, long] entry as element 2.
# sorted_val holds the neighbourhood name in column 0 and the total in column 1,
# so a single name -> total lookup replaces scanning every row for every key.
notice_counts = dict(zip(sorted_val.iloc[:, 0], sorted_val.iloc[:, 1]))

unmatched = []
for key, value in new_dct.items():
    if key not in notice_counts:
        unmatched.append(key)
        continue
    if len(value) > 2:
        # Cell was run before: overwrite the stored total instead of appending again.
        value[2] = notice_counts[key]
    else:
        value.append(notice_counts[key])

# Entries without a total cannot be coloured later, so report them here
# instead of skipping them silently.
if unmatched:
    print(f'No notice total found for: {unmatched}')


### Assign each neighbourhood a colour depending on how many eviction notices it has

In [22]:
def assign_colour(value):
    """Return the marker colour for a neighbourhood's total notice count.

    Counts are bucketed in steps of 500: below 500 is 'green', then 'blue',
    'purple', 'pink', 'orange' and 'black' (below 3000). Anything that is not
    below 3000 is 'red'.
    """
    upper_bounds = (500, 1000, 1500, 2000, 2500, 3000)
    colours = ('green', 'blue', 'purple', 'pink', 'orange', 'black')

    # Bounds are ascending, so the first one the value falls under picks the bucket.
    for bound, colour in zip(upper_bounds, colours):
        if value < bound:
            return colour
    return 'red'

In [31]:
# Store the marker colour as element 3 of each entry: [lat, long, total, colour].
# The colour is written into slot 3 rather than always appended, so re-running
# this cell updates the colour in place instead of growing the list.
for key, value in new_dct.items():
    colour = assign_colour(value[2])
    if len(value) > 3:
        value[3] = colour
    else:
        value.append(colour)

### Loop through the dictionary and add a coloured marker to a folium map for each neighbourhood

In [32]:
# Base map centred on San Francisco.
m = folium.Map([37.791530, -122.410760])

# One marker per neighbourhood; each entry is read as
# [latitude, longitude, total, colour].
for neighbourhood, info in new_dct.items():
    marker = folium.Marker(
        location=(info[0], info[1]),  # marker coordinates
        popup=neighbourhood,  # pop-up label shows the neighbourhood name
        icon=folium.Icon(color=info[3]),  # colour bucket from the notice total
    )
    marker.add_to(m)

m

### Conclusion.
### From looking at the map I can see that the fewest evictions happen on the outskirts of the city, and that the number increases the further in to the city centre you go. However, the two neighbourhoods with the most evictions lie away from the city centre. The reason given for the evictions there is 'Owner Move In' (the owner is moving in), which suggests that they might be becoming more desirable places to live.