# NYC Evictions Data Exploration

Exploring eviction data from [NYC OpenData](https://data.cityofnewyork.us/City-Government/Evictions/6z8x-wfk4/about_data).

In [1]:
import pandas as pd

# Read the evictions CSV export, report its dimensions, then preview the first rows.
df = pd.read_csv('../data/opendata_evictions.csv')
print(f"Rows: {len(df):,}", f"Columns: {len(df.columns)}", sep="\n")
df.head()

Rows: 121,108
Columns: 20


Unnamed: 0,Court Index Number,Docket Number,Eviction Address,Eviction Apartment Number,Executed Date,Marshal First Name,Marshal Last Name,Residential/Commercial,BOROUGH,Eviction Postcode,Ejectment,Eviction/Legal Possession,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,54925/19,157903,5505 4TH AVENUE,3R,06/03/2019,Ronald,Pazant,Residential,BROOKLYN,11220,Not an Ejectment,Possession,40.643566,-74.015585,7.0,38.0,76.0,3014939.0,3008310000.0,Sunset Park West
1,325612/24,141327,136 KINGSLAND AVENUE,2L,06/06/2025,Justin,Grossman,Residential,BROOKLYN,11222,Not an Ejectment,Possession,40.721069,-73.94089,1.0,34.0,449.0,3069851.0,3028340000.0,East Williamsburg
2,54410/17,11433,857 WOODWARD AVENUE,1R,08/24/2017,Edward,Guida,Residential,QUEENS,11385,Not an Ejectment,Possession,40.702958,-73.90238,5.0,30.0,585.0,4083380.0,4034830000.0,Ridgewood
3,69704/18,83673,491 TOMPKINS AVENUE,2R,09/10/2018,Justin,Grossman,Residential,BROOKLYN,11216,Not an Ejectment,Possession,40.680489,-73.943334,3.0,36.0,269.0,3053482.0,3018550000.0,Bedford
4,313015/22,32713,73-50 BELL BLVD.,4C,03/04/2024,Edward,Guida,Residential,QUEENS,11364,Not an Ejectment,Possession,40.739574,-73.757708,11.0,23.0,129103.0,4444449.0,4077320000.0,Oakland Gardens


In [2]:
# Column-level overview: non-null count and dtype for every column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121108 entries, 0 to 121107
Data columns (total 20 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   Court Index Number         121108 non-null  object 
 1   Docket Number              121108 non-null  int64  
 2   Eviction Address           121108 non-null  object 
 3   Eviction Apartment Number  103295 non-null  object 
 4   Executed Date              121108 non-null  object 
 5   Marshal First Name         121108 non-null  object 
 6   Marshal Last Name          121108 non-null  object 
 7   Residential/Commercial     121108 non-null  object 
 8   BOROUGH                    121108 non-null  object 
 9   Eviction Postcode          121108 non-null  int64  
 10  Ejectment                  121108 non-null  object 
 11  Eviction/Legal Possession  121108 non-null  object 
 12  Latitude                   110398 non-null  float64
 13  Longitude                  11

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Docket Number,Eviction Postcode,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL
count,121108.0,121108.0,110398.0,110398.0,110398.0,110398.0,110398.0,110015.0,110015.0
mean,125087.127927,10785.763451,40.751021,-73.910603,7.94268,23.766345,8432.330024,2709411.0,2632193000.0
std,128499.481751,528.232766,0.091029,0.070037,4.569384,13.871514,21424.406948,1141431.0,1089891000.0
min,1.0,0.0,40.49905,-74.252159,1.0,1.0,1.0,1000000.0,0.0
25%,28401.0,10453.0,40.671717,-73.946634,4.0,12.0,197.0,2009182.0,2028020000.0
50%,90134.0,10472.0,40.752059,-73.912046,8.0,19.0,374.0,3000000.0,3001700000.0
75%,134979.5,11229.0,40.835956,-73.873986,12.0,36.0,964.0,3345008.0,3074221000.0
max,496987.0,19458.0,40.912869,-73.70143,18.0,51.0,157903.0,5171959.0,5080490000.0


In [None]:
import matplotlib.pyplot as plt
from pathlib import Path

# Group evictions by month.
# 'Executed Date' comes out of read_csv as plain strings (object dtype, e.g.
# "06/03/2019"), and the `.dt` accessor only works on datetime-like values, so
# the column has to be parsed first. Parsing into a local Series (rather than
# overwriting the column) keeps `df` unchanged and this cell safe to re-run.
# The explicit month/day/year format avoids any day-first guessing and fails
# loudly on malformed dates.
executed_dates = pd.to_datetime(df['Executed Date'], format='%m/%d/%Y')
monthly = df.groupby(executed_dates.dt.to_period('M')).size()

# Build a {year: total renter-occupied units} lookup from the ACS tenure files.
# The year is taken from the trailing "_<year>" part of each file name, and the
# 'renter_occupied' column is summed over all rows of that file.
data_dir = Path('../data')
renter_units_by_year = {
    int(tenure_csv.stem.rsplit('_', 1)[-1]): pd.read_csv(tenure_csv)['renter_occupied'].sum()
    for tenure_csv in data_dir.glob('acs_tenure_*.csv')
}

# Get renter units for a year (with fallback for missing years like 2020, 2025)
def get_renter_units(year, units_by_year=None):
    """Return the renter-occupied unit total to use for ``year``.

    Parameters
    ----------
    year : int
        Calendar year to look up.
    units_by_year : mapping of int -> number, optional
        Lookup table to use. Defaults to the notebook-level
        ``renter_units_by_year`` built from the ACS files.

    Returns
    -------
    The value stored for ``year`` if present; otherwise the value for the
    numerically closest available year (the earlier year wins a tie).

    Raises
    ------
    ValueError
        If the lookup table is empty, so there is nothing to fall back to.
    """
    table = renter_units_by_year if units_by_year is None else units_by_year
    if not table:
        raise ValueError(
            f"No renter-unit data available to look up year {year}; "
            "the ACS lookup table is empty."
        )
    if year in table:
        return table[year]
    # Sorting first makes ties deterministic: min() keeps the first of two
    # equally distant years, so the earlier one wins regardless of load order.
    closest = min(sorted(table), key=lambda known_year: abs(known_year - year))
    return table[closest]

# Compute annualized eviction rate per month.
# Each month's count is divided by the renter-unit figure for that month's
# calendar year, then multiplied by 12 to express it as a yearly rate.
units_per_month = pd.Series(
    [get_renter_units(month.year) for month in monthly.index],
    index=monthly.index,
    dtype=float,
)
monthly_rate = monthly / units_per_month * 12  # annualized

# Dual-axis figure: raw monthly counts on the left axis, annualized rate on the right.
count_color, rate_color = 'steelblue', 'coral'
month_labels = monthly.index.astype(str)  # string labels, shared by both lines

fig, ax1 = plt.subplots(figsize=(14, 5))

# Left axis: eviction counts
ax1.plot(month_labels, monthly.values, linewidth=1, color=count_color, label='Evictions')
ax1.set_xlabel('Month')
ax1.set_ylabel('Evictions (count)', color=count_color)
ax1.tick_params(axis='y', labelcolor=count_color)
ax1.tick_params(axis='x', rotation=90)
# Keep every 6th tick so the month labels stay readable
ax1.set_xticks(ax1.get_xticks()[::6])

# Right axis: annualized rate, sharing the same x-axis
ax2 = ax1.twinx()
ax2.plot(month_labels, monthly_rate.values, linewidth=1, color=rate_color, label='Eviction Rate')
ax2.set_ylabel('Eviction Rate (annualized)', color=rate_color)
ax2.tick_params(axis='y', labelcolor=rate_color)

ax1.set_title('NYC Executed Evictions by Month')
fig.tight_layout()
plt.show()