# NYC Evictions Data Exploration

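This notebook explores executed-eviction records from [NYC OpenData](https://data.cityofnewyork.us/City-Government/Evictions/6z8x-wfk4/about_data), looking at counts over time and by borough, and at eviction rates relative to ACS renter-occupied housing units.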

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Single root for every input file, so the evictions CSV and the ACS tables
# are always read from the same directory.
data_dir = Path('../data')

# Load evictions data and parse the execution date into datetimes
df = pd.read_csv(data_dir / 'opendata_evictions.csv')
df['Executed Date'] = pd.to_datetime(df['Executed Date'])
print(f"Evictions: {len(df):,} rows")

# Load ACS housing tenure data (renter-occupied units by year and borough)
renter_units_by_year = {}  # year -> total NYC renter units
renter_by_year_borough = {}  # year -> Series(borough -> renter units)

# glob() yields files in filesystem order; sorting makes the load order (and
# therefore the dict insertion order) the same on every machine.
for f in sorted(data_dir.glob('acs_housing_tenure_*.csv')):
    # The year is the last underscore-separated token of the file stem,
    # e.g. acs_housing_tenure_2018.csv -> 2018
    year = int(f.stem.split('_')[-1])
    acs = pd.read_csv(f)
    renter_units_by_year[year] = acs['renter_occupied'].sum()
    # Upper-case the borough names before indexing by them
    # NOTE(review): presumably to match the casing of df['BOROUGH'] — confirm
    acs['borough'] = acs['borough'].str.upper()
    renter_by_year_borough[year] = acs.set_index('borough')['renter_occupied']

print(f"ACS tenure data: {sorted(renter_units_by_year.keys())}")

# Helper functions with fallback for missing years (2020, 2025+)
def get_renter_units(year):
    """Return the citywide renter-occupied unit total for ``year``.

    When ``year`` has no entry in ``renter_units_by_year``, the entry for the
    nearest loaded year is returned instead (the earlier year wins a tie).
    """
    lookup_year = year
    if lookup_year not in renter_units_by_year:
        # Iterating in ascending order makes min() prefer the earlier year on ties
        lookup_year = min(sorted(renter_units_by_year), key=lambda y: abs(y - year))
    return renter_units_by_year[lookup_year]

def get_renter_units_borough(year, borough):
    """Return the renter-occupied unit count for ``borough`` in ``year``.

    When ``year`` has no entry in ``renter_by_year_borough``, the nearest
    loaded year is used instead (the earlier year wins a tie). ``borough`` is
    looked up in that year's Series.
    """
    lookup_year = year
    if lookup_year not in renter_by_year_borough:
        # Iterating in ascending order makes min() prefer the earlier year on ties
        lookup_year = min(sorted(renter_by_year_borough), key=lambda y: abs(y - year))
    return renter_by_year_borough[lookup_year][borough]

# Preview the first rows of the evictions frame as the cell's output
df.head()

In [29]:
# Column names, non-null counts, and dtypes of the evictions frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121108 entries, 0 to 121107
Data columns (total 20 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   Court Index Number         121108 non-null  object        
 1   Docket Number              121108 non-null  int64         
 2   Eviction Address           121108 non-null  object        
 3   Eviction Apartment Number  103295 non-null  object        
 4   Executed Date              121108 non-null  datetime64[ns]
 5   Marshal First Name         121108 non-null  object        
 6   Marshal Last Name          121108 non-null  object        
 7   Residential/Commercial     121108 non-null  object        
 8   BOROUGH                    121108 non-null  object        
 9   Eviction Postcode          121108 non-null  int64         
 10  Ejectment                  121108 non-null  object        
 11  Eviction/Legal Possession  121108 non-null  object  

In [30]:
# Summary statistics (count, mean, min, quartiles, max, std) per described column
df.describe()

Unnamed: 0,Docket Number,Executed Date,Eviction Postcode,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL
count,121108.0,121108,121108.0,110398.0,110398.0,110398.0,110398.0,110398.0,110015.0,110015.0
mean,125087.127927,2021-02-28 22:53:11.333355008,10785.763451,40.751021,-73.910603,7.94268,23.766345,8432.330024,2709411.0,2632193000.0
min,1.0,2017-01-03 00:00:00,0.0,40.49905,-74.252159,1.0,1.0,1.0,1000000.0,0.0
25%,28401.0,2018-04-27 00:00:00,10453.0,40.671717,-73.946634,4.0,12.0,197.0,2009182.0,2028020000.0
50%,90134.0,2019-10-25 00:00:00,10472.0,40.752059,-73.912046,8.0,19.0,374.0,3000000.0,3001700000.0
75%,134979.5,2024-05-03 00:00:00,11229.0,40.835956,-73.873986,12.0,36.0,964.0,3345008.0,3074221000.0
max,496987.0,2025-12-30 00:00:00,19458.0,40.912869,-73.70143,18.0,51.0,157903.0,5171959.0,5080490000.0
std,128499.481751,,528.232766,0.091029,0.070037,4.569384,13.871514,21424.406948,1141431.0,1089891000.0


In [None]:
# Citywide evictions by month with dual y-axes (count + rate)

# Group evictions by calendar month. groupby() only returns months that have
# at least one row, so a month with no executed evictions would be missing
# from the series. Because the x-axis below is categorical, a missing month
# would compress the time axis and could drop a moratorium marker entirely.
# Reindexing onto a continuous monthly range records those months as 0.
monthly = df.groupby(df['Executed Date'].dt.to_period('M')).size()
if not monthly.empty:
    all_months = pd.period_range(monthly.index.min(), monthly.index.max(), freq='M')
    monthly = monthly.reindex(all_months, fill_value=0)

# Annualized eviction rate: (monthly count / renter-occupied units) * 12.
# get_renter_units is called with each month's calendar year.
units_by_month = pd.Series(
    [get_renter_units(period.year) for period in monthly.index],
    index=monthly.index,
    dtype=float,
)
monthly_rate = (monthly / units_by_month) * 12  # annualized

# Plot with dual y-axes
fig, ax1 = plt.subplots(figsize=(14, 5))

# Periods are plotted as 'YYYY-MM' strings, i.e. one categorical tick per month
x_labels = monthly.index.astype(str)
ax1.plot(x_labels, monthly.values, linewidth=1, color='steelblue', label='Evictions')
ax1.set_xlabel('Month')
ax1.set_ylabel('Evictions (count)', color='steelblue')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax1.tick_params(axis='x', rotation=90)
# Keep every sixth tick so the month labels stay readable
ax1.set_xticks(ax1.get_xticks()[::6])

ax2 = ax1.twinx()
ax2.plot(x_labels, monthly_rate.values, linewidth=1, color='coral', label='Eviction Rate')
ax2.set_ylabel('Eviction Rate (annualized)', color='coral')
ax2.tick_params(axis='y', labelcolor='coral')

# Add eviction moratorium lines (March 2020 - January 2022)
moratorium_start = '2020-03'
moratorium_end = '2022-01'
x_list = list(x_labels)
moratorium_markers = (
    (moratorium_start, ' Moratorium\n start'),
    (moratorium_end, ' Moratorium\n end'),
)
for marker_month, caption in moratorium_markers:
    # Only draw a marker whose month lies inside the plotted range
    if marker_month in x_list:
        ax1.axvline(x=marker_month, color='gray', linestyle=':', linewidth=1.5)
        ax1.text(marker_month, ax1.get_ylim()[1] * 0.95, caption, fontsize=8, va='top')

ax1.set_title('NYC Executed Evictions by Month')
fig.tight_layout()

# Save plot (parents=True also creates missing intermediate directories)
plots_dir = Path('../plots')
plots_dir.mkdir(parents=True, exist_ok=True)
plot_path = plots_dir / 'city_executed_evictions_by_mo.png'
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
print(f"Saved to {plot_path}")

plt.show()

In [None]:
# Citywide evictions by year with dual y-axes (count + rate)

# Annual eviction counts keyed by calendar year
yearly = df.groupby(df['Executed Date'].dt.year).size()

# Eviction rate per year (per 100 renter-occupied units), filled in year by year
yearly_rate = yearly.copy().astype(float)
for yr, n_evictions in yearly.items():
    yearly_rate[yr] = (n_evictions / get_renter_units(yr)) * 100

# Summary table: counts, the renter-unit denominator, and the resulting rate
renter_units_column = [get_renter_units(yr) for yr in yearly.index]
yearly_table = pd.DataFrame({
    'Evictions': yearly,
    'Renter Units': renter_units_column,
    'Rate (per 100 units)': yearly_rate.round(3)
})
display(yearly_table)

# Dual-axis figure: bars for counts (left axis), line for rate (right axis)
fig, ax1 = plt.subplots(figsize=(10, 5))
x_labels = yearly.index.astype(str)

bars = ax1.bar(x_labels, yearly.values, color='steelblue', alpha=0.7)
ax1.set_xlabel('Year')
ax1.set_ylabel('Evictions (count)', color='steelblue')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax1.set_ylim(bottom=0)

# Write each year's count just above its bar
for bar_pos, count in enumerate(yearly.values):
    ax1.text(bar_pos, count + 200, f'{count:,}', ha='center', fontsize=9)

ax2 = ax1.twinx()
ax2.plot(x_labels, yearly_rate.values, color='coral', marker='o', linewidth=2, label='Eviction Rate')
ax2.set_ylabel('Eviction Rate (per 100 renter units)', color='coral')
ax2.tick_params(axis='y', labelcolor='coral')
ax2.set_ylim(bottom=0)

ax1.set_title('NYC Executed Evictions by Year')
fig.tight_layout()

# Write the figure to the plots directory
plots_dir = Path('../plots')
plots_dir.mkdir(exist_ok=True)
plot_path = plots_dir / 'city_executed_evictions_by_yr.png'
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
print(f"Saved to {plot_path}")

plt.show()

In [None]:
# Seasonality: average evictions by month of year (2017-2025)

# Tag each eviction with its month number, total per month-of-year, then
# divide by the number of distinct calendar years present in the data
executed = df['Executed Date']
df['month'] = executed.dt.month
monthly_totals = df.groupby('month').size()
n_years = executed.dt.year.nunique()
monthly_avg = monthly_totals / n_years

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Bar chart of the per-month averages
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(month_names, monthly_avg.values, color='steelblue')
ax.set_title('NYC Executed Evictions by Month of Year (2017-2025 avg)')
ax.set_xlabel('Month')
ax.set_ylabel('Average Evictions')

# Print the rounded average just above each bar
for bar_pos, avg in enumerate(monthly_avg.values):
    ax.text(bar_pos, avg + 20, f'{avg:.0f}', ha='center', fontsize=9)

fig.tight_layout()

# Write the figure to the plots directory
plots_dir = Path('../plots')
plots_dir.mkdir(exist_ok=True)
plot_path = plots_dir / 'city_executed_evictions_seasonality.png'
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
print(f"Saved to {plot_path}")

plt.show()

In [None]:
# Eviction count and rate by borough

# Renter-occupied units per borough, taken from the 2023 ACS table
renter_by_borough = renter_by_year_borough[2023]

# Total evictions recorded for each borough
evictions_by_borough = df['BOROUGH'].value_counts()

# Dataset length in years, measured between the earliest and latest executed dates
executed = df['Executed Date']
years_of_data = (executed.max() - executed.min()).days / 365.25

# Annualized eviction rate per 100 renter-occupied units
eviction_rate = (evictions_by_borough / renter_by_borough / years_of_data) * 100

# One row per borough, ordered by eviction count for the horizontal bars
borough_stats = pd.DataFrame({
    'Evictions': evictions_by_borough,
    'Renter Units': renter_by_borough,
    'Annual Rate (per 100 units)': eviction_rate
}).sort_values('Evictions')

# Side-by-side panels: total count on the left, annual rate on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
panels = [
    (ax1, 'Evictions', 'steelblue', 'Total Evictions (2017-2025)', 'Eviction Count by Borough'),
    (ax2, 'Annual Rate (per 100 units)', 'coral', 'Annual Evictions per 100 Renter Units', 'Eviction Rate by Borough'),
]
for panel_ax, column, bar_color, x_label, panel_title in panels:
    borough_stats[column].plot(kind='barh', ax=panel_ax, color=bar_color)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.show()

# Show table
borough_stats

In [None]:
# Quick check: 2018 eviction rates by borough

# Renter-occupied units per borough from the 2018 ACS table
renter_2018 = renter_by_year_borough[2018]

# Evictions executed during calendar year 2018, counted per borough
executed_in_2018 = df['Executed Date'].dt.year == 2018
evictions_2018 = df.loc[executed_in_2018, 'BOROUGH'].value_counts()

# Rate as a percentage of renter units, and its inverse (units per eviction)
rate_2018 = (evictions_2018 / renter_2018) * 100
units_per_eviction = renter_2018 / evictions_2018

summary_2018 = pd.DataFrame({
    'Evictions': evictions_2018,
    'Renter Units': renter_2018,
    'Rate (%)': rate_2018.round(3),
    '1 Eviction per X Units': units_per_eviction.round(0).astype(int)
})
summary_2018.sort_values('Rate (%)', ascending=False)

In [None]:
# Eviction count and rate by borough by year

# Year x borough table of eviction counts (0 where a combination has no rows)
df['year'] = df['Executed Date'].dt.year
evictions_by_year_borough = df.groupby(['year', 'BOROUGH']).size().unstack(fill_value=0)

# Same shape as the counts, filled column by column with the rate per 100
# renter-occupied units for that borough and year
rates = evictions_by_year_borough.astype(float)
for borough in rates.columns:
    rates[borough] = [
        (evictions_by_year_borough.loc[year, borough] / get_renter_units_borough(year, borough)) * 100
        for year in rates.index
    ]

# Show the raw counts, then the rounded rates
print("Eviction Counts by Borough by Year:")
display(evictions_by_year_borough)

print("\nExecuted Eviction Rate (per 100 renter-occupied units) by Borough by Year:")
display(rates.round(2))

# One line per borough showing the rate over time
fig, ax = plt.subplots(figsize=(12, 6))
rates.plot(ax=ax, marker='o')
ax.set_title('Executed Eviction Rate by Borough')
ax.set_xlabel('Year')
ax.set_ylabel('Executed Eviction Rate (per 100 renter-occupied units)')
ax.legend(title='Borough')
plt.tight_layout()

# Write the figure to the plots directory
plots_dir = Path('../plots')
plots_dir.mkdir(exist_ok=True)
plot_path = plots_dir / 'borough_executed_evictions_by_year.png'
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
print(f"Saved to {plot_path}")

plt.show()