In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, inspect
# Show up to 400 characters per cell when displaying dataframes. The option is
# addressed by its full name ("display.max_colwidth") rather than the
# abbreviated form, which pandas only resolves through pattern matching.
pd.set_option('display.max_colwidth', 400)

In [2]:
# Read the demographics and evictions CSV files from ./Resources into dataframes
demographics_df = pd.read_csv(filepath_or_buffer='./Resources/NYC_Demographics.csv')
evictions_df = pd.read_csv(filepath_or_buffer='./Resources/NYC_Evictions.csv')

In [10]:
# Columns to keep from the raw demographics data
demographics_cols = ["JURISDICTION NAME", "COUNT PARTICIPANTS", "PERCENT FEMALE", "PERCENT MALE", 
                     "PERCENT PACIFIC ISLANDER", "PERCENT HISPANIC LATINO", "PERCENT AMERICAN INDIAN", 
                     "PERCENT ASIAN NON HISPANIC", "PERCENT WHITE NON HISPANIC", "PERCENT BLACK NON HISPANIC", 
                     "PERCENT OTHER ETHNICITY", "PERCENT ETHNICITY UNKNOWN", "PERCENT PERMANENT RESIDENT ALIEN", 
                     "PERCENT US CITIZEN", "PERCENT OTHER CITIZEN STATUS", "PERCENT RECEIVES PUBLIC ASSISTANCE", 
                     "PERCENT NRECEIVES PUBLIC ASSISTANCE"]

# Minimum "COUNT PARTICIPANTS" a zip code needs in order to be kept
MIN_PARTICIPANTS = 20

# Select the columns above, rename "JURISDICTION NAME" to "ZIPCODE", and keep
# only the first row for each zip code. Every step returns a new frame (no
# inplace=True), so re-running this cell always starts from demographics_df.
demographics_filtered_df = (
    demographics_df[demographics_cols]
    .rename(columns={"JURISDICTION NAME": "ZIPCODE"})
    .drop_duplicates(subset="ZIPCODE")
)

# Drop zip codes that have fewer than MIN_PARTICIPANTS participants
demographics_filtered = demographics_filtered_df.loc[
    demographics_filtered_df["COUNT PARTICIPANTS"] >= MIN_PARTICIPANTS
]

demographics_filtered

Unnamed: 0,ZIPCODE,COUNT PARTICIPANTS,PERCENT FEMALE,PERCENT MALE,PERCENT PACIFIC ISLANDER,PERCENT HISPANIC LATINO,PERCENT AMERICAN INDIAN,PERCENT ASIAN NON HISPANIC,PERCENT WHITE NON HISPANIC,PERCENT BLACK NON HISPANIC,PERCENT OTHER ETHNICITY,PERCENT ETHNICITY UNKNOWN,PERCENT PERMANENT RESIDENT ALIEN,PERCENT US CITIZEN,PERCENT OTHER CITIZEN STATUS,PERCENT RECEIVES PUBLIC ASSISTANCE,PERCENT NRECEIVES PUBLIC ASSISTANCE
0,10001,44,0.5,0.5,0.0,0.36,0.0,0.07,0.02,0.48,0.07,0.0,0.05,0.95,0.0,0.45,0.55
1,10002,35,0.54,0.46,0.0,0.03,0.0,0.8,0.17,0.0,0.0,0.0,0.06,0.94,0.0,0.06,0.94
22,10025,27,0.63,0.37,0.0,0.56,0.0,0.0,0.0,0.41,0.04,0.0,0.11,0.89,0.0,0.3,0.7
26,10029,20,0.65,0.35,0.0,0.2,0.0,0.0,0.0,0.75,0.05,0.0,0.0,1.0,0.0,0.4,0.6
58,10451,41,0.51,0.49,0.0,0.54,0.0,0.02,0.0,0.37,0.05,0.02,0.07,0.93,0.0,0.29,0.71
62,10455,27,0.63,0.37,0.0,0.19,0.0,0.0,0.0,0.74,0.07,0.0,0.04,0.93,0.04,0.26,0.74
65,10458,52,0.48,0.52,0.0,0.46,0.0,0.0,0.02,0.44,0.06,0.02,0.06,0.92,0.02,0.37,0.63
67,10460,27,0.74,0.26,0.0,0.33,0.0,0.0,0.04,0.56,0.07,0.0,0.07,0.93,0.0,0.52,0.48
68,10461,49,0.53,0.47,0.02,0.43,0.02,0.02,0.02,0.37,0.1,0.02,0.02,0.98,0.0,0.43,0.57
70,10463,59,0.56,0.44,0.0,0.61,0.0,0.0,0.03,0.31,0.05,0.0,0.05,0.92,0.03,0.34,0.66


In [5]:
# Parse the "Executed Date" column into datetime values; this overwrites the
# column on evictions_df itself.
executed_dates = pd.to_datetime(evictions_df["Executed Date"])
evictions_df["Executed Date"] = executed_dates

# Keep only the zip code, executed date and property-type columns
evictions_cols = ["Eviction Zip", "Executed Date", "Residential/Commercial"]
evictions_filtered_df = evictions_df.loc[:, evictions_cols]

# Keep only the rows marked "Residential"
is_residential = evictions_filtered_df["Residential/Commercial"].eq("Residential")
evictions_filtered = evictions_filtered_df[is_residential]

evictions_filtered

Unnamed: 0,Eviction Zip,Executed Date,Residential/Commercial
0,10456,2018-02-26,Residential
1,10466,2022-11-16,Residential
2,10467,2017-09-29,Residential
3,11213,2018-07-12,Residential
4,10470,2019-10-24,Residential
...,...,...,...
72483,11368,2019-10-28,Residential
72484,11423,2019-08-06,Residential
72485,10472,2018-07-13,Residential
72486,11203,2019-12-04,Residential


In [6]:
# Count the residential eviction rows for each "Eviction Zip" value
zipcode_evictions = evictions_filtered.groupby(['Eviction Zip'])['Eviction Zip'].count()

# Turn the counts into a one-column dataframe named "Total Evictions";
# "Eviction Zip" remains the index of zipcode_evictions_df.
zipcode_evictions_df = zipcode_evictions.to_frame(name='Total Evictions')

# Display only: the reset_index() result is not assigned back, so
# zipcode_evictions_df itself is unchanged by this line.
zipcode_evictions_df.reset_index()

Unnamed: 0,Eviction Zip,Total Evictions
0,0,3
1,10000,1
2,10001,169
3,10002,267
4,10003,123
...,...,...
203,11692,369
204,11693,107
205,11694,143
206,11698,1


In [11]:
# Join the eviction totals to the demographics on zip code. This is an inner
# join, so only zip codes present in both frames are kept.
merged_by_zip = zipcode_evictions_df.merge(
    demographics_filtered,
    how='inner',
    left_on="Eviction Zip",
    right_on="ZIPCODE",
)

# Order rows from most to fewest participants, then by total evictions (also descending)
combined_df = merged_by_zip.sort_values(
    by=['COUNT PARTICIPANTS', 'Total Evictions'],
    ascending=False,
)

combined_df

Unnamed: 0,Total Evictions,ZIPCODE,COUNT PARTICIPANTS,PERCENT FEMALE,PERCENT MALE,PERCENT PACIFIC ISLANDER,PERCENT HISPANIC LATINO,PERCENT AMERICAN INDIAN,PERCENT ASIAN NON HISPANIC,PERCENT WHITE NON HISPANIC,PERCENT BLACK NON HISPANIC,PERCENT OTHER ETHNICITY,PERCENT ETHNICITY UNKNOWN,PERCENT PERMANENT RESIDENT ALIEN,PERCENT US CITIZEN,PERCENT OTHER CITIZEN STATUS,PERCENT RECEIVES PUBLIC ASSISTANCE,PERCENT NRECEIVES PUBLIC ASSISTANCE
26,421,11230,248,0.78,0.22,0.0,0.0,0.0,0.03,0.87,0.01,0.07,0.02,0.01,0.99,0.0,0.17,0.83
21,209,11219,214,0.85,0.15,0.0,0.0,0.0,0.0,0.96,0.0,0.03,0.0,0.01,0.99,0.0,0.52,0.48
20,238,11218,111,0.87,0.13,0.0,0.01,0.0,0.03,0.86,0.05,0.05,0.0,0.07,0.92,0.01,0.46,0.54
22,318,11223,109,0.49,0.51,0.0,0.01,0.0,0.06,0.87,0.01,0.05,0.0,0.06,0.94,0.01,0.18,0.82
12,1896,10467,106,0.61,0.39,0.0,0.48,0.01,0.04,0.01,0.44,0.02,0.0,0.09,0.89,0.02,0.31,0.69
23,383,11224,93,0.57,0.43,0.0,0.15,0.0,0.08,0.04,0.65,0.09,0.0,0.06,0.92,0.01,0.3,0.7
13,1401,10468,81,0.57,0.43,0.0,0.43,0.0,0.04,0.0,0.44,0.09,0.0,0.02,0.98,0.0,0.28,0.72
11,1194,10466,65,0.6,0.4,0.02,0.22,0.0,0.0,0.0,0.77,0.0,0.0,0.05,0.95,0.0,0.26,0.74
9,649,10463,59,0.56,0.44,0.0,0.61,0.0,0.0,0.03,0.31,0.05,0.0,0.05,0.92,0.03,0.34,0.66
27,465,11234,59,0.49,0.51,0.0,0.14,0.0,0.0,0.63,0.17,0.07,0.0,0.02,0.98,0.0,0.03,0.97


In [12]:
# Add a "PERCENT MINORITY" column to combined_df: the row-wise total of the
# ethnicity columns listed here.
minority_cols = [
    'PERCENT PACIFIC ISLANDER',
    'PERCENT HISPANIC LATINO',
    'PERCENT AMERICAN INDIAN',
    'PERCENT ASIAN NON HISPANIC',
    'PERCENT BLACK NON HISPANIC',
    'PERCENT OTHER ETHNICITY',
]
first_col, *other_cols = minority_cols

# The built-in sum() adds the columns one at a time with "+", in list order,
# starting from the first column.
combined_df['PERCENT MINORITY'] = sum(
    (combined_df[col] for col in other_cols),
    combined_df[first_col],
)
combined_df


Unnamed: 0,Total Evictions,ZIPCODE,COUNT PARTICIPANTS,PERCENT FEMALE,PERCENT MALE,PERCENT PACIFIC ISLANDER,PERCENT HISPANIC LATINO,PERCENT AMERICAN INDIAN,PERCENT ASIAN NON HISPANIC,PERCENT WHITE NON HISPANIC,PERCENT BLACK NON HISPANIC,PERCENT OTHER ETHNICITY,PERCENT ETHNICITY UNKNOWN,PERCENT PERMANENT RESIDENT ALIEN,PERCENT US CITIZEN,PERCENT OTHER CITIZEN STATUS,PERCENT RECEIVES PUBLIC ASSISTANCE,PERCENT NRECEIVES PUBLIC ASSISTANCE,PERCENT MINORITY
26,421,11230,248,0.78,0.22,0.0,0.0,0.0,0.03,0.87,0.01,0.07,0.02,0.01,0.99,0.0,0.17,0.83,0.11
21,209,11219,214,0.85,0.15,0.0,0.0,0.0,0.0,0.96,0.0,0.03,0.0,0.01,0.99,0.0,0.52,0.48,0.03
20,238,11218,111,0.87,0.13,0.0,0.01,0.0,0.03,0.86,0.05,0.05,0.0,0.07,0.92,0.01,0.46,0.54,0.14
22,318,11223,109,0.49,0.51,0.0,0.01,0.0,0.06,0.87,0.01,0.05,0.0,0.06,0.94,0.01,0.18,0.82,0.13
12,1896,10467,106,0.61,0.39,0.0,0.48,0.01,0.04,0.01,0.44,0.02,0.0,0.09,0.89,0.02,0.31,0.69,0.99
23,383,11224,93,0.57,0.43,0.0,0.15,0.0,0.08,0.04,0.65,0.09,0.0,0.06,0.92,0.01,0.3,0.7,0.97
13,1401,10468,81,0.57,0.43,0.0,0.43,0.0,0.04,0.0,0.44,0.09,0.0,0.02,0.98,0.0,0.28,0.72,1.0
11,1194,10466,65,0.6,0.4,0.02,0.22,0.0,0.0,0.0,0.77,0.0,0.0,0.05,0.95,0.0,0.26,0.74,1.01
9,649,10463,59,0.56,0.44,0.0,0.61,0.0,0.0,0.03,0.31,0.05,0.0,0.05,0.92,0.03,0.34,0.66,0.97
27,465,11234,59,0.49,0.51,0.0,0.14,0.0,0.0,0.63,0.17,0.07,0.0,0.02,0.98,0.0,0.03,0.97,0.38
