# Exploratory Data Analysis


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from EDA_pr import *
from EDA_pr2 import *

### Neighborhood House Price Data Overview
- The data comes from Zillow and shows the average house price for each neighborhood in the country.
- Each row represents a neighborhood in a state and shows the average house price for each month from 1-31-2000 to 1-31-2024.
- We cleaned and filtered the data to produce two CSV files (pre- and post-COVID) containing house prices for Chicago only.
- We then transposed the data so that the original rows became columns and the original columns became rows:
    - Each row represents a month between the year 2000 and 2024
    - Each column is a neighborhood

In [2]:
# Raw Zillow export, loaded as-is (no filtering or cleaning applied yet).
neighborhood_data = pd.read_csv(
    filepath_or_buffer='csv_files/Neighborhood_House_Price.csv'
)
# Preview the first five rows (five is also the default for head()).
neighborhood_data.head(n=5)

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,2000-01-31,...,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
0,112345,0,Maryvale,neighborhood,AZ,AZ,Phoenix,"Phoenix-Mesa-Chandler, AZ",Maricopa County,66775.313666,...,313492.5,314776.5,316614.5,319072.5,322054.6,324693.8,327100.8,329141.1,330703.5,331714.1
1,192689,1,Paradise,neighborhood,NV,NV,Las Vegas,"Las Vegas-Henderson-Paradise, NV",Clark County,132638.938818,...,358563.7,358037.2,358754.6,360550.8,363426.5,366274.1,368744.6,370886.7,372963.4,374854.1
2,270958,2,Upper West Side,neighborhood,NY,NY,New York,"New York-Newark-Jersey City, NY-NJ-PA",New York County,387530.423074,...,1276836.0,1270266.0,1264532.0,1258336.0,1248721.0,1238858.0,1227969.0,1216308.0,1208912.0,1203406.0
3,270957,3,Upper East Side,neighborhood,NY,NY,New York,"New York-Newark-Jersey City, NY-NJ-PA",New York County,634533.128812,...,1259968.0,1250928.0,1245395.0,1241081.0,1236655.0,1232169.0,1224024.0,1212976.0,1202819.0,1196051.0
4,118208,4,South Los Angeles,neighborhood,CA,CA,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,127876.428774,...,619868.4,620830.5,624531.4,631738.0,641397.3,651175.4,659477.2,665923.5,670126.6,667898.8


In [3]:
# Chicago-only neighborhood prices after transformation, split into a
# pre-COVID file (2017-2019) and a post-COVID file (2021-present).
neighborhood_2017_2019_pre = pd.read_csv(
    filepath_or_buffer='csv_files/neighborhood_data_2017_2019.csv'
)
neighborhood_2021_present_post = pd.read_csv(
    filepath_or_buffer='csv_files/neighborhood_data_2021_present.csv'
)

# Preview the first five rows of the pre-COVID frame.
neighborhood_2017_2019_pre.head(n=5)

Unnamed: 0,date,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
0,2017-01-31,320800.53,187637.44,90925.08,385189.74,527941.39,104123.27,308905.87,156084.35,222163.9,...,196512.18,150302.7,109474.19,353627.49,115870.21,174695.62,25263.13,222620.47,247078.01,267835.82
1,2017-02-28,323403.88,189553.4,92159.98,387715.94,530161.42,104668.31,311086.23,157619.44,223640.4,...,198572.76,152146.67,114100.98,357398.54,117361.01,177001.34,25686.9,224828.56,248058.67,269378.31
2,2017-03-31,325554.65,191027.03,93255.23,388969.13,532967.82,105138.79,313207.54,158774.66,224694.87,...,200222.71,153710.0,117062.46,360254.8,118906.74,178717.89,26756.02,227139.69,248698.29,270910.81
3,2017-04-30,326994.07,191889.95,93859.98,389250.31,535106.28,104823.29,314064.33,159399.12,225477.48,...,201977.06,154409.58,116186.07,363148.26,120138.56,180467.27,27796.86,228359.51,248938.92,272070.8
4,2017-05-31,327962.34,192260.37,94812.11,388709.64,538274.75,104754.83,314049.22,159471.94,225828.91,...,203294.72,155492.11,115115.95,365085.52,120869.52,181256.7,28691.81,228371.01,249231.38,272600.6


### Exploring the Pre and Post Data

In [13]:
# Report the per-neighborhood minimum and maximum house prices for the
# pre-COVID (2017-2019) Chicago data loaded earlier in this notebook.
print("HOUSE PRICE BETWEEN 2017-19")
# parse_df is not defined in this notebook (presumably it comes from the
# EDA_pr / EDA_pr2 star imports). It returns two objects.
# NOTE(review): what df and df2 each contain is not visible here — confirm
# against the helper's definition.
df, df2 = parse_df(neighborhood_2017_2019_pre)

# Removes pandas' row-display limit. This is a global option, so it stays in
# effect for every later cell in the kernel session, not just this one.
pd.set_option("display.max_rows", None)
# Reports each neighborhood's min/max price and the dates they occurred
# (see the output captured below this cell).
neighborhood_min_max(df)
# NOTE(review): disabled two-argument call left over from development — either
# re-enable it or delete it before sharing the notebook.
# get_neighborhood_price_stats(df, df2)

HOUSE PRICE BETWEEN 2017-19
THE MINIMUM AND MAXIMUM HOUSE PRICES FOR EACH NEIGHBORHOOD AND THE DATES FOR WHEN THESE PRICES OCCURED

                     Neighborhood  Min Price   Min Date  Max Price   Max Date
                        Lake View  320800.53 2017-01-31  334771.35 2018-05-31
                       West Ridge  187637.44 2017-01-31  205433.04 2018-05-31
                   Little Village   90925.08 2017-01-31  130157.11 2019-12-31
                     Logan Square  385189.74 2017-01-31  416111.23 2019-04-30
                     Lincoln Park  520116.84 2019-12-31  546847.77 2018-05-31
                     South Austin  104123.27 2017-01-31  148747.61 2019-12-31
                      Irving Park  308905.87 2017-01-31  323790.58 2018-05-31
                      Rogers Park  156084.35 2017-01-31  175494.50 2019-02-28
                           Uptown  222163.90 2017-01-31  235276.59 2018-05-31
                          Gresham   93984.94 2017-01-31  123570.12 2019-07-31
          

In [4]:
# Get the current most expensive and cheapest neighborhoods. (DONE)
# Find a correlation between crime and house prices in the neighborhoods.
# Find the arrests made in expensive and cheap neighborhoods (pre-COVID, post-COVID).
# What is the most common location description of where the crime took place?

In [5]:
# Pass the pre-COVID frame that this notebook actually loads. The previous
# argument, `neighborhood_2017_2019`, is not assigned in any cell shown in this
# notebook (unless the star imports happen to supply it), so the call would
# raise NameError after Restart Kernel -> Run All.
# NOTE(review): an earlier cell contains a commented-out two-argument call to
# this helper (df, df2) — confirm the helper's current signature still accepts
# a single frame.
get_neighborhood_price_stats(neighborhood_2017_2019_pre)

THE MINIMUM AND MAXIMUM HOUSE PRICES FOR EACH NEIGHBORHOOD AND THE DATES FOR WHEN THESE PRICES OCCURED

                     Neighborhood  Min Price   Min Date  Max Price   Max Date
                        Lake View  320800.53 2017-01-31  334771.35 2018-05-31
                       West Ridge  187637.44 2017-01-31  205433.04 2018-05-31
                   Little Village   90925.08 2017-01-31  130157.11 2019-12-31
                     Logan Square  385189.74 2017-01-31  416111.23 2019-04-30
                     Lincoln Park  520116.84 2019-12-31  546847.77 2018-05-31
                     South Austin  104123.27 2017-01-31  148747.61 2019-12-31
                      Irving Park  308905.87 2017-01-31  323790.58 2018-05-31
                      Rogers Park  156084.35 2017-01-31  175494.50 2019-02-28
                           Uptown  222163.90 2017-01-31  235276.59 2018-05-31
                          Gresham   93984.94 2017-01-31  123570.12 2019-07-31
                     Portage Park  255

In [None]:
# TODO: In the Findings, mention the severity score of the Loop, because a lot of UIC students take Union and OTC.