# Cleaning Process
This notebook contains the steps needed to prepare the data: inspecting it (data size, number of features, number of records, mean value, max value, etc.) and cleaning it, which includes dropping some columns and rows, adding more information, joining the dataframes, and more.

In [1]:
import pandas as pd
import numpy as np
from CleaningPR import *

## Crime Dataset

In [2]:
# Load the raw crime records from the local CSV export (the file name indicates 2001 to present).
crime_data = pd.read_csv('csv_files/Crimes_2001_to_Present.csv')

In [3]:
# Summarise the raw frame: number of entries, column names and dtypes.
crime_data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000075 entries, 0 to 8000074
Data columns (total 22 columns):
 #   Column                Dtype  
---  ------                -----  
 0   ID                    int64  
 1   Case Number           object 
 2   Date                  object 
 3   Block                 object 
 4   IUCR                  object 
 5   Primary Type          object 
 6   Description           object 
 7   Location Description  object 
 8   Arrest                bool   
 9   Domestic              bool   
 10  Beat                  int64  
 11  District              float64
 12  Ward                  float64
 13  Community Area        float64
 14  FBI Code              object 
 15  X Coordinate          float64
 16  Y Coordinate          float64
 17  Year                  int64  
 18  Updated On            object 
 19  Latitude              float64
 20  Longitude             float64
 21  Location              object 
dtypes: bool(2), float64(7), int64(3), object(10)

## Converting the Date
#### Step 1) Changing the data type to a more suitable format

In [4]:
# Convert the crime data to a more suitable format.
# convertCrimeData is not defined in this notebook (presumably it comes from the CleaningPR star import).
# NOTE(review): judging by the head() output of the following cell, it appears to add the New_Date and
# RegionName columns and to encode Arrest as 0/1 — confirm against the helper's source.
# NOTE(review): the saved output shows a pandas SettingWithCopyWarning raised at the RegionName
# assignment inside the helper; that has to be fixed in CleaningPR, not in this cell.
# The result is assigned back to crime_data, so re-running this cell feeds the converted frame back in.
crime_data = convertCrimeData(crime_data)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_data['RegionName'] = crime_data['Community Area'].apply(get_community) # change coordinates to neighborhood name


In [6]:
# Preview the first rows of the converted frame.
crime_data.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,New_Date,RegionName
11,12045583,JD226426,05/07/2020 10:24:00 AM,035XX S INDIANA AVE,0820,THEFT,$500 AND UNDER,APARTMENT,0,False,...,06,1178180.0,1881621.0,2020,05/14/2020 08:47:15 AM,41.830482,-87.621752,"(41.830481843, -87.621751752)",2020-05-07 10:24:00,Douglas
12,12031001,JD209965,04/16/2020 05:00:00 AM,005XX W 32ND ST,0460,BATTERY,SIMPLE,APARTMENT,1,False,...,08B,1173292.0,1883705.0,2020,04/23/2020 03:45:11 PM,41.83631,-87.639624,"(41.836310224, -87.639624112)",2020-04-16 05:00:00,Bridgeport
13,12093529,JD282112,07/01/2020 10:16:00 AM,081XX S COLES AVE,051A,ASSAULT,AGGRAVATED - HANDGUN,STREET,1,False,...,04A,1198234.0,1851595.0,2020,07/08/2020 03:41:45 PM,41.74761,-87.549179,"(41.747609555, -87.549179329)",2020-07-01 10:16:00,South Chicago
14,12178140,JD381597,09/27/2020 11:29:00 PM,065XX S WOLCOTT AVE,0460,BATTERY,SIMPLE,RESIDENCE - PORCH / HALLWAY,0,False,...,08B,1164812.0,1861251.0,2020,10/04/2020 03:43:55 PM,41.774878,-87.671375,"(41.77487752, -87.671374872)",2020-09-27 23:29:00,West Englewood
15,4144897,HL474854,07/10/2005 03:00:00 PM,062XX S ABERDEEN ST,0430,BATTERY,AGGRAVATED: OTHER DANG WEAPON,STREET,0,False,...,04B,1170050.0,1863524.0,2005,02/28/2018 03:56:25 PM,41.781003,-87.652107,"(41.781002663, -87.652107119)",2005-07-10 15:00:00,Englewood


In [6]:
# Earlier inline version of the Arrest encoding, kept for reference
# (presumably now handled inside convertCrimeData — TODO confirm):
# crime_data['Arrest'] = crime_data['Arrest'].replace({
#     True:1,
#     False:0
# })

# List the distinct values currently stored in the Arrest column.
crime_data['Arrest'].unique()

array([0, 1])

In [7]:

# crime_data['New_Date'] = pd.to_datetime(crime_data['Date'])
# crime_data['New_Date'] = crime_data['New_Date'].dt.strftime('%m-%d-%Y %H:%M:%S')

In [None]:
# comparing the old date value format to the datetime value format
# crime_data[['Date', 'New_Date']]

In [9]:
# Print the latest and earliest records to verify the date range of the full dataset
print("Max new_date value: ", crime_data['New_Date'].max()) # Latest record (2024 in the saved output)
print("Min new_date value: ", crime_data['New_Date'].min()) # Earliest record (2001 in the saved output)

Max new_date value:  2024-02-10 00:00:00
Min new_date value:  2001-01-01 00:00:00


#### Step 2) Dropping the unnecessary columns such as X & Y Coordinate, Date, Block, IUCR, Description, Domestic, Beat, District, FBI Code, Ward, Updated On, Latitude, Longitude

In [10]:
# Column list handed to dropCrimeDataColumns. Despite the helper's name, the info() output
# saved under this cell lists exactly these seven columns, i.e. they are the ones retained.
# NOTE(review): dropCrimeDataColumns is not defined in this notebook — confirm its contract.
col = [
    'ID',
    'New_Date',
    'Primary Type',
    'Location Description',
    'Arrest',
    'Community Area',
    'RegionName',
]
crime_data = dropCrimeDataColumns(col, crime_data)

# Re-inspect the reduced frame.
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7297146 entries, 11 to 8000074
Data columns (total 7 columns):
 #   Column                Dtype         
---  ------                -----         
 0   ID                    int64         
 1   New_Date              datetime64[ns]
 2   Primary Type          object        
 3   Location Description  object        
 4   Arrest                int64         
 5   Community Area        float64       
 6   RegionName            object        
dtypes: datetime64[ns](1), float64(1), int64(2), object(3)
memory usage: 445.4+ MB


### Step 3) Filtering

In [11]:
# Split the crime records into the two study windows used below.
# NOTE(review): pre_covid_post_covid is not defined in this notebook; the checks in the following
# cells expect the first frame to cover 2017-2019 and the second 2021 onwards — confirm.
crime_data_2017_2019, crime_data_2021_present = pre_covid_post_covid(crime_data)

#### (Pre Covid)
Extracting the data for crime report from 2017-2019

In [None]:
# crime_data_2017_2019 = crime_data[(crime_data['New_Date'].dt.year >= 2017) & (crime_data['New_Date'].dt.year <= 2019)]

In [13]:
# Print the earliest and latest records to verify the 2017-2019 range.
# The second label previously read "Min" although it prints the maximum.
print("Min new_date value: ", crime_data_2017_2019['New_Date'].min()) # Earliest record, expected in 2017
print("Max new_date value: ", crime_data_2017_2019['New_Date'].max()) # Latest record, expected in 2019
print(crime_data_2017_2019['New_Date'].dt.year.unique()) # Distinct years present in the slice

Min new_date value:  2017-01-01 00:00:00
Min new_date value:  2019-12-31 23:55:00
[2019 2018 2017]


In [14]:
# Count missing values per column in the 2017-2019 slice, one count per output line,
# in the order: RegionName, Location Description, Community Area, Primary Type.
print(
    *(
        crime_data_2017_2019[column_name].isna().sum()
        for column_name in (
            'RegionName',
            'Location Description',
            'Community Area',
            'Primary Type',
        )
    ),
    sep='\n',
)

0
0
0
0


In [None]:
# crime_data_2017_2019.dropna(inplace=True)

In [16]:
# Number of distinct Community Area values in the 2017-2019 slice.
crime_data_2017_2019['Community Area'].unique().size

77

#### (Post Covid)
Extracting the data for crime report from 2021 - present

In [None]:
# crime_data_2021_present = crime_data[crime_data['New_Date'].dt.year >= 2021]

In [18]:
# Verify the range of the 2021-present slice.
print("Min new_date value: ", crime_data_2021_present['New_Date'].min()) # Earliest record, expected in 2021
print(crime_data_2021_present['New_Date'].dt.year.unique()) # Distinct years present; expected within 2021-2024

Min new_date value:  2021-01-01 00:00:00
[2023 2021 2022 2024]


In [19]:
# Count missing values per column in the 2021-present slice, one count per output line,
# in the order: RegionName, Location Description, Community Area, Primary Type.
print(
    *(
        crime_data_2021_present[column_name].isna().sum()
        for column_name in (
            'RegionName',
            'Location Description',
            'Community Area',
            'Primary Type',
        )
    ),
    sep='\n',
)

0
0
0
0


In [None]:
# crime_data_2021_present.dropna(inplace=True)

In [21]:

# Number of distinct Community Area values in the 2021-present slice.
crime_data_2021_present['Community Area'].unique().size

77

In [None]:
# community_areas = {
#     1: "Rogers Park",
#     2: "West Ridge",
#     3: "Uptown",
#     4: "Lincoln Square",
#     5: "North Center",
#     6: "Lake View",
#     7: "Lincoln Park",
#     8: "Near North Side",
#     9: "Edison Park",
#     10: "Norwood Park",
#     11: "Jefferson Park",
#     12: "Forest Glen",
#     13: "North Park",
#     14: "Albany Park",
#     15: "Portage Park",
#     16: "Irving Park",
#     17: "Dunning",
#     18: "Montclare",
#     19: "Belmont Cragin",
#     20: "Hermosa",
#     21: "Avondale",
#     22: "Logan Square",
#     23: "Humboldt Park",
#     24: "West Town",
#     25: "Austin",
#     26: "West Garfield Park",
#     27: "East Garfield Park",
#     28: "Near West Side",
#     29: "North Lawndale",
#     30: "South Lawndale",
#     31: "Lower West Side",
#     32: "Loop",
#     33: "Near South Side",
#     34: "Armour Square",
#     35: "Douglas",
#     36: "Oakland",
#     37: "Fuller Park",
#     38: "Grand Boulevard",
#     39: "Kenwood",
#     40: "Washington Park",
#     41: "Hyde Park",
#     42: "Woodlawn",
#     43: "South Shore",
#     44: "Chatham",
#     45: "Avalon Park",
#     46: "South Chicago",
#     47: "Burnside",
#     48: "Calumet Heights",
#     49: "Roseland",
#     50: "Pullman",
#     51: "South Deering",
#     52: "East Side",
#     53: "West Pullman",
#     54: "Riverdale",
#     55: "Hegewisch",
#     56: "Garfield Ridge",
#     57: "Archer Heights",
#     58: "Brighton Park",
#     59: "McKinley Park",
#     60: "Bridgeport",
#     61: "New City",
#     62: "West Elsdon",
#     63: "Gage Park",
#     64: "Clearing",
#     65: "West Lawn",
#     66: "Chicago Lawn",
#     67: "West Englewood",
#     68: "Englewood",
#     69: "Greater Grand Crossing",
#     70: "Ashburn",
#     71: "Auburn Gresham",
#     72: "Beverly",
#     73: "Washington Heights",
#     74: "Mount Greenwood",
#     75: "Morgan Park",
#     76: "O'Hare",
#     77: "Edgewater"
# }

In [None]:
# def get_community(code):
#     return community_areas[code]

In [None]:
# crime_data_2017_2019.loc[:, 'Location'] = crime_data_2017_2019['Community Area'].apply(get_community)
# crime_data_2021_present.loc[:, 'Location'] = crime_data_2021_present['Community Area'].apply(get_community)

In [None]:
# crime_data_2017_2019.rename(columns={'Location':'RegionName'}, inplace = True)
# crime_data_2021_present.rename(columns={'Location':'RegionName'}, inplace = True)

In [26]:
# Render both period-specific crime frames (2021-present first, then 2017-2019).
# NOTE(review): the saved output shows a Severity_Score column that is not among the columns
# kept in Step 2 — confirm which helper adds it, or re-run the notebook to refresh the output.
display(crime_data_2021_present, crime_data_2017_2019)

Unnamed: 0,ID,New_Date,Primary Type,Location Description,Arrest,Community Area,RegionName,Severity_Score
371,13204489,2023-09-06 11:00:00,THEFT,PARKING LOT / GARAGE (NON RESIDENTIAL),0,32.0,Loop,Low
643,12342615,2021-04-17 15:20:00,ROBBERY,RESIDENCE,1,44.0,Chatham,High
646,12589893,2022-01-11 15:00:00,SEX OFFENSE,RESIDENCE,0,46.0,South Chicago,High
647,12592454,2022-01-14 15:55:00,OTHER OFFENSE,RESIDENCE,0,68.0,Englewood,Medium
648,12785595,2022-08-05 21:00:00,SEX OFFENSE,APARTMENT,1,69.0,Greater Grand Crossing,High
...,...,...,...,...,...,...,...,...
2336176,26479,2021-12-03 20:37:00,HOMICIDE,PORCH,1,69.0,Greater Grand Crossing,High
2336315,27289,2023-01-06 21:25:00,HOMICIDE,HOUSE,1,67.0,West Englewood,High
2336416,27015,2022-08-31 09:30:00,HOMICIDE,GAS STATION,0,53.0,West Pullman,High
2336534,27577,2023-06-20 12:42:00,HOMICIDE,STREET,0,26.0,West Garfield Park,High


Unnamed: 0,ID,New_Date,Primary Type,Location Description,Arrest,Community Area,RegionName,Severity_Score
96,12098557,2019-02-01 00:01:00,BATTERY,RESIDENCE,0,63.0,Gage Park,Medium
283,12082526,2019-09-24 12:00:00,DECEPTIVE PRACTICE,RESIDENCE,0,3.0,Uptown,Medium
527,11859264,2019-10-13 06:40:00,CRIMINAL DAMAGE,APARTMENT,0,29.0,North Lawndale,Medium
641,11662417,2019-04-21 12:30:00,ROBBERY,RESIDENCE,0,44.0,Chatham,High
663,12990873,2019-08-17 13:14:00,OFFENSE INVOLVING CHILDREN,RESIDENCE,1,23.0,Humboldt Park,High
...,...,...,...,...,...,...,...,...
7999292,12022897,2019-12-05 00:00:00,DECEPTIVE PRACTICE,RESIDENCE,0,34.0,Armour Square,Medium
7999338,12056523,2019-12-20 09:00:00,DECEPTIVE PRACTICE,RESIDENCE,1,8.0,Near North Side,Medium
7999342,12056116,2019-04-09 00:00:00,OFFENSE INVOLVING CHILDREN,RESIDENCE,0,48.0,Calumet Heights,High
7999778,12166069,2019-02-01 09:00:00,DECEPTIVE PRACTICE,RESIDENCE,0,16.0,Irving Park,Medium


### Step 4) Saving the Dataframe to a CSV file

In [27]:
# Write each period-specific crime frame to its own CSV; index=False leaves the DataFrame index out of the file.
crime_data_2021_present.to_csv('csv_files/Crimes_2021_to_Present.csv', index=False)
crime_data_2017_2019.to_csv('csv_files/Crimes_2017_to_2019.csv', index=False)


## Neighborhood Dataset
https://www.zillow.com/research/data/

Contains information like Region Name, State, City, County Name, Average price of a house for every month since 2000

In [28]:
# Load the neighborhood house price table from the local CSV (the section text links the Zillow research data page as its source).
neighborhood_data = pd.read_csv('csv_files/Neighborhood_House_Price.csv')

In [29]:
# Show the raw table as loaded (the saved output is a truncated rendering of it).
neighborhood_data

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,2000-01-31,...,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
0,112345,0,Maryvale,neighborhood,AZ,AZ,Phoenix,"Phoenix-Mesa-Chandler, AZ",Maricopa County,66775.313666,...,3.134925e+05,3.147765e+05,3.166145e+05,3.190725e+05,3.220546e+05,3.246938e+05,3.271008e+05,3.291411e+05,3.307035e+05,3.317141e+05
1,192689,1,Paradise,neighborhood,NV,NV,Las Vegas,"Las Vegas-Henderson-Paradise, NV",Clark County,132638.938818,...,3.585637e+05,3.580372e+05,3.587546e+05,3.605508e+05,3.634265e+05,3.662741e+05,3.687446e+05,3.708867e+05,3.729634e+05,3.748541e+05
2,270958,2,Upper West Side,neighborhood,NY,NY,New York,"New York-Newark-Jersey City, NY-NJ-PA",New York County,387530.423074,...,1.276836e+06,1.270266e+06,1.264532e+06,1.258336e+06,1.248721e+06,1.238858e+06,1.227969e+06,1.216308e+06,1.208912e+06,1.203406e+06
3,270957,3,Upper East Side,neighborhood,NY,NY,New York,"New York-Newark-Jersey City, NY-NJ-PA",New York County,634533.128812,...,1.259968e+06,1.250928e+06,1.245395e+06,1.241081e+06,1.236655e+06,1.232169e+06,1.224024e+06,1.212976e+06,1.202819e+06,1.196051e+06
4,118208,4,South Los Angeles,neighborhood,CA,CA,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,127876.428774,...,6.198684e+05,6.208305e+05,6.245314e+05,6.317380e+05,6.413973e+05,6.511754e+05,6.594772e+05,6.659235e+05,6.701266e+05,6.678988e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21658,416956,27095,Enclave at Breckinridge,neighborhood,TX,TX,Richardson,"Dallas-Fort Worth-Arlington, TX",Collin County,,...,5.731472e+05,5.756044e+05,5.784355e+05,5.820318e+05,5.832007e+05,5.843683e+05,5.855589e+05,5.868283e+05,5.871518e+05,5.869640e+05
21659,830770,27095,Silver Lake,neighborhood,NJ,NJ,Blairstown Township,"Allentown-Bethlehem-Easton, PA-NJ",Warren County,,...,,,,,,,,,3.128810e+05,3.129203e+05
21660,267809,27095,Rock Springs,neighborhood,NC,NC,Greenville,"Greenville, NC",Pitt County,,...,7.755061e+05,7.853695e+05,7.965433e+05,8.053492e+05,8.125551e+05,8.162749e+05,8.203457e+05,8.235215e+05,8.257521e+05,8.240167e+05
21661,251134,27095,Lind,neighborhood,CO,CO,Fort Collins,"Fort Collins, CO",Larimer County,,...,4.906780e+05,4.897856e+05,4.897932e+05,4.908824e+05,4.926296e+05,4.944977e+05,4.952402e+05,4.959083e+05,4.961843e+05,4.971330e+05


### Filtering
Only want to focus on Chicago, IL housing prices. 

In [30]:
# Narrow the table to the Chicago, IL focus described in the section text.
# NOTE(review): filterNeighborhood is not defined in this notebook; the commented-out line in the
# following cell suggests it filters on State == 'IL' and City == 'Chicago' — confirm.
# The result is assigned back to neighborhood_data, so the unfiltered frame is no longer available afterwards.
neighborhood_data = filterNeighborhood(neighborhood_data)

In [31]:
# neighborhood_data = neighborhood_data[(neighborhood_data['State'] == 'IL') & (neighborhood_data['City'] == 'Chicago')]

In [32]:
# Split the filtered house price table into the two study windows.
# NOTE(review): pre_covid_hd_post_covid_hd is not defined in this notebook; the commented-out code
# in the following cells suggests RegionName plus the 2017-01-31..2019-12-31 columns for the first
# frame and RegionName plus the columns from 2021-01-31 onwards for the second — confirm.
neighborhood_data_2017_2019, neighborhood_data_2021_present = pre_covid_hd_post_covid_hd(neighborhood_data)


In [33]:
# Pre Covid
# first_half_column = neighborhood_data.loc[0:, ['RegionName']]
# second_half_column =  neighborhood_data.loc[0:, '2017-01-31':'2019-12-31']

# # Post Covid
# first_half_column_2 = neighborhood_data.loc[0:, ['RegionName']]
# second_half_column_2 =  neighborhood_data.loc[0:, '2021-01-31':]


In [34]:
# neighborhood_data_2017_2019 = pd.concat([first_half_column, second_half_column], axis=1)
# neighborhood_data_2021_present = pd.concat([first_half_column_2, second_half_column_2], axis=1)

## Transposing the Data
Resetting the index, rotating the dataframe, and renaming the columns to neighborhood names to make it easier to do EDA


In [35]:
# Reshape the 2017-2019 price table with transpose_data (not defined in this notebook).
# NOTE(review): per the saved output the result has a 'date' column plus one column per
# neighborhood — confirm against the helper's source.
# The result overwrites its own input, so re-running this cell passes the already-reshaped frame back in.
neighborhood_data_2017_2019 = transpose_data(data=neighborhood_data_2017_2019)
# Show the reshaped frame.
neighborhood_data_2017_2019

Unnamed: 0,date,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
0,2017-01-31,320800.53,187637.44,90925.08,385189.74,527941.39,104123.27,308905.87,156084.35,222163.9,...,196512.18,150302.7,109474.19,353627.49,115870.21,174695.62,25263.13,222620.47,247078.01,267835.82
1,2017-02-28,323403.88,189553.4,92159.98,387715.94,530161.42,104668.31,311086.23,157619.44,223640.4,...,198572.76,152146.67,114100.98,357398.54,117361.01,177001.34,25686.9,224828.56,248058.67,269378.31
2,2017-03-31,325554.65,191027.03,93255.23,388969.13,532967.82,105138.79,313207.54,158774.66,224694.87,...,200222.71,153710.0,117062.46,360254.8,118906.74,178717.89,26756.02,227139.69,248698.29,270910.81
3,2017-04-30,326994.07,191889.95,93859.98,389250.31,535106.28,104823.29,314064.33,159399.12,225477.48,...,201977.06,154409.58,116186.07,363148.26,120138.56,180467.27,27796.86,228359.51,248938.92,272070.8
4,2017-05-31,327962.34,192260.37,94812.11,388709.64,538274.75,104754.83,314049.22,159471.94,225828.91,...,203294.72,155492.11,115115.95,365085.52,120869.52,181256.7,28691.81,228371.01,249231.38,272600.6
5,2017-06-30,327811.48,191657.65,95124.03,387398.5,540584.69,104495.21,312965.46,159051.81,225370.08,...,203623.69,155826.9,114375.45,364827.71,120708.67,181725.54,29155.3,226888.91,249169.39,271903.45
6,2017-07-31,327962.79,191412.15,95401.55,386883.97,544013.4,105456.98,312033.22,159263.84,225391.15,...,203924.57,156080.19,115932.3,362801.74,119757.39,182351.22,29502.31,225638.92,249098.5,271082.51
7,2017-08-31,327437.85,191274.53,95832.13,386048.66,545448.3,106777.03,310987.59,159557.2,225711.37,...,203588.83,155547.43,116634.89,360938.6,119547.39,182371.62,29721.06,225330.79,248786.88,270401.83
8,2017-09-30,327554.6,192094.14,97255.88,386580.05,546221.2,109238.1,310692.37,160237.74,226615.06,...,203998.04,155771.45,118506.15,361452.55,120030.85,182801.55,30255.16,226455.82,250024.69,270636.65
9,2017-10-31,327374.26,193383.13,99462.73,387643.2,544315.29,111912.99,310430.04,161239.95,227167.7,...,204119.45,156728.64,120980.46,363847.9,121869.02,184368.6,30990.41,228289.68,252361.45,271321.6


In [36]:
# Reshape the 2021-present price table with transpose_data (not defined in this notebook).
# NOTE(review): per the saved output the result has a 'date' column plus one column per
# neighborhood — confirm against the helper's source.
# The result overwrites its own input, so re-running this cell passes the already-reshaped frame back in.
neighborhood_data_2021_present = transpose_data(data=neighborhood_data_2021_present)
# Show the reshaped frame.
neighborhood_data_2021_present

Unnamed: 0,date,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
0,2021-01-31,335152.96,213600.23,150609.69,438745.6,521486.14,180917.37,338774.43,179522.2,238384.29,...,240022.62,205658.71,190487.83,395487.22,151295.22,230912.58,55617.85,275760.18,280946.44,307632.09
1,2021-02-28,337100.07,215838.54,154491.46,442793.48,522856.82,185608.39,342486.69,181330.85,239973.93,...,244533.1,210608.34,194609.02,399358.9,155797.67,233706.65,57571.2,280995.2,285493.77,311662.78
2,2021-03-31,338529.2,217654.91,159032.56,446040.07,523647.18,191007.91,346194.55,182732.26,241113.3,...,248901.59,216042.02,198440.91,403079.57,161320.13,235914.92,60538.07,286359.51,289930.03,316208.5
3,2021-04-30,339798.57,219105.8,163509.69,448541.71,523534.46,196147.32,349930.12,183811.5,241846.05,...,253083.58,220828.19,201429.1,407398.15,166625.77,236975.4,63795.06,290788.4,293810.5,320674.89
4,2021-05-31,340754.04,219738.82,166556.65,449749.18,522886.53,199573.26,352042.45,184104.01,242134.79,...,256385.2,224200.84,202531.69,410880.33,171526.91,237436.86,66521.93,294977.07,297387.52,323826.16
5,2021-06-30,341788.37,220295.25,168400.35,449763.28,522981.14,201319.72,353057.32,184583.48,242853.84,...,259212.1,226557.33,202418.71,413127.15,175019.39,238114.89,68435.03,299432.03,301473.58,325993.12
6,2021-07-31,343067.92,221027.0,169456.45,450720.8,524273.73,201058.55,354129.82,185376.33,243888.71,...,260569.13,227806.19,200293.88,414040.68,177032.42,241895.7,69220.69,303617.61,304741.52,326625.97
7,2021-08-31,344281.14,221449.95,169831.82,452165.58,525620.49,199220.35,355132.72,186228.67,244498.74,...,260052.48,227743.38,196555.88,414459.79,177094.06,246815.77,69192.87,305550.92,305578.71,325521.2
8,2021-09-30,344721.73,220854.56,169697.53,453102.62,525261.09,196644.02,355049.79,186063.96,244482.27,...,257600.17,225882.82,191266.12,414358.08,175381.03,252009.64,68358.36,305999.25,304024.16,322763.98
9,2021-10-31,345330.26,220143.28,169068.58,454160.75,525413.69,194897.1,354782.15,185500.21,244576.85,...,255546.85,223523.95,187047.49,415099.06,173394.54,255475.83,68157.5,306452.39,302053.1,320649.74


In [37]:
# neighborhood_data_2017_2019.reset_index(inplace=True)
# neighborhood_data_2021_present.reset_index(inplace=True)

In [38]:
# neighborhood_names_list = neighborhood_data_2017_2019["RegionName"].to_list()

In [39]:
# neighborhood_data_2017_2019.drop(columns='index', inplace=True)
# neighborhood_data_2021_present.drop(columns='index', inplace=True)
# neighborhood_data_2017_2019

In [40]:
# map = {}
# for num in range(0,181):
#     map[num] = neighborhood_names_list[num]

In [41]:

# #display(neighborhood_data_2017_2019[neighborhood_data_2017_2019['RegionName'] == 'Lake View'])
# neighborhood_data_2017_2019 = neighborhood_data_2017_2019.transpose()
# neighborhood_data_2017_2019.rename(columns=map, inplace= True)
# neighborhood_data_2021_present = neighborhood_data_2021_present.transpose()
# neighborhood_data_2021_present.rename(columns=map, inplace= True)
# #display(neighborhood_data_2021_present)

In [42]:
# Preview the first five rows of each reshaped price table (2017-2019 first, then 2021-present).
display(
    neighborhood_data_2017_2019.head(),
    neighborhood_data_2021_present.head(),
)

Unnamed: 0,date,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
0,2017-01-31,320800.53,187637.44,90925.08,385189.74,527941.39,104123.27,308905.87,156084.35,222163.9,...,196512.18,150302.7,109474.19,353627.49,115870.21,174695.62,25263.13,222620.47,247078.01,267835.82
1,2017-02-28,323403.88,189553.4,92159.98,387715.94,530161.42,104668.31,311086.23,157619.44,223640.4,...,198572.76,152146.67,114100.98,357398.54,117361.01,177001.34,25686.9,224828.56,248058.67,269378.31
2,2017-03-31,325554.65,191027.03,93255.23,388969.13,532967.82,105138.79,313207.54,158774.66,224694.87,...,200222.71,153710.0,117062.46,360254.8,118906.74,178717.89,26756.02,227139.69,248698.29,270910.81
3,2017-04-30,326994.07,191889.95,93859.98,389250.31,535106.28,104823.29,314064.33,159399.12,225477.48,...,201977.06,154409.58,116186.07,363148.26,120138.56,180467.27,27796.86,228359.51,248938.92,272070.8
4,2017-05-31,327962.34,192260.37,94812.11,388709.64,538274.75,104754.83,314049.22,159471.94,225828.91,...,203294.72,155492.11,115115.95,365085.52,120869.52,181256.7,28691.81,228371.01,249231.38,272600.6


Unnamed: 0,date,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
0,2021-01-31,335152.96,213600.23,150609.69,438745.6,521486.14,180917.37,338774.43,179522.2,238384.29,...,240022.62,205658.71,190487.83,395487.22,151295.22,230912.58,55617.85,275760.18,280946.44,307632.09
1,2021-02-28,337100.07,215838.54,154491.46,442793.48,522856.82,185608.39,342486.69,181330.85,239973.93,...,244533.1,210608.34,194609.02,399358.9,155797.67,233706.65,57571.2,280995.2,285493.77,311662.78
2,2021-03-31,338529.2,217654.91,159032.56,446040.07,523647.18,191007.91,346194.55,182732.26,241113.3,...,248901.59,216042.02,198440.91,403079.57,161320.13,235914.92,60538.07,286359.51,289930.03,316208.5
3,2021-04-30,339798.57,219105.8,163509.69,448541.71,523534.46,196147.32,349930.12,183811.5,241846.05,...,253083.58,220828.19,201429.1,407398.15,166625.77,236975.4,63795.06,290788.4,293810.5,320674.89
4,2021-05-31,340754.04,219738.82,166556.65,449749.18,522886.53,199573.26,352042.45,184104.01,242134.79,...,256385.2,224200.84,202531.69,410880.33,171526.91,237436.86,66521.93,294977.07,297387.52,323826.16


In [43]:
# neighborhood_data_2017_2019 = neighborhood_data_2017_2019.iloc[1:]
# neighborhood_data_2021_present = neighborhood_data_2021_present.iloc[1:]

In [44]:
# neighborhood_data_2017_2019.reset_index(inplace=True)
# neighborhood_data_2021_present.reset_index(inplace=True)

In [45]:
# neighborhood_data_2017_2019.rename(columns={'index':'date'}, inplace=True)
# neighborhood_data_2021_present.rename(columns={'index':'date'}, inplace=True)


In [46]:
# Write each reshaped price table to its own CSV, leaving the DataFrame index out of the file.
neighborhood_data_2017_2019.to_csv(
    'csv_files/neighborhood_data_2017_2019.csv',
    index=False,
)
neighborhood_data_2021_present.to_csv(
    'csv_files/neighborhood_data_2021_present.csv',
    index=False,
)


<H1><I>END OF CLEANING PROCESS</I></H1>