# Cleaning Process
This notebook contain the necessary steps like getting the data info such as data size, number of feature, number of records, mean value, max value, etc. Clean the data which includes dropping some columns and rows, adding more information, joining the dataframes and more

In [2]:
import pandas as pd
import numpy as np

## Crime Dataset

In [4]:
crime_data = pd.read_csv('csv_files/Crimes_2001_to_Present.csv')

In [5]:
crime_data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000075 entries, 0 to 8000074
Data columns (total 22 columns):
 #   Column                Dtype  
---  ------                -----  
 0   ID                    int64  
 1   Case Number           object 
 2   Date                  object 
 3   Block                 object 
 4   IUCR                  object 
 5   Primary Type          object 
 6   Description           object 
 7   Location Description  object 
 8   Arrest                bool   
 9   Domestic              bool   
 10  Beat                  int64  
 11  District              float64
 12  Ward                  float64
 13  Community Area        float64
 14  FBI Code              object 
 15  X Coordinate          float64
 16  Y Coordinate          float64
 17  Year                  int64  
 18  Updated On            object 
 19  Latitude              float64
 20  Longitude             float64
 21  Location              object 
dtypes: bool(2), float64(7), int64(3), object(1

## Converting the Date
#### step 1) changing the data type to a much suitable format

In [6]:
crime_data['Arrest'] = crime_data['Arrest'].replace({
    True:1,
    False:0
})

crime_data['Arrest'].unique()

array([0, 1])

In [7]:

crime_data['New_Date'] = pd.to_datetime(crime_data['Date'])
# crime_data['New_Date'] = crime_data['New_Date'].dt.strftime('%m-%d-%Y %H:%M:%S')

In [8]:
# comparing the old date value format to the datetime value format
crime_data[['Date', 'New_Date']]

Unnamed: 0,Date,New_Date
0,03/18/2015 12:00:00 PM,2015-03-18 12:00:00
1,12/20/2018 03:00:00 PM,2018-12-20 15:00:00
2,05/01/2016 12:25:00 AM,2016-05-01 00:25:00
3,12/20/2018 04:00:00 PM,2018-12-20 16:00:00
4,06/01/2014 12:01:00 AM,2014-06-01 00:01:00
...,...,...
8000070,08/10/2020 08:30:00 PM,2020-08-10 20:30:00
8000071,06/18/2020 09:00:00 PM,2020-06-18 21:00:00
8000072,07/27/2020 03:02:00 PM,2020-07-27 15:02:00
8000073,08/14/2020 03:00:00 PM,2020-08-14 15:00:00


In [10]:
# Print the Latest and Earliest record to veryfiy the range
print("Max new_date value: ", crime_data['New_Date'].max()) # Latest record of 2024
print("Min new_date value: ", crime_data['New_Date'].min()) # Earliest record of 2021

Max new_date value:  2024-02-10 00:00:00
Min new_date value:  2001-01-01 00:00:00


#### Step 2) Dropping the unecessary columns such as X & Y Coordinate, Date, Block, IUCR, Description, Domestic, Beat, District, FBI code, Ward, Updated on, Latitude, Longitude 

In [11]:
col = ['ID', 'New_Date', 'Primary Type', 'Location Description', 'Arrest', 'Community Area', 'Location']
crime_data = crime_data[col]
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000075 entries, 0 to 8000074
Data columns (total 7 columns):
 #   Column                Dtype         
---  ------                -----         
 0   ID                    int64         
 1   New_Date              datetime64[ns]
 2   Primary Type          object        
 3   Location Description  object        
 4   Arrest                int64         
 5   Community Area        float64       
 6   Location              object        
dtypes: datetime64[ns](1), float64(1), int64(2), object(3)
memory usage: 427.3+ MB


### Step 3) Filtering

#### (Pre Covid)
Extracting the data for crime report from 2017-2019

In [12]:
crime_data_2017_2019 = crime_data[(crime_data['New_Date'].dt.year >= 2017) & (crime_data['New_Date'].dt.year <= 2019)]

In [13]:
# Print the Latest and Earliest record to veryfiy the range
print("Min new_date value: ", crime_data_2017_2019['New_Date'].min()) # Earliest record of 2017
print("Min new_date value: ", crime_data_2017_2019['New_Date'].max()) # Earliest record of 2019
print(crime_data_2017_2019['New_Date'].dt.year.unique()) 

Min new_date value:  2017-01-01 00:00:00
Min new_date value:  2019-12-31 23:55:00
[2018 2019 2017]


In [14]:
print(crime_data_2017_2019['Location'].isna().sum())
print(crime_data_2017_2019['Location Description'].isna().sum())
print(crime_data_2017_2019['Community Area'].isna().sum())
print(crime_data_2017_2019['Primary Type'].isna().sum())

11845
3706
0
0


In [15]:
crime_data_2017_2019.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_data_2017_2019.dropna(inplace=True)


In [16]:
len(crime_data_2017_2019['Community Area'].unique())

77

#### (Post Covid)
Extracting the data for crime report from 2021 - present

In [17]:
crime_data_2021_present = crime_data[crime_data['New_Date'].dt.year >= 2021]

In [18]:
print("Min new_date value: ", crime_data_2021_present['New_Date'].min()) # Earliest record of 2021
print(crime_data_2021_present['New_Date'].dt.year.unique()) # Making sure that the range (2021-2024)

Min new_date value:  2021-01-01 00:00:00
[2023 2021 2022 2024]


In [19]:
print(crime_data_2021_present['Location'].isna().sum())
print(crime_data_2021_present['Location Description'].isna().sum())
print(crime_data_2021_present['Community Area'].isna().sum())
print(crime_data_2021_present['Primary Type'].isna().sum())

10615
3486
1
0


In [20]:
crime_data_2021_present.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_data_2021_present.dropna(inplace=True)


In [21]:

len(crime_data_2021_present['Community Area'].unique())

77

In [22]:
community_areas = {
    1: "Rogers Park",
    2: "West Ridge",
    3: "Uptown",
    4: "Lincoln Square",
    5: "North Center",
    6: "Lake View",
    7: "Lincoln Park",
    8: "Near North Side",
    9: "Edison Park",
    10: "Norwood Park",
    11: "Jefferson Park",
    12: "Forest Glen",
    13: "North Park",
    14: "Albany Park",
    15: "Portage Park",
    16: "Irving Park",
    17: "Dunning",
    18: "Montclare",
    19: "Belmont Cragin",
    20: "Hermosa",
    21: "Avondale",
    22: "Logan Square",
    23: "Humboldt Park",
    24: "West Town",
    25: "Austin",
    26: "West Garfield Park",
    27: "East Garfield Park",
    28: "Near West Side",
    29: "North Lawndale",
    30: "South Lawndale",
    31: "Lower West Side",
    32: "Loop",
    33: "Near South Side",
    34: "Armour Square",
    35: "Douglas",
    36: "Oakland",
    37: "Fuller Park",
    38: "Grand Boulevard",
    39: "Kenwood",
    40: "Washington Park",
    41: "Hyde Park",
    42: "Woodlawn",
    43: "South Shore",
    44: "Chatham",
    45: "Avalon Park",
    46: "South Chicago",
    47: "Burnside",
    48: "Calumet Heights",
    49: "Roseland",
    50: "Pullman",
    51: "South Deering",
    52: "East Side",
    53: "West Pullman",
    54: "Riverdale",
    55: "Hegewisch",
    56: "Garfield Ridge",
    57: "Archer Heights",
    58: "Brighton Park",
    59: "McKinley Park",
    60: "Bridgeport",
    61: "New City",
    62: "West Elsdon",
    63: "Gage Park",
    64: "Clearing",
    65: "West Lawn",
    66: "Chicago Lawn",
    67: "West Englewood",
    68: "Englewood",
    69: "Greater Grand Crossing",
    70: "Ashburn",
    71: "Auburn Gresham",
    72: "Beverly",
    73: "Washington Heights",
    74: "Mount Greenwood",
    75: "Morgan Park",
    76: "O'Hare",
    77: "Edgewater"
}

In [23]:
def get_community(code):
    return community_areas[code]

In [24]:
crime_data_2017_2019.loc[:, 'Location'] = crime_data_2017_2019['Community Area'].apply(get_community)
crime_data_2021_present.loc[:, 'Location'] = crime_data_2021_present['Community Area'].apply(get_community)

In [25]:
crime_data_2017_2019.rename(columns={'Location':'RegionName'}, inplace = True)
crime_data_2021_present.rename(columns={'Location':'RegionName'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_data_2017_2019.rename(columns={'Location':'RegionName'}, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_data_2021_present.rename(columns={'Location':'RegionName'}, inplace = True)


In [26]:
display(crime_data_2021_present)
display(crime_data_2017_2019)

Unnamed: 0,ID,New_Date,Primary Type,Location Description,Arrest,Community Area,RegionName
371,13204489,2023-09-06 11:00:00,THEFT,PARKING LOT / GARAGE (NON RESIDENTIAL),0,32.0,Loop
643,12342615,2021-04-17 15:20:00,ROBBERY,RESIDENCE,1,44.0,Chatham
646,12589893,2022-01-11 15:00:00,SEX OFFENSE,RESIDENCE,0,46.0,South Chicago
647,12592454,2022-01-14 15:55:00,OTHER OFFENSE,RESIDENCE,0,68.0,Englewood
648,12785595,2022-08-05 21:00:00,SEX OFFENSE,APARTMENT,1,69.0,Greater Grand Crossing
...,...,...,...,...,...,...,...
2336176,26479,2021-12-03 20:37:00,HOMICIDE,PORCH,1,69.0,Greater Grand Crossing
2336315,27289,2023-01-06 21:25:00,HOMICIDE,HOUSE,1,67.0,West Englewood
2336416,27015,2022-08-31 09:30:00,HOMICIDE,GAS STATION,0,53.0,West Pullman
2336534,27577,2023-06-20 12:42:00,HOMICIDE,STREET,0,26.0,West Garfield Park


Unnamed: 0,ID,New_Date,Primary Type,Location Description,Arrest,Community Area,RegionName
96,12098557,2019-02-01 00:01:00,BATTERY,RESIDENCE,0,63.0,Gage Park
283,12082526,2019-09-24 12:00:00,DECEPTIVE PRACTICE,RESIDENCE,0,3.0,Uptown
527,11859264,2019-10-13 06:40:00,CRIMINAL DAMAGE,APARTMENT,0,29.0,North Lawndale
641,11662417,2019-04-21 12:30:00,ROBBERY,RESIDENCE,0,44.0,Chatham
663,12990873,2019-08-17 13:14:00,OFFENSE INVOLVING CHILDREN,RESIDENCE,1,23.0,Humboldt Park
...,...,...,...,...,...,...,...
7999292,12022897,2019-12-05 00:00:00,DECEPTIVE PRACTICE,RESIDENCE,0,34.0,Armour Square
7999338,12056523,2019-12-20 09:00:00,DECEPTIVE PRACTICE,RESIDENCE,1,8.0,Near North Side
7999342,12056116,2019-04-09 00:00:00,OFFENSE INVOLVING CHILDREN,RESIDENCE,0,48.0,Calumet Heights
7999778,12166069,2019-02-01 09:00:00,DECEPTIVE PRACTICE,RESIDENCE,0,16.0,Irving Park


### Step 4) Saving the Dataframe to a CSV file

In [27]:
crime_data_2021_present.to_csv('csv_files/Crimes_2021_to_Present.csv', index=False)
crime_data_2017_2019.to_csv('csv_files/Crimes_2017_to_2019.csv', index=False)


## Neighborhood Dataset
https://www.zillow.com/research/data/

Contains information like Region Name, State, City, County Name, Average price of a house for every month since 2000

In [10]:
neighborhood_data = pd.read_csv('csv_files/Neighborhood_House_Price.csv')

In [11]:
neighborhood_data.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,2000-01-31,...,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
0,112345,0,Maryvale,neighborhood,AZ,AZ,Phoenix,"Phoenix-Mesa-Chandler, AZ",Maricopa County,66775.313666,...,313492.5,314776.5,316614.5,319072.5,322054.6,324693.8,327100.8,329141.1,330703.5,331714.1
1,192689,1,Paradise,neighborhood,NV,NV,Las Vegas,"Las Vegas-Henderson-Paradise, NV",Clark County,132638.938818,...,358563.7,358037.2,358754.6,360550.8,363426.5,366274.1,368744.6,370886.7,372963.4,374854.1
2,270958,2,Upper West Side,neighborhood,NY,NY,New York,"New York-Newark-Jersey City, NY-NJ-PA",New York County,387530.423074,...,1276836.0,1270266.0,1264532.0,1258336.0,1248721.0,1238858.0,1227969.0,1216308.0,1208912.0,1203406.0
3,270957,3,Upper East Side,neighborhood,NY,NY,New York,"New York-Newark-Jersey City, NY-NJ-PA",New York County,634533.128812,...,1259968.0,1250928.0,1245395.0,1241081.0,1236655.0,1232169.0,1224024.0,1212976.0,1202819.0,1196051.0
4,118208,4,South Los Angeles,neighborhood,CA,CA,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,127876.428774,...,619868.4,620830.5,624531.4,631738.0,641397.3,651175.4,659477.2,665923.5,670126.6,667898.8


### Filtering
Only want to focus on Chicago, IL housing prices. 

In [12]:
neighborhood_data = neighborhood_data[(neighborhood_data['State'] == 'IL') & (neighborhood_data['City'] == 'Chicago')]

In [13]:
# Pre Covid
first_half_column = neighborhood_data.loc[0:, ['RegionName']]
second_half_column =  neighborhood_data.loc[0:, '2017-01-31':'2019-12-31']

# Post Covid
first_half_column_2 = neighborhood_data.loc[0:, ['RegionName']]
second_half_column_2 =  neighborhood_data.loc[0:, '2021-01-31':]


In [14]:
neighborhood_data_2017_2019 = pd.concat([first_half_column, second_half_column], axis=1)
neighborhood_data_2021_present = pd.concat([first_half_column_2, second_half_column_2], axis=1)

## Transposing the Data
Reseting the Index, Rotating the Dataframe and Renaming the colums to neighborhood to make it easier to do EDA


In [15]:
neighborhood_data_2017_2019.reset_index(inplace=True)
neighborhood_data_2021_present.reset_index(inplace=True)

In [16]:
neighborhood_names_list = neighborhood_data_2017_2019["RegionName"].to_list()

In [17]:
neighborhood_data_2017_2019.drop(columns='index', inplace=True)
neighborhood_data_2021_present.drop(columns='index', inplace=True)
neighborhood_data_2017_2019

Unnamed: 0,RegionName,2017-01-31,2017-02-28,2017-03-31,2017-04-30,2017-05-31,2017-06-30,2017-07-31,2017-08-31,2017-09-30,...,2019-03-31,2019-04-30,2019-05-31,2019-06-30,2019-07-31,2019-08-31,2019-09-30,2019-10-31,2019-11-30,2019-12-31
0,Lake View,320800.529961,323403.875393,325554.648715,326994.066372,327962.336934,327811.478035,327962.790422,327437.847970,327554.602956,...,327436.112620,327638.943873,327428.128541,326810.851395,325654.374039,324470.264123,323266.430150,322409.458344,321944.598794,322010.510163
1,West Ridge,187637.444136,189553.396344,191027.034117,191889.952161,192260.367968,191657.654310,191412.149087,191274.527805,192094.143374,...,204783.405502,204814.591194,204405.009083,204249.895972,203823.411224,203506.384146,202946.678331,203030.557504,203433.095567,204670.639264
2,Little Village,90925.075575,92159.981171,93255.225637,93859.979012,94812.109531,95124.032869,95401.547996,95832.125714,97255.880295,...,124782.842092,126055.982656,126805.514295,127412.094949,126981.804065,127537.226453,127830.794507,128503.409806,129227.471761,130157.108534
3,Logan Square,385189.738477,387715.936437,388969.125310,389250.314752,388709.642645,387398.503138,386883.966756,386048.664822,386580.050284,...,414528.089528,416111.230615,415797.190256,414317.801661,412092.450971,410604.326047,409346.866443,408934.379163,409406.784465,411298.166343
4,Lincoln Park,527941.394692,530161.421716,532967.822467,535106.275721,538274.753360,540584.689695,544013.398935,545448.299114,546221.200203,...,533113.718875,531744.025668,528767.619898,527003.180168,525522.950173,524547.076555,522445.253179,521778.078281,520769.321925,520116.842387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,Heart of Italy,174695.616386,177001.337230,178717.892446,180467.268885,181256.703189,181725.536626,182351.215235,182371.620002,182801.548030,...,211387.283107,211614.709182,210971.351293,210925.979425,210781.085712,210695.122221,209817.312211,209887.855266,210484.309940,212598.767758
177,Golden Gate,25263.130346,25686.900261,26756.021861,27796.858540,28691.811622,29155.295553,29502.305780,29721.061781,30255.160560,...,41621.830434,41514.056903,42116.313584,42738.846464,43075.210579,43220.250096,43592.471256,44341.406856,44976.630856,45283.533368
178,Marycrest,222620.465114,224828.560250,227139.685233,228359.512801,228371.013902,226888.913337,225638.915889,225330.794569,226455.824064,...,239572.563303,239978.803273,240549.150432,240891.286584,240668.961675,240444.077082,240122.750905,240710.728818,241326.823178,242843.243896
179,Mount Greenwood Heights,247078.011675,248058.672791,248698.293271,248938.921598,249231.377345,249169.393812,249098.501465,248786.876599,250024.692465,...,258493.304020,258668.042545,258522.936241,259107.675903,259501.419125,260679.817344,260902.811661,261775.682500,262210.501428,262427.216995


In [18]:
map = {}
for num in range(0,181):
    map[num] = neighborhood_names_list[num]

In [19]:

#display(neighborhood_data_2017_2019[neighborhood_data_2017_2019['RegionName'] == 'Lake View'])
neighborhood_data_2017_2019 = neighborhood_data_2017_2019.transpose()
neighborhood_data_2017_2019.rename(columns=map, inplace= True)
neighborhood_data_2021_present = neighborhood_data_2021_present.transpose()
neighborhood_data_2021_present.rename(columns=map, inplace= True)
#display(neighborhood_data_2021_present)

In [20]:
display(neighborhood_data_2017_2019)
display(neighborhood_data_2021_present)

Unnamed: 0,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,Gresham,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
RegionName,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,Gresham,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
2017-01-31,320800.529961,187637.444136,90925.075575,385189.738477,527941.394692,104123.26978,308905.874982,156084.352003,222163.898965,93984.935148,...,196512.180094,150302.703199,109474.192087,353627.489385,115870.209964,174695.616386,25263.130346,222620.465114,247078.011675,267835.822774
2017-02-28,323403.875393,189553.396344,92159.981171,387715.936437,530161.421716,104668.311807,311086.230949,157619.441673,223640.4001,95883.199031,...,198572.76316,152146.670755,114100.977206,357398.536611,117361.011908,177001.33723,25686.900261,224828.56025,248058.672791,269378.309108
2017-03-31,325554.648715,191027.034117,93255.225637,388969.12531,532967.822467,105138.791112,313207.539541,158774.659454,224694.870133,97596.351429,...,200222.70901,153710.0023,117062.46098,360254.804451,118906.735885,178717.892446,26756.021861,227139.685233,248698.293271,270910.811867
2017-04-30,326994.066372,191889.952161,93859.979012,389250.314752,535106.275721,104823.285143,314064.334049,159399.121149,225477.482034,97889.440577,...,201977.05726,154409.584642,116186.066534,363148.259581,120138.556807,180467.268885,27796.85854,228359.512801,248938.921598,272070.802637
2017-05-31,327962.336934,192260.367968,94812.109531,388709.642645,538274.75336,104754.825993,314049.222156,159471.944641,225828.90678,98312.779213,...,203294.717522,155492.111419,115115.94718,365085.521269,120869.519651,181256.703189,28691.811622,228371.013902,249231.377345,272600.604751
2017-06-30,327811.478035,191657.65431,95124.032869,387398.503138,540584.689695,104495.209457,312965.463017,159051.811652,225370.083915,98414.409174,...,203623.69133,155826.895252,114375.450935,364827.713936,120708.673093,181725.536626,29155.295553,226888.913337,249169.393812,271903.447634
2017-07-31,327962.790422,191412.149087,95401.547996,386883.966756,544013.398935,105456.97827,312033.222163,159263.837543,225391.153142,98900.877565,...,203924.565421,156080.186599,115932.302115,362801.744023,119757.391717,182351.215235,29502.30578,225638.915889,249098.501465,271082.505246
2017-08-31,327437.84797,191274.527805,95832.125714,386048.664822,545448.299114,106777.027399,310987.594809,159557.201739,225711.366292,99423.759041,...,203588.830154,155547.428168,116634.888891,360938.597703,119547.387878,182371.620002,29721.061781,225330.794569,248786.876599,270401.834849
2017-09-30,327554.602956,192094.143374,97255.880295,386580.050284,546221.200203,109238.102693,310692.367659,160237.73804,226615.055412,100825.924009,...,203998.035217,155771.446125,118506.149798,361452.554057,120030.848214,182801.54803,30255.16056,226455.824064,250024.692465,270636.650313


Unnamed: 0,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,Gresham,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
RegionName,Lake View,West Ridge,Little Village,Logan Square,Lincoln Park,South Austin,Irving Park,Rogers Park,Uptown,Gresham,...,Beverly Woods,Sleepy Hollow,Lithuanian Plaza,Forest Glen,Beverly View,Heart of Italy,Golden Gate,Marycrest,Mount Greenwood Heights,Schorsch Forest View
2021-01-31,335152.964276,213600.22992,150609.690221,438745.604328,521486.137219,180917.374761,338774.427175,179522.204697,238384.286918,142499.314263,...,240022.623576,205658.709,190487.831338,395487.224536,151295.222629,230912.584221,55617.851577,275760.178768,280946.442648,307632.094045
2021-02-28,337100.070376,215838.540172,154491.459007,442793.484892,522856.818945,185608.390589,342486.690267,181330.847136,239973.929497,147330.834777,...,244533.099594,210608.339795,194609.021205,399358.903549,155797.669721,233706.648789,57571.195221,280995.201316,285493.773982,311662.776952
2021-03-31,338529.196964,217654.90533,159032.561218,446040.066569,523647.18224,191007.910295,346194.549901,182732.26069,241113.29843,152726.783554,...,248901.593636,216042.021595,198440.913582,403079.567198,161320.133838,235914.918863,60538.071205,286359.511962,289930.026637,316208.501575
2021-04-30,339798.571621,219105.803791,163509.685324,448541.706261,523534.460888,196147.321168,349930.120903,183811.499572,241846.051785,157916.566205,...,253083.58446,220828.192199,201429.100659,407398.148654,166625.774651,236975.399,63795.056935,290788.403519,293810.501845,320674.893231
2021-05-31,340754.044648,219738.822606,166556.653383,449749.184842,522886.530447,199573.256492,352042.448059,184104.008816,242134.791372,162648.087496,...,256385.197651,224200.835112,202531.69313,410880.334613,171526.912511,237436.86208,66521.929432,294977.065756,297387.517453,323826.163279
2021-06-30,341788.369111,220295.24561,168400.346553,449763.281273,522981.141822,201319.718631,353057.317078,184583.478912,242853.837097,165970.591484,...,259212.099508,226557.332518,202418.711739,413127.154296,175019.389521,238114.885444,68435.029445,299432.029132,301473.582012,325993.122187
2021-07-31,343067.924135,221026.99873,169456.453588,450720.802504,524273.72531,201058.546668,354129.818064,185376.328301,243888.707626,166834.175998,...,260569.128844,227806.187755,200293.876885,414040.675832,177032.417315,241895.703793,69220.692911,303617.607781,304741.517336,326625.966628
2021-08-31,344281.139606,221449.948867,169831.819215,452165.579963,525620.489472,199220.354709,355132.722044,186228.671777,244498.739349,165070.576822,...,260052.47751,227743.383053,196555.883711,414459.790053,177094.056203,246815.76596,69192.86553,305550.921217,305578.714294,325521.195942
2021-09-30,344721.731199,220854.555142,169697.529621,453102.624344,525261.092467,196644.018957,355049.794556,186063.959168,244482.26829,161920.542423,...,257600.169596,225882.82093,191266.11736,414358.076889,175381.033719,252009.638464,68358.361754,305999.254374,304024.160134,322763.975147


In [21]:
neighborhood_data_2017_2019 = neighborhood_data_2017_2019.iloc[1:]
neighborhood_data_2021_present = neighborhood_data_2021_present.iloc[1:]

In [22]:
neighborhood_data_2017_2019.reset_index(inplace=True)
neighborhood_data_2021_present.reset_index(inplace=True)

In [23]:
neighborhood_data_2017_2019.rename(columns={'index':'date'}, inplace=True)
neighborhood_data_2021_present.rename(columns={'index':'date'}, inplace=True)


In [24]:
neighborhood_data_2017_2019.to_csv('csv_files/neighborhood_data_2017_2019.csv', index = False)
neighborhood_data_2021_present.to_csv('csv_files/neighborhood_data_2021_present.csv', index = False)


<H1><I>END OF CLEANING PROCESS<I><H1>