In [68]:
# Import dependencies
import os
import time
import random
import pandas as pd
import matplotlib.pyplot as plt
import requests
import json
from pprint import pprint
from scipy.stats import linregress


In [69]:
# Load the COVID CSV into a DataFrame, then show its column labels.
Covid_records = "Resources/IL_covid_data.csv"
Covid_recordsDF = pd.read_csv(
    filepath_or_buffer=Covid_records,
    encoding="utf-8",
)
Covid_recordsDF.columns

Index(['Date', 'Cases - Total', 'Deaths - Total', 'Hospitalizations - Total',
       'Cases - Age 0-17', 'Cases - Age 18-29', 'Cases - Age 30-39',
       'Cases - Age 40-49', 'Cases - Age 50-59', 'Cases - Age 60-69',
       'Cases - Age 70-79', 'Cases -  Age 80+', 'Cases - Age Unknown',
       'Cases - Female', 'Cases - Male', 'Cases - Unknown Gender',
       'Cases - Latinx', 'Cases - Asian Non-Latinx',
       'Cases - Black Non-Latinx', 'Cases - White Non-Latinx',
       'Cases - Other Race Non-Latinx', 'Cases - Unknown Race/Ethnicity',
       'Deaths - Age 0-17', 'Deaths - Age 18-29', 'Deaths - Age 30-39',
       'Deaths - Age 40-49', 'Deaths - Age 50-59', 'Deaths - Age 60-69',
       'Deaths - Age 70-79', 'Deaths - Age 80+', 'Deaths - Age Unknown',
       'Deaths - Female', 'Deaths - Male', 'Deaths - Unknown Gender',
       'Deaths - Latinx', 'Deaths - Asian Non-Latinx',
       'Deaths - Black Non-Latinx', 'Deaths - White Non-Latinx',
       'Deaths - Other Race Non-Latinx', 'Death

In [70]:
# Show the dtype pandas inferred for every column of the raw COVID frame.
# NOTE(review): several count columns (e.g. "Cases - Total") come back as object
# rather than int64 -- presumably non-numeric formatting in the CSV; confirm before
# doing arithmetic on them.
Covid_recordsDF.dtypes

Date                                          object
Cases - Total                                 object
Deaths - Total                                 int64
Hospitalizations - Total                      object
Cases - Age 0-17                               int64
Cases - Age 18-29                              int64
Cases - Age 30-39                              int64
Cases - Age 40-49                              int64
Cases - Age 50-59                              int64
Cases - Age 60-69                              int64
Cases - Age 70-79                              int64
Cases -  Age 80+                               int64
Cases - Age Unknown                            int64
Cases - Female                                object
Cases - Male                                  object
Cases - Unknown Gender                         int64
Cases - Latinx                                object
Cases - Asian Non-Latinx                       int64
Cases - Black Non-Latinx                      

In [71]:
# Keep only the date and the total case count from the raw COVID frame.
Covid_cols = ["Date", "Cases - Total"]

# Clean: one row per date, and no rows with missing values.
# drop_duplicates()/dropna() return a NEW frame by default, so the result has to be
# assigned; the earlier bare `Covid_transformed.dropna()` call discarded its result
# and removed nothing. Building the frame in one chained expression also keeps this
# cell safe to re-run. The trailing copy() makes the result an independent frame so
# later column assignments do not warn about writing to a slice.
Covid_transformed = (
    Covid_recordsDF[Covid_cols]
    .drop_duplicates(subset="Date")
    .dropna()
    .copy()
)
Covid_transformed.head()

Unnamed: 0,Date,Cases - Total
0,3/1/2020,0
1,3/2/2020,0
2,3/3/2020,0
3,3/4/2020,0
4,3/5/2020,1


In [72]:
# Parse the month/day/year date strings of the COVID frame into datetime64 values,
# then show the resulting dtypes.
covid_date_strings = Covid_transformed['Date']
Covid_transformed['Date'] = pd.to_datetime(
    covid_date_strings,
    format="%m/%d/%Y",
)
Covid_transformed.dtypes

Date             datetime64[ns]
Cases - Total            object
dtype: object

In [73]:
# Load the crime CSV into a DataFrame, then show its column labels.
# The file path gets its own name (mirroring Covid_records / Covid_recordsDF) so that
# `Crime_datadf` only ever refers to the DataFrame, never to a path string.
Crime_records = "Resources/Chi_crime_data.csv"
Crime_datadf = pd.read_csv(Crime_records, encoding="utf-8")
# Several labels carry irregular whitespace (e.g. 'DATE  OF OCCURRENCE', ' IUCR');
# later cells select columns by these exact strings.
Crime_datadf.columns

Index(['CASE#', 'DATE  OF OCCURRENCE', 'BLOCK', ' IUCR',
       ' PRIMARY DESCRIPTION', ' SECONDARY DESCRIPTION',
       ' LOCATION DESCRIPTION', 'ARREST', 'DOMESTIC', 'BEAT', 'WARD', 'FBI CD',
       'X COORDINATE', 'Y COORDINATE', 'LATITUDE', 'LONGITUDE', 'LOCATION'],
      dtype='object')

In [74]:
# Preview the first five rows of the raw crime frame.
Crime_datadf.head(n=5)

Unnamed: 0,CASE#,DATE OF OCCURRENCE,BLOCK,IUCR,PRIMARY DESCRIPTION,SECONDARY DESCRIPTION,LOCATION DESCRIPTION,ARREST,DOMESTIC,BEAT,WARD,FBI CD,X COORDINATE,Y COORDINATE,LATITUDE,LONGITUDE,LOCATION
0,JD164115,02/25/2020 10:20:00 AM,013XX S CANAL ST,460,BATTERY,SIMPLE,GROCERY FOOD STORE,Y,N,124,11.0,08B,1173336.0,1893976.0,41.864494,-87.639158,"(41.864493678, -87.639158)"
1,JD163938,02/25/2020 05:30:00 AM,033XX W CULLOM AVE,820,THEFT,$500 AND UNDER,STREET,N,N,1724,33.0,06,1153387.0,1928387.0,41.95934,-87.711475,"(41.959339811, -87.711474784)"
2,JC164345,02/25/2020 01:04:00 PM,023XX S PULASKI RD,1330,CRIMINAL TRESPASS,TO LAND,RESTAURANT,Y,N,1013,22.0,26,1150083.0,1887962.0,41.848475,-87.724676,"(41.848474762, -87.724676285)"
3,JD164701,02/25/2020 06:05:00 PM,058XX N RIDGE AVE,486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,Y,Y,2013,48.0,08B,1165471.0,1938819.0,41.987716,-87.66675,"(41.987716355, -87.666750459)"
4,JD164416,02/25/2020 01:42:00 PM,011XX N HARDING AVE,2027,NARCOTICS,POSS: CRACK,STREET,Y,N,1112,37.0,18,1149847.0,1907230.0,41.901353,-87.725041,"(41.901352928, -87.725041269)"


In [75]:
# Parse the occurrence timestamps (12-hour clock with AM/PM) into datetime64 values,
# then show the resulting dtypes. The column label has two spaces after "DATE".
occurrence_col = 'DATE  OF OCCURRENCE'
Crime_datadf[occurrence_col] = pd.to_datetime(
    Crime_datadf[occurrence_col],
    format="%m/%d/%Y %I:%M:%S %p",
)
Crime_datadf.dtypes

CASE#                             object
DATE  OF OCCURRENCE       datetime64[ns]
BLOCK                             object
 IUCR                             object
 PRIMARY DESCRIPTION              object
 SECONDARY DESCRIPTION            object
 LOCATION DESCRIPTION             object
ARREST                            object
DOMESTIC                          object
BEAT                               int64
WARD                             float64
FBI CD                            object
X COORDINATE                     float64
Y COORDINATE                     float64
LATITUDE                         float64
LONGITUDE                        float64
LOCATION                          object
dtype: object

In [76]:
# Truncate every occurrence timestamp to midnight so that crime rows can be matched
# to COVID rows by calendar day. `.dt.normalize()` is the vectorized equivalent of
# calling `replace(hour=0, minute=0, second=0)` on each row, without a Python-level
# loop over the whole column.
Crime_datadf['DATE  OF OCCURRENCE'] = Crime_datadf['DATE  OF OCCURRENCE'].dt.normalize()

In [81]:
# Re-list the crime frame's column labels (exact strings, including the irregular
# whitespace) before selecting a subset of them.
Crime_datadf.columns

Index(['CASE#', 'DATE  OF OCCURRENCE', 'BLOCK', ' IUCR',
       ' PRIMARY DESCRIPTION', ' SECONDARY DESCRIPTION',
       ' LOCATION DESCRIPTION', 'ARREST', 'DOMESTIC', 'BEAT', 'WARD', 'FBI CD',
       'X COORDINATE', 'Y COORDINATE', 'LATITUDE', 'LONGITUDE', 'LOCATION'],
      dtype='object')

In [78]:
# Preview the first five rows to confirm the occurrence column now holds dates only.
Crime_datadf.head(n=5)

Unnamed: 0,CASE#,DATE OF OCCURRENCE,BLOCK,IUCR,PRIMARY DESCRIPTION,SECONDARY DESCRIPTION,LOCATION DESCRIPTION,ARREST,DOMESTIC,BEAT,WARD,FBI CD,X COORDINATE,Y COORDINATE,LATITUDE,LONGITUDE,LOCATION
0,JD164115,2020-02-25,013XX S CANAL ST,460,BATTERY,SIMPLE,GROCERY FOOD STORE,Y,N,124,11.0,08B,1173336.0,1893976.0,41.864494,-87.639158,"(41.864493678, -87.639158)"
1,JD163938,2020-02-25,033XX W CULLOM AVE,820,THEFT,$500 AND UNDER,STREET,N,N,1724,33.0,06,1153387.0,1928387.0,41.95934,-87.711475,"(41.959339811, -87.711474784)"
2,JC164345,2020-02-25,023XX S PULASKI RD,1330,CRIMINAL TRESPASS,TO LAND,RESTAURANT,Y,N,1013,22.0,26,1150083.0,1887962.0,41.848475,-87.724676,"(41.848474762, -87.724676285)"
3,JD164701,2020-02-25,058XX N RIDGE AVE,486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,Y,Y,2013,48.0,08B,1165471.0,1938819.0,41.987716,-87.66675,"(41.987716355, -87.666750459)"
4,JD164416,2020-02-25,011XX N HARDING AVE,2027,NARCOTICS,POSS: CRACK,STREET,Y,N,1112,37.0,18,1149847.0,1907230.0,41.901353,-87.725041,"(41.901352928, -87.725041269)"


In [82]:
# Clean the crime data: keep the columns needed for the comparison, one row per case
# number, no rows with missing values, and a "Date" column that matches the COVID frame.
Crime_cols = ["CASE#", "DATE  OF OCCURRENCE", " PRIMARY DESCRIPTION", " LOCATION DESCRIPTION"]

# drop_duplicates()/dropna() return a NEW frame by default, so the result has to be
# assigned; the earlier bare `Crime_transformed.dropna()` call discarded its result
# and removed nothing. The chained form also keeps this cell safe to re-run.
Crime_transformed = (
    Crime_datadf[Crime_cols]
    .drop_duplicates(subset="CASE#")
    .dropna()
    .rename(columns={"DATE  OF OCCURRENCE": "Date"})
)
Crime_transformed.head()

Unnamed: 0,CASE#,Date,PRIMARY DESCRIPTION,LOCATION DESCRIPTION
0,JD164115,2020-02-25,BATTERY,GROCERY FOOD STORE
1,JD163938,2020-02-25,THEFT,STREET
2,JC164345,2020-02-25,CRIMINAL TRESPASS,RESTAURANT
3,JD164701,2020-02-25,BATTERY,APARTMENT
4,JD164416,2020-02-25,NARCOTICS,STREET


In [83]:
# Join the COVID and crime frames on their shared "Date" column. The join is an inner
# join (pandas' default), so only dates present in both frames are kept.
combined_data_df = Covid_transformed.merge(
    Crime_transformed,
    how="inner",
    on="Date",
)

In [84]:
# Display the raw COVID frame (pandas truncates the rendered rows and columns).
Covid_recordsDF

Unnamed: 0,Date,Cases - Total,Deaths - Total,Hospitalizations - Total,Cases - Age 0-17,Cases - Age 18-29,Cases - Age 30-39,Cases - Age 40-49,Cases - Age 50-59,Cases - Age 60-69,...,Hospitalizations - Age Unknown,Hospitalizations - Female,Hospitalizations - Male,Hospitalizations - Unknown Gender,Hospitalizations - Latinx,Hospitalizations - Asian Non-Latinx,Hospitalizations - Black Non-Latinx,Hospitalizations - White Non-Latinx,Hospitalizations - Other Race Non-Latinx,Hospitalizations - Unknown Race/Ethnicity
0,3/1/2020,0,0,2,0,0,0,0,0,0,...,0.0,1,1,0.0,0.0,0.0,0.0,2.0,0.0,0.0
1,3/2/2020,0,0,1,0,0,0,0,0,0,...,0.0,1,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,3/3/2020,0,0,3,0,0,0,0,0,0,...,0.0,1,2,0.0,0.0,0.0,3.0,0.0,0.0,0.0
3,3/4/2020,0,0,3,0,0,0,0,0,0,...,0.0,1,2,0.0,0.0,0.0,2.0,1.0,0.0,0.0
4,3/5/2020,1,0,5,0,0,0,0,1,0,...,0.0,2,3,0.0,0.0,0.0,2.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
328,1/23/2021,535,3,,65,110,122,77,73,50,...,,,,,,,,,,
329,1/24/2021,276,4,,30,59,60,36,35,25,...,,,,,,,,,,
330,1/25/2021,537,6,,55,128,103,81,72,69,...,,,,,,,,,,
331,1/26/2021,140,1,,14,22,32,25,24,11,...,,,,,,,,,,


In [85]:
# Display the crime frame (pandas truncates the rendered rows).
Crime_datadf

Unnamed: 0,CASE#,DATE OF OCCURRENCE,BLOCK,IUCR,PRIMARY DESCRIPTION,SECONDARY DESCRIPTION,LOCATION DESCRIPTION,ARREST,DOMESTIC,BEAT,WARD,FBI CD,X COORDINATE,Y COORDINATE,LATITUDE,LONGITUDE,LOCATION
0,JD164115,2020-02-25,013XX S CANAL ST,0460,BATTERY,SIMPLE,GROCERY FOOD STORE,Y,N,124,11.0,08B,1173336.0,1893976.0,41.864494,-87.639158,"(41.864493678, -87.639158)"
1,JD163938,2020-02-25,033XX W CULLOM AVE,0820,THEFT,$500 AND UNDER,STREET,N,N,1724,33.0,06,1153387.0,1928387.0,41.959340,-87.711475,"(41.959339811, -87.711474784)"
2,JC164345,2020-02-25,023XX S PULASKI RD,1330,CRIMINAL TRESPASS,TO LAND,RESTAURANT,Y,N,1013,22.0,26,1150083.0,1887962.0,41.848475,-87.724676,"(41.848474762, -87.724676285)"
3,JD164701,2020-02-25,058XX N RIDGE AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,Y,Y,2013,48.0,08B,1165471.0,1938819.0,41.987716,-87.666750,"(41.987716355, -87.666750459)"
4,JD164416,2020-02-25,011XX N HARDING AVE,2027,NARCOTICS,POSS: CRACK,STREET,Y,N,1112,37.0,18,1149847.0,1907230.0,41.901353,-87.725041,"(41.901352928, -87.725041269)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205918,JD430516,2020-10-25,029XX N MELVINA AVE,1154,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT $300 AND UNDER,RESIDENCE,N,N,2511,30.0,11,1134560.0,1918918.0,41.933710,-87.780916,"(41.933709921, -87.780916246)"
205919,JD430452,2020-11-07,011XX S MICHIGAN AVE,0281,CRIMINAL SEXUAL ASSAULT,NON-AGGRAVATED,HOTEL / MOTEL,N,N,123,4.0,02,1177417.0,1895669.0,41.869048,-87.624126,"(41.869047845, -87.624125523)"
205920,JD434095,2020-11-17,047XX N KEDZIE AVE,0560,ASSAULT,SIMPLE,SIDEWALK,N,N,1713,33.0,08A,1154189.0,1931371.0,41.967512,-87.708446,"(41.967512085, -87.708446268)"
205921,JD453566,2020-12-08,037XX N RECREATION DR,0486,BATTERY,DOMESTIC BATTERY SIMPLE,PARK PROPERTY,N,Y,1925,46.0,08B,1171826.0,1925510.0,41.951058,-87.643771,"(41.951058037, -87.643770543)"


In [86]:
# Display the merged frame: one row per crime case, carrying the COVID case total
# recorded for that row's date.
combined_data_df

Unnamed: 0,Date,Cases - Total,CASE#,PRIMARY DESCRIPTION,LOCATION DESCRIPTION
0,2020-03-01,0,JD170468,HOMICIDE,VESTIBULE
1,2020-03-01,0,JD368864,THEFT,OTHER (SPECIFY)
2,2020-03-01,0,JD170475,BATTERY,STREET
3,2020-03-01,0,JD191229,CRIMINAL SEXUAL ASSAULT,APARTMENT
4,2020-03-01,0,JD170171,NARCOTICS,APARTMENT
...,...,...,...,...,...
181402,2021-01-20,735,JE118499,DECEPTIVE PRACTICE,APARTMENT
181403,2021-01-20,735,JE120790,DECEPTIVE PRACTICE,RESIDENCE
181404,2021-01-20,735,JE118095,BATTERY,RESIDENCE
181405,2021-01-20,735,JE118279,BATTERY,APARTMENT


In [87]:
# Count rows per date in the COVID frame; every count should be 1 after de-duplication.
Covid_transformed["Date"].value_counts()


2020-03-13    1
2020-06-30    1
2020-04-29    1
2020-03-29    1
2021-01-16    1
             ..
2020-04-28    1
2020-03-28    1
2021-01-15    1
2020-12-15    1
2020-12-31    1
Name: Date, Length: 332, dtype: int64

In [88]:
# Count crime cases per date, most frequent dates first.
Crime_transformed["Date"].value_counts()

2020-05-31    1895
2020-08-10     939
2020-05-30     923
2020-06-01     872
2020-02-01     791
              ... 
2020-11-26     382
2020-04-02     376
2020-03-22     376
2020-04-14     363
2020-04-19     340
Name: Date, Length: 365, dtype: int64