# Exploring the Chicago crime dataset
This notebook explores the Chicago crime dataset, which is available at https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2

In [78]:
import pandas as pd
from bokeh.sampledata.olympics2014 import data
from bokeh.charts import Bar, output_file, show
from bokeh.charts.attributes import cat, color, CatAttr
from bokeh.io import output_notebook
import math


In [94]:
# Load the raw crime export into a DataFrame and preview its first rows.
raw_csv_name = 'Crimes_-_2001_to_present.csv'
df = pd.read_csv(raw_csv_name)
df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,4700982,HM212393,03/03/2006 10:40:00 PM,0000X W 95TH ST,1811,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,CTA TRAIN,True,False,...,21.0,49.0,18,1177762.0,1841949.0,2006,04/15/2016 08:55:02 AM,41.721627,-87.624485,"(41.721627204, -87.624485177)"
1,4700983,HM300030,01/01/2006 12:00:00 PM,049XX S HOYNE AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,16.0,61.0,06,1163199.0,1871730.0,2006,04/15/2016 08:55:02 AM,41.803667,-87.676995,"(41.803667166, -87.676994658)"
2,4700984,HM300880,04/19/2006 09:20:00 PM,0000X W 95TH ST,460,BATTERY,SIMPLE,CTA GARAGE / OTHER PROPERTY,False,False,...,21.0,49.0,08B,1177762.0,1841949.0,2006,04/15/2016 08:55:02 AM,41.721627,-87.624485,"(41.721627204, -87.624485177)"
3,4700985,HM304274,04/21/2006 01:45:00 PM,104XX S AVENUE L,1320,CRIMINAL DAMAGE,TO VEHICLE,STREET,False,False,...,10.0,52.0,14,1201838.0,1836252.0,2006,04/15/2016 08:55:02 AM,41.705416,-87.536494,"(41.70541635, -87.536493961)"
4,4700986,HM305728,04/22/2006 12:00:00 AM,082XX S PRAIRIE AVE,910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,...,6.0,44.0,07,1179463.0,1850527.0,2006,04/15/2016 08:55:02 AM,41.745128,-87.617994,"(41.745127679, -87.617993623)"


### Getting attributes
First, I create additional attributes that I think will be useful in determining the type of crime being committed.

In [None]:
# `a` is the raw timestamp column; `b` holds one placeholder record per row,
# to be filled with that row's date, time and AM/PM pieces.
a = df['Date']
b = [dict(date=' ', time=' ', ampm=' ') for row in range(len(a))]

In [None]:
# Break each raw timestamp string on spaces and file the first three pieces
# (date, clock time, AM/PM marker) under the matching record in `b`.
for row in range(len(a)):
    pieces = a[row].split(' ')
    b[row].update(date=pieces[0], time=pieces[1], ampm=pieces[2])

In [None]:
# Turn every record in `b` into integer date/time components.
# The date string is split on '/' as month/day/year and the time string on
# ':' as hour:minute(:second); the 12-hour clock reading is converted to a
# 24-hour value using the AM/PM marker.
year=[]
month=[]
date=[]
hours=[]
mins=[]
for i in b:
    # Build real lists: on Python 3 `map` returns a lazy iterator that can be
    # neither indexed nor assigned into, so the previous `map(...)[0]` failed.
    x=[int(part) for part in i['date'].split('/')]
    y=[int(part) for part in i['time'].split(':')]
    month.append(x[0])
    date.append(x[1])
    year.append(x[2])
    mins.append(y[1])
    # `% 12` maps the 12 o'clock reading to 0, so 12 AM -> 0 and 12 PM -> 12;
    # every other PM hour is shifted by 12 (valid for 1-12 clock readings).
    hour=y[0]%12
    if i['ampm']=='PM':
        hour+=12
    hours.append(hour)

In [None]:
# Attach the parsed components to the frame, one column per list.
# 'Year' replaces the column of that name that the CSV already provides.
derived_columns = (
    ('Year', year),
    ('Month', month),
    ('Date_value', date),
    ('Hour', hours),
    ('Minutes', mins),
)
for column_name, column_values in derived_columns:
    df[column_name] = column_values

In [113]:
# Replace `df` with the contents of the updated CSV.
# NOTE(review): presumably this is the frame with the derived columns saved
# from the cells above, but no cell visible here writes this file - confirm
# how it is produced so the notebook survives Restart & Run All.
updated_csv_name = 'chicago_updated.csv'
df = pd.read_csv(updated_csv_name)

In [5]:
# Zero-initialised tallies, one key per distinct value found in each column.
crime_count = dict.fromkeys(set(df['Primary Type']), 0)
year_count = dict.fromkeys(set(df['Year']), 0)
time_count = dict.fromkeys(set(df['Hour']), 0)
location = dict.fromkeys(set(df['Location Description']), 0)




In [16]:
# Tally how often each value occurs in the four columns of interest.
# value_counts() counts a whole column in one vectorised pass, replacing four
# scalar df.loc lookups per row, and it does not rely on the frame having a
# 0..n-1 index. Assigning fresh dicts also makes the cell idempotent:
# re-running it can no longer double the totals.
# dropna=False keeps a key for missing values (NaN), matching the keys that a
# set() over the column would contain.
crime_count = df['Primary Type'].value_counts(dropna=False).to_dict()
year_count = df['Year'].value_counts(dropna=False).to_dict()
time_count = df['Hour'].value_counts(dropna=False).to_dict()
location = df['Location Description'].value_counts(dropna=False).to_dict()

In [47]:
# Direct Bokeh output to the notebook so that show() renders charts inline.
output_notebook()

# Data Exploration
Exploration of the trends in the data:
1. Location descriptions where the most crimes have been reported.
2. Types of crime that have been reported most often.
3. Yearly trend in crime reports.
4. Crime counts by hour of the day.

In [80]:
# Bar chart of the 20 location descriptions with the most reports.
# Keys whose type is exactly `float` are skipped; `a` keeps the labels and
# `b` the matching counts.
a = [place for place in location.keys() if type(place) is not float]
b = [location[place] for place in a]

df1 = pd.DataFrame({'location': a, 'count': b}, columns=['location', 'count'])
df1 = df1.sort_values(by='count', ascending=False).iloc[:20]

# sort=False keeps the bars in the frame's (descending count) order.
location_axis = CatAttr(columns=['location'], sort=False)
p = Bar(df1, label=location_axis, values='count',
        title="Crime count by location", color="red")

output_file("locationCount.html")
p.legend.visible = False

show(p)

In [82]:
# Bar chart of the 20 primary crime types with the most reports.
# `a` keeps the labels and `b` the matching counts; the missing-value key
# (NaN) is left out of the ranking.
a = [kind for kind in crime_count.keys() if not pd.isnull(kind)]
b = [crime_count[kind] for kind in a]

df1 = pd.DataFrame({'type': a, 'count': b}, columns=['type', 'count'])
df1 = df1.sort_values(by='count', ascending=False).head(20)

# sort=False keeps the bars in the frame's (descending count) order.
p = Bar(df1, label=CatAttr(columns=['type'], sort=False), values='count',
        title="Crime count by Type", color="wheat")

# Give this chart its own file: the copy-pasted "locationCount.html" made it
# overwrite the location chart saved by the previous cell.
output_file("typeCount.html")
p.legend.visible = False

show(p)

In [120]:
# Bar chart of the number of reports per year.
# `a` keeps the years and `b` the matching counts. Only genuinely missing
# keys are dropped: the earlier `type(i) is float` test would discard every
# year if the column were loaded with a float dtype.
a = [yr for yr in year_count.keys() if not pd.isnull(yr)]
b = [year_count[yr] for yr in a]

df1 = pd.DataFrame({'year': a, 'count': b}, columns=['year', 'count'])
# NOTE(review): bars are ordered by count, not chronologically - confirm this
# is the intended view for a yearly trend.
df1 = df1.sort_values(by='count', ascending=False)

# sort=False keeps the bars in the frame's order.
p = Bar(df1, label=CatAttr(columns=['year'], sort=False), values='count',
        title="Crime count by Year", color="SaddleBrown")

# Give this chart its own file: the copy-pasted "locationCount.html" made it
# overwrite the location chart.
output_file("yearCount.html")
p.legend.visible = False

show(p)

In [119]:
# Bar chart of the number of reports per hour of the day.
# `a` keeps the hours and `b` the matching counts. Only genuinely missing
# keys are dropped: the earlier `type(i) is float` test would discard every
# hour if the column were loaded with a float dtype.
a = [hr for hr in time_count.keys() if not pd.isnull(hr)]
b = [time_count[hr] for hr in a]

df1 = pd.DataFrame({'hour': a, 'count': b}, columns=['hour', 'count'])
# NOTE(review): bars are ordered by count, not by hour - confirm this is the
# intended view for an hour-by-hour comparison.
df1 = df1.sort_values(by='count', ascending=False)

# sort=False keeps the bars in the frame's order.
p = Bar(df1, label=CatAttr(columns=['hour'], sort=False), values='count',
        title="Crime count by Hour", color="GoldenRod")

# Name the output file explicitly, as the sibling chart cells do; without
# this the chart is written to whichever file the last-run cell selected.
output_file("hourCount.html")
p.legend.visible = False
show(p)