In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import datetime
import matplotlib.pyplot as plt
import mysql.connector
from sqlalchemy import create_engine

# Colour list passed as `colors=` to the pie charts in this notebook.
clrs = ['darkorange', 'gold', 'green', 'lightskyblue','salmon','turquoise','pink','bisque']
# NOTE(review): sns.set_style() is called here for its side effect (global
# "whitegrid" style); it presumably returns None, so `palette` would not hold
# a usable palette — confirm before relying on this name.
palette=sns.set_style("whitegrid")

# Reading input files

In [2]:
def read_input(credentials_path="dbCredentials.txt"):
    """Load the ``Crimes`` and ``Census`` tables from MySQL into DataFrames.

    Connection settings start from built-in defaults and are overridden by
    ``key=value`` lines found in the credentials file. Only the keys ``host``,
    ``uname``, ``password`` and ``dbName`` are recognised; every other line is
    ignored. Whitespace around keys and values is stripped, and only the first
    ``=`` on a line separates key from value, so values may contain ``=``.

    Parameters
    ----------
    credentials_path : str, optional
        Path of the credentials file. Defaults to ``"dbCredentials.txt"``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(crimeDF, censusDF)``. ``crimeDF`` is indexed by its parsed ``date``
        column (which is removed from the columns); ``censusDF`` is returned
        as read.

    Raises
    ------
    OSError
        If the credentials file cannot be opened.
    ValueError
        If a ``date`` value does not match ``%Y-%m-%d %H:%M:%S``.

    Notes
    -----
    Sets the global pandas option ``display.expand_frame_repr`` to ``False``.
    """
    # Local import so the block does not depend on an extra top-of-file import.
    from urllib.parse import quote_plus

    dbCredentials = {
        "host": "localhost",
        "uname": "root",
        "password": "password",
        "dbName": "ChicagoData",
    }

    # `with` guarantees the file handle is closed even if parsing fails.
    with open(credentials_path, "r") as file:
        for line in file:
            key, separator, value = line.partition("=")
            key = key.strip()
            if separator and key in dbCredentials:
                dbCredentials[key] = value.strip()

    # User name and password are URL-escaped so characters such as '@', ':'
    # or '/' cannot corrupt the connection URL. The configured host is used
    # instead of a hard-coded "localhost".
    engine = create_engine(
        "mysql+pymysql://{user}:{pw}@{host}/{db}".format(
            user=quote_plus(dbCredentials["uname"]),
            pw=quote_plus(dbCredentials["password"]),
            host=dbCredentials["host"],
            db=dbCredentials["dbName"],
        )
    )

    pd.set_option('display.expand_frame_repr', False)
    try:
        crimeDF = pd.read_sql("select * from " + dbCredentials["dbName"] + ".Crimes", engine)
        censusDF = pd.read_sql("select * from " + dbCredentials["dbName"] + ".Census", engine)
    finally:
        # Release pooled connections; the engine is not needed after loading.
        engine.dispose()

    # Parse the timestamp column and promote it to the index (the column is
    # removed by set_index, matching the previous index-then-drop sequence).
    crimeDF["date"] = pd.to_datetime(crimeDF["date"], format='%Y-%m-%d %H:%M:%S')
    crimeDF = crimeDF.set_index("date")
    return crimeDF, censusDF

In [None]:
# Load both tables; read_input() returns the crimes frame indexed by timestamp.
crimes_df,census_df=read_input()
# Last expression of the cell: rendered as the cell output.
crimes_df

In [None]:
# Derive weekday name, month name and hour-of-day columns from the timestamp index.
crimes_df_date = pd.to_datetime(pd.Series(crimes_df.index))
crimes_df['day'] = crimes_df_date.dt.day_name().tolist()
crimes_df['month'] = crimes_df_date.dt.month_name().tolist()
crimes_df['hour'] = crimes_df_date.dt.hour.tolist()

In [None]:
def create_mappings(mapping_dict, dataFrame, attribute, source_attribute=None):
    """Map raw values of a column onto coarser group labels.

    Parameters
    ----------
    mapping_dict : dict
        Maps an iterable of raw values (e.g. a tuple) to the group label that
        all of those raw values should receive.
    dataFrame : pandas.DataFrame
        Frame to update. It is modified in place.
    attribute : str
        Name of the column that receives the group labels.
    source_attribute : str, optional
        Name of the column holding the raw values. Defaults to ``attribute``,
        i.e. the column is overwritten with its own grouped values.

    Returns
    -------
    pandas.DataFrame
        The same ``dataFrame`` object, for convenience.

    Notes
    -----
    Raw values that appear in no group of ``mapping_dict`` become NaN.
    """
    # Flatten {(raw_a, raw_b, ...): label} into {raw_a: label, raw_b: label, ...}.
    mapping_cols = {
        raw_value: label
        for raw_values, label in mapping_dict.items()
        for raw_value in raw_values
    }

    source = attribute if source_attribute is None else source_attribute
    dataFrame[attribute] = dataFrame[source].map(mapping_cols)
    return dataFrame

In [None]:

# Each key is a tuple of raw primary crime types; the value is the coarse
# category shared by every type in that tuple.
crimes_dict = {
    (
        'HOMICIDE', 'KIDNAPPING', 'HUMAN TRAFFICKING', 'OFFENSE INVOLVING CHILDREN',
        'BATTERY', 'CRIM SEXUAL ASSAULT', 'SEX OFFENSE', 'ASSAULT',
        'CRIMINAL SEXUAL ASSAULT',
    ): 'CRIMES INVOLVING PHYSICAL HARM TO HUMANS',
    (
        'CRIMINAL DAMAGE', 'DECEPTIVE PRACTICE', 'BURGLARY', 'MOTOR VEHICLE THEFT',
        'THEFT', 'ROBBERY', 'GAMBLING',
    ): 'CRIMES INVOLVING MONETORY BENEFIT',
    (
        'STALKING', 'PROSTITUTION', 'INTIMIDATION', 'INTERFERENCE WITH PUBLIC OFFICER',
        'OBSCENITY', 'PUBLIC INDECENCY', 'ARSON',
    ): 'CRIMES INVOLVING SAFTY CONCERNS TO HUMANS',
    (
        'WEAPONS VIOLATION', 'CONCEALED CARRY LICENSE VIOLATION', 'PUBLIC PEACE VIOLATION',
        'LIQUOR LAW VIOLATION', 'NARCOTICS', 'OTHER NARCOTIC VIOLATION',
        'CRIMINAL TRESPASS',
    ): 'CRIMES INVOLVING VIOLATIONS',
    (
        'NON-CRIMINAL', 'NON-CRIMINAL (SUBJECT SPECIFIED)', 'OTHER OFFENSE', 'RITUALISM',
    ): 'OTHER NON-CRIMINAL OFFENSES',
}

# Flatten to one entry per raw primary type so the dict can drive Series.map.
grouped_crime_dict = {
    crime_type: category
    for crime_types, category in crimes_dict.items()
    for crime_type in crime_types
}
# Add the coarse crime category; primary types absent from grouped_crime_dict map to NaN.
crimes_df['grouped_crime_type'] = crimes_df.primarytype.map(grouped_crime_dict)

In [None]:
# Each key is a tuple of raw location descriptions; the value is the coarse
# location group shared by every description in that tuple.
# NOTE(review): 'SCHOOL - PRIVATE GROUNDS' is listed under both 'BUSINESS AREA'
# and 'PUBLIC BUILDINGS'; the later group wins when the dict is flattened below.
loc_dict = {
    ('RESIDENCE', 'APARTMENT', 'CHA APARTMENT', 'RESIDENCE PORCH/HALLWAY', \
     'RESIDENCE-GARAGE','RESIDENTIAL YARD (FRONT/BACK)', \
     'DRIVEWAY - RESIDENTIAL', 'HOUSE','RESIDENCE - YARD (FRONT / BACK)',\
     'RESIDENCE - GARAGE','ROOMING HOUSE','ELEVATOR', 'NURSING HOME', 'NURSING / RETIREMENT HOME',\
     'HOTEL','RESIDENCE - PORCH / HALLWAY','MOTEL','COLLEGE / UNIVERSITY - RESIDENCE HALL',\
     'POOLROOM','HOTEL / MOTEL','PORCH','POOL ROOM') : 'RESIDENTIAL AREA',

    # The comma after 'CLEANERS/LAUNDROMAT' matters: without it Python joins the
    # string with 'HOSPITAL BUILDING / GROUNDS' and neither location is mapped.
    ('BARBERSHOP', 'COMMERCIAL / BUSINESS OFFICE', 'CURRENCY EXCHANGE', \
     'DEPARTMENT STORE', 'RESTAURANT','ATHLETIC CLUB', 'TAVERN/LIQUOR STORE', \
     'SMALL RETAIL STORE', 'HOTEL/MOTEL', 'GAS STATION','AUTO / BOAT / RV DEALERSHIP', \
     'CONVENIENCE STORE', 'BANK', 'BAR OR TAVERN', 'DRUG STORE',\
     'GROCERY FOOD STORE', 'CAR WASH', 'SPORTS ARENA/STADIUM', 'DAY CARE CENTER',\
     'MOVIE HOUSE/THEATER','APPLIANCE STORE', 'CLEANING STORE', 'PAWN SHOP', \
     'FACTORY/MANUFACTURING BUILDING', 'ANIMAL HOSPITAL','BOWLING ALLEY', \
     'SAVINGS AND LOAN', 'CREDIT UNION', 'KENNEL', 'GARAGE/AUTO REPAIR', \
     'LIQUOR STORE','GAS STATION DRIVE/PROP.', 'OFFICE', 'BARBER SHOP/BEAUTY SALON','FUNERAL PARLOR',\
     'SCHOOL - PRIVATE GROUNDS','CTA PARKING LOT / GARAGE / OTHER PROPERTY','CLEANERS/LAUNDROMAT',\
     'HOSPITAL BUILDING / GROUNDS', 'PUBLIC HIGH SCHOOL' ,'FACTORY / MANUFACTURING BUILDING', \
     'FACTORY', 'SCHOOL - PRIVATE BUILDING', 'RETAIL STORE','CLUB','TAVERN / LIQUOR STORE',\
     'MEDICAL / DENTAL OFFICE', 'MOVIE HOUSE / THEATER') : 'BUSINESS AREA',

    ('VEHICLE NON-COMMERCIAL', 'AUTO', 'VEHICLE - OTHER RIDE SHARE SERVICE (E.G., UBER, LYFT)', \
     'TAXICAB','VEHICLE-COMMERCIAL', 'VEHICLE - DELIVERY TRUCK', \
     'VEHICLE-COMMERCIAL - TROLLEY BUS','VEHICLE-COMMERCIAL - ENTERTAINMENT/PARTY BUS',\
     'VEHICLE - COMMERCIAL','VEHICLE - OTHER RIDE SERVICE','DELIVERY TRUCK',\
     'TAXI CAB', 'VEHICLE - OTHER RIDE SHARE SERVICE (LYFT, UBER, ETC.)',\
     'VEHICLE - COMMERCIAL: ENTERTAINMENT / PARTY BUS', 'TRUCK','TRAILER') : 'VEHICLE',

    ('HOSPITAL BUILDING/GROUNDS', 'NURSING HOME/RETIREMENT HOME', \
     'SCHOOL, PUBLIC, BUILDING','CHURCH/SYNAGOGUE/PLACE OF WORSHIP', \
     'SCHOOL, PUBLIC, GROUNDS', 'SCHOOL, PRIVATE, BUILDING',\
     'MEDICAL/DENTAL OFFICE', 'LIBRARY', 'COLLEGE/UNIVERSITY RESIDENCE HALL', \
     'YMCA', 'HOSPITAL','SCHOOL - PUBLIC BUILDING','COLLEGE / UNIVERSITY - GROUNDS',\
     'EXPRESSWAY EMBANKMENT', 'SCHOOL - PRIVATE GROUNDS','RAILROAD PROPERTY',\
     'COACH HOUSE','CHURCH') : 'PUBLIC BUILDINGS',

    ('STREET', 'PARKING LOT/GARAGE(NON.RESID.)', 'SIDEWALK', 'PARK PROPERTY', \
     'ALLEY', 'CEMETARY','CHA HALLWAY/STAIRWELL/ELEVATOR', 'CHA PARKING LOT/GROUNDS', \
     'COLLEGE/UNIVERSITY GROUNDS', 'BRIDGE','SCHOOL, PRIVATE, GROUNDS', \
     'FOREST PRESERVE', 'LAKEFRONT/WATERFRONT/RIVERBANK', 'PARKING LOT', 'DRIVEWAY',\
     'HALLWAY', 'YARD', 'CHA GROUNDS', 'RIVER BANK', 'STAIRWELL', 'CHA PARKING LOT',\
     'AIRPORT TERMINAL UPPER LEVEL - SECURE AREA','OTHER RAILROAD PROP / TRAIN DEPOT',\
     'AIRPORT TERMINAL LOWER LEVEL - SECURE AREA', 'AIRPORT BUILDING NON-TERMINAL - SECURE AREA',\
     'AIRPORT BUILDING NON-TERMINAL - SECURE AREA', 'AIRPORT EXTERIOR - NON-SECURE AREA',\
     'AIRPORT PARKING LOT', 'AIRPORT TERMINAL LOWER LEVEL - NON-SECURE AREA',\
     'AIRPORT BUILDING NON-TERMINAL - NON-SECURE AREA', 'AIRPORT VENDING ESTABLISHMENT',\
     'AIRPORT TERMINAL MEZZANINE - NON-SECURE AREA','CTA PROPERTY','CTA "L" PLATFORM',\
     'VACANT LOT', 'VACANT LOT / LAND','VACANT LOT/LAND', 'LAGOON','HIGHWAY / EXPRESSWAY','SEWER',\
     'LIVERY STAND OFFICE','SCHOOL YARD','SCHOOL - PUBLIC GROUNDS',\
     'PUBLIC GRAMMAR SCHOOL', 'SPORTS ARENA / STADIUM', 'CHA BREEZEWAY', 'DUMPSTER',\
     'CHA PARKING LOT / GROUNDS','RIVER', 'JUNK YARD/GARBAGE DUMP', \
     'PARKING LOT / GARAGE (NON RESIDENTIAL)','TAVERN', 'VESTIBULE', 'TRUCKING TERMINAL',\
     'CHURCH / SYNAGOGUE / PLACE OF WORSHIP', 'CHURCH PROPERTY', \
     'LAKE','LAKEFRONT / WATERFRONT / RIVERBANK','HIGHWAY/EXPRESSWAY') : 'PUBLIC AREA',

    ('POLICE FACILITY/VEH PARKING LOT', 'GOVERNMENT BUILDING/PROPERTY', \
     'FEDERAL BUILDING', 'JAIL / LOCK-UP FACILITY','FIRE STATION', \
     'GOVERNMENT BUILDING','CHA ELEVATOR','GOVERNMENT BUILDING / PROPERTY',\
     'OTHER RAILROAD PROPERTY / TRAIN DEPOT','COUNTY JAIL', 'CHA STAIRWELL',\
     'POLICE FACILITY / VEHICLE PARKING LOT','CHA PLAY LOT','CHA LOBBY',\
     'CHA HALLWAY') : 'GOVERNMENT LOCATONS',

    ('AIRPORT TERMINAL UPPER LEVEL - NON-SECURE AREA', 'CTA PLATFORM', \
     'CTA STATION', 'CTA BUS STOP', 'CTA TRAIN', 'CTA BUS', 'CTA GARAGE / OTHER PROPERTY',\
     'AIRCRAFT', 'OTHER COMMERCIAL TRANSPORTATION', \
     'AIRPORT EXTERIOR - SECURE AREA', 'AIRPORT TRANSPORTATION SYSTEM (ATS)','CTA TRACKS - RIGHT OF WAY', \
     'AIRPORT/AIRCRAFT', 'BOAT/WATERCRAFT','CTA "L" TRAIN','PRAIRIE','CHA HALLWAY / STAIRWELL / ELEVATOR') : 'PUBLIC TRANSPORT',

    ('OTHER', 'ABANDONED BUILDING', 'WAREHOUSE', 'ATM (AUTOMATIC TELLER MACHINE)',\
     'CONSTRUCTION SITE', 'NEWSSTAND','COIN OPERATED MACHINE',\
     'HORSE STABLE','FARM', 'GARAGE', 'WOODED AREA', 'GANGWAY', 'BASEMENT',\
     'OTHER (SPECIFY)') : 'OTHER'
}

# Flatten to one entry per raw location description so the dict can drive
# Series.map. Later groups overwrite earlier ones for duplicated descriptions.
grouped_loc_dict = {
    location: group
    for locations, group in loc_dict.items()
    for location in locations
}
# Add the coarse location group; descriptions absent from grouped_loc_dict map to NaN.
crimes_df['grouped_crime_location'] = crimes_df.locationdescription.map(grouped_loc_dict)

In [None]:
# Cast the arrest / domestic flag columns to integers.
for flag_column in ("arrest", "domestic"):
    crimes_df[flag_column] = crimes_df[flag_column].astype(int)

# Visualisations

## 1. Percentage of different crimes 

In [None]:
# Pie chart: share of each grouped crime type.
data = crimes_df['grouped_crime_type'].value_counts()
values = data.index
fig = plt.figure(figsize=(6, 6))
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
txt_prp = {'color': "black", 'size': 'large'}

# Pull out the last two slices (value_counts sorts descending, so these are
# the smallest). The list is sized from the data so ax.pie never receives an
# explode sequence whose length differs from the number of slices.
exp = [0.0] * len(data)
for offset, amount in ((1, 0.5), (2, 0.3)):
    if len(exp) >= offset:
        exp[-offset] = amount

ax.pie(data, labels=values, colors=clrs, explode=exp, autopct='%1.1f%%',
       labeldistance=1.05, radius=2, startangle=40, textprops=txt_prp)
plt.title("Percentage of all the different crimes", bbox={'facecolor':'0.9', 'pad':5},y=1.4)
# plt.show() instead of fig.show(): the latter only warns on non-GUI (inline) backends.
plt.show()

## 2. Percentage of crime at different locations

In [None]:
# Pie chart: share of crimes per grouped location.
data = crimes_df['grouped_crime_location'].value_counts()
values = data.index
fig = plt.figure(figsize=(5, 5))
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
txt_prp = {'color': "black", 'size': 'large'}

# Pull out the last two slices (value_counts sorts descending, so these are
# the smallest). The list is sized from the data so ax.pie never receives an
# explode sequence whose length differs from the number of slices.
exp = [0.0] * len(data)
for offset, amount in ((1, 0.5), (2, 0.3)):
    if len(exp) >= offset:
        exp[-offset] = amount

plt.title("Percentage of crimes occuring  at different locations", bbox={'facecolor':'0.9', 'pad':5},y=1.4)
ax.pie(data, labels=values, colors=clrs, explode=exp, autopct='%1.1f%%',
       labeldistance=1.05, radius=2, startangle=40, textprops=txt_prp)

# plt.show() instead of fig.show(): the latter only warns on non-GUI (inline) backends.
plt.show()

## 3. Types of crimes occurring at different locations

In [None]:
# Horizontal grouped bars: crime-type counts within the ten most frequent location groups.
fig = plt.figure(figsize=(15, 10))
count = crimes_df['grouped_crime_location'].value_counts()
sns.set_theme(style="darkgrid")
sns.countplot(y='grouped_crime_location',
              data=crimes_df,
              order=count.iloc[:10].index,  # most frequent locations first
              hue='grouped_crime_type',
              palette="bright")

plt.legend(title='Crimes', loc='lower right')
plt.ylabel("Locations", bbox={'facecolor':'0.9', 'pad':5})
plt.xlabel("Count", bbox={'facecolor':'0.9', 'pad':5})
plt.title("Crimes occuring at different locations", bbox={'facecolor':'0.9', 'pad':5}, y=1.0)
# plt.show() instead of fig.show(): the latter only warns on non-GUI (inline) backends.
plt.show()

In [None]:
# Bar chart of the number of recorded crimes per year.
fig = plt.figure(figsize=(15, 10))
plot = sns.countplot(data=crimes_df, x='year', palette="flare")
plot.set_ylabel('No of Crimes')
plot.set_title("Count of crimes from 2001-2021", bbox={'facecolor':'0.9', 'pad':5}, y=1.0)
plt.show()

In [None]:
fig=plt.figure(figsize = (15, 10))
plot=sns.countplot(x=crimes_df.index.month,data=crimes_df,palette="flare")
month=['Jan','Feb','Mar','Apr','May','June','July','Aug','Sept','Oct','Nov','Dec']
plot.set_xticklabels(month)
plt.ylabel('No of Crimes')
plt.xlabel('Month of the year')
plt.title("Count of crimes per month",bbox={'facecolor':'0.9', 'pad':5},y=1.0)
plt.show()

In [None]:
# Bar chart of crimes per day of the month (day number taken from the timestamp index).
fig = plt.figure(figsize=(15, 5))
plot = sns.countplot(data=crimes_df, x=crimes_df.index.day, palette="flare")
plot.set_ylabel('No of Crimes')
plot.set_xlabel('Day of the month')
plot.set_title("Count of crimes per day", bbox={'facecolor':'0.9', 'pad':5}, y=1.0)
plt.show()

In [None]:
# Bar chart of crimes per hour of the day (hour taken from the timestamp index).
fig = plt.figure(figsize=(15, 5))
plot = sns.countplot(data=crimes_df, x=crimes_df.index.hour, palette="flare")
plot.set_ylabel('No of Crimes')
plot.set_xlabel('Hour of the day')
plot.set_title("Count of crimes per hour", bbox={'facecolor':'0.9', 'pad':5}, y=1.0)
plt.show()


## 4. Crime trends over the years

In [None]:
# Reuse the frames loaded at the top of the notebook instead of re-reading both
# full tables from MySQL: the trend plot only needs the timestamp index, which
# crimes_df already has. Note these names alias the same objects (no copy).
crimes_year_df, census_year_df = crimes_df, census_df


### Rolling 365-day total of crimes

In [None]:
# Line plot of the 365-day rolling sum of daily crime counts.
plt.figure(figsize=(11, 4))
daily_counts = crimes_year_df.resample('D').size()
rolling_ax = daily_counts.rolling(365).sum().plot()
rolling_ax.set_title('Rolling sum  of all crimes from 2001 - 2020')
rolling_ax.set_ylabel('Number of crimes')
rolling_ax.set_xlabel('Days')
plt.show()

### Trends of crimes on a weekly basis

In [None]:
# Weekly crime counts. The already-loaded crimes_df is resampled directly
# instead of re-reading both full tables from MySQL a third time.
census_month_df = census_df
# NOTE: despite its name, crimes_month_df holds a weekly ('W') resampler.
crimes_month_df = crimes_df.resample('W')
crimes_month_df.size().plot(legend=False)
plt.title('Number of crimes per Week (2001 - 2021)')
plt.xlabel('Weeks')
plt.ylabel('Number of crimes')
plt.show()

### Crimes happening monthwise

In [None]:
# Horizontal bars: number of crimes per month of the year.
# groupby sorts the month names alphabetically (April, August, ...), so the
# counts are reindexed into calendar order before plotting.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
crimes_df.groupby('month').size().reindex(month_order, fill_value=0).plot(kind='barh')
plt.ylabel('Months of the year')
plt.xlabel('Number of crimes')
plt.title('Number of crimes by month of the year')
plt.show()

### Crimes by day of the week

In [None]:
# Horizontal bars: number of crimes per day of the week (0 = Monday in pandas' dayofweek).
days = ['Monday','Tuesday','Wednesday',  'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_counts = crimes_df.groupby(crimes_df.index.dayofweek).size()
weekday_counts.plot(kind='barh')
# Replace the numeric weekday ticks with their names (positional).
plt.yticks(np.arange(7), days)
plt.ylabel('Days of the week')
plt.xlabel('Number of crimes')
plt.title('Number of crimes by day of the week')
plt.show()

### Month Wise Crimes for Crime Locations

In [None]:
# Line plot: monthly crime counts, one line per grouped location.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sept", "Oct", "Nov", "Dec"]
# Full month names in calendar order, matching the values of the 'month' column.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
crime_location_month_df = crimes_df.groupby(['month', 'grouped_crime_location'])
# groupby sorts month names alphabetically; reindex to calendar order so the
# Jan..Dec tick labels below describe the right rows.
location_by_month = crime_location_month_df.size().unstack().reindex(month_order)
# figsize goes to DataFrame.plot, which opens its own figure; a separate
# plt.figure() call would only leave an empty figure behind.
location_by_month.plot(figsize=(11, 4))
plt.ylabel('Crime Count')
plt.xlabel('Month')
plt.xticks(np.arange(12), months)
plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
plt.show()

In [None]:
# Preview the first rows of crimes_df, including the derived day/month/hour and grouped columns.
crimes_df.head()


In [None]:
# Heatmap of crime counts: rows are grouped crime types, columns are month numbers.
# The deprecated `axis=0` argument is omitted; grouping rows is the default.
crimes_grouped = crimes_df.groupby(by=["grouped_crime_type", crimes_df.index.month])
crimes_grouped_count = crimes_grouped["grouped_crime_type"].count()
# Pivot the month level into columns so each cell is one (crime type, month) count.
vis_data = crimes_grouped_count.unstack()
fig, ax = plt.subplots(figsize=(20, 12))
sns.heatmap(data=vis_data, annot=True, fmt=".0f", ax=ax)
ax.set_title(label="Heatmap of crimes per month ", pad=20)
ax.set_xlabel("Month")
ax.set_ylabel("Crime Category ");

# Check if Criminals were arrested

In [None]:
# Bar chart: number of crimes with and without an arrest.
arrest_ax = sns.countplot(data=crimes_df, x='arrest', palette="flare")
arrest_ax.set_ylabel('No of Crimes')
arrest_ax.set_xlabel('Arrest')
# Relabel the 0/1 ticks as False/True.
plt.xticks(np.arange(2), [False, True])
plt.show()

# Year based arrests

In [None]:
# Display crimes_df as this cell's output.
crimes_df

In [None]:
 plt.figure(figsize = (12, 12))
data=crimes_df.groupby(['year','arrest'])['block'].count().unstack()
graph=data.plot(kind='bar')
plt.ylabel('No of Crimes')
plt.legend( ['False', 'True'])
plt.show()

# Crime-wise arrest

In [None]:
# Grouped bars: arrests vs. non-arrests per crime category, most frequent category first.
crime_type_order = crimes_df['grouped_crime_type'].value_counts().index
sns.countplot(
    data=crimes_df,
    x='grouped_crime_type',
    hue='arrest',
    order=crime_type_order,
    palette='flare',
)
plt.xticks(rotation='vertical')
plt.ylabel('No of Crimes')
plt.xlabel('Crime-wise arrest')
plt.legend(['False', 'True'])
plt.show()

In [None]:
# Keep only the rows where an arrest was made (arrest == 1).
arrest_data = crimes_df.loc[crimes_df['arrest'] == 1]


In [None]:
# Line plot: arrests vs. all crimes per month of the year.
# Defined here so the cell does not depend on a variable from an earlier cell.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sept", "Oct", "Nov", "Dec"]
# Full month names in calendar order, matching the values of the 'month' column.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
# groupby sorts month names alphabetically; reindex to calendar order so the
# Jan..Dec tick labels below describe the right points.
arrests_per_month = arrest_data.groupby('month')['arrest'].count().reindex(month_order)
crimes_per_month = crimes_df.groupby('month')['arrest'].count().reindex(month_order)
arrests_per_month.plot(legend=True, label='Arrests', marker='o', figsize=(8, 6))
crimes_per_month.plot(legend=True, label='Crimes', marker='o')
plt.ylabel('No of Crimes')
plt.xlabel('Month')
plt.xticks(np.arange(12), months)
plt.show()

In [None]:
# Display crimes_df as this cell's output.
crimes_df

In [None]:
# Display census_df as this cell's output.
census_df

In [None]:
 
# Approximate a map of Chicago by scattering incident coordinates, coloured by district.
# - x/y are passed by keyword: newer seaborn releases reject positional x and y.
# - Longitude goes on x and latitude on y so the map is not transposed.
# - The marker is set through `markers`; a "marker" entry inside scatter_kws is
#   overridden by lmplot's own per-hue marker handling.
sns.lmplot(x='longitude',
           y='latitude',
           data=crimes_df,
           fit_reg=False,
           hue="district",
           palette='flare',
           height=5,
           markers="+",
           scatter_kws={"s": 10})
ax = plt.gca()
ax.set_title("A Rough map of Chicago\n", fontdict={'fontsize': 15}, weight="bold")
plt.show()

In [None]:
# Bounding box of the incidents as (min longitude, max longitude, min latitude, max latitude).
# The original had an unbalanced opening parenthesis, which is a SyntaxError.
BBox = (crimes_df.longitude.min(), crimes_df.longitude.max(),
        crimes_df.latitude.min(), crimes_df.latitude.max())
BBox