In [1]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pandas.plotting import scatter_matrix
from sklearn import cluster
from scipy import stats  ## for z-score. do we still need it?

In [2]:
# Read crime data
crimedata = pd.read_csv('Data_tables_Criminal_Incidents_Visualisation_year_ending_March_2018.csv')
crimedata.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646
1,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601
3,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108
4,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",142


# Process the data

In [3]:
# Strip thousands separators (commas) from the incident counts and convert to numbers.
# Casting to str first keeps this cell re-runnable: on a second run the column is
# already numeric and has no .str accessor.
crimedata['Incidents Recorded'] = pd.to_numeric(
    crimedata['Incidents Recorded'].astype(str).str.replace(',', '', regex=False)
)

crimedata.dtypes

Year ending March       int64
Postcode                int64
Suburb/Town Name       object
Offence Division       object
Offence Subdivision    object
Offence Subgroup       object
Incidents Recorded      int64
dtype: object

In [4]:
# Count offences by Offence Division to identify the divisions covering crimes that take place in public

crimedata['Offence Division'].value_counts()

B Property and deception offences       117983
A Crimes against the person              74194
D Public order and security offences     34835
E Justice procedures offences            24523
C Drug offences                          19833
F Other offences                          6385
Name: Offence Division, dtype: int64

In [5]:
# Keep only rows whose Offence Division is one of the two divisions listed below.
# Testing the single column directly avoids DataFrame.any(1), whose positional axis
# argument is rejected by pandas 2.x.
mask_public = crimedata['Offence Division'].isin([
    'A Crimes against the person',
    'D Public order and security offences',
])
# .copy() makes the result an independent frame, so columns assigned to it in later
# cells do not trigger SettingWithCopyWarning.
crimedata = crimedata.loc[mask_public].copy()

crimedata.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646
1,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601
3,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108
4,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",142


In [6]:
crimedata['Offence Division'].value_counts()

A Crimes against the person             74194
D Public order and security offences    34835
Name: Offence Division, dtype: int64

In [7]:
# Count offences by Offence Subdivision to identify crimes that are targeted at a person or persons

crimedata['Offence Subdivision'].value_counts()

A20 Assault and related offences                       33221
A70 Stalking, harassment and threatening behaviour     17804
D20 Disorderly and offensive conduct                   16929
D10 Weapons and explosives offences                    11833
A80 Dangerous and negligent acts endangering people    10158
Other crimes against the person                         8378
D30 Public nuisance offences                            5778
A50 Robbery                                             4633
D40 Public security offences                             295
Name: Offence Subdivision, dtype: int64

In [8]:
# Drop rows whose Offence Subdivision is in the list below (the mask is negated).
# NOTE(review): 'Weapons and explosives offences' has no 'D10 ' prefix, so it matches
# none of the labels printed by value_counts() and the D10 rows are kept - confirm
# whether that is intended before changing the string.
mask_targeted = ~crimedata['Offence Subdivision'].isin([
    'A50 Robbery',
    'Weapons and explosives offences',
    'Other crimes against the person',
])
# .copy() makes the result an independent frame, so the columns added to it in a
# later cell do not trigger SettingWithCopyWarning.
crimedata = crimedata.loc[mask_targeted].copy()

crimedata.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646
1,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601
3,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108
4,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",142


In [9]:
crimedata['Offence Subdivision'].value_counts()

A20 Assault and related offences                       33221
A70 Stalking, harassment and threatening behaviour     17804
D20 Disorderly and offensive conduct                   16929
D10 Weapons and explosives offences                    11833
A80 Dangerous and negligent acts endangering people    10158
D30 Public nuisance offences                            5778
D40 Public security offences                             295
Name: Offence Subdivision, dtype: int64

In [10]:
# Count offences by Offence Subgroup to identify those that are non-domestic in nature

crimedata['Offence Subgroup'].value_counts()

A231 FV Common assault                                                7739
A212 Non-FV Serious assault                                           7654
A211 FV Serious assault                                               7506
A232 Non-FV Common assault                                            6589
D12 Prohibited and controlled weapons offences                        5331
D22 Drunk and disorderly in public                                    5271
D11 Firearms offences                                                 5254
A89 Other dangerous or negligent acts endangering people              5209
D23 Offensive conduct                                                 4513
A732 Non-FV Threatening behaviour                                     4361
A731 FV Threatening behaviour                                         4043
A22 Assault police, emergency services or other authorised officer    3733
D35 Improper movement on public or private space                      3651
A712 Non-FV Stalking     

In [11]:
# Split 'Offence Subgroup' on the first space only (n=1): the leading token becomes the
# code column and the remainder, spaces included, becomes the name column.
crimedata[['Offence Subgroup Code', 'Offence Subgroup Name']] = crimedata['Offence Subgroup'].str.split(' ', n=1, expand = True)

In [12]:
crimedata.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded,Offence Subgroup Code,Offence Subgroup Name
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646,A232,Non-FV Common assault
1,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129,A231,FV Common assault
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601,A212,Non-FV Serious assault
3,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108,A211,FV Serious assault
4,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",142,A22,"Assault police, emergency services or other au..."


In [13]:
# Drop the listed subgroup codes. A231, A211 and A731 are the 'FV' subgroups shown in the
# counts above; A711 and A721 are presumably the FV stalking/harassment counterparts
# (TODO confirm against the full code list).
# Testing the single column directly avoids DataFrame.any(1), whose positional axis
# argument is rejected by pandas 2.x.
mask_nondomestic = ~crimedata['Offence Subgroup Code'].isin(['A231', 'A211', 'A731', 'A711', 'A721'])
crimedata = crimedata.loc[mask_nondomestic].copy()

In [14]:
crimedata['Offence Subgroup'].value_counts()

A212 Non-FV Serious assault                                           7654
A232 Non-FV Common assault                                            6589
D12 Prohibited and controlled weapons offences                        5331
D22 Drunk and disorderly in public                                    5271
D11 Firearms offences                                                 5254
A89 Other dangerous or negligent acts endangering people              5209
D23 Offensive conduct                                                 4513
A732 Non-FV Threatening behaviour                                     4361
A22 Assault police, emergency services or other authorised officer    3733
D35 Improper movement on public or private space                      3651
A712 Non-FV Stalking                                                  3342
A83 Throw or discharge object endangering people                      2791
D24 Offensive language                                                2502
D25 Criminal intent      

In [15]:
# Remove any other offenses that are not relevant

# Negated membership test on the code column: rows with any of these codes are removed.
# Testing the single column directly avoids DataFrame.any(1), whose positional axis
# argument is rejected by pandas 2.x.
mask_irrelevant = ~crimedata['Offence Subgroup Code'].isin([
    'D12', 'D11', 'A22', 'D25', 'A82', 'D33', 'D43', 'D31', 'D44',
    'D41', 'D34', 'D42',
])
crimedata = crimedata.loc[mask_irrelevant].copy()

In [16]:
crimedata['Offence Subgroup'].value_counts()

A212 Non-FV Serious assault                                 7654
A232 Non-FV Common assault                                  6589
D22 Drunk and disorderly in public                          5271
A89 Other dangerous or negligent acts endangering people    5209
D23 Offensive conduct                                       4513
A732 Non-FV Threatening behaviour                           4361
D35 Improper movement on public or private space            3651
A712 Non-FV Stalking                                        3342
A83 Throw or discharge object endangering people            2791
D24 Offensive language                                      2502
A722 Non-FV Harassment and private nuisance                 2075
A81 Dangerous driving                                       1730
D21 Riot and affray                                         1505
D13 Explosives offences                                     1248
D26 Disorderly conduct                                       993
D32 Hoaxes               

# Calculate weights for each crime based on severity

In [17]:
# Function to calculate severity of crime. The severity is out of 10.

def crime_severity(row):
    """Return the severity weight (out of 10) for a row's offence subgroup code.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by 'Offence Subgroup Code', e.g. a DataFrame row.

    Returns
    -------
    int or None
        The weight assigned to the code, or None when the code is not in the table.
    """
    severity_by_code = {
        'A212': 10,
        'A232': 10,
        'D22': 4,
        'A89': 7,
        'D23': 5,
        'A732': 7,
        'D35': 4,
        'A712': 10,
        'A83': 7,
        'D24': 3,
        'A722': 8,
        'A81': 7,
        'D21': 6,
        'D13': 6,
        'D26': 5,
        'D32': 7,
        'D36': 7,
        'D49': 7,
    }
    # .get() yields None for an unlisted code, matching a fall-through with no return.
    return severity_by_code.get(row['Offence Subgroup Code'])

In [18]:
# Work on a deep copy so the filtered crimedata frame itself is not modified.
crime_processed = crimedata.copy(deep=True)

# Row-wise severity lookup; crime_severity already takes a row, so it is passed directly.
crime_processed['Crime Severity'] = crime_processed.apply(crime_severity, axis=1)

crime_processed.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded,Offence Subgroup Code,Offence Subgroup Name,Crime Severity
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646,A232,Non-FV Common assault,10
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601,A212,Non-FV Serious assault,10
7,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A732 Non-FV Threatening behaviour,75,A732,Non-FV Threatening behaviour,7
9,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A722 Non-FV Harassment and private nuisance,20,A722,Non-FV Harassment and private nuisance,8
11,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A712 Non-FV Stalking,21,A712,Non-FV Stalking,10


In [19]:
# Select most recent crime data from 2018
# .copy() detaches the filtered rows from the unfiltered frame, so the column added in
# a later cell does not trigger pandas' SettingWithCopyWarning.
crime_processed = crime_processed.loc[crime_processed['Year ending March'] == 2018].copy()
crime_processed.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded,Offence Subgroup Code,Offence Subgroup Name,Crime Severity
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646,A232,Non-FV Common assault,10
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601,A212,Non-FV Serious assault,10
7,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A732 Non-FV Threatening behaviour,75,A732,Non-FV Threatening behaviour,7
9,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A722 Non-FV Harassment and private nuisance,20,A722,Non-FV Harassment and private nuisance,8
11,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A712 Non-FV Stalking,21,A712,Non-FV Stalking,10


In [20]:
# Function to calculate the severity score of a single row (incidents recorded times crime severity); scores are summed per postcode in a later cell.

def severity_score(row):
    """Return a row's incident count multiplied by its crime severity weight.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by 'Incidents Recorded' and 'Crime Severity'.

    Returns
    -------
    The product of the two values, in whatever numeric type they multiply to.
    """
    incidents = row['Incidents Recorded']
    weight = row['Crime Severity']
    return incidents * weight

In [21]:
# Incidents times severity for every row, computed as one column-wise product.
# This is the same value severity_score() returns per row, without a Python-level call
# for each row, and it also works when the frame is empty.
crime_processed['Severity Total Score'] = (
    crime_processed['Incidents Recorded'] * crime_processed['Crime Severity']
)
crime_processed.head()

Unnamed: 0,Year ending March,Postcode,Suburb/Town Name,Offence Division,Offence Subdivision,Offence Subgroup,Incidents Recorded,Offence Subgroup Code,Offence Subgroup Name,Crime Severity,Severity Total Score
0,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646,A232,Non-FV Common assault,10,6460
2,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601,A212,Non-FV Serious assault,10,6010
7,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A732 Non-FV Threatening behaviour,75,A732,Non-FV Threatening behaviour,7,525
9,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A722 Non-FV Harassment and private nuisance,20,A722,Non-FV Harassment and private nuisance,8,160
11,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A712 Non-FV Stalking,21,A712,Non-FV Stalking,10,210


In [22]:
crime_processed['Year ending March'].value_counts()

2018    6215
Name: Year ending March, dtype: int64

In [23]:
crime_processed['Suburb/Town Name'].value_counts()

MORWELL             18
MELBOURNE           18
MILDURA             17
WERRIBEE            17
FITZROY             17
FRANKSTON           17
WARRNAMBOOL         17
SHEPPARTON          16
PAKENHAM            16
BENALLA             16
HAMILTON            16
TRARALGON           16
CRANBOURNE          16
DANDENONG           16
ST KILDA            16
ECHUCA              16
NORTH MELBOURNE     15
RICHMOND            15
SOUTH YARRA         15
WODONGA             15
CAROLINE SPRINGS    15
DOCKLANDS           15
NARRE WARREN        15
SALE                15
NORLANE             15
WANGARATTA          15
ST ALBANS           15
GEELONG             15
CRAIGIEBURN         15
EPPING              15
                    ..
BALMATTUM            1
GLENGARRY NORTH      1
LOCKWOOD SOUTH       1
TATYOON              1
MARKWOOD             1
YAMBUNA              1
KY VALLEY            1
LONGWARRY NORTH      1
GLADYSDALE           1
COBURG EAST          1
AVONSLEIGH           1
SUNDAY CREEK         1
RUBICON    

In [24]:
crime_processed['Offence Division'].value_counts()

A Crimes against the person             3953
D Public order and security offences    2262
Name: Offence Division, dtype: int64

In [25]:
crime_processed['Offence Subdivision'].value_counts()

A20 Assault and related offences                       1521
D20 Disorderly and offensive conduct                   1507
A80 Dangerous and negligent acts endangering people    1317
A70 Stalking, harassment and threatening behaviour     1115
D30 Public nuisance offences                            618
D10 Weapons and explosives offences                     133
D40 Public security offences                              4
Name: Offence Subdivision, dtype: int64

In [26]:
crime_processed['Incidents Recorded'].value_counts()

1       2681
2       1046
3        506
4        381
5        279
6        189
9        119
7        119
8        107
10        76
13        61
11        59
12        52
14        43
17        32
16        31
15        27
19        27
20        24
18        22
23        19
22        17
24        16
26        16
21        14
28        13
25        13
30        12
27        11
31         8
        ... 
276        1
36         1
199        1
207        1
232        1
129        1
452        1
139        1
71         1
79         1
646        1
170        1
142        1
126        1
86         1
82         1
78         1
66         1
403        1
107        1
601        1
245        1
145        1
113        1
105        1
77         1
69         1
115        1
135        1
1343       1
Name: Incidents Recorded, Length: 110, dtype: int64

In [27]:
crime_processed['Offence Subgroup Code'].value_counts()

A212    782
A232    739
A89     638
D22     569
A732    486
D35     446
D23     426
A81     368
A712    343
A83     311
A722    286
D24     254
D21     164
D13     133
D32      98
D26      94
D36      74
D49       4
Name: Offence Subgroup Code, dtype: int64

In [28]:
crime_processed['Offence Subgroup Name'].value_counts()

Non-FV Serious assault                                  782
Non-FV Common assault                                   739
Other dangerous or negligent acts endangering people    638
Drunk and disorderly in public                          569
Non-FV Threatening behaviour                            486
Improper movement on public or private space            446
Offensive conduct                                       426
Dangerous driving                                       368
Non-FV Stalking                                         343
Throw or discharge object endangering people            311
Non-FV Harassment and private nuisance                  286
Offensive language                                      254
Riot and affray                                         164
Explosives offences                                     133
Hoaxes                                                   98
Disorderly conduct                                       94
Other public nuisance offences          

In [29]:
crime_processed['Crime Severity'].value_counts()


7     1979
10    1864
4     1015
5      520
6      297
8      286
3      254
Name: Crime Severity, dtype: int64

In [30]:
# Remove the columns that are no longer needed once the per-row score exists.
# errors='raise' means a missing label raises KeyError.
# NOTE(review): 'Offence Subgroup' is not in this list and therefore stays in the frame.
crime_processed = crime_processed.drop(
    columns=[
        'Year ending March',
        'Suburb/Town Name',
        'Offence Division',
        'Offence Subdivision',
        'Incidents Recorded',
        'Offence Subgroup Code',
        'Offence Subgroup Name',
        'Crime Severity',
    ],
    errors='raise',
)

In [31]:
# Total severity score per postcode. The string 'sum' selects pandas' own groupby sum;
# passing the builtin sum callable to agg() is deprecated in recent pandas releases.
crime_processed = crime_processed.groupby(['Postcode'], as_index=False).agg({'Severity Total Score': 'sum'})

In [32]:
crime_processed.head()

Unnamed: 0,Postcode,Severity Total Score
0,3000,22498
1,3002,681
2,3003,524
3,3006,3266
4,3008,2058


In [33]:
crime_processed[crime_processed.duplicated(keep=False)].sort_values(by=['Postcode'])

Unnamed: 0,Postcode,Severity Total Score


# Join with postcodes to get average gps position of each postcode

In [34]:
# Read postcode data
postcode_loc = pd.read_csv('Australian_Post_Codes_Lat_Lon.csv')
postcode_loc.head()

Unnamed: 0,postcode,suburb,state,dc,type,lat,lon
0,200,AUSTRALIAN NATIONAL UNIVERSITY,ACT,AUSTRALIAN NATIONAL UNI LPO,Post Office Boxes,-35.277272,149.117136
1,221,BARTON,ACT,,LVR,-35.201372,149.095065
2,800,DARWIN,NT,DARWIN DELIVERY CENTRE,Delivery Area,-12.801028,130.955789
3,801,DARWIN,NT,DARWIN DELIVERY CENTRE,Post Office Boxes,-12.801028,130.955789
4,804,PARAP,NT,PARAP,Post Office Boxes,-12.432181,130.84331


In [35]:
postcode_loc.shape

(16080, 7)

In [36]:
# Average the coordinates of all entries that share a postcode.
# Selecting lat/lon explicitly keeps the text columns (suburb, state, dc, type) out of
# the mean; pandas 2.x raises instead of silently dropping non-numeric columns.
postcode_loc = postcode_loc.groupby(['postcode'], as_index=False)[['lat', 'lon']].mean()
postcode_loc.head()

Unnamed: 0,postcode,lat,lon
0,0,,
1,200,-35.277272,149.117136
2,221,-35.201372,149.095065
3,800,-12.801028,130.955789
4,801,-12.801028,130.955789


In [37]:
postcode_loc[postcode_loc.duplicated(keep=False)].sort_values(by=['postcode'])

Unnamed: 0,postcode,lat,lon


In [38]:
# Index both frames by postcode and join them. DataFrame.join defaults to a left join,
# so every crime postcode is kept and lat/lon are NaN where postcode_loc has no match.
# After this line 'Postcode' is the index of crime_processed, not a column.
crime_processed = crime_processed.set_index('Postcode').join(postcode_loc.set_index('postcode'))

In [39]:
crime_processed.reset_index()

Unnamed: 0,Postcode,Severity Total Score,lat,lon
0,3000,22498,-37.814563,144.970267
1,3002,681,-37.816640,144.987811
2,3003,524,-37.806255,144.941123
3,3006,3266,-37.823258,144.965926
4,3008,2058,-37.814719,144.948039
5,3011,2246,-37.801199,144.887090
6,3012,729,-37.800197,144.867860
7,3013,359,-37.817099,144.886678
8,3015,496,-37.835258,144.879655
9,3016,675,-37.857292,144.892369


In [40]:
# NOTE(review): 'Postcode' is the index at this point (set_index in the join cell; the
# reset_index() call in the previous cell is not assigned back), so index=False writes
# only the score, lat and lon columns. Confirm that omitting Postcode from the CSV is intended.
crime_processed.to_csv('crimes_weighted.csv', index=False)