# Import Crime Data

In [1]:
library(plyr)
library(tidyr)

In [2]:
# Read the criminal-incidents table. The "UTF-8-BOM" encoding makes R strip a
# leading byte-order mark, if present, so it does not end up in the first
# column name.
crimedata <- read.csv(
  file = "Data_tables_Criminal_Incidents_Visualisation_year_ending_March_2018.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
head(crimedata, n = 5L)

Year.ending.March,Postcode,Suburb.Town.Name,Offence.Division,Offence.Subdivision,Offence.Subgroup,Incidents.Recorded
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or other authorised officer",142


# Process the data

In [3]:
# Strip the "," thousands separators from Incidents.Recorded and store the
# column as integer.
# The column is addressed by name (it is the 7th column of the input) so the
# step keeps working if the column order changes, and the conversion is one
# vectorised call instead of lapply()/sapply() over a one-column data frame.
crimedata[["Incidents.Recorded"]] <- as.integer(
  gsub(",", "", crimedata[["Incidents.Recorded"]], fixed = TRUE)
)

In [4]:
# Filter (not sort) the offences by Offence Division, keeping the two divisions
# chosen to capture targeted crimes that take place in public.
crimedata <- subset(
  crimedata,
  Offence.Division %in% c(
    "A Crimes against the person",
    "D Public order and security offences"
  )
)
head(crimedata, n = 5L)

Year.ending.March,Postcode,Suburb.Town.Name,Offence.Division,Offence.Subdivision,Offence.Subgroup,Incidents.Recorded
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or other authorised officer",142


In [5]:
# Tally the rows per offence division.
# plyr:: is spelled out because dplyr::count() masks plyr::count() once the
# tidyverse is attached later in this notebook; re-running this cell after
# that point would otherwise call the wrong function.
plyr::count(crimedata, vars = "Offence.Division")

Offence.Division,freq
A Crimes against the person,74194
D Public order and security offences,34835


In [6]:
# Drop three subdivisions from the analysis in a single pass.
# Rows whose subdivision is NA are dropped as well, exactly as the chained
# `!=` filters did.
crimedata <- subset(
  crimedata,
  !is.na(Offence.Subdivision) &
    !(Offence.Subdivision %in% c(
      "A50 Robbery",
      "D10 Weapons and explosives offences",
      "Other crimes against the person"
    ))
)

head(crimedata, n = 5L)

Year.ending.March,Postcode,Suburb.Town.Name,Offence.Division,Offence.Subdivision,Offence.Subgroup,Incidents.Recorded
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,646
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,129
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,601
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,108
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or other authorised officer",142


In [7]:
# Tally the rows per offence subdivision.
# plyr:: is spelled out because dplyr::count() masks plyr::count() once the
# tidyverse is attached later in this notebook.
plyr::count(crimedata, vars = "Offence.Subdivision")

Offence.Subdivision,freq
A20 Assault and related offences,33221
"A70 Stalking, harassment and threatening behaviour",17804
A80 Dangerous and negligent acts endangering people,10158
D20 Disorderly and offensive conduct,16929
D30 Public nuisance offences,5778
D40 Public security offences,295


In [8]:
# Split Offence.Subgroup at its first space into a code column and a name
# column. extra = "merge" keeps the remaining words together in the name;
# remove = FALSE retains the original combined column.
crimedata <- separate(
  crimedata,
  col = Offence.Subgroup,
  into = c("Offence.Subgroup.Code", "Offence.Subgroup.Name"),
  sep = " ",
  remove = FALSE,
  extra = "merge"
)

In [9]:
# Preview the first five rows, now including the code and name columns.
head(crimedata, n = 5L)

Year.ending.March,Postcode,Suburb.Town.Name,Offence.Division,Offence.Subdivision,Offence.Subgroup,Offence.Subgroup.Code,Offence.Subgroup.Name,Incidents.Recorded
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,A232,Non-FV Common assault,646
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,A231,FV Common assault,129
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,A212,Non-FV Serious assault,601
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,A211,FV Serious assault,108
2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or other authorised officer",A22,"Assault police, emergency services or other authorised officer",142


In [10]:
# Drop subgroup codes A231, A211, A731, A711 and A721. The data labels A231 and
# A211 as "FV ..." offences; presumably all five are the family-violence
# variants -- confirm against the offence classification.
# Rows with an NA code are dropped too, matching the chained `!=` filters.
crimedata <- subset(
  crimedata,
  !is.na(Offence.Subgroup.Code) &
    !(Offence.Subgroup.Code %in% c("A231", "A211", "A731", "A711", "A721"))
)

In [11]:
# Tally the rows per remaining offence subgroup.
# plyr:: is spelled out because dplyr::count() masks plyr::count() once the
# tidyverse is attached later in this notebook.
plyr::count(crimedata, vars = "Offence.Subgroup")

Offence.Subgroup,freq
A212 Non-FV Serious assault,7654
"A22 Assault police, emergency services or other authorised officer",3733
A232 Non-FV Common assault,6589
A712 Non-FV Stalking,3342
A722 Non-FV Harassment and private nuisance,2075
A732 Non-FV Threatening behaviour,4361
A81 Dangerous driving,1730
A82 Neglect or ill treatment of people,428
A83 Throw or discharge object endangering people,2791
A89 Other dangerous or negligent acts endangering people,5209


In [12]:
# Drop a further twelve subgroup codes from the analysis in a single pass.
# Rows with an NA code are dropped too, matching the chained `!=` filters.
crimedata <- subset(
  crimedata,
  !is.na(Offence.Subgroup.Code) &
    !(Offence.Subgroup.Code %in% c(
      "D12", "D11", "A22", "D25", "A82", "D33",
      "D43", "D31", "D44", "D41", "D34", "D42"
    ))
)

In [13]:
# Tally the rows per offence subgroup after all exclusions.
# plyr:: is spelled out because dplyr::count() masks plyr::count() once the
# tidyverse is attached later in this notebook.
plyr::count(crimedata, vars = "Offence.Subgroup")

Offence.Subgroup,freq
A212 Non-FV Serious assault,7654
A232 Non-FV Common assault,6589
A712 Non-FV Stalking,3342
A722 Non-FV Harassment and private nuisance,2075
A732 Non-FV Threatening behaviour,4361
A81 Dangerous driving,1730
A83 Throw or discharge object endangering people,2791
A89 Other dangerous or negligent acts endangering people,5209
D21 Riot and affray,1505
D22 Drunk and disorderly in public,5271


# Calculate weights for each crime based on severity

In [14]:
# Work on a separate data frame from here on so that `crimedata` keeps the
# filtered but unweighted rows.
crime_processed<-data.frame(crimedata)

In [15]:
# Preview the first five rows of the working copy.
head(crime_processed, n = 5L)

Unnamed: 0,Year.ending.March,Postcode,Suburb.Town.Name,Offence.Division,Offence.Subdivision,Offence.Subgroup,Offence.Subgroup.Code,Offence.Subgroup.Name,Incidents.Recorded
1,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,A232,Non-FV Common assault,646
3,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,A212,Non-FV Serious assault,601
8,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behaviour",A732 Non-FV Threatening behaviour,A732,Non-FV Threatening behaviour,75
10,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behaviour",A722 Non-FV Harassment and private nuisance,A722,Non-FV Harassment and private nuisance,20
12,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behaviour",A712 Non-FV Stalking,A712,Non-FV Stalking,21


In [16]:
# Assign a severity weight (Crime.Severity) to every row from its offence
# subgroup code.
#
# The weights live in one named lookup vector and are applied to the whole
# column at once. This replaces a row-by-row loop that rewrote the data frame
# on every iteration, misbehaved on a zero-row frame (`1:nrow()` counts 1, 0),
# stopped with an error on an NA code, and left a global `row` that shadowed
# base::row().
severity_by_code <- c(
  A212 = 10, A232 = 10, A712 = 10, D21 = 10,
  A722 = 8,
  A89 = 7, A732 = 7, A83 = 7, A81 = 7, D32 = 7, D36 = 7,
  D13 = 6,
  D23 = 5, D26 = 5,
  D22 = 4, D35 = 4,
  D24 = 3
)
# Weight used for any code that is not listed above (the former final `else`).
default_severity <- 7

subgroup_codes <- as.character(crime_processed[["Offence.Subgroup.Code"]])
severity <- unname(severity_by_code[subgroup_codes])
# Unlisted (or missing) codes index to NA; give them the default weight.
severity[is.na(severity)] <- default_severity
crime_processed[["Crime.Severity"]] <- severity

In [17]:
# Preview the first five rows, now including Crime.Severity.
head(crime_processed, n = 5L)

Unnamed: 0,Year.ending.March,Postcode,Suburb.Town.Name,Offence.Division,Offence.Subdivision,Offence.Subgroup,Offence.Subgroup.Code,Offence.Subgroup.Name,Incidents.Recorded,Crime.Severity
1,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,A232,Non-FV Common assault,646,10
3,2018,3000,MELBOURNE,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,A212,Non-FV Serious assault,601,10
8,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behaviour",A732 Non-FV Threatening behaviour,A732,Non-FV Threatening behaviour,75,7
10,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behaviour",A722 Non-FV Harassment and private nuisance,A722,Non-FV Harassment and private nuisance,20,8
12,2018,3000,MELBOURNE,A Crimes against the person,"A70 Stalking, harassment and threatening behaviour",A712 Non-FV Stalking,A712,Non-FV Stalking,21,10


In [18]:
# Keep only the rows for the year ending March 2018.
crime_processed <- subset(crime_processed, Year.ending.March %in% 2018)

In [19]:
# Weight each row's incident count by its crime severity.
# This is a per-row score; the per-postcode totals are summed in a later cell.
# A single vectorised product replaces the row-by-row loop, which was slow and,
# on a zero-row frame, would have appended a spurious row via `1:nrow()`.
crime_processed[["Severity.Total.Score"]] <-
  crime_processed[["Incidents.Recorded"]] * crime_processed[["Crime.Severity"]]

In [20]:
# Reduce the table to the postcode and its per-row severity score.
keeps <- c("Postcode", "Severity.Total.Score")
crime_processed <- crime_processed[, keeps, drop = FALSE]
head(crime_processed, n = 5L)

Unnamed: 0,Postcode,Severity.Total.Score
1,3000,6460
3,3000,6010
8,3000,525
10,3000,160
12,3000,210


In [21]:
# Sum the per-row scores into one total per postcode.
crime_processed <- aggregate(
  Severity.Total.Score ~ Postcode,
  data = crime_processed,
  FUN = sum
)
names(crime_processed)[2] <- "Severity.Total.Score"
head(crime_processed, n = 5L)

Postcode,Severity.Total.Score
3000,22618
3002,679
3003,524
3006,3326
3008,2060


# Join with postcodes to get average gps position of each postcode

In [22]:
# Read the postcode lookup table with latitude/longitude columns.
postcode_loc <- read.csv(
  file = "Australian_Post_Codes_Lat_Lon.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
head(postcode_loc, n = 5L)

postcode,suburb,state,dc,type,lat,lon
200,AUSTRALIAN NATIONAL UNIVERSITY,ACT,AUSTRALIAN NATIONAL UNI LPO,Post Office Boxes,-35.27727,149.1171
221,BARTON,ACT,,LVR,-35.20137,149.0951
800,DARWIN,NT,DARWIN DELIVERY CENTRE,Delivery Area,-12.80103,130.9558
801,DARWIN,NT,DARWIN DELIVERY CENTRE,Post Office Boxes,-12.80103,130.9558
804,PARAP,NT,PARAP,Post Office Boxes,-12.43218,130.8433


In [23]:
# Keep only the join key and the coordinates, and rename the key to
# "Postcode" so it matches the column name in the crime table.
keeps <- c("postcode", "lat", "lon")
postcode_loc <- postcode_loc[, keeps, drop = FALSE]
names(postcode_loc)[1] <- "Postcode"
head(postcode_loc, n = 5L)

Postcode,lat,lon
200,-35.27727,149.1171
221,-35.20137,149.0951
800,-12.80103,130.9558
801,-12.80103,130.9558
804,-12.43218,130.8433


In [24]:
# Attach coordinates to each postcode's total score (inner join on Postcode).
# NOTE(review): merge() emits one row per matching postcode_loc row, so if the
# lookup lists a postcode more than once the same score is repeated at each of
# its coordinates. The section heading mentions an "average" position, but no
# averaging is done here -- confirm which behaviour is intended.
crime_processed <- merge(x = postcode_loc, y = crime_processed, by = "Postcode")
head(crime_processed, n = 5L)

Postcode,lat,lon,Severity.Total.Score
3000,-37.81456,144.9703,22618
3002,-37.81664,144.9878,679
3003,-37.80626,144.9411,524
3006,-37.82326,144.9659,3326
3008,-37.81472,144.948,2060


In [25]:
# Drop the postcode and prefix the coordinate columns with "c_" (crime) so
# they stay distinguishable from the tram-stop coordinates later on.
keeps <- c("lat", "lon", "Severity.Total.Score")
crime_processed <- crime_processed[, keeps, drop = FALSE]
names(crime_processed)[1:2] <- c("c_lat", "c_lon")
head(crime_processed, n = 5L)

c_lat,c_lon,Severity.Total.Score
-37.81456,144.9703,22618
-37.81664,144.9878,679
-37.80626,144.9411,524
-37.82326,144.9659,3326
-37.81472,144.948,2060


In [26]:
# Save the per-location severity scores. row.names is left at its default, so
# the row names are written as an extra leading column.
write.csv(x = crime_processed, file = "crime_processed_unweighted_r.csv")

# Import Tram Data

In [27]:
# Read the pipe-delimited stop list. The file has no header row, so the column
# names are assigned explicitly afterwards.
tramdata <- read.csv(
  file = "stop_locations.txt",
  header = FALSE,
  sep = "|",
  fileEncoding = "UTF-8-BOM"
)
colnames(tramdata) <- c(
  "StopLocationID", "StopNameShort", "StopNameLong", "StopType",
  "SuburbName", "PostCode", "RegionName", "LocalGovernmentArea",
  "StatDivision", "lat", "lon"
)
head(tramdata, n = 5L)

StopLocationID,StopNameShort,StopNameLong,StopType,SuburbName,PostCode,RegionName,LocalGovernmentArea,StatDivision,lat,lon
867,Weemala Court,Weemala Ct/Plenty River Dr (Greensborough),Kerbside,Greensborough,3088,Melbourne,Banyule,Greater Metro,-37.6896,145.1051
868,Crana Grove,Crana Gr/Plenty River Dr (Greensborough),Kerbside,Greensborough,3088,Melbourne,Banyule,Greater Metro,-37.68674,145.1056
869,Punkerri Circuit,Punkerri Cct/Plenty River Dr (Greensborough),Kerbside,Greensborough,3088,Melbourne,Banyule,Greater Metro,-37.68364,145.1087
870,Plenty River Drive,231 Plenty River Dr (Greensborough),Kerbside,Greensborough,3088,Melbourne,Banyule,Greater Metro,-37.68259,145.1113
875,Oldstead Rd,Oldstead Rd/Diamond Creek Rd (Greensborough),Kerbside,Greensborough,3088,Melbourne,Banyule,Greater Metro,-37.68534,145.1173


In [28]:
# Keep only the stop's short name and its coordinates.
keeps <- c("StopNameShort", "lat", "lon")
tramdata <- tramdata[, keeps, drop = FALSE]
head(tramdata, n = 5L)

StopNameShort,lat,lon
Weemala Court,-37.6896,145.1051
Crana Grove,-37.68674,145.1056
Punkerri Circuit,-37.68364,145.1087
Plenty River Drive,-37.68259,145.1113
Oldstead Rd,-37.68534,145.1173


In [29]:
# Coordinates-only copy of the stops, with a "t_" (tram) prefix so the columns
# do not clash with the crime coordinates.
keeps <- c("lat", "lon")
tramdata_pos <- tramdata[, keeps, drop = FALSE]
names(tramdata_pos)[1:2] <- c("t_lat", "t_lon")
head(tramdata_pos, n = 5L)

t_lat,t_lon
-37.6896,145.1051
-37.68674,145.1056
-37.68364,145.1087
-37.68259,145.1113
-37.68534,145.1173


In [30]:
# Install required packages

In [31]:
#install.packages("sp")

In [32]:
#install.packages("sf")

In [33]:
#install.packages("geosphere")

In [34]:
library(tidyverse)
library(sp)
library(sf)
library(geosphere)


-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 2.2.1     v purrr   0.2.4
v tibble  1.4.1     v dplyr   0.7.4
v readr   1.1.1     v stringr 1.2.0
v ggplot2 2.2.1     v forcats 0.2.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::arrange()   masks plyr::arrange()
x purrr::compact()   masks plyr::compact()
x dplyr::count()     masks plyr::count()
x dplyr::failwith()  masks plyr::failwith()
x dplyr::filter()    masks stats::filter()
x dplyr::id()        masks plyr::id()
x dplyr::lag()       masks stats::lag()
x dplyr::mutate()    masks plyr::mutate()
x dplyr::rename()    masks plyr::rename()
x dplyr::summarise() masks plyr::summarise()
x dplyr::summarize() masks plyr::summarize()
Linking to GEOS 3.6.1, GDAL 2.2.0, proj.4 4.9.3


In [35]:
# Tag every tram stop (ID1) and every crime point (ID2) with its row name.
# This is optional; the IDs only make it easier to tell the points apart in
# the combined table.
tramdata_pos <- mutate(tramdata_pos, ID1 = row.names(tramdata_pos))
crime_processed <- mutate(crime_processed, ID2 = row.names(crime_processed))

In [36]:
# Create spatial point data frames (sp) for the tram stops and the crime points.
# The formula passed to `coordinates<-` names the x column (longitude) first
# and the y column (latitude) second. The plain data frames `tramdata_pos` and
# `crime_processed` are left untouched because the conversion is done on copies.
my_df_1_sp <- tramdata_pos
coordinates(my_df_1_sp) <- ~t_lon + t_lat

my_df_2_sp <- crime_processed
coordinates(my_df_2_sp) <- ~c_lon + c_lat

In [37]:
# Convert both sp objects to sf simple-feature objects.
my_df_1_sf <- sf::st_as_sf(x = my_df_1_sp)
my_df_2_sf <- sf::st_as_sf(x = my_df_2_sp)

In [38]:
# Declare the coordinate reference system of both layers by EPSG code
# (4326 = WGS 84 longitude/latitude). This only labels the coordinates; it does
# not transform them.
my_df_1_sf <- st_set_crs(my_df_1_sf, 4326)
my_df_2_sf <- st_set_crs(my_df_2_sf, 4326)

In [39]:
# Pairwise distances: one row per tram stop, one column per crime point.
m_dist <- st_distance(my_df_1_sf, my_df_2_sf)

# Column index of the closest crime point for each tram stop.
# which.min() scans each row once and always yields a plain vector. The former
# apply(m_dist, 1, order)[1, ] fully sorted every row just to take its first
# element, and failed with "incorrect number of dimensions" when there was
# only a single crime point. Ties still resolve to the lowest index.
near_index <- apply(m_dist, 1, which.min)

# Pick the nearest crime point's row for every tram stop and place its columns
# next to the stop's own columns.
my_df_final <- cbind(tramdata_pos, crime_processed[near_index, ])

In [40]:
# Display the combined table: each tram stop alongside its nearest crime point.
my_df_final

Unnamed: 0,t_lat,t_lon,ID1,c_lat,c_lon,Severity.Total.Score,ID2
188,-37.68960,145.1051,1,-37.70462,145.1030,535,188
188.1,-37.68674,145.1056,2,-37.70462,145.1030,535,188
191,-37.68364,145.1087,3,-37.67157,145.1240,98,191
191.1,-37.68259,145.1113,4,-37.67157,145.1240,98,191
189,-37.68534,145.1173,5,-37.69058,145.1308,535,189
191.2,-37.68275,145.1196,6,-37.67157,145.1240,98,191
182,-37.73643,145.0445,7,-37.73635,145.0385,185,182
174,-37.72242,145.0481,8,-37.72133,145.0470,985,174
171,-37.74164,145.0434,9,-37.74227,145.0489,1093,171
171.1,-37.74213,145.0482,10,-37.74227,145.0489,1093,171


In [41]:
# Final model table: tram-stop position plus the severity score of its nearest
# crime point.
keeps <- c("t_lat", "t_lon", "Severity.Total.Score")
tramdata_final <- my_df_final[, keeps, drop = FALSE]
names(tramdata_final)[1:2] <- c("Latitude", "Longitude")
head(tramdata_final, n = 5L)

Unnamed: 0,Latitude,Longitude,Severity.Total.Score
188.0,-37.6896,145.1051,535
188.1,-37.68674,145.1056,535
191.0,-37.68364,145.1087,98
191.1,-37.68259,145.1113,98
189.0,-37.68534,145.1173,535


In [46]:
# Save the final table without the row-name column.
write.csv(
  x = tramdata_final,
  file = "crime_tram_model.csv",
  row.names = FALSE
)