## Preliminary Results

#### Read Data from the Web

In [11]:
# Loads the packages used in the data analysis (they must already be installed)
library(cowplot)
library(digest)
library(gridExtra)
library(infer)
library(repr)
library(tidyverse)
library(datateachr)

In [12]:
# Reads the crime dataset, exported as CSV from a Google Sheet, into a data frame
dataset <-
  "https://docs.google.com/spreadsheets/d/1SXkE_T-CmZzd1kPAIArQOLg9GQa_ftnmVMAI30sVDqo/export?gid=277724310&format=csv" |>
  url() |>
  read.csv()

# Previews the first 6 rows of the raw data
head(dataset, n = 6)

# Previews the last 6 rows of the raw data
tail(dataset, n = 6)

Unnamed: 0_level_0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<chr>,<chr>,<dbl>,<dbl>
1,Break and Enter Commercial,2012,12,14,8,52,,Oakridge,491285.0,5453433
2,Break and Enter Commercial,2019,3,7,2,6,10XX SITKA SQ,Fairview,490613.0,5457110
3,Break and Enter Commercial,2019,8,27,4,12,10XX ALBERNI ST,West End,491007.8,5459174
4,Break and Enter Commercial,2021,4,26,4,44,10XX ALBERNI ST,West End,491007.8,5459174
5,Break and Enter Commercial,2014,8,8,5,13,10XX ALBERNI ST,West End,491015.9,5459166
6,Break and Enter Commercial,2020,7,28,19,12,10XX ALBERNI ST,West End,491015.9,5459166


Unnamed: 0_level_0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<chr>,<chr>,<dbl>,<dbl>
877874,Vehicle Collision or Pedestrian Struck (with Injury),2023,2,22,12,12,YUKON ST / W BROADWAY AVE,Mount Pleasant,491786,5456719
877875,Vehicle Collision or Pedestrian Struck (with Injury),2003,9,1,20,45,YUKON ST / W KING EDWARD AVE,Riley Park,491786,5455143
877876,Vehicle Collision or Pedestrian Struck (with Injury),2005,7,20,18,57,YUKON ST / W KING EDWARD AVE,Riley Park,491786,5455143
877877,Vehicle Collision or Pedestrian Struck (with Injury),2016,6,20,18,11,YUKON ST / W KING EDWARD AVE,Riley Park,491786,5455143
877878,Vehicle Collision or Pedestrian Struck (with Injury),2013,12,3,9,49,YUKON ST / W KING EDWARD AVE,Riley Park,491835,5455126
877879,Vehicle Collision or Pedestrian Struck (with Injury),2013,12,27,14,40,YUKON ST / W KING EDWARD AVE,Riley Park,491835,5455126


#### Cleaning and Wrangling

In [13]:
# Drops the X and Y columns, then discards every row that still contains an
# NA in any of the remaining columns
# NOTE(review): blank strings are not NA, so rows with an empty HUNDRED_BLOCK
# (such as row 1) are kept by na.omit() -- confirm this is intended
tidy_dataset <- na.omit(select(dataset, -c(X, Y)))

# Previews the first 6 rows of the cleaned data
head(tidy_dataset, n = 6)

# Previews the last 6 rows of the cleaned data
tail(tidy_dataset, n = 6)

Unnamed: 0_level_0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<chr>,<chr>
1,Break and Enter Commercial,2012,12,14,8,52,,Oakridge
2,Break and Enter Commercial,2019,3,7,2,6,10XX SITKA SQ,Fairview
3,Break and Enter Commercial,2019,8,27,4,12,10XX ALBERNI ST,West End
4,Break and Enter Commercial,2021,4,26,4,44,10XX ALBERNI ST,West End
5,Break and Enter Commercial,2014,8,8,5,13,10XX ALBERNI ST,West End
6,Break and Enter Commercial,2020,7,28,19,12,10XX ALBERNI ST,West End


Unnamed: 0_level_0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<chr>,<chr>
877874,Vehicle Collision or Pedestrian Struck (with Injury),2023,2,22,12,12,YUKON ST / W BROADWAY AVE,Mount Pleasant
877875,Vehicle Collision or Pedestrian Struck (with Injury),2003,9,1,20,45,YUKON ST / W KING EDWARD AVE,Riley Park
877876,Vehicle Collision or Pedestrian Struck (with Injury),2005,7,20,18,57,YUKON ST / W KING EDWARD AVE,Riley Park
877877,Vehicle Collision or Pedestrian Struck (with Injury),2016,6,20,18,11,YUKON ST / W KING EDWARD AVE,Riley Park
877878,Vehicle Collision or Pedestrian Struck (with Injury),2013,12,3,9,49,YUKON ST / W KING EDWARD AVE,Riley Park
877879,Vehicle Collision or Pedestrian Struck (with Injury),2013,12,27,14,40,YUKON ST / W KING EDWARD AVE,Riley Park


#### Plotting the Raw Data

#### Estimating the Parameter ####

In [33]:
# Keeps only the crimes recorded from 2004 to 2008 (inclusive) and retains
# just the crime type and neighbourhood columns
crime_2004_2008 <- tidy_dataset |>
  filter(YEAR >= 2004, YEAR <= 2008) |>
  select(TYPE, NEIGHBOURHOOD)

# Previews the first 6 rows of the 2004-2008 population
head(crime_2004_2008, n = 6)

Unnamed: 0_level_0,TYPE,NEIGHBOURHOOD
Unnamed: 0_level_1,<chr>,<chr>
1,Break and Enter Commercial,West End
2,Break and Enter Commercial,West End
3,Break and Enter Commercial,West End
4,Break and Enter Commercial,West End
5,Break and Enter Commercial,West End
6,Break and Enter Commercial,West End


In [28]:
# Fixes the RNG seed so that the random sample below is reproducible
set.seed(1)

# Draws one random sample of 500 crimes, without replacement, from the
# 2004-2008 population; the grouping and the `replicate` column added by
# rep_sample_n() are removed because only a single replicate is drawn
sample_2004_2008 <- crime_2004_2008 |>
  rep_sample_n(size = 500, replace = FALSE) |>
  ungroup() |>
  select(-replicate)

# Subsets the sample to the two crime categories of interest:
# "Other Theft", and break and enters (commercial or residential/other)
other_theft_2004_2008 <- sample_2004_2008 |>
  filter(TYPE %in% "Other Theft")
break_and_enter_2004_2008 <- sample_2004_2008 |>
  filter(
    TYPE %in% c(
      "Break and Enter Commercial",
      "Break and Enter Residential/Other"
    )
  )

# Computes p-hat for each category: rows in the category / rows in the sample
phat_theft_1 <- nrow(other_theft_2004_2008) / nrow(sample_2004_2008)
phat_bne_1 <- nrow(break_and_enter_2004_2008) / nrow(sample_2004_2008)

# Reports the 2004-2008 proportion estimates (other theft first, then
# break and enter)
print(paste0("The estimated proportion of other theft crimes out of all crimes in Vancouver from 2004 - 2008 is ", phat_theft_1, "."))
print(paste0("The estimated proportion of break and enter crimes out of all crimes in Vancouver from 2004 - 2008 is ", phat_bne_1, "."))

[1] "The estimated proportion of other theft crimes out of all crimes in Vancouver from 2004 - 2008 is 0.21."
[1] "The estimated proportion of break and enter crimes out of all crimes in Vancouver from 2004 - 2008 is 0.196."


Now, we will find the sample proportions for the crimes of interest, between 2018 and 2022.

In [18]:
# Keeps only the crimes recorded from 2018 to 2022 (inclusive) and retains
# just the crime type and neighbourhood columns
crime_2018_2022 <- tidy_dataset |>
  filter(YEAR >= 2018, YEAR <= 2022) |>
  select(TYPE, NEIGHBOURHOOD)

# Previews the first 6 rows of the 2018-2022 population
head(crime_2018_2022, n = 6)

# Previews the last 6 rows of the 2018-2022 population
tail(crime_2018_2022, n = 6)

Unnamed: 0_level_0,TYPE,NEIGHBOURHOOD
Unnamed: 0_level_1,<chr>,<chr>
1,Break and Enter Commercial,Fairview
2,Break and Enter Commercial,West End
3,Break and Enter Commercial,West End
4,Break and Enter Commercial,West End
5,Break and Enter Commercial,West End
6,Break and Enter Commercial,West End


Unnamed: 0_level_0,TYPE,NEIGHBOURHOOD
Unnamed: 0_level_1,<chr>,<chr>
196371,Vehicle Collision or Pedestrian Struck (with Injury),Riley Park
196372,Vehicle Collision or Pedestrian Struck (with Injury),Riley Park
196373,Vehicle Collision or Pedestrian Struck (with Injury),Mount Pleasant
196374,Vehicle Collision or Pedestrian Struck (with Injury),Mount Pleasant
196375,Vehicle Collision or Pedestrian Struck (with Injury),Mount Pleasant
196376,Vehicle Collision or Pedestrian Struck (with Injury),Mount Pleasant


In [30]:
# Fixes the RNG seed so that the random sample below is reproducible
set.seed(2)

# Draws one random sample of 500 crimes, without replacement, from the
# 2018-2022 population; the grouping and the `replicate` column added by
# rep_sample_n() are removed because only a single replicate is drawn
sample_2018_2022 <- crime_2018_2022 |>
  rep_sample_n(size = 500, replace = FALSE) |>
  ungroup() |>
  select(-replicate)

# Subsets the sample to the two crime categories of interest:
# "Other Theft", and break and enters (commercial or residential/other)
other_theft_2018_2022 <- sample_2018_2022 |>
  filter(TYPE %in% "Other Theft")
break_and_enter_2018_2022 <- sample_2018_2022 |>
  filter(
    TYPE %in% c(
      "Break and Enter Commercial",
      "Break and Enter Residential/Other"
    )
  )

# Computes p-hat for each category: rows in the category / rows in the sample
phat_theft_2 <- nrow(other_theft_2018_2022) / nrow(sample_2018_2022)
phat_bne_2 <- nrow(break_and_enter_2018_2022) / nrow(sample_2018_2022)

# Reports the 2018-2022 proportion estimates (other theft first, then
# break and enter)
print(paste0("The estimated proportion of other theft crimes out of all crimes in Vancouver from 2018 - 2022 is ", phat_theft_2, "."))
print(paste0("The estimated proportion of break and enter crimes out of all crimes in Vancouver from 2018 - 2022 is ", phat_bne_2, "."))

[1] "The estimated proportion of other theft crimes out of all crimes in Vancouver from 2018 - 2022 is 0.264."
[1] "The estimated proportion of break and enter crimes out of all crimes in Vancouver from 2018 - 2022 is 0.148."


Finally, we will calculate the difference in sample proportions—our sample statistic of interest.

In [32]:
# Calculates the difference in sample proportions for other theft, taken as
# the 2018-2022 estimate minus the 2004-2008 estimate (a positive value means
# the sampled proportion was higher in the later period)
phat_theft <- phat_theft_2 - phat_theft_1
print(paste0("The estimated difference in proportion of other theft crimes from 2018 - 2022 and 2004 - 2008 out of all crimes in Vancouver is ", phat_theft, "."))


# Calculates the difference in sample proportions for break and enters, using
# the same direction of subtraction (2018-2022 minus 2004-2008)
phat_bne <- phat_bne_2 - phat_bne_1
print(paste0("The estimated difference in proportion of break and enters from 2018 - 2022 and 2004 - 2008 out of all crimes in Vancouver is ", phat_bne, "."))

[1] "The estimated difference in proportion of other theft crimes from 2018 - 2022 and 2004 - 2008 out of all crimes in Vancouver is 0.054."
[1] "The estimated difference in proportion of break and enters from 2018 - 2022 and 2004 - 2008 out of all crimes in Vancouver is -0.048."


**TODO: need to add titles.**