In [None]:
library(tidyverse)
library(infer)
library(moderndive)
library(nycflights13)
library(ggplot2movies) 

In [None]:
# Raw CSV locations of the Austin Animal Center intake and outcome records.
intakes <- "https://raw.githubusercontent.com/adagfs/Stat_201_Group_Project/main/Austin_Animal_Center_Intakes.csv"
outcomes <- "https://raw.githubusercontent.com/adagfs/Stat_201_Group_Project/main/Austin_Animal_Center_Outcomes.csv"

# Intakes: read the CSV, preview the first rows, then report the row count.
animal_intakes <- intakes %>% read_csv()
animal_intakes %>% head()
animal_intakes %>% nrow()

# Outcomes: same three steps as for the intakes table.
animal_outcomes <- outcomes %>% read_csv()
animal_outcomes %>% head()
animal_outcomes %>% nrow()

In [None]:
# Get size 200 samples
#
# NOTE(review): `animal_data` is not created in the cells shown here. This
# cell assumes it already exists with the columns AnimalID, AnimalType and
# period -- confirm against the cell that builds it.

# Drop rows with a missing or non-positive period and keep only dogs and
# cats. The result is stored so that every subset and sample below is drawn
# from the cleaned rows. No columns are dropped here: selecting `period`
# alone would remove the AnimalType and AnimalID columns that the filter and
# arrange steps rely on.
animal_data_clean <- animal_data %>%
    filter(!is.na(period),
           period > 0,
           AnimalType %in% c("Dog", "Cat")) %>%
    arrange(AnimalID)

# Per-species subsets of the cleaned data.
animal_data_dog <- animal_data_clean %>%
    filter(AnimalType == "Dog")

animal_data_cat <- animal_data_clean %>%
    filter(AnimalType == "Cat")

# Dog&Cat: one random sample of size 200
animal_sample_200 <- animal_data_clean %>%
    sample_n(size = 200)

# Dog: one random sample of size 200
animal_sample_200_dog <- animal_data_dog %>%
    sample_n(size = 200)

# Cat: one random sample of size 200
animal_sample_200_cat <- animal_data_cat %>%
    sample_n(size = 200)
    
# Dog&Cat: bar plot of the mean period of staying for each animal type.
# `period` is mapped to y and averaged per type. Mapping a numeric `period`
# to `fill` on a count bar chart would only show how many dogs and cats were
# sampled, not how long they stayed.
# NOTE(review): the mean is assumed to be the intended summary -- confirm.
animal_sample200_bar <- ggplot(animal_sample_200, aes(x = AnimalType, y = period)) +
    stat_summary(fun = mean, geom = "bar") +
    labs(x = "Animal Type",
         y = "Mean period of staying",
         title = "Barplot relating dog/cat period of staying")

# Dog: histogram of period in the size-200 dog sample.
# A histogram needs a numeric x, so `period` is binned (not AnimalType).
# The sample already contains dogs only, so no extra filter is needed.
animal_sample200_dog_dist <- animal_sample_200_dog %>%
    ggplot(aes(x = period)) +
    geom_histogram(binwidth = 5) +
    labs(x = "Period of staying",
         title = "Dog: period of staying (sample of size 200)")

# Cat: histogram of period in the size-200 cat sample.
animal_sample200_cat_dist <- animal_sample_200_cat %>%
    ggplot(aes(x = period)) +
    geom_histogram(binwidth = 5) +
    labs(x = "Period of staying",
         title = "Cat: period of staying (sample of size 200)")



In [None]:
# Sampling Distribution (10,000 repetitions, samples of size 200)

# Dog: draw 10,000 samples of size 200, without replacement, from the dog
# population (`animal_data_dog`). Drawing 200 of 200 rows without
# replacement from the single size-200 sample would return the same rows in
# every replicate, so every replicate mean would be identical.
animal_sampling200_dog <- animal_data_dog %>%
    rep_sample_n(size = 200, reps = 10000, replace = FALSE)

# Dog: mean period within each of the 10,000 replicates
mean_sampling200_dog <- animal_sampling200_dog %>%
    group_by(replicate) %>%
    summarise(mean_dog = mean(period))

# Dog: standard deviation of the replicate means (the standard error)
sd_sampling200_dog <- mean_sampling200_dog %>%
    summarise(sd = sd(mean_dog))

# Dog: histogram of the replicate means.
# `boundary = 0` aligns the bin edges to multiples of the bin width.
mean_sampling200_dog_hist <- ggplot(mean_sampling200_dog, aes(x = mean_dog)) +
    geom_histogram(binwidth = 5, boundary = 0) +
    labs(x = "Mean of dog staying period",
         title = "Dog sampling distribution: 10,000 samples of size 200")

# Cat can be done in the same way

In [None]:
# Bootstrap Distribution (10,000 repetitions, resamples of size 200), infer package

# Dog: bootstrap the size-200 dog sample and compute the mean period of each
# of the 10,000 resamples. A bootstrap distribution needs no null
# hypothesis, so the pipeline goes straight from specify() to generate().
# NOTE(review): this reuses the names `mean_sampling200_dog` and
# `mean_sampling200_dog_hist`, which the sampling-distribution cell also
# assigns; the names are kept as-is so existing references keep working.
mean_sampling200_dog <- animal_sample_200_dog %>%
    specify(response = period) %>%
    generate(reps = 10000, type = "bootstrap") %>%
    calculate(stat = "mean")

# Dog: histogram of the bootstrap means
mean_sampling200_dog_hist <-
    visualise(mean_sampling200_dog, bins = 5)

# Cat can be done in the same way

In [None]:
# Hypothesis Testing

# Dog&Cat: null distribution of the difference in mean period (Dog - Cat),
# built from 10,000 permutations under independence of period and AnimalType.
null_distribution <- animal_sample_200 %>%
    specify(formula = period ~ AnimalType) %>%
    hypothesize(null = "independence") %>%
    generate(reps = 10000, type = "permute") %>%
    calculate(stat = "diff in means", order = c("Dog", "Cat"))

# Observed difference in mean period (Dog - Cat) in the sample
obs_diff_mean <- animal_sample_200 %>%
    specify(period ~ AnimalType) %>%
    calculate(stat = "diff in means", order = c("Dog", "Cat"))

# Null distribution with the right-tail p-value region shaded
mean_sampling200_hist <-
    visualize(null_distribution, bins = 5) +
    shade_p_value(obs_stat = obs_diff_mean, direction = "right")

# One-sided (right-tail) p-value; get_p_value() takes the null distribution
# as its first argument.
null_distribution_p <- null_distribution %>%
    get_p_value(obs_stat = obs_diff_mean, direction = "right")