In [None]:
# This R environment comes with many helpful analytics packages installed
# It is defined by the kaggle/rstats Docker image: https://github.com/kaggle/docker-rstats
# For example, here's a helpful package to load

library(tidyverse) # metapackage of all tidyverse packages

# Input data files are available in the read-only "../input/" directory.
# Running this cell (Run button or Shift+Enter) lists the entries found there.

list.files("../input")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# bring in some packages
library(tidymodels)
library(lubridate)

In [None]:
# Show what the competition folder contains, then load its three CSV files:
# the sample submission, the training data and the test data.
list.files("../input/sliced-s01e10-playoffs-2")
sample_submission <- read_csv(
  "../input/sliced-s01e10-playoffs-2/sample_submission.csv"
)
train_raw <- read_csv(
  "../input/sliced-s01e10-playoffs-2/train.csv"
)
test_raw <- read_csv(
  "../input/sliced-s01e10-playoffs-2/test.csv"
)

In [None]:
# EDA: per-column summary of the raw training data
train_raw %>%
  summary()

In [None]:
# Peek at the first rows of the raw training data
train_raw %>%
  head()

In [None]:
# How many rows are there per animal type? Most frequent first.
count(train_raw, animal_type, sort = TRUE)

In [None]:
# First pass at a modelling table (this does not count breeds): lump the
# breeds outside the 100 most frequent, derive `age` in years from
# `date_of_birth` and `datetime`, keep the modelling columns, and turn the
# remaining character columns into factors.
# Note: the "clean the data" cell further down assigns `train` again.
train <- train_raw %>%
  mutate(
    breed = fct_lump_n(breed, n = 100),
    age = time_length(
      difftime(as.Date(datetime), as.Date(date_of_birth)),
      "years"
    )
  ) %>%
  select(
    id, age, animal_type, breed, color, datetime, sex, spay_neuter,
    outcome_type
  ) %>%
  # across(where(...)) replaces the superseded mutate_if()
  mutate(across(where(is.character), factor))

In [None]:
head(train)

In [None]:
# clean the data

# Shared feature engineering for the train and test sets, kept in one place so
# the two tables cannot drift apart.
#
# data:         raw tibble with at least id, animal_type, breed, color,
#               datetime, date_of_birth, sex and spay_neuter (plus
#               outcome_type when keep_outcome is TRUE).
# keep_outcome: keep the outcome_type column? Pass FALSE for the test set,
#               which is selected without it.
#
# Returns the selected columns with rare breeds/colors lumped, an `age` column
# in years, and every remaining character column converted to a factor.
clean_animals <- function(data, keep_outcome = TRUE) {
  kept_cols <- c(
    "id", "age", "animal_type", "breed", "color", "datetime", "sex",
    "spay_neuter"
  )
  if (keep_outcome) {
    kept_cols <- c(kept_cols, "outcome_type")
  }

  data %>%
    mutate(
      # Preserve the 100 most frequent breeds; the rest are lumped together.
      breed = fct_lump_n(breed, n = 100),
      # Lump `color` itself. The earlier code lumped `breed` here, which
      # replaced the color column with a second copy of breed.
      color = fct_lump_n(color, n = 50),
      # Years elapsed between date_of_birth and datetime.
      age = time_length(
        difftime(as.Date(datetime), as.Date(date_of_birth)),
        "years"
      )
    ) %>%
    select(all_of(kept_cols)) %>%
    # across(where(...)) replaces the superseded mutate_if()
    mutate(across(where(is.character), factor))
}

train <- clean_animals(train_raw)
test <- clean_animals(test_raw, keep_outcome = FALSE)

# build a model

# 5-fold cross-validation, stratified on the outcome; seeded so the fold
# assignment is reproducible.
set.seed(123)
animal_folds <- train %>%
  vfold_cv(v = 5, strata = outcome_type)
animal_folds

library(themis)
library(baguette)

# Metrics computed on every resample: multinomial log loss plus three
# class-prediction metrics.
animal_metrics <- metric_set(
  mn_log_loss,
  accuracy,
  sensitivity,
  specificity
)

# Preprocessing recipe: predict outcome_type from every other column, except
# that `id` is given the "Id" role instead of being a predictor.
animal_rec <-
  recipe(outcome_type ~ ., data = train) %>%
  update_role(id, new_role = "Id") %>%
  # pool infrequent breed / color levels (step defaults)
  step_other(breed) %>%
  step_other(color) %>%
  # derive date features from `datetime`, then drop the raw column
  step_date(datetime) %>%
  step_rm(datetime)

# Bagged classification trees: rpart engine, 25 bagged models, min_n of 10.
bag_spec <-
  bag_tree(mode = "classification", min_n = 10) %>%
  set_engine("rpart", times = 25)

# Bundle the model and the preprocessing into one workflow.
animal_wf <-
  workflow() %>%
  add_model(bag_spec) %>%
  add_recipe(animal_rec)

# Fit the workflow on every fold, saving the out-of-fold predictions so a
# confusion matrix can be built afterwards.
animal_res <- animal_wf %>%
  fit_resamples(
    resamples = animal_folds,
    metrics = animal_metrics,
    control = control_resamples(save_pred = TRUE)
  )

# Metrics aggregated over the folds
animal_res %>%
  collect_metrics()

# Confusion matrix of the out-of-fold class predictions
conf_mat(
  collect_predictions(animal_res),
  truth = outcome_type,
  estimate = .pred_class
)

# Final model: refit the workflow on the full training set (seeded).
set.seed(123)
bag_fit <- animal_wf %>%
  fit(data = train)
bag_fit

# Append the model's predictions to the test set.
test_rs <- bag_fit %>%
  augment(new_data = test)

# Keep the id plus the three class-probability columns, renamed to the
# headers written to the submission file.
prediction <- test_rs %>%
  select(
    id,
    adoption = .pred_adoption,
    `no outcome` = `.pred_no outcome`,
    transfer = .pred_transfer
  )

write_csv(prediction, "my_prediction.csv")
