In [1]:
# to get latest version
install.packages("torch")
set.seed(2013)
torch_manual_seed(2013)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



ERROR: Error in torch_manual_seed(2013): could not find function "torch_manual_seed"


In [2]:
library(tidyverse) # metapackage of all tidyverse packages
library(torch)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.1     [32m✔[39m [34mdplyr  [39m 1.0.5
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [3]:
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
list.files(path = "../input/traindata")
getwd()

# for deciding whether to augment data
augment = TRUE

In [4]:
packageVersion("torch")
device <- if(cuda_is_available()) "cuda" else "cpu"
device

[1] ‘0.3.0’

## Define dataloader and model architecture

In [5]:
# define dataloader
tracking_dataset <- dataset(
  name = "tracking_dataset",
  
  initialize = function(x_tensor, y_tensor) {
    
    self$data_x <- x_tensor
    self$data_y <- y_tensor
    
  },
  
  .getitem = function(i) {
    list(self$data_x[i,], self$data_y[i])
  },
  
  .length = function() {
    self$data_y$size()[[1]]
  }
)

In [6]:
# define model
net <- nn_module(
  "Net",
  
  initialize = function() {
    
    self$conv_block_1 <- nn_sequential(
      nn_conv2d(
        in_channels = input_channels,
        out_channels = 128,
        kernel_size = 1
      ),
      nn_relu(inplace = TRUE),
      nn_conv2d(
        in_channels = 128,
        out_channels = 160,
        kernel_size = 1
      ),
      nn_relu(inplace = TRUE),
      nn_conv2d(
        in_channels = 160,
        out_channels = 128,
        kernel_size = 1
      ),
      nn_relu(inplace = TRUE),
    )
    
    self$conv_block_2 <- nn_sequential(
      nn_batch_norm1d(128),
      nn_conv1d(
        in_channels = 128,
        out_channels = 160,
        kernel_size = 1
      ),
      nn_relu(inplace = TRUE),
      nn_batch_norm1d(160),
      nn_conv1d(
        in_channels = 160,
        out_channels = 96,
        kernel_size = 1
      ),
      nn_relu(inplace = TRUE),
      nn_batch_norm1d(96),
      nn_conv1d(
        in_channels = 96,
        out_channels = 96,
        kernel_size = 1
      ),
      nn_relu(inplace = TRUE),
      nn_batch_norm1d(96)
    )
    
    self$linear_block <- nn_sequential(
      nn_linear(96, 96),
      nn_relu(inplace = TRUE),
      nn_batch_norm1d(96),
      
      nn_linear(96, 256),
      nn_relu(inplace = TRUE),
      nn_batch_norm1d(256),
      
      # breaks
      # nn_layer_norm(256),
      nn_dropout(p = 0.3),
      
      nn_linear(256, 9)
      
      # nn_softmax(2)
    )
    
  },
  
  forward = function(x) {
    
    # first conv layer
    x <- self$conv_block_1(x)
    
    # first pool layer
    avg <- nn_avg_pool2d(kernel_size = c(1, 5))(x) %>%
      torch_squeeze(-1)
    max <- nn_max_pool2d(kernel_size = c(1, 5))(x) %>%
      torch_squeeze(-1)
    
    x <- 0.7 * avg + 0.3 * max
    
    # second conv layer
    x <- self$conv_block_2(x)
    
    # second pool layer
    avg <- nn_avg_pool1d(kernel_size = 11)(x) %>%
      torch_squeeze(-1)
    max <- nn_max_pool1d(kernel_size = 11)(x) %>%
      torch_squeeze(-1)
    
    x <- 0.7 * avg + 0.3 * max
    
    x <- self$linear_block(x)
    
    x
    
  }
)

In [7]:
augment_data <- function(df, flip_indices, subtract_indices) {
  
  # indices of the elements that need to be flipped
  t <- torch_ones_like(df)
  t[, flip_indices, , ] <- -1
  
  # first fix: multiply by -1 where needed
  flipped <- df * t
  
  # now flip y coordinates: 2nd feature dimension
  t <- torch_zeros_like(df)
  t[, subtract_indices, , ] <- 160/3
  
  # flip around y
  flipped[, subtract_indices, , ] <- t[, subtract_indices, , ] - flipped[, subtract_indices, , ]
  
  return(flipped)
  
}

## Load and prepare data

In [8]:
# get tensors
train_x <- torch_load("../input/traindata/train_x_one_frame.pt")
train_y <- torch_load("../input/traindata/train_y_one_frame.pt")

# get pre-saved lengths
lengths <- readRDS("../input/traindata/data_sizes_one_frame.rds")

test_length <- lengths$test_length
plays <- lengths$plays

input_channels <- dim(train_x)[2]

test_length
plays

input_channels

# right now we have tensors for train_x and train_y that also include test data (week 1)
dim(train_x)
dim(train_y)

# split into test and train
test_x <- train_x[1:test_length, , ]
train_x <- train_x[(test_length + 1) : plays, , ]

test_y <- train_y[1:test_length]
train_y <- train_y[(test_length + 1) : plays]

# make plays the length of train data 
plays <- dim(train_y)

# split into train and validation
train_id <- sample(1:plays, ceiling(0.80 * plays))
valid_id <- setdiff(1:plays, train_id)

train_data <- train_x[train_id, , , ]
train_label <- train_y[train_id]

# if you want to augment with flipped data
if (augment) {
  
  dim(train_data)
  train_data_augmented <- augment_data(train_data, c(4, 6, 9, 11, 13), c(2))
  
  train_data <- torch_cat(list(train_data, train_data_augmented))
  train_label <- torch_cat(list(train_label, train_label))
  
  dim(train_data)
  dim(train_label)
}

# use dataloaders for train and validation
train_ds <- tracking_dataset(train_data, train_label)
valid_ds <- tracking_dataset(train_x[valid_id, , , ], train_y[valid_id])

# Dataloaders
train_dl <- train_ds %>%
  dataloader(batch_size = 64, shuffle = TRUE)

valid_dl <- valid_ds %>%
  dataloader(batch_size = 64, shuffle = FALSE)

In [9]:
dim(train_data)
dim(train_label)

In [10]:
model <- net()
model$to(device = device)

optimizer <- optim_adam(model$parameters, lr = 0.001)

# decay by about 50% after 15 epochs
scheduler <- lr_step(optimizer, step_size = 1, 0.975)

best_accuracy <- .6

In [11]:
epochs <- 25
for (epoch in 1:epochs) {
  
  train_losses <- c()
  valid_losses <- c()
  valid_accuracies <- c()

  # train step
  model$train()
  for (b in enumerate(train_dl)) {
    optimizer$zero_grad()      
    loss <- nnf_cross_entropy(model(b[[1]]$to(device = device)), b[[2]]$to(device = device))
    loss$backward()
    optimizer$step()
    train_losses <- c(train_losses, loss$item())
  }
  
  # validation step
  model$eval()
  for (b in enumerate(valid_dl)) {
    output <- model(b[[1]]$to(device = device))
    y <- b[[2]]$to(device = device)
                    
    valid_losses <- c(valid_losses, nnf_cross_entropy(output, y)$item())
       
    pred <- torch_max(output, dim = 2)[[2]]
    correct <- (pred == y)$sum()$item()
    valid_accuracies <- c(valid_accuracies, correct/length(y))
  }
    
  scheduler$step()
    
  cat(sprintf("\nLoss at epoch %d: training: %1.4f, validation: %1.4f, validation accuracy %1.4f", epoch, mean(train_losses), mean(valid_losses), mean(valid_accuracies)))  
  # who knows if this does anything
  gc()
    
      if (mean(valid_accuracies) > best_accuracy) {
          message(glue::glue("New best accuracy at epoch {epoch}. Saving model"))
          # move to cpu for saving
          
          torch_save(model, "model.pt")
     
          # save new best loss
          best_accuracy <- mean(valid_accuracies)
          best_epoch <- epoch
      
      }
}

# move to cpu for saving
model$to(device = "cpu")
torch_save(model, "final_model.pt") 

# put back
model$to(device = device)


Loss at epoch 1: training: 0.8553, validation: 0.6190, validation accuracy 0.7712

New best accuracy at epoch 1. Saving model




Loss at epoch 2: training: 0.5876, validation: 0.5987, validation accuracy 0.7827

New best accuracy at epoch 2. Saving model




Loss at epoch 3: training: 0.5187, validation: 0.4976, validation accuracy 0.8245

New best accuracy at epoch 3. Saving model




Loss at epoch 4: training: 0.4924, validation: 0.4954, validation accuracy 0.8235
Loss at epoch 5: training: 0.4698, validation: 0.4760, validation accuracy 0.8229
Loss at epoch 6: training: 0.4451, validation: 0.4376, validation accuracy 0.8394

New best accuracy at epoch 6. Saving model




Loss at epoch 7: training: 0.4263, validation: 0.4513, validation accuracy 0.8302
Loss at epoch 8: training: 0.4167, validation: 0.4506, validation accuracy 0.8336
Loss at epoch 9: training: 0.4052, validation: 0.5681, validation accuracy 0.7970
Loss at epoch 10: training: 0.3947, validation: 0.4344, validation accuracy 0.8438

New best accuracy at epoch 10. Saving model




Loss at epoch 11: training: 0.3842, validation: 0.4219, validation accuracy 0.8469

New best accuracy at epoch 11. Saving model




Loss at epoch 12: training: 0.3784, validation: 0.4319, validation accuracy 0.8444
Loss at epoch 13: training: 0.3683, validation: 0.4361, validation accuracy 0.8400
Loss at epoch 14: training: 0.3649, validation: 0.4303, validation accuracy 0.8420
Loss at epoch 15: training: 0.3525, validation: 0.4553, validation accuracy 0.8404
Loss at epoch 16: training: 0.3462, validation: 0.4300, validation accuracy 0.8510

New best accuracy at epoch 16. Saving model




Loss at epoch 17: training: 0.3349, validation: 0.4035, validation accuracy 0.8578

New best accuracy at epoch 17. Saving model




Loss at epoch 18: training: 0.3358, validation: 0.4240, validation accuracy 0.8486
Loss at epoch 19: training: 0.3293, validation: 0.4297, validation accuracy 0.8432
Loss at epoch 20: training: 0.3254, validation: 0.4309, validation accuracy 0.8479
Loss at epoch 21: training: 0.3118, validation: 0.4288, validation accuracy 0.8488
Loss at epoch 22: training: 0.3120, validation: 0.4459, validation accuracy 0.8440
Loss at epoch 23: training: 0.3006, validation: 0.4613, validation accuracy 0.8368
Loss at epoch 24: training: 0.2899, validation: 0.4380, validation accuracy 0.8458
Loss at epoch 25: training: 0.2904, validation: 0.4292, validation accuracy 0.8509

In [12]:
# evaluate on test set
model <- torch_load("model.pt")
model$to(device = device)

model$eval()

labels <- test_y %>%
  as.matrix() %>%
  as_tibble() %>%
  set_names("label")

output <- model(test_x$to(device = device))

predictions <- as.matrix(output$to(device = "cpu")) 

predictions <- predictions %>% 
  as_tibble() %>%
  transform(prediction = max.col(predictions)) %>%
  bind_cols(labels) %>%
  mutate(correct = ifelse(prediction == label, 1, 0)) %>%
  as_tibble() %>%
  mutate(
    label = as.factor(label),
    prediction = as.factor(prediction)
  )

message(glue::glue("Week 1 test (best model): {round(100*mean(predictions$correct), 0)}% correct"))

# augmented preds
test_data_augmented <- augment_data(test_x, c(4, 6, 9, 11, 13), c(2))
output_augmented <- model(test_data_augmented$to(device = device))
output <- (output + output_augmented) / 2

predictions <- as.matrix(output$to(device = "cpu")) 

predictions <- predictions %>% 
  as_tibble() %>%
  transform(prediction = max.col(predictions)) %>%
  bind_cols(labels) %>%
  mutate(correct = ifelse(prediction == label, 1, 0)) %>%
  as_tibble() %>%
  mutate(
    label = as.factor(label),
    prediction = as.factor(prediction)
  )

message(glue::glue("Week 1 augmented test: {round(100*mean(predictions$correct), 0)}% correct"))


# evaluate on test set
model <- torch_load("final_model.pt")
model$to(device = device)

model$eval()

labels <- test_y %>%
  as.matrix() %>%
  as_tibble() %>%
  set_names("label")

output <- model(test_x$to(device = device))

predictions <- as.matrix(output$to(device = "cpu")) 

predictions <- predictions %>% 
  as_tibble() %>%
  transform(prediction = max.col(predictions)) %>%
  bind_cols(labels) %>%
  mutate(correct = ifelse(prediction == label, 1, 0)) %>%
  as_tibble() %>%
  mutate(
    label = as.factor(label),
    prediction = as.factor(prediction)
  )

message(glue::glue("Week 1 test (final model): {round(100*mean(predictions$correct), 0)}% correct"))


“The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
Using compatibility `.name_repair`.
Week 1 test (best model): 85% correct

Week 1 augmented test: 84% correct

Week 1 test (final model): 85% correct

