In [1]:
library(tidyverse)
library(tidymodels)
library(modeltime)
library(timetk)
library(vroom)
library(embed)
library(bonsai)
library(lightgbm)

# Read the competition's training and test tables from the Kaggle input
# directory (column types are guessed by vroom).
train_data <- vroom("/kaggle/input/demand-forecasting-kernels-only/train.csv")
test_data <- vroom("/kaggle/input/demand-forecasting-kernels-only/test.csv")

# Largest store and item IDs found in the training data. These only equal the
# number of stores/items when the IDs run contiguously from 1 upwards.
num_stores <- train_data %>%
  pull(store) %>%
  max()
num_items <- train_data %>%
  pull(item) %>%
  max()

# Preprocessing recipe: `sales` is the outcome and every other column of the
# training data starts out as a predictor.
sales_recipe <- recipe(sales ~ ., data = train_data)

# Expand `date` into day-of-week, month, decimal date, day-of-year and year.
sales_recipe <- step_date(
  sales_recipe,
  date,
  features = c("dow", "month", "decimal", "doy", "year")
)

# Rescale day-of-year onto [0, pi], then add its sine and cosine as features.
sales_recipe <- step_range(sales_recipe, date_doy, min = 0, max = pi)
sales_recipe <- step_mutate(
  sales_recipe,
  sin_doy = sin(date_doy),
  cos_doy = cos(date_doy)
)

# Replace nominal predictors with outcome-based (mixed-model) encodings.
sales_recipe <- step_lencode_mixed(
  sales_recipe,
  all_nominal_predictors(),
  outcome = vars(sales)
)

# Drop the raw date and the series identifiers, then centre and scale every
# remaining numeric predictor.
sales_recipe <- step_rm(sales_recipe, date, item, store)
sales_recipe <- step_normalize(sales_recipe, all_numeric_predictors())

# Boosted-tree regression specification fitted with the lightgbm engine:
# 1000 trees of depth 2 with a learning rate of 0.01.
boosted_tree_model <- set_mode(
  set_engine(
    boost_tree(trees = 1000, tree_depth = 2, learn_rate = 0.01),
    "lightgbm"
  ),
  "regression"
)

# Bundle the preprocessing recipe and the model specification into a single
# workflow so both are fitted together on each data subset.
boosted_workflow <- add_model(
  add_recipe(workflow(), sales_recipe),
  boosted_tree_model
)

# Fit one model per store-item series and forecast that series' test rows.
#
# The series are the (store, item) pairs actually present in the training
# data, ordered by store and then item, so IDs need not start at 1 or be
# contiguous and no model is ever fitted on an empty subset.
series_keys <- train_data %>%
  distinct(store, item) %>%
  arrange(store, item)

# Test series without any training rows cannot be forecast; report them
# instead of failing inside the model fit or dropping them silently.
unmatched_series <- test_data %>%
  distinct(store, item) %>%
  anti_join(series_keys, by = c("store", "item"))
if (nrow(unmatched_series) > 0) {
  warning(
    nrow(unmatched_series),
    " store-item series in the test data have no training data and ",
    "will not be forecast.",
    call. = FALSE
  )
}

# Collect the per-series predictions in a preallocated list and bind them once
# at the end; growing a data frame inside the loop copies it on every pass.
prediction_list <- vector("list", nrow(series_keys))

for (i in seq_len(nrow(series_keys))) {
  store_id <- series_keys$store[[i]]
  item_id <- series_keys$item[[i]]

  # Filter the data for the current store-item combination
  training_subset <- train_data %>%
    filter(store == store_id, item == item_id)
  testing_subset <- test_data %>%
    filter(store == store_id, item == item_id)

  # Nothing to forecast for this series, so skip the (expensive) fit
  if (nrow(testing_subset) == 0) {
    next
  }

  # Fit the workflow on this series only and predict its test rows
  trained_model <- boosted_workflow %>%
    fit(data = training_subset)

  # Keep just the submission columns: the test row id and the forecast
  prediction_list[[i]] <- predict(trained_model, new_data = testing_subset) %>%
    bind_cols(testing_subset) %>%
    rename(sales = .pred) %>%
    select(id, sales)
}

# Skipped series leave NULL entries, which bind_rows() ignores
final_predictions <- bind_rows(prediction_list)

# Write the predictions to a comma-delimited submission file.
# `file` replaces the `path` argument, which is deprecated since vroom 1.5.0
# and triggers a lifecycle warning on every call.
vroom_write(x = final_predictions, file = "./submission.csv", delim = ",")


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     


── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.2.0 ──



[32m✔[39m [34mbroom       [39m 1.0.6      [32m✔[39m [34mrsample     [39m 1.2.1 
[32m✔[39m [34mdials       [39m 1.2.1      [32m✔[39m [34mtune        [39m 1.2.1 
[32m✔[39m [34minfer       [39m 1.0.7      [32m✔[39m [34mworkflows   [39m 1.1.4 
[32m✔[39m [34mmodeldata   [39m 1.4.0      [32m✔[39m [34mworkflowsets[39m 1.1.0 
[32m✔[39m [34mparsnip     [39m 1.2.1      [32m✔[39m [34myardstick   [39m 1.3.1 
[32m✔[39m [34mrecipes     [39m 1.0.10     



── [1mConflicts[22m ───────────────────────────────────────── tidymodels_conflicts() ──
[31m✖[39m [34mscales[39m::[32mdiscard()[39m masks [34mpurrr[39m::discard()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m   masks [34mstats[39m::filter()
[31m✖[39m [34mrecipes[39m::[32mfixed()[39m  masks [34mstringr[39m::fixed()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m      masks [34mstats[39m::lag()
[31m✖[39m [34myardstick[39m::[32mspec()[39m masks [34mreadr[39m::spec()
[31m✖[39m [34mrecipes[39m::[32mstep()[39m   masks [34mstats[39m::step()
[34m•[39m Search for functions across packages at [32mhttps://www.tidymodels.org/find/[39m




Attaching package: ‘vroom’




The following object is masked from ‘package:yardstick’:

    spec




The following object is masked from ‘package:scales’:

    col_factor




The following objects are masked from ‘package:readr’:

    as.col_spec, col_character, col_date, col_datetime, col_double,
    col_factor, col_guess, col_integer, col_logical, col_number,
    col_skip, col_time, cols, cols_condense, cols_only, date_names,
    date_names_lang, date_names_langs, default_locale, fwf_cols,
    fwf_empty, fwf_positions, fwf_widths, locale, output_column,
    problems, spec




Loading required package: R6




Attaching package: ‘lightgbm’




The following object is masked from ‘package:dplyr’:

    slice




[1mRows: [22m[34m913000[39m [1mColumns: [22m[34m4[39m


[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[32mdbl[39m  (3): store, item, sales
[34mdate[39m (1): date



[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1mRows: [22m[34m45000[39m [1mColumns: [22m[34m4[39m


[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[32mdbl[39m  (3): id, store, item
[34mdate[39m (1): date



[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


“[1m[22mThe `file` argument of `vroom_write()` is deprecated as of vroom 1.5.0.”
