## 1.2. Preprocessing weather indexes

In [2]:
library("tidyverse")
library("weathercan")
library("lubridate") # yday(), ymd()
library("Amelia")


Attaching package: 'lubridate'


The following object is masked from 'package:base':

    date


Loading required package: Rcpp

## 
## Amelia II: Multiple Imputation
## (Version 1.7.6, built: 2019-11-24)
## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## Refer to http://gking.harvard.edu/amelia/ for more information
## 



Load the data frame processed in previous file `1.1_preprocessing-1.ipynb`.

In [2]:
data_df <- read.csv('output/balanced_potato_df.csv')

### Compute 5 years (mean or total) weather indices 

Because the process is long, you can load `data_stations.csv` file and make a left join with `data_df` using "Annee", "LatDD", "LonDD", "DatePlantation", and "DateRecolte" as common features.

In [3]:
data_stations = read_csv("output/weather_stations.csv")

Parsed with column specification:
cols(
  Annee = [32mcol_double()[39m,
  LatDD = [32mcol_double()[39m,
  LonDD = [32mcol_double()[39m,
  DatePlantation = [34mcol_date(format = "")[39m,
  DateRecolte = [34mcol_date(format = "")[39m,
  station_id = [32mcol_double()[39m,
  temp_moy_5years = [32mcol_double()[39m,
  prec_tot_5years = [32mcol_double()[39m,
  sdi_5years = [32mcol_double()[39m,
  gdd_5years = [32mcol_double()[39m
)


Custom fonctions 

In [4]:
# shannon diversity index
SDI_f <- function(x) { 
  p <- x/sum(x, na.rm = TRUE)
  SDI <- sum(p * log(p), na.rm = TRUE) / log(length(x))
  return(SDI)
}
# growing degree-days
GDD_f <- function(x, delim = 5) {
  sum(x[x >= delim], na.rm = TRUE)
}

Create a data frame for computations

In [5]:
data_df$DatePlantation <- ymd(data_df$DatePlantation)
data_df$DateRecolte <- ymd(data_df$DateRecolte)

" 1 failed to parse."

If you've loaded the `weather_stations.csv`, from here, you coud jump to the fith code chaine from the end (insertion into the main table). 

In [7]:
data_stations <- data_df %>%
  distinct(Annee, LatDD, LonDD, DatePlantation, DateRecolte) %>%
  filter(!is.na(Annee))
glimpse(data_stations)

Observations: 323
Variables: 5
$ Annee          [3m[90m<int>[39m[23m 2003, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2005...
$ LatDD          [3m[90m<dbl>[39m[23m 46.75306, 46.76444, 48.79722, 48.79722, 48.79722, 48...
$ LonDD          [3m[90m<dbl>[39m[23m -72.33861, -72.33083, -72.24250, -72.24250, -72.2425...
$ DatePlantation [3m[90m<date>[39m[23m 2003-05-19, 2004-05-30, 2004-05-31, 2004-06-05, 200...
$ DateRecolte    [3m[90m<date>[39m[23m 2003-09-17, 2004-09-27, 2004-09-20, 2004-09-15, 200...


Impute missing dates if year is recorded

In [8]:
# Check missing dates
data_stations %>%
  filter(is.na(DatePlantation))

Annee,LatDD,LonDD,DatePlantation,DateRecolte
<int>,<dbl>,<dbl>,<date>,<date>
2011,46.02889,-73.65111,,
1992,48.55,-71.33333,,
1993,48.55,-71.33333,,
1995,46.10487,-72.24634,,1995-09-10
1996,46.10487,-72.24634,,1996-09-11
1996,46.10903,-72.3853,,1996-09-11
1996,46.45472,-72.70722,,1996-09-11
1987,46.45472,-72.70722,,1987-09-10
1988,46.45472,-72.70722,,1988-09-11
1989,46.45472,-72.70722,,1989-09-10


In [9]:
# Check missing dates
data_stations %>%
  filter(is.na(DateRecolte))

Annee,LatDD,LonDD,DatePlantation,DateRecolte
<int>,<dbl>,<dbl>,<date>,<date>
2011,46.02889,-73.65111,,
1992,48.55,-71.33333,,
1993,48.55,-71.33333,,


In [10]:
data_stations_imp <- data_stations %>%
                          mutate(DatePlantation_yd = yday(DatePlantation), # yday = year day
                                 DateRecolte_yd = yday(DateRecolte))

data_stations_imp <- amelia(x = data_stations_imp %>%
                         select(Annee, LatDD, LonDD, DatePlantation_yd, 
                                DateRecolte_yd), m=1)$imputations[[1]]

-- Imputation 1 --

  1  2  3



In [11]:
data_stations_imp$Annee <- as.Date(paste0(data_stations_imp$Annee, "-01-01"))

data_stations$DatePlantation <- data_stations_imp$Annee + data_stations_imp$DatePlantation_yd - 1
data_stations$DateRecolte <- data_stations_imp$Annee + data_stations_imp$DateRecolte_yd - 1

In [12]:
data_stations$DatePlantation <- as.Date(data_stations$DatePlantation)
data_stations$DateRecolte <- as.Date(data_stations$DateRecolte)

Load data from Environnement Canada (weathercan)

In [12]:
year_step <- 5 # start 5 years preceeding trial year
data_stations$station_id <- NA # identify weather stations
station_weather <- list() # list of weather stations containning climate data

for (i in 1:nrow(data_stations)) {
  print(paste(i, "/", nrow(data_stations), '...'))
  
  # identify the nearest station. Search for stations by name or location
  all_stations <- stations_search(coords = c(data_stations$LatDD[i], 
                                             data_stations$LonDD[i]),  
                                  interval = "day",
                                  dist = 500)
  annee <- data_stations$Annee[i]
  
  # Available station for nearest year
  closest_station <- all_stations %>%
                        filter(start <= annee - year_step) %>%
                        filter(end >= annee) %>%
                        slice(which.min(distance))

  data_stations$station_id[i] <- closest_station$station_id %>% 
                                      as.character() %>% 
                                      as.numeric()
  
  print(paste("Station id:", data_stations$station_id[i]))
  
  # # Download weather data from Environment Canada of the station identified
  station_weather[[i]] <- weather_dl(station_ids = data_stations$station_id[i], 
                                     start = as.Date(paste0(annee - year_step, "-01-01")),
                                     end = as.Date(paste0(annee, "-01-01")),
                                     interval = "day")
  
  print(paste(i, "/", nrow(data_stations)))
}
# name of the list elements
names(station_weather) <- as.character(data_stations$station_id) 

[1] "1 / 323 ..."
[1] "Station id: 5203"
[1] "1 / 323"
[1] "2 / 323 ..."
[1] "Station id: 5203"
[1] "2 / 323"
[1] "3 / 323 ..."
[1] "Station id: 5929"
[1] "3 / 323"
[1] "4 / 323 ..."
[1] "Station id: 5929"
[1] "4 / 323"
[1] "5 / 323 ..."
[1] "Station id: 5929"
[1] "5 / 323"
[1] "6 / 323 ..."
[1] "Station id: 5929"
[1] "6 / 323"
[1] "7 / 323 ..."
[1] "Station id: 5929"
[1] "7 / 323"
[1] "8 / 323 ..."
[1] "Station id: 5929"
[1] "8 / 323"
[1] "9 / 323 ..."
[1] "Station id: 5203"
[1] "9 / 323"
[1] "10 / 323 ..."
[1] "Station id: 5203"
[1] "10 / 323"
[1] "11 / 323 ..."
[1] "Station id: 5222"
[1] "11 / 323"
[1] "12 / 323 ..."
[1] "Station id: 5222"
[1] "12 / 323"
[1] "13 / 323 ..."
[1] "Station id: 5222"
[1] "13 / 323"
[1] "14 / 323 ..."
[1] "Station id: 5929"
[1] "14 / 323"
[1] "15 / 323 ..."
[1] "Station id: 5255"
[1] "15 / 323"
[1] "16 / 323 ..."
[1] "Station id: 5929"
[1] "16 / 323"
[1] "17 / 323 ..."
[1] "Station id: 5255"
[1] "17 / 323"
[1] "18 / 323 ..."
[1] "Station id: 5255"
[1] "18

Some variables have non-numeric values (spd_max_gust), for stations: 5251

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "24 / 323"
[1] "25 / 323 ..."
[1] "Station id: 5447"
[1] "25 / 323"
[1] "26 / 323 ..."
[1] "Station id: 5255"
[1] "26 / 323"
[1] "27 / 323 ..."
[1] "Station id: 5929"
[1] "27 / 323"
[1] "28 / 323 ..."
[1] "Station id: 5393"
[1] "28 / 323"
[1] "29 / 323 ..."
[1] "Station id: 5222"
[1] "29 / 323"
[1] "30 / 323 ..."
[1] "Station id: 5929"
[1] "30 / 323"
[1] "31 / 323 ..."
[1] "Station id: 5929"
[1] "31 / 323"
[1] "32 / 323 ..."
[1] "Station id: 5929"
[1] "32 / 323"
[1] "33 / 323 ..."
[1] "Station id: 5929"
[1] "33 / 323"
[1] "34 / 323 ..."
[1] "Station id: 5929"
[1] "34 / 323"
[1] "35 / 323 ..."
[1] "Station id: 5929"
[1] "35 / 323"
[1] "36 / 323 ..."
[1] "Station id: 5929"
[1] "36 / 323"
[1] "37 / 323 ..."
[1] "Station id: 5929"
[1] "37 / 323"
[1] "38 / 323 ..."
[1] "Station id: 5929"
[1] "38 / 323"
[1] "39 / 323 ..."
[1] "Station id: 5929"
[1] "39 / 323"
[1] "40 / 323 ..."
[1] "Station id: 5929"
[1] "40 / 323"
[1] "41 / 323 ..."
[1] "Station id: 5929"
[1] "41 / 323"
[1] "42 / 323 ..

There are no data for station 5662, in this time range (1955-01-01 to 1960-01-01), for this interval (day), 
Available Station Data:
[38;5;246m# A tibble: 3 x 14[39m
  prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz   
  [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m             [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m
[38;5;250m1[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31m68[39m[31m.[39m[31m2[39m  21.6 Etc/…
[38;5;250m2[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31m68[39m[31m.[39m[31m2[39m  21.6 Etc/…
[38;5;250m3[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31

[1] "57 / 323"
[1] "58 / 323 ..."
[1] "Station id: 5662"


There are no data for station 5662, in this time range (1956-01-01 to 1961-01-01), for this interval (day), 
Available Station Data:
[38;5;246m# A tibble: 3 x 14[39m
  prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz   
  [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m             [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m
[38;5;250m1[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31m68[39m[31m.[39m[31m2[39m  21.6 Etc/…
[38;5;250m2[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31m68[39m[31m.[39m[31m2[39m  21.6 Etc/…
[38;5;250m3[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31

[1] "58 / 323"
[1] "59 / 323 ..."
[1] "Station id: 5662"


There are no data for station 5662, in this time range (1958-01-01 to 1963-01-01), for this interval (day), 
Available Station Data:
[38;5;246m# A tibble: 3 x 14[39m
  prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz   
  [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m             [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m
[38;5;250m1[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31m68[39m[31m.[39m[31m2[39m  21.6 Etc/…
[38;5;250m2[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31m68[39m[31m.[39m[31m2[39m  21.6 Etc/…
[38;5;250m3[39m QC    BAIE-COMEAU…       [4m5[24m662 7040440        [31mNA[39m [31mNA[39m     49.1 -[31

[1] "59 / 323"
[1] "60 / 323 ..."
[1] "Station id: 5201"
[1] "60 / 323"
[1] "61 / 323 ..."
[1] "Station id: 5447"
[1] "61 / 323"
[1] "62 / 323 ..."
[1] "Station id: 5447"
[1] "62 / 323"
[1] "63 / 323 ..."
[1] "Station id: 5447"
[1] "63 / 323"
[1] "64 / 323 ..."
[1] "Station id: 5490"


Some variables have non-numeric values (spd_max_gust), for stations: 5490

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "64 / 323"
[1] "65 / 323 ..."
[1] "Station id: 5490"


Some variables have non-numeric values (spd_max_gust), for stations: 5490

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "65 / 323"
[1] "66 / 323 ..."
[1] "Station id: 5490"


Some variables have non-numeric values (spd_max_gust), for stations: 5490

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "66 / 323"
[1] "67 / 323 ..."
[1] "Station id: 5490"


Some variables have non-numeric values (spd_max_gust), for stations: 5490

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "67 / 323"
[1] "68 / 323 ..."
[1] "Station id: 5237"
[1] "68 / 323"
[1] "69 / 323 ..."
[1] "Station id: 5237"
[1] "69 / 323"
[1] "70 / 323 ..."
[1] "Station id: 5936"
[1] "70 / 323"
[1] "71 / 323 ..."
[1] "Station id: 5936"
[1] "71 / 323"
[1] "72 / 323 ..."
[1] "Station id: 5936"
[1] "72 / 323"
[1] "73 / 323 ..."
[1] "Station id: 5923"
[1] "73 / 323"
[1] "74 / 323 ..."
[1] "Station id: 5923"
[1] "74 / 323"
[1] "75 / 323 ..."
[1] "Station id: 5315"
[1] "75 / 323"
[1] "76 / 323 ..."
[1] "Station id: 5936"
[1] "76 / 323"
[1] "77 / 323 ..."
[1] "Station id: 5929"
[1] "77 / 323"
[1] "78 / 323 ..."
[1] "Station id: 5923"
[1] "78 / 323"
[1] "79 / 323 ..."
[1] "Station id: 5238"
[1] "79 / 323"
[1] "80 / 323 ..."
[1] "Station id: 5237"
[1] "80 / 323"
[1] "81 / 323 ..."
[1] "Station id: 5229"
[1] "81 / 323"
[1] "82 / 323 ..."
[1] "Station id: 5274"


There are no data for station 5274, in this time range (1965-01-01 to 1970-01-01), for this interval (day), 
Available Station Data:
[38;5;246m# A tibble: 2 x 14[39m
  prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz   
  [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m             [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m
[38;5;250m1[39m QC    ST JACQUES         [4m5[24m274 7017380        [31mNA[39m [31mNA[39m     46.0 -[31m73[39m[31m.[39m[31m6[39m    69 Etc/…
[38;5;250m2[39m QC    ST JACQUES         [4m5[24m274 7017380        [31mNA[39m [31mNA[39m     46.0 -[31m73[39m[31m.[39m[31m6[39m    69 Etc/…
[38;5;246m# … with 4 more variables: interval [3m[38;5;246m<chr>[38;5;246m[23m, start [3m[38;5;246m<int>

[1] "82 / 323"
[1] "83 / 323 ..."
[1] "Station id: 5315"
[1] "83 / 323"
[1] "84 / 323 ..."
[1] "Station id: 5315"
[1] "84 / 323"
[1] "85 / 323 ..."
[1] "Station id: 5315"
[1] "85 / 323"
[1] "86 / 323 ..."
[1] "Station id: 5532"
[1] "86 / 323"
[1] "87 / 323 ..."
[1] "Station id: 5532"
[1] "87 / 323"
[1] "88 / 323 ..."
[1] "Station id: 5222"
[1] "88 / 323"
[1] "89 / 323 ..."
[1] "Station id: 5861"
[1] "89 / 323"
[1] "90 / 323 ..."
[1] "Station id: 5674"
[1] "90 / 323"
[1] "91 / 323 ..."
[1] "Station id: 5266"
[1] "91 / 323"
[1] "92 / 323 ..."
[1] "Station id: 5257"
[1] "92 / 323"
[1] "93 / 323 ..."
[1] "Station id: 5257"
[1] "93 / 323"
[1] "94 / 323 ..."
[1] "Station id: 5257"
[1] "94 / 323"
[1] "95 / 323 ..."
[1] "Station id: 5255"
[1] "95 / 323"
[1] "96 / 323 ..."
[1] "Station id: 5929"
[1] "96 / 323"
[1] "97 / 323 ..."
[1] "Station id: 5255"
[1] "97 / 323"
[1] "98 / 323 ..."
[1] "Station id: 5257"
[1] "98 / 323"
[1] "99 / 323 ..."
[1] "Station id: 5257"
[1] "99 / 323"
[1] "100 / 323 .

Some variables have non-numeric values (spd_max_gust), for stations: 5889

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "123 / 323"
[1] "124 / 323 ..."
[1] "Station id: 5230"
[1] "124 / 323"
[1] "125 / 323 ..."
[1] "Station id: 5674"
[1] "125 / 323"
[1] "126 / 323 ..."
[1] "Station id: 5274"
[1] "126 / 323"
[1] "127 / 323 ..."
[1] "Station id: 5848"
[1] "127 / 323"
[1] "128 / 323 ..."
[1] "Station id: 5459"
[1] "128 / 323"
[1] "129 / 323 ..."
[1] "Station id: 5222"
[1] "129 / 323"
[1] "130 / 323 ..."
[1] "Station id: 5923"
[1] "130 / 323"
[1] "131 / 323 ..."
[1] "Station id: 5315"
[1] "131 / 323"
[1] "132 / 323 ..."
[1] "Station id: 5936"
[1] "132 / 323"
[1] "133 / 323 ..."
[1] "Station id: 5923"
[1] "133 / 323"
[1] "134 / 323 ..."
[1] "Station id: 5929"
[1] "134 / 323"
[1] "135 / 323 ..."
[1] "Station id: 5315"
[1] "135 / 323"
[1] "136 / 323 ..."
[1] "Station id: 5936"
[1] "136 / 323"
[1] "137 / 323 ..."
[1] "Station id: 5929"
[1] "137 / 323"
[1] "138 / 323 ..."
[1] "Station id: 5936"
[1] "138 / 323"
[1] "139 / 323 ..."
[1] "Station id: 5806"
[1] "139 / 323"
[1] "140 / 323 ..."
[1] "Station id: 544

Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "154 / 323"
[1] "155 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "155 / 323"
[1] "156 / 323 ..."
[1] "Station id: 5274"
[1] "156 / 323"
[1] "157 / 323 ..."
[1] "Station id: 5255"
[1] "157 / 323"
[1] "158 / 323 ..."
[1] "Station id: 5244"
[1] "158 / 323"
[1] "159 / 323 ..."
[1] "Station id: 5203"
[1] "159 / 323"
[1] "160 / 323 ..."
[1] "Station id: 5522"
[1] "160 / 323"
[1] "161 / 323 ..."
[1] "Station id: 5222"
[1] "161 / 323"
[1] "162 / 323 ..."
[1] "Station id: 5244"
[1] "162 / 323"
[1] "163 / 323 ..."
[1] "Station id: 5203"
[1] "163 / 323"
[1] "164 / 323 ..."
[1] "Station id: 5203"
[1] "164 / 323"
[1] "165 / 323 ..."
[1] "Station id: 5203"
[1] "165 / 323"
[1] "166 / 323 ..."
[1] "Station id: 5222"
[1] "166 / 323"
[1] "167 / 323 ..."
[1] "Station id: 5674"
[1] "167 / 323"
[1] "168 / 323 ..."
[1] "Station id: 5203"
[1] "168 / 323"
[1] "169 / 323 ..."
[1] "Station id: 10965"


There are no data for station 10965, in this time range (2000-01-01 to 2005-01-01), for this interval (day), 
Available Station Data:
[38;5;246m# A tibble: 2 x 14[39m
  prov  station_name station_id climate_id WMO_id TC_id   lat   lon  elev tz   
  [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<chr>[39m[23m             [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<chr>[39m[23m
[38;5;250m1[39m QC    VALCARTIER        [4m1[24m[4m0[24m965 7018572        [31mNA[39m WQG    46.9 -[31m71[39m[31m.[39m[31m5[39m  168. Etc/…
[38;5;250m2[39m QC    VALCARTIER        [4m1[24m[4m0[24m965 7018572        [31mNA[39m WQG    46.9 -[31m71[39m[31m.[39m[31m5[39m  168. Etc/…
[38;5;246m# … with 4 more variables: interval [3m[38;5;246m<chr>[38;5;246m[23m, start [3m[38;5;246m<int>[

[1] "169 / 323"
[1] "170 / 323 ..."
[1] "Station id: 5674"
[1] "170 / 323"
[1] "171 / 323 ..."
[1] "Station id: 5203"
[1] "171 / 323"
[1] "172 / 323 ..."
[1] "Station id: 5255"
[1] "172 / 323"
[1] "173 / 323 ..."
[1] "Station id: 5222"
[1] "173 / 323"
[1] "174 / 323 ..."
[1] "Station id: 8989"


Some variables have non-numeric values (spd_max_gust), for stations: 8989

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "174 / 323"
[1] "175 / 323 ..."
[1] "Station id: 5255"
[1] "175 / 323"
[1] "176 / 323 ..."
[1] "Station id: 8989"


Some variables have non-numeric values (spd_max_gust), for stations: 8989

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "176 / 323"
[1] "177 / 323 ..."
[1] "Station id: 8989"


Some variables have non-numeric values (spd_max_gust), for stations: 8989

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "177 / 323"
[1] "178 / 323 ..."
[1] "Station id: 8989"


Some variables have non-numeric values (spd_max_gust), for stations: 8989

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "178 / 323"
[1] "179 / 323 ..."
[1] "Station id: 5393"
[1] "179 / 323"
[1] "180 / 323 ..."
[1] "Station id: 5861"
[1] "180 / 323"
[1] "181 / 323 ..."
[1] "Station id: 5393"
[1] "181 / 323"
[1] "182 / 323 ..."
[1] "Station id: 5861"
[1] "182 / 323"
[1] "183 / 323 ..."
[1] "Station id: 5861"
[1] "183 / 323"
[1] "184 / 323 ..."
[1] "Station id: 5201"
[1] "184 / 323"
[1] "185 / 323 ..."
[1] "Station id: 5225"
[1] "185 / 323"
[1] "186 / 323 ..."
[1] "Station id: 10732"


Some variables have non-numeric values (spd_max_gust), for stations: 10732

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "186 / 323"
[1] "187 / 323 ..."
[1] "Station id: 5201"
[1] "187 / 323"
[1] "188 / 323 ..."
[1] "Station id: 5201"
[1] "188 / 323"
[1] "189 / 323 ..."
[1] "Station id: 27646"


Some variables have non-numeric values (spd_max_gust), for stations: 27646

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "189 / 323"
[1] "190 / 323 ..."
[1] "Station id: 5201"
[1] "190 / 323"
[1] "191 / 323 ..."
[1] "Station id: 5201"
[1] "191 / 323"
[1] "192 / 323 ..."
[1] "Station id: 5201"
[1] "192 / 323"
[1] "193 / 323 ..."
[1] "Station id: 5230"
[1] "193 / 323"
[1] "194 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "194 / 323"
[1] "195 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "195 / 323"
[1] "196 / 323 ..."
[1] "Station id: 5274"
[1] "196 / 323"
[1] "197 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "197 / 323"
[1] "198 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "198 / 323"
[1] "199 / 323 ..."
[1] "Station id: 5532"
[1] "199 / 323"
[1] "200 / 323 ..."
[1] "Station id: 5393"
[1] "200 / 323"
[1] "201 / 323 ..."
[1] "Station id: 5532"
[1] "201 / 323"
[1] "202 / 323 ..."
[1] "Station id: 5393"
[1] "202 / 323"
[1] "203 / 323 ..."
[1] "Station id: 5222"
[1] "203 / 323"
[1] "204 / 323 ..."
[1] "Station id: 5203"
[1] "204 / 323"
[1] "205 / 323 ..."
[1] "Station id: 5222"
[1] "205 / 323"
[1] "206 / 323 ..."
[1] "Station id: 5203"
[1] "206 / 323"
[1] "207 / 323 ..."
[1] "Station id: 5222"
[1] "207 / 323"
[1] "208 / 323 ..."
[1] "Station id: 8321"
[1] "208 / 323"
[1] "209 / 323 ..."
[1] "Station id: 5257"
[1] "209 / 323"
[1] "210 / 323 ..."
[1] "Station id: 5936"
[1] "210 / 323"
[1] "211 / 323 ..."
[1] "Station id: 5274"
[1] "211 / 323"
[1] "212 / 323 ..."
[1] "Station id: 5274"
[1] "212 / 323"
[1] "213 / 323 ..."
[1] "Station id: 5936"
[1] "213 / 323"
[1] "214 / 323 ..."
[1] "Station id: 5936"
[1] "214 / 323"
[1] "215 / 323 ..."
[1] "Station id: 526

Some variables have non-numeric values (spd_max_gust), for stations: 8674

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "228 / 323"
[1] "229 / 323 ..."
[1] "Station id: 5255"
[1] "229 / 323"
[1] "230 / 323 ..."
[1] "Station id: 26892"
[1] "230 / 323"
[1] "231 / 323 ..."
[1] "Station id: 5266"
[1] "231 / 323"
[1] "232 / 323 ..."
[1] "Station id: 5929"
[1] "232 / 323"
[1] "233 / 323 ..."
[1] "Station id: 5929"
[1] "233 / 323"
[1] "234 / 323 ..."
[1] "Station id: 5687"
[1] "234 / 323"
[1] "235 / 323 ..."
[1] "Station id: 5274"
[1] "235 / 323"
[1] "236 / 323 ..."
[1] "Station id: 5274"
[1] "236 / 323"
[1] "237 / 323 ..."
[1] "Station id: 5274"
[1] "237 / 323"
[1] "238 / 323 ..."
[1] "Station id: 5237"
[1] "238 / 323"
[1] "239 / 323 ..."
[1] "Station id: 5225"
[1] "239 / 323"
[1] "240 / 323 ..."
[1] "Station id: 5266"
[1] "240 / 323"
[1] "241 / 323 ..."
[1] "Station id: 5255"
[1] "241 / 323"
[1] "242 / 323 ..."
[1] "Station id: 5266"
[1] "242 / 323"
[1] "243 / 323 ..."
[1] "Station id: 5266"
[1] "243 / 323"
[1] "244 / 323 ..."
[1] "Station id: 5266"
[1] "244 / 323"
[1] "245 / 323 ..."
[1] "Station id: 83

Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "245 / 323"
[1] "246 / 323 ..."
[1] "Station id: 8321"


Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "246 / 323"
[1] "247 / 323 ..."
[1] "Station id: 8321"


Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "247 / 323"
[1] "248 / 323 ..."
[1] "Station id: 8321"


Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "248 / 323"
[1] "249 / 323 ..."
[1] "Station id: 5255"
[1] "249 / 323"
[1] "250 / 323 ..."
[1] "Station id: 5687"
[1] "250 / 323"
[1] "251 / 323 ..."
[1] "Station id: 5929"
[1] "251 / 323"
[1] "252 / 323 ..."
[1] "Station id: 5929"
[1] "252 / 323"
[1] "253 / 323 ..."
[1] "Station id: 5929"
[1] "253 / 323"
[1] "254 / 323 ..."
[1] "Station id: 5255"
[1] "254 / 323"
[1] "255 / 323 ..."
[1] "Station id: 5255"
[1] "255 / 323"
[1] "256 / 323 ..."
[1] "Station id: 5257"
[1] "256 / 323"
[1] "257 / 323 ..."
[1] "Station id: 5220"
[1] "257 / 323"
[1] "258 / 323 ..."
[1] "Station id: 5220"
[1] "258 / 323"
[1] "259 / 323 ..."
[1] "Station id: 5220"
[1] "259 / 323"
[1] "260 / 323 ..."
[1] "Station id: 5220"
[1] "260 / 323"
[1] "261 / 323 ..."
[1] "Station id: 5220"
[1] "261 / 323"
[1] "262 / 323 ..."
[1] "Station id: 26892"
[1] "262 / 323"
[1] "263 / 323 ..."
[1] "Station id: 5254"
[1] "263 / 323"
[1] "264 / 323 ..."
[1] "Station id: 5861"
[1] "264 / 323"
[1] "265 / 323 ..."
[1] "Station id: 26

Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "270 / 323"
[1] "271 / 323 ..."
[1] "Station id: 8321"


Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "271 / 323"
[1] "272 / 323 ..."
[1] "Station id: 8321"


Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "272 / 323"
[1] "273 / 323 ..."
[1] "Station id: 8321"


Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "273 / 323"
[1] "274 / 323 ..."
[1] "Station id: 8674"


Some variables have non-numeric values (spd_max_gust), for stations: 8674

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "274 / 323"
[1] "275 / 323 ..."
[1] "Station id: 5929"
[1] "275 / 323"
[1] "276 / 323 ..."
[1] "Station id: 5929"
[1] "276 / 323"
[1] "277 / 323 ..."
[1] "Station id: 5929"
[1] "277 / 323"
[1] "278 / 323 ..."
[1] "Station id: 5929"
[1] "278 / 323"
[1] "279 / 323 ..."
[1] "Station id: 5274"
[1] "279 / 323"
[1] "280 / 323 ..."
[1] "Station id: 5532"
[1] "280 / 323"
[1] "281 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "281 / 323"
[1] "282 / 323 ..."
[1] "Station id: 5532"
[1] "282 / 323"
[1] "283 / 323 ..."
[1] "Station id: 5936"
[1] "283 / 323"
[1] "284 / 323 ..."
[1] "Station id: 5936"
[1] "284 / 323"
[1] "285 / 323 ..."
[1] "Station id: 5936"
[1] "285 / 323"
[1] "286 / 323 ..."
[1] "Station id: 8321"
[1] "286 / 323"
[1] "287 / 323 ..."
[1] "Station id: 8321"
[1] "287 / 323"
[1] "288 / 323 ..."
[1] "Station id: 8321"
[1] "288 / 323"
[1] "289 / 323 ..."
[1] "Station id: 5522"
[1] "289 / 323"
[1] "290 / 323 ..."
[1] "Station id: 5522"
[1] "290 / 323"
[1] "291 / 323 ..."
[1] "Station id: 5522"
[1] "291 / 323"
[1] "292 / 323 ..."
[1] "Station id: 5201"
[1] "292 / 323"
[1] "293 / 323 ..."
[1] "Station id: 5201"
[1] "293 / 323"
[1] "294 / 323 ..."
[1] "Station id: 5201"
[1] "294 / 323"
[1] "295 / 323 ..."
[1] "Station id: 5201"
[1] "295 / 323"
[1] "296 / 323 ..."
[1] "Station id: 5201"
[1] "296 / 323"
[1] "297 / 323 ..."
[1] "Station id: 5266"
[1] "297 / 323"
[1] "298 / 323 ..."
[1] "Station id: 525

Some variables have non-numeric values (spd_max_gust), for stations: 8321

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "307 / 323"
[1] "308 / 323 ..."
[1] "Station id: 5266"
[1] "308 / 323"
[1] "309 / 323 ..."
[1] "Station id: 5393"
[1] "309 / 323"
[1] "310 / 323 ..."
[1] "Station id: 5929"
[1] "310 / 323"
[1] "311 / 323 ..."
[1] "Station id: 5929"
[1] "311 / 323"
[1] "312 / 323 ..."
[1] "Station id: 8989"


Some variables have non-numeric values (spd_max_gust), for stations: 8989

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "312 / 323"
[1] "313 / 323 ..."
[1] "Station id: 10762"


Some variables have non-numeric values (spd_max_gust), for stations: 10762

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "313 / 323"
[1] "314 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "314 / 323"
[1] "315 / 323 ..."
[1] "Station id: 10843"


Some variables have non-numeric values (spd_max_gust), for stations: 10843

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "315 / 323"
[1] "316 / 323 ..."
[1] "Station id: 5522"
[1] "316 / 323"
[1] "317 / 323 ..."
[1] "Station id: 5522"
[1] "317 / 323"
[1] "318 / 323 ..."
[1] "Station id: 10843"


Some variables have non-numeric values (spd_max_gust), for stations: 10843

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "318 / 323"
[1] "319 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "319 / 323"
[1] "320 / 323 ..."
[1] "Station id: 10843"


Some variables have non-numeric values (spd_max_gust), for stations: 10843

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "320 / 323"
[1] "321 / 323 ..."
[1] "Station id: 5522"
[1] "321 / 323"
[1] "322 / 323 ..."
[1] "Station id: 5237"


Some variables have non-numeric values (spd_max_gust), for stations: 5237

  Replaced all non-numeric entries with NA. Use 'string_as = NULL' to keep as characters (see ?weather_dl).



[1] "322 / 323"
[1] "323 / 323 ..."
[1] "Station id: 5532"
[1] "323 / 323"


Compute indexes for each station

In [15]:
# Initialize columns to record indexes
data_stations$temp_moy_5years <- NA
data_stations$prec_tot_5years <- NA
data_stations$sdi_5years <- NA
data_stations$gdd_5years <- NA

for (i in 1:nrow(data_stations)) {
  starts <- data_stations$DatePlantation[i] - (year_step:1)*365.25 # beginings of seasons
  ends <- data_stations$DateRecolte[i] - (year_step:1)*365.25 # ends of seasons, for previous years
  # Create vectors to record indices from previous years
  temp_moy_j <- rep(NA, length(starts))
  prectot_j <- temp_moy_j
  sdi_j <- temp_moy_j
  gdd_j <- temp_moy_j
  for (j in 1:length(starts)) {
    # Filter the downloaded weather data tables to keep only data measured between the season dates
    season <- station_weather[[i]] %>%
      filter(date >= starts[j]) %>%
      filter(date <= ends[j])
    temp_moy_j[j] <- mean(season$mean_temp, na.rm = TRUE) # average temperature of the season
    prectot_j[j] <- sum(season$total_precip, na.rm = TRUE) # total precipitations of the season
    sdi_j[j] <- SDI_f(season$total_precip) # season SDI
    gdd_j[j] <- GDD_f(season$mean_temp, delim = 5) # season GDD
  }
  data_stations$temp_moy_5years[i] <- mean(temp_moy_j, na.rm = TRUE) # average temperature
  data_stations$prec_tot_5years[i] <- mean(prectot_j, na.rm = TRUE) # average total rainfalls
  data_stations$sdi_5years[i] <- mean(sdi_j, na.rm = TRUE) # average SDI
  data_stations$gdd_5years[i] <- mean(gdd_j, na.rm = TRUE) # average GDD
}

"Unknown or uninitialised column: 'mean_temp'."

In [16]:
write_csv(x = data_stations, path = "output/weather_stations.csv") # a backup

If you've loaded the `weather_stations.csv`, continue from here. 

Insert into the main table

In [6]:
weath_col <- c(
            'Annee', 'LatDD', 'LonDD', 'DatePlantation', 'DateRecolte', 
            'temp_moy_5years', 'prec_tot_5years', 'sdi_5years', 'gdd_5years'
            )

In [7]:
data.frame(weath_col, weath_col %in% colnames(data_df))

weath_col,weath_col..in..colnames.data_df.
<fct>,<lgl>
Annee,True
LatDD,True
LonDD,True
DatePlantation,True
DateRecolte,True
temp_moy_5years,False
prec_tot_5years,False
sdi_5years,False
gdd_5years,False


In [8]:
data_df <- left_join(data_df, data_stations[weath_col], 
                     by = c("Annee", "LatDD", "LonDD", "DatePlantation", "DateRecolte"))

In [9]:
wcol <- c('temp_moy_5years', 'prec_tot_5years', 'sdi_5years', 'gdd_5years')
data.frame(wcol, wcol %in% colnames(data_df))

wcol,wcol..in..colnames.data_df.
<fct>,<lgl>
temp_moy_5years,True
prec_tot_5years,True
sdi_5years,True
gdd_5years,True


In [10]:
write_csv(data_df, "output/pr_potato_df.csv") # backup