diff --git a/D1_data_assembly_20190130.html b/D1_data_assembly_20190130.html new file mode 100644 index 0000000..1f6c9fc --- /dev/null +++ b/D1_data_assembly_20190130.html @@ -0,0 +1,7856 @@ + + + + + + + + + + + + + + + +D1_data_assembly_20190130 + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + +
setwd("C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/")
+library(rgdal)
+
## Loading required package: sp
+
## rgdal: version: 1.3-4, (SVN revision 766)
+##  Geospatial Data Abstraction Library extensions to R successfully loaded
+##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
+##  Path to GDAL shared files: C:/Program Files/R/R-3.5.1/library/rgdal/gdal
+##  GDAL binary built with GEOS: TRUE 
+##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
+##  Path to PROJ.4 shared files: C:/Program Files/R/R-3.5.1/library/rgdal/proj
+##  Linking to sp version: 1.3-1
+
library(sp)
+library(dplyr)
+
## 
+## Attaching package: 'dplyr'
+
## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+
## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+
library(sf)
+
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
+
library(ggplot2)
+library(viridis)
+
## Loading required package: viridisLite
+
library(eurostat)
+
geodata <- get_eurostat_geospatial(output_class = "spdf", year = "2016", resolution = "60") # NOTE: NUTS 2016 is used only for crop, livestock, and accounts processing
+
## 
+## COPYRIGHT NOTICE
+## 
+## When data downloaded from this page 
+## <http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units>
+## is used in any printed or electronic publication, 
+## in addition to any other provisions 
+## applicable to the whole Eurostat website, 
+## data source will have to be acknowledged 
+## in the legend of the map and 
+## in the introductory page of the publication 
+## with the following copyright notice:
+## 
+## - EN: (C) EuroGeographics for the administrative boundaries
+## - FR: (C) EuroGeographics pour les limites administratives
+## - DE: (C) EuroGeographics bezuglich der Verwaltungsgrenzen
+## 
+## For publications in languages other than 
+## English, French or German, 
+## the translation of the copyright notice 
+## in the language of the publication shall be used.
+## 
+## If you intend to use the data commercially, 
+## please contact EuroGeographics for 
+## information regarding their licence agreements.
+## 
+
## SpatialPolygonDataFrame at resolution 1:60 read from local file
+
## 
+## # --------------------------
+## HEADS UP!!
+## 
+## Function now returns the data in 'sf'-class (simple features) 
+## by default which is different 
+## from previous behaviour's 'SpatialPolygonDataFrame'. 
+## 
+## If you prefer either 'SpatialPolygonDataFrame' or 
+## fortified 'data_frame' (for ggplot2::geom_polygon), 
+## please specify it explicitly to 'output_class'-argument!
+## 
+## # --------------------------          
+## 
+
nuts <- readOGR(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='NUTS_RG_01M_2013_3035_LEVL_2') # region polygons from the file geodatabase; the NUTS_ID column of its attribute table later becomes the key column of dbase
+
## OGR data source with driver: OpenFileGDB 
+## Source: "C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb", layer: "NUTS_RG_01M_2013_3035_LEVL_2"
+## with 320 features
+## It has 7 fields
+
head(nuts@data)
+
##   CNTR_CODE FID_1 LEVL_CODE NUTS_ID         NUTS_NAME Shape_Length
+## 1        AT  AT11         2    AT11   Burgenland (AT)     628921.6
+## 2        AT  AT22         2    AT22        Steiermark     814700.6
+## 3        AT  AT12         2    AT12 Niederösterreich    1044711.5
+## 4        AT  AT13         2    AT13              Wien     116902.7
+## 5        AT  AT21         2    AT21          Kärnten     625727.5
+## 6        AT  AT31         2    AT31   Oberösterreich     790984.0
+##    Shape_Area
+## 1  3963509482
+## 2 16414303341
+## 3 19201725666
+## 4   411979159
+## 5  9541848203
+## 6 11984617500
+
str(nuts@data)
+
## 'data.frame':    320 obs. of  7 variables:
+##  $ CNTR_CODE   : Factor w/ 35 levels "AT","BE","BG",..: 1 1 1 1 1 1 1 2 2 1 ...
+##  $ FID_1       : Factor w/ 320 levels "AT11","AT12",..: 1 5 2 3 4 6 7 11 12 8 ...
+##  $ LEVL_CODE   : int  2 2 2 2 2 2 2 2 2 2 ...
+##  $ NUTS_ID     : Factor w/ 320 levels "AT11","AT12",..: 1 5 2 3 4 6 7 11 12 8 ...
+##  $ NUTS_NAME   : Factor w/ 320 levels "Östra Mellansverige",..: 40 271 192 312 137 209 255 231 235 289 ...
+##  $ Shape_Length: num  628922 814701 1044711 116903 625728 ...
+##  $ Shape_Area  : num  3.96e+09 1.64e+10 1.92e+10 4.12e+08 9.54e+09 ...
+
# SDG variables: read the 'SDGs_database' shapefile layer; the code below works on its attribute table (sdg_data@data)
+sdg_data <- readOGR(dsn='C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database', layer='SDGs_database')
+
## OGR data source with driver: ESRI Shapefile 
+## Source: "C:\Users\mu5106sc\Dropbox\STAGS\SDG_data_eurostat\Final_database", layer: "SDGs_database"
+## with 471 features
+## It has 28 fields
+
head(sdg_data@data)
+
##    geo STAT_LEVL_  SHAPE_AREA  SHAPE_LEN risk_pov  factor_in org_farm
+## 0   AT          0 10.14648271 20.7854134 18.43333 2315.97000 13.56732
+## 1  AT1          1  2.94056324  9.5340600       NA 1070.85000       NA
+## 2 AT11          2  0.53145726  4.7954485 13.73333  173.70857 16.89976
+## 3 AT12          2  2.37700914  8.3963979 13.83333  884.43857 12.43632
+## 4 AT13          2  0.03209684  0.9114204 27.23333   12.70143 14.46384
+## 5  AT2          1  3.09701419  9.7616839       NA  505.33857       NA
+##   train35bas train35ful train_bas train_ful nitr_high nitr_mod nitr_poor
+## 0         NA         NA        NA        NA  64.58924 20.20774  15.20302
+## 1         NA         NA        NA        NA        NA       NA        NA
+## 2  0.1375661  0.3333333 0.1243050 0.1779190        NA       NA        NA
+## 3  0.2084775  0.4809689 0.2534787 0.3449437        NA       NA        NA
+## 4  0.3750000  0.7500000 0.1753247 0.4740260        NA       NA        NA
+## 5         NA         NA        NA        NA        NA       NA        NA
+##   irrigated irrig_vol      energy ren_energy gdp_rural unemp_yout
+## 0     1.350      2.16 113.4390710   169.3286 0.7992295   61.35624
+## 1        NA        NA          NA         NA        NA         NA
+## 2     5.850        NA   7.5699818         NA        NA         NA
+## 3     2.650        NA  37.8083090         NA        NA         NA
+## 4    10.525        NA   0.3336333         NA        NA         NA
+## 5        NA        NA          NA         NA        NA         NA
+##   unemp_rate unemp_long  pesticides    forest    artific soil_loss
+## 0    104.716   56.66667 1915425.167        NA         NA     7.324
+## 1         NA         NA          NA        NA         NA     2.164
+## 2         NA         NA  127819.574 0.3161203 0.04355635     1.842
+## 3         NA         NA  638395.448 0.4286079 0.04875064     2.236
+## 4         NA         NA    5633.417 0.1469534 0.73118280     1.014
+## 5         NA         NA          NA        NA         NA     7.927
+##   com_birds farm_birds
+## 0        NA      65.98
+## 1        NA         NA
+## 2        NA         NA
+## 3        NA         NA
+## 4        NA         NA
+## 5        NA         NA
+
str(sdg_data@data)
+
## 'data.frame':    471 obs. of  28 variables:
+##  $ geo       : Factor w/ 471 levels "AT","AT1","AT11",..: 1 2 3 4 5 6 7 8 9 10 ...
+##  $ STAT_LEVL_: int  0 1 2 2 2 1 2 2 1 2 ...
+##  $ SHAPE_AREA: num  10.1465 2.9406 0.5315 2.377 0.0321 ...
+##  $ SHAPE_LEN : num  20.785 9.534 4.795 8.396 0.911 ...
+##  $ risk_pov  : num  18.4 NA 13.7 13.8 27.2 ...
+##  $ factor_in : num  2316 1070.8 173.7 884.4 12.7 ...
+##  $ org_farm  : num  13.6 NA 16.9 12.4 14.5 ...
+##  $ train35bas: num  NA NA 0.138 0.208 0.375 ...
+##  $ train35ful: num  NA NA 0.333 0.481 0.75 ...
+##  $ train_bas : num  NA NA 0.124 0.253 0.175 ...
+##  $ train_ful : num  NA NA 0.178 0.345 0.474 ...
+##  $ nitr_high : num  64.6 NA NA NA NA ...
+##  $ nitr_mod  : num  20.2 NA NA NA NA ...
+##  $ nitr_poor : num  15.2 NA NA NA NA ...
+##  $ irrigated : num  1.35 NA 5.85 2.65 10.53 ...
+##  $ irrig_vol : num  2.16 NA NA NA NA NA NA NA NA NA ...
+##  $ energy    : num  113.439 NA 7.57 37.808 0.334 ...
+##  $ ren_energy: num  169 NA NA NA NA ...
+##  $ gdp_rural : num  0.799 NA NA NA NA ...
+##  $ unemp_yout: num  61.4 NA NA NA NA ...
+##  $ unemp_rate: num  105 NA NA NA NA ...
+##  $ unemp_long: num  56.7 NA NA NA NA ...
+##  $ pesticides: num  1915425 NA 127820 638395 5633 ...
+##  $ forest    : num  NA NA 0.316 0.429 0.147 ...
+##  $ artific   : num  NA NA 0.0436 0.0488 0.7312 ...
+##  $ soil_loss : num  7.32 2.16 1.84 2.24 1.01 ...
+##  $ com_birds : num  NA NA NA NA NA NA NA NA NA NA ...
+##  $ farm_birds: num  66 NA NA NA NA ...
+
names(sdg_data@data)
+
##  [1] "geo"        "STAT_LEVL_" "SHAPE_AREA" "SHAPE_LEN"  "risk_pov"  
+##  [6] "factor_in"  "org_farm"   "train35bas" "train35ful" "train_bas" 
+## [11] "train_ful"  "nitr_high"  "nitr_mod"   "nitr_poor"  "irrigated" 
+## [16] "irrig_vol"  "energy"     "ren_energy" "gdp_rural"  "unemp_yout"
+## [21] "unemp_rate" "unemp_long" "pesticides" "forest"     "artific"   
+## [26] "soil_loss"  "com_birds"  "farm_birds"
+
# Keep columns 1:6 (geo, STAT_LEVL_, SHAPE_AREA, SHAPE_LEN, risk_pov, factor_in), 8:15 (training, nitrate and irrigated variables) and 24:28 (forest, artific, soil_loss, com_birds, farm_birds).
+# Dropped here: org_farm (7) and columns 16:23 (irrig_vol, energy, ren_energy, gdp_rural, unemp_yout, unemp_rate, unemp_long, pesticides); organic farming, energies, employment and GDP are added back later.
+sdg.dat <- sdg_data@data[,c(1:6,8:15,24:28)]
+
+# Replace SDG variables that were corrected after the initial processing of the database.
+# Organic farming
+# header = TRUE is spelled out in full: head=T depended on partial argument matching and on T not having been reassigned.
+org.farm <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal2/Percentage_organic_farming/org_farm.csv", header = TRUE)
+head(org.farm)
+
##    geo STAT_LEVL_ organic_farms_mean ag_area_mean  org_farm
+## 1   AT          0           396606.0    3063526.0 12.946063
+## 2  AT1          1           119970.0    1138110.0 10.541160
+## 3 AT11          2            36312.5     186847.5 19.434298
+## 4 AT12          2           123420.0     919957.5 13.415837
+## 5 AT13          2             1307.5       7952.5 16.441371
+## 6  AT2          1            75990.0     790560.0  9.612174
+
sdg.dat <- left_join(sdg.dat, org.farm[,c(1,5)]) # columns 1 and 5 of org.farm are geo and org_farm; no 'by' is given, so dplyr joins on the only shared column, geo
+
## Joining, by = "geo"
+
# Energy consumption in agriculture, and the renewable share of that energy.
+# header = TRUE is spelled out in full: head=T depended on partial argument matching and on T not having been reassigned.
+energy.rt <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal7/Energy_use/energy_rate_in_ag_20190104.csv", header = TRUE)
+ren.energy <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal7/Renewable_energy/ren_energy_pct_in_ag_20190104.csv", header = TRUE)
+
+head(energy.rt)
+
##   geo energy_use_mean ag_energy  energy_rt
+## 1  AT       518.52857 226.87693 0.08319988
+## 2  BE       736.84286 489.26256 0.37408254
+## 3  BG       191.40000  98.50132 0.02117880
+## 4  CY        39.25714  13.82370 0.12644017
+## 5  CZ       590.04286 322.88530 0.09247833
+## 6  DE         0.00000   0.00000 0.00000000
+
head(ren.energy)
+
##   geo ag_energy ag_ren_energy ren_nrg_pct
+## 1  AT 226.87693     74.088003   32.655591
+## 2  BE 489.26256     28.276851    5.779484
+## 3  BG  98.50132      4.300886    4.366323
+## 4  CY  13.82370      1.307920    9.461426
+## 5  CZ 322.88530     53.706075   16.633174
+## 6  DE   0.00000      0.000000          NA
+
sdg.dat <- left_join(sdg.dat, energy.rt[,c(1,4)]) # columns 1 and 4 of energy.rt are geo and energy_rt; joined on the shared column geo
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factors with different levels, coercing to
+## character vector
+
sdg.dat <- left_join(sdg.dat, ren.energy[,c(1,4)]) # columns 1 and 4 of ren.energy are geo and ren_nrg_pct; joined on the shared column geo
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
# Rename the joined renewable-energy share by name instead of by position (it was column 22), so the rename cannot hit the wrong column if the set of selected or joined variables changes.
+names(sdg.dat)[names(sdg.dat) == "ren_nrg_pct"] <- "renew_pct"
+
+# Show the regions that have an energy rate, next to the renamed renewable share.
+sdg.dat[!is.na(sdg.dat$energy_rt), c("geo", "energy_rt", "renew_pct")]
+
##     geo  energy_rt  renew_pct
+## 1    AT 0.08319988 32.6555914
+## 18   BG 0.02117880  4.3663233
+## 27   CY 0.12644017  9.4614265
+## 30   CZ 0.09247833 16.6331743
+## 48   BE 0.37408254  5.7794839
+## 75   DE 0.00000000         NA
+## 105  DK 0.20918463  8.2563274
+## 112  EE 0.03396048  3.2504309
+## 116  EL 0.04639303  7.5043321
+## 133  ES 0.06894380  3.0152604
+## 174  HR 0.05016945  0.8461058
+## 178  HU 0.07918869  6.4675409
+## 194  FI 0.02956624 22.5146831
+## 202  FR 0.09725156  3.3937818
+## 230  IE 0.04240159  0.0000000
+## 260  LT 0.02018419 11.0719610
+## 263  LU 0.11485572 14.0080046
+## 266  LV 0.02838868  8.9837713
+## 275  MT 0.42373931  0.0000000
+## 278  NL 1.75149448  3.6927288
+## 290  IT 0.12395977  1.1361492
+## 301  PT 0.05362489  1.0188356
+## 312  RO 0.02160031  2.4277124
+## 342  PL 0.14355187 14.3339623
+## 358  SE 0.01350281 41.0113526
+## 365  SK 0.03502762 13.0074813
+## 410  UK 0.04487651 23.7198977
+## 419  SI 0.04163011  3.9145197
+
# Renewable energy production in agriculture
+# header = TRUE is spelled out in full: head=T depended on partial argument matching and on T not having been reassigned.
+ren.nrg.prod <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal7/Renewable_energy/c43_en_clean.csv", header = TRUE)
+head(ren.nrg.prod)
+
##   geo       ktoe pct_ren_prod
+## 1  BE   833.1227    28.159356
+## 2  BG    31.0925     1.529691
+## 3  CZ   773.6845    18.079698
+## 4  DK   128.6000     3.644711
+## 5  DE 10329.3300    26.563039
+## 6  EE         NA           NA
+
# Rename and select by column name rather than by position (pct_ren_prod was column 3), so a reordered CSV cannot silently rename or join the wrong column.
+names(ren.nrg.prod)[names(ren.nrg.prod) == "pct_ren_prod"] <- "renew_prod"
+# No 'by' is given, so dplyr joins on the only shared column, geo.
+sdg.dat <- left_join(sdg.dat, ren.nrg.prod[,c("geo", "renew_prod")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
# Gross nutrient balances: one row per geo with columns gross_N and gross_P
+gnb <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal2/Gross_nutrient_balance/gross_nutrient_balance_mean_allnuts.csv")
+head(gnb)
+
##   geo   gross_N   gross_P
+## 1  AT  32.57143  1.833333
+## 2  BE 138.33333  5.333333
+## 3  BG  20.33333 -6.000000
+## 4  CH  58.83333  1.833333
+## 5  CY 190.16667 31.000000
+## 6  CZ  80.28571 -2.428571
+
sdg.dat <- left_join(sdg.dat, gnb) # adds gross_N and gross_P; joined on the shared column geo
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
# Factor income from Eurostat table agr_r_accts has gaps; these can be filled with NUTS0 data from table aact_eaa01, read here as factor_income_mean per geo
+afi.nuts0 <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal2/Agricultural_factor_income/factor_income_NUTS0_mean_allnuts.csv")
+head(afi.nuts0)
+
##   geo factor_income_mean
+## 1  BG          2032.4813
+## 2  CH          3937.8550
+## 3  CY           320.0075
+## 4  AL                 NA
+## 5  CZ          1904.5900
+## 6  BE          2179.0687
+
(afi.na <- sdg.dat[which(is.na(sdg.dat$factor_in) & sdg.dat$STAT_LEVL_ == 0), 'geo']) # geo codes of level-0 rows with no factor income; the outer parentheses also print the result
+
##  [1] "CY" "BE" "HR" "LI" "LT" "LU" "LV" "ME" "MK" "MT" "NO" "PL" "TR" "SI"
+
(afi.nuts0.geo <- afi.nuts0[!is.na(afi.nuts0$factor_income_mean),'geo']) # geo codes for which the NUTS0 table has a non-missing factor income (a factor, as the printed levels show)
+
##  [1] BG CH CY CZ BE AT DE DK EE EL ES FI HR FR HU IE IS IT LT LU LV MK MT
+## [24] NL NO PL PT RO SE SI SK UK
+## 2017 Levels: AL AL0 AL01 AL011 AL012 AL013 AL014 AL015 AL02 AL021 ... UKN16
+
sdg.dat[which(is.na(sdg.dat$factor_in) & sdg.dat$geo %in% afi.nuts0.geo),c('geo', 'factor_in')] # rows that are missing factor income but have a usable NUTS0 value, i.e. the rows the fill below will change
+
##     geo factor_in
+## 27   CY        NA
+## 48   BE        NA
+## 174  HR        NA
+## 260  LT        NA
+## 263  LU        NA
+## 266  LV        NA
+## 272  MK        NA
+## 275  MT        NA
+## 333  NO        NA
+## 342  PL        NA
+## 419  SI        NA
+
# Fill missing factor income from the NUTS0 table in one vectorised step instead of rescanning sdg.dat once per country:
+# pick the rows whose factor_in is NA and whose geo has a non-missing NUTS0 value, then look each one up by geo.
+# match() compares factors and character vectors as character, and returns the first matching row for each geo.
+afi.fill <- which(is.na(sdg.dat$factor_in) & sdg.dat$geo %in% afi.nuts0.geo)
+sdg.dat[afi.fill, 'factor_in'] <- afi.nuts0[match(sdg.dat$geo[afi.fill], afi.nuts0$geo), 'factor_income_mean']
+# Show every level-0 region that started without factor income; those still NA have no NUTS0 value to fill from.
+sdg.dat[which(sdg.dat$geo %in% afi.na), c('geo', 'factor_in')]
+
##     geo   factor_in
+## 27   CY   320.00750
+## 48   BE  2179.06875
+## 174  HR   937.82375
+## 257  LI          NA
+## 260  LT   931.65125
+## 263  LU    79.65875
+## 266  LV   471.46500
+## 269  ME          NA
+## 272  MK   614.92000
+## 275  MT    68.66250
+## 333  NO  1832.13000
+## 342  PL 10472.04500
+## 371  TR          NA
+## 419  SI   439.74250
+
# Additional consensus variables: read the 'Add_con_vars_database' shapefile layer; the code below works on its attribute table (con_data@data)
+con_data <- readOGR(dsn='C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database', layer='Add_con_vars_database')
+
## OGR data source with driver: ESRI Shapefile 
+## Source: "C:\Users\mu5106sc\Dropbox\STAGS\SDG_data_eurostat\Final_database", layer: "Add_con_vars_database"
+## with 471 features
+## It has 42 fields
+
head(con_data@data)
+
##    geo STAT_LEVL_  SHAPE_AREA  SHAPE_LEN    total wintercrop cover_crop
+## 0   AT          0 10.14648271 20.7854134 0.747063  0.3504656  0.2685136
+## 1  AT1          1  2.94056324  9.5340600       NA         NA         NA
+## 2 AT11          2  0.53145726  4.7954485       NA         NA         NA
+## 3 AT12          2  2.37700914  8.3963979       NA         NA         NA
+## 4 AT13          2  0.03209684  0.9114204       NA         NA         NA
+## 5  AT2          1  3.09701419  9.7616839       NA         NA         NA
+##    plant_res bare_soil conv_till cons_till   zero_till  nfert   arable
+## 0 0.01832581  0.109758        NA        NA          NA     NA 50.01522
+## 1         NA        NA        NA        NA          NA     NA 76.90925
+## 2         NA        NA 0.6182190 0.3199207 0.025012794 7.6840 83.64566
+## 3         NA        NA 0.6226791 0.3280354 0.019896256 7.4528 76.22380
+## 4         NA        NA 0.5109890 0.4047619 0.007326007 7.4970 79.80050
+## 5         NA        NA        NA        NA          NA     NA 30.51920
+##   grassland permanent  wheat_a     rye_a   barley_a oats_A  crnmaize_a
+## 0 47.536571  2.389535 304.4137 47.565000 149.032500 27.995 206.2050000
+## 1 18.041880  4.257529       NA 38.207143  93.364286     NA  88.0371429
+## 2  8.715722  7.545200       NA  5.232857   9.365714     NA  22.7585714
+## 3 20.140837  3.585850       NA 32.715714  83.720000     NA  65.1400000
+## 4 10.099751 10.099751       NA  0.260000   0.280000     NA   0.1357143
+## 5 57.091839  2.448436       NA  2.724286  14.268571     NA  67.6742857
+##   fruits_a    grmaize_a olives_a citrus_a vegetab_a   wine_a   bovine
+## 0   8.9075 87.270000000        0        0   17.2775 44.77875       NA
+## 1       NA 33.821428571        0        0        NA       NA       NA
+## 2       NA  3.625714286        0        0        NA       NA  20.9925
+## 3       NA 30.187142857        0        0        NA       NA 444.7362
+## 4       NA  0.007142857        0        0        NA       NA   0.1000
+## 5       NA 20.728571429        0        0        NA       NA       NA
+##   milk_cows      pigs    sheep     goats org_carbon labour_for soil_prod
+## 0        NA        NA       NA        NA      262.1    77677.5        NA
+## 1        NA        NA       NA        NA         NA         NA        NA
+## 2   4.21875  48.02125  5.56375 241.68000         NA     3632.5         6
+## 3 103.90000 798.19571 72.91375  15.29125         NA    22395.0         6
+## 4   0.02625   0.18750 95.23125  65.09875         NA      347.5         6
+## 5        NA        NA       NA        NA         NA         NA        NA
+##   wheat_y   rye_y barley_y   oat_y maize_y grmaize_y lab_prod
+## 0  5.3825 4.31125   5.3575 3.96125 10.0475  45.49125 23.00056
+## 1      NA      NA       NA      NA      NA        NA       NA
+## 2      NA      NA       NA      NA      NA        NA       NA
+## 3      NA      NA       NA      NA      NA        NA       NA
+## 4      NA      NA       NA      NA      NA        NA       NA
+## 5      NA      NA       NA      NA      NA        NA       NA
+
str(con_data@data)
+
## 'data.frame':    471 obs. of  42 variables:
+##  $ geo       : Factor w/ 471 levels "AT","AT1","AT11",..: 1 2 3 4 5 6 7 8 9 10 ...
+##  $ STAT_LEVL_: int  0 1 2 2 2 1 2 2 1 2 ...
+##  $ SHAPE_AREA: num  10.1465 2.9406 0.5315 2.377 0.0321 ...
+##  $ SHAPE_LEN : num  20.785 9.534 4.795 8.396 0.911 ...
+##  $ total     : num  0.747 NA NA NA NA ...
+##  $ wintercrop: num  0.35 NA NA NA NA ...
+##  $ cover_crop: num  0.269 NA NA NA NA ...
+##  $ plant_res : num  0.0183 NA NA NA NA ...
+##  $ bare_soil : num  0.11 NA NA NA NA ...
+##  $ conv_till : num  NA NA 0.618 0.623 0.511 ...
+##  $ cons_till : num  NA NA 0.32 0.328 0.405 ...
+##  $ zero_till : num  NA NA 0.02501 0.0199 0.00733 ...
+##  $ nfert     : num  NA NA 7.68 7.45 7.5 ...
+##  $ arable    : num  50 76.9 83.6 76.2 79.8 ...
+##  $ grassland : num  47.54 18.04 8.72 20.14 10.1 ...
+##  $ permanent : num  2.39 4.26 7.55 3.59 10.1 ...
+##  $ wheat_a   : num  304 NA NA NA NA ...
+##  $ rye_a     : num  47.56 38.21 5.23 32.72 0.26 ...
+##  $ barley_a  : num  149.03 93.36 9.37 83.72 0.28 ...
+##  $ oats_A    : num  28 NA NA NA NA ...
+##  $ crnmaize_a: num  206.205 88.037 22.759 65.14 0.136 ...
+##  $ fruits_a  : num  8.91 NA NA NA NA ...
+##  $ grmaize_a : num  87.27 33.82143 3.62571 30.18714 0.00714 ...
+##  $ olives_a  : num  0 0 0 0 0 0 0 0 0 0 ...
+##  $ citrus_a  : num  0 0 0 0 0 0 0 0 0 0 ...
+##  $ vegetab_a : num  17.3 NA NA NA NA ...
+##  $ wine_a    : num  44.8 NA NA NA NA ...
+##  $ bovine    : num  NA NA 21 444.7 0.1 ...
+##  $ milk_cows : num  NA NA 4.2188 103.9 0.0262 ...
+##  $ pigs      : num  NA NA 48.021 798.196 0.188 ...
+##  $ sheep     : num  NA NA 5.56 72.91 95.23 ...
+##  $ goats     : num  NA NA 241.7 15.3 65.1 ...
+##  $ org_carbon: num  262 NA NA NA NA ...
+##  $ labour_for: num  77678 NA 3632 22395 348 ...
+##  $ soil_prod : int  NA NA 6 6 6 NA 6 6 NA 6 ...
+##  $ wheat_y   : num  5.38 NA NA NA NA ...
+##  $ rye_y     : num  4.31 NA NA NA NA ...
+##  $ barley_y  : num  5.36 NA NA NA NA ...
+##  $ oat_y     : num  3.96 NA NA NA NA ...
+##  $ maize_y   : num  10 NA NA NA NA ...
+##  $ grmaize_y : num  45.5 NA NA NA NA ...
+##  $ lab_prod  : num  23 NA NA NA NA ...
+
names(con_data@data)
+
##  [1] "geo"        "STAT_LEVL_" "SHAPE_AREA" "SHAPE_LEN"  "total"     
+##  [6] "wintercrop" "cover_crop" "plant_res"  "bare_soil"  "conv_till" 
+## [11] "cons_till"  "zero_till"  "nfert"      "arable"     "grassland" 
+## [16] "permanent"  "wheat_a"    "rye_a"      "barley_a"   "oats_A"    
+## [21] "crnmaize_a" "fruits_a"   "grmaize_a"  "olives_a"   "citrus_a"  
+## [26] "vegetab_a"  "wine_a"     "bovine"     "milk_cows"  "pigs"      
+## [31] "sheep"      "goats"      "org_carbon" "labour_for" "soil_prod" 
+## [36] "wheat_y"    "rye_y"      "barley_y"   "oat_y"      "maize_y"   
+## [41] "grmaize_y"  "lab_prod"
+
# Keep columns 1:4 (geo, STAT_LEVL_, SHAPE_AREA, SHAPE_LEN), 10:16 (conv_till, cons_till, zero_till, nfert, arable, grassland, permanent) and 35 (soil_prod).
+# Labour productivity, soil cover, SOC, crop and livestock data are added later.
+con.dat <- con_data@data[,c(1:4,10:16,35)]
+
+# Add new Eurostat variables to con.dat that were processed after the Add_con_vars_database was created.
+# header = TRUE is spelled out in full: head=T depended on partial argument matching and on T not having been reassigned.
+awu_tot <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Additional_consensus_variables/C22_labour_force_awu_total_mean_allnuts_20190104.csv", header = TRUE)
+head(awu_tot)
+
##    geo c22_labour_force
+## 1   AT         138800.0
+## 2  AT1               NA
+## 3 AT11           7007.5
+## 4 AT12          38360.0
+## 5 AT13           1975.0
+## 6  AT2               NA
+
tot_uaa <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Additional_consensus_variables/uaa_mean_allnuts.csv", header = TRUE) # header spelled out in full instead of the partially matched head=T
+head(tot_uaa)
+
##   geo uaa_mean
+## 1  BG  4559720
+## 2  CH       NA
+## 3  CY   110630
+## 4  AL       NA
+## 5  CZ  3473440
+## 6  BE  1331075
+
gva <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Additional_consensus_variables/gross_value_added_mean_20190104.csv", header = TRUE) # header spelled out in full instead of the partially matched head=T
+head(gva)
+
##    geo   gva_mean
+## 1   AT 2769.36857
+## 2  AT1 1172.67143
+## 3 AT11  177.76429
+## 4 AT12  975.25143
+## 5 AT13   19.65571
+## 6  AT2  641.63571
+
# The gross value added from Eurostat table agr_r_accts has some gaps that can be filled with NUTS0 data from table aact_eaa01.
+# header = TRUE is spelled out in full: head=T depended on partial argument matching and on T not having been reassigned.
+gva.2 <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Additional_consensus_variables/gross_value_added_NUTS0_mean_allnuts_20190108.csv", header = TRUE)
+head(gva.2)
+
##   geo c14_eea_gva
+## 1  BG   1671.6575
+## 2  CH   3323.7038
+## 3  CY    310.0225
+## 4  AL          NA
+## 5  CZ   1432.8025
+## 6  BE   2332.7675
+
(gva.na <- gva[is.na(gva$gva_mean), 'geo']) # full column name: gva$gva only resolved to gva_mean through $ partial matching, which yields NULL on a tibble or once a second gva* column exists
+
##  [1] BE3  BE31 BE32 BE33 BE34 BE35 CY   CY0  CY00 CZ01 BE   BE1  BE10 BE2 
+## [15] BE21 BE22 BE23 BE24 BE25 FRA5 HR   FI1B FI1C FI1D LI   LI0  LI00 LT  
+## [29] LT0  LT00 LU   LU0  LU00 LV   LV0  LV00 ME   ME0  ME00 MK   MK0  MK00
+## [43] MT   MT0  MT00 NO   NO0  NO01 NO02 NO03 NO04 NO05 NO06 NO07 SI04 TR  
+## [57] TR1  TR10 TR2  TR21 TR22 TR3  TR31 TR32 TR33 TR4  TR41 TR42 TR5  TR51
+## [71] TR52 TR6  TR61 TR62 TR63 TR7  TR71 TR72 TR8  TR81 TR82 TR83 TR9  TR90
+## [85] TRA  TRA1 TRA2 TRB  TRB1 TRB2 TRC  TRC1 TRC2 TRC3 SI   SI0  SI03
+## 471 Levels: AT AT1 AT11 AT12 AT13 AT2 AT21 AT22 AT3 AT31 AT32 AT33 ... UKN0
+
(gva.2.geo <- gva.2[!is.na(gva.2$c14_eea_gva),'geo']) # geo codes for which the NUTS0 table has a non-missing gross value added (a factor, as the printed levels show)
+
##  [1] BG CH CY CZ BE AT DE DK EE EL ES FI HR FR HU IE IS IT LT LU LV MK MT
+## [24] NL NO PL PT RO SE SI SK UK
+## 2017 Levels: AL AL0 AL01 AL011 AL012 AL013 AL014 AL015 AL02 AL021 ... UKN16
+
gva[which(is.na(gva$gva_mean) & gva$geo %in% gva.2.geo),] # rows the fill below will change; uses the full column name because gva$gva only worked through $ partial matching
+
##     geo gva_mean
+## 27   CY       NA
+## 48   BE       NA
+## 174  HR       NA
+## 260  LT       NA
+## 263  LU       NA
+## 266  LV       NA
+## 272  MK       NA
+## 275  MT       NA
+## 333  NO       NA
+## 419  SI       NA
+
# Fill missing gross value added from the NUTS0 table in one vectorised step instead of rescanning gva once per country.
+# The column is referenced by its full name: gva$gva only resolved to gva_mean through $ partial matching.
+# match() compares the two geo factors as character, and returns the first matching row for each geo.
+gva.fill <- which(is.na(gva$gva_mean) & gva$geo %in% gva.2.geo)
+gva[gva.fill, 'gva_mean'] <- gva.2[match(gva$geo[gva.fill], gva.2$geo), 'c14_eea_gva']
+# Show every region that started without a value; only rows whose geo appears in the NUTS0 table are filled.
+gva[which(gva$geo %in% gva.na),]
+
##      geo   gva_mean
+## 12   BE3         NA
+## 13  BE31         NA
+## 14  BE32         NA
+## 15  BE33         NA
+## 16  BE34         NA
+## 17  BE35         NA
+## 27    CY  310.02250
+## 28   CY0         NA
+## 29  CY00         NA
+## 32  CZ01         NA
+## 48    BE 2332.76750
+## 49   BE1         NA
+## 50  BE10         NA
+## 51   BE2         NA
+## 52  BE21         NA
+## 53  BE22         NA
+## 54  BE23         NA
+## 55  BE24         NA
+## 56  BE25         NA
+## 173 FRA5         NA
+## 174   HR 1057.60250
+## 197 FI1B         NA
+## 198 FI1C         NA
+## 199 FI1D         NA
+## 257   LI         NA
+## 258  LI0         NA
+## 259 LI00         NA
+## 260   LT 1019.96250
+## 261  LT0         NA
+## 262 LT00         NA
+## 263   LU  106.55875
+## 264  LU0         NA
+## 265 LU00         NA
+## 266   LV  311.59125
+## 267  LV0         NA
+## 268 LV00         NA
+## 269   ME         NA
+## 270  ME0         NA
+## 271 ME00         NA
+## 272   MK  671.34000
+## 273  MK0         NA
+## 274 MK00         NA
+## 275   MT   58.47625
+## 276  MT0         NA
+## 277 MT00         NA
+## 333   NO 2086.08375
+## 334  NO0         NA
+## 335 NO01         NA
+## 336 NO02         NA
+## 337 NO03         NA
+## 338 NO04         NA
+## 339 NO05         NA
+## 340 NO06         NA
+## 341 NO07         NA
+## 364 SI04         NA
+## 371   TR         NA
+## 372  TR1         NA
+## 373 TR10         NA
+## 374  TR2         NA
+## 375 TR21         NA
+## 376 TR22         NA
+## 377  TR3         NA
+## 378 TR31         NA
+## 379 TR32         NA
+## 380 TR33         NA
+## 381  TR4         NA
+## 382 TR41         NA
+## 383 TR42         NA
+## 384  TR5         NA
+## 385 TR51         NA
+## 386 TR52         NA
+## 387  TR6         NA
+## 388 TR61         NA
+## 389 TR62         NA
+## 390 TR63         NA
+## 391  TR7         NA
+## 392 TR71         NA
+## 393 TR72         NA
+## 394  TR8         NA
+## 395 TR81         NA
+## 396 TR82         NA
+## 397 TR83         NA
+## 398  TR9         NA
+## 399 TR90         NA
+## 400  TRA         NA
+## 401 TRA1         NA
+## 402 TRA2         NA
+## 403  TRB         NA
+## 404 TRB1         NA
+## 405 TRB2         NA
+## 406  TRC         NA
+## 407 TRC1         NA
+## 408 TRC2         NA
+## 409 TRC3         NA
+## 419   SI  450.92125
+## 420  SI0         NA
+## 421 SI03         NA
+
# Build con.dat2 (labour force, then gross value added) to add to con.dat; no 'by' is given, so dplyr joins on the shared column geo
+con.dat2 <- left_join(awu_tot, gva)
+
## Joining, by = "geo"
+
con.dat2 <- left_join(con.dat2, tot_uaa) # adds uaa_mean; joined on the shared column geo
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factors with different levels, coercing to
+## character vector
+
names(con.dat2)
+
## [1] "geo"              "c22_labour_force" "gva_mean"        
+## [4] "uaa_mean"
+
names(con.dat2)[2:4] <- c("tot_awu", "gva", "tot_uaa") # positional rename of c22_labour_force, gva_mean and uaa_mean (columns 2 to 4)
+head(con.dat2)
+
##    geo  tot_awu        gva tot_uaa
+## 1   AT 138800.0 2769.36857 2698320
+## 2  AT1       NA 1172.67143      NA
+## 3 AT11   7007.5  177.76429  181150
+## 4 AT12  38360.0  975.25143  895805
+## 5 AT13   1975.0   19.65571    7190
+## 6  AT2       NA  641.63571      NA
+
con.dat <- left_join(con.dat, con.dat2) # adds tot_awu, gva and tot_uaa; joined on the shared column geo
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
head(con.dat)
+
##    geo STAT_LEVL_  SHAPE_AREA  SHAPE_LEN conv_till cons_till   zero_till
+## 1   AT          0 10.14648271 20.7854134        NA        NA          NA
+## 2  AT1          1  2.94056324  9.5340600        NA        NA          NA
+## 3 AT11          2  0.53145726  4.7954485 0.6182190 0.3199207 0.025012794
+## 4 AT12          2  2.37700914  8.3963979 0.6226791 0.3280354 0.019896256
+## 5 AT13          2  0.03209684  0.9114204 0.5109890 0.4047619 0.007326007
+## 6  AT2          1  3.09701419  9.7616839        NA        NA          NA
+##    nfert   arable grassland permanent soil_prod  tot_awu        gva
+## 1     NA 50.01522 47.536571  2.389535        NA 138800.0 2769.36857
+## 2     NA 76.90925 18.041880  4.257529        NA       NA 1172.67143
+## 3 7.6840 83.64566  8.715722  7.545200         6   7007.5  177.76429
+## 4 7.4528 76.22380 20.140837  3.585850         6  38360.0  975.25143
+## 5 7.4970 79.80050 10.099751 10.099751         6   1975.0   19.65571
+## 6     NA 30.51920 57.091839  2.448436        NA       NA  641.63571
+##   tot_uaa
+## 1 2698320
+## 2      NA
+## 3  181150
+## 4  895805
+## 5    7190
+## 6      NA
+
# Empty frame for the whole database: one row per region in nuts, NUTS_ID first,
+# then every sdg.dat and con.dat variable except their first four (identifier and shape) columns.
+dbase <- as.data.frame(matrix(nrow = nrow(nuts@data), ncol = 1 + ncol(sdg.dat[, -c(1:4)]) + ncol(con.dat[, -c(1:4)])))
+dbase[, 1] <- nuts@data$NUTS_ID
+names(dbase) <- c("NUTS_ID", names(sdg.dat)[-c(1:4)], names(con.dat)[-c(1:4)])
+head(dbase)
+
##   NUTS_ID risk_pov factor_in train35bas train35ful train_bas train_ful
+## 1    AT11       NA        NA         NA         NA        NA        NA
+## 2    AT22       NA        NA         NA         NA        NA        NA
+## 3    AT12       NA        NA         NA         NA        NA        NA
+## 4    AT13       NA        NA         NA         NA        NA        NA
+## 5    AT21       NA        NA         NA         NA        NA        NA
+## 6    AT31       NA        NA         NA         NA        NA        NA
+##   nitr_high nitr_mod nitr_poor irrigated forest artific soil_loss
+## 1        NA       NA        NA        NA     NA      NA        NA
+## 2        NA       NA        NA        NA     NA      NA        NA
+## 3        NA       NA        NA        NA     NA      NA        NA
+## 4        NA       NA        NA        NA     NA      NA        NA
+## 5        NA       NA        NA        NA     NA      NA        NA
+## 6        NA       NA        NA        NA     NA      NA        NA
+##   com_birds farm_birds org_farm energy_rt renew_pct renew_prod gross_N
+## 1        NA         NA       NA        NA        NA         NA      NA
+## 2        NA         NA       NA        NA        NA         NA      NA
+## 3        NA         NA       NA        NA        NA         NA      NA
+## 4        NA         NA       NA        NA        NA         NA      NA
+## 5        NA         NA       NA        NA        NA         NA      NA
+## 6        NA         NA       NA        NA        NA         NA      NA
+##   gross_P conv_till cons_till zero_till nfert arable grassland permanent
+## 1      NA        NA        NA        NA    NA     NA        NA        NA
+## 2      NA        NA        NA        NA    NA     NA        NA        NA
+## 3      NA        NA        NA        NA    NA     NA        NA        NA
+## 4      NA        NA        NA        NA    NA     NA        NA        NA
+## 5      NA        NA        NA        NA    NA     NA        NA        NA
+## 6      NA        NA        NA        NA    NA     NA        NA        NA
+##   soil_prod tot_awu gva tot_uaa
+## 1        NA      NA  NA      NA
+## 2        NA      NA  NA      NA
+## 3        NA      NA  NA      NA
+## 4        NA      NA  NA      NA
+## 5        NA      NA  NA      NA
+## 6        NA      NA  NA      NA
+
nrow(dbase)
+
## [1] 320
+
# Work out, for each variable, which NUTS2 regions have their own data and
# fill the remaining ones from the parent NUTS1 or NUTS0 value where one
# exists. The same procedure is applied to every variable, but copying a
# NUTS1/NUTS0 value straight onto NUTS2 regions is only valid for ratios
# (proportions, percentages, rates); variables measured in absolute terms are
# edited in a later chunk.

# Bookkeeping list: per variable, one slot each for the region codes that
# supply NUTS2, NUTS1 and NUTS0 data, plus one for NUTS0 codes with no data.
level_tags <- c("n2.dat", "n1.dat", "n0.dat", "nuts0.na")
var_names <- names(dbase)[-1]
data.level <- vector("list", length(level_tags) * length(var_names))
names(data.level) <- unlist(lapply(level_tags, function(tag) {
  paste(var_names, tag, sep = ".")
}))
labels(data.level)
+
##   [1] "risk_pov.n2.dat"     "factor_in.n2.dat"    "train35bas.n2.dat"  
+##   [4] "train35ful.n2.dat"   "train_bas.n2.dat"    "train_ful.n2.dat"   
+##   [7] "nitr_high.n2.dat"    "nitr_mod.n2.dat"     "nitr_poor.n2.dat"   
+##  [10] "irrigated.n2.dat"    "forest.n2.dat"       "artific.n2.dat"     
+##  [13] "soil_loss.n2.dat"    "com_birds.n2.dat"    "farm_birds.n2.dat"  
+##  [16] "org_farm.n2.dat"     "energy_rt.n2.dat"    "renew_pct.n2.dat"   
+##  [19] "renew_prod.n2.dat"   "gross_N.n2.dat"      "gross_P.n2.dat"     
+##  [22] "conv_till.n2.dat"    "cons_till.n2.dat"    "zero_till.n2.dat"   
+##  [25] "nfert.n2.dat"        "arable.n2.dat"       "grassland.n2.dat"   
+##  [28] "permanent.n2.dat"    "soil_prod.n2.dat"    "tot_awu.n2.dat"     
+##  [31] "gva.n2.dat"          "tot_uaa.n2.dat"      "risk_pov.n1.dat"    
+##  [34] "factor_in.n1.dat"    "train35bas.n1.dat"   "train35ful.n1.dat"  
+##  [37] "train_bas.n1.dat"    "train_ful.n1.dat"    "nitr_high.n1.dat"   
+##  [40] "nitr_mod.n1.dat"     "nitr_poor.n1.dat"    "irrigated.n1.dat"   
+##  [43] "forest.n1.dat"       "artific.n1.dat"      "soil_loss.n1.dat"   
+##  [46] "com_birds.n1.dat"    "farm_birds.n1.dat"   "org_farm.n1.dat"    
+##  [49] "energy_rt.n1.dat"    "renew_pct.n1.dat"    "renew_prod.n1.dat"  
+##  [52] "gross_N.n1.dat"      "gross_P.n1.dat"      "conv_till.n1.dat"   
+##  [55] "cons_till.n1.dat"    "zero_till.n1.dat"    "nfert.n1.dat"       
+##  [58] "arable.n1.dat"       "grassland.n1.dat"    "permanent.n1.dat"   
+##  [61] "soil_prod.n1.dat"    "tot_awu.n1.dat"      "gva.n1.dat"         
+##  [64] "tot_uaa.n1.dat"      "risk_pov.n0.dat"     "factor_in.n0.dat"   
+##  [67] "train35bas.n0.dat"   "train35ful.n0.dat"   "train_bas.n0.dat"   
+##  [70] "train_ful.n0.dat"    "nitr_high.n0.dat"    "nitr_mod.n0.dat"    
+##  [73] "nitr_poor.n0.dat"    "irrigated.n0.dat"    "forest.n0.dat"      
+##  [76] "artific.n0.dat"      "soil_loss.n0.dat"    "com_birds.n0.dat"   
+##  [79] "farm_birds.n0.dat"   "org_farm.n0.dat"     "energy_rt.n0.dat"   
+##  [82] "renew_pct.n0.dat"    "renew_prod.n0.dat"   "gross_N.n0.dat"     
+##  [85] "gross_P.n0.dat"      "conv_till.n0.dat"    "cons_till.n0.dat"   
+##  [88] "zero_till.n0.dat"    "nfert.n0.dat"        "arable.n0.dat"      
+##  [91] "grassland.n0.dat"    "permanent.n0.dat"    "soil_prod.n0.dat"   
+##  [94] "tot_awu.n0.dat"      "gva.n0.dat"          "tot_uaa.n0.dat"     
+##  [97] "risk_pov.nuts0.na"   "factor_in.nuts0.na"  "train35bas.nuts0.na"
+## [100] "train35ful.nuts0.na" "train_bas.nuts0.na"  "train_ful.nuts0.na" 
+## [103] "nitr_high.nuts0.na"  "nitr_mod.nuts0.na"   "nitr_poor.nuts0.na" 
+## [106] "irrigated.nuts0.na"  "forest.nuts0.na"     "artific.nuts0.na"   
+## [109] "soil_loss.nuts0.na"  "com_birds.nuts0.na"  "farm_birds.nuts0.na"
+## [112] "org_farm.nuts0.na"   "energy_rt.nuts0.na"  "renew_pct.nuts0.na" 
+## [115] "renew_prod.nuts0.na" "gross_N.nuts0.na"    "gross_P.nuts0.na"   
+## [118] "conv_till.nuts0.na"  "cons_till.nuts0.na"  "zero_till.nuts0.na" 
+## [121] "nfert.nuts0.na"      "arable.nuts0.na"     "grassland.nuts0.na" 
+## [124] "permanent.nuts0.na"  "soil_prod.nuts0.na"  "tot_awu.nuts0.na"   
+## [127] "gva.nuts0.na"        "tot_uaa.nuts0.na"
+
# Populate the SDG indicator columns of `dbase` from `sdg.dat`.
#
# For every indicator (all columns of sdg.dat after the first four) we
#   1. find the NUTS2 regions without a value, their NUTS1 parents, and - where
#      those are empty too - their NUTS0 parents;
#   2. record in `data.level` which codes supply data at each level;
#   3. copy values into dbase coarsest-first (NUTS0, NUTS1, NUTS2) so that the
#      most detailed value available always ends up in the NUTS2 row.
#
# sdg.dat columns are addressed explicitly instead of via attach()/detach():
# with attach(), `geo` or `STAT_LEVL_` would silently resolve to any object of
# the same name in the global environment.
dbase_ids <- as.character(dbase$NUTS_ID)

for (i in names(sdg.dat)[-c(1:4)]) {
  no_value <- is.na(sdg.dat[[i]])

  # A parent code is the child code with its last character removed.
  nuts2.na <- sdg.dat$geo[sdg.dat$STAT_LEVL_ == 2 & no_value]
  nuts1 <- sdg.dat$geo[sdg.dat$STAT_LEVL_ == 1 &
                         sdg.dat$geo %in% sub(".$", "", nuts2.na)]
  nuts1.na <- sdg.dat$geo[sdg.dat$geo %in% nuts1 & no_value]
  nuts0 <- sdg.dat$geo[sdg.dat$STAT_LEVL_ == 0 &
                         sdg.dat$geo %in% sub(".$", "", nuts1.na)]
  # NUTS0 codes with no value at any level (NO DATA)
  nuts0.na <- sdg.dat$geo[sdg.dat$geo %in% nuts0 & no_value]

  # Codes that actually carry a value at each level
  n2.dat <- sdg.dat$geo[!(sdg.dat$geo %in% nuts2.na) & sdg.dat$STAT_LEVL_ == 2]
  n1.dat <- nuts1[!nuts1 %in% nuts1.na]
  n0.dat <- nuts0[!nuts0 %in% nuts0.na]

  data.level[[paste(i, "n2.dat", sep = ".")]] <- n2.dat
  data.level[[paste(i, "n1.dat", sep = ".")]] <- n1.dat
  data.level[[paste(i, "n0.dat", sep = ".")]] <- n0.dat
  data.level[[paste(i, "nuts0.na", sep = ".")]] <- nuts0.na

  # The patterns are anchored with "^" so a code can only match as a prefix of
  # NUTS_ID. The unanchored originals gave the same rows only because the
  # NUTS_IDs are presumably all four characters long - TODO confirm.

  # NUTS0 value -> every NUTS2 region of that country (code + two characters)
  for (e in n0.dat) {
    rows <- grepl(paste0("^", e, ".."), dbase_ids)
    dbase[rows, i] <- sdg.dat[sdg.dat$geo == e, i]
  }

  # NUTS1 value -> every NUTS2 region below it (code + one character)
  for (e in n1.dat) {
    rows <- grepl(paste0("^", e, "."), dbase_ids)
    dbase[rows, i] <- sdg.dat[sdg.dat$geo == e, i]
  }

  # NUTS2 value -> the matching region itself; written last so it wins
  for (e in n2.dat) {
    dbase[dbase_ids == e, i] <- sdg.dat[sdg.dat$geo == e, i]
  }
}

summary(dbase)
+
##     NUTS_ID       risk_pov        factor_in          train35bas     
+##  AT11   :  1   Min.   : 9.971   Min.   :   -0.73   Min.   :0.00000  
+##  AT12   :  1   1st Qu.:18.586   1st Qu.:  144.97   1st Qu.:0.09613  
+##  AT13   :  1   Median :23.514   Median :  366.48   Median :0.22800  
+##  AT21   :  1   Mean   :25.960   Mean   : 1141.61   Mean   :0.26424  
+##  AT22   :  1   3rd Qu.:29.680   3rd Qu.:  932.31   3rd Qu.:0.35996  
+##  AT31   :  1   Max.   :54.150   Max.   :10472.05   Max.   :0.88217  
+##  (Other):314   NA's   :2        NA's   :28         NA's   :52       
+##    train35ful       train_bas         train_ful         nitr_high      
+##  Min.   :0.0000   Min.   :0.01171   Min.   :0.00188   Min.   :  4.082  
+##  1st Qu.:0.1264   1st Qu.:0.10627   1st Qu.:0.04939   1st Qu.: 66.302  
+##  Median :0.2600   Median :0.19961   Median :0.12807   Median : 70.505  
+##  Mean   :0.2738   Mean   :0.26250   Mean   :0.15796   Mean   : 75.328  
+##  3rd Qu.:0.3825   3rd Qu.:0.36488   3rd Qu.:0.25108   3rd Qu.: 87.591  
+##  Max.   :0.8550   Max.   :0.94840   Max.   :0.50303   Max.   :100.000  
+##  NA's   :52       NA's   :50        NA's   :50        NA's   :44       
+##     nitr_mod        nitr_poor        irrigated           forest       
+##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000  
+##  1st Qu.: 6.533   1st Qu.: 4.106   1st Qu.: 0.3312   1st Qu.:0.08957  
+##  Median :15.896   Median : 8.883   Median : 1.2250   Median :0.24904  
+##  Mean   :13.773   Mean   :10.898   Mean   : 5.7571   Mean   :0.25141  
+##  3rd Qu.:18.416   3rd Qu.:15.385   3rd Qu.: 6.5000   3rd Qu.:0.37365  
+##  Max.   :60.000   Max.   :68.367   Max.   :74.5500   Max.   :0.75860  
+##  NA's   :44       NA's   :44       NA's   :28                         
+##     artific          soil_loss         com_birds       farm_birds    
+##  Min.   :0.00000   Min.   : 0.0300   Min.   :54.92   Min.   : 63.78  
+##  1st Qu.:0.02056   1st Qu.: 0.7047   1st Qu.:62.14   1st Qu.: 81.34  
+##  Median :0.04020   Median : 1.5005   Median :69.50   Median : 83.82  
+##  Mean   :0.09023   Mean   : 2.5482   Mean   :69.70   Mean   : 81.90  
+##  3rd Qu.:0.08193   3rd Qu.: 2.9420   3rd Qu.:81.30   3rd Qu.: 85.30  
+##  Max.   :1.00000   Max.   :17.6050   Max.   :97.22   Max.   :116.60  
+##                    NA's   :44        NA's   :158     NA's   :94      
+##     org_farm        energy_rt         renew_pct        renew_prod     
+##  Min.   : 0.000   Min.   :0.00000   Min.   : 0.000   Min.   : 0.8855  
+##  1st Qu.: 1.200   1st Qu.:0.03503   1st Qu.: 3.074   1st Qu.: 6.2422  
+##  Median : 2.687   Median :0.06128   Median : 6.124   Median : 8.3156  
+##  Mean   : 4.056   Mean   :0.15052   Mean   :11.225   Mean   :12.4318  
+##  3rd Qu.: 5.204   3rd Qu.:0.09725   3rd Qu.:22.515   3rd Qu.:18.0797  
+##  Max.   :27.487   Max.   :1.75149   Max.   :41.011   Max.   :37.7797  
+##  NA's   :28       NA's   :44        NA's   :82       NA's   :45       
+##     gross_N           gross_P       conv_till      cons_till     
+##  Min.   :  2.857   Min.   :-6.500   Mode:logical   Mode:logical  
+##  1st Qu.: 41.821   1st Qu.:-1.667   NA's:320       NA's:320      
+##  Median : 67.333   Median : 1.833                                
+##  Mean   : 67.553   Mean   : 1.941                                
+##  3rd Qu.: 85.988   3rd Qu.: 4.714                                
+##  Max.   :190.167   Max.   :31.000                                
+##  NA's   :30        NA's   :30                                    
+##  zero_till       nfert          arable        grassland     
+##  Mode:logical   Mode:logical   Mode:logical   Mode:logical  
+##  NA's:320       NA's:320       NA's:320       NA's:320      
+##                                                             
+##                                                             
+##                                                             
+##                                                             
+##                                                             
+##  permanent      soil_prod      tot_awu          gva         
+##  Mode:logical   Mode:logical   Mode:logical   Mode:logical  
+##  NA's:320       NA's:320       NA's:320       NA's:320      
+##                                                             
+##                                                             
+##                                                             
+##                                                             
+##                                                             
+##  tot_uaa       
+##  Mode:logical  
+##  NA's:320      
+##                
+##                
+##                
+##                
+## 
+
# Inspect the first rows of dbase after the SDG columns have been filled.
head(dbase)
+
##   NUTS_ID risk_pov factor_in train35bas train35ful train_bas train_ful
+## 1    AT11 13.73333 173.70857  0.1375661  0.3333333 0.1243050 0.1779190
+## 2    AT22 17.26667 393.97429  0.2160980  0.3648294 0.2017089 0.2413594
+## 3    AT12 13.83333 884.43857  0.2084775  0.4809689 0.2534787 0.3449437
+## 4    AT13 27.23333  12.70143  0.3750000  0.7500000 0.1753247 0.4740260
+## 5    AT21 17.20000 111.36714  0.2306238  0.3648393 0.2076173 0.2250348
+## 6    AT31 15.00000 452.67857  0.2508418  0.4284512 0.2014381 0.2857610
+##   nitr_high nitr_mod nitr_poor irrigated    forest    artific soil_loss
+## 1  64.58924 20.20774  15.20302     5.850 0.3161203 0.04355635     1.842
+## 2  64.58924 20.20774  15.20302     0.325 0.6127954 0.03306278     5.804
+## 3  64.58924 20.20774  15.20302     2.650 0.4286079 0.04875064     2.236
+## 4  64.58924 20.20774  15.20302    10.525 0.1469534 0.73118280     1.014
+## 5  64.58924 20.20774  15.20302     0.100 0.5998934 0.03047416    11.671
+## 6  64.58924 20.20774  15.20302     0.125 0.4027358 0.04900973     3.791
+##   com_birds farm_birds org_farm  energy_rt renew_pct renew_prod  gross_N
+## 1        NA      65.98 19.43430 0.08319988  32.65559   7.068917 32.57143
+## 2        NA      65.98 12.80858 0.08319988  32.65559   7.068917 32.57143
+## 3        NA      65.98 13.41584 0.08319988  32.65559   7.068917 32.57143
+## 4        NA      65.98 16.44137 0.08319988  32.65559   7.068917 32.57143
+## 5        NA      65.98 10.68078 0.08319988  32.65559   7.068917 32.57143
+## 6        NA      65.98 12.31071 0.08319988  32.65559   7.068917 32.57143
+##    gross_P conv_till cons_till zero_till nfert arable grassland permanent
+## 1 1.833333        NA        NA        NA    NA     NA        NA        NA
+## 2 1.833333        NA        NA        NA    NA     NA        NA        NA
+## 3 1.833333        NA        NA        NA    NA     NA        NA        NA
+## 4 1.833333        NA        NA        NA    NA     NA        NA        NA
+## 5 1.833333        NA        NA        NA    NA     NA        NA        NA
+## 6 1.833333        NA        NA        NA    NA     NA        NA        NA
+##   soil_prod tot_awu gva tot_uaa
+## 1        NA      NA  NA      NA
+## 2        NA      NA  NA      NA
+## 3        NA      NA  NA      NA
+## 4        NA      NA  NA      NA
+## 5        NA      NA  NA      NA
+## 6        NA      NA  NA      NA
+
# Inspect the last rows of dbase after the SDG columns have been filled.
tail(dbase)
+
##     NUTS_ID risk_pov factor_in train35bas train35ful  train_bas  train_ful
+## 315    UKD3 23.51429  145.0767 0.00000000  0.2000000 0.07100592 0.05621302
+## 316    TRC1 54.15000        NA         NA         NA         NA         NA
+## 317    TRC2 54.15000        NA         NA         NA         NA         NA
+## 318    UKD4 23.51429   84.5800 0.15625000  0.3125000 0.07031828 0.07846040
+## 319    TRC3 54.15000        NA         NA         NA         NA         NA
+## 320    UKM6 23.51429  103.3933 0.05925926  0.1555556 0.03854333 0.06990962
+##     nitr_high nitr_mod nitr_poor irrigated      forest     artific
+## 315  97.15694 2.388173 0.4548901     0.500 0.010517799 0.550161812
+## 316        NA       NA        NA        NA 0.022020475 0.013843281
+## 317        NA       NA        NA        NA 0.006621164 0.007146191
+## 318  97.15694 2.388173 0.4548901     0.625 0.014225182 0.109261501
+## 319        NA       NA        NA        NA 0.037659533 0.005497742
+## 320  97.15694 2.388173 0.4548901     0.000 0.126063524 0.003747982
+##     soil_loss com_birds farm_birds  org_farm  energy_rt renew_pct
+## 315     2.071      69.5      83.82 0.0000000 0.04487651   23.7199
+## 316        NA        NA         NA        NA         NA        NA
+## 317        NA        NA         NA        NA         NA        NA
+## 318     1.905      69.5      83.82 0.8233184 0.04487651   23.7199
+## 319        NA        NA         NA        NA         NA        NA
+## 320     6.174      69.5      83.82 0.9873238 0.04487651   23.7199
+##     renew_prod  gross_N  gross_P conv_till cons_till zero_till nfert
+## 315   7.183683 86.42857 5.857143        NA        NA        NA    NA
+## 316         NA       NA       NA        NA        NA        NA    NA
+## 317         NA       NA       NA        NA        NA        NA    NA
+## 318   7.183683 86.42857 5.857143        NA        NA        NA    NA
+## 319         NA       NA       NA        NA        NA        NA    NA
+## 320   7.183683 86.42857 5.857143        NA        NA        NA    NA
+##     arable grassland permanent soil_prod tot_awu gva tot_uaa
+## 315     NA        NA        NA        NA      NA  NA      NA
+## 316     NA        NA        NA        NA      NA  NA      NA
+## 317     NA        NA        NA        NA      NA  NA      NA
+## 318     NA        NA        NA        NA      NA  NA      NA
+## 319     NA        NA        NA        NA      NA  NA      NA
+## 320     NA        NA        NA        NA      NA  NA      NA
+
# Overview of data.level: Length is the number of region codes stored in each
# variable/level slot.
summary(data.level)
+
##                     Length Class  Mode     
+## risk_pov.n2.dat     144    -none- character
+## factor_in.n2.dat    245    -none- character
+## train35bas.n2.dat   231    -none- character
+## train35ful.n2.dat   231    -none- character
+## train_bas.n2.dat    232    -none- character
+## train_ful.n2.dat    232    -none- character
+## nitr_high.n2.dat      0    -none- character
+## nitr_mod.n2.dat       0    -none- character
+## nitr_poor.n2.dat      0    -none- character
+## irrigated.n2.dat    232    -none- character
+## forest.n2.dat       320    -none- character
+## artific.n2.dat      320    -none- character
+## soil_loss.n2.dat    265    -none- character
+## com_birds.n2.dat      0    -none- character
+## farm_birds.n2.dat     0    -none- character
+## org_farm.n2.dat     231    -none- character
+## energy_rt.n2.dat      0    -none- character
+## renew_pct.n2.dat      0    -none- character
+## renew_prod.n2.dat     0    -none- character
+## gross_N.n2.dat        0    -none- character
+## gross_P.n2.dat        0    -none- character
+## conv_till.n2.dat      0    -none- NULL     
+## cons_till.n2.dat      0    -none- NULL     
+## zero_till.n2.dat      0    -none- NULL     
+## nfert.n2.dat          0    -none- NULL     
+## arable.n2.dat         0    -none- NULL     
+## grassland.n2.dat      0    -none- NULL     
+## permanent.n2.dat      0    -none- NULL     
+## soil_prod.n2.dat      0    -none- NULL     
+## tot_awu.n2.dat        0    -none- NULL     
+## gva.n2.dat            0    -none- NULL     
+## tot_uaa.n2.dat        0    -none- NULL     
+## risk_pov.n1.dat      22    -none- character
+## factor_in.n1.dat      3    -none- character
+## train35bas.n1.dat    15    -none- character
+## train35ful.n1.dat    15    -none- character
+## train_bas.n1.dat     16    -none- character
+## train_ful.n1.dat     16    -none- character
+## nitr_high.n1.dat      0    -none- character
+## nitr_mod.n1.dat       0    -none- character
+## nitr_poor.n1.dat      0    -none- character
+## irrigated.n1.dat     15    -none- character
+## forest.n1.dat         0    -none- character
+## artific.n1.dat        0    -none- character
+## soil_loss.n1.dat      2    -none- character
+## com_birds.n1.dat      0    -none- character
+## farm_birds.n1.dat     0    -none- character
+## org_farm.n1.dat      16    -none- character
+## energy_rt.n1.dat      0    -none- character
+## renew_pct.n1.dat      0    -none- character
+## renew_prod.n1.dat     0    -none- character
+## gross_N.n1.dat        0    -none- character
+## gross_P.n1.dat        0    -none- character
+## conv_till.n1.dat      0    -none- NULL     
+## cons_till.n1.dat      0    -none- NULL     
+## zero_till.n1.dat      0    -none- NULL     
+## nfert.n1.dat          0    -none- NULL     
+## arable.n1.dat         0    -none- NULL     
+## grassland.n1.dat      0    -none- NULL     
+## permanent.n1.dat      0    -none- NULL     
+## soil_prod.n1.dat      0    -none- NULL     
+## tot_awu.n1.dat        0    -none- NULL     
+## gva.n1.dat            0    -none- NULL     
+## tot_uaa.n1.dat        0    -none- NULL     
+## risk_pov.n0.dat      15    -none- character
+## factor_in.n0.dat     10    -none- character
+## train35bas.n0.dat     0    -none- character
+## train35ful.n0.dat     0    -none- character
+## train_bas.n0.dat      0    -none- character
+## train_ful.n0.dat      0    -none- character
+## nitr_high.n0.dat     28    -none- character
+## nitr_mod.n0.dat      28    -none- character
+## nitr_poor.n0.dat     28    -none- character
+## irrigated.n0.dat      6    -none- character
+## forest.n0.dat         0    -none- character
+## artific.n0.dat        0    -none- character
+## soil_loss.n0.dat      3    -none- character
+## com_birds.n0.dat     11    -none- character
+## farm_birds.n0.dat    19    -none- character
+## org_farm.n0.dat       5    -none- character
+## energy_rt.n0.dat     28    -none- character
+## renew_pct.n0.dat     27    -none- character
+## renew_prod.n0.dat    27    -none- character
+## gross_N.n0.dat       30    -none- character
+## gross_P.n0.dat       30    -none- character
+## conv_till.n0.dat      0    -none- NULL     
+## cons_till.n0.dat      0    -none- NULL     
+## zero_till.n0.dat      0    -none- NULL     
+## nfert.n0.dat          0    -none- NULL     
+## arable.n0.dat         0    -none- NULL     
+## grassland.n0.dat      0    -none- NULL     
+## permanent.n0.dat      0    -none- NULL     
+## soil_prod.n0.dat      0    -none- NULL     
+## tot_awu.n0.dat        0    -none- NULL     
+## gva.n0.dat            0    -none- NULL     
+## tot_uaa.n0.dat        0    -none- NULL     
+## risk_pov.nuts0.na     2    -none- character
+## factor_in.nuts0.na    3    -none- character
+## train35bas.nuts0.na   9    -none- character
+## train35ful.nuts0.na   9    -none- character
+## train_bas.nuts0.na    7    -none- character
+## train_ful.nuts0.na    7    -none- character
+## nitr_high.nuts0.na    7    -none- character
+## nitr_mod.nuts0.na     7    -none- character
+## nitr_poor.nuts0.na    7    -none- character
+## irrigated.nuts0.na    3    -none- character
+## forest.nuts0.na       0    -none- character
+## artific.nuts0.na      0    -none- character
+## soil_loss.nuts0.na    7    -none- character
+## com_birds.nuts0.na   24    -none- character
+## farm_birds.nuts0.na  16    -none- character
+## org_farm.nuts0.na     3    -none- character
+## energy_rt.nuts0.na    7    -none- character
+## renew_pct.nuts0.na    8    -none- character
+## renew_prod.nuts0.na   8    -none- character
+## gross_N.nuts0.na      5    -none- character
+## gross_P.nuts0.na      5    -none- character
+## conv_till.nuts0.na    0    -none- NULL     
+## cons_till.nuts0.na    0    -none- NULL     
+## zero_till.nuts0.na    0    -none- NULL     
+## nfert.nuts0.na        0    -none- NULL     
+## arable.nuts0.na       0    -none- NULL     
+## grassland.nuts0.na    0    -none- NULL     
+## permanent.nuts0.na    0    -none- NULL     
+## soil_prod.nuts0.na    0    -none- NULL     
+## tot_awu.nuts0.na      0    -none- NULL     
+## gva.nuts0.na          0    -none- NULL     
+## tot_uaa.nuts0.na      0    -none- NULL
+
# Worked example for risk_pov: NUTS2 regions that have their own value
data.level[["risk_pov.n2.dat"]]
+
##   [1] "AT11" "AT12" "AT13" "AT21" "AT22" "AT31" "AT32" "BG31" "BG32" "BG33"
+##  [11] "BG34" "BG41" "BG42" "CZ01" "CZ02" "CZ03" "CZ04" "CZ05" "CZ06" "CZ07"
+##  [21] "CZ08" "CH01" "CH02" "CH03" "CH04" "AT33" "AT34" "DE72" "DE73" "DE80"
+##  [31] "DE91" "DE92" "DE93" "DE94" "DEA1" "DEA2" "DEA3" "DEA4" "DEA5" "CH05"
+##  [41] "CH06" "CH07" "DE11" "DE12" "DE13" "DE14" "DE21" "DE22" "DE23" "DE25"
+##  [51] "DE26" "DE27" "DE30" "DE40" "DE50" "DE60" "DE71" "DEB1" "DEB2" "DEB3"
+##  [61] "DEG0" "DK01" "DK02" "DK03" "DK04" "DK05" "ES11" "ES12" "ES13" "ES21"
+##  [71] "ES22" "ES23" "ES24" "ES30" "DEC0" "DED2" "DED4" "DED5" "DEE0" "DEF0"
+##  [81] "ES41" "ES42" "ES43" "ES51" "ES52" "ES53" "ES61" "ES62" "ES63" "ES64"
+##  [91] "ES70" "FI1B" "FI1C" "FI1D" "IE01" "ITC2" "ITC3" "ITC4" "ITF1" "ITF2"
+## [101] "ITF3" "ITF4" "ITF5" "ITF6" "ITG1" "ITG2" "ITH1" "ITH2" "ITH3" "ITH4"
+## [111] "ITH5" "ITI1" "ITI2" "ITI3" "ITI4" "IE02" "ITC1" "RO11" "RO12" "RO21"
+## [121] "RO22" "RO31" "RO32" "RO41" "NO01" "NO02" "NO03" "NO04" "NO05" "NO06"
+## [131] "NO07" "RO42" "SE11" "SE12" "SE21" "SK01" "SK02" "SK03" "SK04" "SE22"
+## [141] "SE23" "SE31" "SE32" "SE33"
+
# risk_pov: NUTS1 codes whose value is passed down to NUTS2 regions
data.level[["risk_pov.n1.dat"]]
+
##  [1] "BE3" "BE1" "BE2" "EL3" "EL4" "EL5" "EL6" "FI1" "FI2" "HU1" "HU2"
+## [12] "HU3" "NL1" "NL2" "PL5" "PL6" "NL3" "NL4" "PL1" "PL2" "PL3" "PL4"
+
# risk_pov: NUTS0 codes whose value is passed down to NUTS2 regions
data.level[["risk_pov.n0.dat"]]
+
##  [1] "CY" "DE" "EE" "HR" "FR" "LT" "LU" "LV" "MK" "MT" "IS" "PT" "TR" "UK"
+## [15] "SI"
+
# risk_pov: NUTS0 codes with no value at all
data.level[["risk_pov.nuts0.na"]]
+
## [1] "LI" "ME"
+
# Repeat the SDG fill procedure for the additional consensus variables held in
# `con.dat`.
#
# For every variable (all columns of con.dat after the first four) we
#   1. find the NUTS2 regions without a value, their NUTS1 parents, and - where
#      those are empty too - their NUTS0 parents;
#   2. record in `data.level` which codes supply data at each level;
#   3. copy values into dbase coarsest-first (NUTS0, NUTS1, NUTS2) so that the
#      most detailed value available always ends up in the NUTS2 row.
#
# con.dat columns are addressed explicitly instead of via attach()/detach():
# attach(con.dat) reported `gva` and `tot_uaa` as masked by objects in
# .GlobalEnv, i.e. bare column names could silently resolve to the wrong
# object. Explicit references remove that hazard (and the warning).
dbase_ids <- as.character(dbase$NUTS_ID)

for (i in names(con.dat)[-c(1:4)]) {
  no_value <- is.na(con.dat[[i]])

  # A parent code is the child code with its last character removed.
  nuts2.na <- con.dat$geo[con.dat$STAT_LEVL_ == 2 & no_value]
  nuts1 <- con.dat$geo[con.dat$STAT_LEVL_ == 1 &
                         con.dat$geo %in% sub(".$", "", nuts2.na)]
  nuts1.na <- con.dat$geo[con.dat$geo %in% nuts1 & no_value]
  nuts0 <- con.dat$geo[con.dat$STAT_LEVL_ == 0 &
                         con.dat$geo %in% sub(".$", "", nuts1.na)]
  # NUTS0 codes with no value at any level (NO DATA)
  nuts0.na <- con.dat$geo[con.dat$geo %in% nuts0 & no_value]

  # Codes that actually carry a value at each level
  n2.dat <- con.dat$geo[!(con.dat$geo %in% nuts2.na) & con.dat$STAT_LEVL_ == 2]
  n1.dat <- nuts1[!nuts1 %in% nuts1.na]
  n0.dat <- nuts0[!nuts0 %in% nuts0.na]

  data.level[[paste(i, "n2.dat", sep = ".")]] <- n2.dat
  data.level[[paste(i, "n1.dat", sep = ".")]] <- n1.dat
  data.level[[paste(i, "n0.dat", sep = ".")]] <- n0.dat
  data.level[[paste(i, "nuts0.na", sep = ".")]] <- nuts0.na

  # The patterns are anchored with "^" so a code can only match as a prefix of
  # NUTS_ID. The unanchored originals gave the same rows only because the
  # NUTS_IDs are presumably all four characters long - TODO confirm.

  # NUTS0 value -> every NUTS2 region of that country (code + two characters)
  for (e in n0.dat) {
    rows <- grepl(paste0("^", e, ".."), dbase_ids)
    dbase[rows, i] <- con.dat[con.dat$geo == e, i]
  }

  # NUTS1 value -> every NUTS2 region below it (code + one character)
  for (e in n1.dat) {
    rows <- grepl(paste0("^", e, "."), dbase_ids)
    dbase[rows, i] <- con.dat[con.dat$geo == e, i]
  }

  # NUTS2 value -> the matching region itself; written last so it wins
  for (e in n2.dat) {
    dbase[dbase_ids == e, i] <- con.dat[con.dat$geo == e, i]
  }
}

summary(dbase)
+
##     NUTS_ID       risk_pov        factor_in          train35bas     
+##  AT11   :  1   Min.   : 9.971   Min.   :   -0.73   Min.   :0.00000  
+##  AT12   :  1   1st Qu.:18.586   1st Qu.:  144.97   1st Qu.:0.09613  
+##  AT13   :  1   Median :23.514   Median :  366.48   Median :0.22800  
+##  AT21   :  1   Mean   :25.960   Mean   : 1141.61   Mean   :0.26424  
+##  AT22   :  1   3rd Qu.:29.680   3rd Qu.:  932.31   3rd Qu.:0.35996  
+##  AT31   :  1   Max.   :54.150   Max.   :10472.05   Max.   :0.88217  
+##  (Other):314   NA's   :2        NA's   :28         NA's   :52       
+##    train35ful       train_bas         train_ful         nitr_high      
+##  Min.   :0.0000   Min.   :0.01171   Min.   :0.00188   Min.   :  4.082  
+##  1st Qu.:0.1264   1st Qu.:0.10627   1st Qu.:0.04939   1st Qu.: 66.302  
+##  Median :0.2600   Median :0.19961   Median :0.12807   Median : 70.505  
+##  Mean   :0.2738   Mean   :0.26250   Mean   :0.15796   Mean   : 75.328  
+##  3rd Qu.:0.3825   3rd Qu.:0.36488   3rd Qu.:0.25108   3rd Qu.: 87.591  
+##  Max.   :0.8550   Max.   :0.94840   Max.   :0.50303   Max.   :100.000  
+##  NA's   :52       NA's   :50        NA's   :50        NA's   :44       
+##     nitr_mod        nitr_poor        irrigated           forest       
+##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000  
+##  1st Qu.: 6.533   1st Qu.: 4.106   1st Qu.: 0.3312   1st Qu.:0.08957  
+##  Median :15.896   Median : 8.883   Median : 1.2250   Median :0.24904  
+##  Mean   :13.773   Mean   :10.898   Mean   : 5.7571   Mean   :0.25141  
+##  3rd Qu.:18.416   3rd Qu.:15.385   3rd Qu.: 6.5000   3rd Qu.:0.37365  
+##  Max.   :60.000   Max.   :68.367   Max.   :74.5500   Max.   :0.75860  
+##  NA's   :44       NA's   :44       NA's   :28                         
+##     artific          soil_loss         com_birds       farm_birds    
+##  Min.   :0.00000   Min.   : 0.0300   Min.   :54.92   Min.   : 63.78  
+##  1st Qu.:0.02056   1st Qu.: 0.7047   1st Qu.:62.14   1st Qu.: 81.34  
+##  Median :0.04020   Median : 1.5005   Median :69.50   Median : 83.82  
+##  Mean   :0.09023   Mean   : 2.5482   Mean   :69.70   Mean   : 81.90  
+##  3rd Qu.:0.08193   3rd Qu.: 2.9420   3rd Qu.:81.30   3rd Qu.: 85.30  
+##  Max.   :1.00000   Max.   :17.6050   Max.   :97.22   Max.   :116.60  
+##                    NA's   :44        NA's   :158     NA's   :94      
+##     org_farm        energy_rt         renew_pct        renew_prod     
+##  Min.   : 0.000   Min.   :0.00000   Min.   : 0.000   Min.   : 0.8855  
+##  1st Qu.: 1.200   1st Qu.:0.03503   1st Qu.: 3.074   1st Qu.: 6.2422  
+##  Median : 2.687   Median :0.06128   Median : 6.124   Median : 8.3156  
+##  Mean   : 4.056   Mean   :0.15052   Mean   :11.225   Mean   :12.4318  
+##  3rd Qu.: 5.204   3rd Qu.:0.09725   3rd Qu.:22.515   3rd Qu.:18.0797  
+##  Max.   :27.487   Max.   :1.75149   Max.   :41.011   Max.   :37.7797  
+##  NA's   :28       NA's   :44        NA's   :82       NA's   :45       
+##     gross_N           gross_P         conv_till         cons_till      
+##  Min.   :  2.857   Min.   :-6.500   Min.   :0.08646   Min.   :0.00000  
+##  1st Qu.: 41.821   1st Qu.:-1.667   1st Qu.:0.46182   1st Qu.:0.05077  
+##  Median : 67.333   Median : 1.833   Median :0.61740   Median :0.12499  
+##  Mean   : 67.553   Mean   : 1.941   Mean   :0.60410   Mean   :0.18031  
+##  3rd Qu.: 85.988   3rd Qu.: 4.714   3rd Qu.:0.73832   3rd Qu.:0.28382  
+##  Max.   :190.167   Max.   :31.000   Max.   :0.99752   Max.   :0.65066  
+##  NA's   :30        NA's   :30       NA's   :53        NA's   :53       
+##    zero_till           nfert            arable        grassland    
+##  Min.   :0.00000   Min.   : 0.000   Min.   : 0.00   Min.   : 0.00  
+##  1st Qu.:0.00920   1st Qu.: 6.448   1st Qu.:39.68   1st Qu.:17.20  
+##  Median :0.01843   Median : 9.917   Median :62.28   Median :32.97  
+##  Mean   :0.03000   Mean   :10.975   Mean   :57.72   Mean   :35.74  
+##  3rd Qu.:0.04003   3rd Qu.:14.254   3rd Qu.:78.19   3rd Qu.:48.62  
+##  Max.   :0.19303   Max.   :29.456   Max.   :99.28   Max.   :98.84  
+##  NA's   :53        NA's   :11       NA's   :44      NA's   :44     
+##    permanent         soil_prod       tot_awu            gva        
+##  Min.   : 0.0000   Min.   :3.00   Min.   :     0   Min.   :   0.0  
+##  1st Qu.: 0.3546   1st Qu.:6.00   1st Qu.:  9510   1st Qu.: 204.0  
+##  Median : 1.1324   Median :6.00   Median : 24760   Median : 431.2  
+##  Mean   : 5.7197   Mean   :6.45   Mean   : 78177   Mean   : 729.0  
+##  3rd Qu.: 5.6520   3rd Qu.:7.00   3rd Qu.: 74294   3rd Qu.: 895.7  
+##  Max.   :64.6743   Max.   :8.00   Max.   :791115   Max.   :7125.2  
+##  NA's   :44        NA's   :51     NA's   :28       NA's   :28      
+##     tot_uaa        
+##  Min.   :       0  
+##  1st Qu.:  195140  
+##  Median :  459335  
+##  Mean   : 3607738  
+##  3rd Qu.: 1000862  
+##  Max.   :27776795  
+##  NA's   :36
+
# Inspect the first rows of dbase after the consensus variables were added.
head(dbase)
+
##   NUTS_ID risk_pov factor_in train35bas train35ful train_bas train_ful
+## 1    AT11 13.73333 173.70857  0.1375661  0.3333333 0.1243050 0.1779190
+## 2    AT22 17.26667 393.97429  0.2160980  0.3648294 0.2017089 0.2413594
+## 3    AT12 13.83333 884.43857  0.2084775  0.4809689 0.2534787 0.3449437
+## 4    AT13 27.23333  12.70143  0.3750000  0.7500000 0.1753247 0.4740260
+## 5    AT21 17.20000 111.36714  0.2306238  0.3648393 0.2076173 0.2250348
+## 6    AT31 15.00000 452.67857  0.2508418  0.4284512 0.2014381 0.2857610
+##   nitr_high nitr_mod nitr_poor irrigated    forest    artific soil_loss
+## 1  64.58924 20.20774  15.20302     5.850 0.3161203 0.04355635     1.842
+## 2  64.58924 20.20774  15.20302     0.325 0.6127954 0.03306278     5.804
+## 3  64.58924 20.20774  15.20302     2.650 0.4286079 0.04875064     2.236
+## 4  64.58924 20.20774  15.20302    10.525 0.1469534 0.73118280     1.014
+## 5  64.58924 20.20774  15.20302     0.100 0.5998934 0.03047416    11.671
+## 6  64.58924 20.20774  15.20302     0.125 0.4027358 0.04900973     3.791
+##   com_birds farm_birds org_farm  energy_rt renew_pct renew_prod  gross_N
+## 1        NA      65.98 19.43430 0.08319988  32.65559   7.068917 32.57143
+## 2        NA      65.98 12.80858 0.08319988  32.65559   7.068917 32.57143
+## 3        NA      65.98 13.41584 0.08319988  32.65559   7.068917 32.57143
+## 4        NA      65.98 16.44137 0.08319988  32.65559   7.068917 32.57143
+## 5        NA      65.98 10.68078 0.08319988  32.65559   7.068917 32.57143
+## 6        NA      65.98 12.31071 0.08319988  32.65559   7.068917 32.57143
+##    gross_P conv_till  cons_till   zero_till    nfert   arable grassland
+## 1 1.833333 0.6182190 0.31992068 0.025012794 7.684000 83.64566  8.715722
+## 2 1.833333 0.8887161 0.05005656 0.024109163 7.551429 37.02489 58.694493
+## 3 1.833333 0.6226791 0.32803537 0.019896256 7.452800 76.22380 20.140837
+## 4 1.833333 0.5109890 0.40476190 0.007326007 7.497000 79.80050 10.099751
+## 5 1.833333 0.8592546 0.05928605 0.032552288 8.131500 28.48779 71.285286
+## 6 1.833333 0.8442576 0.12043311 0.014198645 9.138333 56.49367 43.164202
+##    permanent soil_prod tot_awu       gva tot_uaa
+## 1  7.5451998         6  7007.5 177.76429  181150
+## 2  4.2139773         6 28225.0 519.01286  374895
+## 3  3.5858503         6 38360.0 975.25143  895805
+## 4 10.0997506         6  1975.0  19.65571    7190
+## 5  0.1679223         6 10367.5 122.62143  215230
+## 6  0.2648111         6 29657.5 653.30143  513830
+
# Inspect the last rows of dbase after the consensus variables were added.
tail(dbase)
+
##     NUTS_ID risk_pov factor_in train35bas train35ful  train_bas  train_ful
+## 315    UKD3 23.51429  145.0767 0.00000000  0.2000000 0.07100592 0.05621302
+## 316    TRC1 54.15000        NA         NA         NA         NA         NA
+## 317    TRC2 54.15000        NA         NA         NA         NA         NA
+## 318    UKD4 23.51429   84.5800 0.15625000  0.3125000 0.07031828 0.07846040
+## 319    TRC3 54.15000        NA         NA         NA         NA         NA
+## 320    UKM6 23.51429  103.3933 0.05925926  0.1555556 0.03854333 0.06990962
+##     nitr_high nitr_mod nitr_poor irrigated      forest     artific
+## 315  97.15694 2.388173 0.4548901     0.500 0.010517799 0.550161812
+## 316        NA       NA        NA        NA 0.022020475 0.013843281
+## 317        NA       NA        NA        NA 0.006621164 0.007146191
+## 318  97.15694 2.388173 0.4548901     0.625 0.014225182 0.109261501
+## 319        NA       NA        NA        NA 0.037659533 0.005497742
+## 320  97.15694 2.388173 0.4548901     0.000 0.126063524 0.003747982
+##     soil_loss com_birds farm_birds  org_farm  energy_rt renew_pct
+## 315     2.071      69.5      83.82 0.0000000 0.04487651   23.7199
+## 316        NA        NA         NA        NA         NA        NA
+## 317        NA        NA         NA        NA         NA        NA
+## 318     1.905      69.5      83.82 0.8233184 0.04487651   23.7199
+## 319        NA        NA         NA        NA         NA        NA
+## 320     6.174      69.5      83.82 0.9873238 0.04487651   23.7199
+##     renew_prod  gross_N  gross_P conv_till  cons_till   zero_till
+## 315   7.183683 86.42857 5.857143 0.5116279 0.26976744 0.083720930
+## 316         NA       NA       NA        NA         NA          NA
+## 317         NA       NA       NA        NA         NA          NA
+## 318   7.183683 86.42857 5.857143 0.5070682 0.07652120 0.000921942
+## 319         NA       NA       NA        NA         NA          NA
+## 320   7.183683 86.42857 5.857143 0.3571254 0.01623576 0.064146551
+##         nfert    arable grassland   permanent soil_prod tot_awu       gva
+## 315 17.954800 21.626717  78.21169 0.134661998         6  1352.5 166.29333
+## 316  5.709333        NA        NA          NA        NA      NA        NA
+## 317  5.508154        NA        NA          NA        NA      NA        NA
+## 318 17.686000 20.185625  79.77323 0.041148500         6  7645.0 100.34333
+## 319  5.452800        NA        NA          NA        NA      NA        NA
+## 320 13.097444  6.222771  93.77578 0.001447408         6 10525.0  51.39333
+##     tot_uaa
+## 315   36325
+## 316      NA
+## 317      NA
+## 318  213575
+## 319      NA
+## 320 2516980
+
#check data level for conv_till as an example
+data.level$conv_till.n2.dat
+
##   [1] "AT11" "AT12" "AT13" "AT21" "AT22" "AT31" "AT32" "BE31" "BE32" "BE33"
+##  [11] "BE34" "BE35" "BG31" "BG32" "BG33" "BG34" "BG41" "BG42" "CY00" "CZ01"
+##  [21] "CZ02" "CZ03" "CZ04" "CZ05" "CZ06" "CZ07" "CZ08" "CH01" "CH02" "CH03"
+##  [31] "CH04" "AT33" "AT34" "BE21" "BE22" "BE23" "BE24" "BE25" "CH05" "CH06"
+##  [41] "CH07" "DK01" "DK02" "DK03" "DK04" "DK05" "EE00" "EL30" "EL41" "EL42"
+##  [51] "EL43" "ES11" "ES12" "ES13" "ES21" "ES22" "ES23" "ES24" "ES30" "FR52"
+##  [61] "FR53" "FR61" "FR62" "FR63" "FR71" "FR72" "FR81" "FR82" "FR83" "HR03"
+##  [71] "HR04" "ES41" "ES42" "ES43" "ES51" "ES52" "ES53" "ES61" "ES62" "ES70"
+##  [81] "FI19" "FI1B" "FI1C" "FI1D" "FI20" "FR10" "FR21" "FR22" "FR23" "FR24"
+##  [91] "FR25" "FR26" "FR30" "FR41" "FR42" "FR43" "FR51" "HU10" "HU21" "HU22"
+## [101] "HU23" "HU31" "HU32" "HU33" "IE01" "ITC2" "ITC3" "ITC4" "ITF1" "ITF2"
+## [111] "ITF3" "ITF4" "ITF5" "ITF6" "ITG1" "ITG2" "ITH1" "ITH2" "ITH3" "ITH4"
+## [121] "ITH5" "ITI1" "ITI2" "ITI3" "ITI4" "LT00" "LU00" "LV00" "ME00" "MT00"
+## [131] "NL11" "NL12" "NL13" "NL21" "NL22" "IE02" "IS00" "ITC1" "PL43" "PL51"
+## [141] "PL52" "PL61" "PL62" "PL63" "PT11" "PT15" "PT16" "PT17" "PT18" "PT20"
+## [151] "PT30" "RO11" "RO12" "RO21" "RO22" "RO31" "RO32" "RO41" "NL23" "NL31"
+## [161] "NL32" "NL33" "NL34" "NL41" "NL42" "NO01" "NO02" "NO03" "NO04" "NO05"
+## [171] "NO06" "NO07" "PL11" "PL12" "PL21" "PL31" "PL32" "PL33" "PL34" "PL41"
+## [181] "PL42" "RO42" "SE11" "SE12" "SE21" "SK01" "SK02" "SK03" "SK04" "UKC1"
+## [191] "SE22" "SE23" "SE31" "SE32" "SE33" "UKM2" "UKM3" "UKM5" "UKM6" "UKC2"
+## [201] "UKD1" "UKD3" "UKD4" "UKD6" "UKD7" "UKE1" "UKE2" "UKE3" "UKE4" "UKF1"
+## [211] "UKF2" "UKF3" "UKG1" "UKG2" "UKG3" "UKH1" "UKH2" "UKH3" "UKJ1" "UKJ2"
+## [221] "UKJ3" "UKJ4" "UKK1" "UKK2" "UKK3" "UKK4" "UKL1" "UKL2" "UKN0"
+
data.level$conv_till.n1.dat
+
##  [1] "DE8" "DE9" "DEA" "DE1" "DE2" "DE3" "DE4" "DE5" "DE6" "DE7" "DEB"
+## [12] "DEG" "DEC" "DED" "DEE" "DEF"
+
data.level$conv_till.n0.dat
+
## character(0)
+
data.level$conv_till.nuts0.na
+
##  [1] "BE" "EL" "ES" "FR" "LI" "MK" "PL" "TR" "UK" "SI"
+
#Utilised agricultural area (UAA) is an important variable for weighting the allocation of other variables through NUTS2 regions and for calculating proportions, etc.
+#The Eurostat UAA data is missing NUTS2 data in many areas. To fill these gaps, we will use the CORINE land cover dataset for an estimate of agricultural area (all CORINE categories in the 200s)
+
+corine.aa <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_corine_ag_area_ha')
+
## Reading layer `nuts2_corine_ag_area_ha' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
corine.aa$geo <- corine.aa$NUTS_ID
+head(corine.aa)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA       SUM  geo
+## 1    AT11         1  35088  2193000000  219300.0 AT11
+## 2    AT22         2  65849  4115562500  411556.2 AT22
+## 3    AT12         3 160793 10049562500 1004956.2 AT12
+## 4    AT13         4    972    60750000    6075.0 AT13
+## 5    AT21         5  30610  1913125000  191312.5 AT21
+## 6    AT31         6  93214  5825875000  582587.5 AT31
+
head(con.dat[,c("geo", "tot_uaa")])
+
##    geo tot_uaa
+## 1   AT 2698320
+## 2  AT1      NA
+## 3 AT11  181150
+## 4 AT12  895805
+## 5 AT13    7190
+## 6  AT2      NA
+
uaa.compare <- left_join(corine.aa[,c("geo", "SUM")], con.dat[,c("geo", "tot_uaa")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
head(uaa.compare)
+
##    geo       SUM tot_uaa
+## 1 AT11  219300.0  181150
+## 2 AT22  411556.2  374895
+## 3 AT12 1004956.2  895805
+## 4 AT13    6075.0    7190
+## 5 AT21  191312.5  215230
+## 6 AT31  582587.5  513830
+
plot(uaa.compare$tot_uaa ~ uaa.compare$SUM)
+

+
cor(uaa.compare[,2:3])
+
##         SUM tot_uaa
+## SUM       1      NA
+## tot_uaa  NA       1
+
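#Although the CORINE agricultural area slightly overestimates the Eurostat UAA data, the two agree closely where both are available (note that cor() above returns NA off the diagonal because tot_uaa contains missing values; use = "complete.obs" is needed to obtain the coefficient), so we will use CORINE agricultural area for its completeness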
+
+#Next, we need to aggregate to NUTS1 and NUTS0 levels for calculations below
+nuts2.geos <- sdg.dat$geo[sdg.dat$STAT_LEVL_ == 2]
+nuts1.geos <- sdg.dat$geo[sdg.dat$STAT_LEVL_ == 1]
+nuts0.geos <- sdg.dat$geo[sdg.dat$STAT_LEVL_ == 0]
+
+uaa.nuts1 <- as.data.frame(nuts1.geos)
+names(uaa.nuts1) <- 'geo'
+uaa.nuts1$sum_uaa <- NA
+
+#Sum the CORINE agricultural area (SUM) of every NUTS2 region that belongs to each NUTS1 region.
+#A NUTS2 region belongs to a NUTS1 region when its code is the NUTS1 code plus one more character;
+#the pattern is anchored with '^' so the NUTS1 code can only match at the start of a region code.
+for(e in nuts1.geos) {
+  uaa.nuts1$sum_uaa[uaa.nuts1$geo == e] <- sum(uaa.compare$SUM[grepl(paste0('^', e, '.'), uaa.compare$geo)])
+}
+
+head(uaa.nuts1)
+
##   geo   sum_uaa
+## 1 AT1 1230331.2
+## 2 AT2  602868.8
+## 3 AT3  844343.8
+## 4 BE3  906737.5
+## 5 BG3 4221656.2
+## 6 BG4 1532893.8
+
uaa.nuts0 <- as.data.frame(nuts0.geos)
+names(uaa.nuts0) <- 'geo'
+uaa.nuts0$sum_uaa <- NA
+
+#Sum the CORINE agricultural area (SUM) of every NUTS2 region that belongs to each NUTS0 (country) code.
+#A NUTS2 region belongs to a country when its code is the country code plus two more characters;
+#the pattern is anchored with '^' so the country code can only match at the start of a region code.
+for(e in nuts0.geos) {
+  uaa.nuts0$sum_uaa[uaa.nuts0$geo == e] <- sum(uaa.compare$SUM[grepl(paste0('^', e, '..'), uaa.compare$geo)])
+}
+
+head(uaa.nuts0)
+
##   geo   sum_uaa
+## 1  AT 2677543.8
+## 2  BG 5754550.0
+## 3  CY  443356.2
+## 4  CZ 4508200.0
+## 5  CH 1185400.0
+## 6  BE 1760275.0
+
#bind all dataframes together
+uaa.compare$sum_uaa <- uaa.compare$SUM
+corine.aa.all.nuts <- rbind(uaa.compare[,c(1,4)], uaa.nuts1, uaa.nuts0)
+head(corine.aa.all.nuts)
+
##    geo   sum_uaa
+## 1 AT11  219300.0
+## 2 AT22  411556.2
+## 3 AT12 1004956.2
+## 4 AT13    6075.0
+## 5 AT21  191312.5
+## 6 AT31  582587.5
+
tail(corine.aa.all.nuts)
+
##     geo    sum_uaa
+## 464  PL 18689137.5
+## 465  SE  3914437.5
+## 466  SK  2327718.8
+## 467  TR 33916137.5
+## 468  UK 13837168.8
+## 469  SI   701168.8
+
#Here we use CAP Context Indicators to get better coverage of NUTS2 data for SDG variables currently at NUTS0
+
+#Volumes of water abstraction for irrigation are patchy and at the NUTS0 level from Eurostat. CAP Context Indicator C.39 contains NUTS1 and NUTS2 data in many areas from 2010, which we will use instead.
+#All zeros in this table were converted to no data due to uncertainty as to their accuracy
+cap.irrig <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal6/Irrigation_volume/C39_en_clean.csv", head=T)
+head(cap.irrig)
+
##    geo irrig_vol
+## 1   BE        NA
+## 2  BE1        NA
+## 3 BE10        NA
+## 4  BE2        NA
+## 5 BE21        NA
+## 6 BE22        NA
+
summary(cap.irrig)
+
##       geo        irrig_vol       
+##  AT     :  1   Min.   :       4  
+##  AT1    :  1   1st Qu.:    1266  
+##  AT11   :  1   Median :   11926  
+##  AT12   :  1   Mean   :  367069  
+##  AT13   :  1   3rd Qu.:  111053  
+##  AT2    :  1   Max.   :16658538  
+##  (Other):392   NA's   :73
+
#GDP from CAP Context Indicator C.08
+cap.gdp <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Alternative_variables/c8_en_clean.csv", head=T)
+head(cap.gdp)
+
##    geo tot_mil_eur tot_eur_cap tot_mil_pps tot_pps_cap rur_mil_eur
+## 1   BE      400805       35900      368652       33000       21277
+## 2  BE1       73399       62900       67511       57900          NA
+## 3 BE10       73399       62900       67511       57900          NA
+## 4  BE2      233920       36500      215155       33500          NA
+## 5 BE21       76359       42300       70234       38900          NA
+## 6 BE22       25559       29800       23509       27400          NA
+##   rur_eur_cap rur_mil_pps rur_pps_cap int_mil_eur int_eur_cap int_mil_pps
+## 1    22420.44       19571    20622.76      138660    32503.52      127538
+## 2          NA          NA          NA          NA          NA          NA
+## 3          NA          NA          NA          NA          NA          NA
+## 4          NA          NA          NA      103471    34284.63       95172
+## 5          NA          NA          NA       17909    39975.45       16473
+## 6          NA          NA          NA       10197    23334.10        9379
+##   int_pps_cap urb_mil_eur urb_eur_cap urb_mil_pps urb_pps_cap
+## 1    29896.39      240598    40491.08      221297    37242.85
+## 2          NA       73399    62895.46       67511    57850.04
+## 3          NA       73399    62895.46       67511    57850.04
+## 4    31534.79      130449    38401.24      119985    35320.87
+## 5    36770.09       58450    43072.96       53761    39617.54
+## 6    21462.24       15362    36663.48       14130    33723.15
+
#Employment rate from CAP Context Indicator C.05
+cap.emp <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Alternative_variables/c5_en_clean.csv", head=T)
+head(cap.emp)
+
##   NUTS_ID emp_rate_15_64
+## 1      BE       62.28959
+## 2     BE1       55.27341
+## 3    BE10       55.27341
+## 4     BE2       66.52326
+## 5    BE21       63.91419
+## 6    BE22       64.76931
+
cap.emp$geo <- cap.emp$NUTS_ID
+#EL51 is duplicated in this table, so deleted here
+cap.emp[cap.emp$geo == "EL51",]
+
##     NUTS_ID emp_rate_15_64  geo
+## 111    EL51       51.93326 EL51
+## 112    EL51       51.93326 EL51
+
cap.emp <- cap.emp[!(cap.emp$geo %in% "EL51" & duplicated(cap.emp$geo)), ] #drop the repeated EL51 row by key (keeps its first occurrence) rather than by a hard-coded row number
+
+#Youth and total unemployment from CAP Context Indicator C.07
+cap.unemp <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Alternative_variables/c7_en_clean.csv", head=T)
+head(cap.unemp)
+
##    geo tot_unemp yth_unemp
+## 1   BE       7.8      20.1
+## 2  BE1      16.8      35.9
+## 3 BE10      16.8      35.9
+## 4  BE2       4.8      14.1
+## 5 BE21       6.1      18.5
+## 6 BE22       4.8       9.8
+
#Merge tables
+add.cap.dat <- sdg_data@data[,1:2]
+add.cap.dat <- left_join(add.cap.dat, cap.irrig)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factors with different levels, coercing to
+## character vector
+
add.cap.dat <- left_join(add.cap.dat, cap.gdp[,c("geo", "tot_eur_cap", "tot_pps_cap")]) #Don't include the rural/intermediate/urban columns here because of important differences between urban-rural typologies; columns are selected by name so a change in column order cannot pick the wrong variable
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
add.cap.dat <- left_join(add.cap.dat, cap.emp[,-1])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
add.cap.dat <- left_join(add.cap.dat, cap.unemp)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
head(add.cap.dat)
+
##    geo STAT_LEVL_ irrig_vol tot_eur_cap tot_pps_cap emp_rate_15_64
+## 1   AT          0   18316.2       38700       35700       71.54552
+## 2  AT1          1   15755.7       38900       35900       68.75891
+## 3 AT11          2    3660.6       26700       24600       69.81530
+## 4 AT12          2   10828.8       31800       29300       73.06232
+## 5 AT13          2    1266.4       47300       43700       64.90683
+## 6  AT2          1    1019.4       34100       31500       70.91171
+##   tot_unemp yth_unemp
+## 1       6.0      11.2
+## 2       8.2      15.0
+## 3       5.7        NA
+## 4       5.2       9.3
+## 5      11.3      20.3
+## 6       5.2      10.8
+
names(add.cap.dat)
+
## [1] "geo"            "STAT_LEVL_"     "irrig_vol"      "tot_eur_cap"   
+## [5] "tot_pps_cap"    "emp_rate_15_64" "tot_unemp"      "yth_unemp"
+
nrow(add.cap.dat)
+
## [1] 471
+
summary(add.cap.dat)
+
##      geo              STAT_LEVL_      irrig_vol         tot_eur_cap    
+##  Length:471         Min.   :0.000   Min.   :       4   Min.   :  3800  
+##  Class :character   1st Qu.:1.000   1st Qu.:    1215   1st Qu.: 15675  
+##  Mode  :character   Median :2.000   Median :   10829   Median : 26400  
+##                     Mean   :1.605   Mean   :  365169   Mean   : 26844  
+##                     3rd Qu.:2.000   3rd Qu.:   91510   3rd Qu.: 34100  
+##                     Max.   :2.000   Max.   :16658538   Max.   :191400  
+##                                     NA's   :164        NA's   :69      
+##   tot_pps_cap     emp_rate_15_64    tot_unemp        yth_unemp    
+##  Min.   :  8200   Min.   :37.87   Min.   : 2.100   Min.   : 4.20  
+##  1st Qu.: 20025   1st Qu.:62.39   1st Qu.: 4.800   1st Qu.:11.00  
+##  Median : 24750   Median :67.64   Median : 7.000   Median :17.20  
+##  Mean   : 26698   Mean   :66.84   Mean   : 8.802   Mean   :21.50  
+##  3rd Qu.: 31325   3rd Qu.:73.61   3rd Qu.:10.500   3rd Qu.:28.35  
+##  Max.   :163500   Max.   :81.42   Max.   :31.300   Max.   :69.10  
+##  NA's   :69       NA's   :70      NA's   :71       NA's   :80
+
#create whole database for processing alternative CAP indicators
+dbase.cap <- as.data.frame(matrix(nrow=nrow(nuts@data), ncol=(ncol(add.cap.dat[,-2]))))
+dbase.cap[,1] <- nuts@data$NUTS_ID
+names(dbase.cap) <- c("NUTS_ID", names(add.cap.dat[,-c(1:2)]))
+head(dbase.cap)
+
##   NUTS_ID irrig_vol tot_eur_cap tot_pps_cap emp_rate_15_64 tot_unemp
+## 1    AT11        NA          NA          NA             NA        NA
+## 2    AT22        NA          NA          NA             NA        NA
+## 3    AT12        NA          NA          NA             NA        NA
+## 4    AT13        NA          NA          NA             NA        NA
+## 5    AT21        NA          NA          NA             NA        NA
+## 6    AT31        NA          NA          NA             NA        NA
+##   yth_unemp
+## 1        NA
+## 2        NA
+## 3        NA
+## 4        NA
+## 5        NA
+## 6        NA
+
nrow(dbase.cap)
+
## [1] 320
+
#Repeat function above to allocate NUTS1 and NUTS0 to NUTS2 units. Again, this is appropriate for ratio variables but not absolute variables. This will be cleaned and corrected below.
+data.level.cap <- vector("list", 4*length(names(dbase.cap)[-1]))
+names(data.level.cap) <- c(paste(names(dbase.cap)[-1], 'n2.dat', sep='.'),
+                       paste(names(dbase.cap)[-1], 'n1.dat', sep='.'),
+                       paste(names(dbase.cap)[-1], 'n0.dat', sep='.'),
+                       paste(names(dbase.cap)[-1], 'nuts0.na', sep='.')
+                       )
+labels(data.level.cap)
+
##  [1] "irrig_vol.n2.dat"        "tot_eur_cap.n2.dat"     
+##  [3] "tot_pps_cap.n2.dat"      "emp_rate_15_64.n2.dat"  
+##  [5] "tot_unemp.n2.dat"        "yth_unemp.n2.dat"       
+##  [7] "irrig_vol.n1.dat"        "tot_eur_cap.n1.dat"     
+##  [9] "tot_pps_cap.n1.dat"      "emp_rate_15_64.n1.dat"  
+## [11] "tot_unemp.n1.dat"        "yth_unemp.n1.dat"       
+## [13] "irrig_vol.n0.dat"        "tot_eur_cap.n0.dat"     
+## [15] "tot_pps_cap.n0.dat"      "emp_rate_15_64.n0.dat"  
+## [17] "tot_unemp.n0.dat"        "yth_unemp.n0.dat"       
+## [19] "irrig_vol.nuts0.na"      "tot_eur_cap.nuts0.na"   
+## [21] "tot_pps_cap.nuts0.na"    "emp_rate_15_64.nuts0.na"
+## [23] "tot_unemp.nuts0.na"      "yth_unemp.nuts0.na"
+
#For every CAP variable, find the finest NUTS level that holds data, record it in data.level.cap, and copy the value down to the NUTS2 rows of dbase.cap.
+#Columns are referenced explicitly (add.cap.dat$...) instead of through attach(), so the loop cannot pick up a same-named object from the workspace.
+for(i in names(add.cap.dat)[-c(1:2)]) {
+  val.na <- is.na(add.cap.dat[,i])
+  #NUTS2 regions without data, and the NUTS1 regions they belong to (region code minus its last character)
+  nuts2.na <- add.cap.dat[add.cap.dat$STAT_LEVL_ == 2 & val.na, 'geo']
+  nuts1 <- add.cap.dat[add.cap.dat$STAT_LEVL_ == 1 & add.cap.dat$geo %in% gsub(".{1}$", "", nuts2.na), 'geo']
+  #of those NUTS1 regions, the ones that also lack data, and the countries they belong to
+  nuts1.na <- add.cap.dat[add.cap.dat$geo %in% nuts1 & val.na, 'geo']
+  nuts0 <- add.cap.dat[add.cap.dat$STAT_LEVL_ == 0 & add.cap.dat$geo %in% gsub(".{1}$", "", nuts1.na), 'geo']
+  #countries with no data at any level
+  nuts0.na <- add.cap.dat[add.cap.dat$geo %in% nuts0 & val.na, 'geo']
+  
+#NUTS2 data
+n2.dat <- add.cap.dat[!(add.cap.dat$geo %in% nuts2.na) & add.cap.dat$STAT_LEVL_ == 2, 'geo']
+#NUTS1 data
+n1.dat <- nuts1[!nuts1 %in% nuts1.na]
+#NUTS0 data
+n0.dat <- nuts0[!nuts0 %in% nuts0.na]
+
+data.level.cap[[paste(i, 'n2.dat', sep='.')]] <- n2.dat
+data.level.cap[[paste(i, 'n1.dat', sep='.')]] <- n1.dat
+data.level.cap[[paste(i, 'n0.dat', sep='.')]] <- n0.dat
+data.level.cap[[paste(i, 'nuts0.na', sep='.')]] <- nuts0.na
+
+  #Fill from the coarsest to the finest level so that finer data overwrites coarser data.
+  #Patterns are anchored with '^' so a parent code can only match at the start of a NUTS2 code.
+  for(e in n0.dat) {
+    dbase.cap[grepl(paste0('^', e, '..'), dbase.cap$NUTS_ID), i] <- add.cap.dat[add.cap.dat$geo == e, i]
+  }
+
+  for(e in n1.dat) {
+    dbase.cap[grepl(paste0('^', e, '.'), dbase.cap$NUTS_ID), i] <- add.cap.dat[add.cap.dat$geo == e, i]
+  }
+
+  for(e in n2.dat) {
+    dbase.cap[dbase.cap$NUTS_ID == e, i] <- add.cap.dat[add.cap.dat$geo == e, i]
+  }
+}
+
+summary(dbase.cap)
+
##     NUTS_ID      irrig_vol        tot_eur_cap      tot_pps_cap    
+##  AT11   :  1   Min.   :      4   Min.   :  3800   Min.   :  8200  
+##  AT12   :  1   1st Qu.:   1228   1st Qu.: 16350   1st Qu.: 19750  
+##  AT13   :  1   Median :  10445   Median : 26500   Median : 24750  
+##  AT21   :  1   Mean   : 361058   Mean   : 26832   Mean   : 26526  
+##  AT22   :  1   3rd Qu.:  89134   3rd Qu.: 33750   3rd Qu.: 31025  
+##  AT31   :  1   Max.   :4720091   Max.   :191400   Max.   :163500  
+##  (Other):314   NA's   :58        NA's   :44       NA's   :44      
+##  emp_rate_15_64    tot_unemp        yth_unemp    
+##  Min.   :37.87   Min.   : 2.100   Min.   : 4.20  
+##  1st Qu.:62.16   1st Qu.: 4.700   1st Qu.:10.60  
+##  Median :67.91   Median : 6.850   Median :17.00  
+##  Mean   :66.82   Mean   : 8.767   Mean   :21.39  
+##  3rd Qu.:73.84   3rd Qu.:10.600   3rd Qu.:29.10  
+##  Max.   :81.42   Max.   :31.300   Max.   :69.10  
+##  NA's   :44      NA's   :44       NA's   :44
+
head(dbase.cap)
+
##   NUTS_ID irrig_vol tot_eur_cap tot_pps_cap emp_rate_15_64 tot_unemp
+## 1    AT11    3660.6       26700       24600       69.81530       5.7
+## 2    AT22     909.0       34800       32100       71.37077       5.1
+## 3    AT12   10828.8       31800       29300       73.06232       5.2
+## 4    AT13    1266.4       47300       43700       64.90683      11.3
+## 5    AT21     110.4       32700       30200       69.88481       5.4
+## 6    AT31     319.9       39600       36500       75.46507       4.5
+##   yth_unemp
+## 1      15.0
+## 2      10.2
+## 3       9.3
+## 4      20.3
+## 5      12.2
+## 6       7.6
+
tail(dbase.cap)
+
##     NUTS_ID irrig_vol tot_eur_cap tot_pps_cap emp_rate_15_64 tot_unemp
+## 315    UKD3     582.8       29200       25000       70.77519       6.1
+## 316    TRC1        NA          NA          NA             NA        NA
+## 317    TRC2        NA          NA          NA             NA        NA
+## 318    UKD4     277.5       26400       22600       72.71144       4.4
+## 319    TRC3        NA          NA          NA             NA        NA
+## 320    UKM6     340.9       29600       25300       72.98066       4.7
+##     yth_unemp
+## 315      16.1
+## 316        NA
+## 317        NA
+## 318      13.0
+## 319        NA
+## 320      14.5
+
#check data level for irrig_vol as an example
+data.level.cap$irrig_vol.n2.dat
+
##   [1] "AT11" "AT12" "AT13" "AT21" "AT22" "AT31" "AT32" "BG31" "BG32" "BG33"
+##  [11] "BG34" "BG41" "BG42" "CY00" "CZ02" "CZ03" "CZ04" "CZ05" "CZ06" "CZ07"
+##  [21] "CZ08" "AT33" "AT34" "DK01" "DK02" "DK03" "DK04" "DK05" "EE00" "EL30"
+##  [31] "EL41" "EL42" "EL43" "ES11" "ES12" "ES13" "ES21" "ES22" "ES23" "ES24"
+##  [41] "ES30" "FR52" "FR53" "FR61" "FR62" "FR63" "FR71" "FR72" "FR81" "FR82"
+##  [51] "FR83" "HR03" "HR04" "ES41" "ES42" "ES43" "ES51" "ES52" "ES53" "ES61"
+##  [61] "ES62" "ES70" "FI19" "FI1B" "FI1C" "FI1D" "FI20" "FR10" "FR21" "FR22"
+##  [71] "FR23" "FR24" "FR25" "FR26" "FR30" "FR41" "FR42" "FR43" "FR51" "HU10"
+##  [81] "HU21" "HU22" "HU23" "HU31" "HU32" "HU33" "ITC2" "ITC3" "ITC4" "ITF1"
+##  [91] "ITF2" "ITF3" "ITF4" "ITF5" "ITF6" "ITG1" "ITG2" "ITH1" "ITH2" "ITH3"
+## [101] "ITH4" "ITH5" "ITI1" "ITI2" "ITI3" "ITI4" "LT00" "LV00" "MT00" "NL11"
+## [111] "NL12" "NL13" "NL21" "NL22" "ITC1" "PL43" "PL51" "PL52" "PL61" "PL62"
+## [121] "PL63" "PT11" "PT15" "PT16" "PT17" "PT18" "PT20" "PT30" "RO11" "RO12"
+## [131] "RO21" "RO22" "RO31" "RO32" "RO41" "NL23" "NL31" "NL32" "NL33" "NL34"
+## [141] "NL41" "NL42" "PL11" "PL12" "PL21" "PL22" "PL31" "PL32" "PL33" "PL34"
+## [151] "PL41" "PL42" "RO42" "SE11" "SE12" "SE21" "SK01" "SK02" "SK03" "SK04"
+## [161] "UKC1" "SE22" "SE23" "SE31" "SE32" "SE33" "UKM2" "UKM3" "UKM5" "UKM6"
+## [171] "UKC2" "UKD4" "UKD6" "UKE1" "UKE2" "UKE3" "UKE4" "UKF1" "UKF2" "UKF3"
+## [181] "UKG1" "UKG2" "UKH1" "UKH2" "UKH3" "UKJ1" "UKJ2" "UKJ3" "UKJ4" "UKK1"
+## [191] "UKK2" "UKK3" "UKK4" "UKL1" "UKL2"
+
data.level.cap$irrig_vol.n1.dat
+
##  [1] "CZ0" "DE8" "DE9" "DEA" "DE1" "DE2" "DE3" "DE4" "DE6" "DE7" "DEB"
+## [12] "DEG" "DEC" "DED" "DEE" "DEF" "ES6" "SI0" "UKD" "UKG"
+
data.level.cap$irrig_vol.n0.dat
+
## [1] "DE" "EL" "FR" "UK"
+
data.level.cap$irrig_vol.nuts0.na
+
##  [1] "CH" "BE" "IE" "LI" "LU" "ME" "MK" "IS" "NO" "TR"
+
#add GDP and PPS data for regional typologies
+names(dbase.cap)
+
## [1] "NUTS_ID"        "irrig_vol"      "tot_eur_cap"    "tot_pps_cap"   
+## [5] "emp_rate_15_64" "tot_unemp"      "yth_unemp"
+
names(cap.gdp)
+
##  [1] "geo"         "tot_mil_eur" "tot_eur_cap" "tot_mil_pps" "tot_pps_cap"
+##  [6] "rur_mil_eur" "rur_eur_cap" "rur_mil_pps" "rur_pps_cap" "int_mil_eur"
+## [11] "int_eur_cap" "int_mil_pps" "int_pps_cap" "urb_mil_eur" "urb_eur_cap"
+## [16] "urb_mil_pps" "urb_pps_cap"
+
cap.gdp$NUTS_ID <- cap.gdp$geo
+#Join the per-capita GDP (eur) and PPS columns for the rural, intermediate and urban typologies; columns are selected by name rather than by position
+dbase.cap <- left_join(dbase.cap, cap.gdp[,c("rur_eur_cap", "rur_pps_cap", "int_eur_cap", "int_pps_cap", "urb_eur_cap", "urb_pps_cap", "NUTS_ID")])
+
## Joining, by = "NUTS_ID"
+
## Warning: Column `NUTS_ID` joining factors with different levels, coercing
+## to character vector
+
head(dbase.cap)
+
##   NUTS_ID irrig_vol tot_eur_cap tot_pps_cap emp_rate_15_64 tot_unemp
+## 1    AT11    3660.6       26700       24600       69.81530       5.7
+## 2    AT22     909.0       34800       32100       71.37077       5.1
+## 3    AT12   10828.8       31800       29300       73.06232       5.2
+## 4    AT13    1266.4       47300       43700       64.90683      11.3
+## 5    AT21     110.4       32700       30200       69.88481       5.4
+## 6    AT31     319.9       39600       36500       75.46507       4.5
+##   yth_unemp rur_eur_cap rur_pps_cap int_eur_cap int_pps_cap urb_eur_cap
+## 1      15.0    26690.97    24628.47          NA          NA          NA
+## 2      10.2    28015.67    25851.10    42289.66    39020.69          NA
+## 3       9.3    29392.91    27122.78    37037.74    34176.67    27574.60
+## 4      20.3          NA          NA          NA          NA    47307.69
+## 5      12.2    27340.58    25228.26    38025.00    35085.71          NA
+## 6       7.6    33453.92    30867.51    48936.17    45154.26          NA
+##   urb_pps_cap
+## 1          NA
+## 2          NA
+## 3    25441.27
+## 4    43651.88
+## 5          NA
+## 6          NA
+
names(dbase.cap) <- sub("_eur_cap$", "_gdp_cap", names(dbase.cap)) #rename every *_eur_cap column to *_gdp_cap by name instead of by column position
+
#In the chunks above, we allocate NUTS1 and NUTS0 data to NUTS2 regions. This works for variables that are ratios (e.g., percent, proportion, rates) but not for variables that are absolute. We need to edit these here. We also merge all the additional variables.
+
+names(dbase)
+
##  [1] "NUTS_ID"    "risk_pov"   "factor_in"  "train35bas" "train35ful"
+##  [6] "train_bas"  "train_ful"  "nitr_high"  "nitr_mod"   "nitr_poor" 
+## [11] "irrigated"  "forest"     "artific"    "soil_loss"  "com_birds" 
+## [16] "farm_birds" "org_farm"   "energy_rt"  "renew_pct"  "renew_prod"
+## [21] "gross_N"    "gross_P"    "conv_till"  "cons_till"  "zero_till" 
+## [26] "nfert"      "arable"     "grassland"  "permanent"  "soil_prod" 
+## [31] "tot_awu"    "gva"        "tot_uaa"
+
names(corine.aa.all.nuts)
+
## [1] "geo"     "sum_uaa"
+
names(dbase.cap)
+
##  [1] "NUTS_ID"        "irrig_vol"      "tot_gdp_cap"    "tot_pps_cap"   
+##  [5] "emp_rate_15_64" "tot_unemp"      "yth_unemp"      "rur_gdp_cap"   
+##  [9] "rur_pps_cap"    "int_gdp_cap"    "int_pps_cap"    "urb_gdp_cap"   
+## [13] "urb_pps_cap"
+
#The variables that have absolute values in dbase are:
+#factor_in (million euros)
+#tot_awu
+#gva
+#tot_uaa (ha)
+
+names(dbase)
+
##  [1] "NUTS_ID"    "risk_pov"   "factor_in"  "train35bas" "train35ful"
+##  [6] "train_bas"  "train_ful"  "nitr_high"  "nitr_mod"   "nitr_poor" 
+## [11] "irrigated"  "forest"     "artific"    "soil_loss"  "com_birds" 
+## [16] "farm_birds" "org_farm"   "energy_rt"  "renew_pct"  "renew_prod"
+## [21] "gross_N"    "gross_P"    "conv_till"  "cons_till"  "zero_till" 
+## [26] "nfert"      "arable"     "grassland"  "permanent"  "soil_prod" 
+## [31] "tot_awu"    "gva"        "tot_uaa"
+
names(dbase)[c(3,31:33)]
+
## [1] "factor_in" "tot_awu"   "gva"       "tot_uaa"
+
dbase.clean <- dbase[, !(names(dbase) %in% c("factor_in", "tot_awu", "gva", "tot_uaa"))] #drop the absolute-value variables by name rather than by column position
+dbase.clean$geo <- dbase.clean$NUTS_ID
+names(dbase.clean)
+
##  [1] "NUTS_ID"    "risk_pov"   "train35bas" "train35ful" "train_bas" 
+##  [6] "train_ful"  "nitr_high"  "nitr_mod"   "nitr_poor"  "irrigated" 
+## [11] "forest"     "artific"    "soil_loss"  "com_birds"  "farm_birds"
+## [16] "org_farm"   "energy_rt"  "renew_pct"  "renew_prod" "gross_N"   
+## [21] "gross_P"    "conv_till"  "cons_till"  "zero_till"  "nfert"     
+## [26] "arable"     "grassland"  "permanent"  "soil_prod"  "geo"
+
#First, convert from absolute to ratios for those variables possible, then translate to NUTS2 regions as a ratio
+
+###factor_in
+names(sdg.dat)
+
##  [1] "geo"        "STAT_LEVL_" "SHAPE_AREA" "SHAPE_LEN"  "risk_pov"  
+##  [6] "factor_in"  "train35bas" "train35ful" "train_bas"  "train_ful" 
+## [11] "nitr_high"  "nitr_mod"   "nitr_poor"  "irrigated"  "forest"    
+## [16] "artific"    "soil_loss"  "com_birds"  "farm_birds" "org_farm"  
+## [21] "energy_rt"  "renew_pct"  "renew_prod" "gross_N"    "gross_P"
+
names(con.dat)
+
##  [1] "geo"        "STAT_LEVL_" "SHAPE_AREA" "SHAPE_LEN"  "conv_till" 
+##  [6] "cons_till"  "zero_till"  "nfert"      "arable"     "grassland" 
+## [11] "permanent"  "soil_prod"  "tot_awu"    "gva"        "tot_uaa"
+
afi.awu <- left_join(sdg.dat[,c("geo", "factor_in")], con.dat[,c("geo", "tot_awu")])
+
## Joining, by = "geo"
+
afi.awu$afi_awu <- afi.awu$factor_in / afi.awu$tot_awu * 1e+06 #to convert to euros per awu
+head(afi.awu)
+
##    geo  factor_in  tot_awu   afi_awu
+## 1   AT 2315.97000 138800.0 16685.663
+## 2  AT1 1070.85000       NA        NA
+## 3 AT11  173.70857   7007.5 24788.951
+## 4 AT12  884.43857  38360.0 23056.271
+## 5 AT13   12.70143   1975.0  6431.103
+## 6  AT2  505.33857       NA        NA
+
#All Italian regions have zeros for factor income in the Eurostat table agr_r_accts, but Italy has NUTS0 data from table aact_eaa01. We will correct this here.
+afi.awu[which(afi.awu$afi_awu == 0), c('geo', 'afi_awu')]
+
##      geo afi_awu
+## 233 ITC2       0
+## 234 ITC3       0
+## 235 ITC4       0
+## 237 ITF1       0
+## 238 ITF2       0
+## 239 ITF3       0
+## 240 ITF4       0
+## 241 ITF5       0
+## 242 ITF6       0
+## 244 ITG1       0
+## 245 ITG2       0
+## 247 ITH1       0
+## 248 ITH2       0
+## 249 ITH3       0
+## 250 ITH4       0
+## 251 ITH5       0
+## 253 ITI1       0
+## 254 ITI2       0
+## 255 ITI3       0
+## 256 ITI4       0
+## 290   IT       0
+## 292 ITC1       0
+
afi.nuts0[afi.nuts0$geo == "IT",]
+
##    geo factor_income_mean
+## 19  IT           22183.11
+
afi.awu[afi.awu$geo == "IT",]
+
##     geo factor_in tot_awu afi_awu
+## 290  IT         0 1111788       0
+
afi.awu[which(afi.awu$afi_awu == 0 & startsWith(as.character(afi.awu$geo), "IT")), 'afi_awu'] <- afi.nuts0[afi.nuts0$geo == "IT", 'factor_income_mean'] / afi.awu[afi.awu$geo == "IT", 'tot_awu'] * 1e+06 #only overwrite the zero rows whose code starts with "IT", so a zero in any other country is never replaced with the Italian national value
+
+afi.awu[afi.awu$geo == "IT",]
+
##     geo factor_in tot_awu  afi_awu
+## 290  IT         0 1111788 19952.65
+
###pesticides
+pesticides <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal12/Pesticide_sales/all_pesticide_sales_mean_allnuts.csv", head=T)
+head(pesticides)
+
##   geo pesticides_sum
+## 1  AT        3602507
+## 2  BE        6545569
+## 3  BG        1766558
+## 4  CH        2191825
+## 5  CY         819175
+## 6  CZ        6298810
+
pest.rate <- left_join(con.dat[,c("geo", "tot_uaa")], pesticides)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
head(pest.rate)
+
##    geo tot_uaa pesticides_sum
+## 1   AT 2698320        3602507
+## 2  AT1      NA             NA
+## 3 AT11  181150             NA
+## 4 AT12  895805             NA
+## 5 AT13    7190             NA
+## 6  AT2      NA             NA
+
pest.rate$pest_rate <- pest.rate$pesticides_sum / pest.rate$tot_uaa
+summary(pest.rate)
+
##      geo               tot_uaa         pesticides_sum    
+##  Length:471         Min.   :       0   Min.   :  144951  
+##  Class :character   1st Qu.:  179331   1st Qu.: 1914345  
+##  Mode  :character   Median :  403672   Median : 3624443  
+##                     Mean   : 1118138   Mean   :13990649  
+##                     3rd Qu.:  809116   3rd Qu.:11259235  
+##                     Max.   :27776795   Max.   :73505445  
+##                     NA's   :197        NA's   :440       
+##    pest_rate      
+##  Min.   : 0.3874  
+##  1st Qu.: 0.9085  
+##  Median : 1.4665  
+##  Mean   : 2.4684  
+##  3rd Qu.: 2.7402  
+##  Max.   :13.1415  
+##  NA's   :442
+
###irrig_tot
+irrig_tot <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/SDGs/Goal6/Irrigation_volume/irrigation_volume_total_mean.csv", head=T)
+head(irrig_tot)
+
##   NUTS_ID irrigation_volume_median irrigation_volume_mean
+## 1      BG                  714.535              674.93500
+## 2      CH                  135.600              135.60000
+## 3      CY                  163.900              160.01667
+## 4      AL                       NA                     NA
+## 5      CZ                   20.650               21.51667
+## 6      BE                       NA                     NA
+
names(irrig_tot)[c(1,3)] <- c("geo", "irrig_tot")
+irrig.rate <- left_join(sdg.dat[,c('geo', 'irrigated')], irrig_tot[,c('geo', 'irrig_tot')])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
irrig.rate <- left_join(irrig.rate, cap.irrig)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
irrig.rate <- left_join(irrig.rate, corine.aa.all.nuts)
+
## Joining, by = "geo"
+
head(irrig.rate)
+
##    geo irrigated irrig_tot irrig_vol   sum_uaa
+## 1   AT     1.350        18   18316.2 2677543.8
+## 2  AT1        NA        NA   15755.7 1230331.2
+## 3 AT11     5.850        NA    3660.6  219300.0
+## 4 AT12     2.650        NA   10828.8 1004956.2
+## 5 AT13    10.525        NA    1266.4    6075.0
+## 6  AT2        NA        NA    1019.4  602868.8
+
irrig.rate[is.na(irrig.rate$irrig_vol),'irrig_vol'] <- irrig.rate[is.na(irrig.rate$irrig_vol), 'irrig_tot'] * 1e+03 #use Eurostat data where CAP data is NA. The two sources are on different scales (AT: irrig_tot 18 vs irrig_vol 18316.2), so irrig_tot is rescaled by 1e+03 to the irrig_vol scale before filling - NOTE(review): confirm the units of both source tables
+irrig.rate$irrig_rate <- irrig.rate$irrig_vol / irrig.rate$sum_uaa * 1e+03 #to convert to cubic metres per ha note this is total irrigation volume over all agricultural area, not just irrigated area, which is captured by the percentage of UAA irrigated
+
+###GVA per AWU
+gva.awu <- left_join(con.dat[,c("geo", "gva")], con.dat[,c("geo", "tot_awu")])
+
## Joining, by = "geo"
+
gva.awu$gva_awu <- gva.awu$gva / gva.awu$tot_awu * 1e+06 #to convert to euros per awu
+head(gva.awu)
+
##    geo        gva  tot_awu  gva_awu
+## 1   AT 2769.36857 138800.0 19952.22
+## 2  AT1 1172.67143       NA       NA
+## 3 AT11  177.76429   7007.5 25367.72
+## 4 AT12  975.25143  38360.0 25423.66
+## 5 AT13   19.65571   1975.0  9952.26
+## 6  AT2  641.63571       NA       NA
+
###AWU relative to population aged 15-64
+pop_15_64 <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/SDGs/Goal8/Employment_rate/c5_en_population_15_64.csv", head=T)
+awu.pop <- left_join(con.dat[,c("geo", "tot_awu")], pop_15_64)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
awu.pop$labour_use <- awu.pop$tot_awu / (1000 * awu.pop$pop_15_64) #Pop in 1000s of persons, AWU in persons
+head(awu.pop)
+
##    geo  tot_awu pop_15_64  labour_use
+## 1   AT 138800.0    5790.3 0.023971124
+## 2  AT1       NA    2525.2          NA
+## 3 AT11   7007.5     189.5 0.036978892
+## 4 AT12  38360.0    1079.9 0.035521808
+## 5 AT13   1975.0    1255.8 0.001572703
+## 6  AT2       NA    1180.2          NA
+
#add these variables to clean database using function from above
+add.edit.dat <- left_join(sdg.dat[,1:2], afi.awu[,c(1,4)])
+
## Joining, by = "geo"
+
add.edit.dat <- left_join(add.edit.dat, gva.awu[,c(1,4)])
+
## Joining, by = "geo"
+
add.edit.dat <- left_join(add.edit.dat, awu.pop[,c(1,4)])
+
## Joining, by = "geo"
+
add.edit.dat <- left_join(add.edit.dat, pest.rate[,c(1,4)])
+
## Joining, by = "geo"
+
add.edit.dat <- left_join(add.edit.dat, irrig.rate[,c(1,6)])
+
## Joining, by = "geo"
+
head(add.edit.dat)
+
##    geo STAT_LEVL_   afi_awu  gva_awu  labour_use pest_rate irrig_rate
+## 1   AT          0 16685.663 19952.22 0.023971124  1.335093   6.840673
+## 2  AT1          1        NA       NA          NA        NA  12.806063
+## 3 AT11          2 24788.951 25367.72 0.036978892        NA  16.692202
+## 4 AT12          2 23056.271 25423.66 0.035521808        NA  10.775394
+## 5 AT13          2  6431.103  9952.26 0.001572703        NA 208.460905
+## 6  AT2          1        NA       NA          NA        NA   1.690915
+
#Names of the derived ratio variables added to the clean database in this step
+edit.var.names <- c("irrig_rate",
+                    "afi_awu",
+                    "gva_awu",
+                    "labour_use",
+                    "pest_rate")
+
+#Create the new columns (all NA) in the order listed above, so they are appended to dbase.clean in that order
+dbase.clean[edit.var.names] <- NA
+
+#Repeat function above to allocate NUTS1 and NUTS0 to NUTS2 units. Again, this is appropriate for ratio variables but not absolute variables. This will be cleaned and corrected below.
+#One list slot per variable and per data level: regions filled from NUTS2, NUTS1 or NUTS0 data, and countries with no data at all.
+#The slot names are built from edit.var.names rather than from hard-coded column positions in dbase.clean, so they stay correct if columns are added or reordered upstream.
+level.suffixes <- c("n2.dat", "n1.dat", "n0.dat", "nuts0.na")
+data.level.edit <- vector("list", length(level.suffixes) * length(edit.var.names))
+names(data.level.edit) <- paste(rep(edit.var.names, times = length(level.suffixes)),
+                                rep(level.suffixes, each = length(edit.var.names)),
+                                sep = ".")
+labels(data.level.edit)
+
##  [1] "irrig_rate.n2.dat"   "afi_awu.n2.dat"      "gva_awu.n2.dat"     
+##  [4] "labour_use.n2.dat"   "pest_rate.n2.dat"    "irrig_rate.n1.dat"  
+##  [7] "afi_awu.n1.dat"      "gva_awu.n1.dat"      "labour_use.n1.dat"  
+## [10] "pest_rate.n1.dat"    "irrig_rate.n0.dat"   "afi_awu.n0.dat"     
+## [13] "gva_awu.n0.dat"      "labour_use.n0.dat"   "pest_rate.n0.dat"   
+## [16] "irrig_rate.nuts0.na" "afi_awu.nuts0.na"    "gva_awu.nuts0.na"   
+## [19] "labour_use.nuts0.na" "pest_rate.nuts0.na"
+
#For every added variable: find the finest NUTS level that has data for each region, record the region codes per level in data.level.edit, and fill the rows of dbase.clean.
+#Columns of add.edit.dat are referenced explicitly instead of via attach(), so that a workspace object called `geo` or `STAT_LEVL_` cannot silently mask them.
+geo.all <- add.edit.dat$geo
+nuts.lvl <- add.edit.dat$STAT_LEVL_
+for(i in names(add.edit.dat[,-c(1:2)])) {
+  var.na <- is.na(add.edit.dat[,i])
+
+  #Dropping the last character of a NUTS code gives the code of its parent unit
+  #NUTS2 units without data
+  nuts2.na <- geo.all[nuts.lvl == 2 & var.na]
+  #Parent NUTS1 units of those NUTS2 units, and the subset that is itself without data
+  nuts1 <- geo.all[nuts.lvl == 1 & geo.all %in% sub(".$", "", nuts2.na)]
+  nuts1.na <- geo.all[geo.all %in% nuts1 & var.na]
+  #Parent NUTS0 units of the empty NUTS1 units, and the subset that is itself without data
+  nuts0 <- geo.all[nuts.lvl == 0 & geo.all %in% sub(".$", "", nuts1.na)]
+  nuts0.na <- geo.all[geo.all %in% nuts0 & var.na]
+
+  #NUTS2 data
+  n2.dat <- geo.all[!(geo.all %in% nuts2.na) & nuts.lvl == 2]
+  #NUTS1 data
+  n1.dat <- nuts1[!nuts1 %in% nuts1.na]
+  #NUTS0 data
+  n0.dat <- nuts0[!nuts0 %in% nuts0.na]
+  #NO DATA: nuts0.na
+
+  data.level.edit[[paste(i, 'n2.dat', sep='.')]] <- n2.dat
+  data.level.edit[[paste(i, 'n1.dat', sep='.')]] <- n1.dat
+  data.level.edit[[paste(i, 'n0.dat', sep='.')]] <- n0.dat
+  data.level.edit[[paste(i, 'nuts0.na', sep='.')]] <- nuts0.na
+
+  #Fill from the coarsest level down; the order matters, because each finer level overwrites the coarser value written before it.
+  #Patterns are anchored with "^" so a parent code only matches at the start of NUTS_ID (the original unanchored pattern could match anywhere in the ID).
+  #NOTE(review): for 4-character NUTS2 IDs the selected rows are unchanged - confirm all NUTS_ID values in dbase.clean are 4 characters.
+  for(e in n0.dat) {
+    dbase.clean[grepl(paste0("^", e, ".."), dbase.clean$NUTS_ID), i] <- add.edit.dat[geo.all == e, i]
+  }
+
+  for(e in n1.dat) {
+    dbase.clean[grepl(paste0("^", e, "."), dbase.clean$NUTS_ID), i] <- add.edit.dat[geo.all == e, i]
+  }
+
+  for(e in n2.dat) {
+    dbase.clean[dbase.clean$NUTS_ID == e, i] <- add.edit.dat[geo.all == e, i]
+  }
+}
+
+summary(dbase.clean)
+
##     NUTS_ID       risk_pov        train35bas        train35ful    
+##  AT11   :  1   Min.   : 9.971   Min.   :0.00000   Min.   :0.0000  
+##  AT12   :  1   1st Qu.:18.586   1st Qu.:0.09613   1st Qu.:0.1264  
+##  AT13   :  1   Median :23.514   Median :0.22800   Median :0.2600  
+##  AT21   :  1   Mean   :25.960   Mean   :0.26424   Mean   :0.2738  
+##  AT22   :  1   3rd Qu.:29.680   3rd Qu.:0.35996   3rd Qu.:0.3825  
+##  AT31   :  1   Max.   :54.150   Max.   :0.88217   Max.   :0.8550  
+##  (Other):314   NA's   :2        NA's   :52        NA's   :52      
+##    train_bas         train_ful         nitr_high          nitr_mod     
+##  Min.   :0.01171   Min.   :0.00188   Min.   :  4.082   Min.   : 0.000  
+##  1st Qu.:0.10627   1st Qu.:0.04939   1st Qu.: 66.302   1st Qu.: 6.533  
+##  Median :0.19961   Median :0.12807   Median : 70.505   Median :15.896  
+##  Mean   :0.26250   Mean   :0.15796   Mean   : 75.328   Mean   :13.773  
+##  3rd Qu.:0.36488   3rd Qu.:0.25108   3rd Qu.: 87.591   3rd Qu.:18.416  
+##  Max.   :0.94840   Max.   :0.50303   Max.   :100.000   Max.   :60.000  
+##  NA's   :50        NA's   :50        NA's   :44        NA's   :44      
+##    nitr_poor        irrigated           forest           artific       
+##  Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.: 4.106   1st Qu.: 0.3312   1st Qu.:0.08957   1st Qu.:0.02056  
+##  Median : 8.883   Median : 1.2250   Median :0.24904   Median :0.04020  
+##  Mean   :10.898   Mean   : 5.7571   Mean   :0.25141   Mean   :0.09023  
+##  3rd Qu.:15.385   3rd Qu.: 6.5000   3rd Qu.:0.37365   3rd Qu.:0.08193  
+##  Max.   :68.367   Max.   :74.5500   Max.   :0.75860   Max.   :1.00000  
+##  NA's   :44       NA's   :28                                           
+##    soil_loss         com_birds       farm_birds        org_farm     
+##  Min.   : 0.0300   Min.   :54.92   Min.   : 63.78   Min.   : 0.000  
+##  1st Qu.: 0.7047   1st Qu.:62.14   1st Qu.: 81.34   1st Qu.: 1.200  
+##  Median : 1.5005   Median :69.50   Median : 83.82   Median : 2.687  
+##  Mean   : 2.5482   Mean   :69.70   Mean   : 81.90   Mean   : 4.056  
+##  3rd Qu.: 2.9420   3rd Qu.:81.30   3rd Qu.: 85.30   3rd Qu.: 5.204  
+##  Max.   :17.6050   Max.   :97.22   Max.   :116.60   Max.   :27.487  
+##  NA's   :44        NA's   :158     NA's   :94       NA's   :28      
+##    energy_rt         renew_pct        renew_prod         gross_N       
+##  Min.   :0.00000   Min.   : 0.000   Min.   : 0.8855   Min.   :  2.857  
+##  1st Qu.:0.03503   1st Qu.: 3.074   1st Qu.: 6.2422   1st Qu.: 41.821  
+##  Median :0.06128   Median : 6.124   Median : 8.3156   Median : 67.333  
+##  Mean   :0.15052   Mean   :11.225   Mean   :12.4318   Mean   : 67.553  
+##  3rd Qu.:0.09725   3rd Qu.:22.515   3rd Qu.:18.0797   3rd Qu.: 85.988  
+##  Max.   :1.75149   Max.   :41.011   Max.   :37.7797   Max.   :190.167  
+##  NA's   :44        NA's   :82       NA's   :45        NA's   :30       
+##     gross_P         conv_till         cons_till         zero_till      
+##  Min.   :-6.500   Min.   :0.08646   Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.:-1.667   1st Qu.:0.46182   1st Qu.:0.05077   1st Qu.:0.00920  
+##  Median : 1.833   Median :0.61740   Median :0.12499   Median :0.01843  
+##  Mean   : 1.941   Mean   :0.60410   Mean   :0.18031   Mean   :0.03000  
+##  3rd Qu.: 4.714   3rd Qu.:0.73832   3rd Qu.:0.28382   3rd Qu.:0.04003  
+##  Max.   :31.000   Max.   :0.99752   Max.   :0.65066   Max.   :0.19303  
+##  NA's   :30       NA's   :53        NA's   :53        NA's   :53       
+##      nfert            arable        grassland       permanent      
+##  Min.   : 0.000   Min.   : 0.00   Min.   : 0.00   Min.   : 0.0000  
+##  1st Qu.: 6.448   1st Qu.:39.68   1st Qu.:17.20   1st Qu.: 0.3546  
+##  Median : 9.917   Median :62.28   Median :32.97   Median : 1.1324  
+##  Mean   :10.975   Mean   :57.72   Mean   :35.74   Mean   : 5.7197  
+##  3rd Qu.:14.254   3rd Qu.:78.19   3rd Qu.:48.62   3rd Qu.: 5.6520  
+##  Max.   :29.456   Max.   :99.28   Max.   :98.84   Max.   :64.6743  
+##  NA's   :11       NA's   :44      NA's   :44      NA's   :44       
+##    soil_prod         geo        irrig_rate          afi_awu      
+##  Min.   :3.00   AT11   :  1   Min.   :   0.000   Min.   : -3221  
+##  1st Qu.:6.00   AT12   :  1   1st Qu.:   1.181   1st Qu.: 11878  
+##  Median :6.00   AT13   :  1   Median :   7.396   Median : 20559  
+##  Mean   :6.45   AT21   :  1   Mean   : 157.441   Mean   : 24680  
+##  3rd Qu.:7.00   AT22   :  1   3rd Qu.:  82.321   3rd Qu.: 34388  
+##  Max.   :8.00   AT31   :  1   Max.   :4156.725   Max.   :107266  
+##  NA's   :51     (Other):314   NA's   :22         NA's   :29      
+##     gva_awu           labour_use        pest_rate      
+##  Min.   :   697.4   Min.   :0.00000   Min.   : 0.3874  
+##  1st Qu.: 10428.3   1st Qu.:0.01049   1st Qu.: 1.2195  
+##  Median : 24639.3   Median :0.01976   Median : 1.8836  
+##  Mean   : 26611.6   Mean   :0.03594   Mean   : 2.4120  
+##  3rd Qu.: 38162.3   3rd Qu.:0.04539   3rd Qu.: 3.1595  
+##  Max.   :122952.6   Max.   :0.22557   Max.   :13.1415  
+##  NA's   :29         NA's   :44        NA's   :37
+
head(dbase.clean)
+
##   NUTS_ID risk_pov train35bas train35ful train_bas train_ful nitr_high
+## 1    AT11 13.73333  0.1375661  0.3333333 0.1243050 0.1779190  64.58924
+## 2    AT22 17.26667  0.2160980  0.3648294 0.2017089 0.2413594  64.58924
+## 3    AT12 13.83333  0.2084775  0.4809689 0.2534787 0.3449437  64.58924
+## 4    AT13 27.23333  0.3750000  0.7500000 0.1753247 0.4740260  64.58924
+## 5    AT21 17.20000  0.2306238  0.3648393 0.2076173 0.2250348  64.58924
+## 6    AT31 15.00000  0.2508418  0.4284512 0.2014381 0.2857610  64.58924
+##   nitr_mod nitr_poor irrigated    forest    artific soil_loss com_birds
+## 1 20.20774  15.20302     5.850 0.3161203 0.04355635     1.842        NA
+## 2 20.20774  15.20302     0.325 0.6127954 0.03306278     5.804        NA
+## 3 20.20774  15.20302     2.650 0.4286079 0.04875064     2.236        NA
+## 4 20.20774  15.20302    10.525 0.1469534 0.73118280     1.014        NA
+## 5 20.20774  15.20302     0.100 0.5998934 0.03047416    11.671        NA
+## 6 20.20774  15.20302     0.125 0.4027358 0.04900973     3.791        NA
+##   farm_birds org_farm  energy_rt renew_pct renew_prod  gross_N  gross_P
+## 1      65.98 19.43430 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 2      65.98 12.80858 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 3      65.98 13.41584 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 4      65.98 16.44137 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 5      65.98 10.68078 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 6      65.98 12.31071 0.08319988  32.65559   7.068917 32.57143 1.833333
+##   conv_till  cons_till   zero_till    nfert   arable grassland  permanent
+## 1 0.6182190 0.31992068 0.025012794 7.684000 83.64566  8.715722  7.5451998
+## 2 0.8887161 0.05005656 0.024109163 7.551429 37.02489 58.694493  4.2139773
+## 3 0.6226791 0.32803537 0.019896256 7.452800 76.22380 20.140837  3.5858503
+## 4 0.5109890 0.40476190 0.007326007 7.497000 79.80050 10.099751 10.0997506
+## 5 0.8592546 0.05928605 0.032552288 8.131500 28.48779 71.285286  0.1679223
+## 6 0.8442576 0.12043311 0.014198645 9.138333 56.49367 43.164202  0.2648111
+##   soil_prod  geo  irrig_rate   afi_awu  gva_awu  labour_use pest_rate
+## 1         6 AT11  16.6922025 24788.951 25367.72 0.036978892  1.335093
+## 2         6 AT22   2.2086896 13958.345 18388.41 0.034606425  1.335093
+## 3         6 AT12  10.7753945 23056.271 25423.66 0.035521808  1.335093
+## 4         6 AT13 208.4609053  6431.103  9952.26 0.001572703  1.335093
+## 5         6 AT21   0.5770663 10741.948 11827.48 0.028435272  1.335093
+## 6         6 AT31   0.5491021 15263.545 22028.20 0.030650579  1.335093
+
tail(dbase.clean)
+
##     NUTS_ID risk_pov train35bas train35ful  train_bas  train_ful nitr_high
+## 315    UKD3 23.51429 0.00000000  0.2000000 0.07100592 0.05621302  97.15694
+## 316    TRC1 54.15000         NA         NA         NA         NA        NA
+## 317    TRC2 54.15000         NA         NA         NA         NA        NA
+## 318    UKD4 23.51429 0.15625000  0.3125000 0.07031828 0.07846040  97.15694
+## 319    TRC3 54.15000         NA         NA         NA         NA        NA
+## 320    UKM6 23.51429 0.05925926  0.1555556 0.03854333 0.06990962  97.15694
+##     nitr_mod nitr_poor irrigated      forest     artific soil_loss
+## 315 2.388173 0.4548901     0.500 0.010517799 0.550161812     2.071
+## 316       NA        NA        NA 0.022020475 0.013843281        NA
+## 317       NA        NA        NA 0.006621164 0.007146191        NA
+## 318 2.388173 0.4548901     0.625 0.014225182 0.109261501     1.905
+## 319       NA        NA        NA 0.037659533 0.005497742        NA
+## 320 2.388173 0.4548901     0.000 0.126063524 0.003747982     6.174
+##     com_birds farm_birds  org_farm  energy_rt renew_pct renew_prod
+## 315      69.5      83.82 0.0000000 0.04487651   23.7199   7.183683
+## 316        NA         NA        NA         NA        NA         NA
+## 317        NA         NA        NA         NA        NA         NA
+## 318      69.5      83.82 0.8233184 0.04487651   23.7199   7.183683
+## 319        NA         NA        NA         NA        NA         NA
+## 320      69.5      83.82 0.9873238 0.04487651   23.7199   7.183683
+##      gross_N  gross_P conv_till  cons_till   zero_till     nfert    arable
+## 315 86.42857 5.857143 0.5116279 0.26976744 0.083720930 17.954800 21.626717
+## 316       NA       NA        NA         NA          NA  5.709333        NA
+## 317       NA       NA        NA         NA          NA  5.508154        NA
+## 318 86.42857 5.857143 0.5070682 0.07652120 0.000921942 17.686000 20.185625
+## 319       NA       NA        NA         NA          NA  5.452800        NA
+## 320 86.42857 5.857143 0.3571254 0.01623576 0.064146551 13.097444  6.222771
+##     grassland   permanent soil_prod  geo irrig_rate    afi_awu    gva_awu
+## 315  78.21169 0.134661998         6 UKD3  0.7351276 107265.558 122952.557
+## 316        NA          NA        NA TRC1  1.1807506         NA         NA
+## 317        NA          NA        NA TRC2  1.1807506         NA         NA
+## 318  79.77323 0.041148500         6 UKD4  1.4023562  11063.440  13125.354
+## 319        NA          NA        NA TRC3  1.1807506         NA         NA
+## 320  93.77578 0.001447408         6 UKM6  0.7098479   9823.595   4882.977
+##       labour_use pest_rate
+## 315 0.0007488926  1.219472
+## 316           NA        NA
+## 317           NA        NA
+## 318 0.0083215413  1.219472
+## 319           NA        NA
+## 320 0.0299345848  1.219472
+
names(dbase.clean)
+
##  [1] "NUTS_ID"    "risk_pov"   "train35bas" "train35ful" "train_bas" 
+##  [6] "train_ful"  "nitr_high"  "nitr_mod"   "nitr_poor"  "irrigated" 
+## [11] "forest"     "artific"    "soil_loss"  "com_birds"  "farm_birds"
+## [16] "org_farm"   "energy_rt"  "renew_pct"  "renew_prod" "gross_N"   
+## [21] "gross_P"    "conv_till"  "cons_till"  "zero_till"  "nfert"     
+## [26] "arable"     "grassland"  "permanent"  "soil_prod"  "geo"       
+## [31] "irrig_rate" "afi_awu"    "gva_awu"    "labour_use" "pest_rate"
+
#check data level for irrig_rate as an example
+data.level.edit$irrig_rate.n2.dat
+
##   [1] "AT11" "AT12" "AT13" "AT21" "AT22" "AT31" "AT32" "BG31" "BG32" "BG33"
+##  [11] "BG34" "BG41" "BG42" "CY00" "CZ02" "CZ03" "CZ04" "CZ05" "CZ06" "CZ07"
+##  [21] "CZ08" "AT33" "AT34" "DK01" "DK02" "DK03" "DK04" "DK05" "EE00" "EL30"
+##  [31] "EL41" "EL42" "EL43" "ES11" "ES12" "ES13" "ES21" "ES22" "ES23" "ES24"
+##  [41] "ES30" "FR52" "FR53" "FR61" "FR62" "FR63" "FR71" "FR72" "FR81" "FR82"
+##  [51] "FR83" "HR03" "HR04" "ES41" "ES42" "ES43" "ES51" "ES52" "ES53" "ES61"
+##  [61] "ES62" "ES70" "FI19" "FI1B" "FI1C" "FI1D" "FI20" "FR10" "FR21" "FR22"
+##  [71] "FR23" "FR24" "FR25" "FR26" "FR30" "FR41" "FR42" "FR43" "FR51" "HU10"
+##  [81] "HU21" "HU22" "HU23" "HU31" "HU32" "HU33" "ITC2" "ITC3" "ITC4" "ITF1"
+##  [91] "ITF2" "ITF3" "ITF4" "ITF5" "ITF6" "ITG1" "ITG2" "ITH1" "ITH2" "ITH3"
+## [101] "ITH4" "ITH5" "ITI1" "ITI2" "ITI3" "ITI4" "LT00" "LV00" "MT00" "NL11"
+## [111] "NL12" "NL13" "NL21" "NL22" "ITC1" "PL43" "PL51" "PL52" "PL61" "PL62"
+## [121] "PL63" "PT11" "PT15" "PT16" "PT17" "PT18" "PT20" "PT30" "RO11" "RO12"
+## [131] "RO21" "RO22" "RO31" "RO32" "RO41" "NL23" "NL31" "NL32" "NL33" "NL34"
+## [141] "NL41" "NL42" "PL11" "PL12" "PL21" "PL22" "PL31" "PL32" "PL33" "PL34"
+## [151] "PL41" "PL42" "RO42" "SE11" "SE12" "SE21" "SK01" "SK02" "SK03" "SK04"
+## [161] "UKC1" "SE22" "SE23" "SE31" "SE32" "SE33" "UKM2" "UKM3" "UKM5" "UKM6"
+## [171] "UKC2" "UKD4" "UKD6" "UKE1" "UKE2" "UKE3" "UKE4" "UKF1" "UKF2" "UKF3"
+## [181] "UKG1" "UKG2" "UKH1" "UKH2" "UKH3" "UKJ1" "UKJ2" "UKJ3" "UKJ4" "UKK1"
+## [191] "UKK2" "UKK3" "UKK4" "UKL1" "UKL2"
+
data.level.edit$irrig_rate.n1.dat
+
##  [1] "CZ0" "DE8" "DE9" "DEA" "DE1" "DE2" "DE3" "DE4" "DE6" "DE7" "DEB"
+## [12] "DEG" "DEC" "DED" "DEE" "DEF" "ES6" "SI0" "UKD" "UKG"
+
data.level.edit$irrig_rate.n0.dat
+
## [1] "CH" "DE" "EL" "FR" "LU" "MK" "IS" "TR" "UK"
+
data.level.edit$irrig_rate.nuts0.na
+
## [1] "BE" "IE" "LI" "ME" "NO"
+
#Add CAP data to clean database
+dbase.cap$geo <- dbase.cap$NUTS_ID
+names(dbase.cap)
+
##  [1] "NUTS_ID"        "irrig_vol"      "tot_gdp_cap"    "tot_pps_cap"   
+##  [5] "emp_rate_15_64" "tot_unemp"      "yth_unemp"      "rur_gdp_cap"   
+##  [9] "rur_pps_cap"    "int_gdp_cap"    "int_pps_cap"    "urb_gdp_cap"   
+## [13] "urb_pps_cap"    "geo"
+
dbase.clean <- left_join(dbase.clean, dbase.cap[,c(3:14)])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
###Emissions data
+emi_co2eq <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='mean_rate_co2_eqv')
+
## Reading layer `mean_rate_co2_eqv' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
emi_co2eq$geo <- emi_co2eq$NUTS_ID
+head(emi_co2eq)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA      MEAN  geo
+## 1    AT11         1  77114  4819625000  653096.3 AT11
+## 2    AT22         2 142371  8898187500  954802.7 AT22
+## 3    AT12         3 363790 22736875000 1136117.0 AT12
+## 4    AT13         4   2207   137937500  457098.6 AT13
+## 5    AT21         5  65190  4074375000  888623.1 AT21
+## 6    AT31         6 209774 13110875000 1943324.3 AT31
+
emi_nh3 <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='mean_rate_nh3')
+
## Reading layer `mean_rate_nh3' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
emi_nh3$geo <- emi_nh3$NUTS_ID
+head(emi_nh3)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA      MEAN  geo
+## 1    AT11         1  77114  4819625000  8690.371 AT11
+## 2    AT22         2 142371  8898187500 13155.494 AT22
+## 3    AT12         3 363790 22736875000 14253.957 AT12
+## 4    AT13         4   2207   137937500  6375.132 AT13
+## 5    AT21         5  65190  4074375000 11187.491 AT21
+## 6    AT31         6 209774 13110875000 22472.061 AT31
+
emi_pm10 <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='mean_rate_pm10')
+
## Reading layer `mean_rate_pm10' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
emi_pm10$geo <- emi_pm10$NUTS_ID
+head(emi_pm10)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA      MEAN  geo
+## 1    AT11         1  77114  4819625000 1025.7531 AT11
+## 2    AT22         2 142371  8898187500  850.9032 AT22
+## 3    AT12         3 363790 22736875000 1285.1966 AT12
+## 4    AT13         4   2207   137937500  609.2881 AT13
+## 5    AT21         5  65190  4074375000  471.5642 AT21
+## 6    AT31         6 209774 13110875000 1298.3660 AT31
+
emi_pm25 <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='mean_rate_pm25')
+
## Reading layer `mean_rate_pm25' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
emi_pm25$geo <- emi_pm25$NUTS_ID
+head(emi_pm25)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA     MEAN  geo
+## 1    AT11         1  77114  4819625000 818.8200 AT11
+## 2    AT22         2 142371  8898187500 331.4128 AT22
+## 3    AT12         3 363790 22736875000 856.1814 AT12
+## 4    AT13         4   2207   137937500 426.0527 AT13
+## 5    AT21         5  65190  4074375000 220.7500 AT21
+## 6    AT31         6 209774 13110875000 629.6217 AT31
+
###Soils data
+soc <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_SOC_2010_stats')
+
## Reading layer `nuts2_SOC_2010_stats' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
soc$geo <- soc$NUTS_ID
+head(soc)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA      MIN       MAX     RANGE
+## 1    AT11         1  35064  2191500000 30.83195 155.75535 124.92340
+## 2    AT22         2  65757  4109812500 34.05482 498.82918 464.77436
+## 3    AT12         3 160776 10048500000 18.63442 372.08350 353.44907
+## 4    AT13         4    955    59687500 40.83359  67.69893  26.86534
+## 5    AT21         5  30546  1909125000 41.81581 323.11070 281.29489
+## 6    AT31         6  93225  5826562500 24.44537 492.61410 468.16873
+##        MEAN       STD         SUM  geo
+## 1  59.33579 20.520380  2080550.19 AT11
+## 2  97.49513 43.484832  6410987.08 AT22
+## 3  64.25874 20.290802 10331263.60 AT12
+## 4  49.22190  6.893331    47006.92 AT13
+## 5 101.03633 46.169421  3086255.75 AT21
+## 6 100.91691 56.705409  9407978.51 AT31
+
biol_threats <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_soil_bio_func_threat_stats')
+
## Reading layer `nuts2_soil_bio_func_threat_stats' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
biol_threats$geo <- biol_threats$NUTS_ID
+head(biol_threats)
+
##   NUTS_ID ZONE_CODE  COUNT        AREA       MIN       MAX     RANGE
+## 1    AT11         1  35064  2191500000 0.1639321 0.5476973 0.3837653
+## 2    AT22         2  65762  4110125000 0.1518843 0.5750966 0.4232123
+## 3    AT12         3 160779 10048687500 0.1160565 0.8375627 0.7215062
+## 4    AT13         4    955    59687500 0.1727176 0.4496170 0.2768994
+## 5    AT21         5  30605  1912812500 0.1625200 0.4503910 0.2878710
+## 6    AT31         6  93228  5826750000 0.1375850 0.7379893 0.6004044
+##        MEAN        STD       SUM  geo
+## 1 0.2693722 0.04402911  9445.267 AT11
+## 2 0.2525372 0.05795605 16607.349 AT22
+## 3 0.2670201 0.06859833 42931.226 AT12
+## 4 0.2611445 0.05979863   249.393 AT13
+## 5 0.2280257 0.04369173  6978.728 AT21
+## 6 0.2672487 0.06388194 24915.058 AT31
+
#Read the NUTS2 C-factor table. `header = TRUE` is spelled out in full so the call does not depend on partial argument matching (`head`) or on `T` never being reassigned.
+soil_cov <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Additional_consensus_variables/nuts2_Cfactor_20180822.csv", header = TRUE)
+#Duplicate NUTS_ID as `geo`, the key used to join the GIS tables below
+soil_cov$geo <- soil_cov$NUTS_ID
+head(soil_cov)
+
##   X NUTS_ID C_factor C_factor_N Only_Tilla CoverCrop Only_Resid    ImAll
+## 1 1    AT11 0.204701   0.279940   0.219316  0.262085   0.279140 0.268766
+## 2 2    AT22 0.305978   0.342173   0.325065  0.323805   0.340448 0.105781
+## 3 3    AT12 0.195147   0.269209   0.210350  0.250382   0.268578 0.275109
+## 4 4    AT13 0.188655   0.265801   0.200734  0.250364   0.265244 0.290240
+## 5 5    AT21 0.278675   0.331689   0.310575  0.299779   0.329530 0.159833
+## 6 6    AT31 0.241675   0.288756   0.263650  0.265361   0.288084 0.163046
+##   imTillage  imCover  ImResid  geo
+## 1  0.216560 0.063781 0.002857 AT11
+## 2  0.049996 0.053679 0.005042 AT22
+## 3  0.218636 0.069932 0.002344 AT12
+## 4  0.244795 0.058079 0.002096 AT13
+## 5  0.063657 0.096205 0.006510 AT21
+## 6  0.086944 0.081020 0.002329 AT31
+
###Habitat conservation data
+nat2000_ag <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_ag_Natura2000')
+
## Reading layer `nuts2_ag_Natura2000' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
nat2000_tot <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_Natura2000_area')
+
## Reading layer `nuts2_Natura2000_area' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
nat2000_tot$ha <- nat2000_tot$SUM * 6.25 #Sum of number of 250m x 250m (6.25 ha) cells
+nat2000 <- merge(nat2000_ag[,c(1,5)], nat2000_tot[,c(1,6)])
+nat2000$geo <- nat2000$NUTS_ID
+nat2000 <- left_join(nat2000, nuts@data[,c(4,7)])
+
## Joining, by = "NUTS_ID"
+
## Warning: Column `NUTS_ID` joining factors with different levels, coercing
+## to character vector
+
nat2000 <- left_join(nat2000, corine.aa.all.nuts)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
head(nat2000) #SUM, ha, and sum_uaa in hectares, Shape_Area in m2
+
##   NUTS_ID       SUM        ha  geo  Shape_Area   sum_uaa
+## 1    AT11  44450.00 110012.50 AT11  3963509482  219300.0
+## 2    AT12 139906.25 425081.25 AT12 19201725666 1004956.2
+## 3    AT13   1106.25   5506.25 AT13   411979159    6075.0
+## 4    AT21   1562.50  58462.50 AT21  9541848203  191312.5
+## 5    AT22  34600.00 250300.00 AT22 16414303341  411556.2
+## 6    AT31   5356.25  78300.00 AT31 11984617500  582587.5
+
nat2000$nat2000_ag <- nat2000$SUM / nat2000$sum_uaa #SUM (ha, from layer nuts2_ag_Natura2000) as a fraction of sum_uaa (ha); presumably the share of agricultural area inside Natura 2000 - confirm
+nat2000$nat2000_pr <- nat2000$ha / (nat2000$Shape_Area / 10000) #Shape_Area / 10000 converts m2 to ha, so this is Natura 2000 area (ha) as a fraction of the Shape_Area of the NUTS unit
+head(nat2000)
+
##   NUTS_ID       SUM        ha  geo  Shape_Area   sum_uaa  nat2000_ag
+## 1    AT11  44450.00 110012.50 AT11  3963509482  219300.0 0.202690378
+## 2    AT12 139906.25 425081.25 AT12 19201725666 1004956.2 0.139216259
+## 3    AT13   1106.25   5506.25 AT13   411979159    6075.0 0.182098765
+## 4    AT21   1562.50  58462.50 AT21  9541848203  191312.5 0.008167266
+## 5    AT22  34600.00 250300.00 AT22 16414303341  411556.2 0.084071132
+## 6    AT31   5356.25  78300.00 AT31 11984617500  582587.5 0.009193898
+##   nat2000_pr
+## 1 0.27756336
+## 2 0.22137659
+## 3 0.13365361
+## 4 0.06126958
+## 5 0.15248896
+## 6 0.06533375
+
###Calories data
+calorie_fr <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_mean_kcalFr')
+
## Reading layer `nuts2_mean_kcalFr' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
calorie_fr$geo <- calorie_fr$NUTS_ID
+names(calorie_fr)[5] <- 'cal_frac'
+head(calorie_fr)
+
##   NUTS_ID ZONE_CODE COUNT       AREA  cal_frac  geo
+## 1    AT11         1  3739 3.7390e+09 0.5245497 AT11
+## 2    AT22         2  6788 6.7880e+09 0.2280712 AT22
+## 3    AT12         3 14292 1.4292e+10 0.5354631 AT12
+## 4    AT13         4   214 2.1400e+08 0.5256335 AT13
+## 5    AT21         5  4113 4.1130e+09 0.2194208 AT21
+## 6    AT31         6  7273 7.2730e+09 0.3612959 AT31
+
###Suitability data
+precip <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_mean_ann_precip')
+
## Reading layer `nuts2_mean_ann_precip' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
precip$geo <- precip$NUTS_ID
+names(precip)[5] <- 'precip'
+head(precip)
+
##   NUTS_ID ZONE_CODE COUNT       AREA    precip  geo
+## 1    AT11         1  3970 3.9700e+09  666.3237 AT11
+## 2    AT22         2 16416 1.6416e+10 1149.1807 AT22
+## 3    AT12         3 19205 1.9205e+10  714.3840 AT12
+## 4    AT13         4   414 4.1400e+08  580.7077 AT13
+## 5    AT21         5  9538 9.5380e+09 1357.1470 AT21
+## 6    AT31         6 11977 1.1977e+10 1057.0594 AT31
+
deg_days <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_mean_gdd')
+
## Reading layer `nuts2_mean_gdd' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
deg_days$geo <- deg_days$NUTS_ID
+names(deg_days)[5] <- 'deg_days'
+head(deg_days)
+
##   NUTS_ID ZONE_CODE COUNT       AREA deg_days  geo
+## 1    AT11         1  3970 3.9700e+09 1965.423 AT11
+## 2    AT22         2 16416 1.6416e+10 1264.833 AT22
+## 3    AT12         3 19205 1.9205e+10 1764.210 AT12
+## 4    AT13         4   414 4.1400e+08 2040.184 AT13
+## 5    AT21         5  9538 9.5380e+09 1095.291 AT21
+## 6    AT31         6 11977 1.1977e+10 1537.084 AT31
+
crop_suit <- st_read(dsn='C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb', layer='nuts2_mean_crop_suit')
+
## Reading layer `nuts2_mean_crop_suit' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
crop_suit$geo <- crop_suit$NUTS_ID
+names(crop_suit)[5] <- 'crop_suit'
+head(crop_suit)
+
##   NUTS_ID ZONE_CODE COUNT      AREA crop_suit  geo
+## 1    AT11         1   291 2.910e+08  5.319588 AT11
+## 2    AT22         2  1253 1.253e+09  3.217877 AT22
+## 3    AT12         3  1471 1.471e+09  4.347383 AT12
+## 4    AT13         4    30 3.000e+07  5.466667 AT13
+## 5    AT21         5   726 7.260e+08  2.668044 AT21
+## 6    AT31         6   913 9.130e+08  3.925520 AT31
+
# Join all GIS tables: start from soil_cov, the most complete of these tables
+# (nrow = 320), then left-join the CO2-eq emission means onto it by `geo`.
+gis.dat <- soil_cov[, c("geo", "C_factor")]
+gis.dat <- gis.dat %>% left_join(emi_co2eq[, c("geo", "MEAN")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factors with different levels, coercing to
+## character vector
+
names(gis.dat)[3] <- "emi_co2eq"
+gis.dat <- left_join(gis.dat, emi_nh3[,c("geo", "MEAN")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
names(gis.dat)[4] <- "emi_nh3"
+gis.dat <- left_join(gis.dat, emi_pm10[,c("geo", "MEAN")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
names(gis.dat)[5] <- "emi_pm10"
+gis.dat <- left_join(gis.dat, emi_pm25[,c("geo", "MEAN")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
names(gis.dat)[6] <- "emi_pm25"
+gis.dat <- left_join(gis.dat, soc[,c("geo", "MEAN")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
names(gis.dat)[7] <- "soc"
+gis.dat <- left_join(gis.dat, biol_threats[,c("geo", "MEAN")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
names(gis.dat)[8] <- "biol_threats"
+gis.dat <- left_join(gis.dat, nat2000[,c("geo", "nat2000_ag", "nat2000_pr")])
+
## Joining, by = "geo"
+
gis.dat <- left_join(gis.dat, calorie_fr[,c("geo", "cal_frac")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
gis.dat <- left_join(gis.dat, precip[,c("geo", "precip")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
gis.dat <- left_join(gis.dat, deg_days[,c("geo", "deg_days")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
gis.dat <- left_join(gis.dat, crop_suit[,c("geo", "crop_suit")])
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
head(gis.dat)
+
##    geo C_factor emi_co2eq   emi_nh3  emi_pm10 emi_pm25       soc
+## 1 AT11 0.204701  653096.3  8690.371 1025.7531 818.8200  59.33579
+## 2 AT22 0.305978  954802.7 13155.494  850.9032 331.4128  97.49513
+## 3 AT12 0.195147 1136117.0 14253.957 1285.1966 856.1814  64.25874
+## 4 AT13 0.188655  457098.6  6375.132  609.2881 426.0527  49.22190
+## 5 AT21 0.278675  888623.1 11187.491  471.5642 220.7500 101.03633
+## 6 AT31 0.241675 1943324.3 22472.061 1298.3660 629.6217 100.91691
+##   biol_threats  nat2000_ag nat2000_pr  cal_frac    precip deg_days
+## 1    0.2693722 0.202690378 0.27756336 0.5245497  666.3237 1965.423
+## 2    0.2525372 0.084071132 0.15248896 0.2280712 1149.1807 1264.833
+## 3    0.2670201 0.139216259 0.22137659 0.5354631  714.3840 1764.210
+## 4    0.2611445 0.182098765 0.13365361 0.5256335  580.7077 2040.184
+## 5    0.2280257 0.008167266 0.06126958 0.2194208 1357.1470 1095.291
+## 6    0.2672487 0.009193898 0.06533375 0.3612959 1057.0594 1537.084
+##   crop_suit
+## 1  5.319588
+## 2  3.217877
+## 3  4.347383
+## 4  5.466667
+## 5  2.668044
+## 6  3.925520
+
#Join GIS data to clean database
+names(dbase.clean)
+
##  [1] "NUTS_ID"        "risk_pov"       "train35bas"     "train35ful"    
+##  [5] "train_bas"      "train_ful"      "nitr_high"      "nitr_mod"      
+##  [9] "nitr_poor"      "irrigated"      "forest"         "artific"       
+## [13] "soil_loss"      "com_birds"      "farm_birds"     "org_farm"      
+## [17] "energy_rt"      "renew_pct"      "renew_prod"     "gross_N"       
+## [21] "gross_P"        "conv_till"      "cons_till"      "zero_till"     
+## [25] "nfert"          "arable"         "grassland"      "permanent"     
+## [29] "soil_prod"      "geo"            "irrig_rate"     "afi_awu"       
+## [33] "gva_awu"        "labour_use"     "pest_rate"      "tot_gdp_cap"   
+## [37] "tot_pps_cap"    "emp_rate_15_64" "tot_unemp"      "yth_unemp"     
+## [41] "rur_gdp_cap"    "rur_pps_cap"    "int_gdp_cap"    "int_pps_cap"   
+## [45] "urb_gdp_cap"    "urb_pps_cap"
+
names(gis.dat)
+
##  [1] "geo"          "C_factor"     "emi_co2eq"    "emi_nh3"     
+##  [5] "emi_pm10"     "emi_pm25"     "soc"          "biol_threats"
+##  [9] "nat2000_ag"   "nat2000_pr"   "cal_frac"     "precip"      
+## [13] "deg_days"     "crop_suit"
+
nrow(dbase.clean)
+
## [1] 320
+
nrow(gis.dat)
+
## [1] 320
+
dbase.clean.gis <- left_join(dbase.clean, gis.dat)
+
## Joining, by = "geo"
+
head(dbase.clean.gis)
+
##   NUTS_ID risk_pov train35bas train35ful train_bas train_ful nitr_high
+## 1    AT11 13.73333  0.1375661  0.3333333 0.1243050 0.1779190  64.58924
+## 2    AT22 17.26667  0.2160980  0.3648294 0.2017089 0.2413594  64.58924
+## 3    AT12 13.83333  0.2084775  0.4809689 0.2534787 0.3449437  64.58924
+## 4    AT13 27.23333  0.3750000  0.7500000 0.1753247 0.4740260  64.58924
+## 5    AT21 17.20000  0.2306238  0.3648393 0.2076173 0.2250348  64.58924
+## 6    AT31 15.00000  0.2508418  0.4284512 0.2014381 0.2857610  64.58924
+##   nitr_mod nitr_poor irrigated    forest    artific soil_loss com_birds
+## 1 20.20774  15.20302     5.850 0.3161203 0.04355635     1.842        NA
+## 2 20.20774  15.20302     0.325 0.6127954 0.03306278     5.804        NA
+## 3 20.20774  15.20302     2.650 0.4286079 0.04875064     2.236        NA
+## 4 20.20774  15.20302    10.525 0.1469534 0.73118280     1.014        NA
+## 5 20.20774  15.20302     0.100 0.5998934 0.03047416    11.671        NA
+## 6 20.20774  15.20302     0.125 0.4027358 0.04900973     3.791        NA
+##   farm_birds org_farm  energy_rt renew_pct renew_prod  gross_N  gross_P
+## 1      65.98 19.43430 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 2      65.98 12.80858 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 3      65.98 13.41584 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 4      65.98 16.44137 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 5      65.98 10.68078 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 6      65.98 12.31071 0.08319988  32.65559   7.068917 32.57143 1.833333
+##   conv_till  cons_till   zero_till    nfert   arable grassland  permanent
+## 1 0.6182190 0.31992068 0.025012794 7.684000 83.64566  8.715722  7.5451998
+## 2 0.8887161 0.05005656 0.024109163 7.551429 37.02489 58.694493  4.2139773
+## 3 0.6226791 0.32803537 0.019896256 7.452800 76.22380 20.140837  3.5858503
+## 4 0.5109890 0.40476190 0.007326007 7.497000 79.80050 10.099751 10.0997506
+## 5 0.8592546 0.05928605 0.032552288 8.131500 28.48779 71.285286  0.1679223
+## 6 0.8442576 0.12043311 0.014198645 9.138333 56.49367 43.164202  0.2648111
+##   soil_prod  geo  irrig_rate   afi_awu  gva_awu  labour_use pest_rate
+## 1         6 AT11  16.6922025 24788.951 25367.72 0.036978892  1.335093
+## 2         6 AT22   2.2086896 13958.345 18388.41 0.034606425  1.335093
+## 3         6 AT12  10.7753945 23056.271 25423.66 0.035521808  1.335093
+## 4         6 AT13 208.4609053  6431.103  9952.26 0.001572703  1.335093
+## 5         6 AT21   0.5770663 10741.948 11827.48 0.028435272  1.335093
+## 6         6 AT31   0.5491021 15263.545 22028.20 0.030650579  1.335093
+##   tot_gdp_cap tot_pps_cap emp_rate_15_64 tot_unemp yth_unemp rur_gdp_cap
+## 1       26700       24600       69.81530       5.7      15.0    26690.97
+## 2       34800       32100       71.37077       5.1      10.2    28015.67
+## 3       31800       29300       73.06232       5.2       9.3    29392.91
+## 4       47300       43700       64.90683      11.3      20.3          NA
+## 5       32700       30200       69.88481       5.4      12.2    27340.58
+## 6       39600       36500       75.46507       4.5       7.6    33453.92
+##   rur_pps_cap int_gdp_cap int_pps_cap urb_gdp_cap urb_pps_cap C_factor
+## 1    24628.47          NA          NA          NA          NA 0.204701
+## 2    25851.10    42289.66    39020.69          NA          NA 0.305978
+## 3    27122.78    37037.74    34176.67    27574.60    25441.27 0.195147
+## 4          NA          NA          NA    47307.69    43651.88 0.188655
+## 5    25228.26    38025.00    35085.71          NA          NA 0.278675
+## 6    30867.51    48936.17    45154.26          NA          NA 0.241675
+##   emi_co2eq   emi_nh3  emi_pm10 emi_pm25       soc biol_threats
+## 1  653096.3  8690.371 1025.7531 818.8200  59.33579    0.2693722
+## 2  954802.7 13155.494  850.9032 331.4128  97.49513    0.2525372
+## 3 1136117.0 14253.957 1285.1966 856.1814  64.25874    0.2670201
+## 4  457098.6  6375.132  609.2881 426.0527  49.22190    0.2611445
+## 5  888623.1 11187.491  471.5642 220.7500 101.03633    0.2280257
+## 6 1943324.3 22472.061 1298.3660 629.6217 100.91691    0.2672487
+##    nat2000_ag nat2000_pr  cal_frac    precip deg_days crop_suit
+## 1 0.202690378 0.27756336 0.5245497  666.3237 1965.423  5.319588
+## 2 0.084071132 0.15248896 0.2280712 1149.1807 1264.833  3.217877
+## 3 0.139216259 0.22137659 0.5354631  714.3840 1764.210  4.347383
+## 4 0.182098765 0.13365361 0.5256335  580.7077 2040.184  5.466667
+## 5 0.008167266 0.06126958 0.2194208 1357.1470 1095.291  2.668044
+## 6 0.009193898 0.06533375 0.3612959 1057.0594 1537.084  3.925520
+
#First, we work only with those NUTS regions with crop areas > 0, to avoid spurious yield values later
+#(`header = TRUE` is written out in full: `head` only works through partial argument matching and `T` can be reassigned)
+crop.area.dat <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Crop_area_yield/croparea_no_0s_mean_allnuts.csv", header = TRUE)
+
+names(crop.area.dat)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"
+
names(geodata@data)
+
## [1] "id"        "CNTR_CODE" "NUTS_NAME" "LEVL_CODE" "FID"       "NUTS_ID"  
+## [7] "geo"
+
crop.area.dat <- left_join(crop.area.dat, geodata@data[, c("LEVL_CODE", "geo")]) #attach the NUTS level; columns selected by name (formerly positions 4 and 7) so a reordered geodata table cannot silently join the wrong fields
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
#Need to adjust the NUTS2016 data to NUTS2013 codes
+crop.area.dat.2013nuts <- crop.area.dat
+names(crop.area.dat.2013nuts)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"  "LEVL_CODE"
+
crop.area.dat.2013nuts$geo16 <- crop.area.dat.2013nuts$geo #keep the original NUTS 2016 code; `geo` is overwritten with NUTS 2013 codes
+#NUTS 2013 <-> NUTS 2016 conversion table (first four columns only).
+#stringsAsFactors = TRUE is stated explicitly because levels(nuts.conv$Change) is inspected afterwards and
+#the implicit default changed to FALSE in R 4.0; header = TRUE replaces the partially matched `head=T`.
+nuts.conv <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Crop_area_yield/NUTS2013-NUTS2016_2.csv", header = TRUE, stringsAsFactors = TRUE)[,1:4]
+head(nuts.conv)
+
##   Code.2013 Code.2016                       Label
+## 1      IE01           Border, Midland and Western
+## 2                IE04        Northern and Western
+## 3      IE02                  Southern and Eastern
+## 4                IE05                    Southern
+## 5                IE06         Eastern and Midland
+## 6      FR24      FRB0       Centre — Val de Loire
+##                                            Change
+## 1                                    discontinued
+## 2                                      new region
+## 3                                    discontinued
+## 4 new region, made from ex-IE023, IE024 and IE025
+## 5                                      new region
+## 6                          recoded and relabelled
+
levels(nuts.conv$Change)
+
##  [1] ""                                                                         
+##  [2] "boundary shift; lost ex-UKM24"                                            
+##  [3] "discontinued"                                                             
+##  [4] "discontinued; split into new HU11 and HU12"                               
+##  [5] "discontinued; split into new PL91 and PL92"                               
+##  [6] "discontinued; split into new UKM8 and UKM9"                               
+##  [7] "new region"                                                               
+##  [8] "new region, equals ex-NUTS 3 region HU101"                                
+##  [9] "new region, equals ex-NUTS 3 region HU102"                                
+## [10] "new region, equals ex-NUTS 3 region LT00A"                                
+## [11] "new region, equals ex-PL127, PL129 and PL12A minus new PL926"             
+## [12] "new region, ex-LT00 minus ex-LT00A"                                       
+## [13] "new region, made from ex-IE023, IE024 and IE025"                          
+## [14] "new region, made from ex-PL128, PL12B, PL12C, PL12D, PL12E plus new PL926"
+## [15] "new region, made from ex-UKM24, UKM32, UKM33, UKM37 and UKM38"            
+## [16] "new region, made from ex-UKM31, UKM34, UKM35 and UKM36"                   
+## [17] "recoded"                                                                  
+## [18] "recoded and relabelled"                                                   
+## [19] "split into new LT01 and LT02"
+
#straight recodes: map every "recoded" NUTS 2016 code back to its NUTS 2013 code with one vectorised lookup
+is.recoded <- nuts.conv$Change %in% "recoded"
+#position of each row's 2016 code among the recoded codes (NA where the region was not simply recoded)
+recode.pos <- match(crop.area.dat.2013nuts$geo16, nuts.conv$Code.2016[is.recoded])
+crop.area.dat.2013nuts$geo[!is.na(recode.pos)] <- as.character(nuts.conv$Code.2013[is.recoded][recode.pos[!is.na(recode.pos)]])
+#check
+crop.area.dat.2013nuts[!is.na(recode.pos), c('geo', 'geo16')]
+
##      geo geo16
+## 195 FR51  FRG0
+## 196 FR52  FRH0
+## 200 FR61  FRI1
+## 201 FR63  FRI2
+## 202 FR53  FRI3
+## 214 FR81  FRJ1
+## 215 FR62  FRJ2
+## 222 FR21  FRF2
+## 223 FR26  FRC1
+## 224 FR43  FRC2
+## 225 FR25  FRD1
+## 226 FR23  FRD2
+## 236 FR41  FRF3
+## 237 FR72  FRK1
+## 238 FR71  FRK2
+## 245 FR82  FRL0
+## 246 FR83  FRM0
+## 247 FRA1  FRY1
+## 259 FRA2  FRY2
+## 260 FRA3  FRY3
+## 261 FRA4  FRY4
+## 262 FRA5  FRY5
+## 317 FR30  FRE1
+## 318 FR22  FRE2
+## 319 FR42  FRF1
+## 441 PL32  PL82
+## 454 PL34  PL84
+## 486 PL11  PL71
+## 487 PL33  PL72
+## 488 PL31  PL81
+
#recode and relabel, and splits: each name is a NUTS 2013 code, each value the NUTS 2016 code(s) folded back into it
+nuts13.from.nuts16 <- list(
+  FR24 = "FRB0",             #recode and relabel
+  LT00 = c("LT01", "LT02"),  #split
+  HU10 = c("HU11", "HU12"),  #split
+  PL12 = c("PL91", "PL92"),  #split
+  UKM3 = c("UKM8", "UKM9"),  #approximate split not including NUTS3 UKM24
+  UKM2 = "UKM7"              #approximate recode still including NUTS3 UKM24
+)
+for (code13 in names(nuts13.from.nuts16)) {
+  #%in% never returns NA, so a missing geo16 cannot break the row selection
+  crop.area.dat.2013nuts[crop.area.dat.2013nuts$geo16 %in% nuts13.from.nuts16[[code13]], 'geo'] <- code13
+}
+
+#IE
+#Cannot translate data from new regions to old NUTS2013 so use NUTS0 data
+crop.area.dat.2013nuts[crop.area.dat.2013nuts$geo16 == 'IE',]
+
##    geo rye_a barley_a maize_a tritic_a sorghum_a oth_cer_a rice_a
+## 17  IE    NA 194.4375      NA       NA        NA        NA     NA
+##    pasture_a  rape_a sunflow_a pulses_a potato_a sugbeet_a oth_rt_a
+## 17  96.10625 11.2325      0.01  7.05125     9.81        NA    11.95
+##    wheat_a oats_a oth_oil_a    fibre_a oth_ind_a fodder_a LEVL_CODE geo16
+## 17  75.315 22.645      0.07 0.02666667   1.96625 19.82042         0    IE
+
## Calculate sum over the split NUTS2 regions
+head(crop.area.dat.2013nuts)
+
##   geo     rye_a   barley_a   maize_a tritic_a sorghum_a oth_cer_a  rice_a
+## 1  BG 10.707500 186.503750 416.77125 13.81375     4.635 4.0450000 11.2175
+## 2  CH  1.986250  28.315000  15.33500  9.06875        NA 0.1116667  0.0800
+## 3  CY        NA  20.810000        NA  0.42500        NA        NA      NA
+## 4  AL  1.285714   2.742857  56.44286       NA        NA        NA      NA
+## 5  CZ 27.141250 357.867500  98.95625 43.45875        NA 5.3400000      NA
+## 6  BE  0.606250  46.607500  63.19125  5.85875        NA 3.2157143      NA
+##   pasture_a    rape_a   sunflow_a  pulses_a  potato_a sugbeet_a  oth_rt_a
+## 1   0.72750 175.64875 813.4100000 20.115000 12.512500   0.01000 0.1914286
+## 2 117.22000  21.79750   4.1087500  4.702857 11.072500  19.47500 0.6585714
+## 3   0.33500        NA          NA  0.413750  4.706250        NA        NA
+## 4 143.70000        NA   0.9714286 14.128571  9.585714   0.70000        NA
+## 5  37.74286 388.13375  21.6175000 27.933750 24.236250  60.71125 0.6300000
+## 6  79.00375  11.98875          NA  2.387500 80.837143  58.88000 7.5942857
+##      wheat_a    oats_a  oth_oil_a    fibre_a  oth_ind_a  fodder_a
+## 1 1184.90750 16.499750 20.2690000  1.8728571 64.7175000 110.37167
+## 2   88.57500  1.937917  1.6210357  0.0500000  1.2264286 189.00375
+## 3    9.51375  0.397500  0.0737500         NA         NA  34.95625
+## 4   70.64286 14.028571  0.2571429         NA  6.0571429 238.71667
+## 5  834.87625 47.745000 61.8150000  0.3642857 13.1067857 414.21214
+## 6  209.81500  3.721250  0.1466667 12.8037500  0.7129167 181.32375
+##   LEVL_CODE geo16
+## 1         0    BG
+## 2         0    CH
+## 3         0    CY
+## 4         0    AL
+## 5         0    CZ
+## 6         0    BE
+
#Sum the crop areas of all rows that share one (NUTS 2013) `geo` code.
+#na.rm = FALSE: if any row in a group has no value for a crop, that group's total is NA rather than a partial sum.
+#LEVL_CODE is averaged only to carry it through summarise(); rows merged under one code are presumably all at the same NUTS level.
+crop.area.dat.2013nuts.sum <- crop.area.dat.2013nuts %>%
+  group_by(geo) %>%
+  summarise(
+    rye_a = sum(rye_a, na.rm = FALSE),
+    barley_a = sum(barley_a, na.rm = FALSE),
+    maize_a = sum(maize_a, na.rm = FALSE),
+    tritic_a = sum(tritic_a, na.rm = FALSE),
+    sorghum_a = sum(sorghum_a, na.rm = FALSE),
+    oth_cer_a = sum(oth_cer_a, na.rm = FALSE),
+    rice_a = sum(rice_a, na.rm = FALSE),
+    pasture_a = sum(pasture_a, na.rm = FALSE),
+    rape_a = sum(rape_a, na.rm = FALSE),
+    sunflow_a = sum(sunflow_a, na.rm = FALSE),
+    pulses_a = sum(pulses_a, na.rm = FALSE),
+    potato_a = sum(potato_a, na.rm = FALSE),
+    sugbeet_a = sum(sugbeet_a, na.rm = FALSE),
+    oth_rt_a = sum(oth_rt_a, na.rm = FALSE),
+    wheat_a = sum(wheat_a, na.rm = FALSE),
+    oats_a = sum(oats_a, na.rm = FALSE),
+    oth_oil_a = sum(oth_oil_a, na.rm = FALSE),
+    fibre_a = sum(fibre_a, na.rm = FALSE),
+    oth_ind_a = sum(oth_ind_a, na.rm = FALSE),
+    fodder_a = sum(fodder_a, na.rm = FALSE),
+    LEVL_CODE = mean(LEVL_CODE, na.rm = FALSE)
+  )
+head(crop.area.dat.2013nuts.sum)
+
## # A tibble: 6 x 22
+##   geo   rye_a barley_a maize_a tritic_a sorghum_a oth_cer_a rice_a
+##   <chr> <dbl>    <dbl>   <dbl>    <dbl>     <dbl>     <dbl>  <dbl>
+## 1 AL     1.29     2.74    56.4       NA        NA        NA     NA
+## 2 AL0   NA       NA       NA         NA        NA        NA     NA
+## 3 AL01  NA       NA       NA         NA        NA        NA     NA
+## 4 AL011 NA       NA       NA         NA        NA        NA     NA
+## 5 AL012 NA       NA       NA         NA        NA        NA     NA
+## 6 AL013 NA       NA       NA         NA        NA        NA     NA
+## # ... with 14 more variables: pasture_a <dbl>, rape_a <dbl>,
+## #   sunflow_a <dbl>, pulses_a <dbl>, potato_a <dbl>, sugbeet_a <dbl>,
+## #   oth_rt_a <dbl>, wheat_a <dbl>, oats_a <dbl>, oth_oil_a <dbl>,
+## #   fibre_a <dbl>, oth_ind_a <dbl>, fodder_a <dbl>, LEVL_CODE <dbl>
+
nrow(crop.area.dat.2013nuts.sum)
+
## [1] 2013
+
#We calculate the fraction of agricultural area within each NUTS0, NUTS1, and NUTS2 area using the UAA from CORINE
+#First, join UAA dataframe to crop area dataframe
+head(corine.aa.all.nuts)
+
##    geo   sum_uaa
+## 1 AT11  219300.0
+## 2 AT22  411556.2
+## 3 AT12 1004956.2
+## 4 AT13    6075.0
+## 5 AT21  191312.5
+## 6 AT31  582587.5
+
names(crop.area.dat.2013nuts.sum)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"  "LEVL_CODE"
+
crop.area.dat.2013nuts.sum <- left_join(crop.area.dat.2013nuts.sum, corine.aa.all.nuts)
+
## Joining, by = "geo"
+
summary(crop.area.dat.2013nuts.sum)
+
##      geo                rye_a             barley_a        
+##  Length:2013        Min.   :   0.010   Min.   :   0.0167  
+##  Class :character   1st Qu.:   0.339   1st Qu.:  10.8616  
+##  Mode  :character   Median :   1.857   Median :  39.5050  
+##                     Mean   :  19.286   Mean   : 114.3821  
+##                     3rd Qu.:  11.589   3rd Qu.: 116.1025  
+##                     Max.   :1051.300   Max.   :2763.5000  
+##                     NA's   :1674       NA's   :1635       
+##     maize_a            tritic_a           sorghum_a      
+##  Min.   :   0.010   Min.   :   0.0100   Min.   : 0.0100  
+##  1st Qu.:   1.977   1st Qu.:   0.8882   1st Qu.: 0.1000  
+##  Median :  17.253   Median :   4.7900   Median : 0.4175  
+##  Mean   :  93.177   Mean   :  27.9156   Mean   : 2.6008  
+##  3rd Qu.:  82.514   3rd Qu.:  21.7703   3rd Qu.: 1.4600  
+##  Max.   :2506.171   Max.   :1289.4300   Max.   :50.6700  
+##  NA's   :1675       NA's   :1731        NA's   :1868     
+##    oth_cer_a            rice_a          pasture_a            rape_a       
+##  Min.   :  0.0100   Min.   :  0.010   Min.   :   0.010   Min.   :   0.01  
+##  1st Qu.:  0.1715   1st Qu.:  0.685   1st Qu.:   4.897   1st Qu.:   1.50  
+##  Median :  0.7017   Median :  3.410   Median :  15.800   Median :  13.73  
+##  Mean   :  4.2490   Mean   : 19.279   Mean   :  88.947   Mean   :  57.92  
+##  3rd Qu.:  2.6938   3rd Qu.: 20.203   3rd Qu.:  66.019   3rd Qu.:  62.53  
+##  Max.   :102.9237   Max.   :230.827   Max.   :3091.571   Max.   :1503.09  
+##  NA's   :1917       NA's   :1926      NA's   :1744       NA's   :1705     
+##    sunflow_a           pulses_a         potato_a         sugbeet_a      
+##  Min.   :  0.0100   Min.   :  0.01   Min.   :  0.020   Min.   :  0.010  
+##  1st Qu.:  0.4097   1st Qu.:  1.00   1st Qu.:  1.197   1st Qu.:  1.000  
+##  Median :  4.1544   Median :  4.88   Median :  4.686   Median :  6.553  
+##  Mean   : 57.2720   Mean   : 20.95   Mean   : 14.341   Mean   : 20.984  
+##  3rd Qu.: 36.7714   3rd Qu.: 18.03   3rd Qu.: 13.900   3rd Qu.: 20.265  
+##  Max.   :997.2763   Max.   :663.38   Max.   :334.110   Max.   :404.298  
+##  NA's   :1747       NA's   :1646     NA's   :1661      NA's   :1749     
+##     oth_rt_a         wheat_a            oats_a           oth_oil_a       
+##  Min.   : 0.010   Min.   :   0.01   Min.   :   0.010   Min.   :  0.0100  
+##  1st Qu.: 0.100   1st Qu.:  23.32   1st Qu.:   1.252   1st Qu.:  0.4012  
+##  Median : 0.360   Median : 116.42   Median :   5.880   Median :  1.6988  
+##  Mean   : 1.869   Mean   : 262.15   Mean   :  29.780   Mean   : 19.2813  
+##  3rd Qu.: 1.127   3rd Qu.: 294.29   3rd Qu.:  18.924   3rd Qu.: 11.8080  
+##  Max.   :40.000   Max.   :7997.88   Max.   :1488.480   Max.   :623.9917  
+##  NA's   :1892     NA's   :1634      NA's   :1666       NA's   :1744      
+##     fibre_a           oth_ind_a           fodder_a          LEVL_CODE    
+##  Min.   :  0.0100   Min.   :  0.0100   Min.   :   0.010   Min.   :0.000  
+##  1st Qu.:  0.0350   1st Qu.:  0.1200   1st Qu.:   8.642   1st Qu.:3.000  
+##  Median :  0.2225   Median :  0.7718   Median :  33.811   Median :3.000  
+##  Mean   : 16.8706   Mean   :  5.2917   Mean   :  94.226   Mean   :2.658  
+##  3rd Qu.:  2.0250   3rd Qu.:  3.6009   3rd Qu.:  92.478   3rd Qu.:3.000  
+##  Max.   :471.5714   Max.   :165.9238   Max.   :2421.790   Max.   :3.000  
+##  NA's   :1852       NA's   :1727       NA's   :1666                      
+##     sum_uaa        
+##  Min.   :      56  
+##  1st Qu.:  256462  
+##  Median :  701169  
+##  Mean   : 1469382  
+##  3rd Qu.: 1522769  
+##  Max.   :33916138  
+##  NA's   :1556
+
#Second, calculate the fraction of area
+crop.frac <- as.data.frame(crop.area.dat.2013nuts.sum)
+#columns 2:21 are the 20 crop-area columns (rye_a ... fodder_a); only the trailing "_a" suffix is replaced
+names(crop.frac)[2:21] <- sub("_a$", "_f", names(crop.frac)[2:21])
+#crop area / UAA; the factor 1000 presumably converts crop areas given in 1000 ha to the unit of sum_uaa - TODO confirm
+crop.frac[,2:21] <- 1000 * crop.area.dat.2013nuts.sum[,2:21] / crop.area.dat.2013nuts.sum$sum_uaa
+#NOTE: with na.rm = TRUE a region whose crop fractions are all NA gets total_f = 0, not NA
+crop.frac$total_f <- rowSums(crop.frac[,2:21], na.rm = TRUE)
+summary(crop.frac)
+
##      geo                rye_f           barley_f         maize_f      
+##  Length:2013        Min.   :0.0000   Min.   :0.0003   Min.   :0.0001  
+##  Class :character   1st Qu.:0.0005   1st Qu.:0.0200   1st Qu.:0.0036  
+##  Mode  :character   Median :0.0021   Median :0.0421   Median :0.0192  
+##                     Mean   :0.0090   Mean   :0.0553   Mean   :0.0421  
+##                     3rd Qu.:0.0084   3rd Qu.:0.0768   3rd Qu.:0.0544  
+##                     Max.   :0.1271   Max.   :0.3657   Max.   :0.3190  
+##                     NA's   :1696     NA's   :1661     NA's   :1699    
+##     tritic_f        sorghum_f        oth_cer_f          rice_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0013   1st Qu.:0.0001   1st Qu.:0.0003   1st Qu.:0.0006  
+##  Median :0.0045   Median :0.0004   Median :0.0007   Median :0.0024  
+##  Mean   :0.0118   Mean   :0.0010   Mean   :0.0025   Mean   :0.0094  
+##  3rd Qu.:0.0116   3rd Qu.:0.0010   3rd Qu.:0.0019   3rd Qu.:0.0080  
+##  Max.   :0.1181   Max.   :0.0181   Max.   :0.0457   Max.   :0.1071  
+##  NA's   :1753     NA's   :1880     NA's   :1918     NA's   :1929    
+##    pasture_f          rape_f         sunflow_f         pulses_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0001  
+##  1st Qu.:0.0087   1st Qu.:0.0020   1st Qu.:0.0004   1st Qu.:0.0021  
+##  Median :0.0238   Median :0.0176   Median :0.0041   Median :0.0046  
+##  Mean   :0.0597   Mean   :0.0298   Mean   :0.0232   Mean   :0.0087  
+##  3rd Qu.:0.0843   3rd Qu.:0.0479   3rd Qu.:0.0222   3rd Qu.:0.0118  
+##  Max.   :0.4127   Max.   :0.1676   Max.   :0.2186   Max.   :0.0648  
+##  NA's   :1771     NA's   :1729     NA's   :1770     NA's   :1673    
+##     potato_f        sugbeet_f         oth_rt_f         wheat_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0002  
+##  1st Qu.:0.0022   1st Qu.:0.0012   1st Qu.:0.0001   1st Qu.:0.0522  
+##  Median :0.0051   Median :0.0068   Median :0.0003   Median :0.1198  
+##  Mean   :0.0151   Mean   :0.0161   Mean   :0.0016   Mean   :0.1294  
+##  3rd Qu.:0.0125   3rd Qu.:0.0171   3rd Qu.:0.0011   3rd Qu.:0.1834  
+##  Max.   :0.3619   Max.   :0.3886   Max.   :0.0229   Max.   :1.1951  
+##  NA's   :1688     NA's   :1768     NA's   :1893     NA's   :1660    
+##      oats_f         oth_oil_f         fibre_f         oth_ind_f     
+##  Min.   :0.0001   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0021   1st Qu.:0.0004   1st Qu.:0.0000   1st Qu.:0.0002  
+##  Median :0.0063   Median :0.0018   Median :0.0002   Median :0.0007  
+##  Mean   :0.0138   Mean   :0.0089   Mean   :0.0106   Mean   :0.0025  
+##  3rd Qu.:0.0152   3rd Qu.:0.0078   3rd Qu.:0.0058   3rd Qu.:0.0023  
+##  Max.   :0.1913   Max.   :0.1905   Max.   :0.1600   Max.   :0.0356  
+##  NA's   :1692     NA's   :1766     NA's   :1859     NA's   :1754    
+##     fodder_f        LEVL_CODE        sum_uaa            total_f       
+##  Min.   :0.0002   Min.   :0.000   Min.   :      56   Min.   :0.00000  
+##  1st Qu.:0.0180   1st Qu.:3.000   1st Qu.:  256462   1st Qu.:0.00000  
+##  Median :0.0394   Median :3.000   Median :  701169   Median :0.00000  
+##  Mean   :0.0524   Mean   :2.658   Mean   : 1469382   Mean   :0.07534  
+##  3rd Qu.:0.0705   3rd Qu.:3.000   3rd Qu.: 1522769   3rd Qu.:0.00000  
+##  Max.   :0.3810   Max.   :3.000   Max.   :33916138   Max.   :3.72082  
+##  NA's   :1693                     NA's   :1556
+
#Spurious fractions
+crop.frac[which(crop.frac$total_f > 1), 'geo']
+
## [1] "BE1"  "BE10"
+
crop.frac[which(crop.frac$wheat_f > 1), 'geo'] 
+
## [1] "BE1"  "BE10"
+
as.data.frame(crop.area.dat.2013nuts.sum[which(crop.area.dat.2013nuts.sum$geo %in% c("BE1", "BE10")),]) 
+
##    geo rye_a barley_a    maize_a tritic_a sorghum_a oth_cer_a rice_a
+## 1  BE1  0.01     0.16 0.08333333    0.025        NA      0.02     NA
+## 2 BE10  0.01     0.16 0.08333333    0.025        NA      0.02     NA
+##   pasture_a     rape_a sunflow_a   pulses_a  potato_a sugbeet_a oth_rt_a
+## 1      0.11 0.07333333        NA 0.02333333 0.1583333      0.17     0.01
+## 2      0.11 0.07333333        NA 0.02333333 0.1583333      0.17     0.01
+##     wheat_a oats_a oth_oil_a fibre_a oth_ind_a  fodder_a LEVL_CODE sum_uaa
+## 1 0.5228571  0.025        NA    0.07        NA 0.1666667         1   437.5
+## 2 0.5228571  0.025        NA    0.07        NA 0.1666667         2   437.5
+
#The Brussels region (BE10, and the level-1 code BE1 with identical values) has a wheat fraction > 1 and a total fraction >> 1, so we set its fractions to NA
+crop.frac[which(crop.frac$geo %in% c("BE1", "BE10")), c(2:21,24)] <- NA #columns 2:21 are the crop fractions, column 24 is total_f
+crop.frac[which(crop.frac$geo %in% c("BE1", "BE10")),]
+
##     geo rye_f barley_f maize_f tritic_f sorghum_f oth_cer_f rice_f
+## 67  BE1    NA       NA      NA       NA        NA        NA     NA
+## 68 BE10    NA       NA      NA       NA        NA        NA     NA
+##    pasture_f rape_f sunflow_f pulses_f potato_f sugbeet_f oth_rt_f wheat_f
+## 67        NA     NA        NA       NA       NA        NA       NA      NA
+## 68        NA     NA        NA       NA       NA        NA       NA      NA
+##    oats_f oth_oil_f fibre_f oth_ind_f fodder_f LEVL_CODE sum_uaa total_f
+## 67     NA        NA      NA        NA       NA         1   437.5      NA
+## 68     NA        NA      NA        NA       NA         2   437.5      NA
+
#Here we add the NUTS regions with crop areas equal to zero, which were removed in the first crop.frac calculation because they give spurious values when calculating yields. Where NAs exist in crop.frac, we add these new values, which are zeros.
+#(`header = TRUE` is written out in full: `head` only works through partial argument matching and `T` can be reassigned)
+crop.0.area.dat <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Crop_area_yield/croparea_mean_allnuts.csv", header = TRUE)
+
+names(crop.0.area.dat)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"
+
names(geodata@data)
+
## [1] "id"        "CNTR_CODE" "NUTS_NAME" "LEVL_CODE" "FID"       "NUTS_ID"  
+## [7] "geo"
+
crop.0.area.dat <- left_join(crop.0.area.dat, geodata@data[, c("LEVL_CODE", "geo")]) #attach the NUTS level; columns selected by name (formerly positions 4 and 7) so a reordered geodata table cannot silently join the wrong fields
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
#Need to adjust the NUTS2016 data to NUTS2013 codes
+crop.0.area.dat.2013nuts <- crop.0.area.dat
+names(crop.0.area.dat.2013nuts)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"  "LEVL_CODE"
+
crop.0.area.dat.2013nuts$geo16 <- crop.0.area.dat.2013nuts$geo
+
+#straight recodes: map every "recoded" NUTS 2016 code back to its NUTS 2013 code with one vectorised lookup
+is.recoded <- nuts.conv$Change %in% "recoded"
+#position of each row's 2016 code among the recoded codes (NA where the region was not simply recoded)
+recode.pos <- match(crop.0.area.dat.2013nuts$geo16, nuts.conv$Code.2016[is.recoded])
+crop.0.area.dat.2013nuts$geo[!is.na(recode.pos)] <- as.character(nuts.conv$Code.2013[is.recoded][recode.pos[!is.na(recode.pos)]])
+#check
+crop.0.area.dat.2013nuts[!is.na(recode.pos), c('geo', 'geo16')]
+
##      geo geo16
+## 195 FR51  FRG0
+## 196 FR52  FRH0
+## 200 FR61  FRI1
+## 201 FR63  FRI2
+## 202 FR53  FRI3
+## 214 FR81  FRJ1
+## 215 FR62  FRJ2
+## 222 FR21  FRF2
+## 223 FR26  FRC1
+## 224 FR43  FRC2
+## 225 FR25  FRD1
+## 226 FR23  FRD2
+## 236 FR41  FRF3
+## 237 FR72  FRK1
+## 238 FR71  FRK2
+## 245 FR82  FRL0
+## 246 FR83  FRM0
+## 247 FRA1  FRY1
+## 259 FRA2  FRY2
+## 260 FRA3  FRY3
+## 261 FRA4  FRY4
+## 262 FRA5  FRY5
+## 317 FR30  FRE1
+## 318 FR22  FRE2
+## 319 FR42  FRF1
+## 441 PL32  PL82
+## 454 PL34  PL84
+## 486 PL11  PL71
+## 487 PL33  PL72
+## 488 PL31  PL81
+
#recode and relabel: FRB0 is mapped to FR24 by hand
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 == "FRB0", 'geo'] <- "FR24"
+
+#splits: both geo16 codes of a pair get one common geo code; their areas are summed per geo further down
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 %in% c("LT01", "LT02"), 'geo'] <- "LT00"
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 %in% c("HU11", "HU12"), 'geo'] <- "HU10"
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 %in% c("PL91", "PL92"), 'geo'] <- "PL12"
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 %in% c("UKM8", "UKM9"), 'geo'] <- "UKM3" #approximate split not including NUTS3 UKM24
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 == "UKM7", 'geo'] <- "UKM2" #approximate recode still including NUTS3 UKM24
+
+#IE
+#Cannot translate data from new regions to old NUTS2013 so use NUTS0 data
+crop.0.area.dat.2013nuts[crop.0.area.dat.2013nuts$geo16 == 'IE',] #NOTE(review): this only prints the IE row; no IE recode is applied in this chunk
+
##    geo rye_a barley_a maize_a tritic_a sorghum_a oth_cer_a rice_a
+## 17  IE     0 194.4375       0        0         0         0      0
+##    pasture_a  rape_a sunflow_a pulses_a potato_a sugbeet_a oth_rt_a
+## 17  96.10625 11.2325   0.00125  7.05125     9.81         0    11.95
+##    wheat_a oats_a oth_oil_a fibre_a oth_ind_a fodder_a LEVL_CODE geo16
+## 17  75.315 22.645   0.02625    0.01    1.9475 19.70125         0    IE
+
## Calculate sum over the split NUTS2 regions (rows that now share one geo code)
+head(crop.0.area.dat.2013nuts) #preview the recoded table before aggregating
+
##   geo     rye_a   barley_a   maize_a tritic_a sorghum_a  oth_cer_a  rice_a
+## 1  BG 10.707500 186.503750 416.77125 13.81375     4.635 4.04500000 11.2175
+## 2  CH  1.986250  28.315000  15.33500  9.06875     0.000 0.09571429  0.0480
+## 3  CY  0.000000  20.810000   0.00000  0.10625     0.000 0.00000000  0.0000
+## 4  AL  1.285714   2.742857  56.44286  0.00000     0.000 0.00000000  0.0000
+## 5  CZ 27.141250 357.867500  98.95625 43.45875     0.000 5.34000000  0.0000
+## 6  BE  0.606250  46.607500  63.19125  5.85875     0.000 2.81375000  0.0000
+##   pasture_a    rape_a   sunflow_a  pulses_a  potato_a  sugbeet_a  oth_rt_a
+## 1   0.72750 175.64875 813.4100000 20.115000 12.512500  0.0012500 0.1675000
+## 2 117.22000  21.79750   4.1087500  4.702857 11.072500 19.4750000 0.6585714
+## 3   0.33500   0.00000   0.0000000  0.413750  4.706250  0.0000000 0.0000000
+## 4  71.85000   0.00000   0.9714286 14.128571  9.585714  0.2333333 0.0000000
+## 5  37.74286 388.13375  21.6175000 27.933750 24.236250 60.7112500 0.6300000
+## 6  79.00375  11.98875   0.0000000  2.387500 80.837143 58.8800000 6.6450000
+##      wheat_a    oats_a  oth_oil_a  fibre_a  oth_ind_a  fodder_a LEVL_CODE
+## 1 1184.90750 16.321250 20.1725714  1.63875 63.2041071 108.49250         0
+## 2   88.57500  1.909821  1.6210357  0.02500  0.9887619  83.44725         0
+## 3    8.63250  0.397500  0.0737500  0.00000  0.0000000  34.95625         0
+## 4   70.64286 14.028571  0.2571429  0.00000  6.0571429 238.71667         0
+## 5  834.87625 47.745000 61.8150000  0.31125 13.1067857 414.21214         0
+## 6  209.81500  3.721250  0.0787500 12.61500  0.7041071 181.32375         0
+##   geo16
+## 1    BG
+## 2    CH
+## 3    CY
+## 4    AL
+## 5    CZ
+## 6    BE
+
#Sum the crop areas per geo code, giving one row per code.
+#na.rm = FALSE is spelled out (the shorthand F can be reassigned): if any contributing
+#row is NA for a crop, the total for that crop stays NA instead of becoming a partial sum.
+crop.0.area.dat.2013nuts.sum <- crop.0.area.dat.2013nuts %>%
+  group_by(geo) %>%
+  summarise(rye_a = sum(rye_a, na.rm = FALSE),
+            barley_a = sum(barley_a, na.rm = FALSE),
+            maize_a = sum(maize_a, na.rm = FALSE),
+            tritic_a = sum(tritic_a, na.rm = FALSE),
+            sorghum_a = sum(sorghum_a, na.rm = FALSE),
+            oth_cer_a = sum(oth_cer_a, na.rm = FALSE),
+            rice_a = sum(rice_a, na.rm = FALSE),
+            pasture_a = sum(pasture_a, na.rm = FALSE),
+            rape_a = sum(rape_a, na.rm = FALSE),
+            sunflow_a = sum(sunflow_a, na.rm = FALSE),
+            pulses_a = sum(pulses_a, na.rm = FALSE),
+            potato_a = sum(potato_a, na.rm = FALSE),
+            sugbeet_a = sum(sugbeet_a, na.rm = FALSE),
+            oth_rt_a = sum(oth_rt_a, na.rm = FALSE),
+            wheat_a = sum(wheat_a, na.rm = FALSE),
+            oats_a = sum(oats_a, na.rm = FALSE),
+            oth_oil_a = sum(oth_oil_a, na.rm = FALSE),
+            fibre_a = sum(fibre_a, na.rm = FALSE),
+            oth_ind_a = sum(oth_ind_a, na.rm = FALSE),
+            fodder_a = sum(fodder_a, na.rm = FALSE),
+            #NOTE(review): the mean keeps the level only if all rows merged under one geo share it - confirm
+            LEVL_CODE = mean(LEVL_CODE, na.rm = FALSE)
+  )
+head(crop.0.area.dat.2013nuts.sum)
+
## # A tibble: 6 x 22
+##   geo   rye_a barley_a maize_a tritic_a sorghum_a oth_cer_a rice_a
+##   <chr> <dbl>    <dbl>   <dbl>    <dbl>     <dbl>     <dbl>  <dbl>
+## 1 AL     1.29     2.74    56.4        0         0         0      0
+## 2 AL0   NA       NA       NA          0         0         0      0
+## 3 AL01  NA       NA       NA         NA        NA        NA     NA
+## 4 AL011 NA       NA       NA         NA        NA        NA     NA
+## 5 AL012 NA       NA       NA         NA        NA        NA     NA
+## 6 AL013 NA       NA       NA         NA        NA        NA     NA
+## # ... with 14 more variables: pasture_a <dbl>, rape_a <dbl>,
+## #   sunflow_a <dbl>, pulses_a <dbl>, potato_a <dbl>, sugbeet_a <dbl>,
+## #   oth_rt_a <dbl>, wheat_a <dbl>, oats_a <dbl>, oth_oil_a <dbl>,
+## #   fibre_a <dbl>, oth_ind_a <dbl>, fodder_a <dbl>, LEVL_CODE <dbl>
+
nrow(crop.0.area.dat.2013nuts.sum) #one row per geo code after the grouped sum
+
## [1] 2013
+
#We calculate the fraction of agricultural area within each NUTS0, NUTS1, and NUTS2 area using the UAA from CORINE
+#First, join the UAA dataframe (corine.aa.all.nuts) to the crop area dataframe
+head(corine.aa.all.nuts) #columns: geo, sum_uaa
+
##    geo   sum_uaa
+## 1 AT11  219300.0
+## 2 AT22  411556.2
+## 3 AT12 1004956.2
+## 4 AT13    6075.0
+## 5 AT21  191312.5
+## 6 AT31  582587.5
+
names(crop.0.area.dat.2013nuts.sum) #check which columns are shared with corine.aa.all.nuts before joining
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"  "LEVL_CODE"
+
crop.0.area.dat.2013nuts.sum <- left_join(crop.0.area.dat.2013nuts.sum, corine.aa.all.nuts) #no by= given, so the key is inferred from shared column names; NOTE(review): consider pinning by = "geo"
+
## Joining, by = "geo"
+
summary(crop.0.area.dat.2013nuts.sum) #value ranges and NA counts per column after adding sum_uaa
+
##      geo                rye_a              barley_a       
+##  Length:2013        Min.   :   0.0000   Min.   :   0.000  
+##  Class :character   1st Qu.:   0.0788   1st Qu.:   6.327  
+##  Mode  :character   Median :   1.0375   Median :  34.400  
+##                     Mean   :  16.6161   Mean   : 109.456  
+##                     3rd Qu.:   7.8637   3rd Qu.: 110.820  
+##                     Max.   :1051.3000   Max.   :2763.500  
+##                     NA's   :1620        NA's   :1618      
+##     maize_a             tritic_a           sorghum_a      
+##  Min.   :   0.0000   Min.   :   0.0000   Min.   : 0.0000  
+##  1st Qu.:   0.0303   1st Qu.:   0.0438   1st Qu.: 0.0000  
+##  Median :   6.6493   Median :   1.8200   Median : 0.0000  
+##  Mean   :  72.5540   Mean   :  22.6732   Mean   : 0.9113  
+##  3rd Qu.:  55.8162   3rd Qu.:  16.0265   3rd Qu.: 0.0991  
+##  Max.   :2506.1712   Max.   :1289.4300   Max.   :50.6700  
+##  NA's   :1579        NA's   :1666        NA's   :1601     
+##    oth_cer_a           rice_a          pasture_a       
+##  Min.   :  0.000   Min.   :  0.000   Min.   :   0.000  
+##  1st Qu.:  0.000   1st Qu.:  0.000   1st Qu.:   1.541  
+##  Median :  0.000   Median :  0.000   Median :  11.745  
+##  Mean   :  1.672   Mean   :  3.659   Mean   :  74.758  
+##  3rd Qu.:  0.308   3rd Qu.:  0.000   3rd Qu.:  48.651  
+##  Max.   :102.924   Max.   :230.827   Max.   :3091.571  
+##  NA's   :1772      NA's   :1555      NA's   :1694      
+##      rape_a            sunflow_a           pulses_a     
+##  Min.   :   0.0000   Min.   :  0.0000   Min.   :  0.00  
+##  1st Qu.:   0.0475   1st Qu.:  0.0000   1st Qu.:  0.70  
+##  Median :   4.3913   Median :  0.1112   Median :  4.43  
+##  Mean   :  47.0257   Mean   : 34.9245   Mean   : 19.76  
+##  3rd Qu.:  43.8013   3rd Qu.: 11.1494   3rd Qu.: 16.70  
+##  Max.   :1503.0925   Max.   :997.2763   Max.   :663.38  
+##  NA's   :1634        NA's   :1577       NA's   :1624    
+##     potato_a          sugbeet_a         oth_rt_a          wheat_a        
+##  Min.   :  0.0000   Min.   :  0.00   Min.   : 0.0000   Min.   :   0.000  
+##  1st Qu.:  0.9975   1st Qu.:  0.00   1st Qu.: 0.0000   1st Qu.:   4.135  
+##  Median :  4.4000   Median :  0.84   Median : 0.0000   Median :  74.737  
+##  Mean   : 13.8373   Mean   : 13.98   Mean   : 0.8507   Mean   : 225.763  
+##  3rd Qu.: 13.8000   3rd Qu.: 12.68   3rd Qu.: 0.2700   3rd Qu.: 241.577  
+##  Max.   :334.1100   Max.   :404.30   Max.   :40.0000   Max.   :7997.875  
+##  NA's   :1649       NA's   :1616     NA's   :1756      NA's   :1573      
+##      oats_a            oth_oil_a           fibre_a        
+##  Min.   :   0.0000   Min.   :  0.0000   Min.   :  0.0000  
+##  1st Qu.:   0.0897   1st Qu.:  0.0000   1st Qu.:  0.0000  
+##  Median :   3.0342   Median :  0.0779   Median :  0.0000  
+##  Mean   :  22.6225   Mean   :  9.3354   Mean   :  5.6487  
+##  3rd Qu.:  14.2528   3rd Qu.:  2.1159   3rd Qu.:  0.0166  
+##  Max.   :1393.6675   Max.   :500.5750   Max.   :471.5714  
+##  NA's   :1583        NA's   :1535       NA's   :1535      
+##    oth_ind_a           fodder_a          LEVL_CODE        sum_uaa        
+##  Min.   :  0.0000   Min.   :   0.000   Min.   :0.000   Min.   :      56  
+##  1st Qu.:  0.0000   1st Qu.:   1.748   1st Qu.:3.000   1st Qu.:  256462  
+##  Median :  0.0752   Median :  19.852   Median :3.000   Median :  701169  
+##  Mean   :  3.4717   Mean   :  76.544   Mean   :2.658   Mean   : 1469382  
+##  3rd Qu.:  1.4688   3rd Qu.:  76.551   3rd Qu.:3.000   3rd Qu.: 1522769  
+##  Max.   :165.6381   Max.   :2421.790   Max.   :3.000   Max.   :33916138  
+##  NA's   :1584       NA's   :1594                       NA's   :1556
+
#Second, calculate the fraction of area
+crop.frac.0 <- as.data.frame(crop.0.area.dat.2013nuts.sum)
+#Rename only a trailing "_a" to "_f"; an unanchored pattern would also rewrite "_a" inside a name
+names(crop.frac.0)[2:21] <- sub("_a$", "_f", names(crop.frac.0)[2:21])
+#NOTE(review): the factor 1000 presumably converts the crop areas to the units of sum_uaa - confirm
+crop.frac.0[,2:21] <- 1000 * crop.0.area.dat.2013nuts.sum[,2:21] / crop.0.area.dat.2013nuts.sum$sum_uaa
+#na.rm = TRUE (spelled out, T can be reassigned): a row with no crop data at all gets total_f = 0, not NA
+crop.frac.0$total_f <- rowSums(crop.frac.0[,2:21], na.rm = TRUE)
+summary(crop.frac.0)
+
##      geo                rye_f           barley_f         maize_f      
+##  Length:2013        Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  Class :character   1st Qu.:0.0002   1st Qu.:0.0166   1st Qu.:0.0001  
+##  Mode  :character   Median :0.0012   Median :0.0405   Median :0.0078  
+##                     Mean   :0.0076   Mean   :0.0523   Mean   :0.0325  
+##                     3rd Qu.:0.0063   3rd Qu.:0.0740   3rd Qu.:0.0348  
+##                     Max.   :0.1271   Max.   :0.2743   Max.   :0.3190  
+##                     NA's   :1647     NA's   :1645     NA's   :1608    
+##     tritic_f        sorghum_f        oth_cer_f          rice_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0002   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
+##  Median :0.0024   Median :0.0000   Median :0.0000   Median :0.0000  
+##  Mean   :0.0094   Mean   :0.0003   Mean   :0.0007   Mean   :0.0018  
+##  3rd Qu.:0.0094   3rd Qu.:0.0001   3rd Qu.:0.0004   3rd Qu.:0.0000  
+##  Max.   :0.1181   Max.   :0.0181   Max.   :0.0178   Max.   :0.1071  
+##  NA's   :1694     NA's   :1631     NA's   :1780     NA's   :1585    
+##    pasture_f          rape_f         sunflow_f         pulses_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0029   1st Qu.:0.0002   1st Qu.:0.0000   1st Qu.:0.0015  
+##  Median :0.0167   Median :0.0068   Median :0.0001   Median :0.0043  
+##  Mean   :0.0496   Mean   :0.0237   Mean   :0.0138   Mean   :0.0078  
+##  3rd Qu.:0.0617   3rd Qu.:0.0379   3rd Qu.:0.0071   3rd Qu.:0.0107  
+##  Max.   :0.4127   Max.   :0.1462   Max.   :0.2186   Max.   :0.0648  
+##  NA's   :1722     NA's   :1662     NA's   :1606     NA's   :1651    
+##     potato_f        sugbeet_f         oth_rt_f         wheat_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0020   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0161  
+##  Median :0.0049   Median :0.0012   Median :0.0000   Median :0.0993  
+##  Mean   :0.0139   Mean   :0.0102   Mean   :0.0005   Mean   :0.1106  
+##  3rd Qu.:0.0120   3rd Qu.:0.0108   3rd Qu.:0.0002   3rd Qu.:0.1718  
+##  Max.   :0.2714   Max.   :0.2914   Max.   :0.0099   Max.   :1.0457  
+##  NA's   :1676     NA's   :1644     NA's   :1759     NA's   :1603    
+##      oats_f         oth_oil_f         fibre_f         oth_ind_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0003   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
+##  Median :0.0038   Median :0.0001   Median :0.0000   Median :0.0001  
+##  Mean   :0.0108   Mean   :0.0042   Mean   :0.0033   Mean   :0.0016  
+##  3rd Qu.:0.0123   3rd Qu.:0.0018   3rd Qu.:0.0000   3rd Qu.:0.0011  
+##  Max.   :0.1913   Max.   :0.1905   Max.   :0.1514   Max.   :0.0356  
+##  NA's   :1613     NA's   :1565     NA's   :1565     NA's   :1614    
+##     fodder_f        LEVL_CODE        sum_uaa            total_f       
+##  Min.   :0.0000   Min.   :0.000   Min.   :      56   Min.   :0.00000  
+##  1st Qu.:0.0073   1st Qu.:3.000   1st Qu.:  256462   1st Qu.:0.00000  
+##  Median :0.0280   Median :3.000   Median :  701169   Median :0.00000  
+##  Mean   :0.0417   Mean   :2.658   Mean   : 1469382   Mean   :0.07397  
+##  3rd Qu.:0.0593   3rd Qu.:3.000   3rd Qu.: 1522769   3rd Qu.:0.00000  
+##  Max.   :0.3323   Max.   :3.000   Max.   :33916138   Max.   :2.89208  
+##  NA's   :1624                     NA's   :1556
+
#Spurious fractions: geo codes whose summed crop fraction exceeds 1
+crop.frac.0$geo[which(crop.frac.0$total_f > 1)]
+
## [1] "BE1"  "BE10"
+
crop.frac.0$geo[which(crop.frac.0$wheat_f > 1)] #geo codes where wheat alone exceeds 1
+
## [1] "BE1"  "BE10"
+
crop.0.area.dat.2013nuts.sum %>% filter(geo %in% c("BE1", "BE10")) %>% as.data.frame() #show the underlying areas and sum_uaa for the two flagged codes
+
##    geo       rye_a barley_a    maize_a   tritic_a sorghum_a oth_cer_a
+## 1  BE1 0.001428571     0.12 0.07142857 0.01428571         0     0.004
+## 2 BE10 0.001428571     0.12 0.07142857 0.01428571         0     0.004
+##   rice_a pasture_a rape_a sunflow_a pulses_a potato_a sugbeet_a oth_rt_a
+## 1      0      0.11  0.055         0  0.00875  0.11875    0.1275    0.002
+## 2      0      0.11  0.055         0  0.00875  0.11875    0.1275    0.002
+##   wheat_a     oats_a oth_oil_a fibre_a oth_ind_a  fodder_a LEVL_CODE
+## 1  0.4575 0.01428571         0    0.04         0 0.1203571         1
+## 2  0.4575 0.01428571         0    0.04         0 0.1203571         2
+##   sum_uaa
+## 1   437.5
+## 2   437.5
+
#The Brussels region (BE10, and BE1 which carries identical values) has wheat fraction > 1, and total fraction >> 1, so we will give NA
+#All fraction columns (names ending in "_f", total_f included) are selected by name;
+#the former positional index c(2:21,24) relied on total_f being the 24th column.
+crop.frac.0[crop.frac.0$geo %in% c("BE1", "BE10"), grep("_f$", names(crop.frac.0), value = TRUE)] <- NA
+crop.frac.0[crop.frac.0$geo %in% c("BE1", "BE10"),]
+
##     geo rye_f barley_f maize_f tritic_f sorghum_f oth_cer_f rice_f
+## 67  BE1    NA       NA      NA       NA        NA        NA     NA
+## 68 BE10    NA       NA      NA       NA        NA        NA     NA
+##    pasture_f rape_f sunflow_f pulses_f potato_f sugbeet_f oth_rt_f wheat_f
+## 67        NA     NA        NA       NA       NA        NA       NA      NA
+## 68        NA     NA        NA       NA       NA        NA       NA      NA
+##    oats_f oth_oil_f fibre_f oth_ind_f fodder_f LEVL_CODE sum_uaa total_f
+## 67     NA        NA      NA        NA       NA         1   437.5      NA
+## 68     NA        NA      NA        NA       NA         2   437.5      NA
+
#Next, replace NA's in crop.frac with the values from crop.frac.0 (presumably the zero-area variant - confirm), where data exist
+summary(crop.frac) #NA counts before the replacement
+
##      geo                rye_f           barley_f         maize_f      
+##  Length:2013        Min.   :0.0000   Min.   :0.0003   Min.   :0.0001  
+##  Class :character   1st Qu.:0.0005   1st Qu.:0.0199   1st Qu.:0.0035  
+##  Mode  :character   Median :0.0021   Median :0.0420   Median :0.0190  
+##                     Mean   :0.0089   Mean   :0.0535   Mean   :0.0411  
+##                     3rd Qu.:0.0080   3rd Qu.:0.0761   3rd Qu.:0.0534  
+##                     Max.   :0.1271   Max.   :0.2253   Max.   :0.3190  
+##                     NA's   :1698     NA's   :1663     NA's   :1701    
+##     tritic_f        sorghum_f        oth_cer_f          rice_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0013   1st Qu.:0.0001   1st Qu.:0.0003   1st Qu.:0.0006  
+##  Median :0.0045   Median :0.0004   Median :0.0007   Median :0.0024  
+##  Mean   :0.0114   Mean   :0.0010   Mean   :0.0016   Mean   :0.0094  
+##  3rd Qu.:0.0116   3rd Qu.:0.0010   3rd Qu.:0.0018   3rd Qu.:0.0080  
+##  Max.   :0.1181   Max.   :0.0181   Max.   :0.0178   Max.   :0.1071  
+##  NA's   :1755     NA's   :1880     NA's   :1920     NA's   :1929    
+##    pasture_f          rape_f         sunflow_f         pulses_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0001  
+##  1st Qu.:0.0086   1st Qu.:0.0020   1st Qu.:0.0004   1st Qu.:0.0021  
+##  Median :0.0235   Median :0.0176   Median :0.0041   Median :0.0045  
+##  Mean   :0.0581   Mean   :0.0288   Mean   :0.0232   Mean   :0.0085  
+##  3rd Qu.:0.0816   3rd Qu.:0.0469   3rd Qu.:0.0222   3rd Qu.:0.0116  
+##  Max.   :0.4127   Max.   :0.1462   Max.   :0.2186   Max.   :0.0648  
+##  NA's   :1773     NA's   :1731     NA's   :1770     NA's   :1675    
+##     potato_f        sugbeet_f         oth_rt_f         wheat_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0002  
+##  1st Qu.:0.0022   1st Qu.:0.0012   1st Qu.:0.0001   1st Qu.:0.0518  
+##  Median :0.0051   Median :0.0067   Median :0.0003   Median :0.1194  
+##  Mean   :0.0129   Mean   :0.0131   Mean   :0.0012   Mean   :0.1234  
+##  3rd Qu.:0.0123   3rd Qu.:0.0164   3rd Qu.:0.0010   3rd Qu.:0.1801  
+##  Max.   :0.1882   Max.   :0.1137   Max.   :0.0123   Max.   :0.3899  
+##  NA's   :1690     NA's   :1770     NA's   :1895     NA's   :1662    
+##      oats_f         oth_oil_f         fibre_f         oth_ind_f     
+##  Min.   :0.0001   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0021   1st Qu.:0.0004   1st Qu.:0.0000   1st Qu.:0.0002  
+##  Median :0.0063   Median :0.0018   Median :0.0002   Median :0.0007  
+##  Mean   :0.0135   Mean   :0.0089   Mean   :0.0087   Mean   :0.0025  
+##  3rd Qu.:0.0150   3rd Qu.:0.0078   3rd Qu.:0.0050   3rd Qu.:0.0023  
+##  Max.   :0.1913   Max.   :0.1905   Max.   :0.1514   Max.   :0.0356  
+##  NA's   :1694     NA's   :1766     NA's   :1861     NA's   :1754    
+##     fodder_f        LEVL_CODE        sum_uaa            total_f       
+##  Min.   :0.0002   Min.   :0.000   Min.   :      56   Min.   :0.00000  
+##  1st Qu.:0.0179   1st Qu.:3.000   1st Qu.:  256462   1st Qu.:0.00000  
+##  Median :0.0393   Median :3.000   Median :  701169   Median :0.00000  
+##  Mean   :0.0503   Mean   :2.658   Mean   : 1469382   Mean   :0.07172  
+##  3rd Qu.:0.0691   3rd Qu.:3.000   3rd Qu.: 1522769   3rd Qu.:0.00000  
+##  Max.   :0.3323   Max.   :3.000   Max.   :33916138   Max.   :0.80077  
+##  NA's   :1695                     NA's   :1556       NA's   :2
+
summary(crop.frac.0) #the table used to fill the NA's; compare its NA counts with crop.frac above
+
##      geo                rye_f           barley_f         maize_f      
+##  Length:2013        Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  Class :character   1st Qu.:0.0002   1st Qu.:0.0165   1st Qu.:0.0001  
+##  Mode  :character   Median :0.0012   Median :0.0399   Median :0.0077  
+##                     Mean   :0.0077   Mean   :0.0510   Mean   :0.0318  
+##                     3rd Qu.:0.0063   3rd Qu.:0.0721   3rd Qu.:0.0346  
+##                     Max.   :0.1271   Max.   :0.2253   Max.   :0.3190  
+##                     NA's   :1649     NA's   :1647     NA's   :1610    
+##     tritic_f        sorghum_f        oth_cer_f          rice_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0002   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
+##  Median :0.0024   Median :0.0000   Median :0.0000   Median :0.0000  
+##  Mean   :0.0092   Mean   :0.0003   Mean   :0.0006   Mean   :0.0018  
+##  3rd Qu.:0.0093   3rd Qu.:0.0001   3rd Qu.:0.0004   3rd Qu.:0.0000  
+##  Max.   :0.1181   Max.   :0.0181   Max.   :0.0178   Max.   :0.1071  
+##  NA's   :1696     NA's   :1633     NA's   :1782     NA's   :1587    
+##    pasture_f          rape_f         sunflow_f         pulses_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0028   1st Qu.:0.0002   1st Qu.:0.0000   1st Qu.:0.0014  
+##  Median :0.0165   Median :0.0066   Median :0.0001   Median :0.0042  
+##  Mean   :0.0482   Mean   :0.0231   Mean   :0.0139   Mean   :0.0078  
+##  3rd Qu.:0.0604   3rd Qu.:0.0378   3rd Qu.:0.0072   3rd Qu.:0.0104  
+##  Max.   :0.4127   Max.   :0.1462   Max.   :0.2186   Max.   :0.0648  
+##  NA's   :1724     NA's   :1664     NA's   :1608     NA's   :1653    
+##     potato_f        sugbeet_f         oth_rt_f         wheat_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0019   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0158  
+##  Median :0.0049   Median :0.0012   Median :0.0000   Median :0.0989  
+##  Mean   :0.0124   Mean   :0.0086   Mean   :0.0005   Mean   :0.1060  
+##  3rd Qu.:0.0117   3rd Qu.:0.0103   3rd Qu.:0.0002   3rd Qu.:0.1708  
+##  Max.   :0.1882   Max.   :0.1137   Max.   :0.0099   Max.   :0.3899  
+##  NA's   :1678     NA's   :1646     NA's   :1761     NA's   :1605    
+##      oats_f         oth_oil_f         fibre_f         oth_ind_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0003   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
+##  Median :0.0037   Median :0.0001   Median :0.0000   Median :0.0001  
+##  Mean   :0.0106   Mean   :0.0043   Mean   :0.0029   Mean   :0.0016  
+##  3rd Qu.:0.0121   3rd Qu.:0.0019   3rd Qu.:0.0000   3rd Qu.:0.0011  
+##  Max.   :0.1913   Max.   :0.1905   Max.   :0.1514   Max.   :0.0356  
+##  NA's   :1615     NA's   :1567     NA's   :1567     NA's   :1616    
+##     fodder_f        LEVL_CODE        sum_uaa            total_f       
+##  Min.   :0.0000   Min.   :0.000   Min.   :      56   Min.   :0.00000  
+##  1st Qu.:0.0073   1st Qu.:3.000   1st Qu.:  256462   1st Qu.:0.00000  
+##  Median :0.0276   Median :3.000   Median :  701169   Median :0.00000  
+##  Mean   :0.0405   Mean   :2.658   Mean   : 1469382   Mean   :0.07117  
+##  3rd Qu.:0.0582   3rd Qu.:3.000   3rd Qu.: 1522769   3rd Qu.:0.00000  
+##  Max.   :0.3323   Max.   :3.000   Max.   :33916138   Max.   :0.80004  
+##  NA's   :1626                     NA's   :1556       NA's   :2
+
crop.frac.rep <- crop.frac
+#Fill each NA in crop.frac.rep with the value from the crop.frac.0 row that has the same geo.
+#One match() gives the row alignment for all columns, replacing a per-region, per-column
+#loop that rescanned both tables on every iteration. Assumes geo is unique within each table.
+rep.idx <- match(crop.frac.rep$geo, crop.frac.0$geo)
+for(i in names(crop.frac.rep)[2:21]) {
+  rep.gap <- is.na(crop.frac.rep[[i]]) #cells still missing in this column
+  #a geo absent from crop.frac.0 has rep.idx NA, so its cell stays NA
+  crop.frac.rep[[i]][rep.gap] <- crop.frac.0[[i]][rep.idx[rep.gap]]
+}
+#NOTE(review): total_f is copied from crop.frac and is not recomputed after the fill - confirm this is intended
+
+summary(crop.frac.rep)
+
##      geo                rye_f           barley_f         maize_f      
+##  Length:2013        Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  Class :character   1st Qu.:0.0003   1st Qu.:0.0167   1st Qu.:0.0002  
+##  Mode  :character   Median :0.0012   Median :0.0405   Median :0.0077  
+##                     Mean   :0.0077   Mean   :0.0512   Mean   :0.0319  
+##                     3rd Qu.:0.0064   3rd Qu.:0.0721   3rd Qu.:0.0346  
+##                     Max.   :0.1271   Max.   :0.2253   Max.   :0.3190  
+##                     NA's   :1649     NA's   :1647     NA's   :1610    
+##     tritic_f        sorghum_f        oth_cer_f          rice_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0003   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
+##  Median :0.0024   Median :0.0000   Median :0.0000   Median :0.0000  
+##  Mean   :0.0093   Mean   :0.0003   Mean   :0.0006   Mean   :0.0019  
+##  3rd Qu.:0.0093   3rd Qu.:0.0001   3rd Qu.:0.0004   3rd Qu.:0.0000  
+##  Max.   :0.1181   Max.   :0.0181   Max.   :0.0178   Max.   :0.1071  
+##  NA's   :1696     NA's   :1633     NA's   :1782     NA's   :1587    
+##    pasture_f          rape_f         sunflow_f         pulses_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0028   1st Qu.:0.0002   1st Qu.:0.0000   1st Qu.:0.0015  
+##  Median :0.0167   Median :0.0072   Median :0.0002   Median :0.0043  
+##  Mean   :0.0483   Mean   :0.0233   Mean   :0.0139   Mean   :0.0079  
+##  3rd Qu.:0.0604   3rd Qu.:0.0379   3rd Qu.:0.0072   3rd Qu.:0.0106  
+##  Max.   :0.4127   Max.   :0.1462   Max.   :0.2186   Max.   :0.0648  
+##  NA's   :1724     NA's   :1664     NA's   :1608     NA's   :1653    
+##     potato_f        sugbeet_f         oth_rt_f         wheat_f      
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0020   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0158  
+##  Median :0.0049   Median :0.0012   Median :0.0000   Median :0.0993  
+##  Mean   :0.0124   Mean   :0.0087   Mean   :0.0006   Mean   :0.1061  
+##  3rd Qu.:0.0120   3rd Qu.:0.0103   3rd Qu.:0.0002   3rd Qu.:0.1715  
+##  Max.   :0.1882   Max.   :0.1137   Max.   :0.0123   Max.   :0.3899  
+##  NA's   :1678     NA's   :1646     NA's   :1761     NA's   :1605    
+##      oats_f         oth_oil_f         fibre_f         oth_ind_f     
+##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
+##  1st Qu.:0.0004   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
+##  Median :0.0039   Median :0.0001   Median :0.0000   Median :0.0002  
+##  Mean   :0.0108   Mean   :0.0050   Mean   :0.0030   Mean   :0.0017  
+##  3rd Qu.:0.0121   3rd Qu.:0.0024   3rd Qu.:0.0000   3rd Qu.:0.0011  
+##  Max.   :0.1913   Max.   :0.1905   Max.   :0.1514   Max.   :0.0356  
+##  NA's   :1615     NA's   :1567     NA's   :1567     NA's   :1616    
+##     fodder_f        LEVL_CODE        sum_uaa            total_f       
+##  Min.   :0.0000   Min.   :0.000   Min.   :      56   Min.   :0.00000  
+##  1st Qu.:0.0073   1st Qu.:3.000   1st Qu.:  256462   1st Qu.:0.00000  
+##  Median :0.0287   Median :3.000   Median :  701169   Median :0.00000  
+##  Mean   :0.0413   Mean   :2.658   Mean   : 1469382   Mean   :0.07172  
+##  3rd Qu.:0.0599   3rd Qu.:3.000   3rd Qu.: 1522769   3rd Qu.:0.00000  
+##  Max.   :0.3323   Max.   :3.000   Max.   :33916138   Max.   :0.80077  
+##  NA's   :1626                     NA's   :1556       NA's   :2
+
# Finally, allocate NUTS1 or NUTS0 fractions to NUTS2 regions where needed
+
+# Bookkeeping list with four slots per crop-fraction variable, named
+# <variable>.n2.dat, <variable>.n1.dat, <variable>.n0.dat and <variable>.nuts0.na
+# (all .n2.dat names first, then .n1.dat, .n0.dat and .nuts0.na)
+data.level.crop.f.0 <- setNames(
+  vector("list", 4*length(names(crop.frac.rep)[2:21])),
+  as.vector(outer(names(crop.frac.rep)[2:21],
+                  c('n2.dat', 'n1.dat', 'n0.dat', 'nuts0.na'),
+                  paste, sep='.'))
+)
+names(data.level.crop.f.0)
+
##  [1] "rye_f.n2.dat"       "barley_f.n2.dat"    "maize_f.n2.dat"    
+##  [4] "tritic_f.n2.dat"    "sorghum_f.n2.dat"   "oth_cer_f.n2.dat"  
+##  [7] "rice_f.n2.dat"      "pasture_f.n2.dat"   "rape_f.n2.dat"     
+## [10] "sunflow_f.n2.dat"   "pulses_f.n2.dat"    "potato_f.n2.dat"   
+## [13] "sugbeet_f.n2.dat"   "oth_rt_f.n2.dat"    "wheat_f.n2.dat"    
+## [16] "oats_f.n2.dat"      "oth_oil_f.n2.dat"   "fibre_f.n2.dat"    
+## [19] "oth_ind_f.n2.dat"   "fodder_f.n2.dat"    "rye_f.n1.dat"      
+## [22] "barley_f.n1.dat"    "maize_f.n1.dat"     "tritic_f.n1.dat"   
+## [25] "sorghum_f.n1.dat"   "oth_cer_f.n1.dat"   "rice_f.n1.dat"     
+## [28] "pasture_f.n1.dat"   "rape_f.n1.dat"      "sunflow_f.n1.dat"  
+## [31] "pulses_f.n1.dat"    "potato_f.n1.dat"    "sugbeet_f.n1.dat"  
+## [34] "oth_rt_f.n1.dat"    "wheat_f.n1.dat"     "oats_f.n1.dat"     
+## [37] "oth_oil_f.n1.dat"   "fibre_f.n1.dat"     "oth_ind_f.n1.dat"  
+## [40] "fodder_f.n1.dat"    "rye_f.n0.dat"       "barley_f.n0.dat"   
+## [43] "maize_f.n0.dat"     "tritic_f.n0.dat"    "sorghum_f.n0.dat"  
+## [46] "oth_cer_f.n0.dat"   "rice_f.n0.dat"      "pasture_f.n0.dat"  
+## [49] "rape_f.n0.dat"      "sunflow_f.n0.dat"   "pulses_f.n0.dat"   
+## [52] "potato_f.n0.dat"    "sugbeet_f.n0.dat"   "oth_rt_f.n0.dat"   
+## [55] "wheat_f.n0.dat"     "oats_f.n0.dat"      "oth_oil_f.n0.dat"  
+## [58] "fibre_f.n0.dat"     "oth_ind_f.n0.dat"   "fodder_f.n0.dat"   
+## [61] "rye_f.nuts0.na"     "barley_f.nuts0.na"  "maize_f.nuts0.na"  
+## [64] "tritic_f.nuts0.na"  "sorghum_f.nuts0.na" "oth_cer_f.nuts0.na"
+## [67] "rice_f.nuts0.na"    "pasture_f.nuts0.na" "rape_f.nuts0.na"   
+## [70] "sunflow_f.nuts0.na" "pulses_f.nuts0.na"  "potato_f.nuts0.na" 
+## [73] "sugbeet_f.nuts0.na" "oth_rt_f.nuts0.na"  "wheat_f.nuts0.na"  
+## [76] "oats_f.nuts0.na"    "oth_oil_f.nuts0.na" "fibre_f.nuts0.na"  
+## [79] "oth_ind_f.nuts0.na" "fodder_f.nuts0.na"
+
# Empty results table: one row per region in `nuts`, a NUTS_ID key column and one
+# all-NA column per crop-fraction variable (filled in by the allocation loop)
+dbase.crop <- data.frame(matrix(NA, nrow=nrow(nuts@data), ncol=1 + length(names(crop.frac.rep)[2:21])))
+dbase.crop[[1]] <- nuts@data$NUTS_ID
+colnames(dbase.crop) <- c("NUTS_ID", names(crop.frac.rep)[2:21])
+head(dbase.crop, n=6L)
+
##   NUTS_ID rye_f barley_f maize_f tritic_f sorghum_f oth_cer_f rice_f
+## 1    AT11    NA       NA      NA       NA        NA        NA     NA
+## 2    AT22    NA       NA      NA       NA        NA        NA     NA
+## 3    AT12    NA       NA      NA       NA        NA        NA     NA
+## 4    AT13    NA       NA      NA       NA        NA        NA     NA
+## 5    AT21    NA       NA      NA       NA        NA        NA     NA
+## 6    AT31    NA       NA      NA       NA        NA        NA     NA
+##   pasture_f rape_f sunflow_f pulses_f potato_f sugbeet_f oth_rt_f wheat_f
+## 1        NA     NA        NA       NA       NA        NA       NA      NA
+## 2        NA     NA        NA       NA       NA        NA       NA      NA
+## 3        NA     NA        NA       NA       NA        NA       NA      NA
+## 4        NA     NA        NA       NA       NA        NA       NA      NA
+## 5        NA     NA        NA       NA       NA        NA       NA      NA
+## 6        NA     NA        NA       NA       NA        NA       NA      NA
+##   oats_f oth_oil_f fibre_f oth_ind_f fodder_f
+## 1     NA        NA      NA        NA       NA
+## 2     NA        NA      NA        NA       NA
+## 3     NA        NA      NA        NA       NA
+## 4     NA        NA      NA        NA       NA
+## 5     NA        NA      NA        NA       NA
+## 6     NA        NA      NA        NA       NA
+
nrow(dbase.crop)
+
## [1] 320
+
# Fill dbase.crop one crop-fraction variable at a time. Each NUTS2 region takes its value
+# from the finest level that reports one: NUTS2 itself, else its NUTS1 parent, else its
+# NUTS0 country. Columns are referenced explicitly as crop.frac.rep$..., so no global
+# object named `geo` or `LEVL_CODE` can mask them (which attach() would allow).
+for(i in names(crop.frac.rep)[2:21]) {
+  # NUTS2 regions with no value for this variable
+  nuts2.na <- crop.frac.rep[crop.frac.rep$LEVL_CODE == 2 & is.na(crop.frac.rep[,i]), 'geo']
+  # their NUTS1 parents (NUTS2 code minus its last character) ...
+  nuts1 <- crop.frac.rep[crop.frac.rep$LEVL_CODE == 1 & crop.frac.rep$geo %in% sub(".$", "", nuts2.na), 'geo']
+  # ... and the parents that have no value either
+  nuts1.na <- crop.frac.rep[crop.frac.rep$geo %in% nuts1 & is.na(crop.frac.rep[,i]), 'geo']
+  # NUTS0 countries of those NUTS1 regions (NUTS1 code minus its last character) ...
+  nuts0 <- crop.frac.rep[crop.frac.rep$LEVL_CODE == 0 & crop.frac.rep$geo %in% sub(".$", "", nuts1.na), 'geo']
+  # ... and the countries that have no value either (= no data at any level)
+  nuts0.na <- crop.frac.rep[crop.frac.rep$geo %in% nuts0 & is.na(crop.frac.rep[,i]), 'geo']
+
+  # Regions that supply the value at each level
+  n2.dat <- crop.frac.rep[!(crop.frac.rep$geo %in% nuts2.na) & crop.frac.rep$LEVL_CODE == 2, 'geo']
+  n1.dat <- nuts1[!nuts1 %in% nuts1.na]
+  n0.dat <- nuts0[!nuts0 %in% nuts0.na]
+
+  # Record which regions supplied data at which level for this variable
+  data.level.crop.f.0[[paste0(i, '.n2.dat')]] <- n2.dat
+  data.level.crop.f.0[[paste0(i, '.n1.dat')]] <- n1.dat
+  data.level.crop.f.0[[paste0(i, '.n0.dat')]] <- n0.dat
+  data.level.crop.f.0[[paste0(i, '.nuts0.na')]] <- nuts0.na
+
+  # Write coarse to fine so that finer-level values overwrite coarser ones.
+  # Patterns are anchored with ^ so a code can only match as a prefix of NUTS_ID.
+  for(e in n0.dat) {
+    dbase.crop[grepl(paste0('^', e, '..'), dbase.crop$NUTS_ID), i] <- crop.frac.rep[crop.frac.rep$geo == e, i]
+  }
+
+  for(e in n1.dat) {
+    dbase.crop[grepl(paste0('^', e, '.'), dbase.crop$NUTS_ID), i] <- crop.frac.rep[crop.frac.rep$geo == e, i]
+  }
+
+  for(e in n2.dat) {
+    dbase.crop[dbase.crop$NUTS_ID == e, i] <- crop.frac.rep[crop.frac.rep$geo == e, i]
+  }
+}
+
+summary(dbase.crop)
+
##     NUTS_ID        rye_f              barley_f          maize_f        
+##  AT11   :  1   Min.   :0.0000000   Min.   :0.00000   Min.   :0.000000  
+##  AT12   :  1   1st Qu.:0.0001763   1st Qu.:0.01824   1st Qu.:0.000236  
+##  AT13   :  1   Median :0.0010613   Median :0.04720   Median :0.008416  
+##  AT21   :  1   Mean   :0.0078947   Mean   :0.05499   Mean   :0.029646  
+##  AT22   :  1   3rd Qu.:0.0069115   3rd Qu.:0.08157   3rd Qu.:0.033520  
+##  AT31   :  1   Max.   :0.1270547   Max.   :0.22525   Max.   :0.319030  
+##  (Other):314   NA's   :1           NA's   :1         NA's   :1         
+##     tritic_f           sorghum_f           oth_cer_f      
+##  Min.   :0.0000000   Min.   :0.0000000   Min.   :0.00000  
+##  1st Qu.:0.0006165   1st Qu.:0.0000000   1st Qu.:0.00000  
+##  Median :0.0028821   Median :0.0000000   Median :0.00008  
+##  Mean   :0.0090348   Mean   :0.0003367   Mean   :0.00097  
+##  3rd Qu.:0.0101557   3rd Qu.:0.0000793   3rd Qu.:0.00128  
+##  Max.   :0.1181380   Max.   :0.0180664   Max.   :0.01778  
+##  NA's   :1           NA's   :1           NA's   :65       
+##      rice_f           pasture_f            rape_f         
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.0000000  
+##  1st Qu.:0.000000   1st Qu.:0.006223   1st Qu.:0.0002153  
+##  Median :0.000000   Median :0.021880   Median :0.0125632  
+##  Mean   :0.001738   Mean   :0.047618   Mean   :0.0268691  
+##  3rd Qu.:0.000000   3rd Qu.:0.084760   3rd Qu.:0.0427553  
+##  Max.   :0.107097   Max.   :0.412707   Max.   :0.1461908  
+##  NA's   :1          NA's   :8          NA's   :1          
+##    sunflow_f           pulses_f           potato_f       
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000000   1st Qu.:0.001809   1st Qu.:0.002072  
+##  Median :0.000192   Median :0.004299   Median :0.005518  
+##  Mean   :0.011671   Mean   :0.008475   Mean   :0.012031  
+##  3rd Qu.:0.004120   3rd Qu.:0.011267   3rd Qu.:0.013376  
+##  Max.   :0.218639   Max.   :0.064771   Max.   :0.188248  
+##  NA's   :1          NA's   :1          NA's   :1         
+##    sugbeet_f           oth_rt_f          wheat_f        
+##  Min.   :0.000000   Min.   :0.00000   Min.   :0.000000  
+##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.006516  
+##  Median :0.001072   Median :0.00015   Median :0.103025  
+##  Mean   :0.009541   Mean   :0.00091   Mean   :0.103869  
+##  3rd Qu.:0.013027   3rd Qu.:0.00108   3rd Qu.:0.170883  
+##  Max.   :0.113660   Max.   :0.01231   Max.   :0.389937  
+##  NA's   :1          NA's   :35        NA's   :1         
+##      oats_f            oth_oil_f           fibre_f         
+##  Min.   :0.0000000   Min.   :0.000000   Min.   :0.0000000  
+##  1st Qu.:0.0002925   1st Qu.:0.000000   1st Qu.:0.0000000  
+##  Median :0.0037227   Median :0.000000   Median :0.0000000  
+##  Mean   :0.0096505   Mean   :0.004699   Mean   :0.0030096  
+##  3rd Qu.:0.0106135   3rd Qu.:0.001537   3rd Qu.:0.0000127  
+##  Max.   :0.1912935   Max.   :0.190498   Max.   :0.1513896  
+##  NA's   :1           NA's   :1          NA's   :1          
+##    oth_ind_f            fodder_f       
+##  Min.   :0.0000000   Min.   :0.000000  
+##  1st Qu.:0.0000000   1st Qu.:0.007706  
+##  Median :0.0002218   Median :0.032609  
+##  Mean   :0.0015573   Mean   :0.045518  
+##  3rd Qu.:0.0010943   3rd Qu.:0.069995  
+##  Max.   :0.0356431   Max.   :0.253901  
+##  NA's   :1           NA's   :1
+
# First rows of the filled table
+head(dbase.crop, n=6L)
+
##   NUTS_ID       rye_f   barley_f    maize_f    tritic_f    sorghum_f
+## 1    AT11 0.023158915 0.04191746 0.10322047 0.010459416 0.0019037848
+## 2    AT22 0.004659144 0.02072924 0.12555999 0.009728318 0.0014761044
+## 3    AT12 0.031413059 0.08222000 0.06573296 0.023418930 0.0008681970
+## 4    AT13 0.040740741 0.04567901 0.02242798 0.008436214 0.0016460905
+## 5    AT21 0.004181640 0.02944136 0.08118262 0.016478275 0.0003136230
+## 6    AT31 0.013497972 0.07211149 0.08582187 0.027766215 0.0001695024
+##      oth_cer_f rice_f   pasture_f      rape_f    sunflow_f    pulses_f
+## 1 0.0088828089      0 0.016951664 0.042230962 0.0163702690 0.017379161
+## 2 0.0007775365      0 0.029889596 0.001075187 0.0006074504 0.001716047
+## 3 0.0051265913      0 0.016059157 0.028237548 0.0174845920 0.012741848
+## 4 0.0177777778      0 0.002674897 0.032921811 0.0039976484 0.014403292
+## 5 0.0009722313      0 0.053812480 0.000313623 0.0008232604 0.003711205
+## 6 0.0007895810      0 0.014203875 0.018338447 0.0007960178 0.007065462
+##      potato_f    sugbeet_f     oth_rt_f    wheat_f      oats_f  oth_oil_f
+## 1 0.004103967 0.0187300502 7.295942e-05 0.22119243 0.006499088 0.08481532
+## 2 0.001737308 0.0006074504 4.373643e-05 0.01938374 0.002786982 0.04561345
+## 3 0.017488323 0.0360314193 6.567450e-05 0.19060904 0.014256840 0.02270124
+## 4 0.013374486 0.0467078189 0.000000e+00 0.24773663 0.003621399 0.01255144
+## 5 0.001973211 0.0001045410 0.000000e+00 0.01787216 0.006377001 0.01950996
+## 6 0.002589740 0.0100478469 9.955586e-05 0.08748686 0.017608514 0.02501556
+##        fibre_f    oth_ind_f   fodder_f
+## 1 2.735978e-04 0.0009746922 0.04886001
+## 2 7.289405e-05 0.0005181552 0.04823763
+## 3 4.796229e-04 0.0029031115 0.06777658
+## 4 0.000000e+00 0.0016460905 0.03168724
+## 5 5.227050e-05 0.0002744201 0.07297615
+## 6 1.235866e-04 0.0026176325 0.09126311
+
# Last rows of the filled table
+tail(dbase.crop, n=6L)
+
##     NUTS_ID       rye_f   barley_f    maize_f     tritic_f sorghum_f
+## 315    UKD3 0.000000000 0.05408133 0.00000000 0.0012613721         0
+## 316    TRC1 0.000000000 0.10890637 0.01116988 0.0000000000         0
+## 317    TRC2 0.000000000 0.11965185 0.04140112 0.0000000000         0
+## 318    UKD4 0.000000000 0.05408133 0.00000000 0.0012613721         0
+## 319    TRC3 0.000000000 0.04446833 0.06607323 0.0000000000         0
+## 320    UKM6 0.002408652 0.14945685 0.00000000 0.0004817304         0
+##        oth_cer_f      rice_f  pasture_f      rape_f   sunflow_f
+## 315 0.0000000000 0.000000000 0.08475975 0.006622203 0.000000000
+## 316 0.0002398355 0.000000000 0.00000000 0.000000000 0.001074027
+## 317 0.0004376439 0.001050345 0.00000000 0.000000000 0.001838105
+## 318 0.0000000000 0.000000000 0.08475975 0.006622203 0.000000000
+## 319 0.0002398355 0.000000000 0.00000000 0.000000000 0.001247144
+## 320 0.0000000000 0.000000000 0.08475975 0.017041212 0.000000000
+##        pulses_f     potato_f    sugbeet_f    oth_rt_f   wheat_f
+## 315 0.002838087 1.009098e-02 0.0010721662 0.002890765 0.0000000
+## 316 0.049190452 2.148055e-04 0.0012888328 0.000000000 0.2053540
+## 317 0.064771304 4.376439e-05 0.0004376439 0.000000000 0.3008364
+## 318 0.002838087 1.009098e-02 0.0010721662 0.002890765 0.0000000
+## 319 0.059570786 0.000000e+00 0.0003357697 0.000000000 0.3899369
+## 320 0.002167787 1.337605e-02 0.0000000000 0.002890765 0.0000000
+##           oats_f  oth_oil_f    fibre_f   oth_ind_f    fodder_f
+## 315 0.0000000000 0.00000000 0.00000000 0.000000000 0.000000000
+## 316 0.0000000000 0.02577666 0.01997691 0.006444164 0.014320364
+## 317 0.0001750576 0.10809805 0.10678512 0.000000000 0.007294066
+## 318 0.0000000000 0.00000000 0.00000000 0.000000000 0.000000000
+## 319 0.0000000000 0.03356101 0.02202441 0.001048781 0.013284565
+## 320 0.0000000000 0.00000000 0.00000000 0.000000000 0.000000000
+
# Example check for rye_f: how many regions supplied data at NUTS2 and NUTS1 level,
+# which countries supplied NUTS0 data, and which have no data at all
+length(data.level.crop.f.0[["rye_f.n2.dat"]])
+
## [1] 229
+
length(data.level.crop.f.0[["rye_f.n1.dat"]])
+
## [1] 34
+
data.level.crop.f.0[["rye_f.n0.dat"]]
+
## [1] "BE" "LU" "ME" "MK"
+
data.level.crop.f.0[["rye_f.nuts0.na"]]
+
## [1] "AL" "LI" "RS"
+
#Here we manually calculate yields, using only those NUTS regions that have area > 0 for each crop.
+
+# Crop production table. `header` is written out in full and TRUE is used instead of T,
+# so the call depends neither on partial argument matching nor on T being unassigned.
+# NOTE(review): absolute, user-specific path - this only runs where that Dropbox folder exists.
+crop.prod.dat <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Crop_area_yield/cropprod_no_0s_mean_allnuts.csv", header=TRUE)
+
+head(crop.area.dat)
+
##   geo     rye_a   barley_a   maize_a tritic_a sorghum_a oth_cer_a  rice_a
+## 1  BG 10.707500 186.503750 416.77125 13.81375     4.635 4.0450000 11.2175
+## 2  CH  1.986250  28.315000  15.33500  9.06875        NA 0.1116667  0.0800
+## 3  CY        NA  20.810000        NA  0.42500        NA        NA      NA
+## 4  AL  1.285714   2.742857  56.44286       NA        NA        NA      NA
+## 5  CZ 27.141250 357.867500  98.95625 43.45875        NA 5.3400000      NA
+## 6  BE  0.606250  46.607500  63.19125  5.85875        NA 3.2157143      NA
+##   pasture_a    rape_a   sunflow_a  pulses_a  potato_a sugbeet_a  oth_rt_a
+## 1   0.72750 175.64875 813.4100000 20.115000 12.512500   0.01000 0.1914286
+## 2 117.22000  21.79750   4.1087500  4.702857 11.072500  19.47500 0.6585714
+## 3   0.33500        NA          NA  0.413750  4.706250        NA        NA
+## 4 143.70000        NA   0.9714286 14.128571  9.585714   0.70000        NA
+## 5  37.74286 388.13375  21.6175000 27.933750 24.236250  60.71125 0.6300000
+## 6  79.00375  11.98875          NA  2.387500 80.837143  58.88000 7.5942857
+##      wheat_a    oats_a  oth_oil_a    fibre_a  oth_ind_a  fodder_a
+## 1 1184.90750 16.499750 20.2690000  1.8728571 64.7175000 110.37167
+## 2   88.57500  1.937917  1.6210357  0.0500000  1.2264286 189.00375
+## 3    9.51375  0.397500  0.0737500         NA         NA  34.95625
+## 4   70.64286 14.028571  0.2571429         NA  6.0571429 238.71667
+## 5  834.87625 47.745000 61.8150000  0.3642857 13.1067857 414.21214
+## 6  209.81500  3.721250  0.1466667 12.8037500  0.7129167 181.32375
+##   LEVL_CODE
+## 1         0
+## 2         0
+## 3         0
+## 4         0
+## 5         0
+## 6         0
+
# First rows of the production table just read in
+head(crop.prod.dat, n=6L)
+
##   geo      rye_p    barley_p   maize_p tritic_p sorghum_p oth_cer_p
+## 1  BG  20.221250  728.001250 2417.0175  41.0550  11.48875   6.30250
+## 2  CH  11.846250  184.607500  141.3250  52.1775        NA   0.34000
+## 3  CY         NA   33.743750        NA   0.4400        NA        NA
+## 4  AL   2.928571    7.857143  372.7429       NA        NA        NA
+## 5  CZ 127.265000 1765.546250  758.5862 198.7113        NA   8.05125
+## 6  BE   2.410000  378.551250  717.2150  38.0100        NA  14.16000
+##       rice_p  pasture_p     rape_p sunflow_p  pulses_p  potato_p sugbeet_p
+## 1 59.1500000    2.39625  451.34000 1742.8525 43.480000  184.2188     0.540
+## 2  0.2566667 1452.92000   76.99375   11.0975 15.947143  429.4575  1533.865
+## 3         NA         NA         NA        NA  0.876250  105.4137        NA
+## 4         NA         NA         NA    2.1000 25.971429  234.8857    29.600
+## 5         NA  164.70857 1242.46500   50.4650 66.733750  657.4800  3867.503
+## 6         NA  581.97500   48.63875        NA  8.301429 3710.9129  4947.113
+##     oth_rt_p   wheat_p     oats_p  oth_oil_p    fibre_p oth_ind_p
+## 1   2.275714 5083.4000  32.080000 18.7660000  1.7285714 90.718750
+## 2  62.166667  442.3188   9.760417  4.3605000  0.9700000  1.526071
+## 3         NA   19.1050   0.587500  0.2612500         NA        NA
+## 4         NA  284.6571  30.114286  0.5142857         NA 13.925000
+## 5  20.416250 4772.9613 154.703750 61.2250000  0.5428571 23.835357
+## 6 315.505000 1802.9088  19.687500  1.1380000 68.7500000  0.156250
+##    fodder_p
+## 1  930.5467
+## 2  719.1500
+## 3  180.6650
+## 4 6887.9417
+## 5 9824.5921
+## 6 7722.9237
+
# Row counts of the area and production tables that are joined below
+dim(crop.area.dat)[1]
+
## [1] 2017
+
dim(crop.prod.dat)[1]
+
## [1] 2017
+
# Keep every area row and attach the production columns; the join key is left
+# implicit, so dplyr reports the column it joined on
+crop.yield.dat <- crop.area.dat %>% left_join(crop.prod.dat)
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining character vector and factor, coercing into
+## character vector
+
colnames(crop.yield.dat)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"  "LEVL_CODE" "rye_p"     "barley_p"  "maize_p"  
+## [26] "tritic_p"  "sorghum_p" "oth_cer_p" "rice_p"    "pasture_p"
+## [31] "rape_p"    "sunflow_p" "pulses_p"  "potato_p"  "sugbeet_p"
+## [36] "oth_rt_p"  "wheat_p"   "oats_p"    "oth_oil_p" "fibre_p"  
+## [41] "oth_ind_p" "fodder_p"
+
# The data use NUTS2016 codes; work on a copy whose codes are translated to NUTS2013
+crop.yield.dat.2013nuts <- crop.yield.dat
+colnames(crop.yield.dat.2013nuts)
+
##  [1] "geo"       "rye_a"     "barley_a"  "maize_a"   "tritic_a" 
+##  [6] "sorghum_a" "oth_cer_a" "rice_a"    "pasture_a" "rape_a"   
+## [11] "sunflow_a" "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a" 
+## [16] "wheat_a"   "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a"
+## [21] "fodder_a"  "LEVL_CODE" "rye_p"     "barley_p"  "maize_p"  
+## [26] "tritic_p"  "sorghum_p" "oth_cer_p" "rice_p"    "pasture_p"
+## [31] "rape_p"    "sunflow_p" "pulses_p"  "potato_p"  "sugbeet_p"
+## [36] "oth_rt_p"  "wheat_p"   "oats_p"    "oth_oil_p" "fibre_p"  
+## [41] "oth_ind_p" "fodder_p"
+
# Keep the original NUTS2016 code next to the (to be translated) `geo` column
+crop.yield.dat.2013nuts$geo16 <- crop.yield.dat.2013nuts$geo
+
+# Straight recodes: look every 2016 code up in the "recoded" rows of the conversion
+# table and, where it is found, replace `geo` with the matching 2013 code
+recode.map <- nuts.conv[nuts.conv$Change == "recoded", c('Code.2016', 'Code.2013')]
+recode.hit <- match(crop.yield.dat.2013nuts$geo16, recode.map$Code.2016)
+crop.yield.dat.2013nuts$geo[!is.na(recode.hit)] <- as.character(recode.map$Code.2013[recode.hit[!is.na(recode.hit)]])
+# Check: list the recoded rows with their new and old codes
+crop.yield.dat.2013nuts[!is.na(recode.hit), c('geo', 'geo16')]
+
##      geo geo16
+## 195 FR51  FRG0
+## 196 FR52  FRH0
+## 200 FR61  FRI1
+## 201 FR63  FRI2
+## 202 FR53  FRI3
+## 214 FR81  FRJ1
+## 215 FR62  FRJ2
+## 222 FR21  FRF2
+## 223 FR26  FRC1
+## 224 FR43  FRC2
+## 225 FR25  FRD1
+## 226 FR23  FRD2
+## 236 FR41  FRF3
+## 237 FR72  FRK1
+## 238 FR71  FRK2
+## 245 FR82  FRL0
+## 246 FR83  FRM0
+## 247 FRA1  FRY1
+## 259 FRA2  FRY2
+## 260 FRA3  FRY3
+## 261 FRA4  FRY4
+## 262 FRA5  FRY5
+## 317 FR30  FRE1
+## 318 FR22  FRE2
+## 319 FR42  FRF1
+## 441 PL32  PL82
+## 454 PL34  PL84
+## 486 PL11  PL71
+## 487 PL33  PL72
+## 488 PL31  PL81
+
# Manual translations from NUTS2016 code (name) to NUTS2013 code (value)
+geo16.to.2013 <- c(
+  FRB0 = "FR24",                 # recode and relabel
+  LT01 = "LT00", LT02 = "LT00",  # splits
+  HU11 = "HU10", HU12 = "HU10",
+  PL91 = "PL12", PL92 = "PL12",
+  UKM8 = "UKM3", UKM9 = "UKM3",  # approximate split not including NUTS3 UKM24
+  UKM7 = "UKM2"                  # approximate recode still including NUTS3 UKM24
+)
+manual.hit <- crop.yield.dat.2013nuts$geo16 %in% names(geo16.to.2013)
+crop.yield.dat.2013nuts$geo[manual.hit] <- unname(geo16.to.2013[as.character(crop.yield.dat.2013nuts$geo16[manual.hit])])
+
+# IE
+# Data from the new regions cannot be translated to the old NUTS2013 regions, so use NUTS0 data
+crop.yield.dat.2013nuts[crop.yield.dat.2013nuts$geo16 == 'IE',]
+
##    geo rye_a barley_a maize_a tritic_a sorghum_a oth_cer_a rice_a
+## 17  IE    NA 194.4375      NA       NA        NA        NA     NA
+##    pasture_a  rape_a sunflow_a pulses_a potato_a sugbeet_a oth_rt_a
+## 17  96.10625 11.2325      0.01  7.05125     9.81        NA    11.95
+##    wheat_a oats_a oth_oil_a    fibre_a oth_ind_a fodder_a LEVL_CODE rye_p
+## 17  75.315 22.645      0.07 0.02666667   1.96625 19.82042         0    NA
+##    barley_p maize_p tritic_p sorghum_p oth_cer_p rice_p pasture_p  rape_p
+## 17  1501.84      NA       NA        NA        NA     NA        NA 42.7425
+##    sunflow_p pulses_p potato_p sugbeet_p oth_rt_p wheat_p   oats_p
+## 17         0  41.8825 365.6687        NA       NA 699.325 175.1287
+##    oth_oil_p fibre_p oth_ind_p fodder_p geo16
+## 17         0       0   25.8875   730.55    IE
+
# Calculate the sum over the split NUTS2 regions; first preview the recoded table
+head(crop.yield.dat.2013nuts, n=6L)
+
##   geo     rye_a   barley_a   maize_a tritic_a sorghum_a oth_cer_a  rice_a
+## 1  BG 10.707500 186.503750 416.77125 13.81375     4.635 4.0450000 11.2175
+## 2  CH  1.986250  28.315000  15.33500  9.06875        NA 0.1116667  0.0800
+## 3  CY        NA  20.810000        NA  0.42500        NA        NA      NA
+## 4  AL  1.285714   2.742857  56.44286       NA        NA        NA      NA
+## 5  CZ 27.141250 357.867500  98.95625 43.45875        NA 5.3400000      NA
+## 6  BE  0.606250  46.607500  63.19125  5.85875        NA 3.2157143      NA
+##   pasture_a    rape_a   sunflow_a  pulses_a  potato_a sugbeet_a  oth_rt_a
+## 1   0.72750 175.64875 813.4100000 20.115000 12.512500   0.01000 0.1914286
+## 2 117.22000  21.79750   4.1087500  4.702857 11.072500  19.47500 0.6585714
+## 3   0.33500        NA          NA  0.413750  4.706250        NA        NA
+## 4 143.70000        NA   0.9714286 14.128571  9.585714   0.70000        NA
+## 5  37.74286 388.13375  21.6175000 27.933750 24.236250  60.71125 0.6300000
+## 6  79.00375  11.98875          NA  2.387500 80.837143  58.88000 7.5942857
+##      wheat_a    oats_a  oth_oil_a    fibre_a  oth_ind_a  fodder_a
+## 1 1184.90750 16.499750 20.2690000  1.8728571 64.7175000 110.37167
+## 2   88.57500  1.937917  1.6210357  0.0500000  1.2264286 189.00375
+## 3    9.51375  0.397500  0.0737500         NA         NA  34.95625
+## 4   70.64286 14.028571  0.2571429         NA  6.0571429 238.71667
+## 5  834.87625 47.745000 61.8150000  0.3642857 13.1067857 414.21214
+## 6  209.81500  3.721250  0.1466667 12.8037500  0.7129167 181.32375
+##   LEVL_CODE      rye_p    barley_p   maize_p tritic_p sorghum_p oth_cer_p
+## 1         0  20.221250  728.001250 2417.0175  41.0550  11.48875   6.30250
+## 2         0  11.846250  184.607500  141.3250  52.1775        NA   0.34000
+## 3         0         NA   33.743750        NA   0.4400        NA        NA
+## 4         0   2.928571    7.857143  372.7429       NA        NA        NA
+## 5         0 127.265000 1765.546250  758.5862 198.7113        NA   8.05125
+## 6         0   2.410000  378.551250  717.2150  38.0100        NA  14.16000
+##       rice_p  pasture_p     rape_p sunflow_p  pulses_p  potato_p sugbeet_p
+## 1 59.1500000    2.39625  451.34000 1742.8525 43.480000  184.2188     0.540
+## 2  0.2566667 1452.92000   76.99375   11.0975 15.947143  429.4575  1533.865
+## 3         NA         NA         NA        NA  0.876250  105.4137        NA
+## 4         NA         NA         NA    2.1000 25.971429  234.8857    29.600
+## 5         NA  164.70857 1242.46500   50.4650 66.733750  657.4800  3867.503
+## 6         NA  581.97500   48.63875        NA  8.301429 3710.9129  4947.113
+##     oth_rt_p   wheat_p     oats_p  oth_oil_p    fibre_p oth_ind_p
+## 1   2.275714 5083.4000  32.080000 18.7660000  1.7285714 90.718750
+## 2  62.166667  442.3188   9.760417  4.3605000  0.9700000  1.526071
+## 3         NA   19.1050   0.587500  0.2612500         NA        NA
+## 4         NA  284.6571  30.114286  0.5142857         NA 13.925000
+## 5  20.416250 4772.9613 154.703750 61.2250000  0.5428571 23.835357
+## 6 315.505000 1802.9088  19.687500  1.1380000 68.7500000  0.156250
+##    fodder_p geo16
+## 1  930.5467    BG
+## 2  719.1500    CH
+## 3  180.6650    CY
+## 4 6887.9417    AL
+## 5 9824.5921    CZ
+## 6 7722.9237    BE
+
# Collapse rows that share a NUTS2013 code: every area (_a) and production (_p) column is
+# summed per `geo`, and LEVL_CODE is averaged. na.rm = FALSE is deliberate: if any part of a
+# merged region is NA, the merged total is NA as well.
+# The columns are taken from the names (all _a columns, then all _p columns, in table order)
+# instead of being listed by hand, giving the column order geo, *_a, *_p, LEVL_CODE.
+# NOTE(review): summarise_at() is used rather than across() because the session shown
+# (R 3.5.1, report dated 2019-01) presumably predates dplyr 1.0 - confirm before modernising.
+yield.sum.cols <- grep("_[ap]$", names(crop.yield.dat.2013nuts), value = TRUE)
+yield.by.geo <- group_by(crop.yield.dat.2013nuts, geo)
+crop.yield.dat.2013nuts.sum <- left_join(
+  summarise_at(yield.by.geo, yield.sum.cols, sum, na.rm = FALSE),
+  summarise(yield.by.geo, LEVL_CODE = mean(LEVL_CODE, na.rm = FALSE)),
+  by = "geo"
+)
+head(crop.yield.dat.2013nuts.sum)
+
## # A tibble: 6 x 42
+##   geo   rye_a barley_a maize_a tritic_a sorghum_a oth_cer_a rice_a
+##   <chr> <dbl>    <dbl>   <dbl>    <dbl>     <dbl>     <dbl>  <dbl>
+## 1 AL     1.29     2.74    56.4       NA        NA        NA     NA
+## 2 AL0   NA       NA       NA         NA        NA        NA     NA
+## 3 AL01  NA       NA       NA         NA        NA        NA     NA
+## 4 AL011 NA       NA       NA         NA        NA        NA     NA
+## 5 AL012 NA       NA       NA         NA        NA        NA     NA
+## 6 AL013 NA       NA       NA         NA        NA        NA     NA
+## # ... with 34 more variables: pasture_a <dbl>, rape_a <dbl>,
+## #   sunflow_a <dbl>, pulses_a <dbl>, potato_a <dbl>, sugbeet_a <dbl>,
+## #   oth_rt_a <dbl>, wheat_a <dbl>, oats_a <dbl>, oth_oil_a <dbl>,
+## #   fibre_a <dbl>, oth_ind_a <dbl>, fodder_a <dbl>, rye_p <dbl>,
+## #   barley_p <dbl>, maize_p <dbl>, tritic_p <dbl>, sorghum_p <dbl>,
+## #   oth_cer_p <dbl>, rice_p <dbl>, pasture_p <dbl>, rape_p <dbl>,
+## #   sunflow_p <dbl>, pulses_p <dbl>, potato_p <dbl>, sugbeet_p <dbl>,
+## #   oth_rt_p <dbl>, wheat_p <dbl>, oats_p <dbl>, oth_oil_p <dbl>,
+## #   fibre_p <dbl>, oth_ind_p <dbl>, fodder_p <dbl>, LEVL_CODE <dbl>
+
nrow(crop.yield.dat.2013nuts.sum)
+
## [1] 2013
+
#Second, calculate yield in tonnes / ha (original Eurostat area units are 1000 ha and production units are 1000 tonnes)
+# Each area column (<crop>_a) is paired with its production column (<crop>_p) by name, not by
+# position, so the division cannot silently misalign if the column order ever changes.
+# The suffix patterns are anchored with $ so only a trailing "_a" is rewritten.
+yield.area.cols <- grep("_a$", names(crop.yield.dat.2013nuts.sum), value = TRUE)
+yield.prod.cols <- sub("_a$", "_p", yield.area.cols)
+yield.out.cols <- sub("_a$", "_y", yield.area.cols)
+stopifnot(all(yield.prod.cols %in% names(crop.yield.dat.2013nuts.sum)))
+crop.yield <- as.data.frame(crop.yield.dat.2013nuts.sum[, c("geo", yield.area.cols, "LEVL_CODE")])
+names(crop.yield)[match(yield.area.cols, names(crop.yield))] <- yield.out.cols
+yield.prod.cols
+
##  [1] "rye_p"     "barley_p"  "maize_p"   "tritic_p"  "sorghum_p"
+##  [6] "oth_cer_p" "rice_p"    "pasture_p" "rape_p"    "sunflow_p"
+## [11] "pulses_p"  "potato_p"  "sugbeet_p" "oth_rt_p"  "wheat_p"  
+## [16] "oats_p"    "oth_oil_p" "fibre_p"   "oth_ind_p" "fodder_p"
+
yield.area.cols
+
##  [1] "rye_a"     "barley_a"  "maize_a"   "tritic_a"  "sorghum_a"
+##  [6] "oth_cer_a" "rice_a"    "pasture_a" "rape_a"    "sunflow_a"
+## [11] "pulses_a"  "potato_a"  "sugbeet_a" "oth_rt_a"  "wheat_a"  
+## [16] "oats_a"    "oth_oil_a" "fibre_a"   "oth_ind_a" "fodder_a"
+
# yield = production / area, column by column
+crop.yield[, yield.out.cols] <- as.data.frame(crop.yield.dat.2013nuts.sum[, yield.prod.cols]) / as.data.frame(crop.yield.dat.2013nuts.sum[, yield.area.cols])
+summary(crop.yield)
+
##      geo                rye_y          barley_y          maize_y      
+##  Length:2013        Min.   :0.000   Min.   : 0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.420   1st Qu.: 2.9170   1st Qu.: 6.070  
+##  Mode  :character   Median :3.164   Median : 4.1183   Median : 8.147  
+##                     Mean   :3.443   Mean   : 4.4163   Mean   : 7.824  
+##                     3rd Qu.:4.510   3rd Qu.: 5.7631   3rd Qu.: 9.961  
+##                     Max.   :7.422   Max.   :30.0000   Max.   :12.976  
+##                     NA's   :1678    NA's   :1638      NA's   :1678    
+##     tritic_y       sorghum_y        oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   : 1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.010   1st Qu.: 3.100   1st Qu.: 1.215   1st Qu.: 4.504  
+##  Median :3.995   Median : 4.431   Median : 2.080   Median : 5.524  
+##  Mean   :4.045   Mean   : 5.062   Mean   : 2.678   Mean   : 5.442  
+##  3rd Qu.:5.223   3rd Qu.: 6.000   3rd Qu.: 3.781   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :49.133   Max.   :11.500   Max.   :10.000  
+##  NA's   :1733    NA's   :1869     NA's   :1917     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y     
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   : 0.000  
+##  1st Qu.: 3.655   1st Qu.:2.369   1st Qu.:1.718   1st Qu.: 1.414  
+##  Median : 6.936   Median :2.914   Median :2.174   Median : 2.040  
+##  Mean   :10.213   Mean   :2.883   Mean   :2.092   Mean   : 2.276  
+##  3rd Qu.:12.340   3rd Qu.:3.544   3rd Qu.:2.577   3rd Qu.: 2.752  
+##  Max.   :49.514   Max.   :5.333   Max.   :4.818   Max.   :40.211  
+##  NA's   :1799     NA's   :1708    NA's   :1749    NA's   :1672    
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  0.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.625   1st Qu.: 54.17   1st Qu.:15.32   1st Qu.: 3.1777  
+##  Median :27.769   Median : 63.70   Median :25.28   Median : 4.5915  
+##  Mean   :29.052   Mean   : 62.67   Mean   :29.87   Mean   : 4.8646  
+##  3rd Qu.:36.514   3rd Qu.: 76.37   3rd Qu.:37.37   3rd Qu.: 6.4084  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1674     NA's   :1753     NA's   :1913    NA's   :1636     
+##      oats_y         oth_oil_y         fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2609   1st Qu.: 1.389   1st Qu.: 1.125   1st Qu.: 0.6786  
+##  Median :3.1395   Median : 2.000   Median : 3.084   Median : 1.4677  
+##  Mean   :3.3915   Mean   : 2.288   Mean   : 3.537   Mean   : 2.2600  
+##  3rd Qu.:4.5377   3rd Qu.: 2.670   3rd Qu.: 5.492   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :28.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1669     NA's   :1748     NA's   :1881     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.28   1st Qu.:3.000  
+##  Median :18.69   Median :3.000  
+##  Mean   :22.02   Mean   :2.658  
+##  3rd Qu.:34.41   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1689
+
# Turn NaN entries of crop.yield into NA, then print a summary.
# vapply() with an explicit logical template is used instead of sapply() so the
# per-column mask always has a known type and length.
# The equivalent step for infinite values is currently disabled:
#is.na(crop.yield) <- sapply(crop.yield, is.infinite) #Remove the infinites
+is.na(crop.yield) <- vapply(crop.yield, is.nan, logical(nrow(crop.yield))) #Remove NaN
+summary(crop.yield) #still some spurious values
+
##      geo                rye_y          barley_y          maize_y      
+##  Length:2013        Min.   :0.000   Min.   : 0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.420   1st Qu.: 2.9170   1st Qu.: 6.070  
+##  Mode  :character   Median :3.164   Median : 4.1183   Median : 8.147  
+##                     Mean   :3.443   Mean   : 4.4163   Mean   : 7.824  
+##                     3rd Qu.:4.510   3rd Qu.: 5.7631   3rd Qu.: 9.961  
+##                     Max.   :7.422   Max.   :30.0000   Max.   :12.976  
+##                     NA's   :1678    NA's   :1638      NA's   :1678    
+##     tritic_y       sorghum_y        oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   : 1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.010   1st Qu.: 3.100   1st Qu.: 1.215   1st Qu.: 4.504  
+##  Median :3.995   Median : 4.431   Median : 2.080   Median : 5.524  
+##  Mean   :4.045   Mean   : 5.062   Mean   : 2.678   Mean   : 5.442  
+##  3rd Qu.:5.223   3rd Qu.: 6.000   3rd Qu.: 3.781   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :49.133   Max.   :11.500   Max.   :10.000  
+##  NA's   :1733    NA's   :1869     NA's   :1917     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y     
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   : 0.000  
+##  1st Qu.: 3.655   1st Qu.:2.369   1st Qu.:1.718   1st Qu.: 1.414  
+##  Median : 6.936   Median :2.914   Median :2.174   Median : 2.040  
+##  Mean   :10.213   Mean   :2.883   Mean   :2.092   Mean   : 2.276  
+##  3rd Qu.:12.340   3rd Qu.:3.544   3rd Qu.:2.577   3rd Qu.: 2.752  
+##  Max.   :49.514   Max.   :5.333   Max.   :4.818   Max.   :40.211  
+##  NA's   :1799     NA's   :1708    NA's   :1749    NA's   :1672    
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  0.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.625   1st Qu.: 54.17   1st Qu.:15.32   1st Qu.: 3.1777  
+##  Median :27.769   Median : 63.70   Median :25.28   Median : 4.5915  
+##  Mean   :29.052   Mean   : 62.67   Mean   :29.87   Mean   : 4.8646  
+##  3rd Qu.:36.514   3rd Qu.: 76.37   3rd Qu.:37.37   3rd Qu.: 6.4084  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1674     NA's   :1753     NA's   :1913    NA's   :1636     
+##      oats_y         oth_oil_y         fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2609   1st Qu.: 1.389   1st Qu.: 1.125   1st Qu.: 0.6786  
+##  Median :3.1395   Median : 2.000   Median : 3.084   Median : 1.4677  
+##  Mean   :3.3915   Mean   : 2.288   Mean   : 3.537   Mean   : 2.2600  
+##  3rd Qu.:4.5377   3rd Qu.: 2.670   3rd Qu.: 5.492   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :28.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1669     NA's   :1748     NA's   :1881     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.28   1st Qu.:3.000  
+##  Median :18.69   Median :3.000  
+##  Mean   :22.02   Mean   :2.658  
+##  3rd Qu.:34.41   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1689
+
# Wipe BE1 and BE10 because of spurious areas (see above): columns 2 to 21
# are set to NA for these two rows.
+crop.yield[crop.yield$geo %in% c("BE1", "BE10"), 2:21] <- NA
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y          maize_y      
+##  Length:2013        Min.   :0.000   Min.   : 0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.431   1st Qu.: 2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.169   Median : 4.1115   Median : 8.146  
+##                     Mean   :3.451   Mean   : 4.3965   Mean   : 7.801  
+##                     3rd Qu.:4.512   3rd Qu.: 5.7152   3rd Qu.: 9.917  
+##                     Max.   :7.422   Max.   :30.0000   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640      NA's   :1680    
+##     tritic_y       sorghum_y        oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   : 1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.: 3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median : 4.431   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   : 5.062   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.: 6.000   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :49.133   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869     NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y     
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   : 0.000  
+##  1st Qu.: 3.655   1st Qu.:2.351   1st Qu.:1.718   1st Qu.: 1.413  
+##  Median : 6.936   Median :2.911   Median :2.174   Median : 2.038  
+##  Mean   :10.213   Mean   :2.873   Mean   :2.092   Mean   : 2.264  
+##  3rd Qu.:12.340   3rd Qu.:3.544   3rd Qu.:2.577   3rd Qu.: 2.749  
+##  Max.   :49.514   Max.   :5.333   Max.   :4.818   Max.   :40.211  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1674    
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  0.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.584   1st Qu.: 54.07   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.763   Median : 63.52   Median :25.10   Median : 4.5731  
+##  Mean   :28.951   Mean   : 62.50   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 76.07   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1755     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y         fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.: 1.389   1st Qu.: 1.103   1st Qu.: 0.6786  
+##  Median :3.1310   Median : 2.000   Median : 3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   : 2.288   Mean   : 3.510   Mean   : 2.2600  
+##  3rd Qu.:4.5236   3rd Qu.: 2.670   3rd Qu.: 5.497   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :28.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1671     NA's   :1748     NA's   :1883     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
# Spurious values were identified by checking the Eurostat database for
# statistical outliers and by inspecting maps for spatial outliers.
+
+# Spurious rye: list every region with a rye yield above 6 (NA yields are skipped)
+crop.yield[!is.na(crop.yield$rye_y) & crop.yield$rye_y > 6, c('geo', 'rye_y')]
+
##       geo    rye_y
+## 449   DE9 6.410551
+## 499   DEA 6.584930
+## 558   DEB 6.195128
+## 639   DEF 7.142828
+## 656   DEG 6.424054
+## 681    DK 6.006277
+## 682   DK0 6.007563
+## 683  DK01 6.311475
+## 688  DK02 6.703985
+## 691  DK03 6.000547
+## 891   FR1 6.062500
+## 892  FR10 6.062500
+## 902  FR22 6.328767
+## 910  FR43 6.243902
+## 949   FRE 6.329114
+## 1118 ITC2 7.000000
+## 1147 ITF3 7.421875
+## 1352  NO0 6.037915
+## 1356 NO02 6.675000
+## 1359 NO03 6.026316
+## 1609  SE2 6.312086
+## 1615 SE22 6.835658
+## 1915  UKJ 6.600000
+
# Rye yields of all regions whose code contains 'ITF'
crop.yield[grepl('ITF', crop.yield$geo), c('geo', 'rye_y')]
+
##        geo    rye_y
+## 1138   ITF 3.230439
+## 1139  ITF1 3.041237
+## 1140 ITF11       NA
+## 1141 ITF12       NA
+## 1142 ITF13       NA
+## 1143 ITF14       NA
+## 1144  ITF2       NA
+## 1145 ITF21       NA
+## 1146 ITF22       NA
+## 1147  ITF3 7.421875
+## 1148 ITF31       NA
+## 1149 ITF32       NA
+## 1150 ITF33       NA
+## 1151 ITF34       NA
+## 1152 ITF35       NA
+## 1153  ITF4 2.000000
+## 1154 ITF43       NA
+## 1155 ITF44       NA
+## 1156 ITF45       NA
+## 1157 ITF46       NA
+## 1158 ITF47       NA
+## 1159 ITF48       NA
+## 1160  ITF5 2.000000
+## 1161 ITF51       NA
+## 1162 ITF52       NA
+## 1163  ITF6 3.098109
+## 1164 ITF61       NA
+## 1165 ITF62       NA
+## 1166 ITF63       NA
+## 1167 ITF64       NA
+## 1168 ITF65       NA
+
# Rye yields of all regions whose code contains 'ITC'
crop.yield[grepl('ITC', crop.yield$geo), c('geo', 'rye_y')]
+
##        geo    rye_y
+## 1108   ITC 3.705986
+## 1109  ITC1 3.311782
+## 1110 ITC11       NA
+## 1111 ITC12       NA
+## 1112 ITC13       NA
+## 1113 ITC14       NA
+## 1114 ITC15       NA
+## 1115 ITC16       NA
+## 1116 ITC17       NA
+## 1117 ITC18       NA
+## 1118  ITC2 7.000000
+## 1119 ITC20       NA
+## 1120  ITC3       NA
+## 1121 ITC31       NA
+## 1122 ITC32       NA
+## 1123 ITC33       NA
+## 1124 ITC34       NA
+## 1125  ITC4 3.794393
+## 1126 ITC41       NA
+## 1127 ITC42       NA
+## 1128 ITC43       NA
+## 1129 ITC44       NA
+## 1130 ITC46       NA
+## 1131 ITC47       NA
+## 1132 ITC48       NA
+## 1133 ITC49       NA
+## 1134 ITC4A       NA
+## 1135 ITC4B       NA
+## 1136 ITC4C       NA
+## 1137 ITC4D       NA
+
# Overwrite the rye outliers in ITC2 and ITF3 with the rye yields stored for ITC and ITF
crop.yield[crop.yield$geo %in% "ITC2", 'rye_y'] <- crop.yield[crop.yield$geo %in% "ITC", 'rye_y']
+crop.yield[crop.yield$geo %in% "ITF3", 'rye_y'] <- crop.yield[crop.yield$geo %in% "ITF", 'rye_y']
+
+# Rye yields of all regions whose code contains 'SE'
+crop.yield[grepl('SE', crop.yield$geo), c('geo', 'rye_y')]
+
##        geo    rye_y
+## 1599    SE 5.953990
+## 1600   SE1 5.588411
+## 1601  SE11 1.090909
+## 1602 SE110       NA
+## 1603  SE12 5.656753
+## 1604 SE121       NA
+## 1605 SE122       NA
+## 1606 SE123       NA
+## 1607 SE124       NA
+## 1608 SE125       NA
+## 1609   SE2 6.312086
+## 1610  SE21 4.584682
+## 1611 SE211       NA
+## 1612 SE212       NA
+## 1613 SE213       NA
+## 1614 SE214       NA
+## 1615  SE22 6.835658
+## 1616 SE221       NA
+## 1617 SE224       NA
+## 1618  SE23 5.424729
+## 1619 SE231       NA
+## 1620 SE232       NA
+## 1621   SE3 2.336957
+## 1622  SE31 1.287671
+## 1623 SE311       NA
+## 1624 SE312       NA
+## 1625 SE313       NA
+## 1626  SE32 0.000000
+## 1627 SE321       NA
+## 1628 SE322       NA
+## 1629  SE33       NA
+## 1630 SE331       NA
+## 1631 SE332       NA
+
# Rye corrections for Sweden: SE11 takes the rye yield stored for SE1;
# SE31 and SE32 both take the value stored for SE3.
crop.yield[crop.yield$geo == "SE11", 'rye_y'] <- crop.yield[crop.yield$geo == "SE1", 'rye_y']
+
+crop.yield[crop.yield$geo %in% c("SE31", "SE32"), 'rye_y'] <- crop.yield[crop.yield$geo == "SE3", 'rye_y']
+
+#Spurious barley: list every region with a barley yield above 10
+crop.yield[which(crop.yield$barley_y > 10), c('geo', 'barley_y')]
+
##       geo barley_y
+## 1118 ITC2       30
+
# Barley yields of all regions whose code contains 'ITC'
crop.yield[grepl('ITC', crop.yield$geo), c('geo', 'barley_y')]
+
##        geo  barley_y
+## 1108   ITC  4.993019
+## 1109  ITC1  5.053235
+## 1110 ITC11        NA
+## 1111 ITC12        NA
+## 1112 ITC13        NA
+## 1113 ITC14        NA
+## 1114 ITC15        NA
+## 1115 ITC16        NA
+## 1116 ITC17        NA
+## 1117 ITC18        NA
+## 1118  ITC2 30.000000
+## 1119 ITC20        NA
+## 1120  ITC3  2.222222
+## 1121 ITC31        NA
+## 1122 ITC32        NA
+## 1123 ITC33        NA
+## 1124 ITC34        NA
+## 1125  ITC4  4.949255
+## 1126 ITC41        NA
+## 1127 ITC42        NA
+## 1128 ITC43        NA
+## 1129 ITC44        NA
+## 1130 ITC46        NA
+## 1131 ITC47        NA
+## 1132 ITC48        NA
+## 1133 ITC49        NA
+## 1134 ITC4A        NA
+## 1135 ITC4B        NA
+## 1136 ITC4C        NA
+## 1137 ITC4D        NA
+
# Overwrite the barley outlier in ITC2 with the barley yield stored for ITC
crop.yield[crop.yield$geo %in% "ITC2", 'barley_y'] <- crop.yield[crop.yield$geo %in% "ITC", 'barley_y']
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y         maize_y      
+##  Length:2013        Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.436   1st Qu.:2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.172   Median :4.1115   Median : 8.146  
+##                     Mean   :3.452   Mean   :4.3295   Mean   : 7.801  
+##                     3rd Qu.:4.507   3rd Qu.:5.6912   3rd Qu.: 9.917  
+##                     Max.   :7.143   Max.   :8.7350   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640     NA's   :1680    
+##     tritic_y       sorghum_y        oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   : 1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.: 3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median : 4.431   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   : 5.062   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.: 6.000   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :49.133   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869     NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y     
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   : 0.000  
+##  1st Qu.: 3.655   1st Qu.:2.351   1st Qu.:1.718   1st Qu.: 1.413  
+##  Median : 6.936   Median :2.911   Median :2.174   Median : 2.038  
+##  Mean   :10.213   Mean   :2.873   Mean   :2.092   Mean   : 2.264  
+##  3rd Qu.:12.340   3rd Qu.:3.544   3rd Qu.:2.577   3rd Qu.: 2.749  
+##  Max.   :49.514   Max.   :5.333   Max.   :4.818   Max.   :40.211  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1674    
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  0.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.584   1st Qu.: 54.07   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.763   Median : 63.52   Median :25.10   Median : 4.5731  
+##  Mean   :28.951   Mean   : 62.50   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 76.07   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1755     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y         fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.: 1.389   1st Qu.: 1.103   1st Qu.: 0.6786  
+##  Median :3.1310   Median : 2.000   Median : 3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   : 2.288   Mean   : 3.510   Mean   : 2.2600  
+##  3rd Qu.:4.5236   3rd Qu.: 2.670   3rd Qu.: 5.497   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :28.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1671     NA's   :1748     NA's   :1883     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
# Spurious sorghum: list every region with a sorghum yield above 8 (NA yields are skipped)
+crop.yield[!is.na(crop.yield$sorghum_y) & crop.yield$sorghum_y > 8, c('geo', 'sorghum_y')]
+
##      geo sorghum_y
+## 34   AT2  8.445507
+## 39  AT22  8.588477
+## 163  CH0 12.500000
+## 168 CH02 11.500000
+## 717  EL4 16.810345
+## 725 EL43 49.133333
+## 793 ES22 14.215686
+## 906 FR26  9.044248
+## 909 FR42  8.590909
+## 910 FR43  9.000000
+## 934  FRC  9.112782
+
# Sorghum yields of all regions whose code contains 'EL4'
crop.yield[grepl('EL4', crop.yield$geo), c('geo', 'sorghum_y')]
+
##       geo sorghum_y
+## 717   EL4 16.810345
+## 718  EL41  4.810811
+## 719 EL411        NA
+## 720 EL412        NA
+## 721 EL413        NA
+## 722  EL42  3.600000
+## 723 EL421        NA
+## 724 EL422        NA
+## 725  EL43 49.133333
+## 726 EL431        NA
+## 727 EL432        NA
+## 728 EL433        NA
+## 729 EL434        NA
+
# EL4 and EL43 are both overwritten with the unweighted mean of the EL41 and EL42 sorghum yields
crop.yield[is.element(crop.yield$geo, c("EL4", "EL43")), 'sorghum_y'] <- mean(crop.yield[is.element(crop.yield$geo, c("EL41", "EL42")), 'sorghum_y'])
+
+# Sorghum yields of all regions whose code contains 'ES2'
+crop.yield[grepl('ES2', crop.yield$geo), c('geo', 'sorghum_y')]
+
##       geo sorghum_y
+## 788   ES2  5.191443
+## 789  ES21        NA
+## 790 ES211        NA
+## 791 ES212        NA
+## 792 ES213        NA
+## 793  ES22 14.215686
+## 794 ES220        NA
+## 795  ES23        NA
+## 796 ES230        NA
+## 797  ES24  4.847777
+## 798 ES241        NA
+## 799 ES242        NA
+## 800 ES243        NA
+
# Overwrite the sorghum outlier in ES22 with the sorghum yield stored for ES2,
# then list sorghum yields of all regions whose code contains 'CH'.
crop.yield[crop.yield$geo == "ES22", 'sorghum_y'] <- crop.yield[crop.yield$geo == "ES2", 'sorghum_y']
+
+crop.yield[crop.yield$geo %in% crop.yield$geo[grep('CH', crop.yield$geo)], c('geo', 'sorghum_y')]
+
##       geo sorghum_y
+## 162    CH        NA
+## 163   CH0      12.5
+## 164  CH01       6.0
+## 165 CH011        NA
+## 166 CH012        NA
+## 167 CH013        NA
+## 168  CH02      11.5
+## 169 CH021        NA
+## 170 CH022        NA
+## 171 CH023        NA
+## 172 CH024        NA
+## 173 CH025        NA
+## 174  CH03       6.0
+## 175 CH031        NA
+## 176 CH032        NA
+## 177 CH033        NA
+## 178  CH04        NA
+## 179 CH040        NA
+## 180  CH05        NA
+## 181 CH051        NA
+## 182 CH052        NA
+## 183 CH053        NA
+## 184 CH054        NA
+## 185 CH055        NA
+## 186 CH056        NA
+## 187 CH057        NA
+## 188  CH06       6.0
+## 189 CH061        NA
+## 190 CH062        NA
+## 191 CH063        NA
+## 192 CH064        NA
+## 193 CH065        NA
+## 194 CH066        NA
+## 195  CH07        NA
+## 196 CH070        NA
+
# CH0 and CH02 are both overwritten with the unweighted mean of the CH01, CH03 and CH06 sorghum yields
crop.yield[is.element(crop.yield$geo, c("CH0", "CH02")), 'sorghum_y'] <- mean(crop.yield[is.element(crop.yield$geo, c("CH01", "CH03", "CH06")), 'sorghum_y'])
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y         maize_y      
+##  Length:2013        Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.436   1st Qu.:2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.172   Median :4.1115   Median : 8.146  
+##                     Mean   :3.452   Mean   :4.3295   Mean   : 7.801  
+##                     3rd Qu.:4.507   3rd Qu.:5.6912   3rd Qu.: 9.917  
+##                     Max.   :7.143   Max.   :8.7350   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640     NA's   :1680    
+##     tritic_y       sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   :1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.:3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median :4.393   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   :4.516   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.:5.968   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :9.113   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869    NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y     
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   : 0.000  
+##  1st Qu.: 3.655   1st Qu.:2.351   1st Qu.:1.718   1st Qu.: 1.413  
+##  Median : 6.936   Median :2.911   Median :2.174   Median : 2.038  
+##  Mean   :10.213   Mean   :2.873   Mean   :2.092   Mean   : 2.264  
+##  3rd Qu.:12.340   3rd Qu.:3.544   3rd Qu.:2.577   3rd Qu.: 2.749  
+##  Max.   :49.514   Max.   :5.333   Max.   :4.818   Max.   :40.211  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1674    
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  0.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.584   1st Qu.: 54.07   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.763   Median : 63.52   Median :25.10   Median : 4.5731  
+##  Mean   :28.951   Mean   : 62.50   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 76.07   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1755     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y         fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.: 1.389   1st Qu.: 1.103   1st Qu.: 0.6786  
+##  Median :3.1310   Median : 2.000   Median : 3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   : 2.288   Mean   : 3.510   Mean   : 2.2600  
+##  3rd Qu.:4.5236   3rd Qu.: 2.670   3rd Qu.: 5.497   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :28.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1671     NA's   :1748     NA's   :1883     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
# Spurious rape: list every region with a rape yield above 4 (NA yields are skipped)
+crop.yield[!is.na(crop.yield$rape_y) & crop.yield$rape_y > 4, c('geo', 'rape_y')]
+
##       geo   rape_y
+## 66     BE 4.057033
+## 86   BE24 4.068966
+## 89   BE25 4.436464
+## 98    BE3 4.071088
+## 99   BE31 4.084127
+## 101  BE32 4.064417
+## 109  BE33 4.439338
+## 902  FR22 4.287457
+## 903  FR23 4.096127
+## 905  FR25 4.017380
+## 907  FR30 4.500000
+## 909  FR42 4.209091
+## 943   FRD 4.064926
+## 949   FRE 4.324003
+## 1094 IE05 4.172932
+## 1098 IE06 4.107794
+## 1170 ITG1 5.333333
+## 1735 TR83 5.000000
+
# Rape yields for ITG, ITG1 and ITG2
crop.yield[is.element(crop.yield$geo, c("ITG", "ITG1", "ITG2")), c('geo', 'rape_y')]
+
##       geo   rape_y
+## 1169  ITG 1.000000
+## 1170 ITG1 5.333333
+## 1180 ITG2 1.000000
+
# Overwrite the rape outlier in ITG1 with the rape yield stored for ITG
crop.yield[crop.yield$geo == "ITG1", 'rape_y'] <- crop.yield[crop.yield$geo == "ITG", 'rape_y']
+
+#Spurious sunflower: list every region with a sunflower yield above 3
+crop.yield[which(crop.yield$sunflow_y > 3), c('geo', 'sunflow_y')]
+
##       geo sunflow_y
+## 226   DE1  3.060000
+## 499   DEA  4.000000
+## 558   DEB  3.857143
+## 753   EL6  3.296884
+## 754  EL61  4.392677
+## 904  FR24  3.117628
+## 906  FR26  3.016734
+## 908  FR41  3.260991
+## 910  FR43  3.145299
+## 911  FR51  3.105082
+## 927   FRB  3.117628
+## 934   FRC  3.026071
+## 955   FRF  3.128025
+## 966   FRG  3.105082
+## 1108  ITC  3.255319
+## 1109 ITC1  3.147837
+## 1160 ITF5  4.818182
+## 1169  ITG  4.400000
+## 1202 ITH4  3.990476
+## 1700 TR52  3.659236
+
# Sunflower yields for ITF and ITF1 to ITF6
crop.yield[is.element(crop.yield$geo, c("ITF", "ITF1", "ITF2", "ITF3", "ITF4", "ITF5", "ITF6")), c('geo', 'sunflow_y')]
+
##       geo sunflow_y
+## 1138  ITF  1.718640
+## 1139 ITF1  1.915966
+## 1144 ITF2  1.483412
+## 1147 ITF3  2.060811
+## 1153 ITF4  1.902977
+## 1160 ITF5  4.818182
+## 1163 ITF6  2.692308
+
# Sunflower yields for ITG, ITG1 and ITG2
crop.yield[is.element(crop.yield$geo, c("ITG", "ITG1", "ITG2")), c('geo', 'sunflow_y')]
+
##       geo sunflow_y
+## 1169  ITG  4.400000
+## 1170 ITG1  2.500000
+## 1180 ITG2  2.833333
+
# Sunflower corrections: ITF5 is replaced by the unweighted mean of ITF1-ITF4 and ITF6;
# ITG is replaced by the unweighted mean of ITG1 and ITG2.
# mean() is called without na.rm, so an NA among the source rows would propagate.
crop.yield[crop.yield$geo == "ITF5", 'sunflow_y'] <- mean(crop.yield[crop.yield$geo %in% c("ITF1", "ITF2", "ITF3", "ITF4", "ITF6"), 'sunflow_y'])
+
+crop.yield[crop.yield$geo == "ITG", 'sunflow_y'] <- mean(crop.yield[crop.yield$geo %in% c("ITG1", "ITG2"), 'sunflow_y'])
+
+#Spurious pulses: list every region with a pulses yield above 5
+crop.yield[which(crop.yield$pulses_y > 5), c('geo', 'pulses_y')]
+
##       geo  pulses_y
+## 1089   IE  5.939727
+## 1090  IE0  5.939727
+## 1091 IE04  6.000000
+## 1094 IE05  6.705146
+## 1098 IE06  6.411765
+## 1120 ITC3 40.210526
+## 1274   ME  7.526316
+## 1615 SE22  5.002454
+
# Pulses yields of all regions whose code contains 'ITC'
crop.yield[grepl('ITC', crop.yield$geo), c('geo', 'pulses_y')]
+
##        geo  pulses_y
+## 1108   ITC  3.094511
+## 1109  ITC1  2.313346
+## 1110 ITC11        NA
+## 1111 ITC12        NA
+## 1112 ITC13        NA
+## 1113 ITC14        NA
+## 1114 ITC15        NA
+## 1115 ITC16        NA
+## 1116 ITC17        NA
+## 1117 ITC18        NA
+## 1118  ITC2        NA
+## 1119 ITC20        NA
+## 1120  ITC3 40.210526
+## 1121 ITC31        NA
+## 1122 ITC32        NA
+## 1123 ITC33        NA
+## 1124 ITC34        NA
+## 1125  ITC4  3.730722
+## 1126 ITC41        NA
+## 1127 ITC42        NA
+## 1128 ITC43        NA
+## 1129 ITC44        NA
+## 1130 ITC46        NA
+## 1131 ITC47        NA
+## 1132 ITC48        NA
+## 1133 ITC49        NA
+## 1134 ITC4A        NA
+## 1135 ITC4B        NA
+## 1136 ITC4C        NA
+## 1137 ITC4D        NA
+
# Pulses corrections: ITC3 takes the pulses yield stored for ITC; the ME value is
# discarded (set to NA) rather than replaced.
crop.yield[crop.yield$geo == "ITC3", 'pulses_y'] <- crop.yield[crop.yield$geo == "ITC", 'pulses_y']
+crop.yield[crop.yield$geo == "ME", 'pulses_y'] <- NA
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y         maize_y      
+##  Length:2013        Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.436   1st Qu.:2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.172   Median :4.1115   Median : 8.146  
+##                     Mean   :3.452   Mean   :4.3295   Mean   : 7.801  
+##                     3rd Qu.:4.507   3rd Qu.:5.6912   3rd Qu.: 9.917  
+##                     Max.   :7.143   Max.   :8.7350   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640     NA's   :1680    
+##     tritic_y       sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   :1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.:3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median :4.393   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   :4.516   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.:5.968   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :9.113   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869    NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y    
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
+##  1st Qu.: 3.655   1st Qu.:2.331   1st Qu.:1.718   1st Qu.:1.413  
+##  Median : 6.936   Median :2.903   Median :2.169   Median :2.034  
+##  Mean   :10.213   Mean   :2.859   Mean   :2.074   Mean   :2.139  
+##  3rd Qu.:12.340   3rd Qu.:3.543   3rd Qu.:2.570   3rd Qu.:2.747  
+##  Max.   :49.514   Max.   :5.000   Max.   :4.393   Max.   :6.705  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1675   
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  0.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.584   1st Qu.: 54.07   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.763   Median : 63.52   Median :25.10   Median : 4.5731  
+##  Mean   :28.951   Mean   : 62.50   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 76.07   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1755     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y         fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.: 1.389   1st Qu.: 1.103   1st Qu.: 0.6786  
+##  Median :3.1310   Median : 2.000   Median : 3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   : 2.288   Mean   : 3.510   Mean   : 2.2600  
+##  3rd Qu.:4.5236   3rd Qu.: 2.670   3rd Qu.: 5.497   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :28.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1671     NA's   :1748     NA's   :1883     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
# Spurious sugar beet: sugar beet yields of all regions whose code contains 'SE'
+crop.yield[grepl('SE', crop.yield$geo), c('geo', 'sugbeet_y')]
+
##        geo sugbeet_y
+## 1599    SE  62.51595
+## 1600   SE1   0.00000
+## 1601  SE11   0.00000
+## 1602 SE110        NA
+## 1603  SE12   0.00000
+## 1604 SE121        NA
+## 1605 SE122        NA
+## 1606 SE123        NA
+## 1607 SE124        NA
+## 1608 SE125        NA
+## 1609   SE2  62.45091
+## 1610  SE21  47.68000
+## 1611 SE211        NA
+## 1612 SE212        NA
+## 1613 SE213        NA
+## 1614 SE214        NA
+## 1615  SE22  62.59545
+## 1616 SE221        NA
+## 1617 SE224        NA
+## 1618  SE23  58.27869
+## 1619 SE231        NA
+## 1620 SE232        NA
+## 1621   SE3        NA
+## 1622  SE31        NA
+## 1623 SE311        NA
+## 1624 SE312        NA
+## 1625 SE313        NA
+## 1626  SE32        NA
+## 1627 SE321        NA
+## 1628 SE322        NA
+## 1629  SE33        NA
+## 1630 SE331        NA
+## 1631 SE332        NA
+
# Every region whose code contains 'SE1' receives the sugar beet yield stored for SE.
# NOTE(review): the pattern 'SE1' is not anchored, so besides SE1, SE11 and SE12 it
# also matches SE110 and SE121-SE125, whose sugar beet yield was NA in the listing
# above - confirm that filling those rows is intended.
crop.yield[crop.yield$geo %in% crop.yield$geo[grep('SE1', crop.yield$geo)], 'sugbeet_y'] <- crop.yield[crop.yield$geo == "SE", 'sugbeet_y']
+
+#Spurious potatoes: potato yields of all regions whose code contains 'SE'
+crop.yield[crop.yield$geo %in% crop.yield$geo[grep('SE', crop.yield$geo)], c('geo', 'potato_y')]
+
##        geo  potato_y
+## 1599    SE 33.387405
+## 1600   SE1 31.598152
+## 1601  SE11  7.532468
+## 1602 SE110        NA
+## 1603  SE12 32.364603
+## 1604 SE121        NA
+## 1605 SE122        NA
+## 1606 SE123        NA
+## 1607 SE124        NA
+## 1608 SE125        NA
+## 1609   SE2 35.211456
+## 1610  SE21 33.863388
+## 1611 SE211        NA
+## 1612 SE212        NA
+## 1613 SE213        NA
+## 1614 SE214        NA
+## 1615  SE22 36.225223
+## 1616 SE221        NA
+## 1617 SE224        NA
+## 1618  SE23 32.788811
+## 1619 SE231        NA
+## 1620 SE232        NA
+## 1621   SE3 21.584253
+## 1622  SE31 24.928989
+## 1623 SE311        NA
+## 1624 SE312        NA
+## 1625 SE313        NA
+## 1626  SE32 18.534483
+## 1627 SE321        NA
+## 1628 SE322        NA
+## 1629  SE33 16.310241
+## 1630 SE331        NA
+## 1631 SE332        NA
+
# Overwrite the low potato yield in SE11 with the potato yield stored for SE1
crop.yield[crop.yield$geo == "SE11", 'potato_y'] <- crop.yield[crop.yield$geo == "SE1", 'potato_y']
+
+#Spurious other oil crops: list every region with an other-oil-crop yield above 5
+crop.yield[which(crop.yield$oth_oil_y > 5), c('geo', 'oth_oil_y')]
+
##       geo oth_oil_y
+## 66     BE  7.759091
+## 198   CY0 28.000000
+## 199  CY00 28.000000
+## 848  ES62 24.333333
+## 1153 ITF4  9.333333
+
# Other-oil-crop yields of all regions whose code contains 'BE'
crop.yield[grepl('BE', crop.yield$geo), c('geo', 'oth_oil_y')]
+
##       geo  oth_oil_y
+## 66     BE 7.75909091
+## 67    BE1         NA
+## 68   BE10         NA
+## 69  BE100         NA
+## 70    BE2 0.40322581
+## 71   BE21         NA
+## 72  BE211         NA
+## 73  BE212         NA
+## 74  BE213         NA
+## 75   BE22 0.00000000
+## 76  BE221         NA
+## 77  BE222         NA
+## 78  BE223         NA
+## 79   BE23 0.38461538
+## 80  BE231         NA
+## 81  BE232         NA
+## 82  BE233         NA
+## 83  BE234         NA
+## 84  BE235         NA
+## 85  BE236         NA
+## 86   BE24 0.00000000
+## 87  BE241         NA
+## 88  BE242         NA
+## 89   BE25 0.37500000
+## 90  BE251         NA
+## 91  BE252         NA
+## 92  BE253         NA
+## 93  BE254         NA
+## 94  BE255         NA
+## 95  BE256         NA
+## 96  BE257         NA
+## 97  BE258         NA
+## 98    BE3 0.09140768
+## 99   BE31 0.00000000
+## 100 BE310         NA
+## 101  BE32 0.00000000
+## 102 BE321         NA
+## 103 BE322         NA
+## 104 BE323         NA
+## 105 BE324         NA
+## 106 BE325         NA
+## 107 BE326         NA
+## 108 BE327         NA
+## 109  BE33 0.00000000
+## 110 BE331         NA
+## 111 BE332         NA
+## 112 BE334         NA
+## 113 BE335         NA
+## 114 BE336         NA
+## 115  BE34 0.00000000
+## 116 BE341         NA
+## 117 BE342         NA
+## 118 BE343         NA
+## 119 BE344         NA
+## 120 BE345         NA
+## 121  BE35 0.00000000
+## 122 BE351         NA
+## 123 BE352         NA
+## 124 BE353         NA
+
# BE is replaced by the unweighted mean of the BE2 and BE3 other-oil-crop yields.
# The second statement prints the oth_oil_a and oth_oil_p columns of the CY row in
# crop.yield.dat.2013nuts.sum (presumably area and production - confirm).
crop.yield[crop.yield$geo == "BE", 'oth_oil_y'] <- mean(crop.yield[crop.yield$geo %in% c("BE2", "BE3"), 'oth_oil_y'])
+
+crop.yield.dat.2013nuts.sum[which(crop.yield.dat.2013nuts.sum$geo == "CY"), c('geo', 'oth_oil_a', 'oth_oil_p')]
+
## # A tibble: 1 x 3
+##   geo   oth_oil_a oth_oil_p
+##   <chr>     <dbl>     <dbl>
+## 1 CY       0.0738     0.261
+
# CY0 and CY00 (both 28 in the outlier listing) take the other-oil-crop yield stored
# for CY in crop.yield. CY itself was not in the > 5 listing, so its stored value is
# either NA or at most 5.
crop.yield[crop.yield$geo %in% c("CY0", "CY00"), 'oth_oil_y'] <- crop.yield[crop.yield$geo == "CY", 'oth_oil_y']
+
+crop.yield[crop.yield$geo %in% crop.yield$geo[grep('ES6', crop.yield$geo)], c('geo', 'oth_oil_y')] # other-oil-crop yields for codes containing 'ES6'
+
##       geo  oth_oil_y
+## 838   ES6  0.3014466
+## 839  ES61  0.2827409
+## 840 ES611         NA
+## 841 ES612         NA
+## 842 ES613         NA
+## 843 ES614         NA
+## 844 ES615         NA
+## 845 ES616         NA
+## 846 ES617         NA
+## 847 ES618         NA
+## 848  ES62 24.3333333
+## 849 ES620         NA
+## 850  ES63         NA
+## 851 ES630         NA
+## 852  ES64         NA
+## 853 ES640         NA
+
#ES62: replace its other-oilseed yield with the value of the parent code ES6
+crop.yield$oth_oil_y[crop.yield$geo == "ES62"] <-
+  crop.yield$oth_oil_y[crop.yield$geo == "ES6"]
+
+#List the other-oilseed yield of every geo code containing 'ITF'
+crop.yield[grepl('ITF', crop.yield$geo), c('geo', 'oth_oil_y')]
+
##        geo oth_oil_y
+## 1138   ITF  3.322581
+## 1139  ITF1  2.872727
+## 1140 ITF11        NA
+## 1141 ITF12        NA
+## 1142 ITF13        NA
+## 1143 ITF14        NA
+## 1144  ITF2        NA
+## 1145 ITF21        NA
+## 1146 ITF22        NA
+## 1147  ITF3        NA
+## 1148 ITF31        NA
+## 1149 ITF32        NA
+## 1150 ITF33        NA
+## 1151 ITF34        NA
+## 1152 ITF35        NA
+## 1153  ITF4  9.333333
+## 1154 ITF43        NA
+## 1155 ITF44        NA
+## 1156 ITF45        NA
+## 1157 ITF46        NA
+## 1158 ITF47        NA
+## 1159 ITF48        NA
+## 1160  ITF5        NA
+## 1161 ITF51        NA
+## 1162 ITF52        NA
+## 1163  ITF6  4.045455
+## 1164 ITF61        NA
+## 1165 ITF62        NA
+## 1166 ITF63        NA
+## 1167 ITF64        NA
+## 1168 ITF65        NA
+
#ITF4: replace its other-oilseed yield with the value of the parent code ITF
+crop.yield$oth_oil_y[crop.yield$geo == "ITF4"] <-
+  crop.yield$oth_oil_y[crop.yield$geo == "ITF"]
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y         maize_y      
+##  Length:2013        Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.436   1st Qu.:2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.172   Median :4.1115   Median : 8.146  
+##                     Mean   :3.452   Mean   :4.3295   Mean   : 7.801  
+##                     3rd Qu.:4.507   3rd Qu.:5.6912   3rd Qu.: 9.917  
+##                     Max.   :7.143   Max.   :8.7350   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640     NA's   :1680    
+##     tritic_y       sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   :1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.:3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median :4.393   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   :4.516   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.:5.968   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :9.113   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869    NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y    
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
+##  1st Qu.: 3.655   1st Qu.:2.331   1st Qu.:1.718   1st Qu.:1.413  
+##  Median : 6.936   Median :2.903   Median :2.169   Median :2.034  
+##  Mean   :10.213   Mean   :2.859   Mean   :2.074   Mean   :2.139  
+##  3rd Qu.:12.340   3rd Qu.:3.543   3rd Qu.:2.570   3rd Qu.:2.747  
+##  Max.   :49.514   Max.   :5.000   Max.   :4.393   Max.   :6.705  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1675   
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  5.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.667   1st Qu.: 55.42   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.769   Median : 62.66   Median :25.10   Median : 4.5731  
+##  Mean   :29.023   Mean   : 63.21   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 75.22   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1749     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y        fibre_y         oth_ind_y      
+##  Min.   :0.5367   Min.   :0.000   Min.   : 0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.:1.327   1st Qu.: 1.103   1st Qu.: 0.6786  
+##  Median :3.1310   Median :2.000   Median : 3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   :1.962   Mean   : 3.510   Mean   : 2.2600  
+##  3rd Qu.:4.5236   3rd Qu.:2.661   3rd Qu.: 5.497   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :5.000   Max.   :19.400   Max.   :78.4444  
+##  NA's   :1671     NA's   :1748    NA's   :1883     NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
#Spurious fibre crops: list every row whose fibre yield is above 6
+crop.yield[!is.na(crop.yield$fibre_y) & crop.yield$fibre_y > 6, c('geo', 'fibre_y')]
+
##       geo   fibre_y
+## 20   AT11  7.777778
+## 39   AT22  7.666667
+## 79   BE23  6.067623
+## 162    CH 19.400000
+## 890    FR  6.637914
+## 901  FR21  6.508475
+## 903  FR23  6.304694
+## 907  FR30  6.100000
+## 912  FR52  6.666667
+## 943   FRD  6.229119
+## 972   FRH  6.666667
+## 1294   NL  6.129700
+## 1295  NL1  7.838207
+## 1652 SK02  8.333333
+
#CH: discard the fibre yield flagged in the listing above by setting it to missing
+crop.yield$fibre_y[crop.yield$geo %in% "CH"] <- NA
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y         maize_y      
+##  Length:2013        Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.436   1st Qu.:2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.172   Median :4.1115   Median : 8.146  
+##                     Mean   :3.452   Mean   :4.3295   Mean   : 7.801  
+##                     3rd Qu.:4.507   3rd Qu.:5.6912   3rd Qu.: 9.917  
+##                     Max.   :7.143   Max.   :8.7350   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640     NA's   :1680    
+##     tritic_y       sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   :1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.:3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median :4.393   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   :4.516   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.:5.968   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :9.113   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869    NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y    
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
+##  1st Qu.: 3.655   1st Qu.:2.331   1st Qu.:1.718   1st Qu.:1.413  
+##  Median : 6.936   Median :2.903   Median :2.169   Median :2.034  
+##  Mean   :10.213   Mean   :2.859   Mean   :2.074   Mean   :2.139  
+##  3rd Qu.:12.340   3rd Qu.:3.543   3rd Qu.:2.570   3rd Qu.:2.747  
+##  Max.   :49.514   Max.   :5.000   Max.   :4.393   Max.   :6.705  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1675   
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  5.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.667   1st Qu.: 55.42   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.769   Median : 62.66   Median :25.10   Median : 4.5731  
+##  Mean   :29.023   Mean   : 63.21   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 75.22   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1749     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y        fibre_y        oth_ind_y      
+##  Min.   :0.5367   Min.   :0.000   Min.   :0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.:1.327   1st Qu.:1.093   1st Qu.: 0.6786  
+##  Median :3.1310   Median :2.000   Median :3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   :1.962   Mean   :3.387   Mean   : 2.2600  
+##  3rd Qu.:4.5236   3rd Qu.:2.661   3rd Qu.:5.489   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :5.000   Max.   :8.333   Max.   :78.4444  
+##  NA's   :1671     NA's   :1748    NA's   :1884    NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
#Spurious other industrial crops: list every row whose yield is above 10
+crop.yield[!is.na(crop.yield$oth_ind_y) & crop.yield$oth_ind_y > 10, c('geo', 'oth_ind_y')]
+
##       geo oth_ind_y
+## 35   AT21  10.95238
+## 758  EL62  78.44444
+## 1089   IE  13.16592
+## 1090  IE0  13.36992
+
#EL62: replace its other-industrial-crop yield with the value of the parent code EL6
+crop.yield$oth_ind_y[crop.yield$geo == "EL62"] <-
+  crop.yield$oth_ind_y[crop.yield$geo == "EL6"]
+
+summary(crop.yield)
+
##      geo                rye_y          barley_y         maize_y      
+##  Length:2013        Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  Class :character   1st Qu.:2.436   1st Qu.:2.9163   1st Qu.: 6.059  
+##  Mode  :character   Median :3.172   Median :4.1115   Median : 8.146  
+##                     Mean   :3.452   Mean   :4.3295   Mean   : 7.801  
+##                     3rd Qu.:4.507   3rd Qu.:5.6912   3rd Qu.: 9.917  
+##                     Max.   :7.143   Max.   :8.7350   Max.   :12.976  
+##                     NA's   :1680    NA's   :1640     NA's   :1680    
+##     tritic_y       sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   :1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:3.003   1st Qu.:3.100   1st Qu.: 1.198   1st Qu.: 4.504  
+##  Median :3.990   Median :4.393   Median : 2.066   Median : 5.524  
+##  Mean   :4.026   Mean   :4.516   Mean   : 2.491   Mean   : 5.442  
+##  3rd Qu.:5.216   3rd Qu.:5.968   3rd Qu.: 3.659   3rd Qu.: 6.456  
+##  Max.   :7.359   Max.   :9.113   Max.   :10.750   Max.   :10.000  
+##  NA's   :1735    NA's   :1869    NA's   :1919     NA's   :1926    
+##    pasture_y          rape_y        sunflow_y        pulses_y    
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
+##  1st Qu.: 3.655   1st Qu.:2.331   1st Qu.:1.718   1st Qu.:1.413  
+##  Median : 6.936   Median :2.903   Median :2.169   Median :2.034  
+##  Mean   :10.213   Mean   :2.859   Mean   :2.074   Mean   :2.139  
+##  3rd Qu.:12.340   3rd Qu.:3.543   3rd Qu.:2.570   3rd Qu.:2.747  
+##  Max.   :49.514   Max.   :5.000   Max.   :4.393   Max.   :6.705  
+##  NA's   :1799     NA's   :1710    NA's   :1749    NA's   :1675   
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y       
+##  Min.   : 4.361   Min.   :  5.00   Min.   : 0.00   Min.   : 0.6667  
+##  1st Qu.:21.667   1st Qu.: 55.42   1st Qu.:15.11   1st Qu.: 3.1662  
+##  Median :27.769   Median : 62.66   Median :25.10   Median : 4.5731  
+##  Mean   :29.023   Mean   : 63.21   Mean   :29.77   Mean   : 4.8382  
+##  3rd Qu.:36.471   3rd Qu.: 75.22   3rd Qu.:37.46   3rd Qu.: 6.3951  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :10.2402  
+##  NA's   :1676     NA's   :1749     NA's   :1915    NA's   :1638     
+##      oats_y         oth_oil_y        fibre_y        oth_ind_y      
+##  Min.   :0.5367   Min.   :0.000   Min.   :0.000   Min.   : 0.0000  
+##  1st Qu.:2.2544   1st Qu.:1.327   1st Qu.:1.093   1st Qu.: 0.6786  
+##  Median :3.1310   Median :2.000   Median :3.077   Median : 1.4677  
+##  Mean   :3.3798   Mean   :1.962   Mean   :3.387   Mean   : 1.9474  
+##  3rd Qu.:4.5236   3rd Qu.:2.661   3rd Qu.:5.489   3rd Qu.: 2.4167  
+##  Max.   :8.5116   Max.   :5.000   Max.   :8.333   Max.   :13.3699  
+##  NA's   :1671     NA's   :1748    NA's   :1884    NA's   :1772     
+##     fodder_y       LEVL_CODE    
+##  Min.   : 0.00   Min.   :0.000  
+##  1st Qu.:10.27   1st Qu.:3.000  
+##  Median :18.53   Median :3.000  
+##  Mean   :21.88   Mean   :2.658  
+##  3rd Qu.:34.33   3rd Qu.:3.000  
+##  Max.   :51.05   Max.   :3.000  
+##  NA's   :1691
+
#Finally, allocate NUTS1 or NUTS0 values to NUTS2 regions where NUTS2 data are missing
+
+#Bookkeeping list: one (initially empty) slot per yield variable and data level.
+#Slot names are <variable>.n2.dat, <variable>.n1.dat, <variable>.n0.dat and
+#<variable>.nuts0.na, ordered by level first and by variable within each level.
+data.level.crop.y <- vector("list", 4*length(names(crop.yield)[2:21]))
+names(data.level.crop.y) <- as.vector(outer(names(crop.yield)[2:21],
+                                            c('n2.dat', 'n1.dat', 'n0.dat', 'nuts0.na'),
+                                            paste, sep='.'))
+names(data.level.crop.y)
+
##  [1] "rye_y.n2.dat"       "barley_y.n2.dat"    "maize_y.n2.dat"    
+##  [4] "tritic_y.n2.dat"    "sorghum_y.n2.dat"   "oth_cer_y.n2.dat"  
+##  [7] "rice_y.n2.dat"      "pasture_y.n2.dat"   "rape_y.n2.dat"     
+## [10] "sunflow_y.n2.dat"   "pulses_y.n2.dat"    "potato_y.n2.dat"   
+## [13] "sugbeet_y.n2.dat"   "oth_rt_y.n2.dat"    "wheat_y.n2.dat"    
+## [16] "oats_y.n2.dat"      "oth_oil_y.n2.dat"   "fibre_y.n2.dat"    
+## [19] "oth_ind_y.n2.dat"   "fodder_y.n2.dat"    "rye_y.n1.dat"      
+## [22] "barley_y.n1.dat"    "maize_y.n1.dat"     "tritic_y.n1.dat"   
+## [25] "sorghum_y.n1.dat"   "oth_cer_y.n1.dat"   "rice_y.n1.dat"     
+## [28] "pasture_y.n1.dat"   "rape_y.n1.dat"      "sunflow_y.n1.dat"  
+## [31] "pulses_y.n1.dat"    "potato_y.n1.dat"    "sugbeet_y.n1.dat"  
+## [34] "oth_rt_y.n1.dat"    "wheat_y.n1.dat"     "oats_y.n1.dat"     
+## [37] "oth_oil_y.n1.dat"   "fibre_y.n1.dat"     "oth_ind_y.n1.dat"  
+## [40] "fodder_y.n1.dat"    "rye_y.n0.dat"       "barley_y.n0.dat"   
+## [43] "maize_y.n0.dat"     "tritic_y.n0.dat"    "sorghum_y.n0.dat"  
+## [46] "oth_cer_y.n0.dat"   "rice_y.n0.dat"      "pasture_y.n0.dat"  
+## [49] "rape_y.n0.dat"      "sunflow_y.n0.dat"   "pulses_y.n0.dat"   
+## [52] "potato_y.n0.dat"    "sugbeet_y.n0.dat"   "oth_rt_y.n0.dat"   
+## [55] "wheat_y.n0.dat"     "oats_y.n0.dat"      "oth_oil_y.n0.dat"  
+## [58] "fibre_y.n0.dat"     "oth_ind_y.n0.dat"   "fodder_y.n0.dat"   
+## [61] "rye_y.nuts0.na"     "barley_y.nuts0.na"  "maize_y.nuts0.na"  
+## [64] "tritic_y.nuts0.na"  "sorghum_y.nuts0.na" "oth_cer_y.nuts0.na"
+## [67] "rice_y.nuts0.na"    "pasture_y.nuts0.na" "rape_y.nuts0.na"   
+## [70] "sunflow_y.nuts0.na" "pulses_y.nuts0.na"  "potato_y.nuts0.na" 
+## [73] "sugbeet_y.nuts0.na" "oth_rt_y.nuts0.na"  "wheat_y.nuts0.na"  
+## [76] "oats_y.nuts0.na"    "oth_oil_y.nuts0.na" "fibre_y.nuts0.na"  
+## [79] "oth_ind_y.nuts0.na" "fodder_y.nuts0.na"
+
#Empty output table: one row per NUTS_ID of the nuts layer and one all-NA column
+#per yield variable (columns 2 to 21 of crop.yield)
+dbase.yield <- data.frame(NUTS_ID = nuts@data$NUTS_ID, stringsAsFactors = FALSE)
+dbase.yield[names(crop.yield)[2:21]] <- NA
+head(dbase.yield)
+
##   NUTS_ID rye_y barley_y maize_y tritic_y sorghum_y oth_cer_y rice_y
+## 1    AT11    NA       NA      NA       NA        NA        NA     NA
+## 2    AT22    NA       NA      NA       NA        NA        NA     NA
+## 3    AT12    NA       NA      NA       NA        NA        NA     NA
+## 4    AT13    NA       NA      NA       NA        NA        NA     NA
+## 5    AT21    NA       NA      NA       NA        NA        NA     NA
+## 6    AT31    NA       NA      NA       NA        NA        NA     NA
+##   pasture_y rape_y sunflow_y pulses_y potato_y sugbeet_y oth_rt_y wheat_y
+## 1        NA     NA        NA       NA       NA        NA       NA      NA
+## 2        NA     NA        NA       NA       NA        NA       NA      NA
+## 3        NA     NA        NA       NA       NA        NA       NA      NA
+## 4        NA     NA        NA       NA       NA        NA       NA      NA
+## 5        NA     NA        NA       NA       NA        NA       NA      NA
+## 6        NA     NA        NA       NA       NA        NA       NA      NA
+##   oats_y oth_oil_y fibre_y oth_ind_y fodder_y
+## 1     NA        NA      NA        NA       NA
+## 2     NA        NA      NA        NA       NA
+## 3     NA        NA      NA        NA       NA
+## 4     NA        NA      NA        NA       NA
+## 5     NA        NA      NA        NA       NA
+## 6     NA        NA      NA        NA       NA
+
nrow(dbase.yield)
+
## [1] 320
+
#Fill dbase.yield from crop.yield, one yield column at a time. A region takes
+#its own NUTS2 value where one exists, otherwise the value of its NUTS1 parent,
+#otherwise the national (NUTS0) value. The codes that supply data at each level
+#(and the countries with no data at all) are stored in data.level.crop.y.
+#
+#crop.yield columns are referenced explicitly rather than through attach():
+#attached columns are looked up after the global environment, so a workspace
+#object named geo or LEVL_CODE would silently be used in their place.
+for(i in names(crop.yield)[2:21]) {
+  crop.na <- is.na(crop.yield[,i])
+
+  #NUTS2 codes without a value for this crop
+  nuts2.na <- crop.yield[crop.yield$LEVL_CODE == 2 & crop.na, 'geo']
+  #NUTS1 parents of those codes (code minus its last character) ...
+  nuts1 <- crop.yield[crop.yield$LEVL_CODE == 1 & crop.yield$geo %in% gsub(".{1}$", "", nuts2.na), 'geo']
+  #... and the parents that are missing as well
+  nuts1.na <- crop.yield[crop.yield$geo %in% nuts1 & crop.na, 'geo']
+  #NUTS0 parents of the missing NUTS1 codes ...
+  nuts0 <- crop.yield[crop.yield$LEVL_CODE == 0 & crop.yield$geo %in% gsub(".{1}$", "", nuts1.na), 'geo']
+  #... and the countries that have no value either (NO DATA)
+  nuts0.na <- crop.yield[crop.yield$geo %in% nuts0 & crop.na, 'geo']
+
+  #NUTS2 data
+  n2.dat <- crop.yield[!(crop.yield$geo %in% nuts2.na) & crop.yield$LEVL_CODE == 2, 'geo']
+  #NUTS1 data
+  n1.dat <- nuts1[!nuts1 %in% nuts1.na]
+  #NUTS0 data
+  n0.dat <- nuts0[!nuts0 %in% nuts0.na]
+
+  data.level.crop.y[[paste(i, 'n2.dat', sep='.')]] <- n2.dat
+  data.level.crop.y[[paste(i, 'n1.dat', sep='.')]] <- n1.dat
+  data.level.crop.y[[paste(i, 'n0.dat', sep='.')]] <- n0.dat
+  data.level.crop.y[[paste(i, 'nuts0.na', sep='.')]] <- nuts0.na
+
+  #Write the coarsest level first so that finer-level values overwrite it.
+  #The patterns are anchored with '^' so a parent code only matches at the
+  #start of a NUTS_ID, never in the middle of an unrelated code.
+  #NOTE(review): for the four-character NUTS_IDs printed in this report the
+  #anchored and unanchored patterns select the same rows - confirm that nuts
+  #holds NUTS2 codes only.
+  for(e in n0.dat) {
+    dbase.yield[grepl(paste0('^', e, '..'), dbase.yield$NUTS_ID), i] <- crop.yield[crop.yield$geo == e, i]
+  }
+
+  for(e in n1.dat) {
+    dbase.yield[grepl(paste0('^', e, '.'), dbase.yield$NUTS_ID), i] <- crop.yield[crop.yield$geo == e, i]
+  }
+
+  for(e in n2.dat) {
+    dbase.yield[dbase.yield$NUTS_ID == e, i] <- crop.yield[crop.yield$geo == e, i]
+  }
+}
+
+summary(dbase.yield)
+
##     NUTS_ID        rye_y          barley_y         maize_y      
+##  AT11   :  1   Min.   :0.000   Min.   :0.1254   Min.   : 0.000  
+##  AT12   :  1   1st Qu.:2.584   1st Qu.:3.0000   1st Qu.: 6.320  
+##  AT13   :  1   Median :3.436   Median :4.6743   Median : 7.973  
+##  AT21   :  1   Mean   :3.652   Mean   :4.5982   Mean   : 7.795  
+##  AT22   :  1   3rd Qu.:4.934   3rd Qu.:6.1262   3rd Qu.: 9.835  
+##  AT31   :  1   Max.   :7.143   Max.   :8.4666   Max.   :12.976  
+##  (Other):314   NA's   :10      NA's   :9        NA's   :23      
+##     tritic_y       sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.000   Min.   :1.000   Min.   : 0.000   Min.   : 0.000  
+##  1st Qu.:2.933   1st Qu.:3.173   1st Qu.: 1.363   1st Qu.: 4.567  
+##  Median :4.000   Median :4.000   Median : 2.114   Median : 5.051  
+##  Mean   :3.966   Mean   :4.349   Mean   : 2.181   Mean   : 5.314  
+##  3rd Qu.:5.218   3rd Qu.:5.675   3rd Qu.: 3.045   3rd Qu.: 5.977  
+##  Max.   :7.359   Max.   :9.044   Max.   :10.750   Max.   :10.000  
+##  NA's   :22      NA's   :175     NA's   :130      NA's   :178     
+##    pasture_y          rape_y        sunflow_y        pulses_y    
+##  Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
+##  1st Qu.: 4.320   1st Qu.:2.417   1st Qu.:1.667   1st Qu.:1.544  
+##  Median : 7.339   Median :3.186   Median :2.159   Median :2.162  
+##  Mean   : 9.295   Mean   :2.946   Mean   :2.102   Mean   :2.173  
+##  3rd Qu.:10.125   3rd Qu.:3.544   3rd Qu.:2.644   3rd Qu.:2.762  
+##  Max.   :49.514   Max.   :5.000   Max.   :4.393   Max.   :5.002  
+##  NA's   :99       NA's   :19      NA's   :91      NA's   :16     
+##     potato_y        sugbeet_y         oth_rt_y        wheat_y      
+##  Min.   : 4.361   Min.   :  5.00   Min.   : 0.00   Min.   :0.6667  
+##  1st Qu.:24.144   1st Qu.: 57.45   1st Qu.: 0.00   1st Qu.:3.3922  
+##  Median :31.598   Median : 67.95   Median :13.27   Median :5.1819  
+##  Mean   :31.469   Mean   : 65.07   Mean   :22.73   Mean   :5.3029  
+##  3rd Qu.:40.329   3rd Qu.: 76.28   3rd Qu.:30.83   3rd Qu.:7.4295  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.40   Max.   :9.2898  
+##  NA's   :7        NA's   :34       NA's   :111     NA's   :9       
+##      oats_y         oth_oil_y        fibre_y        oth_ind_y      
+##  Min.   :0.5367   Min.   :0.000   Min.   :0.000   Min.   : 0.0000  
+##  1st Qu.:2.3067   1st Qu.:1.342   1st Qu.:1.345   1st Qu.: 0.9845  
+##  Median :3.4671   Median :1.972   Median :2.073   Median : 1.8333  
+##  Mean   :3.5414   Mean   :1.947   Mean   :2.951   Mean   : 2.7491  
+##  3rd Qu.:4.7295   3rd Qu.:2.782   3rd Qu.:4.819   3rd Qu.: 3.2637  
+##  Max.   :7.2000   Max.   :5.000   Max.   :8.333   Max.   :13.3699  
+##  NA's   :9        NA's   :18      NA's   :137     NA's   :31       
+##     fodder_y    
+##  Min.   : 0.00  
+##  1st Qu.:12.50  
+##  Median :25.97  
+##  Mean   :25.70  
+##  3rd Qu.:38.13  
+##  Max.   :51.05  
+##  NA's   :11
+
head(dbase.yield)
+
##   NUTS_ID    rye_y barley_y   maize_y tritic_y sorghum_y oth_cer_y rice_y
+## 1    AT11 3.516367 4.435137  8.744878 3.649591  5.164671  3.868583     NA
+## 2    AT22 4.700782 5.715165 11.440518 6.234155  8.588477  3.681250     NA
+## 3    AT12 4.357949 4.924511  9.361459 5.100860  6.237822  3.901009     NA
+## 4    AT13 4.030303 4.468468  8.798165 5.024390  3.000000  3.944444     NA
+## 5    AT21 4.592187 5.394585 10.736016 5.808485  6.694444  3.817204     NA
+## 6    AT31 4.503259 6.279062 10.027501 5.474152  5.848101  3.869565     NA
+##   pasture_y   rape_y sunflow_y pulses_y potato_y sugbeet_y oth_rt_y
+## 1  5.264291 2.800108  2.344708 2.020007 34.06111  68.85210 47.12500
+## 2  6.965349 3.519774  2.430000 2.610619 27.19406  67.16500 51.00000
+## 3  6.666021 3.139327  2.623319 2.295685 31.89232  71.15838 61.30303
+## 4  5.615385 2.918750  2.647059 2.214286 34.87692  69.50661 58.60976
+## 5  7.327343 2.416667  1.912698 3.035211 24.40728  57.81250 40.91667
+## 6  7.502266 3.812215  2.097035 2.751594 30.02403  77.22785 63.89655
+##    wheat_y   oats_y oth_oil_y  fibre_y oth_ind_y fodder_y
+## 1 4.418312 3.320119  2.106922 7.777778  1.333333 18.05331
+## 2 6.405516 3.681343  1.061669 7.666667  6.587339 29.00264
+## 3 5.197458 3.752801  1.601008 4.966805  1.894773 23.44168
+## 4 4.702658 2.909091  2.016393 5.134100  0.000000 11.72727
+## 5 5.443334 3.707787  2.669792 4.800000 10.952381 31.81458
+## 6 6.893955 4.364527  2.660348 5.361111  3.804918 28.50815
+
tail(dbase.yield)
+
##     NUTS_ID    rye_y barley_y  maize_y tritic_y sorghum_y oth_cer_y
+## 315    UKD3 2.908474 5.037901 6.320472 4.000000        NA        NA
+## 316    TRC1 2.747730 2.887574 7.846154 3.321429         4         2
+## 317    TRC2 2.747730 2.147769 7.972516 3.321429         4         2
+## 318    UKD4 2.908474 5.037901 6.320472 4.000000        NA        NA
+## 319    TRC3 2.747730 2.910377 9.320635 3.321429         4         2
+## 320    UKM6 2.908474 5.879533 6.320472 3.000000        NA        NA
+##       rice_y pasture_y   rape_y sunflow_y pulses_y potato_y sugbeet_y
+## 315       NA        NA 3.309524        NA 2.666667 40.32860  61.76471
+## 316 3.333333        NA 3.462185  2.200000 1.528384 25.00000  77.83333
+## 317 3.333333        NA 3.462185  1.714286 1.560811 30.00000  24.00000
+## 318       NA        NA 3.309524        NA 2.666667 40.32860  61.76471
+## 319 3.333333        NA 3.462185  1.807692 1.996479 31.48791  82.00000
+## 320       NA        NA 3.185760        NA 2.666667 40.32860  69.90356
+##     oth_rt_y  wheat_y   oats_y oth_oil_y  fibre_y oth_ind_y fodder_y
+## 315        0 5.985455 4.695652  1.760192       NA 8.0254104 38.12607
+## 316       12 2.744770 2.355556  2.833333 1.784946 0.7666667 25.17500
+## 317       12 3.103288 2.500000  2.704453 1.713115 0.9090909 38.80000
+## 318        0 5.985455 4.695652  1.760192       NA 8.0254104 38.12607
+## 319       12 3.337816 2.355556  2.781250 1.847619 2.0000000 15.97368
+## 320        0 8.106808 5.888889  1.760192       NA 8.0254104 38.12607
+
#check data level for rye_y as an example
+data.level.crop.y$rye_y.n2.dat
+
##   [1] "AT11" "AT12" "AT13" "AT21" "AT22" "AT31" "AT32" "AT33" "AT34" "BE21"
+##  [11] "BE22" "BE23" "BE24" "BE25" "BE31" "BE32" "BE33" "BE34" "BE35" "BG31"
+##  [21] "BG32" "BG33" "BG34" "BG41" "BG42" "CH01" "CH02" "CH03" "CH04" "CH05"
+##  [31] "CH06" "CH07" "CZ01" "CZ02" "CZ03" "CZ04" "CZ05" "CZ06" "CZ07" "CZ08"
+##  [41] "DK01" "DK02" "DK03" "DK04" "DK05" "EL41" "EL42" "EL43" "EL51" "EL52"
+##  [51] "EL53" "EL54" "EL61" "EL63" "EL64" "EL65" "ES11" "ES13" "ES21" "ES22"
+##  [61] "ES23" "ES24" "ES30" "ES41" "ES42" "ES43" "ES51" "ES52" "ES53" "ES61"
+##  [71] "ES62" "ES70" "FI19" "FI1B" "FI1C" "FI1D" "FI20" "FR10" "FR21" "FR22"
+##  [81] "FR23" "FR24" "FR25" "FR26" "FR30" "FR41" "FR42" "FR43" "FR51" "FR52"
+##  [91] "FR53" "FR61" "FR62" "FR63" "FR71" "FR72" "FR81" "FR82" "HR03" "HR04"
+## [101] "HU10" "HU21" "HU22" "HU23" "HU31" "HU32" "HU33" "ITC1" "ITC2" "ITC4"
+## [111] "ITF1" "ITF3" "ITF4" "ITF5" "ITF6" "ITG1" "ITH1" "ITH3" "ITH5" "ITI1"
+## [121] "ITI4" "LT00" "NL11" "NL12" "NL13" "NL21" "NL22" "NL23" "NL31" "NL32"
+## [131] "NL33" "NL34" "NL41" "NL42" "NO01" "NO02" "NO03" "NO06" "PL11" "PL12"
+## [141] "PL21" "PL22" "PL31" "PL32" "PL33" "PL34" "PL41" "PL42" "PL43" "PL51"
+## [151] "PL52" "PL61" "PL62" "PL63" "PT11" "PT15" "PT16" "PT18" "RO11" "RO12"
+## [161] "RO21" "RO22" "RO31" "RO32" "RO41" "RO42" "RS11" "RS12" "RS21" "RS22"
+## [171] "SE11" "SE12" "SE21" "SE22" "SE23" "SE31" "SE32" "SI03" "SI04" "SK01"
+## [181] "SK02" "SK03" "SK04" "TR21" "TR22" "TR31" "TR32" "TR33" "TR41" "TR51"
+## [191] "TR52" "TR61" "TR62" "TR71" "TR72" "TR82" "TR83" "TR90" "TRA1" "TRA2"
+## [201] "TRB2"
+
data.level.crop.y$rye_y.n1.dat
+
##  [1] "DE1" "DE2" "DE4" "DE7" "DE8" "DE9" "DEA" "DEB" "DEC" "DED" "DEE"
+## [12] "DEF" "DEG" "EE0" "EL6" "ES1" "ES6" "IE0" "ITC" "ITF" "ITG" "ITH"
+## [23] "ITI" "LV0" "NO0" "PT1" "SE3" "TR4" "TR6" "TR8" "TRB" "UKE" "UKF"
+## [34] "UKH" "UKJ" "UKK" "UKL"
+
data.level.crop.y$rye_y.n0.dat
+
##  [1] "AL" "BE" "DE" "EL" "LU" "ME" "MK" "PT" "TR" "UK"
+
data.level.crop.y$rye_y.nuts0.na
+
## [1] "CY" "IS" "LI" "MT"
+
#Final fix of spurious data
+#London region (UKI): list oats, rye and barley yields for every NUTS_ID containing 'UK'
+dbase.yield[grepl('UK', dbase.yield$NUTS_ID), c('NUTS_ID', 'oats_y', 'rye_y', 'barley_y')]
+
##     NUTS_ID    oats_y    rye_y barley_y
+## 275    UKK1 5.0294118 6.000000 5.756677
+## 276    UKN0 5.8750000 2.908474 5.573684
+## 277    UKM2 5.8888889 2.908474 5.879533
+## 278    UKM3 5.8888889 2.908474 5.879533
+## 279    UKM5 5.8888889 2.908474 5.879533
+## 280    UKJ2 5.3936170 6.600000 6.171761
+## 281    UKK2 5.0294118 6.000000 5.756677
+## 282    UKJ3 5.3936170 6.600000 6.171761
+## 283    UKL1 5.0547945 0.000000 5.500000
+## 284    UKJ4 5.3936170 6.600000 6.171761
+## 285    UKK3 5.0294118 6.000000 5.756677
+## 286    UKK4 5.0294118 6.000000 5.756677
+## 287    UKL2 5.0547945 0.000000 5.500000
+## 288    UKD6 4.6956522 2.908474 5.037901
+## 289    UKD7 4.6956522 2.908474 5.037901
+## 290    UKE2 5.6052632 3.500000 6.554737
+## 291    UKE3 5.6052632 3.500000 6.554737
+## 292    UKE4 5.6052632 3.500000 6.554737
+## 293    UKG3 5.9036145 2.908474 5.956710
+## 294    UKF1 5.4038462 5.000000 6.232624
+## 295    UKE1 5.6052632 3.500000 6.554737
+## 296    UKF2 5.4038462 5.000000 6.232624
+## 297    UKG1 5.9036145 2.908474 5.956710
+## 298    UKH2 6.0204082 5.800000 6.150485
+## 299    UKF3 5.4038462 5.000000 6.232624
+## 300    UKI5 0.5367472 2.908474 3.000000
+## 301    UKI6 0.5367472 2.908474 3.000000
+## 302    UKI3 0.5367472 2.908474 3.000000
+## 303    UKI4 0.5367472 2.908474 3.000000
+## 304    UKH3 6.0204082 5.800000 6.150485
+## 305    UKG2 5.9036145 2.908474 5.956710
+## 306    UKI7 0.5367472 2.908474 3.000000
+## 307    UKJ1 5.3936170 6.600000 6.171761
+## 308    UKH1 6.0204082 5.800000 6.150485
+## 310    UKC1 5.8648649 2.908474 6.072674
+## 311    UKC2 5.8648649 2.908474 6.072674
+## 314    UKD1 4.6956522 2.908474 5.037901
+## 315    UKD3 4.6956522 2.908474 5.037901
+## 318    UKD4 4.6956522 2.908474 5.037901
+## 320    UKM6 5.8888889 2.908474 5.879533
+
#Set oats, rye and barley yields to missing for every NUTS_ID containing 'UKI'
+dbase.yield[grepl('UKI', dbase.yield$NUTS_ID), c('oats_y', 'rye_y', 'barley_y')] <- NA
+
+#Make all zero yields NAs
+colnames(dbase.yield)
+
##  [1] "NUTS_ID"   "rye_y"     "barley_y"  "maize_y"   "tritic_y" 
+##  [6] "sorghum_y" "oth_cer_y" "rice_y"    "pasture_y" "rape_y"   
+## [11] "sunflow_y" "pulses_y"  "potato_y"  "sugbeet_y" "oth_rt_y" 
+## [16] "wheat_y"   "oats_y"    "oth_oil_y" "fibre_y"   "oth_ind_y"
+## [21] "fodder_y"
+
#Replace every yield of exactly zero with NA in all columns except the first (NUTS_ID)
+dbase.yield[-1] <- lapply(dbase.yield[-1], function(yield) {
+  yield[which(yield == 0)] <- NA
+  yield
+})
+
# Crop areas: read the 'crop_berries_a' layer (an attribute table without geometry) from the project geodatabase
+berries_a <- st_read(layer = 'crop_berries_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_berries_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(berries_a)[5] <- 'berries_a'
+head(berries_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA  berries_a
+## 1    AT11         1    70 0.48611111  698.93634
+## 2    AT22         2   284 1.97222222  626.17569
+## 3    AT12         3   333 2.31250000 2923.26766
+## 4    AT13         4     7 0.04861111   12.07276
+## 5    AT21         5   161 1.11805556  267.40934
+## 6    AT31         6   207 1.43750000 1198.84000
+
# Read the 'crop_brassic_a' layer (an attribute table without geometry) from the project geodatabase
+brassic_a <- st_read(layer = 'crop_brassic_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_brassic_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(brassic_a)[5] <- 'brassic_a'
+head(brassic_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA  brassic_a
+## 1    AT11         1    70 0.48611111 224.465054
+## 2    AT22         2   284 1.97222222 217.960795
+## 3    AT12         3   333 2.31250000 938.814305
+## 4    AT13         4     7 0.04861111   3.877195
+## 5    AT21         5   161 1.11805556  85.879142
+## 6    AT31         6   207 1.43750000 385.010299
+
# Read the 'crop_citrus_a' layer (an attribute table without geometry) from the project geodatabase
+citrus_a <- st_read(layer = 'crop_citrus_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_citrus_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(citrus_a)[5] <- 'citrus_a'
+head(citrus_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA citrus_a
+## 1    AT11         1    70 0.48611111        0
+## 2    AT22         2   284 1.97222222        0
+## 3    AT12         3   333 2.31250000        0
+## 4    AT13         4     7 0.04861111        0
+## 5    AT21         5   161 1.11805556        0
+## 6    AT31         6   207 1.43750000        0
+
# Read the 'crop_frtrees_a' layer (an attribute table without geometry) from the project geodatabase
+frtrees_a <- st_read(layer = 'crop_frtrees_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_frtrees_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(frtrees_a)[5] <- 'frtrees_a'
+head(frtrees_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA   frtrees_a
+## 1    AT11         1    70 0.48611111  3395.05625
+## 2    AT22         2   284 1.97222222  3106.60397
+## 3    AT12         3   333 2.31250000 14199.65974
+## 4    AT13         4     7 0.04861111    58.64296
+## 5    AT21         5   161 1.11805556  1298.93051
+## 6    AT31         6   207 1.43750000  5823.31900
+
# Read the 'crop_grapes_a' layer (an attribute table without geometry) from the project geodatabase
+grapes_a <- st_read(layer = 'crop_grapes_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_grapes_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(grapes_a)[5] <- 'grapes_a'
+head(grapes_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA   grapes_a
+## 1    AT11         1    70 0.48611111  8087.4269
+## 2    AT22         2   284 1.97222222  2473.9556
+## 3    AT12         3   333 2.31250000 34108.5900
+## 4    AT13         4     7 0.04861111   142.7438
+## 5    AT21         5   161 1.11805556   962.3650
+## 6    AT31         6   207 1.43750000     0.0000
+
# Read the 'crop_greens_a' layer (an attribute table without geometry) from the project geodatabase
+greens_a <- st_read(layer = 'crop_greens_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_greens_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(greens_a)[5] <- 'greens_a'
+head(greens_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA    greens_a
+## 1    AT11         1    70 0.48611111  262.024673
+## 2    AT22         2   284 1.97222222  239.331023
+## 3    AT12         3   333 2.31250000 1095.905600
+## 4    AT13         4     7 0.04861111    4.525964
+## 5    AT21         5   161 1.11805556  100.249250
+## 6    AT31         6   207 1.43750000  449.433863
+
# Read the 'crop_nuts_a' layer (an attribute table without geometry) from the project geodatabase
+nuts_a <- st_read(layer = 'crop_nuts_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_nuts_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(nuts_a)[5] <- 'nuts_a'
+head(nuts_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA     nuts_a
+## 1    AT11         1    70 0.48611111  663.34164
+## 2    AT22         2   284 1.97222222  594.96624
+## 3    AT12         3   333 2.31250000 2774.39450
+## 4    AT13         4     7 0.04861111   11.45793
+## 5    AT21         5   161 1.11805556  253.79100
+## 6    AT31         6   207 1.43750000 1137.78673
+
# Read the 'crop_olives_a' layer (an attribute table without geometry) from the project geodatabase
+olives_a <- st_read(layer = 'crop_olives_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_olives_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(olives_a)[5] <- 'olives_a'
+head(olives_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA     olives_a
+## 1    AT11         1    70 0.48611111 8.965376e-21
+## 2    AT22         2   284 1.97222222 8.295056e+00
+## 3    AT12         3   333 2.31250000 0.000000e+00
+## 4    AT13         4     7 0.04861111 0.000000e+00
+## 5    AT21         5   161 1.11805556 4.135479e-21
+## 6    AT31         6   207 1.43750000 0.000000e+00
+
# Read the 'crop_oth_veg_a' layer (an attribute table without geometry) from the project geodatabase
+oth_veg_a <- st_read(layer = 'crop_oth_veg_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_oth_veg_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(oth_veg_a)[5] <- 'oth_veg_a'
+head(oth_veg_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA   oth_veg_a
+## 1    AT11         1    70 0.48611111  343.710066
+## 2    AT22         2   284 1.97222222  307.406884
+## 3    AT12         3   333 2.31250000 1437.550844
+## 4    AT13         4     7 0.04861111    5.936919
+## 5    AT21         5   161 1.11805556  131.501650
+## 6    AT31         6   207 1.43750000  589.543502
+
# Read the 'crop_peas_a' layer (an attribute table without geometry) from the project geodatabase
+peas_a <- st_read(layer = 'crop_peas_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_peas_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(peas_a)[5] <- 'peas_a'
+head(peas_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA     peas_a
+## 1    AT11         1    70 0.48611111 183.341788
+## 2    AT22         2   284 1.97222222 168.840186
+## 3    AT12         3   333 2.31250000 766.818212
+## 4    AT13         4     7 0.04861111   3.166871
+## 5    AT21         5   161 1.11805556  70.145596
+## 6    AT31         6   207 1.43750000 314.474229
+
# Read the 'crop_rootveg_a' layer (an attribute table without geometry) from the project geodatabase
+rootveg_a <- st_read(layer = 'crop_rootveg_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_rootveg_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(rootveg_a)[5] <- 'rootveg_a'
+head(rootveg_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA   rootveg_a
+## 1    AT11         1    70 0.48611111  411.738891
+## 2    AT22         2   284 1.97222222  376.291177
+## 3    AT12         3   333 2.31250000 1722.078127
+## 4    AT13         4     7 0.04861111    7.111984
+## 5    AT21         5   161 1.11805556  157.529120
+## 6    AT31         6   207 1.43750000  706.228918
+
# Read the 'crop_tropfr_a' layer (an attribute table without geometry) from the project geodatabase
+tropfr_a <- st_read(layer = 'crop_tropfr_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_tropfr_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(tropfr_a)[5] <- 'tropfr_a'
+head(tropfr_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA tropfr_a
+## 1    AT11         1    70 0.48611111 0.000000
+## 2    AT22         2   284 1.97222222 1.265173
+## 3    AT12         3   333 2.31250000 0.000000
+## 4    AT13         4     7 0.04861111 0.000000
+## 5    AT21         5   161 1.11805556 0.000000
+## 6    AT31         6   207 1.43750000 0.000000
+
# Read the 'crop_vfruits_a' layer (an attribute table without geometry) from the project geodatabase
+vfruits_a <- st_read(layer = 'crop_vfruits_a', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_vfruits_a' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(vfruits_a)[5] <- 'vfruits_a'
+head(vfruits_a)
+
##   NUTS_ID ZONE_CODE COUNT       AREA  vfruits_a
+## 1    AT11         1    70 0.48611111  98.185651
+## 2    AT22         2   284 1.97222222  97.545471
+## 3    AT12         3   333 2.31250000 410.656773
+## 4    AT13         4     7 0.04861111   1.695965
+## 5    AT21         5   161 1.11805556  37.565311
+## 6    AT31         6   207 1.43750000 168.411455
+
# Crop yields: read the 'crop_berries_y' layer (an attribute table without geometry) from the project geodatabase
+berries_y <- st_read(layer = 'crop_berries_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_berries_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(berries_y)[5] <- 'berries_y'
+head(berries_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA berries_y
+## 1    AT11         1    70 0.48611111  29.18171
+## 2    AT22         2   284 1.97222222  12.23637
+## 3    AT12         3   333 2.31250000  23.90570
+## 4    AT13         4     7 0.04861111  17.16571
+## 5    AT21         5   161 1.11805556  14.36696
+## 6    AT31         6   207 1.43750000  20.89739
+
# Read the 'crop_brassic_y' layer (an attribute table without geometry) from the project geodatabase
+brassic_y <- st_read(layer = 'crop_brassic_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_brassic_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(brassic_y)[5] <- 'brassic_y'
+head(brassic_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA brassic_y
+## 1    AT11         1    70 0.48611111  44.06400
+## 2    AT22         2   284 1.97222222  18.54004
+## 3    AT12         3   333 2.31250000  36.09730
+## 4    AT13         4     7 0.04861111  25.92000
+## 5    AT21         5   161 1.11805556  21.69391
+## 6    AT31         6   207 1.43750000  31.55478
+
# Read the 'crop_citrus_y' layer (an attribute table without geometry) from the project geodatabase
+citrus_y <- st_read(layer = 'crop_citrus_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_citrus_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(citrus_y)[5] <- 'citrus_y'
+head(citrus_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA citrus_y
+## 1    AT11         1    70 0.48611111        0
+## 2    AT22         2   284 1.97222222        0
+## 3    AT12         3   333 2.31250000        0
+## 4    AT13         4     7 0.04861111        0
+## 5    AT21         5   161 1.11805556        0
+## 6    AT31         6   207 1.43750000        0
+
# Read the 'crop_frtrees_y' layer (an attribute table without geometry) from the project geodatabase
+frtrees_y <- st_read(layer = 'crop_frtrees_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_frtrees_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(frtrees_y)[5] <- 'frtrees_y'
+head(frtrees_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA frtrees_y
+## 1    AT11         1    70 0.48611111 106.98343
+## 2    AT22         2   284 1.97222222  45.09310
+## 3    AT12         3   333 2.31250000  87.64099
+## 4    AT13         4     7 0.04861111  62.93143
+## 5    AT21         5   161 1.11805556  52.67087
+## 6    AT31         6   207 1.43750000  76.61217
+
# Read the 'crop_grapes_y' layer (an attribute table without geometry) from the project geodatabase
+grapes_y <- st_read(layer = 'crop_grapes_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_grapes_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(grapes_y)[5] <- 'grapes_y'
+head(grapes_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA grapes_y
+## 1    AT11         1    70 0.48611111 8.273220
+## 2    AT22         2   284 1.97222222 3.134204
+## 3    AT12         3   333 2.31250000 5.295910
+## 4    AT13         4     7 0.04861111 3.012085
+## 5    AT21         5   161 1.11805556 4.908311
+## 6    AT31         6   207 1.43750000 0.000000
+
# Read the 'crop_greens_y' layer (an attribute table without geometry) from the project geodatabase
+greens_y <- st_read(layer = 'crop_greens_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_greens_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(greens_y)[5] <- 'greens_y'
+head(greens_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA greens_y
+## 1    AT11         1    70 0.48611111 61.19029
+## 2    AT22         2   284 1.97222222 25.61430
+## 3    AT12         3   333 2.31250000 50.12718
+## 4    AT13         4     7 0.04861111 35.99429
+## 5    AT21         5   161 1.11805556 30.12565
+## 6    AT31         6   207 1.43750000 43.81913
+
# Read the 'crop_nuts_y' layer (an attribute table without geometry) from the project geodatabase
+nuts_y <- st_read(layer = 'crop_nuts_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_nuts_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(nuts_y)[5] <- 'nuts_y'
+head(nuts_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA   nuts_y
+## 1    AT11         1    70 0.48611111 2.632571
+## 2    AT22         2   284 1.97222222 1.210000
+## 3    AT12         3   333 2.31250000 2.156607
+## 4    AT13         4     7 0.04861111 1.548571
+## 5    AT21         5   161 1.11805556 1.296087
+## 6    AT31         6   207 1.43750000 1.885217
+
# Read the 'crop_olives_y' layer (an attribute table without geometry) from the project geodatabase
+olives_y <- st_read(layer = 'crop_olives_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_olives_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(olives_y)[5] <- 'olives_y'
+head(olives_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA    olives_y
+## 1    AT11         1    70 0.48611111 0.000000000
+## 2    AT22         2   284 1.97222222 0.007077465
+## 3    AT12         3   333 2.31250000 0.000000000
+## 4    AT13         4     7 0.04861111 0.000000000
+## 5    AT21         5   161 1.11805556 0.000000000
+## 6    AT31         6   207 1.43750000 0.000000000
+
# Read the 'crop_oth_veg_y' layer (an attribute table without geometry) from the project geodatabase
+oth_veg_y <- st_read(layer = 'crop_oth_veg_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_oth_veg_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(oth_veg_y)[5] <- 'oth_veg_y'
+head(oth_veg_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA oth_veg_y
+## 1    AT11         1    70 0.48611111  29.05543
+## 2    AT22         2   284 1.97222222  12.11144
+## 3    AT12         3   333 2.31250000  23.80225
+## 4    AT13         4     7 0.04861111  17.09143
+## 5    AT21         5   161 1.11805556  14.30478
+## 6    AT31         6   207 1.43750000  20.80696
+
# Read the 'crop_peas_y' layer (an attribute table without geometry) from the project geodatabase
+peas_y <- st_read(layer = 'crop_peas_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_peas_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(peas_y)[5] <- 'peas_y'
+head(peas_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA    peas_y
+## 1    AT11         1    70 0.48611111 18.087999
+## 2    AT22         2   284 1.97222222  7.590493
+## 3    AT12         3   333 2.31250000 14.817717
+## 4    AT13         4     7 0.04861111 10.639999
+## 5    AT21         5   161 1.11805556  8.905217
+## 6    AT31         6   207 1.43750000 12.953043
+
# Read the 'crop_rootveg_y' layer (an attribute table without geometry) from the project geodatabase
+rootveg_y <- st_read(layer = 'crop_rootveg_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_rootveg_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(rootveg_y)[5] <- 'rootveg_y'
+head(rootveg_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA rootveg_y
+## 1    AT11         1    70 0.48611111  93.43200
+## 2    AT22         2   284 1.97222222  39.11891
+## 3    AT12         3   333 2.31250000  76.53964
+## 4    AT13         4     7 0.04861111  54.96000
+## 5    AT21         5   161 1.11805556  45.99913
+## 6    AT31         6   207 1.43750000  66.90783
+
# Read the 'crop_tropfr_y' layer (an attribute table without geometry) from the project geodatabase
+tropfr_y <- st_read(layer = 'crop_tropfr_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_tropfr_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(tropfr_y)[5] <- 'tropfr_y'
+head(tropfr_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA    tropfr_y
+## 1    AT11         1    70 0.48611111 0.000000000
+## 2    AT22         2   284 1.97222222 0.002429577
+## 3    AT12         3   333 2.31250000 0.000000000
+## 4    AT13         4     7 0.04861111 0.000000000
+## 5    AT21         5   161 1.11805556 0.000000000
+## 6    AT31         6   207 1.43750000 0.000000000
+
# Read the 'crop_vfruits_y' layer (an attribute table without geometry) from the project geodatabase
+vfruits_y <- st_read(layer = 'crop_vfruits_y', dsn = 'C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/EU_Farming_Systems_20181015.gdb')
+
## Reading layer `crop_vfruits_y' from data source `C:\Users\mu5106sc\Dropbox\STAGS\D1_Database\EU_Farming_Systems_20181015.gdb' using driver `OpenFileGDB'
+
## Warning: no simple feature geometries present: returning a data.frame or
+## tbl_df
+
# Column 5 holds the layer's value; give it the crop-specific name used when the tables are merged
+colnames(vfruits_y)[5] <- 'vfruits_y'
+head(vfruits_y)
+
##   NUTS_ID ZONE_CODE COUNT       AREA vfruits_y
+## 1    AT11         1    70 0.48611111  258.4971
+## 2    AT22         2   284 1.97222222  107.9879
+## 3    AT12         3   333 2.31250000  211.7612
+## 4    AT13         4     7 0.04861111  152.0571
+## 5    AT21         5   161 1.11805556  127.2652
+## 6    AT31         6   207 1.43750000  185.1130
+
# Merge all tables: seed with the berries area column, then attach one crop column at a time, matched on NUTS_ID
+crop.earthstat <- left_join(berries_a[, c('NUTS_ID', 'berries_a')], brassic_a[, c('NUTS_ID', 'brassic_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the citrus area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, citrus_a[, c('NUTS_ID', 'citrus_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the frtrees area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, frtrees_a[, c('NUTS_ID', 'frtrees_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the grapes area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, grapes_a[, c('NUTS_ID', 'grapes_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the greens area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, greens_a[, c('NUTS_ID', 'greens_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the nuts area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, nuts_a[, c('NUTS_ID', 'nuts_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the olives area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, olives_a[, c('NUTS_ID', 'olives_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the oth_veg area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, oth_veg_a[, c('NUTS_ID', 'oth_veg_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the peas area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, peas_a[, c('NUTS_ID', 'peas_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the rootveg area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, rootveg_a[, c('NUTS_ID', 'rootveg_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the tropfr area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, tropfr_a[, c('NUTS_ID', 'tropfr_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the vfruits area column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, vfruits_a[, c('NUTS_ID', 'vfruits_a')])
+
## Joining, by = "NUTS_ID"
+
# Attach the berries yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, berries_y[, c('NUTS_ID', 'berries_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the brassic yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, brassic_y[, c('NUTS_ID', 'brassic_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the citrus yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, citrus_y[, c('NUTS_ID', 'citrus_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the frtrees yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, frtrees_y[, c('NUTS_ID', 'frtrees_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the grapes yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, grapes_y[, c('NUTS_ID', 'grapes_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the greens yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, greens_y[, c('NUTS_ID', 'greens_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the nuts yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, nuts_y[, c('NUTS_ID', 'nuts_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the olives yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, olives_y[, c('NUTS_ID', 'olives_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the oth_veg yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, oth_veg_y[, c('NUTS_ID', 'oth_veg_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the peas yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, peas_y[, c('NUTS_ID', 'peas_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the rootveg yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, rootveg_y[, c('NUTS_ID', 'rootveg_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the tropfr yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, tropfr_y[, c('NUTS_ID', 'tropfr_y')])
+
## Joining, by = "NUTS_ID"
+
# Attach the vfruits yield column; the join key is the shared NUTS_ID column
+crop.earthstat <- left_join(crop.earthstat, vfruits_y[, c('NUTS_ID', 'vfruits_y')])
+
## Joining, by = "NUTS_ID"
+
# Preview the merged table: one row per NUTS_ID, the *_a area columns followed by the *_y yield columns
head(crop.earthstat)
+
##   NUTS_ID  berries_a  brassic_a citrus_a   frtrees_a   grapes_a
+## 1    AT11  698.93634 224.465054        0  3395.05625  8087.4269
+## 2    AT22  626.17569 217.960795        0  3106.60397  2473.9556
+## 3    AT12 2923.26766 938.814305        0 14199.65974 34108.5900
+## 4    AT13   12.07276   3.877195        0    58.64296   142.7438
+## 5    AT21  267.40934  85.879142        0  1298.93051   962.3650
+## 6    AT31 1198.84000 385.010299        0  5823.31900     0.0000
+##      greens_a     nuts_a     olives_a   oth_veg_a     peas_a   rootveg_a
+## 1  262.024673  663.34164 8.965376e-21  343.710066 183.341788  411.738891
+## 2  239.331023  594.96624 8.295056e+00  307.406884 168.840186  376.291177
+## 3 1095.905600 2774.39450 0.000000e+00 1437.550844 766.818212 1722.078127
+## 4    4.525964   11.45793 0.000000e+00    5.936919   3.166871    7.111984
+## 5  100.249250  253.79100 4.135479e-21  131.501650  70.145596  157.529120
+## 6  449.433863 1137.78673 0.000000e+00  589.543502 314.474229  706.228918
+##   tropfr_a  vfruits_a berries_y brassic_y citrus_y frtrees_y grapes_y
+## 1 0.000000  98.185651  29.18171  44.06400        0 106.98343 8.273220
+## 2 1.265173  97.545471  12.23637  18.54004        0  45.09310 3.134204
+## 3 0.000000 410.656773  23.90570  36.09730        0  87.64099 5.295910
+## 4 0.000000   1.695965  17.16571  25.92000        0  62.93143 3.012085
+## 5 0.000000  37.565311  14.36696  21.69391        0  52.67087 4.908311
+## 6 0.000000 168.411455  20.89739  31.55478        0  76.61217 0.000000
+##   greens_y   nuts_y    olives_y oth_veg_y    peas_y rootveg_y    tropfr_y
+## 1 61.19029 2.632571 0.000000000  29.05543 18.087999  93.43200 0.000000000
+## 2 25.61430 1.210000 0.007077465  12.11144  7.590493  39.11891 0.002429577
+## 3 50.12718 2.156607 0.000000000  23.80225 14.817717  76.53964 0.000000000
+## 4 35.99429 1.548571 0.000000000  17.09143 10.639999  54.96000 0.000000000
+## 5 30.12565 1.296087 0.000000000  14.30478  8.905217  45.99913 0.000000000
+## 6 43.81913 1.885217 0.000000000  20.80696 12.953043  66.90783 0.000000000
+##   vfruits_y
+## 1  258.4971
+## 2  107.9879
+## 3  211.7612
+## 4  152.0571
+## 5  127.2652
+## 6  185.1130
+
# Per-column distribution check of the merged area and yield values
summary(crop.earthstat)
+
##     NUTS_ID      berries_a          brassic_a         citrus_a      
+##  AT11   :  1   Min.   :    0.00   Min.   :   0.0   Min.   :    0.0  
+##  AT12   :  1   1st Qu.:   91.16   1st Qu.: 255.2   1st Qu.:    0.0  
+##  AT13   :  1   Median :  344.64   Median : 642.6   Median :    0.0  
+##  AT21   :  1   Mean   :  838.48   Mean   :1213.3   Mean   : 1959.2  
+##  AT22   :  1   3rd Qu.:  760.38   3rd Qu.:1521.2   3rd Qu.:  468.2  
+##  AT31   :  1   Max.   :13842.40   Max.   :7754.3   Max.   :63996.0  
+##  (Other):312                                                        
+##    frtrees_a          grapes_a           greens_a       
+##  Min.   :    0.0   Min.   :     0.0   Min.   :    0.00  
+##  1st Qu.:  490.4   1st Qu.:     0.0   1st Qu.:   65.56  
+##  Median : 2976.8   Median :   108.6   Median :  297.01  
+##  Mean   : 6512.1   Mean   : 13319.7   Mean   : 1058.31  
+##  3rd Qu.: 9645.0   3rd Qu.: 12667.6   3rd Qu.: 1032.31  
+##  Max.   :50265.7   Max.   :582102.2   Max.   :16478.72  
+##                                                         
+##      nuts_a            olives_a           oth_veg_a      
+##  Min.   :     0.0   Min.   :      0.0   Min.   :    0.0  
+##  1st Qu.:     0.0   1st Qu.:      0.0   1st Qu.:  263.1  
+##  Median :   100.8   Median :      0.0   Median :  748.2  
+##  Mean   :  4828.6   Mean   :  14648.2   Mean   : 2079.2  
+##  3rd Qu.:  1915.0   3rd Qu.:    234.3   3rd Qu.: 2197.1  
+##  Max.   :149274.3   Max.   :1416618.8   Max.   :21937.9  
+##                                                          
+##      peas_a          rootveg_a          tropfr_a         vfruits_a       
+##  Min.   :    0.0   Min.   :    0.0   Min.   :    0.0   Min.   :    0.00  
+##  1st Qu.:  116.9   1st Qu.:  215.3   1st Qu.:    0.0   1st Qu.:   29.81  
+##  Median :  607.3   Median :  705.4   Median :    0.0   Median :  297.76  
+##  Mean   : 1284.3   Mean   : 1703.2   Mean   :  814.5   Mean   : 4691.09  
+##  3rd Qu.: 1775.3   3rd Qu.: 2039.5   3rd Qu.:  297.5   3rd Qu.: 3972.33  
+##  Max.   :19403.6   Max.   :32140.5   Max.   :50414.8   Max.   :59073.51  
+##                                                                          
+##    berries_y       brassic_y        citrus_y        frtrees_y     
+##  Min.   : 0.00   Min.   : 0.00   Min.   :  0.00   Min.   :  0.00  
+##  1st Qu.:17.09   1st Qu.:27.92   1st Qu.:  0.00   1st Qu.: 39.70  
+##  Median :25.00   Median :36.53   Median :  0.00   Median : 59.41  
+##  Mean   :24.65   Mean   :36.71   Mean   : 26.52   Mean   : 60.62  
+##  3rd Qu.:34.64   3rd Qu.:42.06   3rd Qu.: 52.47   3rd Qu.: 87.56  
+##  Max.   :57.13   Max.   :77.33   Max.   :122.23   Max.   :180.73  
+##                                                                   
+##     grapes_y            greens_y         nuts_y          olives_y     
+##  Min.   : 0.000000   Min.   : 0.00   Min.   : 0.000   Min.   :0.0000  
+##  1st Qu.: 0.007437   1st Qu.:20.73   1st Qu.: 0.000   1st Qu.:0.0000  
+##  Median : 2.495389   Median :35.52   Median : 3.073   Median :0.0000  
+##  Mean   : 3.456924   Mean   :31.97   Mean   : 4.232   Mean   :0.4511  
+##  3rd Qu.: 6.183397   3rd Qu.:43.31   3rd Qu.: 7.779   3rd Qu.:0.8111  
+##  Max.   :13.714184   Max.   :71.30   Max.   :22.863   Max.   :4.2988  
+##                                                                       
+##    oth_veg_y          peas_y        rootveg_y         tropfr_y    
+##  Min.   : 0.000   Min.   : 0.00   Min.   :  0.00   Min.   : 0.00  
+##  1st Qu.: 9.276   1st Qu.:12.10   1st Qu.: 41.92   1st Qu.: 0.00  
+##  Median :14.125   Median :20.78   Median : 68.70   Median : 0.00  
+##  Mean   :16.035   Mean   :20.47   Mean   : 67.66   Mean   :16.12  
+##  3rd Qu.:18.919   3rd Qu.:28.03   3rd Qu.: 86.82   3rd Qu.:32.13  
+##  Max.   :41.340   Max.   :44.57   Max.   :170.17   Max.   :98.40  
+##                                                                   
+##    vfruits_y      
+##  Min.   :   0.00  
+##  1st Qu.:  94.11  
+##  Median : 177.77  
+##  Mean   : 281.39  
+##  3rd Qu.: 303.62  
+##  Max.   :1194.74  
+## 
+
#Calculate fraction of agricultural area
+head(corine.aa.all.nuts)
+
##    geo   sum_uaa
+## 1 AT11  219300.0
+## 2 AT22  411556.2
+## 3 AT12 1004956.2
+## 4 AT13    6075.0
+## 5 AT21  191312.5
+## 6 AT31  582587.5
+
#Attach the agricultural area (sum_uaa) to each region.
+#NUTS_ID is a factor while corine.aa.all.nuts$geo is character, so build the key as character
+#and name it explicitly instead of relying on left_join()'s implicit key and type coercion
+crop.earthstat$geo <- as.character(crop.earthstat$NUTS_ID)
+crop.earthstat <- left_join(crop.earthstat, corine.aa.all.nuts, by = "geo")
+
## Joining, by = "geo"
+
## Warning: Column `geo` joining factor and character vector, coercing into
+## character vector
+
#Fraction of agricultural area per crop group: <crop>_f = <crop>_a / sum_uaa
+earthstat.crops <- c("berries", "brassic", "citrus", "frtrees", "grapes", "greens", "nuts",
+                     "olives", "oth_veg", "peas", "rootveg", "tropfr", "vfruits")
+for (crop in earthstat.crops) {
+  crop.earthstat[[paste0(crop, "_f")]] <- crop.earthstat[[paste0(crop, "_a")]] / crop.earthstat$sum_uaa
+}
+
+summary(crop.earthstat)
+
##     NUTS_ID      berries_a          brassic_a         citrus_a      
+##  AT11   :  1   Min.   :    0.00   Min.   :   0.0   Min.   :    0.0  
+##  AT12   :  1   1st Qu.:   91.16   1st Qu.: 255.2   1st Qu.:    0.0  
+##  AT13   :  1   Median :  344.64   Median : 642.6   Median :    0.0  
+##  AT21   :  1   Mean   :  838.48   Mean   :1213.3   Mean   : 1959.2  
+##  AT22   :  1   3rd Qu.:  760.38   3rd Qu.:1521.2   3rd Qu.:  468.2  
+##  AT31   :  1   Max.   :13842.40   Max.   :7754.3   Max.   :63996.0  
+##  (Other):312                                                        
+##    frtrees_a          grapes_a           greens_a       
+##  Min.   :    0.0   Min.   :     0.0   Min.   :    0.00  
+##  1st Qu.:  490.4   1st Qu.:     0.0   1st Qu.:   65.56  
+##  Median : 2976.8   Median :   108.6   Median :  297.01  
+##  Mean   : 6512.1   Mean   : 13319.7   Mean   : 1058.31  
+##  3rd Qu.: 9645.0   3rd Qu.: 12667.6   3rd Qu.: 1032.31  
+##  Max.   :50265.7   Max.   :582102.2   Max.   :16478.72  
+##                                                         
+##      nuts_a            olives_a           oth_veg_a      
+##  Min.   :     0.0   Min.   :      0.0   Min.   :    0.0  
+##  1st Qu.:     0.0   1st Qu.:      0.0   1st Qu.:  263.1  
+##  Median :   100.8   Median :      0.0   Median :  748.2  
+##  Mean   :  4828.6   Mean   :  14648.2   Mean   : 2079.2  
+##  3rd Qu.:  1915.0   3rd Qu.:    234.3   3rd Qu.: 2197.1  
+##  Max.   :149274.3   Max.   :1416618.8   Max.   :21937.9  
+##                                                          
+##      peas_a          rootveg_a          tropfr_a         vfruits_a       
+##  Min.   :    0.0   Min.   :    0.0   Min.   :    0.0   Min.   :    0.00  
+##  1st Qu.:  116.9   1st Qu.:  215.3   1st Qu.:    0.0   1st Qu.:   29.81  
+##  Median :  607.3   Median :  705.4   Median :    0.0   Median :  297.76  
+##  Mean   : 1284.3   Mean   : 1703.2   Mean   :  814.5   Mean   : 4691.09  
+##  3rd Qu.: 1775.3   3rd Qu.: 2039.5   3rd Qu.:  297.5   3rd Qu.: 3972.33  
+##  Max.   :19403.6   Max.   :32140.5   Max.   :50414.8   Max.   :59073.51  
+##                                                                          
+##    berries_y       brassic_y        citrus_y        frtrees_y     
+##  Min.   : 0.00   Min.   : 0.00   Min.   :  0.00   Min.   :  0.00  
+##  1st Qu.:17.09   1st Qu.:27.92   1st Qu.:  0.00   1st Qu.: 39.70  
+##  Median :25.00   Median :36.53   Median :  0.00   Median : 59.41  
+##  Mean   :24.65   Mean   :36.71   Mean   : 26.52   Mean   : 60.62  
+##  3rd Qu.:34.64   3rd Qu.:42.06   3rd Qu.: 52.47   3rd Qu.: 87.56  
+##  Max.   :57.13   Max.   :77.33   Max.   :122.23   Max.   :180.73  
+##                                                                   
+##     grapes_y            greens_y         nuts_y          olives_y     
+##  Min.   : 0.000000   Min.   : 0.00   Min.   : 0.000   Min.   :0.0000  
+##  1st Qu.: 0.007437   1st Qu.:20.73   1st Qu.: 0.000   1st Qu.:0.0000  
+##  Median : 2.495389   Median :35.52   Median : 3.073   Median :0.0000  
+##  Mean   : 3.456924   Mean   :31.97   Mean   : 4.232   Mean   :0.4511  
+##  3rd Qu.: 6.183397   3rd Qu.:43.31   3rd Qu.: 7.779   3rd Qu.:0.8111  
+##  Max.   :13.714184   Max.   :71.30   Max.   :22.863   Max.   :4.2988  
+##                                                                       
+##    oth_veg_y          peas_y        rootveg_y         tropfr_y    
+##  Min.   : 0.000   Min.   : 0.00   Min.   :  0.00   Min.   : 0.00  
+##  1st Qu.: 9.276   1st Qu.:12.10   1st Qu.: 41.92   1st Qu.: 0.00  
+##  Median :14.125   Median :20.78   Median : 68.70   Median : 0.00  
+##  Mean   :16.035   Mean   :20.47   Mean   : 67.66   Mean   :16.12  
+##  3rd Qu.:18.919   3rd Qu.:28.03   3rd Qu.: 86.82   3rd Qu.:32.13  
+##  Max.   :41.340   Max.   :44.57   Max.   :170.17   Max.   :98.40  
+##                                                                   
+##    vfruits_y           geo               sum_uaa       
+##  Min.   :   0.00   Length:318         Min.   :    438  
+##  1st Qu.:  94.11   Class :character   1st Qu.: 214006  
+##  Median : 177.77   Mode  :character   Median : 514169  
+##  Mean   : 281.39                      Mean   : 756028  
+##  3rd Qu.: 303.62                      3rd Qu.:1015952  
+##  Max.   :1194.74                      Max.   :5007938  
+##                                       NA's   :2        
+##    berries_f           brassic_f            citrus_f       
+##  Min.   :0.0000000   Min.   :0.0000000   Min.   :0.000000  
+##  1st Qu.:0.0002767   1st Qu.:0.0007169   1st Qu.:0.000000  
+##  Median :0.0005373   Median :0.0011628   Median :0.000000  
+##  Mean   :0.0010681   Mean   :0.0016514   Mean   :0.002161  
+##  3rd Qu.:0.0013782   3rd Qu.:0.0021024   3rd Qu.:0.001833  
+##  Max.   :0.0093737   Max.   :0.0115583   Max.   :0.029824  
+##  NA's   :2           NA's   :2           NA's   :2         
+##    frtrees_f           grapes_f            greens_f        
+##  Min.   :0.000000   Min.   :0.0000000   Min.   :0.0000000  
+##  1st Qu.:0.001741   1st Qu.:0.0000001   1st Qu.:0.0002412  
+##  Median :0.007025   Median :0.0003219   Median :0.0008391  
+##  Mean   :0.007964   Mean   :0.0149535   Mean   :0.0013980  
+##  3rd Qu.:0.010316   3rd Qu.:0.0173696   3rd Qu.:0.0013922  
+##  Max.   :0.092826   Max.   :0.2683389   Max.   :0.0156956  
+##  NA's   :2          NA's   :2           NA's   :2          
+##      nuts_f             olives_f          oth_veg_f       
+##  Min.   :0.0000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.0000000   1st Qu.:0.000000   1st Qu.:0.000530  
+##  Median :0.0002118   Median :0.000000   Median :0.001639  
+##  Mean   :0.0043429   Mean   :0.015398   Mean   :0.002617  
+##  3rd Qu.:0.0025973   3rd Qu.:0.000905   3rd Qu.:0.003725  
+##  Max.   :0.0360333   Max.   :0.452390   Max.   :0.019446  
+##  NA's   :2           NA's   :2          NA's   :2         
+##      peas_f            rootveg_f            tropfr_f        
+##  Min.   :0.0000000   Min.   :0.0000000   Min.   :0.0000000  
+##  1st Qu.:0.0003897   1st Qu.:0.0006968   1st Qu.:0.0000000  
+##  Median :0.0013164   Median :0.0014379   Median :0.0000000  
+##  Mean   :0.0018037   Mean   :0.0021119   Mean   :0.0010535  
+##  3rd Qu.:0.0025984   3rd Qu.:0.0026550   3rd Qu.:0.0003944  
+##  Max.   :0.0210358   Max.   :0.0182248   Max.   :0.0434730  
+##  NA's   :2           NA's   :2           NA's   :2          
+##    vfruits_f        
+##  Min.   :0.0000000  
+##  1st Qu.:0.0001177  
+##  Median :0.0008814  
+##  Mean   :0.0047733  
+##  3rd Qu.:0.0068120  
+##  Max.   :0.0417147  
+##  NA's   :2
+
#Remove zero yields from regions with zero crop areas
+names(crop.earthstat)
+
##  [1] "NUTS_ID"   "berries_a" "brassic_a" "citrus_a"  "frtrees_a"
+##  [6] "grapes_a"  "greens_a"  "nuts_a"    "olives_a"  "oth_veg_a"
+## [11] "peas_a"    "rootveg_a" "tropfr_a"  "vfruits_a" "berries_y"
+## [16] "brassic_y" "citrus_y"  "frtrees_y" "grapes_y"  "greens_y" 
+## [21] "nuts_y"    "olives_y"  "oth_veg_y" "peas_y"    "rootveg_y"
+## [26] "tropfr_y"  "vfruits_y" "geo"       "sum_uaa"   "berries_f"
+## [31] "brassic_f" "citrus_f"  "frtrees_f" "grapes_f"  "greens_f" 
+## [36] "nuts_f"    "olives_f"  "oth_veg_f" "peas_f"    "rootveg_f"
+## [41] "tropfr_f"  "vfruits_f"
+
for (e in sub("_a$", "", names(crop.earthstat)[2:14])) {
+  #Columns 2:14 are the <crop>_a areas; only the trailing "_a" is stripped to get the crop name.
+  #A zero yield where the crop area is zero means "not grown", so it is set to NA.
+  crop.area <- crop.earthstat[[paste0(e, "_a")]]
+  #NA areas are excluded from the mask: an NA in a logical row index aborts the assignment
+  zero.area <- !is.na(crop.area) & crop.area == 0
+  crop.earthstat[zero.area, paste0(e, "_y")] <- NA
+}
+
+#Check for berries
+crop.earthstat[which(crop.earthstat$berries_a == 0), c('NUTS_ID', 'berries_a', 'berries_y')]
+
##     NUTS_ID berries_a berries_y
+## 12     BE10         0        NA
+## 28     CY00         0        NA
+## 108    FI20         0        NA
+## 117    ES70         0        NA
+## 125    FRA1         0        NA
+## 128    FRA2         0        NA
+## 148    IS00         0        NA
+## 164    FRA4         0        NA
+## 165    FRA5         0        NA
+## 174    MT00         0        NA
+## 187    LI00         0        NA
+## 204    NO06         0        NA
+## 205    NO07         0        NA
+## 228    PT20         0        NA
+## 229    PT30         0        NA
+## 268    SE33         0        NA
+## 298    UKI5         0        NA
+## 300    UKI3         0        NA
+## 301    UKI4         0        NA
+
#Read the livestock table; header = TRUE is spelled out because `head=T` depended on
+#partial argument matching and on T never having been reassigned
+livestock <- read.csv("C:/Users/mu5106sc/Dropbox/STAGS/SDG_data_eurostat/Final_database/Livestock/livestock_mean_allnuts.csv", header = TRUE)
+head(livestock)
+
##   geo   bovine milk_cows      pigs     sheep    goats
+## 1  BG  561.070 294.86250  594.0500 1230.0863 293.1725
+## 2  CH 1560.403 572.43667 1458.1100        NA       NA
+## 3  CY   59.265  25.80625  378.5325  333.1833 264.8317
+## 4  AL       NA        NA        NA        NA       NA
+## 5  CZ 1344.674 370.75875 1573.4913        NA       NA
+## 6  BE 2492.199 518.07875 6383.0975  109.5025  39.2350
+
#Need to adjust the NUTS2016 data to NUTS2013 codes
+livestock.2013nuts <- livestock
+names(livestock.2013nuts)
+
## [1] "geo"       "bovine"    "milk_cows" "pigs"      "sheep"     "goats"
+
#Keep the code as read in geo16; geo is the working copy that the recodes below overwrite
+livestock.2013nuts$geo16 <- livestock.2013nuts$geo
+livestock.2013nuts$geo <- as.character(livestock.2013nuts$geo)
+#Columns 4 and 7 of geodata@data carry geo and LEVL_CODE (the join adds only LEVL_CODE);
+#the key is named explicitly so an extra shared column can never silently become part of it
+livestock.2013nuts <- left_join(livestock.2013nuts, geodata@data[,c(4,7)], by = "geo")
+
## Joining, by = "geo"
+
#straight recodes: every Code.2016 flagged Change == "recoded" in nuts.conv gets its Code.2013
+recoded16 <- nuts.conv[nuts.conv$Change == "recoded", 'Code.2016']
+for(code16 in recoded16) {
+  code13 <- as.character(nuts.conv[nuts.conv$Code.2016 == code16, 'Code.2013'])
+  is.recoded <- livestock.2013nuts$geo16 == code16
+  livestock.2013nuts[is.recoded, 'geo'] <- code13
+}
+#check: list the recoded rows with their new (geo) and original (geo16) codes
+livestock.2013nuts[livestock.2013nuts$geo16 %in% recoded16, c('geo', 'geo16')]
+
##      geo geo16
+## 195 FR51  FRG0
+## 196 FR52  FRH0
+## 200 FR61  FRI1
+## 201 FR63  FRI2
+## 202 FR53  FRI3
+## 214 FR81  FRJ1
+## 215 FR62  FRJ2
+## 222 FR21  FRF2
+## 223 FR26  FRC1
+## 224 FR43  FRC2
+## 225 FR25  FRD1
+## 226 FR23  FRD2
+## 236 FR41  FRF3
+## 237 FR72  FRK1
+## 238 FR71  FRK2
+## 245 FR82  FRL0
+## 246 FR83  FRM0
+## 247 FRA1  FRY1
+## 259 FRA2  FRY2
+## 260 FRA3  FRY3
+## 261 FRA4  FRY4
+## 262 FRA5  FRY5
+## 317 FR30  FRE1
+## 318 FR22  FRE2
+## 319 FR42  FRF1
+## 441 PL32  PL82
+## 454 PL34  PL84
+## 486 PL11  PL71
+## 487 PL33  PL72
+## 488 PL31  PL81
+
#recode and relabel: rows read in as FRB0 are given the NUTS2013 code FR24
+livestock.2013nuts[livestock.2013nuts$geo16 == "FRB0", 'geo'] <- "FR24"
+
+#splits: rows for regions that were split all receive one shared geo code, so the later group_by(geo) sum adds them back together
+livestock.2013nuts[livestock.2013nuts$geo16 %in% c("LT01", "LT02"), 'geo'] <- "LT00"
+livestock.2013nuts[livestock.2013nuts$geo16 %in% c("HU11", "HU12"), 'geo'] <- "HU10"
+livestock.2013nuts[livestock.2013nuts$geo16 %in% c("PL91", "PL92"), 'geo'] <- "PL12"
+livestock.2013nuts[livestock.2013nuts$geo16 %in% c("UKM8", "UKM9"), 'geo'] <- "UKM3" #approximate split not including NUTS3 UKM24
+livestock.2013nuts[livestock.2013nuts$geo16 == "UKM7", 'geo'] <- "UKM2" #approximate recode still including NUTS3 UKM24
+
+#IE
+#Cannot translate data from new regions to old NUTS2013 so use NUTS0 data
+livestock.2013nuts[livestock.2013nuts$geo16 == 'IE',] #print the country-level IE row for inspection; nothing is modified here
+
##    geo   bovine milk_cows     pigs   sheep goats geo16 LEVL_CODE
+## 17  IE 6294.701  1148.925 1517.544 3395.07     0    IE         0
+
## Calculate sum over the split NUTS2 regions
+head(livestock.2013nuts)
+
##   geo   bovine milk_cows      pigs     sheep    goats geo16 LEVL_CODE
+## 1  BG  561.070 294.86250  594.0500 1230.0863 293.1725    BG         0
+## 2  CH 1560.403 572.43667 1458.1100        NA       NA    CH         0
+## 3  CY   59.265  25.80625  378.5325  333.1833 264.8317    CY         0
+## 4  AL       NA        NA        NA        NA       NA    AL         0
+## 5  CZ 1344.674 370.75875 1573.4913        NA       NA    CZ         0
+## 6  BE 2492.199 518.07875 6383.0975  109.5025  39.2350    BE         0
+
#Sum livestock numbers over rows that now share a geo code (the merged split regions).
+#na.rm = FALSE: a merged region stays NA if any of its parts is missing, rather than
+#reporting a partial total. LEVL_CODE is carried through as the group mean.
+livestock.2013nuts.sum <- livestock.2013nuts %>%
+  group_by(geo) %>%
+  summarise(bovine = sum(bovine, na.rm = FALSE),
+            milk_cows = sum(milk_cows, na.rm = FALSE),
+            pigs = sum(pigs, na.rm = FALSE),
+            sheep = sum(sheep, na.rm = FALSE),
+            goats = sum(goats, na.rm = FALSE),
+            LEVL_CODE = mean(LEVL_CODE, na.rm = FALSE)
+            )
+head(livestock.2013nuts.sum)
+
## # A tibble: 6 x 7
+##   geo   bovine milk_cows  pigs sheep goats LEVL_CODE
+##   <chr>  <dbl>     <dbl> <dbl> <dbl> <dbl>     <dbl>
+## 1 AL        NA        NA    NA    NA    NA         0
+## 2 AL0       NA        NA    NA    NA    NA         1
+## 3 AL01      NA        NA    NA    NA    NA         2
+## 4 AL011     NA        NA    NA    NA    NA         3
+## 5 AL012     NA        NA    NA    NA    NA         3
+## 6 AL013     NA        NA    NA    NA    NA         3
+
nrow(livestock.2013nuts.sum)
+
## [1] 2013
+
#Next, we calculate livestock density
+#Join UAA dataframe to livestock dataframe
+head(corine.aa.all.nuts)
+
##    geo   sum_uaa
+## 1 AT11  219300.0
+## 2 AT22  411556.2
+## 3 AT12 1004956.2
+## 4 AT13    6075.0
+## 5 AT21  191312.5
+## 6 AT31  582587.5
+
names(livestock.2013nuts.sum)
+
## [1] "geo"       "bovine"    "milk_cows" "pigs"      "sheep"     "goats"    
+## [7] "LEVL_CODE"
+
#Attach the agricultural area (sum_uaa) per region; join key named explicitly
+livestock.2013nuts.sum <- left_join(livestock.2013nuts.sum, corine.aa.all.nuts, by = "geo")
+
## Joining, by = "geo"
+
#Livestock density = 1000 * summed livestock numbers / sum_uaa, for the five livestock columns;
+#geo and LEVL_CODE are carried over unchanged
+stock.cols <- c("bovine", "milk_cows", "pigs", "sheep", "goats")
+livestock.dens <- as.data.frame(livestock.2013nuts.sum[, c("geo", stock.cols, "LEVL_CODE")])
+livestock.dens[, stock.cols] <- 1000 * livestock.2013nuts.sum[, stock.cols] / livestock.2013nuts.sum$sum_uaa
+
+summary(livestock.dens)
+
##      geo                bovine         milk_cows           pigs        
+##  Length:2013        Min.   :0.0000   Min.   :0.0000   Min.   : 0.0000  
+##  Class :character   1st Qu.:0.1716   1st Qu.:0.0423   1st Qu.: 0.1459  
+##  Mode  :character   Median :0.3327   Median :0.0865   Median : 0.3490  
+##                     Mean   :0.5387   Mean   :0.1582   Mean   : 0.9125  
+##                     3rd Qu.:0.7559   3rd Qu.:0.1951   3rd Qu.: 0.7265  
+##                     Max.   :2.8302   Max.   :1.0433   Max.   :17.2590  
+##                     NA's   :1676     NA's   :1676     NA's   :1684     
+##      sheep             goats           LEVL_CODE    
+##  Min.   : 0.0000   Min.   : 0.0000   Min.   :0.000  
+##  1st Qu.: 0.0698   1st Qu.: 0.0069   1st Qu.:3.000  
+##  Median : 0.2423   Median : 0.0335   Median :3.000  
+##  Mean   : 0.5418   Mean   : 0.1705   Mean   :2.658  
+##  3rd Qu.: 0.6588   3rd Qu.: 0.1024   3rd Qu.:3.000  
+##  Max.   :15.6759   Max.   :10.7158   Max.   :3.000  
+##  NA's   :1721      NA's   :1721
+
#Spurious sheep and goats
+livestock.dens[which(livestock.dens$sheep > 3), c('geo', 'sheep', 'goats')]
+
##       geo     sheep        goats
+## 32   AT13 15.675926 10.715843621
+## 717   EL4  3.771622  1.438400015
+## 718  EL41  3.010956  0.713143534
+## 725  EL43  5.225374  1.715879417
+## 749  EL54  3.422067  0.864509519
+## 763  EL63  3.081964  0.999435086
+## 1958  UKL  4.718953  0.007285458
+
as.data.frame(livestock.2013nuts.sum[livestock.2013nuts.sum$geo %in% livestock.2013nuts.sum$geo[grep('AT', livestock.2013nuts.sum$geo)], c('geo', 'sheep', 'goats', 'sum_uaa')])
+
##      geo     sheep     goats   sum_uaa
+## 1     AT 365.54375  76.32625 2677543.8
+## 2    AT1  78.67625  16.50000 1230331.2
+## 3   AT11   5.56375 241.68000  219300.0
+## 4  AT111        NA        NA        NA
+## 5  AT112        NA        NA        NA
+## 6  AT113        NA        NA        NA
+## 7   AT12  72.91375  15.29125 1004956.2
+## 8  AT121        NA        NA        NA
+## 9  AT122        NA        NA        NA
+## 10 AT123        NA        NA        NA
+## 11 AT124        NA        NA        NA
+## 12 AT125        NA        NA        NA
+## 13 AT126        NA        NA        NA
+## 14 AT127        NA        NA        NA
+## 15  AT13  95.23125  65.09875    6075.0
+## 16 AT130        NA        NA        NA
+## 17   AT2 111.83750  11.94250  602868.8
+## 18  AT21  44.95125   4.60375  191312.5
+## 19 AT211        NA        NA        NA
+## 20 AT212        NA        NA        NA
+## 21 AT213        NA        NA        NA
+## 22  AT22  66.88500   7.33750  411556.2
+## 23 AT221        NA        NA        NA
+## 24 AT222        NA        NA        NA
+## 25 AT223        NA        NA        NA
+## 26 AT224        NA        NA        NA
+## 27 AT225        NA        NA        NA
+## 28 AT226        NA        NA        NA
+## 29   AT3 175.02875  47.88500  844343.8
+## 30  AT31  55.56000  23.02125  582587.5
+## 31 AT311        NA        NA        NA
+## 32 AT312        NA        NA        NA
+## 33 AT313        NA        NA        NA
+## 34 AT314        NA        NA        NA
+## 35 AT315        NA        NA        NA
+## 36  AT32  28.94375   5.47375  115275.0
+## 37 AT321        NA        NA        NA
+## 38 AT322        NA        NA        NA
+## 39 AT323        NA        NA        NA
+## 40  AT33  79.39125  15.08125  112431.2
+## 41 AT331        NA        NA        NA
+## 42 AT332        NA        NA        NA
+## 43 AT333        NA        NA        NA
+## 44 AT334        NA        NA        NA
+## 45 AT335        NA        NA        NA
+## 46  AT34  11.13375   4.30875   34050.0
+## 47 AT341        NA        NA        NA
+## 48 AT342        NA        NA        NA
+
livestock.dens[which(livestock.dens$geo %in% livestock.2013nuts.sum$geo[grep('AT', livestock.2013nuts.sum$geo)]), c('geo', 'sheep', 'goats')]
+
##      geo       sheep       goats
+## 18    AT  0.13652205  0.02850607
+## 19   AT1  0.06394721  0.01341102
+## 20  AT11  0.02537050  1.10205198
+## 21 AT111          NA          NA
+## 22 AT112          NA          NA
+## 23 AT113          NA          NA
+## 24  AT12  0.07255415  0.01521584
+## 25 AT121          NA          NA
+## 26 AT122          NA          NA
+## 27 AT123          NA          NA
+## 28 AT124          NA          NA
+## 29 AT125          NA          NA
+## 30 AT126          NA          NA
+## 31 AT127          NA          NA
+## 32  AT13 15.67592593 10.71584362
+## 33 AT130          NA          NA
+## 34   AT2  0.18550887  0.01980945
+## 35  AT21  0.23496243  0.02406403
+## 36 AT211          NA          NA
+## 37 AT212          NA          NA
+## 38 AT213          NA          NA
+## 39  AT22  0.16251727  0.01782867
+## 40 AT221          NA          NA
+## 41 AT222          NA          NA
+## 42 AT223          NA          NA
+## 43 AT224          NA          NA
+## 44 AT225          NA          NA
+## 45 AT226          NA          NA
+## 46   AT3  0.20729561  0.05671268
+## 47  AT31  0.09536765  0.03951552
+## 48 AT311          NA          NA
+## 49 AT312          NA          NA
+## 50 AT313          NA          NA
+## 51 AT314          NA          NA
+## 52 AT315          NA          NA
+## 53  AT32  0.25108436  0.04748428
+## 54 AT321          NA          NA
+## 55 AT322          NA          NA
+## 56 AT323          NA          NA
+## 57  AT33  0.70613152  0.13413753
+## 58 AT331          NA          NA
+## 59 AT332          NA          NA
+## 60 AT333          NA          NA
+## 61 AT334          NA          NA
+## 62 AT335          NA          NA
+## 63  AT34  0.32698238  0.12654185
+## 64 AT341          NA          NA
+## 65 AT342          NA          NA
+
livestock.dens[livestock.dens$geo == "AT13", c('sheep', 'goats')] <- livestock.dens[livestock.dens$geo == "AT1", c('sheep', 'goats')] #AT13 shows 15.7 sheep and 10.7 goats per unit area on a sum_uaa of only 6075 (presumably an artefact of the tiny area), so use the parent NUTS1 (AT1) densities instead
+
+livestock.dens[livestock.dens$geo == "AT11", 'goats'] <- livestock.dens[livestock.dens$geo == "AT1", 'goats'] #AT11 goat total (241.68) is larger than that of its parent AT1 (16.5), so it is treated as spurious and replaced by the AT1 density
+
#Finally, run script to allocate NUTS1 or NUTS0 densities to NUTS2 where needed
+#Bookkeeping list with one (initially empty) slot per livestock variable and data level:
+#n2.dat / n1.dat / n0.dat hold the region codes whose NUTS2 / NUTS1 / NUTS0 value is used,
+#nuts0.na holds the codes left without data
+stock.vars <- names(livestock.dens)[2:6]
+level.tags <- c('n2.dat', 'n1.dat', 'n0.dat', 'nuts0.na')
+data.level.livestock <- vector("list", length(level.tags) * length(stock.vars))
+names(data.level.livestock) <- paste(rep(stock.vars, times = length(level.tags)),
+                                     rep(level.tags, each = length(stock.vars)),
+                                     sep = '.')
+labels(data.level.livestock)
+
##  [1] "bovine.n2.dat"      "milk_cows.n2.dat"   "pigs.n2.dat"       
+##  [4] "sheep.n2.dat"       "goats.n2.dat"       "bovine.n1.dat"     
+##  [7] "milk_cows.n1.dat"   "pigs.n1.dat"        "sheep.n1.dat"      
+## [10] "goats.n1.dat"       "bovine.n0.dat"      "milk_cows.n0.dat"  
+## [13] "pigs.n0.dat"        "sheep.n0.dat"       "goats.n0.dat"      
+## [16] "bovine.nuts0.na"    "milk_cows.nuts0.na" "pigs.nuts0.na"     
+## [19] "sheep.nuts0.na"     "goats.nuts0.na"
+
#Empty result table: one row per region in nuts@data, NUTS_ID plus one all-NA column per livestock variable
+stock.vars <- names(livestock.dens)[2:6]
+dbase.livestock <- as.data.frame(matrix(NA, nrow = nrow(nuts@data), ncol = 1 + length(stock.vars)))
+names(dbase.livestock) <- c("NUTS_ID", stock.vars)
+dbase.livestock$NUTS_ID <- nuts@data$NUTS_ID
+head(dbase.livestock)
+
##   NUTS_ID bovine milk_cows pigs sheep goats
+## 1    AT11     NA        NA   NA    NA    NA
+## 2    AT22     NA        NA   NA    NA    NA
+## 3    AT12     NA        NA   NA    NA    NA
+## 4    AT13     NA        NA   NA    NA    NA
+## 5    AT21     NA        NA   NA    NA    NA
+## 6    AT31     NA        NA   NA    NA    NA
+
nrow(dbase.livestock)
+
## [1] 320
+
#Fill dbase.livestock (NUTS2 rows) for each livestock variable, falling back to the parent
+#NUTS1 and then NUTS0 density where the NUTS2 value is missing.
+#Columns are referenced explicitly instead of via attach(): a global object named geo or
+#LEVL_CODE would mask attached columns, and an error inside the loop would leave the
+#data frame on the search path.
+dens.level <- livestock.dens$LEVL_CODE
+dens.geo <- livestock.dens$geo
+for (i in names(livestock.dens)[2:6]) {
+  dens.na <- is.na(livestock.dens[, i])
+
+  #A parent code is the region code minus its last character (NUTS2 -> NUTS1 -> NUTS0)
+  nuts2.na <- dens.geo[dens.level == 2 & dens.na]
+  nuts1 <- dens.geo[dens.level == 1 & dens.geo %in% sub(".$", "", nuts2.na)]
+  nuts1.na <- dens.geo[dens.geo %in% nuts1 & dens.na]
+  nuts0 <- dens.geo[dens.level == 0 & dens.geo %in% sub(".$", "", nuts1.na)]
+  nuts0.na <- dens.geo[dens.geo %in% nuts0 & dens.na]
+
+  #NUTS2 data
+  n2.dat <- dens.geo[!(dens.geo %in% nuts2.na) & dens.level == 2]
+  #NUTS1 data
+  n1.dat <- nuts1[!nuts1 %in% nuts1.na]
+  #NUTS0 data
+  n0.dat <- nuts0[!nuts0 %in% nuts0.na]
+  #NO DATA: nuts0.na
+
+  data.level.livestock[[paste(i, 'n2.dat', sep='.')]] <- n2.dat
+  data.level.livestock[[paste(i, 'n1.dat', sep='.')]] <- n1.dat
+  data.level.livestock[[paste(i, 'n0.dat', sep='.')]] <- n0.dat
+  data.level.livestock[[paste(i, 'nuts0.na', sep='.')]] <- nuts0.na
+
+  #Coarsest level first, so values from finer levels overwrite it afterwards.
+  #Patterns are anchored with ^ so a parent code only matches as a prefix of NUTS_ID.
+  for (e in n0.dat) {
+    dbase.livestock[grepl(paste0("^", e, ".."), dbase.livestock$NUTS_ID), i] <- livestock.dens[livestock.dens$geo == e, i]
+  }
+
+  for (e in n1.dat) {
+    dbase.livestock[grepl(paste0("^", e, "."), dbase.livestock$NUTS_ID), i] <- livestock.dens[livestock.dens$geo == e, i]
+  }
+
+  for (e in n2.dat) {
+    dbase.livestock[dbase.livestock$NUTS_ID == e, i] <- livestock.dens[livestock.dens$geo == e, i]
+  }
+}
+
+summary(dbase.livestock)
+
##     NUTS_ID        bovine         milk_cows            pigs        
+##  AT11   :  1   Min.   :0.0000   Min.   :0.00000   Min.   : 0.0000  
+##  AT12   :  1   1st Qu.:0.1913   1st Qu.:0.04591   1st Qu.: 0.1446  
+##  AT13   :  1   Median :0.4288   Median :0.10738   Median : 0.2961  
+##  AT21   :  1   Mean   :0.5536   Mean   :0.16021   Mean   : 0.8674  
+##  AT22   :  1   3rd Qu.:0.7943   3rd Qu.:0.19825   3rd Qu.: 0.7611  
+##  AT31   :  1   Max.   :2.8302   Max.   :1.04331   Max.   :17.2590  
+##  (Other):314   NA's   :8        NA's   :8         NA's   :34       
+##      sheep             goats        
+##  Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.:0.07257   1st Qu.:0.00660  
+##  Median :0.27634   Median :0.02031  
+##  Mean   :0.58262   Mean   :0.11852  
+##  3rd Qu.:0.90416   3rd Qu.:0.12719  
+##  Max.   :5.22537   Max.   :2.33485  
+##  NA's   :36        NA's   :36
+
head(dbase.livestock)
+
##   NUTS_ID     bovine   milk_cows      pigs      sheep      goats
+## 1    AT11 0.09572503 0.019237346 0.2189751 0.02537050 0.01341102
+## 2    AT22 0.78983432 0.197439597 1.9225909 0.16251727 0.01782867
+## 3    AT12 0.44254290 0.103387585 0.7942592 0.07255415 0.01521584
+## 4    AT13 0.01646091 0.004320988 0.0308642 0.06394721 0.01341102
+## 5    AT21 0.99191114 0.177059784 0.6761058 0.23496243 0.02406403
+## 6    AT31 0.98488639 0.287276589 1.9368813 0.09536765 0.03951552
+
tail(dbase.livestock)
+
##     NUTS_ID    bovine  milk_cows      pigs     sheep       goats
+## 315    UKD3 1.1849014 0.35018842 0.1661858 2.6134052 0.007820507
+## 316    TRC1 0.4287780 0.16379951        NA 0.9041624 0.291533492
+## 317    TRC2 0.4287780 0.16379951        NA 0.9041624 0.291533492
+## 318    UKD4 1.1849014 0.35018842 0.1661858 2.6134052 0.007820507
+## 319    TRC3 0.4287780 0.16379951        NA 0.9041624 0.291533492
+## 320    UKM6 0.8339355 0.08490498 0.1671604 2.3316352 0.001830575
+
#check data level for bovine as an example
+data.level.livestock$bovine.n2.dat
+
##   [1] "AT11" "AT12" "AT13" "AT21" "AT22" "AT31" "AT32" "AT33" "AT34" "BE10"
+##  [11] "BE21" "BE22" "BE23" "BE24" "BE25" "BE31" "BE32" "BE33" "BE34" "BE35"
+##  [21] "BG31" "BG32" "BG33" "BG34" "BG41" "BG42" "CH01" "CH02" "CH03" "CH04"
+##  [31] "CH05" "CH06" "CH07" "CY00" "CZ01" "CZ02" "CZ03" "CZ04" "CZ05" "CZ06"
+##  [41] "CZ07" "CZ08" "DE30" "DE50" "DE60" "DE80" "DEE0" "DEF0" "DEG0" "DK01"
+##  [51] "DK02" "DK03" "DK04" "DK05" "EE00" "EL30" "EL41" "EL42" "EL43" "EL51"
+##  [61] "EL52" "EL53" "EL54" "EL61" "EL62" "EL63" "EL64" "EL65" "ES11" "ES12"
+##  [71] "ES13" "ES21" "ES22" "ES23" "ES24" "ES30" "ES41" "ES42" "ES43" "ES51"
+##  [81] "ES52" "ES53" "ES61" "ES62" "ES63" "ES64" "ES70" "FI19" "FI1B" "FI1C"
+##  [91] "FI1D" "FI20" "FR10" "FR21" "FR22" "FR23" "FR24" "FR25" "FR26" "FR30"
+## [101] "FR41" "FR42" "FR43" "FR51" "FR52" "FR53" "FR61" "FR62" "FR63" "FR71"
+## [111] "FR72" "FR81" "FR82" "FR83" "FRA1" "FRA2" "FRA3" "FRA4" "FRA5" "HR03"
+## [121] "HR04" "HU10" "HU21" "HU22" "HU23" "HU31" "HU32" "HU33" "IS00" "ITC1"
+## [131] "ITC2" "ITC3" "ITC4" "ITF1" "ITF2" "ITF3" "ITF4" "ITF5" "ITF6" "ITG1"
+## [141] "ITG2" "ITH1" "ITH2" "ITH3" "ITH4" "ITH5" "ITI1" "ITI2" "ITI3" "ITI4"
+## [151] "LT00" "LU00" "LV00" "ME00" "MK00" "MT00" "NL11" "NL12" "NL13" "NL21"
+## [161] "NL22" "NL23" "NL31" "NL32" "NL33" "NL34" "NL41" "NL42" "PL11" "PL12"
+## [171] "PL21" "PL22" "PL31" "PL32" "PL33" "PL34" "PL41" "PL42" "PL43" "PL51"
+## [181] "PL52" "PL61" "PL62" "PL63" "PT11" "PT15" "PT16" "PT17" "PT18" "PT20"
+## [191] "PT30" "RO11" "RO12" "RO21" "RO22" "RO31" "RO32" "RO41" "RO42" "SE11"
+## [201] "SE12" "SE21" "SE22" "SE23" "SE31" "SE32" "SE33" "SI03" "SI04" "SK01"
+## [211] "SK02" "SK03" "SK04" "UKN0"
+
data.level.livestock$bovine.n1.dat
+
##  [1] "DE1" "DE2" "DE4" "DE7" "DE9" "DEA" "DEB" "DEC" "DED" "IE0" "UKC"
+## [12] "UKD" "UKE" "UKF" "UKG" "UKH" "UKI" "UKJ" "UKK" "UKL" "UKM"
+
data.level.livestock$bovine.n0.dat
+
## [1] "TR"
+
data.level.livestock$bovine.nuts0.na
+
## [1] "AL" "LI" "NO" "RS"
+
names(dbase.clean.gis)
+
##  [1] "NUTS_ID"        "risk_pov"       "train35bas"     "train35ful"    
+##  [5] "train_bas"      "train_ful"      "nitr_high"      "nitr_mod"      
+##  [9] "nitr_poor"      "irrigated"      "forest"         "artific"       
+## [13] "soil_loss"      "com_birds"      "farm_birds"     "org_farm"      
+## [17] "energy_rt"      "renew_pct"      "renew_prod"     "gross_N"       
+## [21] "gross_P"        "conv_till"      "cons_till"      "zero_till"     
+## [25] "nfert"          "arable"         "grassland"      "permanent"     
+## [29] "soil_prod"      "geo"            "irrig_rate"     "afi_awu"       
+## [33] "gva_awu"        "labour_use"     "pest_rate"      "tot_gdp_cap"   
+## [37] "tot_pps_cap"    "emp_rate_15_64" "tot_unemp"      "yth_unemp"     
+## [41] "rur_gdp_cap"    "rur_pps_cap"    "int_gdp_cap"    "int_pps_cap"   
+## [45] "urb_gdp_cap"    "urb_pps_cap"    "C_factor"       "emi_co2eq"     
+## [49] "emi_nh3"        "emi_pm10"       "emi_pm25"       "soc"           
+## [53] "biol_threats"   "nat2000_ag"     "nat2000_pr"     "cal_frac"      
+## [57] "precip"         "deg_days"       "crop_suit"
+
#Edit a few names to avoid truncation: the columns renamed here are exactly those whose names exceed 10 characters in the listing above (presumably a 10-character field-name limit on export, e.g. shapefile/DBF - TODO confirm). Indices are positions in that listing; re-check them if columns are added or reordered upstream.
+names(dbase.clean.gis)[c(36,37,41:46)] <- c("gdp_cap", "pps_cap", sub("_cap", "", names(dbase.clean.gis)[41:46]))  # 36 tot_gdp_cap -> gdp_cap, 37 tot_pps_cap -> pps_cap; 41:46 (rur/int/urb _gdp_cap and _pps_cap) lose the _cap suffix
+names(dbase.clean.gis)[38] <- "emp_rate"  # position 38 was emp_rate_15_64
+names(dbase.clean.gis)[53] <- "bio_threat"  # position 53 was biol_threats
+
+names(dbase.crop)  # inspect: NUTS_ID plus twenty crop columns with the _f suffix
+
##  [1] "NUTS_ID"   "rye_f"     "barley_f"  "maize_f"   "tritic_f" 
+##  [6] "sorghum_f" "oth_cer_f" "rice_f"    "pasture_f" "rape_f"   
+## [11] "sunflow_f" "pulses_f"  "potato_f"  "sugbeet_f" "oth_rt_f" 
+## [16] "wheat_f"   "oats_f"    "oth_oil_f" "fibre_f"   "oth_ind_f"
+## [21] "fodder_f"
+
names(dbase.yield)  # inspect: NUTS_ID plus the same twenty crops as dbase.crop, here with the _y suffix
+
##  [1] "NUTS_ID"   "rye_y"     "barley_y"  "maize_y"   "tritic_y" 
+##  [6] "sorghum_y" "oth_cer_y" "rice_y"    "pasture_y" "rape_y"   
+## [11] "sunflow_y" "pulses_y"  "potato_y"  "sugbeet_y" "oth_rt_y" 
+## [16] "wheat_y"   "oats_y"    "oth_oil_y" "fibre_y"   "oth_ind_y"
+## [21] "fodder_y"
+
names(crop.earthstat)  # print column names with positions; the -c(2:14,28,29) drop in the join further down relies on exactly this order
+
##  [1] "NUTS_ID"   "berries_a" "brassic_a" "citrus_a"  "frtrees_a"
+##  [6] "grapes_a"  "greens_a"  "nuts_a"    "olives_a"  "oth_veg_a"
+## [11] "peas_a"    "rootveg_a" "tropfr_a"  "vfruits_a" "berries_y"
+## [16] "brassic_y" "citrus_y"  "frtrees_y" "grapes_y"  "greens_y" 
+## [21] "nuts_y"    "olives_y"  "oth_veg_y" "peas_y"    "rootveg_y"
+## [26] "tropfr_y"  "vfruits_y" "geo"       "sum_uaa"   "berries_f"
+## [31] "brassic_f" "citrus_f"  "frtrees_f" "grapes_f"  "greens_f" 
+## [36] "nuts_f"    "olives_f"  "oth_veg_f" "peas_f"    "rootveg_f"
+## [41] "tropfr_f"  "vfruits_f"
+
names(dbase.livestock)  # inspect: NUTS_ID plus bovine, milk_cows, pigs, sheep, goats
+
## [1] "NUTS_ID"   "bovine"    "milk_cows" "pigs"      "sheep"     "goats"
+
dbase.final <- left_join(dbase.clean.gis[,-30], dbase.crop)  # drop column 30 (geo) by position, then add the _f crop columns; no by= is given, so dplyr joins on the only shared column, NUTS_ID (see message below)
+
## Joining, by = "NUTS_ID"
+
dbase.final <- left_join(dbase.final, dbase.yield)  # add the _y columns of dbase.yield, matched on NUTS_ID; left join keeps every row of dbase.final
+
## Joining, by = "NUTS_ID"
+
dbase.final <- left_join(dbase.final, crop.earthstat[,-c(2:14,28,29)])  # positional drop of the thirteen _a columns (2:14), geo (28) and sum_uaa (29), keeping NUTS_ID, _y and _f; NUTS_ID is coerced to character here (warning below)
+
## Joining, by = "NUTS_ID"
+
## Warning: Column `NUTS_ID` joining factors with different levels, coercing
+## to character vector
+
dbase.final <- left_join(dbase.final, dbase.livestock)  # add bovine, milk_cows, pigs, sheep, goats on NUTS_ID; dbase.livestock keeps NUTS_ID as a factor, so it is coerced to character (warning below)
+
## Joining, by = "NUTS_ID"
+
## Warning: Column `NUTS_ID` joining character vector and factor, coercing
+## into character vector
+
head(dbase.final)  # spot-check the first six rows of the assembled table after all four joins
+
##   NUTS_ID risk_pov train35bas train35ful train_bas train_ful nitr_high
+## 1    AT11 13.73333  0.1375661  0.3333333 0.1243050 0.1779190  64.58924
+## 2    AT22 17.26667  0.2160980  0.3648294 0.2017089 0.2413594  64.58924
+## 3    AT12 13.83333  0.2084775  0.4809689 0.2534787 0.3449437  64.58924
+## 4    AT13 27.23333  0.3750000  0.7500000 0.1753247 0.4740260  64.58924
+## 5    AT21 17.20000  0.2306238  0.3648393 0.2076173 0.2250348  64.58924
+## 6    AT31 15.00000  0.2508418  0.4284512 0.2014381 0.2857610  64.58924
+##   nitr_mod nitr_poor irrigated    forest    artific soil_loss com_birds
+## 1 20.20774  15.20302     5.850 0.3161203 0.04355635     1.842        NA
+## 2 20.20774  15.20302     0.325 0.6127954 0.03306278     5.804        NA
+## 3 20.20774  15.20302     2.650 0.4286079 0.04875064     2.236        NA
+## 4 20.20774  15.20302    10.525 0.1469534 0.73118280     1.014        NA
+## 5 20.20774  15.20302     0.100 0.5998934 0.03047416    11.671        NA
+## 6 20.20774  15.20302     0.125 0.4027358 0.04900973     3.791        NA
+##   farm_birds org_farm  energy_rt renew_pct renew_prod  gross_N  gross_P
+## 1      65.98 19.43430 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 2      65.98 12.80858 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 3      65.98 13.41584 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 4      65.98 16.44137 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 5      65.98 10.68078 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 6      65.98 12.31071 0.08319988  32.65559   7.068917 32.57143 1.833333
+##   conv_till  cons_till   zero_till    nfert   arable grassland  permanent
+## 1 0.6182190 0.31992068 0.025012794 7.684000 83.64566  8.715722  7.5451998
+## 2 0.8887161 0.05005656 0.024109163 7.551429 37.02489 58.694493  4.2139773
+## 3 0.6226791 0.32803537 0.019896256 7.452800 76.22380 20.140837  3.5858503
+## 4 0.5109890 0.40476190 0.007326007 7.497000 79.80050 10.099751 10.0997506
+## 5 0.8592546 0.05928605 0.032552288 8.131500 28.48779 71.285286  0.1679223
+## 6 0.8442576 0.12043311 0.014198645 9.138333 56.49367 43.164202  0.2648111
+##   soil_prod  irrig_rate   afi_awu  gva_awu  labour_use pest_rate gdp_cap
+## 1         6  16.6922025 24788.951 25367.72 0.036978892  1.335093   26700
+## 2         6   2.2086896 13958.345 18388.41 0.034606425  1.335093   34800
+## 3         6  10.7753945 23056.271 25423.66 0.035521808  1.335093   31800
+## 4         6 208.4609053  6431.103  9952.26 0.001572703  1.335093   47300
+## 5         6   0.5770663 10741.948 11827.48 0.028435272  1.335093   32700
+## 6         6   0.5491021 15263.545 22028.20 0.030650579  1.335093   39600
+##   pps_cap emp_rate tot_unemp yth_unemp  rur_gdp  rur_pps  int_gdp  int_pps
+## 1   24600 69.81530       5.7      15.0 26690.97 24628.47       NA       NA
+## 2   32100 71.37077       5.1      10.2 28015.67 25851.10 42289.66 39020.69
+## 3   29300 73.06232       5.2       9.3 29392.91 27122.78 37037.74 34176.67
+## 4   43700 64.90683      11.3      20.3       NA       NA       NA       NA
+## 5   30200 69.88481       5.4      12.2 27340.58 25228.26 38025.00 35085.71
+## 6   36500 75.46507       4.5       7.6 33453.92 30867.51 48936.17 45154.26
+##    urb_gdp  urb_pps C_factor emi_co2eq   emi_nh3  emi_pm10 emi_pm25
+## 1       NA       NA 0.204701  653096.3  8690.371 1025.7531 818.8200
+## 2       NA       NA 0.305978  954802.7 13155.494  850.9032 331.4128
+## 3 27574.60 25441.27 0.195147 1136117.0 14253.957 1285.1966 856.1814
+## 4 47307.69 43651.88 0.188655  457098.6  6375.132  609.2881 426.0527
+## 5       NA       NA 0.278675  888623.1 11187.491  471.5642 220.7500
+## 6       NA       NA 0.241675 1943324.3 22472.061 1298.3660 629.6217
+##         soc bio_threat  nat2000_ag nat2000_pr  cal_frac    precip deg_days
+## 1  59.33579  0.2693722 0.202690378 0.27756336 0.5245497  666.3237 1965.423
+## 2  97.49513  0.2525372 0.084071132 0.15248896 0.2280712 1149.1807 1264.833
+## 3  64.25874  0.2670201 0.139216259 0.22137659 0.5354631  714.3840 1764.210
+## 4  49.22190  0.2611445 0.182098765 0.13365361 0.5256335  580.7077 2040.184
+## 5 101.03633  0.2280257 0.008167266 0.06126958 0.2194208 1357.1470 1095.291
+## 6 100.91691  0.2672487 0.009193898 0.06533375 0.3612959 1057.0594 1537.084
+##   crop_suit       rye_f   barley_f    maize_f    tritic_f    sorghum_f
+## 1  5.319588 0.023158915 0.04191746 0.10322047 0.010459416 0.0019037848
+## 2  3.217877 0.004659144 0.02072924 0.12555999 0.009728318 0.0014761044
+## 3  4.347383 0.031413059 0.08222000 0.06573296 0.023418930 0.0008681970
+## 4  5.466667 0.040740741 0.04567901 0.02242798 0.008436214 0.0016460905
+## 5  2.668044 0.004181640 0.02944136 0.08118262 0.016478275 0.0003136230
+## 6  3.925520 0.013497972 0.07211149 0.08582187 0.027766215 0.0001695024
+##      oth_cer_f rice_f   pasture_f      rape_f    sunflow_f    pulses_f
+## 1 0.0088828089      0 0.016951664 0.042230962 0.0163702690 0.017379161
+## 2 0.0007775365      0 0.029889596 0.001075187 0.0006074504 0.001716047
+## 3 0.0051265913      0 0.016059157 0.028237548 0.0174845920 0.012741848
+## 4 0.0177777778      0 0.002674897 0.032921811 0.0039976484 0.014403292
+## 5 0.0009722313      0 0.053812480 0.000313623 0.0008232604 0.003711205
+## 6 0.0007895810      0 0.014203875 0.018338447 0.0007960178 0.007065462
+##      potato_f    sugbeet_f     oth_rt_f    wheat_f      oats_f  oth_oil_f
+## 1 0.004103967 0.0187300502 7.295942e-05 0.22119243 0.006499088 0.08481532
+## 2 0.001737308 0.0006074504 4.373643e-05 0.01938374 0.002786982 0.04561345
+## 3 0.017488323 0.0360314193 6.567450e-05 0.19060904 0.014256840 0.02270124
+## 4 0.013374486 0.0467078189 0.000000e+00 0.24773663 0.003621399 0.01255144
+## 5 0.001973211 0.0001045410 0.000000e+00 0.01787216 0.006377001 0.01950996
+## 6 0.002589740 0.0100478469 9.955586e-05 0.08748686 0.017608514 0.02501556
+##        fibre_f    oth_ind_f   fodder_f    rye_y barley_y   maize_y
+## 1 2.735978e-04 0.0009746922 0.04886001 3.516367 4.435137  8.744878
+## 2 7.289405e-05 0.0005181552 0.04823763 4.700782 5.715165 11.440518
+## 3 4.796229e-04 0.0029031115 0.06777658 4.357949 4.924511  9.361459
+## 4 0.000000e+00 0.0016460905 0.03168724 4.030303 4.468468  8.798165
+## 5 5.227050e-05 0.0002744201 0.07297615 4.592187 5.394585 10.736016
+## 6 1.235866e-04 0.0026176325 0.09126311 4.503259 6.279062 10.027501
+##   tritic_y sorghum_y oth_cer_y rice_y pasture_y   rape_y sunflow_y
+## 1 3.649591  5.164671  3.868583     NA  5.264291 2.800108  2.344708
+## 2 6.234155  8.588477  3.681250     NA  6.965349 3.519774  2.430000
+## 3 5.100860  6.237822  3.901009     NA  6.666021 3.139327  2.623319
+## 4 5.024390  3.000000  3.944444     NA  5.615385 2.918750  2.647059
+## 5 5.808485  6.694444  3.817204     NA  7.327343 2.416667  1.912698
+## 6 5.474152  5.848101  3.869565     NA  7.502266 3.812215  2.097035
+##   pulses_y potato_y sugbeet_y oth_rt_y  wheat_y   oats_y oth_oil_y
+## 1 2.020007 34.06111  68.85210 47.12500 4.418312 3.320119  2.106922
+## 2 2.610619 27.19406  67.16500 51.00000 6.405516 3.681343  1.061669
+## 3 2.295685 31.89232  71.15838 61.30303 5.197458 3.752801  1.601008
+## 4 2.214286 34.87692  69.50661 58.60976 4.702658 2.909091  2.016393
+## 5 3.035211 24.40728  57.81250 40.91667 5.443334 3.707787  2.669792
+## 6 2.751594 30.02403  77.22785 63.89655 6.893955 4.364527  2.660348
+##    fibre_y oth_ind_y fodder_y berries_y brassic_y citrus_y frtrees_y
+## 1 7.777778  1.333333 18.05331  29.18171  44.06400       NA 106.98343
+## 2 7.666667  6.587339 29.00264  12.23637  18.54004       NA  45.09310
+## 3 4.966805  1.894773 23.44168  23.90570  36.09730       NA  87.64099
+## 4 5.134100        NA 11.72727  17.16571  25.92000       NA  62.93143
+## 5 4.800000 10.952381 31.81458  14.36696  21.69391       NA  52.67087
+## 6 5.361111  3.804918 28.50815  20.89739  31.55478       NA  76.61217
+##   grapes_y greens_y   nuts_y    olives_y oth_veg_y    peas_y rootveg_y
+## 1 8.273220 61.19029 2.632571 0.000000000  29.05543 18.087999  93.43200
+## 2 3.134204 25.61430 1.210000 0.007077465  12.11144  7.590493  39.11891
+## 3 5.295910 50.12718 2.156607          NA  23.80225 14.817717  76.53964
+## 4 3.012085 35.99429 1.548571          NA  17.09143 10.639999  54.96000
+## 5 4.908311 30.12565 1.296087 0.000000000  14.30478  8.905217  45.99913
+## 6       NA 43.81913 1.885217          NA  20.80696 12.953043  66.90783
+##      tropfr_y vfruits_y   berries_f    brassic_f citrus_f   frtrees_f
+## 1          NA  258.4971 0.003187124 0.0010235525        0 0.015481333
+## 2 0.002429577  107.9879 0.001521483 0.0005296015        0 0.007548431
+## 3          NA  211.7612 0.002908851 0.0009341843        0 0.014129630
+## 4          NA  152.0571 0.001987285 0.0006382214        0 0.009653162
+## 5          NA  127.2652 0.001397762 0.0004488946        0 0.006789575
+## 6          NA  185.1130 0.002057785 0.0006608626        0 0.009995613
+##      grapes_f     greens_f      nuts_f     olives_f    oth_veg_f
+## 1 0.036878371 0.0011948229 0.003024814 4.088179e-26 0.0015673054
+## 2 0.006011221 0.0005815269 0.001445650 2.015534e-05 0.0007469377
+## 3 0.033940373 0.0010905008 0.002760712 0.000000e+00 0.0014304611
+## 4 0.023496916 0.0007450146 0.001886079 0.000000e+00 0.0009772707
+## 5 0.005030330 0.0005240078 0.001326578 2.161636e-26 0.0006873657
+## 6 0.000000000 0.0007714444 0.001952989 0.000000e+00 0.0010119398
+##         peas_f    rootveg_f    tropfr_f    vfruits_f     bovine
+## 1 0.0008360319 0.0018775143 0.00000e+00 0.0004477230 0.09572503
+## 2 0.0004102481 0.0009143129 3.07412e-06 0.0002370161 0.78983432
+## 3 0.0007630364 0.0017135852 0.00000e+00 0.0004086315 0.44254290
+## 4 0.0005212956 0.0011706969 0.00000e+00 0.0002791712 0.01646091
+## 5 0.0003666545 0.0008234126 0.00000e+00 0.0001963558 0.99191114
+## 6 0.0005397888 0.0012122281 0.00000e+00 0.0002890750 0.98488639
+##     milk_cows      pigs      sheep      goats
+## 1 0.019237346 0.2189751 0.02537050 0.01341102
+## 2 0.197439597 1.9225909 0.16251727 0.01782867
+## 3 0.103387585 0.7942592 0.07255415 0.01521584
+## 4 0.004320988 0.0308642 0.06394721 0.01341102
+## 5 0.177059784 0.6761058 0.23496243 0.02406403
+## 6 0.287276589 1.9368813 0.09536765 0.03951552
+
names(dbase.final)  # confirm the final layout: 129 columns, with geo absent and the shortened names in place
+
##   [1] "NUTS_ID"    "risk_pov"   "train35bas" "train35ful" "train_bas" 
+##   [6] "train_ful"  "nitr_high"  "nitr_mod"   "nitr_poor"  "irrigated" 
+##  [11] "forest"     "artific"    "soil_loss"  "com_birds"  "farm_birds"
+##  [16] "org_farm"   "energy_rt"  "renew_pct"  "renew_prod" "gross_N"   
+##  [21] "gross_P"    "conv_till"  "cons_till"  "zero_till"  "nfert"     
+##  [26] "arable"     "grassland"  "permanent"  "soil_prod"  "irrig_rate"
+##  [31] "afi_awu"    "gva_awu"    "labour_use" "pest_rate"  "gdp_cap"   
+##  [36] "pps_cap"    "emp_rate"   "tot_unemp"  "yth_unemp"  "rur_gdp"   
+##  [41] "rur_pps"    "int_gdp"    "int_pps"    "urb_gdp"    "urb_pps"   
+##  [46] "C_factor"   "emi_co2eq"  "emi_nh3"    "emi_pm10"   "emi_pm25"  
+##  [51] "soc"        "bio_threat" "nat2000_ag" "nat2000_pr" "cal_frac"  
+##  [56] "precip"     "deg_days"   "crop_suit"  "rye_f"      "barley_f"  
+##  [61] "maize_f"    "tritic_f"   "sorghum_f"  "oth_cer_f"  "rice_f"    
+##  [66] "pasture_f"  "rape_f"     "sunflow_f"  "pulses_f"   "potato_f"  
+##  [71] "sugbeet_f"  "oth_rt_f"   "wheat_f"    "oats_f"     "oth_oil_f" 
+##  [76] "fibre_f"    "oth_ind_f"  "fodder_f"   "rye_y"      "barley_y"  
+##  [81] "maize_y"    "tritic_y"   "sorghum_y"  "oth_cer_y"  "rice_y"    
+##  [86] "pasture_y"  "rape_y"     "sunflow_y"  "pulses_y"   "potato_y"  
+##  [91] "sugbeet_y"  "oth_rt_y"   "wheat_y"    "oats_y"     "oth_oil_y" 
+##  [96] "fibre_y"    "oth_ind_y"  "fodder_y"   "berries_y"  "brassic_y" 
+## [101] "citrus_y"   "frtrees_y"  "grapes_y"   "greens_y"   "nuts_y"    
+## [106] "olives_y"   "oth_veg_y"  "peas_y"     "rootveg_y"  "tropfr_y"  
+## [111] "vfruits_y"  "berries_f"  "brassic_f"  "citrus_f"   "frtrees_f" 
+## [116] "grapes_f"   "greens_f"   "nuts_f"     "olives_f"   "oth_veg_f" 
+## [121] "peas_f"     "rootveg_f"  "tropfr_f"   "vfruits_f"  "bovine"    
+## [126] "milk_cows"  "pigs"       "sheep"      "goats"
+
summary(dbase.final)  # per-column ranges and NA counts for the assembled table (320 rows), to see where the joins left gaps
+
##    NUTS_ID             risk_pov        train35bas        train35ful    
+##  Length:320         Min.   : 9.971   Min.   :0.00000   Min.   :0.0000  
+##  Class :character   1st Qu.:18.586   1st Qu.:0.09613   1st Qu.:0.1264  
+##  Mode  :character   Median :23.514   Median :0.22800   Median :0.2600  
+##                     Mean   :25.960   Mean   :0.26424   Mean   :0.2738  
+##                     3rd Qu.:29.680   3rd Qu.:0.35996   3rd Qu.:0.3825  
+##                     Max.   :54.150   Max.   :0.88217   Max.   :0.8550  
+##                     NA's   :2        NA's   :52        NA's   :52      
+##    train_bas         train_ful         nitr_high          nitr_mod     
+##  Min.   :0.01171   Min.   :0.00188   Min.   :  4.082   Min.   : 0.000  
+##  1st Qu.:0.10627   1st Qu.:0.04939   1st Qu.: 66.302   1st Qu.: 6.533  
+##  Median :0.19961   Median :0.12807   Median : 70.505   Median :15.896  
+##  Mean   :0.26250   Mean   :0.15796   Mean   : 75.328   Mean   :13.773  
+##  3rd Qu.:0.36488   3rd Qu.:0.25108   3rd Qu.: 87.591   3rd Qu.:18.416  
+##  Max.   :0.94840   Max.   :0.50303   Max.   :100.000   Max.   :60.000  
+##  NA's   :50        NA's   :50        NA's   :44        NA's   :44      
+##    nitr_poor        irrigated           forest           artific       
+##  Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.: 4.106   1st Qu.: 0.3312   1st Qu.:0.08957   1st Qu.:0.02056  
+##  Median : 8.883   Median : 1.2250   Median :0.24904   Median :0.04020  
+##  Mean   :10.898   Mean   : 5.7571   Mean   :0.25141   Mean   :0.09023  
+##  3rd Qu.:15.385   3rd Qu.: 6.5000   3rd Qu.:0.37365   3rd Qu.:0.08193  
+##  Max.   :68.367   Max.   :74.5500   Max.   :0.75860   Max.   :1.00000  
+##  NA's   :44       NA's   :28                                           
+##    soil_loss         com_birds       farm_birds        org_farm     
+##  Min.   : 0.0300   Min.   :54.92   Min.   : 63.78   Min.   : 0.000  
+##  1st Qu.: 0.7047   1st Qu.:62.14   1st Qu.: 81.34   1st Qu.: 1.200  
+##  Median : 1.5005   Median :69.50   Median : 83.82   Median : 2.687  
+##  Mean   : 2.5482   Mean   :69.70   Mean   : 81.90   Mean   : 4.056  
+##  3rd Qu.: 2.9420   3rd Qu.:81.30   3rd Qu.: 85.30   3rd Qu.: 5.204  
+##  Max.   :17.6050   Max.   :97.22   Max.   :116.60   Max.   :27.487  
+##  NA's   :44        NA's   :158     NA's   :94       NA's   :28      
+##    energy_rt         renew_pct        renew_prod         gross_N       
+##  Min.   :0.00000   Min.   : 0.000   Min.   : 0.8855   Min.   :  2.857  
+##  1st Qu.:0.03503   1st Qu.: 3.074   1st Qu.: 6.2422   1st Qu.: 41.821  
+##  Median :0.06128   Median : 6.124   Median : 8.3156   Median : 67.333  
+##  Mean   :0.15052   Mean   :11.225   Mean   :12.4318   Mean   : 67.553  
+##  3rd Qu.:0.09725   3rd Qu.:22.515   3rd Qu.:18.0797   3rd Qu.: 85.988  
+##  Max.   :1.75149   Max.   :41.011   Max.   :37.7797   Max.   :190.167  
+##  NA's   :44        NA's   :82       NA's   :45        NA's   :30       
+##     gross_P         conv_till         cons_till         zero_till      
+##  Min.   :-6.500   Min.   :0.08646   Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.:-1.667   1st Qu.:0.46182   1st Qu.:0.05077   1st Qu.:0.00920  
+##  Median : 1.833   Median :0.61740   Median :0.12499   Median :0.01843  
+##  Mean   : 1.941   Mean   :0.60410   Mean   :0.18031   Mean   :0.03000  
+##  3rd Qu.: 4.714   3rd Qu.:0.73832   3rd Qu.:0.28382   3rd Qu.:0.04003  
+##  Max.   :31.000   Max.   :0.99752   Max.   :0.65066   Max.   :0.19303  
+##  NA's   :30       NA's   :53        NA's   :53        NA's   :53       
+##      nfert            arable        grassland       permanent      
+##  Min.   : 0.000   Min.   : 0.00   Min.   : 0.00   Min.   : 0.0000  
+##  1st Qu.: 6.448   1st Qu.:39.68   1st Qu.:17.20   1st Qu.: 0.3546  
+##  Median : 9.917   Median :62.28   Median :32.97   Median : 1.1324  
+##  Mean   :10.975   Mean   :57.72   Mean   :35.74   Mean   : 5.7197  
+##  3rd Qu.:14.254   3rd Qu.:78.19   3rd Qu.:48.62   3rd Qu.: 5.6520  
+##  Max.   :29.456   Max.   :99.28   Max.   :98.84   Max.   :64.6743  
+##  NA's   :11       NA's   :44      NA's   :44      NA's   :44       
+##    soil_prod      irrig_rate          afi_awu          gva_awu        
+##  Min.   :3.00   Min.   :   0.000   Min.   : -3221   Min.   :   697.4  
+##  1st Qu.:6.00   1st Qu.:   1.181   1st Qu.: 11878   1st Qu.: 10428.3  
+##  Median :6.00   Median :   7.396   Median : 20559   Median : 24639.3  
+##  Mean   :6.45   Mean   : 157.441   Mean   : 24680   Mean   : 26611.6  
+##  3rd Qu.:7.00   3rd Qu.:  82.321   3rd Qu.: 34388   3rd Qu.: 38162.3  
+##  Max.   :8.00   Max.   :4156.725   Max.   :107266   Max.   :122952.6  
+##  NA's   :51     NA's   :22         NA's   :29       NA's   :29        
+##    labour_use        pest_rate          gdp_cap          pps_cap      
+##  Min.   :0.00000   Min.   : 0.3874   Min.   :  3800   Min.   :  8200  
+##  1st Qu.:0.01049   1st Qu.: 1.2195   1st Qu.: 16350   1st Qu.: 19750  
+##  Median :0.01976   Median : 1.8836   Median : 26500   Median : 24750  
+##  Mean   :0.03594   Mean   : 2.4120   Mean   : 26832   Mean   : 26526  
+##  3rd Qu.:0.04539   3rd Qu.: 3.1595   3rd Qu.: 33750   3rd Qu.: 31025  
+##  Max.   :0.22557   Max.   :13.1415   Max.   :191400   Max.   :163500  
+##  NA's   :44        NA's   :37        NA's   :44       NA's   :44      
+##     emp_rate       tot_unemp        yth_unemp        rur_gdp     
+##  Min.   :37.87   Min.   : 2.100   Min.   : 4.20   Min.   : 3117  
+##  1st Qu.:62.16   1st Qu.: 4.700   1st Qu.:10.60   1st Qu.:11513  
+##  Median :67.91   Median : 6.850   Median :17.00   Median :22369  
+##  Mean   :66.82   Mean   : 8.767   Mean   :21.39   Mean   :20844  
+##  3rd Qu.:73.84   3rd Qu.:10.600   3rd Qu.:29.10   3rd Qu.:27611  
+##  Max.   :81.42   Max.   :31.300   Max.   :69.10   Max.   :46295  
+##  NA's   :44      NA's   :44       NA's   :44      NA's   :164    
+##     rur_pps         int_gdp         int_pps         urb_gdp      
+##  Min.   : 6723   Min.   : 3585   Min.   : 7737   Min.   :  5329  
+##  1st Qu.:15264   1st Qu.:13307   1st Qu.:17891   1st Qu.: 23998  
+##  Median :20976   Median :25837   Median :24042   Median : 30497  
+##  Mean   :21122   Mean   :24197   Mean   :24202   Mean   : 33196  
+##  3rd Qu.:25354   3rd Qu.:32205   3rd Qu.:28793   3rd Qu.: 39796  
+##  Max.   :42716   Max.   :88303   Max.   :73498   Max.   :191423  
+##  NA's   :164     NA's   :112     NA's   :112     NA's   :184     
+##     urb_pps          C_factor        emi_co2eq          emi_nh3     
+##  Min.   : 10761   Min.   :0.1477   Min.   :      0   Min.   :    0  
+##  1st Qu.: 24530   1st Qu.:0.1959   1st Qu.: 745804   1st Qu.: 8322  
+##  Median : 29715   Median :0.2249   Median :1121468   Median :13177  
+##  Mean   : 32561   Mean   :0.2356   Mean   :1466069   Mean   :16252  
+##  3rd Qu.: 37017   3rd Qu.:0.2631   3rd Qu.:1804807   3rd Qu.:19954  
+##  Max.   :163515   Max.   :0.4395   Max.   :7270058   Max.   :88272  
+##  NA's   :184      NA's   :54       NA's   :2         NA's   :2      
+##     emi_pm10       emi_pm25           soc           bio_threat    
+##  Min.   :   0   Min.   :   0.0   Min.   : 31.86   Min.   :0.1920  
+##  1st Qu.: 434   1st Qu.: 228.1   1st Qu.: 61.59   1st Qu.:0.2507  
+##  Median :1012   Median : 608.9   Median : 81.03   Median :0.2858  
+##  Mean   :1269   Mean   : 765.3   Mean   : 92.96   Mean   :0.2981  
+##  3rd Qu.:1759   3rd Qu.:1096.7   3rd Qu.:109.75   3rd Qu.:0.3192  
+##  Max.   :7418   Max.   :4020.6   Max.   :374.18   Max.   :0.6029  
+##  NA's   :2      NA's   :2        NA's   :40       NA's   :43      
+##    nat2000_ag         nat2000_pr         cal_frac          precip      
+##  Min.   :0.000000   Min.   :0.00000   Min.   :0.1612   Min.   : 261.3  
+##  1st Qu.:0.006678   1st Qu.:0.03956   1st Qu.:0.3928   1st Qu.: 610.6  
+##  Median :0.044534   Median :0.12229   Median :0.4986   Median : 745.8  
+##  Mean   :0.065957   Mean   :0.13586   Mean   :0.5239   Mean   : 800.3  
+##  3rd Qu.:0.097822   3rd Qu.:0.21187   3rd Qu.:0.6305   3rd Qu.: 882.9  
+##  Max.   :0.507382   Max.   :0.49982   Max.   :0.9640   Max.   :2707.9  
+##  NA's   :2          NA's   :2         NA's   :8                        
+##     deg_days      crop_suit         rye_f              barley_f      
+##  Min.   : 242   Min.   :0.000   Min.   :0.0000000   Min.   :0.00000  
+##  1st Qu.:1536   1st Qu.:3.141   1st Qu.:0.0001763   1st Qu.:0.01824  
+##  Median :1711   Median :4.267   Median :0.0010613   Median :0.04720  
+##  Mean   :2016   Mean   :4.149   Mean   :0.0078947   Mean   :0.05499  
+##  3rd Qu.:2246   3rd Qu.:5.202   3rd Qu.:0.0069115   3rd Qu.:0.08157  
+##  Max.   :7765   Max.   :6.000   Max.   :0.1270547   Max.   :0.22525  
+##                 NA's   :2       NA's   :1           NA's   :1        
+##     maize_f            tritic_f           sorghum_f        
+##  Min.   :0.000000   Min.   :0.0000000   Min.   :0.0000000  
+##  1st Qu.:0.000236   1st Qu.:0.0006165   1st Qu.:0.0000000  
+##  Median :0.008416   Median :0.0028821   Median :0.0000000  
+##  Mean   :0.029646   Mean   :0.0090348   Mean   :0.0003367  
+##  3rd Qu.:0.033520   3rd Qu.:0.0101557   3rd Qu.:0.0000793  
+##  Max.   :0.319030   Max.   :0.1181380   Max.   :0.0180664  
+##  NA's   :1          NA's   :1           NA's   :1          
+##    oth_cer_f           rice_f           pasture_f       
+##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.006223  
+##  Median :0.00008   Median :0.000000   Median :0.021880  
+##  Mean   :0.00097   Mean   :0.001738   Mean   :0.047618  
+##  3rd Qu.:0.00128   3rd Qu.:0.000000   3rd Qu.:0.084760  
+##  Max.   :0.01778   Max.   :0.107097   Max.   :0.412707  
+##  NA's   :65        NA's   :1          NA's   :8         
+##      rape_f            sunflow_f           pulses_f       
+##  Min.   :0.0000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.0002153   1st Qu.:0.000000   1st Qu.:0.001809  
+##  Median :0.0125632   Median :0.000192   Median :0.004299  
+##  Mean   :0.0268691   Mean   :0.011671   Mean   :0.008475  
+##  3rd Qu.:0.0427553   3rd Qu.:0.004120   3rd Qu.:0.011267  
+##  Max.   :0.1461908   Max.   :0.218639   Max.   :0.064771  
+##  NA's   :1           NA's   :1          NA's   :1         
+##     potato_f          sugbeet_f           oth_rt_f      
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.00000  
+##  1st Qu.:0.002072   1st Qu.:0.000000   1st Qu.:0.00000  
+##  Median :0.005518   Median :0.001072   Median :0.00015  
+##  Mean   :0.012031   Mean   :0.009541   Mean   :0.00091  
+##  3rd Qu.:0.013376   3rd Qu.:0.013027   3rd Qu.:0.00108  
+##  Max.   :0.188248   Max.   :0.113660   Max.   :0.01231  
+##  NA's   :1          NA's   :1          NA's   :35       
+##     wheat_f             oats_f            oth_oil_f       
+##  Min.   :0.000000   Min.   :0.0000000   Min.   :0.000000  
+##  1st Qu.:0.006516   1st Qu.:0.0002925   1st Qu.:0.000000  
+##  Median :0.103025   Median :0.0037227   Median :0.000000  
+##  Mean   :0.103869   Mean   :0.0096505   Mean   :0.004699  
+##  3rd Qu.:0.170883   3rd Qu.:0.0106135   3rd Qu.:0.001537  
+##  Max.   :0.389937   Max.   :0.1912935   Max.   :0.190498  
+##  NA's   :1          NA's   :1           NA's   :1         
+##     fibre_f            oth_ind_f            fodder_f       
+##  Min.   :0.0000000   Min.   :0.0000000   Min.   :0.000000  
+##  1st Qu.:0.0000000   1st Qu.:0.0000000   1st Qu.:0.007706  
+##  Median :0.0000000   Median :0.0002218   Median :0.032609  
+##  Mean   :0.0030096   Mean   :0.0015573   Mean   :0.045518  
+##  3rd Qu.:0.0000127   3rd Qu.:0.0010943   3rd Qu.:0.069995  
+##  Max.   :0.1513896   Max.   :0.0356431   Max.   :0.253901  
+##  NA's   :1           NA's   :1           NA's   :1         
+##      rye_y           barley_y         maize_y          tritic_y     
+##  Min.   :0.4454   Min.   :0.1254   Min.   : 1.000   Min.   :0.5839  
+##  1st Qu.:2.6135   1st Qu.:3.0860   1st Qu.: 6.320   1st Qu.:2.9683  
+##  Median :3.5000   Median :4.7376   Median : 8.100   Median :4.0000  
+##  Mean   :3.7258   Mean   :4.6243   Mean   : 7.929   Mean   :3.9796  
+##  3rd Qu.:5.0000   3rd Qu.:6.1387   3rd Qu.: 9.876   3rd Qu.:5.2188  
+##  Max.   :7.1428   Max.   :8.4666   Max.   :12.976   Max.   :7.3594  
+##  NA's   :20       NA's   :14       NA's   :28       NA's   :23      
+##    sorghum_y       oth_cer_y          rice_y         pasture_y      
+##  Min.   :1.000   Min.   : 0.500   Min.   : 1.857   Min.   : 0.0003  
+##  1st Qu.:3.173   1st Qu.: 1.465   1st Qu.: 4.567   1st Qu.: 4.3362  
+##  Median :4.000   Median : 2.174   Median : 5.051   Median : 7.3664  
+##  Mean   :4.349   Mean   : 2.423   Mean   : 5.352   Mean   : 9.6893  
+##  3rd Qu.:5.675   3rd Qu.: 3.272   3rd Qu.: 5.985   3rd Qu.:10.1248  
+##  Max.   :9.044   Max.   :10.750   Max.   :10.000   Max.   :49.5135  
+##  NA's   :175     NA's   :149      NA's   :179      NA's   :108      
+##      rape_y           sunflow_y        pulses_y          potato_y     
+##  Min.   :0.006547   Min.   :0.700   Min.   :0.09302   Min.   : 4.361  
+##  1st Qu.:2.460195   1st Qu.:1.866   1st Qu.:1.65220   1st Qu.:24.144  
+##  Median :3.185760   Median :2.203   Median :2.20927   Median :31.598  
+##  Mean   :2.975625   Mean   :2.239   Mean   :2.23948   Mean   :31.469  
+##  3rd Qu.:3.544947   3rd Qu.:2.686   3rd Qu.:2.77736   3rd Qu.:40.329  
+##  Max.   :5.000000   Max.   :4.393   Max.   :5.00245   Max.   :53.327  
+##  NA's   :22         NA's   :105     NA's   :25        NA's   :7       
+##    sugbeet_y         oth_rt_y         wheat_y           oats_y      
+##  Min.   :  5.00   Min.   : 9.333   Min.   :0.6667   Min.   :0.5543  
+##  1st Qu.: 57.45   1st Qu.:12.000   1st Qu.:3.3922   1st Qu.:2.3556  
+##  Median : 67.95   Median :24.857   Median :5.1819   Median :3.6032  
+##  Mean   : 65.07   Mean   :32.544   Mean   :5.3029   Mean   :3.5905  
+##  3rd Qu.: 76.28   3rd Qu.:52.350   3rd Qu.:7.4295   3rd Qu.:4.7295  
+##  Max.   :107.50   Max.   :94.396   Max.   :9.2898   Max.   :7.2000  
+##  NA's   :34       NA's   :174      NA's   :9        NA's   :14      
+##    oth_oil_y         fibre_y        oth_ind_y           fodder_y      
+##  Min.   :0.1428   Min.   :0.350   Min.   : 0.02671   Min.   : 0.6603  
+##  1st Qu.:1.6668   1st Qu.:1.523   1st Qu.: 0.98452   1st Qu.:12.5398  
+##  Median :2.0000   Median :2.750   Median : 1.88764   Median :26.1963  
+##  Mean   :2.1776   Mean   :3.104   Mean   : 2.98676   Mean   :25.7794  
+##  3rd Qu.:2.8162   3rd Qu.:4.838   3rd Qu.: 3.26371   3rd Qu.:38.1261  
+##  Max.   :5.0000   Max.   :8.333   Max.   :13.36992   Max.   :51.0516  
+##  NA's   :50       NA's   :146     NA's   :54         NA's   :12       
+##    berries_y          brassic_y           citrus_y        
+##  Min.   : 0.00396   Min.   : 0.03086   Min.   :  0.02187  
+##  1st Qu.:18.60179   1st Qu.:29.19964   1st Qu.: 43.38803  
+##  Median :25.99651   Median :37.54545   Median : 73.80700  
+##  Mean   :26.03682   Mean   :38.47796   Mean   : 67.53417  
+##  3rd Qu.:34.90468   3rd Qu.:43.59471   3rd Qu.:101.62819  
+##  Max.   :57.13000   Max.   :77.33000   Max.   :122.22785  
+##  NA's   :21         NA's   :21         NA's   :198        
+##    frtrees_y            grapes_y         greens_y           nuts_y       
+##  Min.   :  0.00316   Min.   : 0.000   Min.   : 0.0116   Min.   : 0.0000  
+##  1st Qu.: 41.05565   1st Qu.: 1.878   1st Qu.:23.1703   1st Qu.: 0.4243  
+##  Median : 62.58810   Median : 4.266   Median :37.2801   Median : 3.2225  
+##  Mean   : 63.80520   Mean   : 4.486   Mean   :34.3396   Mean   : 4.8389  
+##  3rd Qu.: 88.15527   3rd Qu.: 6.531   3rd Qu.:43.8130   3rd Qu.: 8.3606  
+##  Max.   :180.73000   Max.   :13.714   Max.   :71.3000   Max.   :22.8630  
+##  NA's   :20          NA's   :79       NA's   :28        NA's   :44       
+##     olives_y        oth_veg_y            peas_y           rootveg_y       
+##  Min.   :0.0000   Min.   : 0.00974   Min.   : 0.00246   Min.   :  0.0421  
+##  1st Qu.:0.8311   1st Qu.: 9.32000   1st Qu.:12.93950   1st Qu.: 43.9722  
+##  Median :1.5341   Median :14.55378   Median :21.42656   Median : 71.1353  
+##  Mean   :1.3702   Mean   :16.73389   Mean   :21.45191   Mean   : 71.1527  
+##  3rd Qu.:1.8099   3rd Qu.:18.99319   3rd Qu.:28.22034   3rd Qu.: 93.5171  
+##  Max.   :4.2988   Max.   :41.34000   Max.   :44.57000   Max.   :170.1700  
+##  NA's   :216      NA's   :20         NA's   :20         NA's   :21        
+##     tropfr_y          vfruits_y           berries_f       
+##  Min.   : 0.00243   Min.   :   0.1976   Min.   :0.000000  
+##  1st Qu.:25.81930   1st Qu.: 117.0439   1st Qu.:0.000277  
+##  Median :37.45034   Median : 181.0226   Median :0.000537  
+##  Mean   :36.78523   Mean   : 296.2257   Mean   :0.001068  
+##  3rd Qu.:48.14715   3rd Qu.: 307.1032   3rd Qu.:0.001378  
+##  Max.   :88.52000   Max.   :1194.7400   Max.   :0.009374  
+##  NA's   :189        NA's   :20          NA's   :4         
+##    brassic_f           citrus_f          frtrees_f       
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000717   1st Qu.:0.000000   1st Qu.:0.001741  
+##  Median :0.001163   Median :0.000000   Median :0.007025  
+##  Mean   :0.001651   Mean   :0.002161   Mean   :0.007964  
+##  3rd Qu.:0.002102   3rd Qu.:0.001833   3rd Qu.:0.010316  
+##  Max.   :0.011558   Max.   :0.029824   Max.   :0.092826  
+##  NA's   :4          NA's   :4          NA's   :4         
+##     grapes_f           greens_f            nuts_f        
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000000   1st Qu.:0.000241   1st Qu.:0.000000  
+##  Median :0.000322   Median :0.000839   Median :0.000212  
+##  Mean   :0.014954   Mean   :0.001398   Mean   :0.004343  
+##  3rd Qu.:0.017370   3rd Qu.:0.001392   3rd Qu.:0.002597  
+##  Max.   :0.268339   Max.   :0.015696   Max.   :0.036033  
+##  NA's   :4          NA's   :4          NA's   :4         
+##     olives_f          oth_veg_f            peas_f        
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000000   1st Qu.:0.000530   1st Qu.:0.000390  
+##  Median :0.000000   Median :0.001639   Median :0.001316  
+##  Mean   :0.015398   Mean   :0.002617   Mean   :0.001804  
+##  3rd Qu.:0.000905   3rd Qu.:0.003725   3rd Qu.:0.002598  
+##  Max.   :0.452390   Max.   :0.019446   Max.   :0.021036  
+##  NA's   :4          NA's   :4          NA's   :4         
+##    rootveg_f           tropfr_f          vfruits_f            bovine      
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000   Min.   :0.0000  
+##  1st Qu.:0.000697   1st Qu.:0.000000   1st Qu.:0.000118   1st Qu.:0.1913  
+##  Median :0.001438   Median :0.000000   Median :0.000881   Median :0.4288  
+##  Mean   :0.002112   Mean   :0.001053   Mean   :0.004773   Mean   :0.5536  
+##  3rd Qu.:0.002655   3rd Qu.:0.000394   3rd Qu.:0.006812   3rd Qu.:0.7943  
+##  Max.   :0.018225   Max.   :0.043473   Max.   :0.041715   Max.   :2.8302  
+##  NA's   :4          NA's   :4          NA's   :4          NA's   :8       
+##    milk_cows            pigs             sheep             goats        
+##  Min.   :0.00000   Min.   : 0.0000   Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.:0.04591   1st Qu.: 0.1446   1st Qu.:0.07257   1st Qu.:0.00660  
+##  Median :0.10738   Median : 0.2961   Median :0.27634   Median :0.02031  
+##  Mean   :0.16021   Mean   : 0.8674   Mean   :0.58262   Mean   :0.11852  
+##  3rd Qu.:0.19825   3rd Qu.: 0.7611   3rd Qu.:0.90416   3rd Qu.:0.12719  
+##  Max.   :1.04331   Max.   :17.2590   Max.   :5.22537   Max.   :2.33485  
+##  NA's   :8         NA's   :34        NA's   :36        NA's   :36
+
head(dbase.final)
+
##   NUTS_ID risk_pov train35bas train35ful train_bas train_ful nitr_high
+## 1    AT11 13.73333  0.1375661  0.3333333 0.1243050 0.1779190  64.58924
+## 2    AT22 17.26667  0.2160980  0.3648294 0.2017089 0.2413594  64.58924
+## 3    AT12 13.83333  0.2084775  0.4809689 0.2534787 0.3449437  64.58924
+## 4    AT13 27.23333  0.3750000  0.7500000 0.1753247 0.4740260  64.58924
+## 5    AT21 17.20000  0.2306238  0.3648393 0.2076173 0.2250348  64.58924
+## 6    AT31 15.00000  0.2508418  0.4284512 0.2014381 0.2857610  64.58924
+##   nitr_mod nitr_poor irrigated    forest    artific soil_loss com_birds
+## 1 20.20774  15.20302     5.850 0.3161203 0.04355635     1.842        NA
+## 2 20.20774  15.20302     0.325 0.6127954 0.03306278     5.804        NA
+## 3 20.20774  15.20302     2.650 0.4286079 0.04875064     2.236        NA
+## 4 20.20774  15.20302    10.525 0.1469534 0.73118280     1.014        NA
+## 5 20.20774  15.20302     0.100 0.5998934 0.03047416    11.671        NA
+## 6 20.20774  15.20302     0.125 0.4027358 0.04900973     3.791        NA
+##   farm_birds org_farm  energy_rt renew_pct renew_prod  gross_N  gross_P
+## 1      65.98 19.43430 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 2      65.98 12.80858 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 3      65.98 13.41584 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 4      65.98 16.44137 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 5      65.98 10.68078 0.08319988  32.65559   7.068917 32.57143 1.833333
+## 6      65.98 12.31071 0.08319988  32.65559   7.068917 32.57143 1.833333
+##   conv_till  cons_till   zero_till    nfert   arable grassland  permanent
+## 1 0.6182190 0.31992068 0.025012794 7.684000 83.64566  8.715722  7.5451998
+## 2 0.8887161 0.05005656 0.024109163 7.551429 37.02489 58.694493  4.2139773
+## 3 0.6226791 0.32803537 0.019896256 7.452800 76.22380 20.140837  3.5858503
+## 4 0.5109890 0.40476190 0.007326007 7.497000 79.80050 10.099751 10.0997506
+## 5 0.8592546 0.05928605 0.032552288 8.131500 28.48779 71.285286  0.1679223
+## 6 0.8442576 0.12043311 0.014198645 9.138333 56.49367 43.164202  0.2648111
+##   soil_prod  irrig_rate   afi_awu  gva_awu  labour_use pest_rate gdp_cap
+## 1         6  16.6922025 24788.951 25367.72 0.036978892  1.335093   26700
+## 2         6   2.2086896 13958.345 18388.41 0.034606425  1.335093   34800
+## 3         6  10.7753945 23056.271 25423.66 0.035521808  1.335093   31800
+## 4         6 208.4609053  6431.103  9952.26 0.001572703  1.335093   47300
+## 5         6   0.5770663 10741.948 11827.48 0.028435272  1.335093   32700
+## 6         6   0.5491021 15263.545 22028.20 0.030650579  1.335093   39600
+##   pps_cap emp_rate tot_unemp yth_unemp  rur_gdp  rur_pps  int_gdp  int_pps
+## 1   24600 69.81530       5.7      15.0 26690.97 24628.47       NA       NA
+## 2   32100 71.37077       5.1      10.2 28015.67 25851.10 42289.66 39020.69
+## 3   29300 73.06232       5.2       9.3 29392.91 27122.78 37037.74 34176.67
+## 4   43700 64.90683      11.3      20.3       NA       NA       NA       NA
+## 5   30200 69.88481       5.4      12.2 27340.58 25228.26 38025.00 35085.71
+## 6   36500 75.46507       4.5       7.6 33453.92 30867.51 48936.17 45154.26
+##    urb_gdp  urb_pps C_factor emi_co2eq   emi_nh3  emi_pm10 emi_pm25
+## 1       NA       NA 0.204701  653096.3  8690.371 1025.7531 818.8200
+## 2       NA       NA 0.305978  954802.7 13155.494  850.9032 331.4128
+## 3 27574.60 25441.27 0.195147 1136117.0 14253.957 1285.1966 856.1814
+## 4 47307.69 43651.88 0.188655  457098.6  6375.132  609.2881 426.0527
+## 5       NA       NA 0.278675  888623.1 11187.491  471.5642 220.7500
+## 6       NA       NA 0.241675 1943324.3 22472.061 1298.3660 629.6217
+##         soc bio_threat  nat2000_ag nat2000_pr  cal_frac    precip deg_days
+## 1  59.33579  0.2693722 0.202690378 0.27756336 0.5245497  666.3237 1965.423
+## 2  97.49513  0.2525372 0.084071132 0.15248896 0.2280712 1149.1807 1264.833
+## 3  64.25874  0.2670201 0.139216259 0.22137659 0.5354631  714.3840 1764.210
+## 4  49.22190  0.2611445 0.182098765 0.13365361 0.5256335  580.7077 2040.184
+## 5 101.03633  0.2280257 0.008167266 0.06126958 0.2194208 1357.1470 1095.291
+## 6 100.91691  0.2672487 0.009193898 0.06533375 0.3612959 1057.0594 1537.084
+##   crop_suit       rye_f   barley_f    maize_f    tritic_f    sorghum_f
+## 1  5.319588 0.023158915 0.04191746 0.10322047 0.010459416 0.0019037848
+## 2  3.217877 0.004659144 0.02072924 0.12555999 0.009728318 0.0014761044
+## 3  4.347383 0.031413059 0.08222000 0.06573296 0.023418930 0.0008681970
+## 4  5.466667 0.040740741 0.04567901 0.02242798 0.008436214 0.0016460905
+## 5  2.668044 0.004181640 0.02944136 0.08118262 0.016478275 0.0003136230
+## 6  3.925520 0.013497972 0.07211149 0.08582187 0.027766215 0.0001695024
+##      oth_cer_f rice_f   pasture_f      rape_f    sunflow_f    pulses_f
+## 1 0.0088828089      0 0.016951664 0.042230962 0.0163702690 0.017379161
+## 2 0.0007775365      0 0.029889596 0.001075187 0.0006074504 0.001716047
+## 3 0.0051265913      0 0.016059157 0.028237548 0.0174845920 0.012741848
+## 4 0.0177777778      0 0.002674897 0.032921811 0.0039976484 0.014403292
+## 5 0.0009722313      0 0.053812480 0.000313623 0.0008232604 0.003711205
+## 6 0.0007895810      0 0.014203875 0.018338447 0.0007960178 0.007065462
+##      potato_f    sugbeet_f     oth_rt_f    wheat_f      oats_f  oth_oil_f
+## 1 0.004103967 0.0187300502 7.295942e-05 0.22119243 0.006499088 0.08481532
+## 2 0.001737308 0.0006074504 4.373643e-05 0.01938374 0.002786982 0.04561345
+## 3 0.017488323 0.0360314193 6.567450e-05 0.19060904 0.014256840 0.02270124
+## 4 0.013374486 0.0467078189 0.000000e+00 0.24773663 0.003621399 0.01255144
+## 5 0.001973211 0.0001045410 0.000000e+00 0.01787216 0.006377001 0.01950996
+## 6 0.002589740 0.0100478469 9.955586e-05 0.08748686 0.017608514 0.02501556
+##        fibre_f    oth_ind_f   fodder_f    rye_y barley_y   maize_y
+## 1 2.735978e-04 0.0009746922 0.04886001 3.516367 4.435137  8.744878
+## 2 7.289405e-05 0.0005181552 0.04823763 4.700782 5.715165 11.440518
+## 3 4.796229e-04 0.0029031115 0.06777658 4.357949 4.924511  9.361459
+## 4 0.000000e+00 0.0016460905 0.03168724 4.030303 4.468468  8.798165
+## 5 5.227050e-05 0.0002744201 0.07297615 4.592187 5.394585 10.736016
+## 6 1.235866e-04 0.0026176325 0.09126311 4.503259 6.279062 10.027501
+##   tritic_y sorghum_y oth_cer_y rice_y pasture_y   rape_y sunflow_y
+## 1 3.649591  5.164671  3.868583     NA  5.264291 2.800108  2.344708
+## 2 6.234155  8.588477  3.681250     NA  6.965349 3.519774  2.430000
+## 3 5.100860  6.237822  3.901009     NA  6.666021 3.139327  2.623319
+## 4 5.024390  3.000000  3.944444     NA  5.615385 2.918750  2.647059
+## 5 5.808485  6.694444  3.817204     NA  7.327343 2.416667  1.912698
+## 6 5.474152  5.848101  3.869565     NA  7.502266 3.812215  2.097035
+##   pulses_y potato_y sugbeet_y oth_rt_y  wheat_y   oats_y oth_oil_y
+## 1 2.020007 34.06111  68.85210 47.12500 4.418312 3.320119  2.106922
+## 2 2.610619 27.19406  67.16500 51.00000 6.405516 3.681343  1.061669
+## 3 2.295685 31.89232  71.15838 61.30303 5.197458 3.752801  1.601008
+## 4 2.214286 34.87692  69.50661 58.60976 4.702658 2.909091  2.016393
+## 5 3.035211 24.40728  57.81250 40.91667 5.443334 3.707787  2.669792
+## 6 2.751594 30.02403  77.22785 63.89655 6.893955 4.364527  2.660348
+##    fibre_y oth_ind_y fodder_y berries_y brassic_y citrus_y frtrees_y
+## 1 7.777778  1.333333 18.05331  29.18171  44.06400       NA 106.98343
+## 2 7.666667  6.587339 29.00264  12.23637  18.54004       NA  45.09310
+## 3 4.966805  1.894773 23.44168  23.90570  36.09730       NA  87.64099
+## 4 5.134100        NA 11.72727  17.16571  25.92000       NA  62.93143
+## 5 4.800000 10.952381 31.81458  14.36696  21.69391       NA  52.67087
+## 6 5.361111  3.804918 28.50815  20.89739  31.55478       NA  76.61217
+##   grapes_y greens_y   nuts_y    olives_y oth_veg_y    peas_y rootveg_y
+## 1 8.273220 61.19029 2.632571 0.000000000  29.05543 18.087999  93.43200
+## 2 3.134204 25.61430 1.210000 0.007077465  12.11144  7.590493  39.11891
+## 3 5.295910 50.12718 2.156607          NA  23.80225 14.817717  76.53964
+## 4 3.012085 35.99429 1.548571          NA  17.09143 10.639999  54.96000
+## 5 4.908311 30.12565 1.296087 0.000000000  14.30478  8.905217  45.99913
+## 6       NA 43.81913 1.885217          NA  20.80696 12.953043  66.90783
+##      tropfr_y vfruits_y   berries_f    brassic_f citrus_f   frtrees_f
+## 1          NA  258.4971 0.003187124 0.0010235525        0 0.015481333
+## 2 0.002429577  107.9879 0.001521483 0.0005296015        0 0.007548431
+## 3          NA  211.7612 0.002908851 0.0009341843        0 0.014129630
+## 4          NA  152.0571 0.001987285 0.0006382214        0 0.009653162
+## 5          NA  127.2652 0.001397762 0.0004488946        0 0.006789575
+## 6          NA  185.1130 0.002057785 0.0006608626        0 0.009995613
+##      grapes_f     greens_f      nuts_f     olives_f    oth_veg_f
+## 1 0.036878371 0.0011948229 0.003024814 4.088179e-26 0.0015673054
+## 2 0.006011221 0.0005815269 0.001445650 2.015534e-05 0.0007469377
+## 3 0.033940373 0.0010905008 0.002760712 0.000000e+00 0.0014304611
+## 4 0.023496916 0.0007450146 0.001886079 0.000000e+00 0.0009772707
+## 5 0.005030330 0.0005240078 0.001326578 2.161636e-26 0.0006873657
+## 6 0.000000000 0.0007714444 0.001952989 0.000000e+00 0.0010119398
+##         peas_f    rootveg_f    tropfr_f    vfruits_f     bovine
+## 1 0.0008360319 0.0018775143 0.00000e+00 0.0004477230 0.09572503
+## 2 0.0004102481 0.0009143129 3.07412e-06 0.0002370161 0.78983432
+## 3 0.0007630364 0.0017135852 0.00000e+00 0.0004086315 0.44254290
+## 4 0.0005212956 0.0011706969 0.00000e+00 0.0002791712 0.01646091
+## 5 0.0003666545 0.0008234126 0.00000e+00 0.0001963558 0.99191114
+## 6 0.0005397888 0.0012122281 0.00000e+00 0.0002890750 0.98488639
+##     milk_cows      pigs      sheep      goats
+## 1 0.019237346 0.2189751 0.02537050 0.01341102
+## 2 0.197439597 1.9225909 0.16251727 0.01782867
+## 3 0.103387585 0.7942592 0.07255415 0.01521584
+## 4 0.004320988 0.0308642 0.06394721 0.01341102
+## 5 0.177059784 0.6761058 0.23496243 0.02406403
+## 6 0.287276589 1.9368813 0.09536765 0.03951552
+
head(nuts@data)
+
##   CNTR_CODE FID_1 LEVL_CODE NUTS_ID         NUTS_NAME Shape_Length
+## 1        AT  AT11         2    AT11   Burgenland (AT)     628921.6
+## 2        AT  AT22         2    AT22        Steiermark     814700.6
+## 3        AT  AT12         2    AT12 Niederösterreich    1044711.5
+## 4        AT  AT13         2    AT13              Wien     116902.7
+## 5        AT  AT21         2    AT21          Kärnten     625727.5
+## 6        AT  AT31         2    AT31   Oberösterreich     790984.0
+##    Shape_Area
+## 1  3963509482
+## 2 16414303341
+## 3 19201725666
+## 4   411979159
+## 5  9541848203
+## 6 11984617500
+
sp.dbase <- nuts
+sp.dbase@data <- left_join(nuts@data, dbase.final)
+
## Joining, by = "NUTS_ID"
+
## Warning: Column `NUTS_ID` joining factor and character vector, coercing
+## into character vector
+
names(sp.dbase@data) 
+
##   [1] "CNTR_CODE"    "FID_1"        "LEVL_CODE"    "NUTS_ID"     
+##   [5] "NUTS_NAME"    "Shape_Length" "Shape_Area"   "risk_pov"    
+##   [9] "train35bas"   "train35ful"   "train_bas"    "train_ful"   
+##  [13] "nitr_high"    "nitr_mod"     "nitr_poor"    "irrigated"   
+##  [17] "forest"       "artific"      "soil_loss"    "com_birds"   
+##  [21] "farm_birds"   "org_farm"     "energy_rt"    "renew_pct"   
+##  [25] "renew_prod"   "gross_N"      "gross_P"      "conv_till"   
+##  [29] "cons_till"    "zero_till"    "nfert"        "arable"      
+##  [33] "grassland"    "permanent"    "soil_prod"    "irrig_rate"  
+##  [37] "afi_awu"      "gva_awu"      "labour_use"   "pest_rate"   
+##  [41] "gdp_cap"      "pps_cap"      "emp_rate"     "tot_unemp"   
+##  [45] "yth_unemp"    "rur_gdp"      "rur_pps"      "int_gdp"     
+##  [49] "int_pps"      "urb_gdp"      "urb_pps"      "C_factor"    
+##  [53] "emi_co2eq"    "emi_nh3"      "emi_pm10"     "emi_pm25"    
+##  [57] "soc"          "bio_threat"   "nat2000_ag"   "nat2000_pr"  
+##  [61] "cal_frac"     "precip"       "deg_days"     "crop_suit"   
+##  [65] "rye_f"        "barley_f"     "maize_f"      "tritic_f"    
+##  [69] "sorghum_f"    "oth_cer_f"    "rice_f"       "pasture_f"   
+##  [73] "rape_f"       "sunflow_f"    "pulses_f"     "potato_f"    
+##  [77] "sugbeet_f"    "oth_rt_f"     "wheat_f"      "oats_f"      
+##  [81] "oth_oil_f"    "fibre_f"      "oth_ind_f"    "fodder_f"    
+##  [85] "rye_y"        "barley_y"     "maize_y"      "tritic_y"    
+##  [89] "sorghum_y"    "oth_cer_y"    "rice_y"       "pasture_y"   
+##  [93] "rape_y"       "sunflow_y"    "pulses_y"     "potato_y"    
+##  [97] "sugbeet_y"    "oth_rt_y"     "wheat_y"      "oats_y"      
+## [101] "oth_oil_y"    "fibre_y"      "oth_ind_y"    "fodder_y"    
+## [105] "berries_y"    "brassic_y"    "citrus_y"     "frtrees_y"   
+## [109] "grapes_y"     "greens_y"     "nuts_y"       "olives_y"    
+## [113] "oth_veg_y"    "peas_y"       "rootveg_y"    "tropfr_y"    
+## [117] "vfruits_y"    "berries_f"    "brassic_f"    "citrus_f"    
+## [121] "frtrees_f"    "grapes_f"     "greens_f"     "nuts_f"      
+## [125] "olives_f"     "oth_veg_f"    "peas_f"       "rootveg_f"   
+## [129] "tropfr_f"     "vfruits_f"    "bovine"       "milk_cows"   
+## [133] "pigs"         "sheep"        "goats"
+
dbase.shp <- sp.dbase
+names(dbase.shp@data)
+
##   [1] "CNTR_CODE"    "FID_1"        "LEVL_CODE"    "NUTS_ID"     
+##   [5] "NUTS_NAME"    "Shape_Length" "Shape_Area"   "risk_pov"    
+##   [9] "train35bas"   "train35ful"   "train_bas"    "train_ful"   
+##  [13] "nitr_high"    "nitr_mod"     "nitr_poor"    "irrigated"   
+##  [17] "forest"       "artific"      "soil_loss"    "com_birds"   
+##  [21] "farm_birds"   "org_farm"     "energy_rt"    "renew_pct"   
+##  [25] "renew_prod"   "gross_N"      "gross_P"      "conv_till"   
+##  [29] "cons_till"    "zero_till"    "nfert"        "arable"      
+##  [33] "grassland"    "permanent"    "soil_prod"    "irrig_rate"  
+##  [37] "afi_awu"      "gva_awu"      "labour_use"   "pest_rate"   
+##  [41] "gdp_cap"      "pps_cap"      "emp_rate"     "tot_unemp"   
+##  [45] "yth_unemp"    "rur_gdp"      "rur_pps"      "int_gdp"     
+##  [49] "int_pps"      "urb_gdp"      "urb_pps"      "C_factor"    
+##  [53] "emi_co2eq"    "emi_nh3"      "emi_pm10"     "emi_pm25"    
+##  [57] "soc"          "bio_threat"   "nat2000_ag"   "nat2000_pr"  
+##  [61] "cal_frac"     "precip"       "deg_days"     "crop_suit"   
+##  [65] "rye_f"        "barley_f"     "maize_f"      "tritic_f"    
+##  [69] "sorghum_f"    "oth_cer_f"    "rice_f"       "pasture_f"   
+##  [73] "rape_f"       "sunflow_f"    "pulses_f"     "potato_f"    
+##  [77] "sugbeet_f"    "oth_rt_f"     "wheat_f"      "oats_f"      
+##  [81] "oth_oil_f"    "fibre_f"      "oth_ind_f"    "fodder_f"    
+##  [85] "rye_y"        "barley_y"     "maize_y"      "tritic_y"    
+##  [89] "sorghum_y"    "oth_cer_y"    "rice_y"       "pasture_y"   
+##  [93] "rape_y"       "sunflow_y"    "pulses_y"     "potato_y"    
+##  [97] "sugbeet_y"    "oth_rt_y"     "wheat_y"      "oats_y"      
+## [101] "oth_oil_y"    "fibre_y"      "oth_ind_y"    "fodder_y"    
+## [105] "berries_y"    "brassic_y"    "citrus_y"     "frtrees_y"   
+## [109] "grapes_y"     "greens_y"     "nuts_y"       "olives_y"    
+## [113] "oth_veg_y"    "peas_y"       "rootveg_y"    "tropfr_y"    
+## [117] "vfruits_y"    "berries_f"    "brassic_f"    "citrus_f"    
+## [121] "frtrees_f"    "grapes_f"     "greens_f"     "nuts_f"      
+## [125] "olives_f"     "oth_veg_f"    "peas_f"       "rootveg_f"   
+## [129] "tropfr_f"     "vfruits_f"    "bovine"       "milk_cows"   
+## [133] "pigs"         "sheep"        "goats"
+
#We exclude some variables
+dbase.shp@data <- dbase.shp@data[,-c(2,3)]
+
+#Make NAs -99999 for shapefile
+summary(dbase.shp@data)
+
##    CNTR_CODE     NUTS_ID                                  NUTS_NAME  
+##  UK     : 40   Length:320         Östra Mellansverige         :  1  
+##  DE     : 38   Class :character   Övre Norrland               :  1  
+##  FR     : 27   Mode  :character   Åland                       :  1  
+##  TR     : 26                      Észak-Alföld               :  1  
+##  IT     : 21                      Észak-Magyarország         :  1  
+##  ES     : 19                      Ã\201rea Metropolitana de Lisboa:  1  
+##  (Other):149                      (Other)                      :314  
+##   Shape_Length        Shape_Area           risk_pov        train35bas     
+##  Min.   :   15458   Min.   :1.304e+07   Min.   : 9.971   Min.   :0.00000  
+##  1st Qu.:  545979   1st Qu.:5.074e+09   1st Qu.:18.586   1st Qu.:0.09613  
+##  Median :  836914   Median :1.131e+10   Median :23.514   Median :0.22800  
+##  Mean   : 1114048   Mean   :1.799e+10   Mean   :25.960   Mean   :0.26424  
+##  3rd Qu.: 1200811   3rd Qu.:2.403e+10   3rd Qu.:29.680   3rd Qu.:0.35996  
+##  Max.   :18760323   Max.   :2.271e+11   Max.   :54.150   Max.   :0.88217  
+##                                         NA's   :2        NA's   :52       
+##    train35ful       train_bas         train_ful         nitr_high      
+##  Min.   :0.0000   Min.   :0.01171   Min.   :0.00188   Min.   :  4.082  
+##  1st Qu.:0.1264   1st Qu.:0.10627   1st Qu.:0.04939   1st Qu.: 66.302  
+##  Median :0.2600   Median :0.19961   Median :0.12807   Median : 70.505  
+##  Mean   :0.2738   Mean   :0.26250   Mean   :0.15796   Mean   : 75.328  
+##  3rd Qu.:0.3825   3rd Qu.:0.36488   3rd Qu.:0.25108   3rd Qu.: 87.591  
+##  Max.   :0.8550   Max.   :0.94840   Max.   :0.50303   Max.   :100.000  
+##  NA's   :52       NA's   :50        NA's   :50        NA's   :44       
+##     nitr_mod        nitr_poor        irrigated           forest       
+##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000  
+##  1st Qu.: 6.533   1st Qu.: 4.106   1st Qu.: 0.3312   1st Qu.:0.08957  
+##  Median :15.896   Median : 8.883   Median : 1.2250   Median :0.24904  
+##  Mean   :13.773   Mean   :10.898   Mean   : 5.7571   Mean   :0.25141  
+##  3rd Qu.:18.416   3rd Qu.:15.385   3rd Qu.: 6.5000   3rd Qu.:0.37365  
+##  Max.   :60.000   Max.   :68.367   Max.   :74.5500   Max.   :0.75860  
+##  NA's   :44       NA's   :44       NA's   :28                         
+##     artific          soil_loss         com_birds       farm_birds    
+##  Min.   :0.00000   Min.   : 0.0300   Min.   :54.92   Min.   : 63.78  
+##  1st Qu.:0.02056   1st Qu.: 0.7047   1st Qu.:62.14   1st Qu.: 81.34  
+##  Median :0.04020   Median : 1.5005   Median :69.50   Median : 83.82  
+##  Mean   :0.09023   Mean   : 2.5482   Mean   :69.70   Mean   : 81.90  
+##  3rd Qu.:0.08193   3rd Qu.: 2.9420   3rd Qu.:81.30   3rd Qu.: 85.30  
+##  Max.   :1.00000   Max.   :17.6050   Max.   :97.22   Max.   :116.60  
+##                    NA's   :44        NA's   :158     NA's   :94      
+##     org_farm        energy_rt         renew_pct        renew_prod     
+##  Min.   : 0.000   Min.   :0.00000   Min.   : 0.000   Min.   : 0.8855  
+##  1st Qu.: 1.200   1st Qu.:0.03503   1st Qu.: 3.074   1st Qu.: 6.2422  
+##  Median : 2.687   Median :0.06128   Median : 6.124   Median : 8.3156  
+##  Mean   : 4.056   Mean   :0.15052   Mean   :11.225   Mean   :12.4318  
+##  3rd Qu.: 5.204   3rd Qu.:0.09725   3rd Qu.:22.515   3rd Qu.:18.0797  
+##  Max.   :27.487   Max.   :1.75149   Max.   :41.011   Max.   :37.7797  
+##  NA's   :28       NA's   :44        NA's   :82       NA's   :45       
+##     gross_N           gross_P         conv_till         cons_till      
+##  Min.   :  2.857   Min.   :-6.500   Min.   :0.08646   Min.   :0.00000  
+##  1st Qu.: 41.821   1st Qu.:-1.667   1st Qu.:0.46182   1st Qu.:0.05077  
+##  Median : 67.333   Median : 1.833   Median :0.61740   Median :0.12499  
+##  Mean   : 67.553   Mean   : 1.941   Mean   :0.60410   Mean   :0.18031  
+##  3rd Qu.: 85.988   3rd Qu.: 4.714   3rd Qu.:0.73832   3rd Qu.:0.28382  
+##  Max.   :190.167   Max.   :31.000   Max.   :0.99752   Max.   :0.65066  
+##  NA's   :30        NA's   :30       NA's   :53        NA's   :53       
+##    zero_till           nfert            arable        grassland    
+##  Min.   :0.00000   Min.   : 0.000   Min.   : 0.00   Min.   : 0.00  
+##  1st Qu.:0.00920   1st Qu.: 6.448   1st Qu.:39.68   1st Qu.:17.20  
+##  Median :0.01843   Median : 9.917   Median :62.28   Median :32.97  
+##  Mean   :0.03000   Mean   :10.975   Mean   :57.72   Mean   :35.74  
+##  3rd Qu.:0.04003   3rd Qu.:14.254   3rd Qu.:78.19   3rd Qu.:48.62  
+##  Max.   :0.19303   Max.   :29.456   Max.   :99.28   Max.   :98.84  
+##  NA's   :53        NA's   :11       NA's   :44      NA's   :44     
+##    permanent         soil_prod      irrig_rate          afi_awu      
+##  Min.   : 0.0000   Min.   :3.00   Min.   :   0.000   Min.   : -3221  
+##  1st Qu.: 0.3546   1st Qu.:6.00   1st Qu.:   1.181   1st Qu.: 11878  
+##  Median : 1.1324   Median :6.00   Median :   7.396   Median : 20559  
+##  Mean   : 5.7197   Mean   :6.45   Mean   : 157.441   Mean   : 24680  
+##  3rd Qu.: 5.6520   3rd Qu.:7.00   3rd Qu.:  82.321   3rd Qu.: 34388  
+##  Max.   :64.6743   Max.   :8.00   Max.   :4156.725   Max.   :107266  
+##  NA's   :44        NA's   :51     NA's   :22         NA's   :29      
+##     gva_awu           labour_use        pest_rate          gdp_cap      
+##  Min.   :   697.4   Min.   :0.00000   Min.   : 0.3874   Min.   :  3800  
+##  1st Qu.: 10428.3   1st Qu.:0.01049   1st Qu.: 1.2195   1st Qu.: 16350  
+##  Median : 24639.3   Median :0.01976   Median : 1.8836   Median : 26500  
+##  Mean   : 26611.6   Mean   :0.03594   Mean   : 2.4120   Mean   : 26832  
+##  3rd Qu.: 38162.3   3rd Qu.:0.04539   3rd Qu.: 3.1595   3rd Qu.: 33750  
+##  Max.   :122952.6   Max.   :0.22557   Max.   :13.1415   Max.   :191400  
+##  NA's   :29         NA's   :44        NA's   :37        NA's   :44      
+##     pps_cap          emp_rate       tot_unemp        yth_unemp    
+##  Min.   :  8200   Min.   :37.87   Min.   : 2.100   Min.   : 4.20  
+##  1st Qu.: 19750   1st Qu.:62.16   1st Qu.: 4.700   1st Qu.:10.60  
+##  Median : 24750   Median :67.91   Median : 6.850   Median :17.00  
+##  Mean   : 26526   Mean   :66.82   Mean   : 8.767   Mean   :21.39  
+##  3rd Qu.: 31025   3rd Qu.:73.84   3rd Qu.:10.600   3rd Qu.:29.10  
+##  Max.   :163500   Max.   :81.42   Max.   :31.300   Max.   :69.10  
+##  NA's   :44       NA's   :44      NA's   :44       NA's   :44     
+##     rur_gdp         rur_pps         int_gdp         int_pps     
+##  Min.   : 3117   Min.   : 6723   Min.   : 3585   Min.   : 7737  
+##  1st Qu.:11513   1st Qu.:15264   1st Qu.:13307   1st Qu.:17891  
+##  Median :22369   Median :20976   Median :25837   Median :24042  
+##  Mean   :20844   Mean   :21122   Mean   :24197   Mean   :24202  
+##  3rd Qu.:27611   3rd Qu.:25354   3rd Qu.:32205   3rd Qu.:28793  
+##  Max.   :46295   Max.   :42716   Max.   :88303   Max.   :73498  
+##  NA's   :164     NA's   :164     NA's   :112     NA's   :112    
+##     urb_gdp          urb_pps          C_factor        emi_co2eq      
+##  Min.   :  5329   Min.   : 10761   Min.   :0.1477   Min.   :      0  
+##  1st Qu.: 23998   1st Qu.: 24530   1st Qu.:0.1959   1st Qu.: 745804  
+##  Median : 30497   Median : 29715   Median :0.2249   Median :1121468  
+##  Mean   : 33196   Mean   : 32561   Mean   :0.2356   Mean   :1466069  
+##  3rd Qu.: 39796   3rd Qu.: 37017   3rd Qu.:0.2631   3rd Qu.:1804807  
+##  Max.   :191423   Max.   :163515   Max.   :0.4395   Max.   :7270058  
+##  NA's   :184      NA's   :184      NA's   :54       NA's   :2        
+##     emi_nh3         emi_pm10       emi_pm25           soc        
+##  Min.   :    0   Min.   :   0   Min.   :   0.0   Min.   : 31.86  
+##  1st Qu.: 8322   1st Qu.: 434   1st Qu.: 228.1   1st Qu.: 61.59  
+##  Median :13177   Median :1012   Median : 608.9   Median : 81.03  
+##  Mean   :16252   Mean   :1269   Mean   : 765.3   Mean   : 92.96  
+##  3rd Qu.:19954   3rd Qu.:1759   3rd Qu.:1096.7   3rd Qu.:109.75  
+##  Max.   :88272   Max.   :7418   Max.   :4020.6   Max.   :374.18  
+##  NA's   :2       NA's   :2      NA's   :2        NA's   :40      
+##    bio_threat       nat2000_ag         nat2000_pr         cal_frac     
+##  Min.   :0.1920   Min.   :0.000000   Min.   :0.00000   Min.   :0.1612  
+##  1st Qu.:0.2507   1st Qu.:0.006678   1st Qu.:0.03956   1st Qu.:0.3928  
+##  Median :0.2858   Median :0.044534   Median :0.12229   Median :0.4986  
+##  Mean   :0.2981   Mean   :0.065957   Mean   :0.13586   Mean   :0.5239  
+##  3rd Qu.:0.3192   3rd Qu.:0.097822   3rd Qu.:0.21187   3rd Qu.:0.6305  
+##  Max.   :0.6029   Max.   :0.507382   Max.   :0.49982   Max.   :0.9640  
+##  NA's   :43       NA's   :2          NA's   :2         NA's   :8       
+##      precip          deg_days      crop_suit         rye_f          
+##  Min.   : 261.3   Min.   : 242   Min.   :0.000   Min.   :0.0000000  
+##  1st Qu.: 610.6   1st Qu.:1536   1st Qu.:3.141   1st Qu.:0.0001763  
+##  Median : 745.8   Median :1711   Median :4.267   Median :0.0010613  
+##  Mean   : 800.3   Mean   :2016   Mean   :4.149   Mean   :0.0078947  
+##  3rd Qu.: 882.9   3rd Qu.:2246   3rd Qu.:5.202   3rd Qu.:0.0069115  
+##  Max.   :2707.9   Max.   :7765   Max.   :6.000   Max.   :0.1270547  
+##                                  NA's   :2       NA's   :1          
+##     barley_f          maize_f            tritic_f        
+##  Min.   :0.00000   Min.   :0.000000   Min.   :0.0000000  
+##  1st Qu.:0.01824   1st Qu.:0.000236   1st Qu.:0.0006165  
+##  Median :0.04720   Median :0.008416   Median :0.0028821  
+##  Mean   :0.05499   Mean   :0.029646   Mean   :0.0090348  
+##  3rd Qu.:0.08157   3rd Qu.:0.033520   3rd Qu.:0.0101557  
+##  Max.   :0.22525   Max.   :0.319030   Max.   :0.1181380  
+##  NA's   :1         NA's   :1          NA's   :1          
+##    sorghum_f           oth_cer_f           rice_f        
+##  Min.   :0.0000000   Min.   :0.00000   Min.   :0.000000  
+##  1st Qu.:0.0000000   1st Qu.:0.00000   1st Qu.:0.000000  
+##  Median :0.0000000   Median :0.00008   Median :0.000000  
+##  Mean   :0.0003367   Mean   :0.00097   Mean   :0.001738  
+##  3rd Qu.:0.0000793   3rd Qu.:0.00128   3rd Qu.:0.000000  
+##  Max.   :0.0180664   Max.   :0.01778   Max.   :0.107097  
+##  NA's   :1           NA's   :65        NA's   :1         
+##    pasture_f            rape_f            sunflow_f       
+##  Min.   :0.000000   Min.   :0.0000000   Min.   :0.000000  
+##  1st Qu.:0.006223   1st Qu.:0.0002153   1st Qu.:0.000000  
+##  Median :0.021880   Median :0.0125632   Median :0.000192  
+##  Mean   :0.047618   Mean   :0.0268691   Mean   :0.011671  
+##  3rd Qu.:0.084760   3rd Qu.:0.0427553   3rd Qu.:0.004120  
+##  Max.   :0.412707   Max.   :0.1461908   Max.   :0.218639  
+##  NA's   :8          NA's   :1           NA's   :1         
+##     pulses_f           potato_f          sugbeet_f       
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.001809   1st Qu.:0.002072   1st Qu.:0.000000  
+##  Median :0.004299   Median :0.005518   Median :0.001072  
+##  Mean   :0.008475   Mean   :0.012031   Mean   :0.009541  
+##  3rd Qu.:0.011267   3rd Qu.:0.013376   3rd Qu.:0.013027  
+##  Max.   :0.064771   Max.   :0.188248   Max.   :0.113660  
+##  NA's   :1          NA's   :1          NA's   :1         
+##     oth_rt_f          wheat_f             oats_f         
+##  Min.   :0.00000   Min.   :0.000000   Min.   :0.0000000  
+##  1st Qu.:0.00000   1st Qu.:0.006516   1st Qu.:0.0002925  
+##  Median :0.00015   Median :0.103025   Median :0.0037227  
+##  Mean   :0.00091   Mean   :0.103869   Mean   :0.0096505  
+##  3rd Qu.:0.00108   3rd Qu.:0.170883   3rd Qu.:0.0106135  
+##  Max.   :0.01231   Max.   :0.389937   Max.   :0.1912935  
+##  NA's   :35        NA's   :1          NA's   :1          
+##    oth_oil_f           fibre_f            oth_ind_f        
+##  Min.   :0.000000   Min.   :0.0000000   Min.   :0.0000000  
+##  1st Qu.:0.000000   1st Qu.:0.0000000   1st Qu.:0.0000000  
+##  Median :0.000000   Median :0.0000000   Median :0.0002218  
+##  Mean   :0.004699   Mean   :0.0030096   Mean   :0.0015573  
+##  3rd Qu.:0.001537   3rd Qu.:0.0000127   3rd Qu.:0.0010943  
+##  Max.   :0.190498   Max.   :0.1513896   Max.   :0.0356431  
+##  NA's   :1          NA's   :1           NA's   :1          
+##     fodder_f            rye_y           barley_y         maize_y      
+##  Min.   :0.000000   Min.   :0.4454   Min.   :0.1254   Min.   : 1.000  
+##  1st Qu.:0.007706   1st Qu.:2.6135   1st Qu.:3.0860   1st Qu.: 6.320  
+##  Median :0.032609   Median :3.5000   Median :4.7376   Median : 8.100  
+##  Mean   :0.045518   Mean   :3.7258   Mean   :4.6243   Mean   : 7.929  
+##  3rd Qu.:0.069995   3rd Qu.:5.0000   3rd Qu.:6.1387   3rd Qu.: 9.876  
+##  Max.   :0.253901   Max.   :7.1428   Max.   :8.4666   Max.   :12.976  
+##  NA's   :1          NA's   :20       NA's   :14       NA's   :28      
+##     tritic_y        sorghum_y       oth_cer_y          rice_y      
+##  Min.   :0.5839   Min.   :1.000   Min.   : 0.500   Min.   : 1.857  
+##  1st Qu.:2.9683   1st Qu.:3.173   1st Qu.: 1.465   1st Qu.: 4.567  
+##  Median :4.0000   Median :4.000   Median : 2.174   Median : 5.051  
+##  Mean   :3.9796   Mean   :4.349   Mean   : 2.423   Mean   : 5.352  
+##  3rd Qu.:5.2188   3rd Qu.:5.675   3rd Qu.: 3.272   3rd Qu.: 5.985  
+##  Max.   :7.3594   Max.   :9.044   Max.   :10.750   Max.   :10.000  
+##  NA's   :23       NA's   :175     NA's   :149      NA's   :179     
+##    pasture_y           rape_y           sunflow_y        pulses_y      
+##  Min.   : 0.0003   Min.   :0.006547   Min.   :0.700   Min.   :0.09302  
+##  1st Qu.: 4.3362   1st Qu.:2.460195   1st Qu.:1.866   1st Qu.:1.65220  
+##  Median : 7.3664   Median :3.185760   Median :2.203   Median :2.20927  
+##  Mean   : 9.6893   Mean   :2.975625   Mean   :2.239   Mean   :2.23948  
+##  3rd Qu.:10.1248   3rd Qu.:3.544947   3rd Qu.:2.686   3rd Qu.:2.77736  
+##  Max.   :49.5135   Max.   :5.000000   Max.   :4.393   Max.   :5.00245  
+##  NA's   :108       NA's   :22         NA's   :105     NA's   :25       
+##     potato_y        sugbeet_y         oth_rt_y         wheat_y      
+##  Min.   : 4.361   Min.   :  5.00   Min.   : 9.333   Min.   :0.6667  
+##  1st Qu.:24.144   1st Qu.: 57.45   1st Qu.:12.000   1st Qu.:3.3922  
+##  Median :31.598   Median : 67.95   Median :24.857   Median :5.1819  
+##  Mean   :31.469   Mean   : 65.07   Mean   :32.544   Mean   :5.3029  
+##  3rd Qu.:40.329   3rd Qu.: 76.28   3rd Qu.:52.350   3rd Qu.:7.4295  
+##  Max.   :53.327   Max.   :107.50   Max.   :94.396   Max.   :9.2898  
+##  NA's   :7        NA's   :34       NA's   :174      NA's   :9       
+##      oats_y         oth_oil_y         fibre_y        oth_ind_y       
+##  Min.   :0.5543   Min.   :0.1428   Min.   :0.350   Min.   : 0.02671  
+##  1st Qu.:2.3556   1st Qu.:1.6668   1st Qu.:1.523   1st Qu.: 0.98452  
+##  Median :3.6032   Median :2.0000   Median :2.750   Median : 1.88764  
+##  Mean   :3.5905   Mean   :2.1776   Mean   :3.104   Mean   : 2.98676  
+##  3rd Qu.:4.7295   3rd Qu.:2.8162   3rd Qu.:4.838   3rd Qu.: 3.26371  
+##  Max.   :7.2000   Max.   :5.0000   Max.   :8.333   Max.   :13.36992  
+##  NA's   :14       NA's   :50       NA's   :146     NA's   :54        
+##     fodder_y         berries_y          brassic_y       
+##  Min.   : 0.6603   Min.   : 0.00396   Min.   : 0.03086  
+##  1st Qu.:12.5398   1st Qu.:18.60179   1st Qu.:29.19964  
+##  Median :26.1963   Median :25.99651   Median :37.54545  
+##  Mean   :25.7794   Mean   :26.03682   Mean   :38.47796  
+##  3rd Qu.:38.1261   3rd Qu.:34.90468   3rd Qu.:43.59471  
+##  Max.   :51.0516   Max.   :57.13000   Max.   :77.33000  
+##  NA's   :12        NA's   :21         NA's   :21        
+##     citrus_y           frtrees_y            grapes_y     
+##  Min.   :  0.02187   Min.   :  0.00316   Min.   : 0.000  
+##  1st Qu.: 43.38803   1st Qu.: 41.05565   1st Qu.: 1.878  
+##  Median : 73.80700   Median : 62.58810   Median : 4.266  
+##  Mean   : 67.53417   Mean   : 63.80520   Mean   : 4.486  
+##  3rd Qu.:101.62819   3rd Qu.: 88.15527   3rd Qu.: 6.531  
+##  Max.   :122.22785   Max.   :180.73000   Max.   :13.714  
+##  NA's   :198         NA's   :20          NA's   :79      
+##     greens_y           nuts_y           olives_y        oth_veg_y       
+##  Min.   : 0.0116   Min.   : 0.0000   Min.   :0.0000   Min.   : 0.00974  
+##  1st Qu.:23.1703   1st Qu.: 0.4243   1st Qu.:0.8311   1st Qu.: 9.32000  
+##  Median :37.2801   Median : 3.2225   Median :1.5341   Median :14.55378  
+##  Mean   :34.3396   Mean   : 4.8389   Mean   :1.3702   Mean   :16.73389  
+##  3rd Qu.:43.8130   3rd Qu.: 8.3606   3rd Qu.:1.8099   3rd Qu.:18.99319  
+##  Max.   :71.3000   Max.   :22.8630   Max.   :4.2988   Max.   :41.34000  
+##  NA's   :28        NA's   :44        NA's   :216      NA's   :20        
+##      peas_y           rootveg_y           tropfr_y       
+##  Min.   : 0.00246   Min.   :  0.0421   Min.   : 0.00243  
+##  1st Qu.:12.93950   1st Qu.: 43.9722   1st Qu.:25.81930  
+##  Median :21.42656   Median : 71.1353   Median :37.45034  
+##  Mean   :21.45191   Mean   : 71.1527   Mean   :36.78523  
+##  3rd Qu.:28.22034   3rd Qu.: 93.5171   3rd Qu.:48.14715  
+##  Max.   :44.57000   Max.   :170.1700   Max.   :88.52000  
+##  NA's   :20         NA's   :21         NA's   :189       
+##    vfruits_y           berries_f          brassic_f       
+##  Min.   :   0.1976   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.: 117.0439   1st Qu.:0.000277   1st Qu.:0.000717  
+##  Median : 181.0226   Median :0.000537   Median :0.001163  
+##  Mean   : 296.2257   Mean   :0.001068   Mean   :0.001651  
+##  3rd Qu.: 307.1032   3rd Qu.:0.001378   3rd Qu.:0.002102  
+##  Max.   :1194.7400   Max.   :0.009374   Max.   :0.011558  
+##  NA's   :20          NA's   :4          NA's   :4         
+##     citrus_f          frtrees_f           grapes_f       
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000000   1st Qu.:0.001741   1st Qu.:0.000000  
+##  Median :0.000000   Median :0.007025   Median :0.000322  
+##  Mean   :0.002161   Mean   :0.007964   Mean   :0.014954  
+##  3rd Qu.:0.001833   3rd Qu.:0.010316   3rd Qu.:0.017370  
+##  Max.   :0.029824   Max.   :0.092826   Max.   :0.268339  
+##  NA's   :4          NA's   :4          NA's   :4         
+##     greens_f            nuts_f            olives_f       
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000241   1st Qu.:0.000000   1st Qu.:0.000000  
+##  Median :0.000839   Median :0.000212   Median :0.000000  
+##  Mean   :0.001398   Mean   :0.004343   Mean   :0.015398  
+##  3rd Qu.:0.001392   3rd Qu.:0.002597   3rd Qu.:0.000905  
+##  Max.   :0.015696   Max.   :0.036033   Max.   :0.452390  
+##  NA's   :4          NA's   :4          NA's   :4         
+##    oth_veg_f            peas_f           rootveg_f       
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
+##  1st Qu.:0.000530   1st Qu.:0.000390   1st Qu.:0.000697  
+##  Median :0.001639   Median :0.001316   Median :0.001438  
+##  Mean   :0.002617   Mean   :0.001804   Mean   :0.002112  
+##  3rd Qu.:0.003725   3rd Qu.:0.002598   3rd Qu.:0.002655  
+##  Max.   :0.019446   Max.   :0.021036   Max.   :0.018225  
+##  NA's   :4          NA's   :4          NA's   :4         
+##     tropfr_f          vfruits_f            bovine         milk_cows      
+##  Min.   :0.000000   Min.   :0.000000   Min.   :0.0000   Min.   :0.00000  
+##  1st Qu.:0.000000   1st Qu.:0.000118   1st Qu.:0.1913   1st Qu.:0.04591  
+##  Median :0.000000   Median :0.000881   Median :0.4288   Median :0.10738  
+##  Mean   :0.001053   Mean   :0.004773   Mean   :0.5536   Mean   :0.16021  
+##  3rd Qu.:0.000394   3rd Qu.:0.006812   3rd Qu.:0.7943   3rd Qu.:0.19825  
+##  Max.   :0.043473   Max.   :0.041715   Max.   :2.8302   Max.   :1.04331  
+##  NA's   :4          NA's   :4          NA's   :8        NA's   :8        
+##       pigs             sheep             goats        
+##  Min.   : 0.0000   Min.   :0.00000   Min.   :0.00000  
+##  1st Qu.: 0.1446   1st Qu.:0.07257   1st Qu.:0.00660  
+##  Median : 0.2961   Median :0.27634   Median :0.02031  
+##  Mean   : 0.8674   Mean   :0.58262   Mean   :0.11852  
+##  3rd Qu.: 0.7611   3rd Qu.:0.90416   3rd Qu.:0.12719  
+##  Max.   :17.2590   Max.   :5.22537   Max.   :2.33485  
+##  NA's   :34        NA's   :36        NA's   :36
+
# Replace every NA in the attribute table of dbase.shp with the sentinel
# value -99999, then print a summary of the recoded table.
# Presumably the sentinel is a no-data code for the shapefile export further
# down -- TODO confirm the intent.
# NOTE(review): after this step the sentinel takes part in every numeric
# statistic. The summary recorded below no longer reports NA's, and its
# Min./Mean/quartile values are dominated by -99999 (e.g. the mean of risk_pov
# becomes -599.20), so consumers must mask -99999 before any analysis.
# NOTE(review): CNTR_CODE and NUTS_NAME are summarised as level counts, which
# suggests factors; a numeric sentinel cannot be stored in a factor, so this
# only works cleanly while those columns contain no NA -- verify.
dbase.shp@data[is.na(dbase.shp@data)] <- -99999
+summary(dbase.shp@data)
+
##    CNTR_CODE     NUTS_ID                                  NUTS_NAME  
+##  UK     : 40   Length:320         Östra Mellansverige         :  1  
+##  DE     : 38   Class :character   Övre Norrland               :  1  
+##  FR     : 27   Mode  :character   Åland                       :  1  
+##  TR     : 26                      Észak-Alföld               :  1  
+##  IT     : 21                      Észak-Magyarország         :  1  
+##  ES     : 19                      Área Metropolitana de Lisboa:  1  
+##  (Other):149                      (Other)                      :314  
+##   Shape_Length        Shape_Area           risk_pov        
+##  Min.   :   15458   Min.   :1.304e+07   Min.   :-99999.00  
+##  1st Qu.:  545979   1st Qu.:5.074e+09   1st Qu.:    18.59  
+##  Median :  836914   Median :1.131e+10   Median :    23.41  
+##  Mean   : 1114048   Mean   :1.799e+10   Mean   :  -599.20  
+##  3rd Qu.: 1200811   3rd Qu.:2.403e+10   3rd Qu.:    29.68  
+##  Max.   :18760323   Max.   :2.271e+11   Max.   :    54.15  
+##                                                            
+##    train35bas          train35ful          train_bas        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.06   1st Qu.:     0.03   1st Qu.:     0.07  
+##  Median :     0.17   Median :     0.21   Median :     0.16  
+##  Mean   :-16249.62   Mean   :-16249.61   Mean   :-15624.62  
+##  3rd Qu.:     0.29   3rd Qu.:     0.36   3rd Qu.:     0.33  
+##  Max.   :     0.88   Max.   :     0.86   Max.   :     0.95  
+##                                                             
+##    train_ful           nitr_high            nitr_mod        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.02   1st Qu.:    63.59   1st Qu.:     2.39  
+##  Median :     0.09   Median :    66.58   Median :    11.50  
+##  Mean   :-15624.71   Mean   :-13684.89   Mean   :-13737.98  
+##  3rd Qu.:     0.23   3rd Qu.:    87.59   3rd Qu.:    18.42  
+##  Max.   :     0.50   Max.   :   100.00   Max.   :    60.00  
+##                                                             
+##    nitr_poor           irrigated             forest       
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :0.00000  
+##  1st Qu.:     0.45   1st Qu.:     0.22   1st Qu.:0.08957  
+##  Median :     8.37   Median :     0.86   Median :0.24904  
+##  Mean   :-13740.46   Mean   : -8744.66   Mean   :0.25141  
+##  3rd Qu.:    15.20   3rd Qu.:     5.79   3rd Qu.:0.37365  
+##  Max.   :    68.37   Max.   :    74.55   Max.   :0.75860  
+##                                                           
+##     artific          soil_loss           com_birds        
+##  Min.   :0.00000   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:0.02056   1st Qu.:     0.39   1st Qu.:-99999.00  
+##  Median :0.04020   Median :     1.32   Median :    54.92  
+##  Mean   :0.09023   Mean   :-13747.66   Mean   :-49339.22  
+##  3rd Qu.:0.08193   3rd Qu.:     2.48   3rd Qu.:    69.50  
+##  Max.   :1.00000   Max.   :    17.61   Max.   :    97.22  
+##                                                           
+##    farm_birds           org_farm           energy_rt        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:-99999.00   1st Qu.:     0.75   1st Qu.:     0.00  
+##  Median :    81.84   Median :     2.41   Median :     0.05  
+##  Mean   :-29316.86   Mean   : -8746.21   Mean   :-13749.73  
+##  3rd Qu.:    84.12   3rd Qu.:     5.04   3rd Qu.:     0.10  
+##  Max.   :   116.60   Max.   :    27.49   Max.   :     1.75  
+##                                                             
+##    renew_pct           renew_prod           gross_N         
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:-99999.00   1st Qu.:     4.57   1st Qu.:    34.71  
+##  Median :     3.39   Median :     7.18   Median :    57.83  
+##  Mean   :-25616.39   Mean   :-14051.68   Mean   : -9313.69  
+##  3rd Qu.:    14.33   3rd Qu.:    11.85   3rd Qu.:    80.29  
+##  Max.   :    41.01   Max.   :    37.78   Max.   :   190.17  
+##                                                             
+##     gross_P            conv_till           cons_till        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:    -1.83   1st Qu.:     0.35   1st Qu.:     0.02  
+##  Median :     0.57   Median :     0.55   Median :     0.08  
+##  Mean   : -9373.15   Mean   :-16561.83   Mean   :-16562.18  
+##  3rd Qu.:     4.71   3rd Qu.:     0.73   3rd Qu.:     0.25  
+##  Max.   :    31.00   Max.   :     1.00   Max.   :     0.65  
+##                                                             
+##    zero_till             nfert               arable         
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     6.34   1st Qu.:    26.90  
+##  Median :     0.01   Median :     9.57   Median :    58.37  
+##  Mean   :-16562.31   Mean   : -3426.87   Mean   :-13700.08  
+##  3rd Qu.:     0.03   3rd Qu.:    14.07   3rd Qu.:    74.18  
+##  Max.   :     0.19   Max.   :    29.46   Max.   :    99.28  
+##                                                             
+##    grassland           permanent           soil_prod     
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999  
+##  1st Qu.:    11.45   1st Qu.:     0.12   1st Qu.:     5  
+##  Median :    27.66   Median :     0.81   Median :     6  
+##  Mean   :-13719.04   Mean   :-13744.93   Mean   :-15932  
+##  3rd Qu.:    43.29   3rd Qu.:     3.94   3rd Qu.:     7  
+##  Max.   :    98.84   Max.   :    64.67   Max.   :     8  
+##                                                          
+##    irrig_rate           afi_awu          gva_awu         labour_use       
+##  Min.   :-99999.00   Min.   :-99999   Min.   :-99999   Min.   :-99999.00  
+##  1st Qu.:     1.18   1st Qu.:  8079   1st Qu.:  8085   1st Qu.:     0.01  
+##  Median :     6.26   Median : 19953   Median : 22316   Median :     0.02  
+##  Mean   : -6728.31   Mean   : 13381   Mean   : 15137   Mean   :-13749.83  
+##  3rd Qu.:    72.59   3rd Qu.: 34388   3rd Qu.: 36813   3rd Qu.:     0.04  
+##  Max.   :  4156.72   Max.   :107266   Max.   :122953   Max.   :     0.23  
+##                                                                           
+##    pest_rate            gdp_cap          pps_cap          emp_rate        
+##  Min.   :-99999.00   Min.   :-99999   Min.   :-99999   Min.   :-99999.00  
+##  1st Qu.:     1.21   1st Qu.: 11100   1st Qu.: 16075   1st Qu.:    56.43  
+##  Median :     1.59   Median : 24500   Median : 23300   Median :    65.76  
+##  Mean   :-11560.25   Mean   :  9393   Mean   :  9129   Mean   :-13692.23  
+##  3rd Qu.:     2.74   3rd Qu.: 32225   3rd Qu.: 29625   3rd Qu.:    73.05  
+##  Max.   :    13.14   Max.   :191400   Max.   :163500   Max.   :    81.42  
+##                                                                           
+##    tot_unemp           yth_unemp            rur_gdp          rur_pps      
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999   Min.   :-99999  
+##  1st Qu.:     3.70   1st Qu.:     8.50   1st Qu.:-99999   1st Qu.:-99999  
+##  Median :     5.80   Median :    14.95   Median :-99999   Median :-99999  
+##  Mean   :-13742.30   Mean   :-13731.41   Mean   :-41088   Mean   :-40953  
+##  3rd Qu.:     9.53   3rd Qu.:    26.18   3rd Qu.: 22148   3rd Qu.: 20730  
+##  Max.   :    31.30   Max.   :    69.10   Max.   : 46295   Max.   : 42716  
+##                                                                           
+##     int_gdp          int_pps          urb_gdp          urb_pps      
+##  Min.   :-99999   Min.   :-99999   Min.   :-99999   Min.   :-99999  
+##  1st Qu.:-99999   1st Qu.:-99999   1st Qu.:-99999   1st Qu.:-99999  
+##  Median : 12526   Median : 17513   Median :-99999   Median :-99999  
+##  Mean   :-19271   Mean   :-19268   Mean   :-43391   Mean   :-43661  
+##  3rd Qu.: 28298   3rd Qu.: 26526   3rd Qu.: 29111   3rd Qu.: 28289  
+##  Max.   : 88303   Max.   : 73498   Max.   :191423   Max.   :163515  
+##                                                                     
+##     C_factor           emi_co2eq          emi_nh3          emi_pm10       
+##  Min.   :-99999.00   Min.   : -99999   Min.   :-99999   Min.   :-99999.0  
+##  1st Qu.:     0.18   1st Qu.: 715768   1st Qu.:  8216   1st Qu.:   424.2  
+##  Median :     0.21   Median :1113299   Median : 13149   Median :   994.7  
+##  Mean   :-16874.64   Mean   :1456282   Mean   : 15525   Mean   :   636.4  
+##  3rd Qu.:     0.25   3rd Qu.:1800482   3rd Qu.: 19949   3rd Qu.:  1756.6  
+##  Max.   :     0.44   Max.   :7270058   Max.   : 88272   Max.   :  7417.5  
+##                                                                           
+##     emi_pm25             soc              bio_threat       
+##  Min.   :-99999.0   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:   223.8   1st Qu.:    52.99   1st Qu.:     0.23  
+##  Median :   602.1   Median :    73.82   Median :     0.27  
+##  Mean   :   135.5   Mean   :-12418.53   Mean   :-13437.11  
+##  3rd Qu.:  1090.5   3rd Qu.:   104.25   3rd Qu.:     0.31  
+##  Max.   :  4020.6   Max.   :   374.18   Max.   :     0.60  
+##                                                            
+##    nat2000_ag          nat2000_pr           cal_frac        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.01   1st Qu.:     0.04   1st Qu.:     0.39  
+##  Median :     0.04   Median :     0.12   Median :     0.49  
+##  Mean   :  -624.93   Mean   :  -624.86   Mean   : -2499.46  
+##  3rd Qu.:     0.10   3rd Qu.:     0.21   3rd Qu.:     0.63  
+##  Max.   :     0.51   Max.   :     0.50   Max.   :     0.96  
+##                                                             
+##      precip          deg_days      crop_suit             rye_f          
+##  Min.   : 261.3   Min.   : 242   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.: 610.6   1st Qu.:1536   1st Qu.:     3.13   1st Qu.:     0.00  
+##  Median : 745.8   Median :1711   Median :     4.25   Median :     0.00  
+##  Mean   : 800.3   Mean   :2016   Mean   :  -620.87   Mean   :  -312.49  
+##  3rd Qu.: 882.9   3rd Qu.:2246   3rd Qu.:     5.20   3rd Qu.:     0.01  
+##  Max.   :2707.9   Max.   :7765   Max.   :     6.00   Max.   :     0.13  
+##                                                                         
+##     barley_f            maize_f             tritic_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.02   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.05   Median :     0.01   Median :     0.00  
+##  Mean   :  -312.44   Mean   :  -312.47   Mean   :  -312.49  
+##  3rd Qu.:     0.08   3rd Qu.:     0.03   3rd Qu.:     0.01  
+##  Max.   :     0.23   Max.   :     0.32   Max.   :     0.12  
+##                                                             
+##    sorghum_f           oth_cer_f             rice_f         
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.00   Median :     0.00  
+##  Mean   :  -312.50   Mean   :-20312.30   Mean   :  -312.50  
+##  3rd Qu.:     0.00   3rd Qu.:     0.00   3rd Qu.:     0.00  
+##  Max.   :     0.02   Max.   :     0.02   Max.   :     0.11  
+##                                                             
+##    pasture_f            rape_f            sunflow_f        
+##  Min.   :-1.0e+05   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.: 1.0e-02   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median : 2.0e-02   Median :     0.01   Median :     0.00  
+##  Mean   :-2.5e+03   Mean   :  -312.47   Mean   :  -312.49  
+##  3rd Qu.: 8.0e-02   3rd Qu.:     0.04   3rd Qu.:     0.00  
+##  Max.   : 4.1e-01   Max.   :     0.15   Max.   :     0.22  
+##                                                            
+##     pulses_f            potato_f           sugbeet_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.01   Median :     0.00  
+##  Mean   :  -312.49   Mean   :  -312.48   Mean   :  -312.49  
+##  3rd Qu.:     0.01   3rd Qu.:     0.01   3rd Qu.:     0.01  
+##  Max.   :     0.06   Max.   :     0.19   Max.   :     0.11  
+##                                                             
+##     oth_rt_f            wheat_f              oats_f         
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.01   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.10   Median :     0.00  
+##  Mean   :-10937.39   Mean   :  -312.39   Mean   :  -312.49  
+##  3rd Qu.:     0.00   3rd Qu.:     0.17   3rd Qu.:     0.01  
+##  Max.   :     0.01   Max.   :     0.39   Max.   :     0.19  
+##                                                             
+##    oth_oil_f            fibre_f            oth_ind_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.00   Median :     0.00  
+##  Mean   :  -312.49   Mean   :  -312.49   Mean   :  -312.50  
+##  3rd Qu.:     0.00   3rd Qu.:     0.00   3rd Qu.:     0.00  
+##  Max.   :     0.19   Max.   :     0.15   Max.   :     0.04  
+##                                                             
+##     fodder_f             rye_y              barley_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.01   1st Qu.:     2.41   1st Qu.:     2.89  
+##  Median :     0.03   Median :     3.33   Median :     4.59  
+##  Mean   :  -312.45   Mean   : -6246.44   Mean   : -4370.53  
+##  3rd Qu.:     0.07   3rd Qu.:     4.91   3rd Qu.:     6.12  
+##  Max.   :     0.25   Max.   :     7.14   Max.   :     8.47  
+##                                                             
+##     maize_y             tritic_y           sorghum_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     5.95   1st Qu.:     2.40   1st Qu.:-99999.00  
+##  Median :     7.35   Median :     3.82   Median :-99999.00  
+##  Mean   : -8742.68   Mean   : -7183.73   Mean   :-54684.98  
+##  3rd Qu.:     9.83   3rd Qu.:     5.05   3rd Qu.:     4.00  
+##  Max.   :    12.98   Max.   :     7.36   Max.   :     9.04  
+##                                                             
+##    oth_cer_y             rice_y            pasture_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:-99999.00   1st Qu.:-99999.00   1st Qu.:-99999.00  
+##  Median :     1.00   Median :-99999.00   Median :     4.32  
+##  Mean   :-46560.74   Mean   :-55934.58   Mean   :-33743.24  
+##  3rd Qu.:     2.17   3rd Qu.:     5.05   3rd Qu.:     8.46  
+##  Max.   :    10.75   Max.   :    10.00   Max.   :    49.51  
+##                                                             
+##      rape_y            sunflow_y            pulses_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     2.23   1st Qu.:-99999.00   1st Qu.:     1.41  
+##  Median :     3.14   Median :     1.88   Median :     2.12  
+##  Mean   : -6872.16   Mean   :-32810.67   Mean   : -7810.36  
+##  3rd Qu.:     3.53   3rd Qu.:     2.39   3rd Qu.:     2.75  
+##  Max.   :     5.00   Max.   :     4.39   Max.   :     5.00  
+##                                                             
+##     potato_y           sugbeet_y            oth_rt_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:    22.96   1st Qu.:    50.47   1st Qu.:-99999.00  
+##  Median :    30.52   Median :    64.07   Median :-99999.00  
+##  Mean   : -2156.70   Mean   :-10566.73   Mean   :-54359.61  
+##  3rd Qu.:    40.33   3rd Qu.:    74.69   3rd Qu.:    22.96  
+##  Max.   :    53.33   Max.   :   107.50   Max.   :    94.40  
+##                                                             
+##     wheat_y              oats_y            oth_oil_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     3.21   1st Qu.:     2.21   1st Qu.:     1.05  
+##  Median :     5.15   Median :     3.37   Median :     1.92  
+##  Mean   : -2807.32   Mean   : -4371.52   Mean   :-15623.01  
+##  3rd Qu.:     7.39   3rd Qu.:     4.72   3rd Qu.:     2.71  
+##  Max.   :     9.29   Max.   :     7.20   Max.   :     5.00  
+##                                                             
+##     fibre_y            oth_ind_y            fodder_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:-99999.00   1st Qu.:     0.72   1st Qu.:    12.34  
+##  Median :     1.00   Median :     1.46   Median :    24.85  
+##  Mean   :-45622.86   Mean   :-16872.35   Mean   : -3725.15  
+##  3rd Qu.:     3.08   3rd Qu.:     3.14   3rd Qu.:    38.13  
+##  Max.   :     8.33   Max.   :    13.37   Max.   :    51.05  
+##                                                             
+##    berries_y           brassic_y            citrus_y       
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.0  
+##  1st Qu.:    16.55   1st Qu.:    27.35   1st Qu.:-99999.0  
+##  Median :    24.95   Median :    36.47   Median :-99999.0  
+##  Mean   : -6538.11   Mean   : -6526.48   Mean   :-61848.6  
+##  3rd Qu.:    34.52   3rd Qu.:    41.87   3rd Qu.:    52.3  
+##  Max.   :    57.13   Max.   :    77.33   Max.   :   122.2  
+##                                                            
+##    frtrees_y            grapes_y            greens_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:    38.96   1st Qu.:     0.00   1st Qu.:    18.83  
+##  Median :    58.94   Median :     2.27   Median :    35.35  
+##  Mean   : -6190.12   Mean   :-24683.87   Mean   : -8718.58  
+##  3rd Qu.:    87.48   3rd Qu.:     6.17   3rd Qu.:    43.23  
+##  Max.   :   180.73   Max.   :    13.71   Max.   :    71.30  
+##                                                             
+##      nuts_y             olives_y           oth_veg_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:-99999.00   1st Qu.:     9.18  
+##  Median :     2.91   Median :-99999.00   Median :    14.06  
+##  Mean   :-13745.69   Mean   :-67498.88   Mean   : -6234.25  
+##  3rd Qu.:     7.72   3rd Qu.:     0.73   3rd Qu.:    18.92  
+##  Max.   :    22.86   Max.   :     4.30   Max.   :    41.34  
+##                                                             
+##      peas_y            rootveg_y            tropfr_y        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:    11.77   1st Qu.:    39.14   1st Qu.:-99999.00  
+##  Median :    20.68   Median :    68.36   Median :-99999.00  
+##  Mean   : -6229.83   Mean   : -6495.95   Mean   :-59046.85  
+##  3rd Qu.:    27.97   3rd Qu.:    86.35   3rd Qu.:    31.82  
+##  Max.   :    44.57   Max.   :   170.17   Max.   :    88.52  
+##                                                             
+##    vfruits_y           berries_f           brassic_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:    81.08   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :   177.67   Median :     0.00   Median :     0.00  
+##  Mean   : -5972.23   Mean   : -1249.99   Mean   : -1249.99  
+##  3rd Qu.:   301.98   3rd Qu.:     0.00   3rd Qu.:     0.00  
+##  Max.   :  1194.74   Max.   :     0.01   Max.   :     0.01  
+##                                                             
+##     citrus_f           frtrees_f            grapes_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.01   Median :     0.00  
+##  Mean   : -1249.99   Mean   : -1249.98   Mean   : -1249.97  
+##  3rd Qu.:     0.00   3rd Qu.:     0.01   3rd Qu.:     0.02  
+##  Max.   :     0.03   Max.   :     0.09   Max.   :     0.27  
+##                                                             
+##     greens_f             nuts_f             olives_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.00   Median :     0.00  
+##  Mean   : -1249.99   Mean   : -1249.98   Mean   : -1249.97  
+##  3rd Qu.:     0.00   3rd Qu.:     0.00   3rd Qu.:     0.00  
+##  Max.   :     0.02   Max.   :     0.04   Max.   :     0.45  
+##                                                             
+##    oth_veg_f             peas_f            rootveg_f        
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.00  
+##  Median :     0.00   Median :     0.00   Median :     0.00  
+##  Mean   : -1249.98   Mean   : -1249.99   Mean   : -1249.99  
+##  3rd Qu.:     0.00   3rd Qu.:     0.00   3rd Qu.:     0.00  
+##  Max.   :     0.02   Max.   :     0.02   Max.   :     0.02  
+##                                                             
+##     tropfr_f           vfruits_f             bovine         
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.00   1st Qu.:     0.00   1st Qu.:     0.18  
+##  Median :     0.00   Median :     0.00   Median :     0.43  
+##  Mean   : -1249.99   Mean   : -1249.98   Mean   : -2499.44  
+##  3rd Qu.:     0.00   3rd Qu.:     0.01   3rd Qu.:     0.78  
+##  Max.   :     0.04   Max.   :     0.04   Max.   :     2.83  
+##                                                             
+##    milk_cows              pigs               sheep          
+##  Min.   :-99999.00   Min.   :-99999.00   Min.   :-99999.00  
+##  1st Qu.:     0.04   1st Qu.:     0.11   1st Qu.:     0.05  
+##  Median :     0.10   Median :     0.23   Median :     0.17  
+##  Mean   : -2499.82   Mean   :-10624.12   Mean   :-11249.37  
+##  3rd Qu.:     0.20   3rd Qu.:     0.67   3rd Qu.:     0.76  
+##  Max.   :     1.04   Max.   :    17.26   Max.   :     5.23  
+##                                                             
+##      goats          
+##  Min.   :-99999.00  
+##  1st Qu.:     0.00  
+##  Median :     0.01  
+##  Mean   :-11249.78  
+##  3rd Qu.:     0.10  
+##  Max.   :     2.33  
+## 
+
# Persist the results of the assembly:
#   1. save.image() stores the whole current workspace in
#      Database_20190130.RData. It runs after the NA -> -99999 recoding above,
#      so the saved dbase.shp carries the sentinel rather than NA.
#   2. write.csv() exports dbase.final to Database_20190130.csv. dbase.final
#      is created outside this section -- whether it holds NA or the sentinel
#      cannot be told from here; verify.
#   3. names(dbase.shp) prints the attribute column names (133 in the listing
#      recorded below).
# NOTE(review): both paths are absolute and user-specific, and point at the
# directory already passed to setwd() at the top of the document; relative
# paths would make the script portable.
# NOTE(review): write.csv() is called without row.names, so R's default
# row-name column should appear in the CSV -- confirm that is wanted.
save.image("C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/Database_20190130.RData")
+write.csv(dbase.final, "C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/Database_20190130.csv")
+
+names(dbase.shp)
+
##   [1] "CNTR_CODE"    "NUTS_ID"      "NUTS_NAME"    "Shape_Length"
+##   [5] "Shape_Area"   "risk_pov"     "train35bas"   "train35ful"  
+##   [9] "train_bas"    "train_ful"    "nitr_high"    "nitr_mod"    
+##  [13] "nitr_poor"    "irrigated"    "forest"       "artific"     
+##  [17] "soil_loss"    "com_birds"    "farm_birds"   "org_farm"    
+##  [21] "energy_rt"    "renew_pct"    "renew_prod"   "gross_N"     
+##  [25] "gross_P"      "conv_till"    "cons_till"    "zero_till"   
+##  [29] "nfert"        "arable"       "grassland"    "permanent"   
+##  [33] "soil_prod"    "irrig_rate"   "afi_awu"      "gva_awu"     
+##  [37] "labour_use"   "pest_rate"    "gdp_cap"      "pps_cap"     
+##  [41] "emp_rate"     "tot_unemp"    "yth_unemp"    "rur_gdp"     
+##  [45] "rur_pps"      "int_gdp"      "int_pps"      "urb_gdp"     
+##  [49] "urb_pps"      "C_factor"     "emi_co2eq"    "emi_nh3"     
+##  [53] "emi_pm10"     "emi_pm25"     "soc"          "bio_threat"  
+##  [57] "nat2000_ag"   "nat2000_pr"   "cal_frac"     "precip"      
+##  [61] "deg_days"     "crop_suit"    "rye_f"        "barley_f"    
+##  [65] "maize_f"      "tritic_f"     "sorghum_f"    "oth_cer_f"   
+##  [69] "rice_f"       "pasture_f"    "rape_f"       "sunflow_f"   
+##  [73] "pulses_f"     "potato_f"     "sugbeet_f"    "oth_rt_f"    
+##  [77] "wheat_f"      "oats_f"       "oth_oil_f"    "fibre_f"     
+##  [81] "oth_ind_f"    "fodder_f"     "rye_y"        "barley_y"    
+##  [85] "maize_y"      "tritic_y"     "sorghum_y"    "oth_cer_y"   
+##  [89] "rice_y"       "pasture_y"    "rape_y"       "sunflow_y"   
+##  [93] "pulses_y"     "potato_y"     "sugbeet_y"    "oth_rt_y"    
+##  [97] "wheat_y"      "oats_y"       "oth_oil_y"    "fibre_y"     
+## [101] "oth_ind_y"    "fodder_y"     "berries_y"    "brassic_y"   
+## [105] "citrus_y"     "frtrees_y"    "grapes_y"     "greens_y"    
+## [109] "nuts_y"       "olives_y"     "oth_veg_y"    "peas_y"      
+## [113] "rootveg_y"    "tropfr_y"     "vfruits_y"    "berries_f"   
+## [117] "brassic_f"    "citrus_f"     "frtrees_f"    "grapes_f"    
+## [121] "greens_f"     "nuts_f"       "olives_f"     "oth_veg_f"   
+## [125] "peas_f"       "rootveg_f"    "tropfr_f"     "vfruits_f"   
+## [129] "bovine"       "milk_cows"    "pigs"         "sheep"       
+## [133] "goats"
+
# Export dbase.shp (geometry plus the sentinel-recoded attribute table) as an
# ESRI Shapefile named D1_database_20190130 in the Shapefiles folder.
# The warning recorded below shows that the driver abbreviates field names on
# write, so column names in the .dbf may differ from names(dbase.shp)
# (presumably the longer ones such as "Shape_Length" -- verify after export).
# NOTE(review): the call does not set overwrite_layer, so re-running against
# an existing file is expected to fail -- confirm against the rgdal docs.
writeOGR(dbase.shp, dsn="C:/Users/mu5106sc/Dropbox/STAGS/D1_Database/Shapefiles/D1_database_20190130.shp", layer="D1_database_20190130", driver="ESRI Shapefile")
+
## Warning in writeOGR(dbase.shp, dsn = "C:/Users/mu5106sc/Dropbox/STAGS/
+## D1_Database/Shapefiles/D1_database_20190130.shp", : Field names abbreviated
+## for ESRI Shapefile driver
+ + + + +
+ + + + + + + + diff --git a/D1_database_20190130.zip b/D1_database_20190130.zip new file mode 100644 index 0000000..bf6c61c Binary files /dev/null and b/D1_database_20190130.zip differ diff --git a/Database_20190130.RData b/Database_20190130.RData new file mode 100644 index 0000000..1fe81c1 Binary files /dev/null and b/Database_20190130.RData differ