# Adding enviromental information

Required libraries

In [1]:
library("data.table")
library("openxlsx")
library("raster")
library("rgdal")
library("rgeos")

NameError: name 'library' is not defined

Command to download required data

wget https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_30s_elev.zip  
wget https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_30s_bio.zip  
wget https://files.worldwildlife.org/wwfcmsprod/files/Publication/file/6kcchn7e3u_official_teow.zip?_ga=2.30019835.595585187.1657305484-1390015087.1657305484  

In [2]:
#Defining paths
#Path for sample
data_path = "./data/" #./data/GlobalAtlas-16S/"
#Path for light layers
layers_path = "./data/InfoBio/"
#Path for heavy layers
heavy_layers_path = "./BigData/"

Defining the main rasters

In [3]:
# Elevation msnm
elevation<-raster(paste(layers_path,"/wc2.1_30s_elev.tif",sep=""))

In [4]:
#Bioma info
bio_files <- list.files(path=paste(heavy_layers_path,"wc2.1_30s_bio/",sep=""), pattern="tif", all.files=FALSE, full.names=TRUE,recursive=TRUE)

BIO Variables meaning
* BIO1 = Annual Mean Temperature
* BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))
* BIO3 = Isothermality (BIO2/BIO7) (×100)
* BIO4 = Temperature Seasonality (standard deviation ×100)
* BIO5 = Max Temperature of Warmest Month
* BIO6 = Min Temperature of Coldest Month
* BIO7 = Temperature Annual Range (BIO5-BIO6)
* BIO8 = Mean Temperature of Wettest Quarter
* BIO9 = Mean Temperature of Driest Quarter
* BIO10 = Mean Temperature of Warmest Quarter
* BIO11 = Mean Temperature of Coldest Quarter
* BIO12 = Annual Precipitation
* BIO13 = Precipitation of Wettest Month
* BIO14 = Precipitation of Driest Month
* BIO15 = Precipitation Seasonality (Coefficient of Variation)
* BIO16 = Precipitation of Wettest Quarter
* BIO17 = Precipitation of Driest Quarter
* BIO18 = Precipitation of Warmest Quarter
* BIO19 = Precipitation of Coldest Quarter

Ecosystems data

In [5]:
teow <- readOGR(dsn = paste(heavy_layers_path,"official",sep=""), layer = "wwf_terr_ecos")

OGR data source with driver: ESRI Shapefile 
Source: "/home/neuralito/alumnos/Hackaton/ignore/neuralito/official", layer: "wwf_terr_ecos"
with 14458 features
It has 21 fields


The field BIOME has 14 differents IDs, the meaning of each ID is described in the following list:
1. Tropical & Subtropical Moist Broadleaf Forests
2. Tropical & Subtropical Dry Broadleaf Forests
3. Tropical & Subtropical Coniferous Forests
4. Temperate Broadleaf & Mixed Forests
5. Temperate Conifer Forests
6. Boreal Forests/Taiga
7. Tropical & Subtropical Grasslands, Savannas & Shrublands
8. Temperate Grasslands, Savannas & Shrublands
9. Flooded Grasslands & Savannas
10. Montane Grasslands & Shrublands
11.	Tundra
12. Mediterranean Forests, Woodlands & Scrub
13. Deserts & Xeric Shrublands
14. Mangroves

#### Function to add the information to a dataframe

In [6]:
add_variables <- function(sample, lon_name="Longitude", lat_name="Latitude"){
        #Copying the sample DF to avoid unwanted changes
        extraVars <- copy(sample)
        
        #Extracting the longitude and latitude
        coords <- data.frame(Longitude=extraVars[[lon_name]], Latitude=extraVars[[lat_name]])
        
        #Adding elevation
        extraVars$Elevation <- extract(elevation,coords)
        
        #Adding the 19 bioenviromental information
        for(i in 1:19){
            bior<-raster(bio_files[i])
            col_name <- paste("BIO",i,sep="")
            extraVars[[col_name]]<-extract(bior,coords)
            rm(bior)
        }
        
        #Adding information about the bioma
        biomes<-extract(teow,coords)
        extraVars$BIOME<- biomes$BIOME #Cat
        extraVars$ECONAME <- biomes$ECO_NAME 
    
        return(extraVars)

}


In [9]:
merged_ds <- read.csv(paste(data_path,"Merged.csv",sep=""))
head(merged_ds)

Unnamed: 0_level_0,X,Latitude,Longitude,k__Bacteria,k__Archaea,p__Verrucomicrobia,p__Proteobacteria,p__Bacteroidetes,p__Firmicutes,p__Actinobacteria,⋯,g__Aminobacter,g__Variovorax,g__Microlunatus,g__Afifella,g__Kineosporia,g__Rubellimicrobium,g__Corallococcus,g__Blastomonas,g__Friedmanniella,g__Bdellovibrio
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0,33.194,-117.241,0.9657025,0.012809917,0.04442149,0.5809917,0.08677686,0.07334711,0.06136364,⋯,0,0,0,0,0,0,0,0,0,0
2,1,33.194,-117.241,0.9657025,0.012809917,0.04442149,0.5809917,0.08677686,0.07334711,0.06136364,⋯,0,0,0,0,0,0,0,0,0,0
3,2,33.194,-117.241,0.9657025,0.012809917,0.04442149,0.5809917,0.08677686,0.07334711,0.06136364,⋯,0,0,0,0,0,0,0,0,0,0
4,3,33.194,-117.241,0.9463739,0.007917941,0.03635055,0.5157459,0.10851179,0.08871693,0.12002879,⋯,0,0,0,0,0,0,0,0,0,0
5,4,33.194,-117.241,0.9463739,0.007917941,0.03635055,0.5157459,0.10851179,0.08871693,0.12002879,⋯,0,0,0,0,0,0,0,0,0,0
6,5,33.194,-117.241,0.9463739,0.007917941,0.03635055,0.5157459,0.10851179,0.08871693,0.12002879,⋯,0,0,0,0,0,0,0,0,0,0


In [10]:
mergedXtra <- add_variables(sample = merged_ds)

In [11]:
filename <- paste(data_path,"merged_complete_environmental.csv", sep="")
filename

In [13]:
write.csv(file=filename, mergedXtra, row.names = F)

In [14]:
read.csv(filename)

X,Latitude,Longitude,k__Bacteria,k__Archaea,p__Verrucomicrobia,p__Proteobacteria,p__Bacteroidetes,p__Firmicutes,p__Actinobacteria,⋯,BIO12,BIO13,BIO14,BIO15,BIO16,BIO17,BIO18,BIO19,BIOME,ECONAME
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
0,33.19400,-117.24100,0.9657025,0.012809917,0.04442149,0.5809917,0.08677686,0.073347107,0.06136364,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
1,33.19400,-117.24100,0.9657025,0.012809917,0.04442149,0.5809917,0.08677686,0.073347107,0.06136364,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
2,33.19400,-117.24100,0.9657025,0.012809917,0.04442149,0.5809917,0.08677686,0.073347107,0.06136364,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
3,33.19400,-117.24100,0.9463739,0.007917941,0.03635055,0.5157459,0.10851179,0.088716934,0.12002879,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
4,33.19400,-117.24100,0.9463739,0.007917941,0.03635055,0.5157459,0.10851179,0.088716934,0.12002879,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
5,33.19400,-117.24100,0.9463739,0.007917941,0.03635055,0.5157459,0.10851179,0.088716934,0.12002879,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
6,33.19400,-117.24100,0.9690352,0.011408083,0.02998696,0.5730117,0.06551499,0.110169492,0.08507171,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
7,33.19400,-117.24100,0.9690352,0.011408083,0.02998696,0.5730117,0.06551499,0.110169492,0.08507171,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
8,33.19400,-117.24100,0.9690352,0.011408083,0.02998696,0.5730117,0.06551499,0.110169492,0.08507171,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral
9,33.19400,-117.24100,0.9636393,0.008809867,0.04597149,0.5638315,0.09338459,0.109562710,0.06230979,⋯,12.408334,55.39435,365.3026,28.8,6.4,22.4,13.65,21.50000,12,California coastal sage and chaparral


In [15]:
samples <- read.csv(paste(data_path,"EMP.5k.csv",sep=""))

In [16]:
emp <- add_variables(sample = samples)

In [37]:
emp[743,]

Unnamed: 0_level_0,X,Latitude,Longitude,X4457032,X4471583,X9560,X4468101,X198079,X101868,X4360511,⋯,BIO12,BIO13,BIO14,BIO15,BIO16,BIO17,BIO18,BIO19,BIOME,ECONAME
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
743,1521.ct1.s.6.1.sequences,62.65,-69.57,0,0,0,0.004618511,0,0,0,⋯,,,,,,,,,11,Middle Arctic tundra


In [27]:
filename <- paste(data_path,"EMPcomplete_environmental.csv", sep="")
filename

In [28]:
write.csv(file=filename, emp, row.names = F)

In [19]:
head(emp)

Unnamed: 0_level_0,X,Latitude,Longitude,X4457032,X4471583,X9560,X4468101,X198079,X101868,X4360511,⋯,BIO12,BIO13,BIO14,BIO15,BIO16,BIO17,BIO18,BIO19,BIOME,ECONAME
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,1001.skm3,33.194,-117.241,0.003305785,0.0,0,0,0,0.005785124,0.0,⋯,12.40833,55.39435,365.3026,28.8,6.4,22.4,13.65,21.5,12,California coastal sage and chaparral
2,1001.skd3,33.194,-117.241,0.002699298,0.0,0,0,0,0.0010797193,0.0001799532,⋯,12.40833,55.39435,365.3026,28.8,6.4,22.4,13.65,21.5,12,California coastal sage and chaparral
3,1001.skm1,33.194,-117.241,0.001303781,0.0,0,0,0,0.0029335072,0.0,⋯,12.40833,55.39435,365.3026,28.8,6.4,22.4,13.65,21.5,12,California coastal sage and chaparral
4,1001.skb3,33.194,-117.241,0.009130226,0.0,0,0,0,0.0049655614,0.0,⋯,12.40833,55.39435,365.3026,28.8,6.4,22.4,13.65,21.5,12,California coastal sage and chaparral
5,1001.skm2,33.194,-117.241,0.001022495,0.0,0,0,0,0.0003408316,0.0,⋯,12.40833,55.39435,365.3026,28.8,6.4,22.4,13.65,21.5,12,California coastal sage and chaparral
6,1001.skb2,33.194,-117.241,0.004364213,0.0001983733,0,0,0,0.0037690934,0.0,⋯,12.40833,55.39435,365.3026,28.8,6.4,22.4,13.65,21.5,12,California coastal sage and chaparral


In [22]:
samples <- read.csv(paste(data_path,"GAtlas.homogeneous.csv",sep=""))

In [23]:
gatlas <- add_variables(sample = samples)

In [24]:
gatlas

X,Latitude,Longitude,o__Rhizobiales,o__Actinomycetales,o__Burkholderiales,o__Sphingomonadales,o__Solibacterales,o__WD2101,o__Ellin329,⋯,BIO12,BIO13,BIO14,BIO15,BIO16,BIO17,BIO18,BIO19,BIOME,ECONAME
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,-26.73333,-54.68333,0.2877509,0.057317428,0.0020366599,0.040442246,0.009892348,0.0130928135,0.004655223,⋯,12.191667,53.70778,384.3794,31.6,8.9,22.7,20.783333,16.883333,1,Alto Paraná Atlantic forests
2,64.80000,-148.25000,0.4577778,0.122962963,0.0074074074,0.004444444,0.176296296,0.0029629630,0.031111111,⋯,10.966667,23.33333,1369.8636,21.7,-25.3,47.0,14.033333,-10.150001,6,Interior Alaska-Yukon lowland taiga
3,42.53000,-72.19000,0.3995633,0.024017467,0.0065502183,0.002183406,0.203056769,0.0054585153,0.108078603,⋯,12.300000,31.61954,967.4087,26.6,-12.3,38.9,18.650000,-4.416667,4,New England-Acadian forests
4,39.09000,-96.57000,0.1763374,0.103311633,0.0138692329,0.040758562,0.007642230,0.0404755166,0.013020096,⋯,12.666667,30.96984,1026.9037,32.3,-8.6,40.9,22.216667,-0.850000,8,Flint Hills tall grasslands
5,44.21000,-122.26000,0.3468487,0.076676034,0.0272982738,0.015254918,0.020473705,0.0152549177,0.028101164,⋯,12.875000,44.39655,594.9986,28.0,-1.0,29.0,3.383333,17.433334,5,Central and Southern Cascades forests
6,19.81000,-155.26000,0.5398067,0.029452370,0.0193281178,0.008743672,0.063506673,0.0073630925,0.030372757,⋯,7.583333,66.52047,119.5509,19.9,8.5,11.4,13.283333,16.083334,1,Hawaii tropical moist forests
7,39.99000,-105.37000,0.3875312,0.106234414,0.0249376559,0.013466334,0.046882793,0.0274314214,0.082294264,⋯,15.900000,43.80165,754.6186,26.1,-10.2,36.3,13.533334,-2.366667,5,Colorado Rockies forests
8,35.00000,-83.50000,0.6378116,0.021468144,0.0006925208,0.000000000,0.077562327,0.0034626039,0.036011080,⋯,11.358334,38.63379,675.5608,24.9,-4.5,29.4,3.000000,15.550000,4,Appalachian-Blue Ridge forests
9,19.81000,-155.26000,0.7841207,0.009842520,0.0006561680,0.001312336,0.091863517,0.0006561680,0.026902887,⋯,7.583333,66.52047,119.5509,19.9,8.5,11.4,13.283333,16.083334,1,Hawaii tropical moist forests
10,42.53000,-72.19000,0.2421574,0.099889928,0.0178866263,0.031095212,0.007154651,0.0308200330,0.009631260,⋯,12.300000,31.61954,967.4087,26.6,-12.3,38.9,18.650000,-4.416667,4,New England-Acadian forests


In [25]:
filename <- paste(data_path,"GAtlasHomocomplete_environmental.csv", sep="")
filename

In [26]:
write.csv(file=filename, gatlas, row.names = F)