This repository has been archived by the owner on Jun 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
workflow_tuna_atlas_setup_and_update.R
181 lines (148 loc) · 14.8 KB
/
workflow_tuna_atlas_setup_and_update.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
######################################################################
##### 52North WPS annotations ##########
######################################################################
# wps.des: id = workflow_setup_update_tuna_atlas, title = Setup or update the tuna atlas database , abstract = This script sets up or updates the tuna atlas database;
# wps.in: id = deploy_database_model, type = boolean, title = MANDATORY. Deploy the database model on the empty PostgreSQL+PostGIS database ? , value = TRUE;
# wps.in: id = load_codelists, type = boolean, title = MANDATORY. Load code lists in the database ? If set to TRUE then following parameter(s) is/are mandatory: metadata_and_parameterization_csv_codelists , value = TRUE;
# wps.in: id = load_codelists_mappings, type = boolean, title = MANDATORY. Load code list mappings in the database ? If set to TRUE then following parameter(s) is/are mandatory: metadata_and_parameterization_csv_mappings, value = TRUE;
# wps.in: id = year_tuna_atlas, type = string, title = Year of the Tuna atlas to setup/update (i.e. year the datasets to load were released by the tuna RFMOs) , value = "2017";
# wps.in: id = transform_and_load_primary_datasets, type = boolean, title = MANDATORY. Harmonize structure and load primary tuna RFMOs datasets in the database ? If set to TRUE then following parameter(s) is/are mandatory: virtual_repository_with_R_files | vre_username | vre_token | metadata_and_parameterization_csv_primary_datasets., value = TRUE;
# wps.in: id = generate_and_load_global_tuna_atlas_datasets, type = boolean, title = MANDATORY. Generate and load global tuna atlas datasets in the database ? If set to TRUE then following parameter(s) is/are mandatory: virtual_repository_with_R_files | vre_username | vre_token | metadata_and_parameterization_tuna_atlas_catch_effort_datasets | metadata_and_parameterization_tuna_atlas_nominal_catch_datasets., value = TRUE;
# wps.in: id = db_host, type = string, title = MANDATORY. Host of the database , value = "db-tuna.d4science.org";
# wps.in: id = db_name, type = string, title = MANDATORY. Name of the database , value = "tunaatlas";
# wps.in: id = db_admin_name, type = string, title = MANDATORY. User name of the database (admin) , value = "tunaatlas_u";
# wps.in: id = db_admin_password, type = string, title = MANDATORY. Password of the admin of the database , value = "***";
# wps.in: id = db_read_name, type = string, title = OPTIONAL unless deploy_database_model==TRUE. User name of the database (select privileges) , value = "tunaatlas_inv";
# wps.in: id = db_dimensions, type = string, title = Name of the dimensions to deploy. The dimensions must be separated by a comma. , value = "area,catchtype,unit,flag,gear,schooltype,sex,sizeclass,species,time,source";
# wps.in: id = db_variables_and_associated_dimensions, type = string, title = Name of the variables to deploy and their associated dimensions. The format is: variable_name=list_of_dimensions_associated_separated_by_commas. The variables should be separated by the symbol '@'. , value = "catch=schooltype,species,time,area,gear,flag,catchtype,unit,source@effort=schooltype,time,area,gear,flag,unit,source@catch_at_size=schooltype,species,time,area,gear,flag,catchtype,sex,unit,sizeclass,source";
# wps.in: id = virtual_repository_with_R_files, type = string, title = OPTIONAL unless transform_and_load_primary_datasets==TRUE or generate_and_load_global_tuna_atlas_datasets==TRUE. Repository where the R scripts of data generation will be loaded. , value = "/Workspace/VRE Folders/FAO_TunaAtlas/R_scripts/datasets_creation";
# wps.in: id = vre_username, type = string, title = OPTIONAL unless transform_and_load_primary_datasets==TRUE or generate_and_load_global_tuna_atlas_datasets==TRUE. VRE user name , value = "paultaconet";
# wps.in: id = vre_token, type = string, title = OPTIONAL unless transform_and_load_primary_datasets==TRUE or generate_and_load_global_tuna_atlas_datasets==TRUE. VRE token , value = "***";
# wps.in: id = metadata_and_parameterization_csv_codelists, type = string, title = OPTIONAL unless load_codelists==TRUE. Path to the table containing the metadata and parameters for the code lists to load in the DB. See documentation to understand how this table must be filled. , value = "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_codelists_2017.csv";
# wps.in: id = metadata_and_parameterization_csv_mappings, type = string, title = OPTIONAL unless load_codelists_mappings==TRUE. Path to the table containing the metadata and parameters for the code lists mappings to load in the DB. See documentation to understand how this table must be filled. , value = "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_mappings_2017.csv";
# wps.in: id = metadata_and_parameterization_csv_primary_datasets, type = string, title = OPTIONAL unless transform_and_load_primary_datasets==TRUE. Path to the table containing the metadata and parameters for the primary tuna RFMOs to load in the DB. See documentation to understand how this table must be filled. , value = "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_and_parameterization_primary_datasets_2017.csv";
# wps.in: id = metadata_and_parameterization_tuna_atlas_catch_effort_datasets, type = string, title = OPTIONAL unless generate_and_load_global_tuna_atlas_datasets==TRUE. Path to the table containing the metadata and parameters for the global georeferenced catch tuna atlas datasets to generate and load in the DB. See documentation to understand how this table must be filled. , value = "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_and_parameterization_tuna_atlas_datasets_ird_2017.csv";
# wps.in: id = metadata_and_parameterization_tuna_atlas_nominal_catch_datasets, type = string, title = OPTIONAL unless generate_and_load_global_tuna_atlas_datasets==TRUE. Path to the table containing the metadata and parameters for the global nominal catch tuna atlas datasets to generate and load in the DB. See documentation to understand how this table must be filled. , value = "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_and_parameterization_tuna_atlas_nominal_catch_datasets_2017.csv";
# wps.out: id = , type = , title = Database deployed (in case deploy_database_model==TRUE) and loaded with the datasets;
# Start from an empty workspace: every object of the current environment
# (names starting with a dot included) is removed before the configuration
# below is defined.
rm(list = ls(all.names = TRUE))

#### ENABLE/DISABLE WORKFLOW STEPS
# Each flag switches on the workflow step of the same name further down.
deploy_database_model <- TRUE
load_codelists <- TRUE
load_codelists_mappings <- TRUE
transform_and_load_primary_datasets <- TRUE
generate_and_load_global_tuna_atlas_datasets <- TRUE

#### WF CONFIGURATION:

### FAO TUNA ATLAS VRE CREDENTIALS
vre_username <- "paultaconet"
vre_token <- "***"

### DATABASE CREDENTIALS
db_host <- "db-tuna.d4science.org"
db_name <- "tunaatlas"
db_admin_name <- "tunaatlas_u"
db_admin_password <- "***"
db_read_name <- "tunaatlas_inv"

### TUNA ATLAS YEAR
year_tuna_atlas <- "2017"

### DATA WAREHOUSE (set if deploy_database_model==TRUE)
# Comma-separated list of the dimensions to deploy.
db_dimensions <- "area,catchtype,unit,flag,gear,schooltype,sex,sizeclass,species,time,source"
# Variables to deploy, separated by '@'; each one is written as
# variable_name=comma-separated list of its dimensions.
db_variables_and_associated_dimensions <- "catch=schooltype,species,time,area,gear,flag,catchtype,unit,source@effort=schooltype,time,area,gear,flag,unit,source@catch_at_size=schooltype,species,time,area,gear,flag,catchtype,sex,unit,sizeclass,source"

### METADATA AND PARAMETERIZATION OF CODE LISTS (set if load_codelists==TRUE)
metadata_and_parameterization_csv_codelists <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_codelists_2017.csv"

### METADATA AND PARAMETERIZATION OF CODE LISTS MAPPING (set if load_codelists_mappings==TRUE)
metadata_and_parameterization_csv_mappings <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_mappings_2017.csv"

### METADATA AND PARAMETERIZATION OF tRFMOs PRIMARY DATASETS (set if transform_and_load_primary_datasets==TRUE)
metadata_and_parameterization_csv_primary_datasets <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_and_parameterization_primary_datasets_2017.csv"

### METADATA AND PARAMETERIZATION OF GLOBAL DATASETS (set if generate_and_load_global_tuna_atlas_datasets==TRUE)
metadata_and_parameterization_tuna_atlas_catch_effort_datasets <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_and_parameterization_tuna_atlas_datasets_ird_2017.csv"
metadata_and_parameterization_tuna_atlas_nominal_catch_datasets <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/tunaatlas_world/metadata_and_parameterization_files/metadata_and_parameterization_tuna_atlas_nominal_catch_datasets_2017.csv"
### END WF CONFIGURATION
#### BEGIN WF
# Repository where the R scripts of data generation will be loaded
virtual_repository_with_R_files <- "/Workspace/VRE Folders/FAO_TunaAtlas/R_scripts/datasets_creation"
# Base URL under which the workflow R scripts are stored
repository_R_scripts <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/workflow_etl/scripts"
# Base URL of the SQL scripts of the database model (note the trailing slash)
repository_sql_scripts <- "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/sql/deploy_database_model/"
# Install (when missing) and attach the packages this workflow depends on.
# requireNamespace() only checks availability; attaching is done with
# library(), which raises an error when a package is still unavailable.
# require() would merely return FALSE with a warning and let the script run on
# until the first missing function is called.
if (!requireNamespace("RPostgreSQL", quietly = TRUE)) {
  install.packages("RPostgreSQL")
}
if (!requireNamespace("rtunaatlas", quietly = TRUE)) {
  # rtunaatlas is installed from GitHub, which is done through devtools
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("ptaconet/rtunaatlas")
}
library(RPostgreSQL)
library(rtunaatlas)
# Source scripts
# Function deploying the database model
source("https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/deploy_database_model.R")
# Workflow scripts stored under repository_R_scripts, sourced in the order
# listed. source() evaluates each file in the global environment by default,
# so calling it through lapply() defines the same objects as direct calls.
invisible(lapply(
  paste(
    repository_R_scripts,
    c(
      "open_dataset.R",
      "generate_dataset.R",
      "get_data_frame_code_lists.R",
      "fill_missing_metadata.R",
      "generate_tuna_atlas_identifier.R",
      "push_R_script_to_server.R",
      "workflow_tuna_atlas_dataset_to_load.R",
      "workflow_tuna_atlas_dataset_to_generate_and_load.R"
    ),
    sep = "/"
  ),
  source
))
## DB Connection parameters
# Named list passed as first argument to the dataset workflows called below
con_parameters <- list(
  db_name = db_name,
  db_admin_name = db_admin_name,
  db_admin_password = db_admin_password,
  db_host = db_host
)

## DEPLOY DATABASE MODEL
# Runs only when the deploy_database_model step is enabled. The last argument
# is the URL of the SQL scripts of the database model.
if (deploy_database_model == TRUE) {
  deploy_database_model_function(
    db_name,
    db_host,
    db_admin_name,
    db_read_name,
    db_admin_password,
    db_dimensions,
    db_variables_and_associated_dimensions,
    "https://raw.githubusercontent.com/ptaconet/rtunaatlas_scripts/master/sql/deploy_database_model"
  )
}
### LOAD CODE LISTS
# Reads the code lists metadata/parameterization table and hands its rows, one
# at a time, to workflow_tuna_atlas_dataset_to_load().
if (load_codelists == TRUE) {
  cat("Start loading the code lists and related metadata in the database...\n")
  # Open csv metadata of code lists; every column is read as character
  table_metadata_and_parameterization <- read.csv(
    metadata_and_parameterization_csv_codelists,
    stringsAsFactors = FALSE,
    colClasses = "character"
  )
  # One by one, load the code lists. seq_len() gives an empty sequence for a
  # table without rows, whereas 1:nrow() would iterate over c(1, 0).
  for (df_to_load in seq_len(nrow(table_metadata_and_parameterization))) {
    metadata_and_parameterization <- table_metadata_and_parameterization[df_to_load, ]
    workflow_tuna_atlas_dataset_to_load(con_parameters, metadata_and_parameterization)
  }
  cat("End loading the code lists and related metadata in the database\n")
}
### LOAD CODE LISTS MAPPINGS
# Reads the code list mappings metadata/parameterization table and hands its
# rows, one at a time, to workflow_tuna_atlas_dataset_to_load().
if (load_codelists_mappings == TRUE) {
  cat("Start loading the code lists mappings and related metadata in the database...\n")
  # Open csv metadata of code list mappings; every column is read as character
  table_metadata_and_parameterization <- read.csv(
    metadata_and_parameterization_csv_mappings,
    stringsAsFactors = FALSE,
    colClasses = "character"
  )
  # One by one, load the code lists mappings. seq_len() gives an empty sequence
  # for a table without rows, whereas 1:nrow() would iterate over c(1, 0).
  for (df_to_load in seq_len(nrow(table_metadata_and_parameterization))) {
    metadata_and_parameterization <- table_metadata_and_parameterization[df_to_load, ]
    workflow_tuna_atlas_dataset_to_load(con_parameters, metadata_and_parameterization)
  }
  # Printed once, after the last mapping (it used to sit inside the loop and
  # was therefore printed after every single mapping).
  cat("End loading the code lists mappings and related metadata in the database\n")
}
### HARMONIZE AND LOAD tRFMOs PRIMARY DATASETS
# Reads the primary datasets metadata/parameterization table and hands its
# rows, one at a time, to workflow_tuna_atlas_dataset_to_generate_and_load().
if (transform_and_load_primary_datasets == TRUE) {
  cat("Start harmonizing and loading the tRFMOs primary datasets and related metadata in the database...\n")
  # Open csv metadata of primary datasets and related parameterization; every
  # column is read as character
  table_metadata_and_parameterization <- read.csv(
    metadata_and_parameterization_csv_primary_datasets,
    stringsAsFactors = FALSE,
    colClasses = "character"
  )
  # One by one, load the primary datasets.
  # Only the first 54 rows are processed, as before, but the upper bound is now
  # limited to the number of rows actually present: with fewer than 54 rows the
  # fixed 1:54 range selected rows that do not exist (all-NA rows).
  # NOTE(review): a commented-out loop over all rows preceded the 1:54 loop, so
  # the cap may be a leftover - confirm whether every row should be loaded.
  for (df_to_load in seq_len(min(54L, nrow(table_metadata_and_parameterization)))) {
    metadata_and_parameterization <- table_metadata_and_parameterization[df_to_load, ]
    workflow_tuna_atlas_dataset_to_generate_and_load(
      con_parameters,
      metadata_and_parameterization,
      year_tuna_atlas,
      vre_username,
      vre_token
    )
  }
  cat("End harmonizing and loading the tRFMOs primary datasets and related metadata in the database\n")
}
### GENERATE AND LOAD GLOBAL DATASETS
# Processes two metadata/parameterization tables in turn (catch/effort datasets,
# then nominal catch datasets). For each table the column
# parameter_datasets_year_release is set to year_tuna_atlas and the rows are
# handed, one at a time, to workflow_tuna_atlas_dataset_to_generate_and_load().
if (generate_and_load_global_tuna_atlas_datasets == TRUE) {
  cat("Start generating and loading the global tuna atlas datasets and related metadata in the database...\n")

  # Open csv metadata of ird tuna atlas catch datasets and related parameterization
  table_metadata_and_parameterization <- read.csv(
    metadata_and_parameterization_tuna_atlas_catch_effort_datasets,
    stringsAsFactors = FALSE,
    colClasses = "character"
  )
  # rep() keeps the assignment valid for a table without rows (assigning a
  # single value to a column of a zero-row data frame is an error).
  table_metadata_and_parameterization$parameter_datasets_year_release <- rep(
    year_tuna_atlas,
    nrow(table_metadata_and_parameterization)
  )
  # One by one, generate and load the ird tuna atlas datasets. seq_len() gives
  # an empty sequence for an empty table, unlike 1:nrow().
  for (df_to_load in seq_len(nrow(table_metadata_and_parameterization))) {
    metadata_and_parameterization <- table_metadata_and_parameterization[df_to_load, ]
    workflow_tuna_atlas_dataset_to_generate_and_load(
      con_parameters,
      metadata_and_parameterization,
      year_tuna_atlas,
      vre_username,
      vre_token
    )
  }

  # Open csv metadata of ird tuna atlas nominal catch datasets and related parameterization
  table_metadata_and_parameterization <- read.csv(
    metadata_and_parameterization_tuna_atlas_nominal_catch_datasets,
    stringsAsFactors = FALSE,
    colClasses = "character"
  )
  table_metadata_and_parameterization$parameter_datasets_year_release <- rep(
    year_tuna_atlas,
    nrow(table_metadata_and_parameterization)
  )
  # One by one, generate and load the ird tuna atlas nominal catch datasets
  for (df_to_load in seq_len(nrow(table_metadata_and_parameterization))) {
    metadata_and_parameterization <- table_metadata_and_parameterization[df_to_load, ]
    workflow_tuna_atlas_dataset_to_generate_and_load(
      con_parameters,
      metadata_and_parameterization,
      year_tuna_atlas,
      vre_username,
      vre_token
    )
  }
  cat("End generating and loading the global nominal catch tuna atlas datasets and related metadata in the database\n")
}