From 6c935df9271215f0ca049b5b13ba108030ab241a Mon Sep 17 00:00:00 2001 From: Mahdi Ben Jelloul Date: Tue, 21 Mar 2023 14:30:46 +0000 Subject: [PATCH 1/2] Change 2019 ERF data paths --- .gitignore | 2 + .gitlab-ci.yml | 913 ++---------------- ci-runner/build_ci.py | 178 +--- ci-runner/openfisca_france_data_config.ini | 3 + docker/Dockerfile | 7 +- gitlab_ci/all_years_build_and_aggregates.yml | 830 ++++++++++++++++ openfisca_france_data/base_survey.py | 5 - openfisca_france_data/comparator.py | 594 ++++++++++++ openfisca_france_data/config.py | 33 + openfisca_france_data/debugger.py | 3 +- openfisca_france_data/erfs_fpr/__init__.py | 7 + openfisca_france_data/erfs_fpr/comparison.py | 91 ++ .../erfs_fpr/get_survey_scenario.py | 12 +- .../erfs_fpr/input_data_builder/__init__.py | 61 +- .../step_01_preprocessing.py | 72 +- .../input_data_builder/step_05_final.py | 12 +- openfisca_france_data/erfs_fpr/scenario.py | 1 + openfisca_france_data/model/id_variables.py | 8 + setup.py | 18 +- 19 files changed, 1754 insertions(+), 1096 deletions(-) create mode 100644 ci-runner/openfisca_france_data_config.ini create mode 100644 gitlab_ci/all_years_build_and_aggregates.yml create mode 100644 openfisca_france_data/comparator.py create mode 100644 openfisca_france_data/config.py create mode 100644 openfisca_france_data/erfs_fpr/comparison.py diff --git a/.gitignore b/.gitignore index 81693e04..c094274a 100644 --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,5 @@ openfisca_erfs_fpr.json # PyEnv .python-version .pytest_cache/ + +figures_directory diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 74dabcfe..e41981c3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,4 @@ - -################################################ -# GENERATED FILE, DO NOT EDIT -# Please visit ci-runner/README.md -################################################ +include: 'gitlab_ci/all_years_build_and_aggregates.yml' variables: PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" @@ -22,18 +18,21 @@ stages: - 
test - build_collection - build_input_data + - diagnostics - aggregates - run_on_all_years - build_input_data_all - aggregates_all - anaconda + before_script: # To be sure we are up to date even if we do not rebuild docker image - make install - cp ./ci-runner/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini - echo "End of before_script" + build docker image: stage: docker tags: @@ -53,14 +52,14 @@ build docker image: when: manual -run_on_all_years: - stage: run_on_all_years - # Prevent call of before_script because it will fail in this context - before_script: - - '' +test: + image: $CI_REGISTRY_IMAGE:latest script: - - echo "On ne fait rien" - when: manual + - make test + stage: test + tags: + - openfisca + clean_folder: before_script: @@ -71,34 +70,7 @@ clean_folder: - openfisca when: manual -copy_previous_build_collections: - before_script: - - '' - script: | - if [[ -f "$ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json" ]]; then - echo "Files already exists, do nothing." 
- else - rm -rf $ROOT_FOLDER/$OUT_FOLDER || true - mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_collections/ - mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_output/ - cp $ROOT_FOLDER/master/openfisca_survey_manager_config-after-build-collection.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - sed -i "s/master/$OUT_FOLDER/" $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - cp $ROOT_FOLDER/master/data_collections/erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json - cp ./ci-runner/empty_openfisca_erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json - fi - stage: build_collection - tags: - - openfisca - except: - - master -test: - image: $CI_REGISTRY_IMAGE:latest - script: - - make test - stage: test - tags: - - openfisca build_collection: image: $CI_REGISTRY_IMAGE:latest script: @@ -112,837 +84,74 @@ build_collection: - 'echo "{\"name\": \"erfs_fpr\", \"surveys\": {}}" > $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json' - 'echo "{\"name\": \"openfisca_erfs_fpr\", \"surveys\": {}}" > $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json' - cat ~/.config/openfisca-survey-manager/config.ini + - cat ~/.config/openfisca-survey-manager/raw_data.ini - '#build-collection -c enquete_logement -d -m -s 2013' - - build-collection -c erfs_fpr -d -m -v + - build-collection -c erfs_fpr -d -m -v -p ~/.config/openfisca-survey-manager/ - echo "Backup updated config" - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini stage: build_collection tags: - openfisca when: manual -input_data-2018: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2018" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2018 -f 
$ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2018.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini - stage: build_input_data - tags: - - openfisca -aggregates-2018: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - input_data-2018 - script: - - echo "aggregates-2018" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates - tags: - - openfisca -in_dt-1996: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-1996" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1996 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1996.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini - stage: build_input_data_all - tags: - - openfisca -agg-1996: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1996 - script: - - echo "aggregates-1996" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1996 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-1997: - 
image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-1997" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1997 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1997.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini - stage: build_input_data_all - tags: - - openfisca -agg-1997: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1997 - script: - - echo "aggregates-1997" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1997 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-1998: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-1998" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1998 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1998.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini - stage: build_input_data_all - tags: - - openfisca -agg-1998: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1998 - script: - - echo "aggregates-1998" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini - 
~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1998 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-1999: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-1999" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1999 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1999.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini - stage: build_input_data_all - tags: - - openfisca -agg-1999: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1999 - script: - - echo "aggregates-1999" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1999 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2000: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2000" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2000 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2000.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini - 
stage: build_input_data_all - tags: - - openfisca -agg-2000: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2000 - script: - - echo "aggregates-2000" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2000 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2001: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2001" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2001 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2001.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini - stage: build_input_data_all - tags: - - openfisca -agg-2001: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2001 - script: - - echo "aggregates-2001" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2001 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2002: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2002" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp 
$ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2002 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2002.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini - stage: build_input_data_all - tags: - - openfisca -agg-2002: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2002 - script: - - echo "aggregates-2002" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2002 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2003: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2003" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2003 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2003.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini - stage: build_input_data_all - tags: - - openfisca -agg-2003: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2003 - script: - - echo "aggregates-2003" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2003 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp 
./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2004: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2004" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2004 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2004.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini - stage: build_input_data_all - tags: - - openfisca -agg-2004: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2004 - script: - - echo "aggregates-2004" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2004 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2005: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2005" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2005 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2005.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini - stage: build_input_data_all - tags: - - openfisca -agg-2005: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - 
in_dt-2005 - script: - - echo "aggregates-2005" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2005 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2006: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2006" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2006 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2006.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini - stage: build_input_data_all - tags: - - openfisca -agg-2006: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2006 - script: - - echo "aggregates-2006" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2006 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2007: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2007" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2007 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2007.h5 - - 
cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini - stage: build_input_data_all - tags: - - openfisca -agg-2007: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2007 - script: - - echo "aggregates-2007" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2007 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2008: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2008" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2008 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2008.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini - stage: build_input_data_all - tags: - - openfisca -agg-2008: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2008 - script: - - echo "aggregates-2008" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2008 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2009: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - 
script: - - echo "build_input_data-2009" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2009 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2009.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini - stage: build_input_data_all - tags: - - openfisca -agg-2009: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2009 - script: - - echo "aggregates-2009" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2009 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2010: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2010" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2010 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2010.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini - stage: build_input_data_all - tags: - - openfisca -agg-2010: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2010 - script: - - echo "aggregates-2010" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini - ~/.config/openfisca-survey-manager/config.ini - - python 
tests/erfs_fpr/integration/test_aggregates.py --year 2010 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2011: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2011" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2011 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2011.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini - stage: build_input_data_all - tags: - - openfisca -agg-2011: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2011 - script: - - echo "aggregates-2011" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2011 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2012: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2012" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2012 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2012.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini - stage: build_input_data_all - tags: - - openfisca -agg-2012: - 
artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2012 - script: - - echo "aggregates-2012" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2012 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2013: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2013" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2013 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2013.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini - stage: build_input_data_all - tags: - - openfisca -agg-2013: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2013 - script: - - echo "aggregates-2013" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2013 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2014: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2014" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - 
~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2014 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2014.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini - stage: build_input_data_all - tags: - - openfisca -agg-2014: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2014 - script: - - echo "aggregates-2014" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2014 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2015: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2015" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2015 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2015.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini - stage: build_input_data_all - tags: - - openfisca -agg-2015: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2015 - script: - - echo "aggregates-2015" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2015 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv 
$ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2016: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2016" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2016 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2016.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini - stage: build_input_data_all - tags: - - openfisca -agg-2016: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2016 - script: - - echo "aggregates-2016" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2016 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2017: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2017" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2017 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2017.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini - stage: build_input_data_all - tags: - - openfisca -agg-2017: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2017 - script: - - echo "aggregates-2017" - - cp 
$ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2017 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2018: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2018" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2018 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2018.h5 - - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini - stage: build_input_data_all - tags: - - openfisca -agg-2018: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2018 - script: - - echo "aggregates-2018" - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all - tags: - - openfisca -in_dt-2019: - image: $CI_REGISTRY_IMAGE:latest - needs: - - run_on_all_years - script: - - echo "build_input_data-2019" - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2019 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2019.h5 - - cp ~/.config/openfisca-survey-manager/config.ini 
$ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini - stage: build_input_data_all + + +copy_previous_build_collections: + before_script: + - '' + script: | + if [[ -f "$ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json" ]]; then + echo "Files already exists, do nothing." + else + rm -rf $ROOT_FOLDER/$OUT_FOLDER || true + mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_collections/ + mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_output/ + cp $ROOT_FOLDER/master/openfisca_survey_manager_config-after-build-collection.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + sed -i "s/master/$OUT_FOLDER/" $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + cp $ROOT_FOLDER/master/data_collections/erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json + cp ./ci-runner/empty_openfisca_erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json + fi + stage: build_collection tags: - openfisca -agg-2019: + except: + - master + +diagnostics: artifacts: paths: - - ./*.html - - ./*.csv + - figures_directory image: $CI_REGISTRY_IMAGE:latest needs: - - in_dt-2019 + - input_data-2019 script: - - echo "aggregates-2019" + - echo "diagnotics-2019" - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2019 - - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output - - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output - stage: aggregates_all + - mkdir -p ~/.config/openfisca-france-data + - cp ./ci-runner/openfisca_france_data_config.ini + ~/.config/openfisca-france-data/config.ini + - sed -i "s/BRANCH_NAME/$OUT_FOLDER/" ~/.config/openfisca-france-data/config.ini + - cat ~/.config/openfisca-france-data/config.ini + - compare-erfs-fpr-input -u -s -v + - cp -r 
/mnt/data-out/openfisca-france-data/$OUT_FOLDER/figures_directory . + - ls -alrth + - ls -alrth figures_directory + - cp -r ./figures_directory $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: diagnostics tags: - openfisca + + +run_on_all_years: + stage: run_on_all_years + # Prevent call of before_script because it will fail in this context + before_script: + - '' + script: + - echo "On ne fait rien" + when: manual + + build_conda_package: before_script: - '' diff --git a/ci-runner/build_ci.py b/ci-runner/build_ci.py index 368710dd..014c0acc 100644 --- a/ci-runner/build_ci.py +++ b/ci-runner/build_ci.py @@ -19,126 +19,9 @@ def header(): # Please visit ci-runner/README.md ################################################ -variables: - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" - CI_REGISTRY: https://index.docker.io/v1/ - CI_REGISTRY_IMAGE: leximpact/openfisca-france-data - # OUT_FOLDER: "$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA" # For branch-commit_id - OUT_FOLDER: "$CI_COMMIT_REF_NAME" # For just branch - ROOT_FOLDER: "/mnt/data-out/openfisca-france-data" - -cache: - paths: - - .cache/pip - - ./figures/ - -stages: - - docker - - test - - build_collection - - build_input_data - - aggregates - - run_on_all_years - - build_input_data_all - - aggregates_all - - anaconda - -before_script: - # To be sure we are up to date even if we do not rebuild docker image - - make install - - cp ./ci-runner/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini - - echo "End of before_script" - -build docker image: - stage: docker - tags: - - openfisca - image: - name: gcr.io/kaniko-project/executor:debug - entrypoint: [""] - # Prevent call of before_script because it will fail in this context - before_script: - - '' - script: - # From https://github.com/GoogleContainerTools/kaniko#pushing-to-docker-hub - - DOCKER_HUB_AUTH=$(echo -n $DOCKER_HUB_USER:$DOCKER_HUB_PASSWORD | base64) - - echo 
"{\\"auths\\":{\\"$CI_REGISTRY\\":{\\"auth\\":\\"$DOCKER_HUB_AUTH\\"}}}" > /kaniko/.docker/config.json - - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/docker/Dockerfile --destination $CI_REGISTRY_IMAGE:latest - # Build Docker is needed only if code as changed. - when: manual - - -run_on_all_years: - stage: run_on_all_years - # Prevent call of before_script because it will fail in this context - before_script: - - '' - script: - - echo "On ne fait rien" - when: manual - -clean_folder: - before_script: - - '' - script: rm -rf $ROOT_FOLDER/$OUT_FOLDER || true - stage: build_collection - tags: - - openfisca - when: manual - -copy_previous_build_collections: - before_script: - - '' - script: | - if [[ -f "$ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json" ]]; then - echo "Files already exists, do nothing." - else - rm -rf $ROOT_FOLDER/$OUT_FOLDER || true - mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_collections/ - mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_output/ - cp $ROOT_FOLDER/master/openfisca_survey_manager_config-after-build-collection.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - sed -i "s/master/$OUT_FOLDER/" $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini - cp $ROOT_FOLDER/master/data_collections/erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json - cp ./ci-runner/empty_openfisca_erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json - fi - stage: build_collection - tags: - - openfisca - except: - - master - """ -def build_collections(): - build_collection = { - "build_collection": { - "stage": "build_collection", - "image": "$CI_REGISTRY_IMAGE:latest", - "tags": ["openfisca"], - "script": [ - 'echo "Begin with fresh config"', - # Delete all previous data - "rm -rf $ROOT_FOLDER/$OUT_FOLDER || true", # || true to ignore error - "mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_collections/", - "mkdir -p 
$ROOT_FOLDER/$OUT_FOLDER/data_output/", - "cp ./ci-runner/openfisca_survey_manager_config.ini ~/.config/openfisca-survey-manager/config.ini", - 'echo "Custom output folder"', - 'sed -i "s/BRANCH_NAME/$OUT_FOLDER/" ~/.config/openfisca-survey-manager/config.ini', - r"""echo "{\"name\": \"erfs_fpr\", \"surveys\": {}}" > $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json""", - r"""echo "{\"name\": \"openfisca_erfs_fpr\", \"surveys\": {}}" > $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json""", - "cat ~/.config/openfisca-survey-manager/config.ini", - "#build-collection -c enquete_logement -d -m -s 2013", - "build-collection -c erfs_fpr -d -m -v", - 'echo "Backup updated config"', - "cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini", - ], - "when": "manual", - } - } - return build_collection - - def build_input_data(year: str, stage: str = "build_input_data_all"): if stage == "build_input_data_all": prefix = "in_dt-" @@ -211,19 +94,6 @@ def make_test_by_year(year): } -def make_test(): - return { - "test": { - "stage": "test", - "image": "$CI_REGISTRY_IMAGE:latest", - "tags": ["openfisca"], - "script": [ - "make test", - ], - } - } - - def get_erfs_years(): """ Read raw_data.ini to find all available years. 
@@ -241,56 +111,20 @@ def get_erfs_years(): raise KeyError -def build_conda_package(): - """ - Build conda package - """ - return { - "build_conda_package": { - "stage": "anaconda", - "before_script": [""], - "image": "continuumio/miniconda3", - "script": [ - "conda install -y conda-build anaconda-client", - "conda build -c conda-forge -c openfisca --token $ANACONDA_TOKEN --user OpenFisca .conda", - ], - "except": ["master"], - } - } - - -def build_and_deploy_conda_package(): - """ - Build and deploy conda package - """ - return { - "build_and_deploy_conda_package": { - "stage": "anaconda", - "before_script": [""], - "image": "continuumio/miniconda3", - "script": [ - "conda install -y conda-build anaconda-client", - "conda config --set anaconda_upload yes", - "conda build -c conda-forge -c openfisca --token $ANACONDA_TOKEN --user OpenFisca .conda", - ], - "only": ["master"], - } - } - def build_gitlab_ci(erfs_years): gitlab_ci = header() - gitlab_ci += yaml.dump(make_test()) + # gitlab_ci += yaml.dump(make_test()) # gitlab_ci += yaml.dump(build_and_deploy_conda_package()) - gitlab_ci += yaml.dump(build_collections()) - gitlab_ci += yaml.dump(build_input_data("2018", stage="build_input_data")) - gitlab_ci += yaml.dump(aggregates("2018", stage="aggregates")) + # gitlab_ci += yaml.dump(build_collections()) + gitlab_ci += yaml.dump(build_input_data("2019", stage="build_input_data")) + gitlab_ci += yaml.dump(aggregates("2019", stage="aggregates")) for year in erfs_years: print("\t ERFS : Building for year", year) gitlab_ci += yaml.dump(build_input_data(year)) gitlab_ci += yaml.dump(aggregates(year)) - gitlab_ci += yaml.dump(build_conda_package()) + # gitlab_ci += yaml.dump(build_conda_package()) return gitlab_ci @@ -300,7 +134,7 @@ def main(): # For testing only some years # erfs_years = ["2016", "2017", "2018"] gitlab_ci = build_gitlab_ci(erfs_years) - with open(r".gitlab-ci.yml", mode="w") as file: + with open(r"./gitlab_ci/all_years_build_and_aggregates.yml", 
mode="w") as file: file.write(gitlab_ci) print("Done with success!") diff --git a/ci-runner/openfisca_france_data_config.ini b/ci-runner/openfisca_france_data_config.ini new file mode 100644 index 00000000..7019feae --- /dev/null +++ b/ci-runner/openfisca_france_data_config.ini @@ -0,0 +1,3 @@ +[paths] +figures_directory = /mnt/data-out/openfisca-france-data/BRANCH_NAME/figures_directory +backup = ./backup diff --git a/docker/Dockerfile b/docker/Dockerfile index 3cc05bb4..97cdfcd9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,7 @@ -FROM python:3.7 +FROM python:3.9 + +# Install useful packages +RUN apt-get update && apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra # Add user and group to Image to get output files with the host user ownership # ARG USER_ID=1001 @@ -25,7 +28,7 @@ COPY README.md . # RUN pip install --upgrade pip setuptools # RUN pip install --editable .[test] --upgrade RUN make install -RUN pip install sas7bdat scipy +RUN pip install sas7bdat scipy pandoc COPY . . # build-collection can get a path of file location in parameter but not openfisca_france_data. 
# That's why we create a symbolic link from the default location of config to our data path diff --git a/gitlab_ci/all_years_build_and_aggregates.yml b/gitlab_ci/all_years_build_and_aggregates.yml new file mode 100644 index 00000000..145e4557 --- /dev/null +++ b/gitlab_ci/all_years_build_and_aggregates.yml @@ -0,0 +1,830 @@ + +################################################ +# GENERATED FILE, DO NOT EDIT +# Please visit ci-runner/README.md +################################################ + +input_data-2019: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2019" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - cat ~/.config/openfisca-survey-manager/config.ini + - ls $ROOT_FOLDER/$OUT_FOLDER/data_collections + - build-erfs-fpr -y 2019 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2019.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini + stage: build_input_data + tags: + - openfisca +aggregates-2019: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - input_data-2019 + script: + - echo "aggregates-2019" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2019 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates + tags: + - openfisca +in_dt-1996: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-1996" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + 
~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1996 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1996.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini + stage: build_input_data_all + tags: + - openfisca +agg-1996: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1996 + script: + - echo "aggregates-1996" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1996 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-1997: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-1997" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1997 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1997.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini + stage: build_input_data_all + tags: + - openfisca +agg-1997: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1997 + script: + - echo "aggregates-1997" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1997 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv 
$ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-1998: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-1998" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1998 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1998.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini + stage: build_input_data_all + tags: + - openfisca +agg-1998: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1998 + script: + - echo "aggregates-1998" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1998 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-1999: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-1999" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1999 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_1999.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini + stage: build_input_data_all + tags: + - openfisca +agg-1999: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1999 + script: + - echo "aggregates-1999" + - cp 
$ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1999 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2000: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2000" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2000 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2000.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini + stage: build_input_data_all + tags: + - openfisca +agg-2000: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2000 + script: + - echo "aggregates-2000" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2000 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2001: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2001" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2001 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2001.h5 + - cp ~/.config/openfisca-survey-manager/config.ini 
$ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini + stage: build_input_data_all + tags: + - openfisca +agg-2001: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2001 + script: + - echo "aggregates-2001" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2001 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2002: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2002" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2002 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2002.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini + stage: build_input_data_all + tags: + - openfisca +agg-2002: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2002 + script: + - echo "aggregates-2002" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2002 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2003: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2003" + - 
mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2003 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2003.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini + stage: build_input_data_all + tags: + - openfisca +agg-2003: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2003 + script: + - echo "aggregates-2003" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2003 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2004: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2004" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2004 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2004.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini + stage: build_input_data_all + tags: + - openfisca +agg-2004: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2004 + script: + - echo "aggregates-2004" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2004 + - 
mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2005: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2005" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2005 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2005.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini + stage: build_input_data_all + tags: + - openfisca +agg-2005: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2005 + script: + - echo "aggregates-2005" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2005 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2006: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2006" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2006 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2006.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini + stage: build_input_data_all + tags: + - openfisca +agg-2006: + artifacts: + paths: + - ./*.html + - ./*.csv + image: 
$CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2006 + script: + - echo "aggregates-2006" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2006 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2007: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2007" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2007 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2007.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini + stage: build_input_data_all + tags: + - openfisca +agg-2007: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2007 + script: + - echo "aggregates-2007" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2007 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2008: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2008" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2008 -f 
$ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2008.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini + stage: build_input_data_all + tags: + - openfisca +agg-2008: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2008 + script: + - echo "aggregates-2008" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2008 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2009: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2009" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2009 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2009.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini + stage: build_input_data_all + tags: + - openfisca +agg-2009: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2009 + script: + - echo "aggregates-2009" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2009 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2010: + image: 
$CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2010" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2010 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2010.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini + stage: build_input_data_all + tags: + - openfisca +agg-2010: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2010 + script: + - echo "aggregates-2010" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2010 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2011: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2011" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2011 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2011.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini + stage: build_input_data_all + tags: + - openfisca +agg-2011: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2011 + script: + - echo "aggregates-2011" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini + 
~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2011 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2012: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2012" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2012 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2012.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini + stage: build_input_data_all + tags: + - openfisca +agg-2012: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2012 + script: + - echo "aggregates-2012" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2012 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2013: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2013" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2013 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2013.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini + 
stage: build_input_data_all + tags: + - openfisca +agg-2013: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2013 + script: + - echo "aggregates-2013" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2013 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2014: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2014" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2014 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2014.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini + stage: build_input_data_all + tags: + - openfisca +agg-2014: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2014 + script: + - echo "aggregates-2014" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2014 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2015: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2015" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp 
$ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2015 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2015.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini + stage: build_input_data_all + tags: + - openfisca +agg-2015: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2015 + script: + - echo "aggregates-2015" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2015 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2016: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2016" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2016 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2016.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini + stage: build_input_data_all + tags: + - openfisca +agg-2016: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2016 + script: + - echo "aggregates-2016" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2016 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp 
./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2017: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2017" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2017 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2017.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini + stage: build_input_data_all + tags: + - openfisca +agg-2017: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2017 + script: + - echo "aggregates-2017" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2017 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2018: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2018" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2018 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2018.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini + stage: build_input_data_all + tags: + - openfisca +agg-2018: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - 
in_dt-2018 + script: + - echo "aggregates-2018" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca +in_dt-2019: + image: $CI_REGISTRY_IMAGE:latest + needs: + - run_on_all_years + script: + - echo "build_input_data-2019" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2019 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2019.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini + stage: build_input_data_all + tags: + - openfisca +agg-2019: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2019 + script: + - echo "aggregates-2019" + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2019 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER/data_output + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER/data_output + stage: aggregates_all + tags: + - openfisca diff --git a/openfisca_france_data/base_survey.py b/openfisca_france_data/base_survey.py index 5c0decaa..cfa1ccb1 100644 --- a/openfisca_france_data/base_survey.py +++ b/openfisca_france_data/base_survey.py @@ -9,7 +9,6 @@ allocations_familiales_imposables, cesthra_invalidee, # inversion_directe_salaires, # We use a custom inversion_directe_salaires, not merged in the 
openfisca-france yet - plf2016, plf2016_ayrault_muet, plf2015, plfr2014, @@ -25,10 +24,7 @@ 'allocations_familiales_imposables': allocations_familiales_imposables.allocations_familiales_imposables, 'cesthra_invalidee': cesthra_invalidee.cesthra_invalidee, 'inversion_directe_salaires': inversion_directe_salaires.inversion_directe_salaires, - 'plf2016': plf2016.plf2016, 'ayrault_muet': plf2016_ayrault_muet.ayrault_muet, - 'plf2016_counterfactual': plf2016.plf2016_counterfactual, - 'plf2016_counterfactual_2014': plf2016.plf2016_counterfactual_2014, 'plf2015': plf2015.plf2015, 'plfr2014': plfr2014.plfr2014, 'trannoy_wasmer': trannoy_wasmer.trannoy_wasmer, @@ -81,4 +77,3 @@ def get_cached_composed_reform(reform_keys, tax_benefit_system): def get_cached_reform(reform_key, tax_benefit_system): return get_cached_composed_reform([reform_key], tax_benefit_system) - diff --git a/openfisca_france_data/comparator.py b/openfisca_france_data/comparator.py new file mode 100644 index 00000000..2258d41e --- /dev/null +++ b/openfisca_france_data/comparator.py @@ -0,0 +1,594 @@ +import datetime +import dtale +try: + import ipdb as pdb +except ImportError: + import pdb +import logging +from math import ceil +import numpy as np +from pathlib import Path, PurePath +import pandas as pd +import pypandoc +import seaborn as sns +import sys + + +from openfisca_core import periods + +from openfisca_france_data.config import config +from openfisca_france_data.erfs_fpr.get_survey_scenario import get_survey_scenario + + +log = logging.getLogger(__name__) + + +def compute_result(variable, survey_scenario, target_dataframe): + result = None + stats = None + + entity = survey_scenario.tax_benefit_system.variables[variable].entity.key + entity_original_id = "noindiv" if entity == "individu" else "ident" + output_variables = [entity_original_id, variable] + + entity_dataframe = survey_scenario.create_data_frame_by_entity( + variables = output_variables, + )[entity] + + target = 
target_dataframe[output_variables].rename(columns = {variable: f"target_{variable}"}) + + if f"target_{variable}" not in target: + return None, None + + result = entity_dataframe.merge( + target, + on = entity_original_id, + how = "outer", + ) + + result[f"diff_{variable}"] = result[variable] - result[f"target_{variable}"] + result_variables = ["noindiv", variable, f"diff_{variable}", f"target_{variable}"] + stats = compute_error_stats(result, variable) + return result, stats + + +def compute_confidence_interval(data, variable, width = .9): + """ + Compute confidence interval + + Args: + data (pandas.DataFrame): The data + variable (str): The variable name. + width (float, optional): Width of the symmetric confidence interval. Defaults to .9. + + Returns: + tuple: (left, right, largest_errors) + """ + df = pd.DataFrame({ + "signed_values": data[variable].values, + "noind": data["noindiv"].values + }) + df["abs_values"] = df.signed_values.abs() + in_range_obs = ceil(width * len(df)) + sorted_df = df.sort_values("abs_values") + in_range_values = sorted_df[:in_range_obs]["signed_values"] + left = in_range_values.min() + right = in_range_values.max() + largest_errors = sorted_df[in_range_obs:].copy().sort_values("abs_values", ascending = False)[["signed_values", "abs_values", "noind"]].copy() + + return left, right, largest_errors + + +def compute_error_stats(data, variable): + numerical = ( + isinstance(data[variable].values.flat[0], np.integer) + or isinstance(data[variable].values.flat[0], np.floating) + ) + if not numerical: + return + + df = data.loc[ + (data[variable].values != 0.0) | (data[f"target_{variable}"].values != 0.0), + [variable, f"target_{variable}", "noindiv"] + ].copy() + df["relative_error"] = (df[variable] - df[f"target_{variable}"]) / (df[f"target_{variable}"] + (df[f"target_{variable}"] == 0.0) * df[variable]) + if df.empty: + return + + left, right, largest_errors = compute_confidence_interval(df, "relative_error") + 
less_than_5_pc_error = (df["relative_error"].abs() <= .05).sum() / len(df) + less_than_20_pc_error = (df["relative_error"].abs() <= .2).sum() / len(df) + more_than_80_pc_error = (df["relative_error"].abs() >= .8).sum() / len(df) + return pd.DataFrame.from_dict({ + "variable": [variable], + "size": [len(data)], + "share": [len(df) / len(data)], + "< 5%": [less_than_5_pc_error], + "< 20%": [less_than_20_pc_error], + "> 80%": [more_than_80_pc_error], + "CI (10%)": [[round(left, 2), round(right, 2)]], + }).round(2) + + +def create_output_files(markdown_sections, figures_directory, filename): + header = r"""--- +header-includes: +- \usepackage{caption} +- \usepackage{subcaption} +--- +""" + markdown_sections_joined = header + "".join(markdown_sections) + with open(PurePath.joinpath(figures_directory, "variables.md"), "w", encoding = "utf-8") as markdown_file: + markdown_file.write(markdown_sections_joined) + + pypandoc.convert_file( + str(PurePath.joinpath(figures_directory, "variables.md")), + "pdf", + format = "markdown", + outputfile = str(PurePath.joinpath(figures_directory, f"{filename}.pdf")), + extra_args = ['--pdf-engine=pdflatex'], + ) + + +def create_variable_distribution_figures(variable, result, bins = None, figures_directory = None): + log.debug(f"create_variable_distribution_figures: Examining {variable}") + assert figures_directory is not None + if bins is None: + bins = 100 + + non_both_zeroes = (result[f"{variable}"].fillna(0) != 0) | (result[f"target_{variable}"].fillna(0) != 0) + non_both_zeroes_count = sum(non_both_zeroes) + both_zeroes_count = len(result) - non_both_zeroes_count + melted = result.loc[ + non_both_zeroes, + ["noindiv", variable, f"target_{variable}"] + ].melt( + id_vars = ["noindiv"], + value_vars = [f"{variable}", f"target_{variable}"] + ) + + if melted.empty: + print(f"Cannot create variable_distribution_figures for variable = {variable} because all null values") + return False + + unique_values_count = melted["value"].nunique() + + 
bins = unique_values_count if 0 < unique_values_count < bins else bins + print(f"create_variable_distribution_figures (total): variable = {variable}, bins = {bins}") + + melted["value"] = melted["value"].clip(1, melted["value"].max()) + + sns_plot = sns.histplot( + data = melted, + # palette = "crest", + alpha = .5, + bins = bins, + common_bins = True, + common_norm = False, + fill = True, + hue = "variable", + linewidth = 0, + x = "value", + log_scale = True, + ) + + sns_plot.annotate( + f"Dropping {both_zeroes_count} null observations ({round(100 * both_zeroes_count / (both_zeroes_count + non_both_zeroes_count))} %)", + xy = (0, 1), + xycoords = "axes fraction", + xytext = (5, -5), + textcoords = "offset points", + ha = "left", + va = "top" + ) + filename = f"{variable}.pdf" + sns_plot.figure.savefig(PurePath.joinpath(figures_directory, filename)) + sns_plot.figure.clf() + + return True + + +def create_variable_markdown_section(variable, stats, figures_directory): + if stats is None: + return None + + variable_pdf_path = PurePath.joinpath(figures_directory, f"{variable}.pdf") + + markdown_section = f""" +## Variable `{variable}` + + +### Valeurs + +![]({variable_pdf_path}) + +### En niveau + +![]({PurePath.joinpath(figures_directory, f"diff_{variable}.pdf")}) + + +""" + stats.drop("variable", axis = 1).to_markdown(index = False) + + table_ecarts_markdown_path = PurePath.joinpath(figures_directory, f"table_ecarts_{variable}.md") + stats.drop("variable", axis = 1).to_markdown(table_ecarts_markdown_path, index = False) + + pypandoc.convert_file( + str(table_ecarts_markdown_path), + "pdf", + format = "markdown", + outputfile = str(PurePath.joinpath(figures_directory, f"table_ecarts_{variable}.pdf")), + extra_args = ['--pdf-engine=pdflatex'], + ) + + return markdown_section + + +def create_variable_markdown_summary_section(variable, stats, figures_directory): + if stats is None: + return None + markdown_section = f""" + +## Variable `{variable}` + 
+![]({PurePath.joinpath(figures_directory, f"{variable}.pdf")}){{width=50% height=35%}} +![]({PurePath.joinpath(figures_directory, f"diff_{variable}.pdf")}){{width=50% height=35%}} +\\begin{{figure}}[!h] +\\begin{{subfigure}}[t]{{0.5\\textwidth}} +\\caption{{Distributions des valeurs}} +\\end{{subfigure}} +\\hfill +\\begin{{subfigure}}[t]{{0.5\\textwidth}} +\\caption{{Distributions des écarts}} +\\end{{subfigure}} +\\end{{figure}} + +""" + return markdown_section + + +def create_diff_variable_distribution_figures(variable, result, bins = None, figures_directory = None): + + numerical = ( + isinstance(result[f"{variable}"].values.flat[0], np.integer) + or isinstance(result[f"{variable}"].values.flat[0], np.floating) + ) + if not numerical: + return + + assert figures_directory is not None + if bins is None: + bins = 100 + + non_both_zeroes = (result[f"{variable}"].fillna(0) != 0) | (result[f"target_{variable}"].fillna(0) != 0) + non_both_zeroes_count = sum(non_both_zeroes) + both_zeroes_count = len(result) - non_both_zeroes_count + + unique_values_count = result[f"diff_{variable}"].nunique() + bins = unique_values_count if 0 < unique_values_count < bins else bins + + data = result.loc[non_both_zeroes] + + if data.empty: + print(f"Cannot create diff variable_distribution_figures for variable = {variable} because all null values") + return + + print(f"create_diff_variable_distribution_figures (total): variable = {variable}, bins = {bins}") + sns_plot = sns.histplot(data, x = f"diff_{variable}", stat = "probability", bins = bins) + + sns_plot.annotate( + f"Dropping {both_zeroes_count} null observations ({round(100 * both_zeroes_count / (both_zeroes_count + non_both_zeroes_count))} %)", + xy = (0, 1), + xycoords = "axes fraction", + xytext = (5, -5), + textcoords = "offset points", + ha = "left", + va = "top" + ) + filename = f"diff_{variable}.pdf" + sns_plot.figure.savefig(PurePath.joinpath(figures_directory, filename)) + sns_plot.figure.clf() + return + + +class 
AbstractComparator(object): + name = None + default_target_variables = None + filter_expr_by_label = None + period = None + messages = list() + + def get_name(self): + return self.name + "_" + str(self.period) + + def get_test_dataframes(self, rebuild = False, noindivs = None): + start_time = datetime.datetime.now() + if not rebuild: + return self._load_test_dataframes() + + input_dataframe_by_entity, target_dataframe_by_entity = self.compute_test_dataframes() + log.debug(f"Test data has been processed in {datetime.datetime.now() - start_time}") + save_start_time = datetime.datetime.now() + try: + self._save_test_dataframes(input_dataframe_by_entity, target_dataframe_by_entity) + log.debug(f"Test data has been saved in {datetime.datetime.now() - save_start_time}") + except Exception as e: + log.debug(f"Test data has not been saved because of {e}") + pass + + if noindivs is not None: + selected_idmen_original = list(input_dataframe_by_entity["individu"].query(f"noindiv in {noindivs}").idmen_original.unique())[0] + menage_query = f"idmen_original == {selected_idmen_original}" + selected_noindivs = list(input_dataframe_by_entity["individu"].query(menage_query).noindiv.unique()) + + input_dataframe_by_entity = { + "individu": input_dataframe_by_entity["individu"].query(f"noindiv in {selected_noindivs}"), + "menage": input_dataframe_by_entity["menage"].query(menage_query), + } + target_dataframe_by_entity = { + "individu": target_dataframe_by_entity["individu"].query(f"noindiv in {selected_noindivs}"), + "menage": target_dataframe_by_entity["menage"].query(menage_query), + } + return input_dataframe_by_entity, target_dataframe_by_entity + + def compare(self, browse, load, verbose, debug, target_variables = None, period = None, rebuild = False, summary = False): + """Compare actual data with openfisca-france-data computation.""" + log.setLevel(level = logging.DEBUG if verbose else logging.WARNING) + + name = self.get_name() + + assert name is not None and 
isinstance(name, str) + + figures_directory = Path(config.get("paths", "figures_directory")) / name + + if not figures_directory.exists(): + figures_directory.mkdir(parents = True, exist_ok = True) + + if target_variables is not None and isinstance(target_variables, str): + target_variables = [target_variables] + + assert (target_variables is None) or isinstance(target_variables, list) + + if target_variables is None: + target_variables = self.default_target_variables + + if period is not None: + period = int(period) + + backup_directory = PurePath.joinpath(Path(config.get("paths", "backup"))) + backup_directory.mkdir(parents = True, exist_ok = True) + + backup_path = PurePath.joinpath(backup_directory, f"{name}_backup.h5") + + if load: + assert Path.exists(backup_path), f"Backup file {backup_path} doesn't exist" + shown = pd.read_hdf( + backup_path, + 'result', + ) + dtale.show( + shown, + open_browser = True, + subprocess = False, + ) + + try: + start_time = datetime.datetime.now() + input_dataframe_by_entity, target_dataframe_by_entity = self.get_test_dataframes(rebuild) + + log.debug(f"Test data has been prepared in {datetime.datetime.now() - start_time}") + + # specific_figures_directory = PurePath.joinpath(figures_directory, self.name) + specific_figures_directory = figures_directory + specific_figures_directory.mkdir(parents = True, exist_ok = True) + + result_by_variable = self.compute_divergence( + # input_dataframe_by_entity, + None, # To force load the data_table from hdf file + target_dataframe_by_entity, + specific_figures_directory, + target_variables = target_variables, + period = period, + summary = summary, + ) + + result = pd.concat(result_by_variable, ignore_index = True) + + log.debug(f"Eveyrthing has been computed in {datetime.datetime.now() - start_time}") + del input_dataframe_by_entity, target_dataframe_by_entity + if browse: + start_browsing_time = datetime.datetime.now() + result = result.dropna(axis = 1, how = 'all') + matching_variables 
= ["noindiv"] + assert set(matching_variables) <= set(result.columns) + cols_to_use = result.columns.tolist() + matching_variables + shown = result + + log.debug(f"Data for browsing has been prepared in {datetime.datetime.now() - start_browsing_time}") + + dtale.show( + shown, + open_browser = True, + subprocess = False, + ) + + assert backup_directory.exists() + shown.to_hdf( + backup_path, + 'result', + mode = "w", + format = "table", + ) + + except Exception as error: + if debug: + print(error) + pdb.post_mortem(sys.exc_info()[2]) + raise error + + def compute_divergence(self, input_dataframe_by_entity, target_dataframe_by_entity, figures_directory, + target_variables = None, period = None, summary = False): + """ + Compare openfisca-france-data computation with data targets. + + Args: + input_dataframe_by_entity (dict): Input data + target_dataframe_by_entity (dict): Targets to match + figures_directory (path): Where to store the figures + """ + figures_directory = figures_directory.resolve() + assert Path.exists(figures_directory) + + if target_variables is None: + log.info(f"No target variables. 
Exiting divergence computation.") + return + + data = ( + dict(input_dataframe_by_entity = input_dataframe_by_entity) + if input_dataframe_by_entity is not None + else None + ) + + survey_scenario = get_survey_scenario( + year = str(self.period), + data = data, + survey_name = f'openfisca_erfs_fpr_{period}' + ) + + tax_benefit_system = survey_scenario.tax_benefit_system + markdown_section_by_variable = dict() + markdown_summary_section_by_variable = dict() + stats_by_variable = dict() + result_by_variable = dict() + + for variable in target_variables: + if variable == 'noind': + continue + + entity = tax_benefit_system.variables[variable].entity.key + target_dataframe = target_dataframe_by_entity[entity] + assert variable in target_dataframe + log.debug(f"Testing final only variable: {variable}") + result, stats = compute_result( + variable, + survey_scenario, + target_dataframe, + ) + + result_by_variable[variable] = result + variable_distribution_figures_created = create_variable_distribution_figures(variable, result, figures_directory = figures_directory) + diff_variable_distribution_figures_created = create_diff_variable_distribution_figures(variable, result, figures_directory = figures_directory) + stats_by_variable[variable] = stats + + variable_markdown_section = create_variable_markdown_section( + variable, + stats, + figures_directory, + ) + + if variable_markdown_section is not None: + markdown_section_by_variable[variable] = variable_markdown_section + + if summary: + # create_stats_by_period_figure(variable, result, period, figures_directory = figures_directory) + variable_markdown_summary_section = create_variable_markdown_summary_section( + variable, + stats, + figures_directory, + ) + if variable_markdown_summary_section is not None: + markdown_summary_section_by_variable[variable] = variable_markdown_summary_section + + messages_markdown_section = """ +Filtres appliqués: + +""" + "\n".join(f"- {message}" for message in self.messages) + """ +""" + 
with open(figures_directory / "filters.md", "w", encoding = 'utf-8') as filters_md_file: + filters_md_file.write(messages_markdown_section) + + markdown_sections = list(filter( + lambda x: x is not None, + [messages_markdown_section] + list(markdown_section_by_variable.values()), + )) + create_output_files( + markdown_sections, + figures_directory, + "variables", + ) + if summary: + markdown_sections = list(filter( + lambda x: x is not None, + [messages_markdown_section] + list(markdown_summary_section_by_variable.values()), + )) + create_output_files( + markdown_sections, + figures_directory, + "summary_variables", + ) + + return result_by_variable + + def compute_test_dataframes(self): + raise NotImplementedError + + def filter(self, data_frame): + for label, filter_expr in self.filter_expr_by_label.items(): + obs_before = data_frame.noind.nunique() + selection = (data_frame + .eval("keep = " + filter_expr) + .groupby("noindiv")["keep"] + .transform('all') + ) + data_frame.drop(data_frame.index[~selection], inplace = True) + obs_after = data_frame.noind.nunique() + log_message = f"Applying filter '{label}': dropping {obs_before - obs_after}, keeping {obs_after} observations." + log.info(log_message) + self.messages.append(log_message + "\n") + + def _load_test_dataframes(self, noindivs = None, idents = None): + name = self.get_name() + backup_directory = PurePath.joinpath(Path(config.get("paths", "backup"))) + backup_path = PurePath.joinpath(backup_directory, f"{name}_test_data.h5") + assert backup_path.exists(), "Backup data does not exist. 
Try rebuild option" + store = pd.HDFStore(backup_path) + keys = store.keys() + store.close() + input_dataframe_by_entity = dict() + target_dataframe_by_entity = dict() + + entities = ["individu", "menage"] + for prefix in ["input", "target"]: + for entity in entities: + df = pd.read_hdf(backup_path, f'{prefix}_{name}_{entity}') + + if prefix == "input": + input_dataframe_by_entity[entity] = df + elif prefix == "target": + target_dataframe_by_entity[entity] = df + + return input_dataframe_by_entity, target_dataframe_by_entity + + def _save_test_dataframes(self, input_dataframe_by_entity, target_dataframe_by_entity): + name = self.get_name() + backup_directory = PurePath.joinpath(Path(config.get("paths", "backup"))) + backup_path = PurePath.joinpath(backup_directory, f"{name}_test_data.h5") + backup_path.unlink(missing_ok = True) + + for data_frame_by_entity in [input_dataframe_by_entity, target_dataframe_by_entity]: + if data_frame_by_entity is None: + continue + prefix = "target" if id(data_frame_by_entity) == id(target_dataframe_by_entity) else "input" + for entity, dataframe in data_frame_by_entity.items(): + if dataframe is None: + continue + try: + dataframe.to_hdf( + backup_path, + f'{prefix}_{name}_{entity}', + ) + except NotImplementedError: + dataframe.to_hdf( + backup_path, + f'{prefix}_{name}_{entity}', + format = "table" + ) diff --git a/openfisca_france_data/config.py b/openfisca_france_data/config.py new file mode 100644 index 00000000..4d1e05db --- /dev/null +++ b/openfisca_france_data/config.py @@ -0,0 +1,33 @@ +"""Configuration file parser.""" + +import configparser +from pathlib import Path, PurePath +from xdg import BaseDirectory + + +default_config_files_directory = Path(BaseDirectory.save_config_path('openfisca-france-data')) + + +class Config(configparser.ConfigParser): + """Custom Config Parser.""" + + config_ini = None + + def __init__(self, config_files_directory = default_config_files_directory): + 
configparser.ConfigParser.__init__(self) + assert config_files_directory is not None + + config_ini = PurePath.joinpath(config_files_directory, 'config.ini') + if Path.exists(config_ini): + self.config_ini = config_ini + self.read([config_ini]) + + def save(self): + """Save config to home user config for package openfisca-france-data.""" + assert self.config_ini, "configuration file paths are not defined" + config_file = open(self.config_ini, 'w') + self.write(config_file) + config_file.close() + + +config = Config() diff --git a/openfisca_france_data/debugger.py b/openfisca_france_data/debugger.py index f97a63e3..8ed43ac2 100644 --- a/openfisca_france_data/debugger.py +++ b/openfisca_france_data/debugger.py @@ -8,8 +8,7 @@ from openfisca_france_data.erfs.input_data_builder.base import ( year_specific_by_generic_data_frame_name) -from openfisca_france_data.utils import simulation_results_as_data_frame -from openfisca_france_data.erf import get_erf2of, get_of2erf +from openfisca_france_data.erfs import get_erf2of, get_of2erf from openfisca_plugin_aggregates.aggregates import Aggregates from openfisca_survey_manager.statshelpers import mark_weighted_percentiles as mwp from openfisca_survey_manager.survey_collections import SurveyCollection diff --git a/openfisca_france_data/erfs_fpr/__init__.py b/openfisca_france_data/erfs_fpr/__init__.py index e69de29b..d3325b2f 100644 --- a/openfisca_france_data/erfs_fpr/__init__.py +++ b/openfisca_france_data/erfs_fpr/__init__.py @@ -0,0 +1,7 @@ +original_id_by_entity = { + "individu": "noindiv", + # "menage": "ident", + "menage": "idmen_original", + } + +REFERENCE_YEAR = 2019 diff --git a/openfisca_france_data/erfs_fpr/comparison.py b/openfisca_france_data/erfs_fpr/comparison.py new file mode 100644 index 00000000..a649f4bf --- /dev/null +++ b/openfisca_france_data/erfs_fpr/comparison.py @@ -0,0 +1,91 @@ +"""Compare openfisca-france-data simulation to erfs-fpr.""" + + +import click +import logging + + +from 
openfisca_survey_manager.survey_collections import SurveyCollection +from openfisca_france_data.comparator import AbstractComparator +from openfisca_france_data.erfs_fpr import REFERENCE_YEAR +from openfisca_france_data.erfs_fpr.input_data_builder.step_01_preprocessing import build_table_by_name + + +log = logging.getLogger(__name__) + + +openfisca_by_erfs_fpr_variables = { + "chomage_i": "chomage_net", + "ident": "idmen_original", + "noindiv": "noindiv", + "rag_i": "rag_net", + "retraites_i": "retraite_nette", # TODO: CHECk + "rev_fonciers_bruts": "f4ba", + "ric_i": "ric_net", + "rnc_i": "rnc_net", + "salaires_i": "salaire_net", + } + + +class ErfsFprtoInputComparator(AbstractComparator): + name = "erfs_fpr" + period = None + default_target_variables = [ + "chomage_net", + # "rag_net", TODO: does not exist in openfisca + "retraite_nette", + # "ric_net", TODO: does not exist in openfisca + # "rnc_net", TODO: does not exist in openfisca + # "f4ba", + "salaire_net", + ] + + def compute_test_dataframes(self): + erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr") + # infer names of the survey and data tables + assert self.period is not None + year = int(self.period) + table_by_name = build_table_by_name(year, erfs_fpr_survey_collection) + + log.debug("Loading tables for year {} [{}]".format(year, table_by_name)) + + # load survey and tables + survey = erfs_fpr_survey_collection.get_survey(table_by_name['survey']) + + fpr_individu = survey.get_values(table = table_by_name['fpr_individu'], ignorecase = True) + fpr_menage = survey.get_values(table = table_by_name['fpr_menage'], ignorecase = True) + + openfisca_survey_collection = SurveyCollection.load(collection = "openfisca_erfs_fpr") + openfisca_survey = openfisca_survey_collection.get_survey(f"openfisca_erfs_fpr_{year}") + openfisca_individu = openfisca_survey.get_values(table = f"individu_{year}") + openfisca_menage = openfisca_survey.get_values(table = f"menage_{year}") + + 
input_dataframe_by_entity = { + "individu": openfisca_individu, + "menage": openfisca_menage, + } + target_dataframe_by_entity = { + "individu": fpr_individu.rename(columns = openfisca_by_erfs_fpr_variables), + "menage": fpr_menage.rename(columns = openfisca_by_erfs_fpr_variables), + } + + return input_dataframe_by_entity, target_dataframe_by_entity + + +@click.command() +@click.option('-b', '--browse', is_flag = True, help = "Browse results", default = False, show_default = True) +@click.option('-l', '--load', is_flag = True, default = False, help = "Load backup results", show_default = True) +@click.option('-v', '--verbose', is_flag = True, default = False, help = "Increase output verbosity", show_default = True) +@click.option('-d', '--debug', is_flag = True, default = False, help = "Use python debugger", show_default = True) +@click.option('-p', '--period', default = REFERENCE_YEAR, help = "period(s) to treat", show_default = True) +@click.option('-t', '--target-variables', default = None, help = "target variables to inspect (None means all)", show_default = True) +@click.option('-u', '--rebuild', is_flag = True, default = False, help = "Rebuild test data", show_default = True) +@click.option('-s', '--summary', is_flag = True, default = False, help = "Produce summary figuress", show_default = True) +def compare(browse = False, load = False, verbose = True, debug = True, target_variables = None, period = None, rebuild = False, summary = False): + """Compare openfisca-france-data simulation to erfs-fpr by generating comparison data and graphs. + + Data can be explored using D-Tale and graphs are saved as pdf files. 
+ """ + comparator = ErfsFprtoInputComparator() + comparator.period = period + comparator.compare(browse, load, verbose, debug, target_variables, period, rebuild, summary) diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index 0ff14efe..b55b379a 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -10,7 +10,7 @@ def get_survey_scenario( - year: int = 2014, + year: int = None, rebuild_input_data: bool = False, tax_benefit_system: Optional[TaxBenefitSystem] = None, baseline_tax_benefit_system: Optional[TaxBenefitSystem] = None, @@ -30,6 +30,7 @@ def get_survey_scenario( :param data: Les données de l'enquête. :param reform: Une réforme à appliquer à *france_data_tax_benefit_system*. """ + assert year is not None tax_benefit_system = get_tax_benefit_system( tax_benefit_system, reform, @@ -39,6 +40,15 @@ def get_survey_scenario( baseline_tax_benefit_system, ) + + from openfisca_france_data.model.id_variables import ( + idmen_original, + noindiv, + ) + + tax_benefit_system.add_variable(idmen_original) + tax_benefit_system.add_variable(noindiv) + if not use_marginal_tax_rate: survey_scenario = ErfsFprSurveyScenario.create( tax_benefit_system = tax_benefit_system, diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 57a1e9c2..f2037474 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -1,9 +1,14 @@ import click -import logging import configparser -import sys, getopt -import warnings import datetime +import logging +import pdb +import sys +import time +import warnings + + +from openfisca_france_data.erfs_fpr import REFERENCE_YEAR #from multipledispatch import dispatch # type: ignore warnings.filterwarnings("ignore", ".*is an invalid version 
and will not be supported in a future release.*") @@ -37,7 +42,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: Ici on va nettoyer et formatter les donnés ERFS-FPR, pour les rendre OpenFisca-like """ - # Step 01 : la magie de ce qui nous intéresse : le formattage OpenFisca + # Step 01 : le formattage OpenFisca # # - Formattage des différentes variables # - On merge les tables individus / menages @@ -77,20 +82,14 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: @click.command() -@click.option('-y', '--year', default = 2017, help = "ERFS-FPR year", show_default = True, - type = int, required = True) -@click.option('-f', '--file', 'export_flattened_df_filepath', default = None, - help = 'flattened dataframe filepath', show_default = True) -@click.option('-c', '--configfile', default = None, - help = 'raw_data.ini path to read years to process.', show_default = True) -@click.option('-l', '--log', 'lg', default = "info", - help = 'level of detail for log output.', show_default = True) -def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg = "info"): - import time +@click.option('-y', '--year', default = REFERENCE_YEAR, help = "ERFS-FPR year", show_default = True, type = int, required = True) +@click.option('-f', '--file', 'export_flattened_df_filepath', default = None, help = 'flattened dataframe filepath', show_default = True) +@click.option('-c', '--configfile', default = None, help = 'raw_data.ini path to read years to process.', show_default = True) +@click.option('-l', '--log', 'lg', default = "info", help = 'level of detail for log output.', show_default = True) +@click.option('-d', '--debug', 'debug', is_flag = True, default = False, help = 'debug', show_default = True) +def main(year = None, export_flattened_df_filepath = None, configfile = None, lg = "info", debug = False): + assert year is not None start = time.time() - - catch_errors = False - # get level of logging if lg == 
"info": lgi = logging.INFO @@ -105,9 +104,9 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg log.info("Starting build-erfs-fpr [log: {}]".format(lg)) - # determine which years are to be analyzed, from file if available, else parameter + # Determine which years are to be analyzed, from file if available, else parameter if configfile is not None: - log.warning("Reading years to process from {configfile}") + log.info("Reading years to process from {configfile}") years = [] try: @@ -128,18 +127,24 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg for year in years: log.info('Starting with year {}'.format(year)) - if catch_errors: - try: - build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) - except Exception as e: - log.warning(" == BUILD HAS FAILED FOR YEAR {} == ".format(year)) - log.warning("Error message:\n{}\nEND OF ERROR MESSAGE\n\n".format(str(e))) - else: + try: build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) - + except Exception as e: + log.warning(f" == BUILD HAS FAILED FOR YEAR {year} ==") + log.warning(f"Error message:\n{str(e)}\nEND OF ERROR MESSAGE\n\n") + if debug: + pdb.post_mortem(sys.exc_info()[2]) + raise e else: log.info('Configured single year: [{}]'.format(year)) - build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) + try: + build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) + except Exception as e: + log.warning(f" == BUILD HAS FAILED FOR YEAR {year} ==") + log.warning(f"Error message:\n{str(e)}\nEND OF ERROR MESSAGE\n\n") + if debug: + pdb.post_mortem(sys.exc_info()[2]) + raise e # TODO: create_enfants_a_naitre(year = year) log.info("\n\n ==> Script finished after {} seconds.".format(round(time.time() - start))) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py 
b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py index 31abaf51..8f254a89 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py @@ -12,38 +12,63 @@ log = logging.getLogger(__name__) - -@temporary_store_decorator(file_name = "erfs_fpr") -def build_merged_dataframes(temporary_store = None, year = None): - assert temporary_store is not None - assert year is not None - - erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr") - yr = str(year)[-2:] # 12 for 2012 - yr1 = str(year+1)[-2:] # 12 for 2012 - +def build_table_by_name(year, erfs_fpr_survey_collection): + """Infer names of the survey and data tables.""" # where available, use harmoized data + yr = str(year)[-2:] # 12 for 2012 + yr1 = str(year + 1)[-2:] # 13 for 2012 add_suffix_retropole_years = [2012] + survey = erfs_fpr_survey_collection.get_survey(f"erfs_fpr_{year}") + tables = set(survey.tables.keys()) - # infer names of the survey and data tables - names = { - "survey": f"erfs_fpr_{year}", + table_by_name_stata = { "eec_individu": f"fpr_irf{yr}e{yr}t4" if year >= 2002 else f"fpr_irf{yr}e{yr1}", "eec_menage": f"fpr_mrf{yr}e{yr}t4" if year >= 2002 else f"fpr_mrf{yr}e{yr1}", "fpr_individu": f"fpr_indiv_{year}_retropole" if year in add_suffix_retropole_years else f"fpr_indiv_{year}", "fpr_menage": f"fpr_menage_{year}_retropole" if year in add_suffix_retropole_years else f"fpr_menage_{year}" - } + } + + table_by_name_sas = { + "eec_individu": "IRF", + "eec_menage": "MRF", + "fpr_individu": "Individu", + "fpr_menage": "Menage" + } + + if tables == set(table_by_name_stata.values()): + table_by_name = table_by_name_stata.copy() + + elif tables == set(table_by_name_sas.values()): + table_by_name = table_by_name_sas.copy() - log.debug("Loading tables for year {} [{}]".format(year, names)) + else: + raise ValueError("No incorrect table pattern: {tables}") + + 
table_by_name["survey"] = f"erfs_fpr_{year}" + + return table_by_name + + +@temporary_store_decorator(file_name = "erfs_fpr") +def build_merged_dataframes(temporary_store = None, year = None): + assert temporary_store is not None + assert year is not None + erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr", + ) + + # infer names of the survey and data tables + table_by_name = build_table_by_name(year, erfs_fpr_survey_collection) + + log.debug("Loading tables for year {} [{}]".format(year, table_by_name)) # load survey and tables - survey = erfs_fpr_survey_collection.get_survey(names['survey']) + survey = erfs_fpr_survey_collection.get_survey(table_by_name['survey']) - eec_individu = survey.get_values(table = names['eec_individu'], ignorecase= True) - eec_menage = survey.get_values(table = names['eec_menage'], ignorecase=True) + eec_individu = survey.get_values(table = table_by_name['eec_individu'], ignorecase= True) + eec_menage = survey.get_values(table = table_by_name['eec_menage'], ignorecase=True) - fpr_individu = survey.get_values(table = names['fpr_individu'], ignorecase = True) - fpr_menage = survey.get_values(table = names['fpr_menage'], ignorecase = True) + fpr_individu = survey.get_values(table = table_by_name['fpr_individu'], ignorecase = True) + fpr_menage = survey.get_values(table = table_by_name['fpr_menage'], ignorecase = True) # transform to lowercase for table in (fpr_menage, eec_menage, eec_individu, fpr_individu): @@ -272,8 +297,11 @@ def check_naia_naim(individus, year): assert individus.naim.isin(range(1, 13)).all(), f"naim values: {individus.naim.unique()}" assert isinstance(year, int) - bad_noindiv = individus.loc[~((year >= individus.naia) & (individus.naia > 1890)), - "noindiv"].unique() + bad_noindiv = individus.loc[ + ~((year >= individus.naia) & (individus.naia > 1890)), + "noindiv", + ].unique() + for id in bad_noindiv: individus.loc[individus.noindiv == id,'naia'] = year - individus.loc[individus.noindiv == id, 
'ageq'] diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 687f664e..1d3e0abe 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -85,16 +85,17 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene ] ].copy() survey_name = 'openfisca_erfs_fpr_' + str(year) - + # Formats ids individus = format_ids_and_roles(individus) menages = menages.rename(columns = {'idmen':'idmen_original'}) unique_idmen = individus[['idmen','idmen_original']].drop_duplicates() assert len(unique_idmen) == len(menages), "Number of idmen should be the same individus and menages tables." - - menages = menages.merge(unique_idmen, - how = 'inner', - on = 'idmen_original') + menages = menages.merge( + unique_idmen, + how = 'inner', + on = 'idmen_original' + ) if export_flattened_df_filepath: supermerge = individus.merge( @@ -115,7 +116,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene collection = "openfisca_erfs_fpr", survey_name = survey_name, ) - menages = menages.sort_values(by = ['idmen']) log.debug(f"Saving entity 'menage' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey") set_table_in_survey( diff --git a/openfisca_france_data/erfs_fpr/scenario.py b/openfisca_france_data/erfs_fpr/scenario.py index fa44d5ee..a72a0d9d 100644 --- a/openfisca_france_data/erfs_fpr/scenario.py +++ b/openfisca_france_data/erfs_fpr/scenario.py @@ -53,6 +53,7 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "idfam_original", "idfoy_original", "idmen_original", + "noindiv", # 'rempli_obligation_scolaire', # 'ressortissant_eee', "wprm_init", diff --git a/openfisca_france_data/model/id_variables.py b/openfisca_france_data/model/id_variables.py index c6a42ee9..38d68044 100644 --- 
a/openfisca_france_data/model/id_variables.py +++ b/openfisca_france_data/model/id_variables.py @@ -1,6 +1,14 @@ from .base import * # noqa analysis:ignore +class noindiv(Variable): + value_type = int + is_period_size_independent = True + entity = Individu + label = "Identifiant indvidu de l'ERFS" + definition_period = YEAR + + class idmen_original(Variable): value_type = int is_period_size_independent = True diff --git a/setup.py b/setup.py index 77fc8d56..ca1e5033 100644 --- a/setup.py +++ b/setup.py @@ -24,28 +24,34 @@ "Topic :: Scientific/Engineering :: Information Analysis", ], entry_points = { - 'console_scripts': ['build-erfs-fpr=openfisca_france_data.erfs_fpr.input_data_builder:main'], + 'console_scripts': [ + 'build-erfs-fpr=openfisca_france_data.erfs_fpr.input_data_builder:main', + 'compare-erfs-fpr-input=openfisca_france_data.erfs_fpr.comparison:compare', + 'create-test-erfs-fpr=openfisca_france_data.erfs_fpr.test_case_creation:create_test', + ], }, python_requires = ">= 3.7", install_requires = [ "click >= 8.0.0, < 9.0.0", "matplotlib >= 3.1.1, < 4.0.0", "multipledispatch >= 0.6.0, < 1.0.0", - "openFisca-france >= 113.0.0, < 120.0.0", # Max 120 because of a bug in OF : https://github.com/openfisca/openfisca-france/issues/1996 - "openFisca-survey-manager >= 0.44.2, < 1.0.0", + "openFisca-france >= 145.0.0, < 146.0.0", + "openFisca-survey-manager >= 0.47.2, < 1.0.0", "wquantiles >= 0.3.0, < 1.0.0", # To compute weighted quantiles ], extras_require = { "test": [ "autopep8 >= 1.4.0, < 1.5.0", + "bumpver >= 2022.1120", + "dtale", "flake8 >= 3.7.0, < 3.8.0", "ipython >= 7.5.0, < 8.0.0", "mypy >= 0.670, < 1.0.0", - "pytest >= 4.3.0, < 5.0.0", - "pytest-cov >= 2.6.0, < 3.0.0", + "pypandoc", + 'pytest >= 5.0.0, < 7.0.0', + # "pytest-cov >= 2.6.0, < 3.0.0", "scipy >= 1.2.1, < 2.0.0", "toolz >= 0.9.0, < 1.0.0", - "bumpver >= 2022.1120", ], }, packages = find_packages(exclude = ("docs", "tests")), From cc94b93d69128ad3f345a0d006bb4672e863cfeb Mon Sep 17 00:00:00 
2001 From: Mahdi Ben Jelloul Date: Thu, 23 Mar 2023 11:39:20 +0100 Subject: [PATCH 2/2] Bump --- CHANGELOG.md | 11 +++++++++++ setup.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 107a3a0d..331a0987 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +### 0.24 [#215](https://github.com/openfisca/openfisca-france-data/pull/215) + +* Technical changes + - Ajout d'un comparateur ERFS-FPR vs simulation openfisca + - produit des graphes diagnostics + - produit un tableur ouvert dans dtale pour rechercher les cas les plus problématiques + + - Amélioration de la CI GitLab : + - Ajout d'une étape manuelle pour initialiser les bases de la branche à partir de la dernière CI de master. + - Ajout d'une étape manuelle pour faire tourner sur toutes les branches. + ### 0.23.1 [#213](https://github.com/openfisca/openfisca-france-data/pull/213) * Technical changes diff --git a/setup.py b/setup.py index ca1e5033..162783e4 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name = "OpenFisca-France-Data", - version = "0.23.1", + version = "0.24", description = "OpenFisca-France-Data module to work with French survey data", long_description = long_description, long_description_content_type="text/markdown",