## Understanding Run Decision Tree 1

In [1]:
# Import packages
import shutil
from pathlib import Path

import pandas as pd

from apply.apply_asset_level_hotspot import apply_deforestation_hotspots_assets
from apply.apply_controversy import apply_controversy_filters
from apply.apply_decision_tree1 import apply_dt1_conservative_approach, apply_dt1_weighted_average_approach
from apply.apply_direct_attribution import apply_direct_attribution_filter
from apply.apply_forest_and_finance import apply_forest_and_finance
from apply.apply_number_of_pairs import apply_number_of_pairs
from apply.apply_sectoral_filter_nace import apply_sectoral_filters_nace
from apply.apply_supply_chain_filter import apply_supply_chain_filter



Load the preliminary code (file paths, helper functions and user settings) from the project's Python files.

In [2]:
# Import functions from other scripts
from filepaths import PATH_TO_PORTFOLIO_DATA, PATH_TO_COMPANY_SPECIFIC_ASSET_LEVEL_DATA, PATH_TO_OUTPUT_FOLDER, \
    PATH_TO_INPUT_FOLDER

In [3]:
from generate.generate_combine_asset_data import combine_asset_datasets
from prep.prep_asset_level_merge import merge_asset_level_to_portfolio_companies
from prep.prep_weighted_country_sector_pairs import prep_weighted_country_sector_pairs

In [5]:
from user_input import (
    # GENERAL SETTINGS
    exclude_financial_institutions,
    perform_full_analysis,
    bias_towards_existing_data,
    bias_towards_missing_data,
    equal_information_importance,
    climate_and_company_information_importance,
    specific_information_importance,
    importance_revenue_info,
    importance_hierarchy_info,
    importance_asset_info,
    importance_headquarter_info,
    # EXECUTE DT1
    use_io_model_score,
    use_trase_flag,
    use_flag_direct,
    use_flag_indirect,
    use_flag_forest500,
    recent_controversies_cutoffs,  # if true, absolute cutoffs are used
    historical_controversies_cutoffs,  # suggest to keep this false

    # EXECUTE BUCKETING
    flag_direct_threshold_high,
    flag_direct_threshold_medium,
    flag_indirect_threshold_high,
    flag_indirect_threshold_medium,
    IO_threshold_high,
    IO_threshold_medium,
    recent_controversies_threshold_high,
    recent_controversies_threshold_medium,
    historical_controversies_threshold_high,
    historical_controversies_threshold_medium,
    cutoff_direct_attribution,
    subsidiary_data_exists,
    hotspot_assets_threshold,
    hotspot_subsidiaries_threshold,

    # OVERLAY WITH HOTSPOTS
    DISTANCE_THRESHOLD_ASSETS
)

In [6]:
from utils import clean_df_portfolio

# from apply.apply_sectoral_filter_gics import apply_sectoral_filters_gics # not yet incorporated

### ---------------------------------------------------###
### 0) CORE DATA INPUT & SET PARAMETERS                ###
### ---------------------------------------------------###



In [7]:
"""
0.1: QUALITY CHECK PARAMETERS
"""
# Group the user inputs by the type they are expected to have, so that each
# group can be validated in a single pass.

# Settings that must be plain booleans (True / False).
# bias_towards_missing_data is included because the bias consistency check
# compares it against bias_towards_existing_data and therefore relies on both
# being real booleans.
boolean_variables = [
    exclude_financial_institutions, perform_full_analysis,
    bias_towards_existing_data, bias_towards_missing_data,
    equal_information_importance, climate_and_company_information_importance, specific_information_importance,
    use_io_model_score, use_trase_flag, use_flag_direct, use_flag_indirect, use_flag_forest500,
    recent_controversies_cutoffs, historical_controversies_cutoffs
]

# Settings that must be numbers (thresholds, cutoffs and importance weights).
numerical_variables = [
    flag_direct_threshold_high, flag_direct_threshold_medium, flag_indirect_threshold_high,
    flag_indirect_threshold_medium, cutoff_direct_attribution, IO_threshold_high, IO_threshold_medium,
    recent_controversies_threshold_high, recent_controversies_threshold_medium,
    historical_controversies_threshold_high, historical_controversies_threshold_medium,
    hotspot_assets_threshold, hotspot_subsidiaries_threshold, importance_revenue_info, importance_hierarchy_info,
    importance_asset_info, importance_headquarter_info
]

# NOTE(review): subsidiary_data_exists and DISTANCE_THRESHOLD_ASSETS are imported
# from user_input but appear in neither list, so they are not type-checked here.
# Confirm their expected types before adding them.


In [8]:
# Validate the user inputs collected in boolean_variables / numerical_variables
# and fail fast with a ValueError before any analysis step runs.

# Perform boolean check
if any(not isinstance(variable, bool) for variable in boolean_variables):
    raise ValueError("All of the user inputs listed above must be boolean values (True or False), please check.")

# Perform numerical check
# bool is a subclass of int in Python, so isinstance(True, (int, float)) is True.
# Reject booleans explicitly; otherwise a threshold accidentally set to
# True/False would pass this check and silently be used as 1/0.
if any(
    isinstance(variable, bool) or not isinstance(variable, (int, float))
    for variable in numerical_variables
):
    raise ValueError("All of the user inputs listed above must be numerical values (integers or floats), please check.")

# Perform bias check
# Exactly one of the two bias flags must be True: equal values mean either
# both or neither were selected.
if bias_towards_existing_data == bias_towards_missing_data:
    raise ValueError('Please check your chosen bias and decide for one of them')

# Perform weighting check
# Booleans sum as 0/1, so the sum is the number of weightings switched on.
# NOTE(review): this only rejects two or more active weightings; a configuration
# with none selected passes. Confirm whether that is intended.
if sum([
    equal_information_importance,
    climate_and_company_information_importance,
    specific_information_importance
]) >= 2:
    raise ValueError("Two or more weightings are set to True.")

Step 0.1 (quality check of the parameters) is complete; we now continue with step 0.2.

In [9]:
"""
0.2: Delete interim files that were stored
"""

# If perform_full_analysis is set to True, delete the output folder and create a new one

if perform_full_analysis:
    # Best-effort removal: with ignore_errors=True a failed deletion does not
    # raise, so the folder (or parts of it) may still exist afterwards.
    shutil.rmtree(PATH_TO_OUTPUT_FOLDER, ignore_errors=True)

    # parents=True creates missing parent directories; exist_ok=True keeps this
    # cell from failing with FileExistsError when the removal above left the
    # folder in place.
    PATH_TO_OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)
    (PATH_TO_OUTPUT_FOLDER / 'internal_data').mkdir(exist_ok=True)