## Apply variable transformations

In [2]:
import numpy
import pandas as pd
from provisioning_model.functions.create_panel_dataset import custom_log_transform

In [4]:
# Read the CSV into the frame that every later cell uses as its untransformed
# input. The path is relative, so it resolves against the working directory.
foundational_df_no_transformations = pd.read_csv(
    filepath_or_buffer="./output/foundational_no_transformations.csv"
)

1. Apply the custom log transformation (`custom_log_transform`) to the energy use variable

In [ ]:
# Work on a copy so the untransformed frame stays available to later cells.
foundational_df_transformed = foundational_df_no_transformations.copy()

# custom_log_transform is given the observed minimum and maximum of the raw
# energy series alongside each individual value.
energy_raw = foundational_df_no_transformations["energy"]
min_value = energy_raw.min()
max_value = energy_raw.max()
foundational_df_transformed["energy"] = energy_raw.apply(
    lambda value: custom_log_transform(value, min_value, max_value)
)

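2. Transform need satisfaction variables using the saturation transformation $\ln(x_{\text{sat}} - x)$, where $x_{\text{sat}}$ is the saturation value assigned to each variable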

In [ ]:
# Saturation value per need-satisfaction variable. The transform applied below
# is ln(saturation - x), which is only finite for observations strictly below
# the saturation value.
# NOTE(review): the standardization cell later in this notebook lists "ladder",
# not "lifesatisfaction" — confirm which column name the CSV actually uses.
need_satisfaction_variables = {
    "hale": 77,
    "education": 102,
    "socialsupport": 80,
    "lifesatisfaction": 10,
    # "nutrition": 100.3,
    # "sanitation": 100.7,
    # "incomepoverty": 100.3,
    # "energyaccess": 100.7,
}
for col, saturation_value in need_satisfaction_variables.items():
    if col not in foundational_df_no_transformations.columns:
        print(f"Warning: Column {col} not found in imputed DataFrame.")
        continue

    original_values = foundational_df_no_transformations[col]
    # Vectorized equivalent of applying numpy.log(saturation - x) per element.
    transformed_values = numpy.log(saturation_value - original_values)
    foundational_df_transformed[col] = transformed_values

    # numpy.log yields -inf when x equals the saturation value and NaN when x
    # exceeds it; isna() alone would miss the -inf case. Only flag non-finite
    # results that were not already missing in the input, so pre-existing gaps
    # are not misreported as saturation problems.
    invalid_mask = original_values.notna() & ~numpy.isfinite(transformed_values)
    if invalid_mask.any():
        print(
            f"Warning: NaN or infinite values found in column {col}. Original values might be too close to or exceed the saturation point."
        )

Transform provisioning factors using appropriate transformations.
Not applying any transformations to foundational economy variables.

In [ ]:
# Columns passed through custom_log_transform, each with its own observed
# minimum and maximum.
# NOTE(review): the markdown above this cell says no transformations are
# applied to foundational economy variables, yet these four columns are
# transformed here — confirm whether they count as foundational economy
# variables and which behaviour is intended.
provisioning_factor_variables = ["material", "other", "overlooked", "providential"]
for col in provisioning_factor_variables:
    raw_column = foundational_df_no_transformations[col]
    min_value, max_value = raw_column.min(), raw_column.max()
    foundational_df_transformed[col] = raw_column.apply(
        lambda value: custom_log_transform(value, min_value, max_value)
    )

Standardize by subtracting the mean and dividing by the standard deviation.

In [ ]:
from sklearn.preprocessing import StandardScaler

In [ ]:
# Columns replaced in place by their standardized values; every other column
# of the frame is left as it is.
columns_to_standardize = [
    'energy',
    'ladder',
    'socialsupport',
    'hale',
    'freedom',
    'charity',
    'corruption',
    'positive',
    'negative',
    'wdi_hale',
    'education',
    'goveffectiveness',
    'gini',
]
scaler = StandardScaler()
standardized_values = scaler.fit_transform(
    foundational_df_transformed[columns_to_standardize]
)
foundational_df_transformed[columns_to_standardize] = standardized_values

In [ ]:
# Show the first rows of the transformed frame as the cell output.
foundational_df_transformed.head(n=5)