In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the engineered feature table from the processing data folder
features_csv = '../data/processing/engineered_features.csv'
df = pd.read_csv(features_csv)

# Print the first rows as a quick look at the loaded columns
preview = df.head()
print(preview)

  COUNTRY  PC_HEALTHXP_growth  PC_GDP_growth  USD_CAP_growth  PC_HEALTHXP_avg  \
0     AUS           -2.706264      -0.249001        1.734883          13.1034   
1     AUT           -0.145484       1.276625        3.235784          12.0644   
2     BEL           -2.163902      -1.320608        1.227562          12.0686   
3     CAN           -2.514451      -0.011966        1.150380          16.4811   
4     CHE            0.553733       2.304422        3.902980          10.4399   

   PC_GDP_avg  USD_CAP_avg  PC_HEALTHXP_volatility  PC_GDP_volatility  \
0      1.2641     611.4605                1.357888           0.040281   
1      1.2571     615.2524                0.156731           0.046143   
2      1.2934     582.9294                0.782231           0.060504   
3      1.7931     799.7810                1.008083           0.052830   
4      1.1219     649.7471                0.229655           0.073386   

   USD_CAP_volatility  
0           34.194026  
1           55.772536  
2 

In [7]:
# Split the frame: COUNTRY is kept aside as a label, every numeric column is a feature
numeric_data = df.select_dtypes(include=['number'])
numeric_cols = list(numeric_data.columns)
country_col = df['COUNTRY']

# Fit scikit-learn's StandardScaler on the numeric features and transform them;
# the result is wrapped back into a DataFrame carrying the original column names
scaler = StandardScaler()
scaled_values = scaler.fit_transform(numeric_data)
scaled_data = pd.DataFrame(scaled_values, columns=numeric_cols)

# Re-attach the country labels as the first column.
# .values is used so the labels are placed by position, not by index alignment.
scaled_data.insert(0, 'COUNTRY', country_col.values)

print(scaled_data.head())

  COUNTRY  PC_HEALTHXP_growth  PC_GDP_growth  USD_CAP_growth  PC_HEALTHXP_avg  \
0     AUS           -0.640268       0.098427       -0.305759        -0.583378   
1     AUT            0.923362       0.811742        0.437774        -0.746094   
2     BEL           -0.309099      -0.402608       -0.557082        -0.745436   
3     CAN           -0.523146       0.209254       -0.595317        -0.054402   
4     CHE            1.350309       1.292294        0.768297        -1.000504   

   PC_GDP_avg  USD_CAP_avg  PC_HEALTHXP_volatility  PC_GDP_volatility  \
0   -0.290888     0.483847                0.083153          -0.818927   
1   -0.308051     0.505073               -0.972705          -0.746294   
2   -0.219048     0.324137               -0.422869          -0.568377   
3    1.006154     1.538017               -0.224337          -0.663453   
4   -0.639544     0.698166               -0.908602          -0.408781   

   USD_CAP_volatility  
0           -0.600752  
1            0.346475  
2 

In [None]:
# Save scaled dataset
# The file name is defined once so that the path written to disk and the path
# reported in the summary message below cannot drift apart.
output_path = 'scaled_features.csv'
scaled_data.to_csv(output_path, index=False)  # index=False: do not write the row index

print("✓ Standard scaling complete!")
print(f"✓ Scaled data saved as '{output_path}'")
# NOTE(review): the row count is reported as "countries" — assumes one row per country; confirm.
print(f"✓ Scaled {len(numeric_cols)} features for {len(scaled_data)} countries")