In [2]:
import pandas as pd
import os
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

# Build the ML-ready dataset: one-hot encode the categorical columns, min-max
# scale the numeric columns, keep the ID column (if any) untouched, and write
# the combined frame to an Excel file.

# 1. Load feature-engineered data
file_path = "../data/processed/marketing_campaign_feature_engineered.xlsx"  # fixed path
df_fe = pd.read_excel(file_path)

# 2. Separate categorical & numeric columns
# ID is a row identifier, not a feature: keep it out of BOTH lists so it is
# neither scaled (numeric ID) nor one-hot encoded (string ID), and only carry
# it into the output when the column actually exists.
id_cols = ['ID'] if 'ID' in df_fe.columns else []
categorical_cols = [
    col
    for col in df_fe.select_dtypes(include=['object', 'category']).columns.tolist()
    if col not in id_cols
]
numeric_cols = [
    col
    for col in df_fe.select_dtypes(include=['number']).columns.tolist()
    if col not in id_cols
]
# NOTE(review): columns of any other dtype (e.g. datetime) are selected by
# neither call above and are therefore absent from the saved dataset.

# 3. One-hot encode categorical features
# drop='first' removes one dummy column per feature to avoid redundant columns.
encoder = OneHotEncoder(sparse_output=False, drop='first')  # sklearn >=1.2
if categorical_cols:
    encoded_array = encoder.fit_transform(df_fe[categorical_cols])
    encoded_df = pd.DataFrame(
        encoded_array,
        columns=encoder.get_feature_names_out(categorical_cols),
    )
else:
    # Fitting on a zero-column frame raises; fall back to an empty frame with
    # the right number of rows so the concat below still lines up.
    encoded_df = pd.DataFrame(index=pd.RangeIndex(len(df_fe)))
    encoded_array = encoded_df.to_numpy()

# 4. Scale numeric features to the [0, 1] range
scaler = MinMaxScaler()
if numeric_cols:
    scaled_array = scaler.fit_transform(df_fe[numeric_cols])
    scaled_df = pd.DataFrame(scaled_array, columns=numeric_cols)
else:
    # Same zero-column guard as for the encoder.
    scaled_df = pd.DataFrame(index=pd.RangeIndex(len(df_fe)))
    scaled_array = scaled_df.to_numpy()

# 5. Combine scaled numeric + encoded categorical + ID
# Indexes are reset so the three pieces are aligned positionally, row by row.
df_ml_ready = pd.concat(
    [df_fe[id_cols].reset_index(drop=True),
     scaled_df.reset_index(drop=True),
     encoded_df.reset_index(drop=True)],
    axis=1
)

# 6. Save ML-ready dataset
save_dir = "../data/processed/encoded"
# to_excel does not create missing parent folders; make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
save_path = os.path.join(save_dir, "marketing_campaign_ml_ready.xlsx")
df_ml_ready.to_excel(save_path, index=False)
print(f"✅ ML-ready dataset saved at: {save_path}")


✅ ML-ready dataset saved at: ../data/processed/encoded\marketing_campaign_ml_ready.xlsx
