In [None]:
# Step 5: Feature Engineering & Data Preparation
"""

# Encoded columns the demand model expects to find in df_cleaned
missing_features = ['availability_6:00-22:00', 'availability_9:00-18:00',
                    'charger_type_AC Level 1', 'charger_type_AC Level 2', 'charger_type_DC Fast Charger']
missing_in_df = [col for col in missing_features if col not in df_cleaned.columns]

print("Missing Columns:", missing_in_df)

# One-hot encode only the raw categorical columns that are still present.
# pd.get_dummies raises KeyError when asked to encode a column that is not in
# the frame, which is what happens if this cell is re-run after the raw
# columns have already been replaced by their dummy columns.
raw_categoricals = [col for col in ('availability', 'charger_type') if col in df_cleaned.columns]
if raw_categoricals:
    # Each prefix equals its source column name, as in the original call
    df_cleaned = pd.get_dummies(df_cleaned, columns=raw_categoricals, prefix=raw_categoricals)

# Re-checking missing features before heading towards feature engineering
missing_in_df = [col for col in missing_features if col not in df_cleaned.columns]
print("Missing Columns After Encoding:", missing_in_df)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Function to prepare data for demand prediction: selects features, checks that the encoded columns exist, normalizes numeric data, and splits into train/test sets.

def preprocess_demand_data(df):
    '''Build train/test splits for the demand-prediction model.

    Selects the predictor columns and the target from ``df``, splits them
    80:20 with a fixed seed, and min-max scales the numeric predictors.
    The scaler is fitted on the training rows only and then applied to the
    test rows, so no test-set statistics leak into training. ``df`` itself
    is left unmodified.

    Args:
        df: DataFrame that already contains the one-hot encoded
            ``availability_*`` / ``charger_type_*`` columns listed below,
            the numeric predictors, and the target column.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or four ``None`` values
        (after printing an error) when a required column is missing.
    '''

    # 1. Selecting Predictor Variables
    numeric_features = ["cost_usd_per_kwh", "distance_to_city_km", "parking_spots"]
    features = [
        "cost_usd_per_kwh",
        "distance_to_city_km",
        "parking_spots",
        "availability_6:00-22:00",  # Encoded availability hours
        "availability_9:00-18:00",  # Encoded availability hours
        "renewable_energy_source",
        "charger_type_AC Level 1",
        "charger_type_AC Level 2",
        "charger_type_DC Fast Charger"
    ]

    target = "usage_stats_avg_users_per_day"  # Predicting demand

    # 2. Ensuring All Required Columns Exist
    missing_features = [col for col in features if col not in df.columns]
    if missing_features:
        print(f"ERROR: Missing required features in df_cleaned: {missing_features}")
        return None, None, None, None
    # The target is validated too, so a missing target follows the same
    # error-return path instead of raising KeyError further down.
    if target not in df.columns:
        print(f"ERROR: Missing target column in df_cleaned: {target}")
        return None, None, None, None

    # 3. Train-Test Split (80:20), done before scaling to avoid data leakage
    X = df[features]
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 4. Normalizing Numerical Features
    # Work on explicit copies so neither the caller's frame nor a pandas
    # view of it is written to.
    X_train = X_train.copy()
    X_test = X_test.copy()

    # Fit on the training rows only; the test rows reuse the training min/max,
    # so scaled test values may fall slightly outside [0, 1].
    scaler = MinMaxScaler()
    X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
    X_test[numeric_features] = scaler.transform(X_test[numeric_features])

    print("Data Preprocessing Complete!")
    print(f"Training Samples: {len(X_train)}, Testing Samples: {len(X_test)}")

    return X_train, X_test, y_train, y_test

# Build the demand-model train/test splits from the cleaned frame
(
    X_train,
    X_test,
    y_train,
    y_test,
) = preprocess_demand_data(df_cleaned)

"""