# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
import os

# Locations of the balanced per-device datasets, keyed by device name.
# Every file lives in the same directory and follows the
# ``balanced_<device>.csv`` naming scheme.
file_paths = {
    device: rf'C:\Users\USER\IoT_Network_Traffic_Management\data\balanced\balanced_{device}.csv'
    for device in ('archer', 'camera', 'indoor')
}

# Function for feature engineering and selection
def feature_engineering_and_selection(device_name, categorical_cols=None, k='all',
                                      label_col='label', output_dir=None):
    """Encode, scale and select features for one device, then save them as CSV.

    The balanced dataset for ``device_name`` is read from ``file_paths``.
    Categorical columns are one-hot encoded, numerical columns are
    standardized, and ``SelectKBest`` with the chi-squared score picks the
    features to keep. The result (selected features plus the target column)
    is written to ``feature_engineered_<device_name>.csv`` in ``output_dir``.

    Args:
        device_name: Key into the module-level ``file_paths`` mapping.
        categorical_cols: Names of the columns to one-hot encode. When
            ``None`` (default), every ``object``/``category`` column other
            than the label is used. NOTE: auto-detection also picks up
            high-cardinality text columns (IDs, raw timestamps, ...); pass
            an explicit list to avoid a very wide one-hot matrix.
        k: Number of features to keep, or ``'all'`` (default) to keep all.
        label_col: Name of the target column.
        output_dir: Directory for the output CSV. Defaults to the project's
            ``data\\feature_engineered`` folder.

    Returns:
        The DataFrame that was written to disk.

    Raises:
        KeyError: If ``device_name`` is not in ``file_paths``, or the label
            or a requested categorical column is missing from the dataset.
        ValueError: If the dataset has no usable feature columns.
    """
    # Load dataset
    data = pd.read_csv(file_paths[device_name])

    if label_col not in data.columns:
        raise KeyError(
            f"Column {label_col!r} not found in dataset for device {device_name!r}"
        )

    # Separate the target first so it can never leak into the feature matrix
    # (a numeric label would otherwise be scaled and scored as a feature).
    y = data[label_col]
    features = data.drop(columns=[label_col])

    # Identify categorical and numerical columns
    if categorical_cols is None:
        categorical_cols = features.select_dtypes(
            include=['object', 'category']
        ).columns.tolist()
    else:
        categorical_cols = list(categorical_cols)
        missing = [col for col in categorical_cols if col not in features.columns]
        if missing:
            raise KeyError(
                f"Categorical columns not found in dataset for device "
                f"{device_name!r}: {missing}"
            )
    # A numeric column explicitly declared categorical must not also be
    # treated as numerical.
    numerical_cols = [
        col for col in features.select_dtypes(include='number').columns
        if col not in categorical_cols
    ]

    X = features[numerical_cols].reset_index(drop=True)

    # One-hot encode categorical variables (skipped when there are none).
    if categorical_cols:
        try:
            encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            # scikit-learn < 1.2 only knows the old ``sparse`` keyword.
            encoder = OneHotEncoder(sparse=False)
        encoded = encoder.fit_transform(features[categorical_cols])
        encoded_df = pd.DataFrame(
            encoded, columns=encoder.get_feature_names_out(categorical_cols)
        )
        X = pd.concat([X, encoded_df], axis=1)

    if X.shape[1] == 0:
        raise ValueError(
            f"No feature columns available for device {device_name!r}"
        )

    # chi2 only accepts non-negative input, so score a min-max rescaled copy
    # instead of the standardized values (which are centred on zero).
    # Constant columns get a span of 1 to avoid division by zero.
    col_min = X.min()
    col_span = (X.max() - col_min).replace(0, 1)
    X_non_negative = (X - col_min) / col_span

    # Feature selection using SelectKBest
    if isinstance(k, int):
        k = min(k, X.shape[1])  # never request more features than exist
    selector = SelectKBest(score_func=chi2, k=k)
    selector.fit(X_non_negative, y)
    selected_features = X.columns[selector.get_support()].tolist()

    # Standardize numerical features (one-hot columns are left as 0/1).
    if numerical_cols:
        X[numerical_cols] = StandardScaler().fit_transform(X[numerical_cols])

    # Save selected features and target for model training
    X_selected_df = X[selected_features].copy()
    X_selected_df[label_col] = y.to_numpy()

    if output_dir is None:
        output_dir = r'C:\Users\USER\IoT_Network_Traffic_Management\data\feature_engineered'
    os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist
    X_selected_df.to_csv(
        os.path.join(output_dir, f'feature_engineered_{device_name}.csv'),
        index=False,
    )
    return X_selected_df

# Run the feature engineering and selection pipeline once per device,
# always in the same order.
DEVICES = ('archer', 'camera', 'indoor')
for device in DEVICES:
    feature_engineering_and_selection(device)

# Reached only if every device was processed without raising.
print("Feature engineering and selection have been completed, and results have been saved.")
