# DeepFed: Time-Series Intrusion Detection System

This notebook implements a comprehensive DeepFed model for intrusion detection using the Edge-IIoTset dataset. The system includes:

- **Dataset**: Edge-IIoTset - Cyber Security Dataset of IoT & IIoT
- **Model**: GRU + CNN (Time-Series Architecture) 
- **Features**: Multi-class attack type classification, efficient HDF5 caching, interactive data exploration

## Section 1: Import Required Libraries

In [33]:
import os
import sys
import time
import json
import pickle
import zipfile
import subprocess
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd

import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend for notebook
import matplotlib.pyplot as plt
import seaborn as sns

# Use Keras 3 (not tensorflow.keras)
import keras
from keras import layers, callbacks

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler, OrdinalEncoder
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

import warnings
warnings.filterwarnings('ignore')

# Check for required packages
try:
    import tables  # For HDF5 support
except ImportError:
    print("ERROR: pytables is required for HDF5 export. Please install with `pip install tables`.")
    sys.exit(1)

print("✓ All required libraries imported successfully!")

✓ All required libraries imported successfully!


## Section 2: Configuration and Setup

In [41]:
# Configuration
DATASET_NAME = "mohamedamineferrag/edgeiiotset-cyber-security-dataset-of-iot-iiot"
DATA_DIR = Path("./data/edge_iiot")
MODELS_DIR = Path("./models/deepfed")
VISUALIZATIONS_DIR = Path("./visualizations")
CACHE_DIR = Path("./cache")
PREPROCESSED_DIR = CACHE_DIR / "preprocessed"
HDF5_DATASET = PREPROCESSED_DIR / "dataset.h5"

BATCH_SIZE = 128
MAX_EPOCHS = 10
LEARNING_RATE = 1e-3
RANDOM_STATE = 42
SEQUENCE_LENGTH = 128  # Time steps for time-series sequences (set None for dynamic windows)
WINDOW_STRIDE = 1
VALIDATION_SPLIT = 0.2
SAMPLE_SIZE = 20000  # Use a small sample for debugging; set to None for full dataset
MAX_SEQUENCES = 20000  # Downsample generated sequences for faster experimentation (set None to disable)
USE_MULTICLASS = True  # Use multi-class attack type classification
USE_CACHED_DATA = True  # Reuse cached binary dataset once generated

# Model hyperparameters
GRU_UNITS = 128
GRU_DROPOUT = 0.3
CNN_FILTERS = 128
CNN_KERNEL_SIZE = 3
MLP_UNITS = 256
MLP_DROPOUT = 0.4
EARLY_STOPPING_PATIENCE = 5

# Set random seeds
np.random.seed(RANDOM_STATE)
keras.utils.set_random_seed(RANDOM_STATE)

# Create directories
for dir_path in [DATA_DIR, MODELS_DIR, VISUALIZATIONS_DIR, CACHE_DIR, PREPROCESSED_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)

print("✓ Configuration set and directories created!")

✓ Configuration set and directories created!


## Section 3: Download Dataset

In [35]:
def download_dataset():
    """Download Edge-IIoTset dataset from Kaggle"""
    print("\n" + "=" * 80)
    print("DOWNLOADING EDGE-IIOTSET DATASET FROM KAGGLE")
    print("=" * 80)
    
    # Setup Kaggle credentials from Colab secrets
    try:
        from google.colab import userdata
        print("✓ Running in Google Colab - using secrets")
        
        # Get credentials from Colab secrets
        kaggle_username = userdata.get('KAGGLE_USERNAME')
        kaggle_key = userdata.get('KAGGLE_KEY')
        
        if not kaggle_username or not kaggle_key:
            raise ValueError("KAGGLE_USERNAME and KAGGLE_KEY must be set in Colab secrets")
        
        # Set environment variables for Kaggle API
        os.environ['KAGGLE_USERNAME'] = kaggle_username
        os.environ['KAGGLE_KEY'] = kaggle_key
        
        print(f"  • Username: {kaggle_username}")
        print(f"  • API Key: {'*' * len(kaggle_key)}")
        
    except ImportError:
        print("✓ Not running in Colab - using default kaggle.json authentication")
    except Exception as e:
        print(f"✗ Error setting up Kaggle credentials: {e}")
        print("\nPlease add these secrets in Colab:")
        print("  1. Click the key icon (🔑) in the left sidebar")
        print("  2. Add secret: KAGGLE_USERNAME")
        print("  3. Add secret: KAGGLE_KEY")
        print("\nGet your credentials from: https://www.kaggle.com/settings/account")
        raise
    
    try:
        import kaggle
    except ImportError:
        print("Installing kaggle package...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "kaggle"])
        import kaggle
    
    try:
        print(f"\nDownloading {DATASET_NAME}...")
        subprocess.run([
            "kaggle", "datasets", "download", "-d", DATASET_NAME, "-p", DATA_DIR
        ], check=True)
        
        # Extract zip files
        for zip_file in Path(DATA_DIR).glob("*.zip"):
            print(f"Extracting {zip_file.name}...")
            with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                zip_ref.extractall(DATA_DIR)
            zip_file.unlink()
        
        print("✓ Dataset downloaded and extracted successfully!")
        return True
    except Exception as e:
        print(f"✗ Error: {e}")
        print(f"\nPlease download manually from:")
        print(f"https://www.kaggle.com/datasets/{DATASET_NAME}")
        return False

# Check if dataset exists, download if needed
csv_exists = any(Path(DATA_DIR).rglob("*.csv"))
if not csv_exists:
    print("Dataset not found. Downloading...")
    download_dataset()
else:
    print("✓ Dataset already exists!")

✓ Dataset already exists!


## Section 4: Convert CSV to Binary Format

In [36]:
def convert_csv_to_binary():
    """
    Convert CSV files to a consolidated HDF5 dataset while preserving all features.
    Adds source metadata and derived temporal features for downstream processing.
    """
    preprocessed_file = HDF5_DATASET
    
    if preprocessed_file.exists() and USE_CACHED_DATA:
        print("\n" + "=" * 80)
        print("✓ PREPROCESSED DATA FOUND - SKIPPING CSV PARSING")
        print("=" * 80)
        print(f"Using cached file: {preprocessed_file}")
        print(f"Size: {preprocessed_file.stat().st_size / 1024**2:.1f} MB")
        return preprocessed_file
    
    print("\n" + "=" * 80)
    print("CONVERTING CSV TO EFFICIENT BINARY FORMAT")
    print("=" * 80)
    
    # Find all CSV files
    csv_files = list(Path(DATA_DIR).rglob("*.csv"))
    if not csv_files:
        raise FileNotFoundError("No CSV files found!")
    
    print(f"\n✓ Found {len(csv_files)} CSV file(s):")
    for f in csv_files:
        print(f"  - {f.name} ({f.stat().st_size / 1024 / 1024:.1f} MB)")
    
    # Load and combine all CSVs
    if SAMPLE_SIZE:
        print(f"\nLoading sample data (max {SAMPLE_SIZE:,} rows per file)...")
    else:
        print(f"\nLoading FULL dataset (this may take a while)...")
    
    dfs = []
    manifest = []
    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file, nrows=SAMPLE_SIZE, low_memory=False)
            original_rows = len(df)

            # Normalize string columns to avoid mixed dtype issues
            object_cols = df.select_dtypes(include=['object']).columns.tolist()
            for col in object_cols:
                df[col] = df[col].astype(str).str.strip().fillna('__NA__')

            # Attach source metadata
            df['source_file'] = csv_file.name
            df['source_path'] = str(csv_file.relative_to(DATA_DIR))
            df['source_category'] = csv_file.parent.name

            # Build temporal features without dropping original column
            if 'frame.time' in df.columns:
                time_str = df['frame.time'].astype(str).str.strip()
                parsed_time = pd.to_datetime(time_str, format='%Y %H:%M:%S.%f', errors='coerce')
                if parsed_time.isna().all():
                    parsed_time = pd.to_datetime(time_str, errors='coerce')
                parsed_time = parsed_time.fillna(method='ffill').fillna(method='bfill')
                df['frame_time_datetime'] = parsed_time
                base_time = parsed_time.iloc[0]
                rel_seconds = (parsed_time - base_time).dt.total_seconds()
                df['frame_time_relative_sec'] = rel_seconds.astype('float64')

            dfs.append(df)
            duration = float(df.get('frame_time_relative_sec', pd.Series([0])).max()) if len(df) else 0.0
            manifest.append({
                'file': str(csv_file.relative_to(DATA_DIR)),
                'rows_loaded': int(original_rows),
                'duration_seconds': duration
            })
            print(f"  ✓ {csv_file.name}: {len(df):,} rows, {len(df.columns)} columns")
        except Exception as e:
            print(f"  ✗ Error loading {csv_file.name}: {e}")
    
    df = pd.concat(dfs, ignore_index=True)
    print(f"\n{'='*80}")
    print(f"Combined dataset: {len(df):,} rows × {len(df.columns)} columns")
    print(f"{'='*80}")
    
    print("\nPreparing data for HDF5 storage...")
    df_filtered = df.copy()
    print(f"Final dataset shape: {df_filtered.shape}")
    print(f"Memory usage: {df_filtered.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

    preprocessed_file.parent.mkdir(parents=True, exist_ok=True)
    df_filtered.to_hdf(preprocessed_file, key='data', mode='w', index=False)
    print(f"✓ Saved: {preprocessed_file}")
    print(f"  Size: {preprocessed_file.stat().st_size / 1024**2:.1f} MB")

    total_csv_size = sum(f.stat().st_size for f in csv_files)
    compression_ratio = (1 - preprocessed_file.stat().st_size / total_csv_size) * 100
    print(f"  Compression: {compression_ratio:.1f}% savings over CSV")
    print(f"  Original CSV size: {total_csv_size / 1024**2:.1f} MB")

    manifest_path = Path(PREPROCESSED_DIR) / 'ingest_manifest.json'
    with open(manifest_path, 'w') as f:
        json.dump(manifest, f, indent=2)
    print(f"  Manifest saved: {manifest_path}")

    return preprocessed_file

# Convert CSV to HDF5
preprocessed_file = convert_csv_to_binary()


✓ PREPROCESSED DATA FOUND - SKIPPING CSV PARSING
Using cached file: cache/preprocessed/dataset.h5
Size: 277.1 MB


## Section 5: Explore Dataset Characteristics

In this section, we'll load the preprocessed HDF5 data and explore the dataset characteristics including:
- Basic statistics and data types
- Class distribution analysis
- Feature analysis and correlations
- Temporal patterns and sequence characteristics

In [37]:
def explore_dataset():
    """
    Comprehensive dataset exploration including statistics, distributions, and correlations.
    """
    print("\n" + "=" * 80)
    print("EXPLORING DATASET CHARACTERISTICS")
    print("=" * 80)
    
    # Load the preprocessed data
    print(f"\nLoading data from: {preprocessed_file}")
    df = pd.read_hdf(preprocessed_file, key='data')
    print(f"✓ Loaded {len(df):,} rows × {len(df.columns)} columns")
    
    # Basic info
    print(f"\n{'='*80}")
    print("BASIC DATASET INFO")
    print(f"{'='*80}")
    print(f"Shape: {df.shape}")
    print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
    
    # Data types
    print(f"\nData types:")
    dtype_counts = df.dtypes.value_counts()
    for dtype, count in dtype_counts.items():
        print(f"  {dtype}: {count} columns")
    
    # Target variable analysis
    target_col = 'Attack_type'
    if target_col in df.columns:
        print(f"\n{'='*80}")
        print("TARGET VARIABLE ANALYSIS")
        print(f"{'='*80}")
        
        class_counts = df[target_col].value_counts()
        print(f"Classes found: {len(class_counts)}")
        print(f"Most frequent class: '{class_counts.index[0]}' ({class_counts.iloc[0]:,} samples)")
        print(f"Least frequent class: '{class_counts.index[-1]}' ({class_counts.iloc[-1]:,} samples)")
        
        # Class distribution plot
        plt.figure(figsize=(12, 6))
        ax = class_counts.plot(kind='bar', color='skyblue', edgecolor='black')
        plt.title('Class Distribution', fontsize=14, fontweight='bold')
        plt.xlabel('Attack Type', fontsize=12)
        plt.ylabel('Sample Count', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', alpha=0.3)
        
        # Add value labels on bars
        for i, v in enumerate(class_counts):
            ax.text(i, v + max(class_counts) * 0.01, f'{v:,}', ha='center', va='bottom', fontsize=9)
        
        plt.tight_layout()
        plt.savefig(VISUALIZATIONS_DIR / 'class_distribution.png', dpi=150, bbox_inches='tight')
        plt.show()
        
        # Class imbalance analysis
        imbalance_ratio = class_counts.max() / class_counts.min()
        print(f"\nClass imbalance ratio: {imbalance_ratio:.2f}x")
        if imbalance_ratio > 10:
            print("⚠️  SEVERE class imbalance detected - consider balancing techniques")
        elif imbalance_ratio > 5:
            print("⚠️  Moderate class imbalance detected")
        else:
            print("✓ Balanced class distribution")
    
    # Feature analysis
    print(f"\n{'='*80}")
    print("FEATURE ANALYSIS")
    print(f"{'='*80}")
    
    # Identify feature types
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    
    print(f"Numeric features: {len(numeric_cols)}")
    print(f"Categorical features: {len(categorical_cols)}")
    
    # Remove metadata columns for analysis
    metadata_cols = ['source_file', 'source_path', 'source_category']
    analysis_cols = [col for col in df.columns if col not in metadata_cols + [target_col]]
    
    print(f"Analysis features: {len(analysis_cols)}")
    
    # Numeric feature statistics
    if numeric_cols:
        print(f"\nNumeric feature statistics (first 10):")
        numeric_stats = df[numeric_cols[:10]].describe().T
        print(numeric_stats.to_string(float_format='%.2f'))
    
    # Correlation analysis for numeric features
    if len(numeric_cols) > 1:
        print(f"\n{'='*80}")
        print("FEATURE CORRELATIONS")
        print(f"{'='*80}")
        
        # Sample for correlation if dataset is large
        sample_size = min(10000, len(df))
        if sample_size < len(df):
            corr_df = df[numeric_cols].sample(sample_size, random_state=42)
            print(f"Using sample of {sample_size:,} rows for correlation analysis")
        else:
            corr_df = df[numeric_cols]
        
        corr_matrix = corr_df.corr()
        
        # Plot correlation heatmap
        plt.figure(figsize=(12, 10))
        mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
        sns.heatmap(corr_matrix, mask=mask, annot=False, cmap='coolwarm', 
                   center=0, square=True, linewidths=0.5)
        plt.title('Feature Correlation Matrix', fontsize=14, fontweight='bold')
        plt.tight_layout()
        plt.savefig(VISUALIZATIONS_DIR / 'feature_correlations.png', dpi=150, bbox_inches='tight')
        plt.show()
        
        # Find highly correlated features
        high_corr = []
        for i in range(len(corr_matrix.columns)):
            for j in range(i+1, len(corr_matrix.columns)):
                corr_val = abs(corr_matrix.iloc[i, j])
                if corr_val > 0.8:
                    high_corr.append((corr_matrix.columns[i], corr_matrix.columns[j], corr_val))
        
        if high_corr:
            print(f"\nHighly correlated feature pairs (|ρ| > 0.8):")
            for feat1, feat2, corr in sorted(high_corr, key=lambda x: x[2], reverse=True)[:10]:
                print(f"  {feat1} ↔ {feat2}: {corr:.3f}")
        else:
            print("\nNo highly correlated feature pairs found (|ρ| > 0.8)")
    
    # Temporal analysis
    if 'frame_time_relative_sec' in df.columns:
        print(f"\n{'='*80}")
        print("TEMPORAL ANALYSIS")
        print(f"{'='*80}")
        
        time_col = 'frame_time_relative_sec'
        time_range = df[time_col].max() - df[time_col].min()
        print(f"Time range: {time_range:.2f} seconds")
        
        # Sample time series plot
        plt.figure(figsize=(14, 6))
        
        # Plot a few numeric features over time (sample)
        sample_df = df.sample(min(5000, len(df)), random_state=42).sort_values(time_col)
        plot_cols = [col for col in numeric_cols[:3] if col in sample_df.columns]
        
        if plot_cols:
            for i, col in enumerate(plot_cols):
                plt.subplot(1, len(plot_cols), i+1)
                plt.plot(sample_df[time_col], sample_df[col], alpha=0.7, linewidth=1)
                plt.title(f'{col} over Time', fontsize=12)
                plt.xlabel('Time (seconds)')
                plt.ylabel(col)
                plt.grid(alpha=0.3)
            
            plt.tight_layout()
            plt.savefig(VISUALIZATIONS_DIR / 'temporal_patterns.png', dpi=150, bbox_inches='tight')
            plt.show()
    
    print(f"\n{'='*80}")
    print("EXPLORATION COMPLETE")
    print(f"{'='*80}")
    print(f"✓ Dataset exploration saved to: {VISUALIZATIONS_DIR}")
    
    return df

# Explore the dataset
df = explore_dataset()


EXPLORING DATASET CHARACTERISTICS

Loading data from: cache/preprocessed/dataset.h5
✓ Loaded 469,070 rows × 68 columns

BASIC DATASET INFO
Shape: (469070, 68)
✓ Loaded 469,070 rows × 68 columns

BASIC DATASET INFO
Shape: (469070, 68)
Memory usage: 864.43 MB

Data types:
  object: 34 columns
  float64: 32 columns
  int64: 1 columns
  datetime64[ns]: 1 columns

TARGET VARIABLE ANALYSIS
Classes found: 16
Most frequent class: 'Normal' (220,000 samples)
Least frequent class: 'Fingerprinting' (1,001 samples)

Class imbalance ratio: 219.78x
⚠️  SEVERE class imbalance detected - consider balancing techniques

FEATURE ANALYSIS
Numeric features: 33
Categorical features: 34
Analysis features: 64

Numeric feature statistics (first 10):
Memory usage: 864.43 MB

Data types:
  object: 34 columns
  float64: 32 columns
  int64: 1 columns
  datetime64[ns]: 1 columns

TARGET VARIABLE ANALYSIS
Classes found: 16
Most frequent class: 'Normal' (220,000 samples)
Least frequent class: 'Fingerprinting' (1,001 

## Section 6: Prepare Time Series Data

In this section, we'll prepare the data for time series modeling by:
- Encoding categorical features
- Creating sliding window sequences
- Splitting data into train/validation/test sets
- Applying feature scaling

In [38]:
def prepare_time_series_data(df):
    """
    Prepare time series sequences with categorical encoding and sliding windows.
    """
    print("\n" + "=" * 80)
    print("PREPARING TIME SERIES SEQUENCES")
    print("=" * 80)
    
    # Identify feature columns (exclude metadata and target)
    metadata_cols = ['source_file', 'source_path', 'source_category', 'frame_time_datetime']
    target_col = 'Attack_type'
    
    # Get all features except metadata and target
    all_cols = [col for col in df.columns if col not in metadata_cols + [target_col]]
    
    # Separate numeric and categorical features
    numeric_cols = df[all_cols].select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = [col for col in all_cols if col not in numeric_cols]
    
    print(f"Total features: {len(all_cols)}")
    print(f"  Numeric: {len(numeric_cols)}")
    print(f"  Categorical: {len(categorical_cols)}")
    
    # Handle categorical encoding
    if categorical_cols:
        print(f"\nEncoding {len(categorical_cols)} categorical features...")
        encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
        df_encoded = df.copy()
        df_encoded[categorical_cols] = encoder.fit_transform(df[categorical_cols].astype(str))
        
        # Save encoder for later use
        encoder_path = CACHE_DIR / 'ordinal_encoder.pkl'
        with open(encoder_path, 'wb') as f:
            pickle.dump(encoder, f)
        print(f"✓ Encoder saved: {encoder_path}")
    else:
        df_encoded = df.copy()
    
    # Prepare features and target
    feature_cols = numeric_cols + categorical_cols
    X = df_encoded[feature_cols].values.astype(np.float32)
    y = df_encoded[target_col].values
    
    # Encode target labels
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    
    # Save label encoder
    label_encoder_path = CACHE_DIR / 'label_encoder.pkl'
    with open(label_encoder_path, 'wb') as f:
        pickle.dump(label_encoder, f)
    print(f"✓ Label encoder saved: {label_encoder_path}")
    
    print(f"\nFeature matrix shape: {X.shape}")
    print(f"Target vector shape: {y_encoded.shape}")
    print(f"Number of classes: {len(label_encoder.classes_)}")
    
    # Create time series sequences
    print(f"\n{'='*80}")
    print("CREATING TIME SERIES SEQUENCES")
    print(f"{'='*80}")
    
    sequence_length = SEQUENCE_LENGTH
    print(f"Sequence length: {sequence_length} timesteps")
    
    # Sort by time if available to preserve temporal structure
    if 'frame_time_relative_sec' in df_encoded.columns:
        print("Sorting data by timestamp...")
        sort_indices = df_encoded['frame_time_relative_sec'].argsort()
        X = X[sort_indices]
        y_encoded = y_encoded[sort_indices]
    
    # Create sliding window sequences
    sequences = []
    targets = []
    
    for i in range(len(X) - sequence_length + 1):
        seq = X[i:i + sequence_length]
        target = y_encoded[i + sequence_length - 1]  # Target is the last timestep's label
        sequences.append(seq)
        targets.append(target)
    
    X_sequences = np.asarray(sequences, dtype=np.float32)
    y_sequences = np.asarray(targets, dtype=np.int64)
    
    print(f"✓ Created {len(X_sequences):,} sequences")
    print(f"  Sequence shape: {X_sequences.shape}")
    print(f"  Target shape: {y_sequences.shape}")
    
    # Optional downsampling for faster experimentation
    if MAX_SEQUENCES and len(X_sequences) > MAX_SEQUENCES:
        print(f"\nDownsampling sequences to {MAX_SEQUENCES:,} for quicker experimentation...")
        sample_indices, _ = train_test_split(
            np.arange(len(X_sequences)),
            train_size=MAX_SEQUENCES,
            random_state=RANDOM_STATE,
            stratify=y_sequences,
            shuffle=True
        )
        X_sequences = X_sequences[sample_indices]
        y_sequences = y_sequences[sample_indices]
        print(f"  -> Using {len(X_sequences):,} sequences after downsampling")
    
    # Train/validation/test split with stratification to preserve class balance
    print(f"\n{'='*80}")
    print("SPLITTING DATA INTO TRAIN/VAL/TEST SETS")
    print(f"{'='*80}")
    
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X_sequences,
        y_sequences,
        test_size=0.2,
        random_state=RANDOM_STATE,
        stratify=y_sequences,
        shuffle=True
    )
    
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val,
        y_train_val,
        test_size=0.2,
        random_state=RANDOM_STATE,
        stratify=y_train_val,
        shuffle=True
    )
    
    print(f"Train set: {X_train.shape[0]:,} sequences")
    print(f"Validation set: {X_val.shape[0]:,} sequences")
    print(f"Test set: {X_test.shape[0]:,} sequences")
    
    # Apply feature scaling using training data only
    print(f"\nApplying RobustScaler to features...")
    scaler = RobustScaler()
    scaler.fit(X_train.reshape(-1, X_train.shape[-1]))
    
    def scale_sequences(data):
        flattened = data.reshape(-1, data.shape[-1])
        scaled = scaler.transform(flattened)
        scaled = np.nan_to_num(scaled, nan=0.0, posinf=0.0, neginf=0.0, copy=False)
        return scaled.reshape(data.shape).astype(np.float32)
    
    X_train_scaled = scale_sequences(X_train)
    X_val_scaled = scale_sequences(X_val)
    X_test_scaled = scale_sequences(X_test)
    
    # Save scaler
    scaler_path = CACHE_DIR / 'robust_scaler.pkl'
    with open(scaler_path, 'wb') as f:
        pickle.dump(scaler, f)
    print(f"✓ Scaler saved: {scaler_path}")
    
    # Class distribution in splits
    def summarize_distribution(name, labels):
        counts = Counter(labels)
        human_counts = {int(k): int(v) for k, v in counts.items()}
        print(f"{name} class distribution: {human_counts}")
    
    print()
    summarize_distribution("Train", y_train)
    summarize_distribution("Val", y_val)
    summarize_distribution("Test", y_test)
    
    # Save processed data
    print(f"\n{'='*80}")
    print("SAVING PROCESSED DATA")
    print(f"{'='*80}")
    
    np.save(CACHE_DIR / 'X_train.npy', X_train_scaled)
    np.save(CACHE_DIR / 'X_val.npy', X_val_scaled)
    np.save(CACHE_DIR / 'X_test.npy', X_test_scaled)
    np.save(CACHE_DIR / 'y_train.npy', y_train)
    np.save(CACHE_DIR / 'y_val.npy', y_val)
    np.save(CACHE_DIR / 'y_test.npy', y_test)
    
    print("✓ Processed data saved to cache directory")
    
    # Save metadata
    metadata = {
        'sequence_length': sequence_length,
        'num_features': len(feature_cols),
        'num_classes': len(label_encoder.classes_),
        'feature_cols': feature_cols,
        'numeric_cols': numeric_cols,
        'categorical_cols': categorical_cols,
        'class_names': label_encoder.classes_.tolist(),
        'train_samples': int(X_train.shape[0]),
        'val_samples': int(X_val.shape[0]),
        'test_samples': int(X_test.shape[0]),
        'total_sequences_generated': int(len(sequences)),
        'total_sequences_used': int(X_sequences.shape[0])
    }
    
    metadata_path = CACHE_DIR / 'preprocessing_metadata.json'
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)
    print(f"✓ Metadata saved: {metadata_path}")
    
    return X_train_scaled, X_val_scaled, X_test_scaled, y_train, y_val, y_test, metadata

# Prepare time series data
X_train, X_val, X_test, y_train, y_val, y_test, metadata = prepare_time_series_data(df)


PREPARING TIME SERIES SEQUENCES
Total features: 63
  Numeric: 33
  Categorical: 30

Encoding 30 categorical features...
✓ Encoder saved: cache/ordinal_encoder.pkl
✓ Label encoder saved: cache/label_encoder.pkl

Feature matrix shape: (469070, 63)
Target vector shape: (469070,)
Number of classes: 16

CREATING TIME SERIES SEQUENCES
Sequence length: 128 timesteps
Sorting data by timestamp...
✓ Encoder saved: cache/ordinal_encoder.pkl
✓ Label encoder saved: cache/label_encoder.pkl

Feature matrix shape: (469070, 63)
Target vector shape: (469070,)
Number of classes: 16

CREATING TIME SERIES SEQUENCES
Sequence length: 128 timesteps
Sorting data by timestamp...
✓ Created 468,943 sequences
  Sequence shape: (468943, 128, 63)
  Target shape: (468943,)

Downsampling sequences to 20,000 for quicker experimentation...
  -> Using 20,000 sequences after downsampling

SPLITTING DATA INTO TRAIN/VAL/TEST SETS
✓ Created 468,943 sequences
  Sequence shape: (468943, 128, 63)
  Target shape: (468943,)

Dow

## Section 7: Build DeepFed Model

In this section, we'll construct the DeepFed architecture which combines:
- **GRU layers** for temporal pattern recognition in sequential data
- **CNN layers** for spatial feature extraction
- **MLP layers** for final classification

The model processes time series sequences and learns both temporal dependencies and spatial patterns.

In [42]:
def build_deepfed_model(input_shape, num_classes):
    """
    Build the DeepFed model with GRU, CNN, and MLP components.
    """
    print("\n" + "=" * 80)
    print("BUILDING DEEPFED MODEL")
    print("=" * 80)
    
    print(f"Input shape: {input_shape}")
    print(f"Number of classes: {num_classes}")
    
    # Input layer
    inputs = layers.Input(shape=input_shape, name='input_sequences')
    
    # GRU branch for temporal patterns
    print("\nBuilding GRU branch...")
    gru_branch = layers.GRU(units=GRU_UNITS, return_sequences=True, name='gru_1')(inputs)
    gru_branch = layers.Dropout(GRU_DROPOUT, name='gru_dropout_1')(gru_branch)
    gru_branch = layers.GRU(units=GRU_UNITS // 2, return_sequences=False, name='gru_2')(gru_branch)
    gru_branch = layers.Dropout(GRU_DROPOUT, name='gru_dropout_2')(gru_branch)
    
    # CNN branch for spatial patterns
    print("Building CNN branch...")
    cnn_branch = layers.Conv1D(filters=CNN_FILTERS, kernel_size=CNN_KERNEL_SIZE, 
                               activation='relu', padding='same', name='conv1d_1')(inputs)
    cnn_branch = layers.MaxPooling1D(pool_size=2, name='maxpool_1')(cnn_branch)
    cnn_branch = layers.Conv1D(filters=CNN_FILTERS // 2, kernel_size=CNN_KERNEL_SIZE, 
                               activation='relu', padding='same', name='conv1d_2')(cnn_branch)
    cnn_branch = layers.MaxPooling1D(pool_size=2, name='maxpool_2')(cnn_branch)
    cnn_branch = layers.GlobalAveragePooling1D(name='global_avg_pool')(cnn_branch)
    
    # Concatenate branches
    print("Fusing branches...")
    concatenated = layers.Concatenate(name='concatenate')([gru_branch, cnn_branch])
    
    # MLP head
    print("Building MLP classifier...")
    mlp = layers.Dense(MLP_UNITS, activation='relu', name='dense_1')(concatenated)
    mlp = layers.Dropout(MLP_DROPOUT, name='mlp_dropout_1')(mlp)
    mlp = layers.Dense(MLP_UNITS // 2, activation='relu', name='dense_2')(mlp)
    mlp = layers.Dropout(MLP_DROPOUT, name='mlp_dropout_2')(mlp)
    
    # Output layer
    outputs = layers.Dense(num_classes, activation='softmax', name='output')(mlp)
    
    # Create model
    model = keras.Model(inputs=inputs, outputs=outputs, name='DeepFed')
    
    # Compile model
    optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    print(f"\n{'='*80}")
    print("MODEL SUMMARY")
    print(f"{'='*80}")
    model.summary()
    
    # Calculate model size
    total_params = model.count_params()
    print(f"\nTotal parameters: {total_params:,}")
    
    # Estimate model size in memory
    param_size = 4  # float32 = 4 bytes
    model_size_mb = (total_params * param_size) / (1024 ** 2)
    print(f"Estimated model size: {model_size_mb:.2f} MB")
    
    return model

# Build the model
input_shape = (metadata['sequence_length'], metadata['num_features'])
num_classes = metadata['num_classes']
model = build_deepfed_model(input_shape, num_classes)


BUILDING DEEPFED MODEL
Input shape: (128, 63)
Number of classes: 16

Building GRU branch...
Building CNN branch...
Fusing branches...
Building MLP classifier...

MODEL SUMMARY



Total parameters: 228,304
Estimated model size: 0.87 MB


## Section 8: Train the Model

In this section, we'll train the DeepFed model with:
- Early stopping to prevent overfitting
- Model checkpointing to save the best weights
- Learning rate scheduling
- Training history visualization

In [None]:
def train_model(model, X_train, X_val, y_train, y_val):
    """
    Train the DeepFed model with callbacks and monitoring.
    """
    print("\n" + "=" * 80)
    print("TRAINING DEEPFED MODEL")
    print("=" * 80)
    
    # Create callbacks
    callbacks_list = []
    
    # Early stopping
    early_stopping = callbacks.EarlyStopping(
        monitor='val_loss',
        patience=EARLY_STOPPING_PATIENCE,
        restore_best_weights=True,
        verbose=1
    )
    callbacks_list.append(early_stopping)
    
    # Terminate on NaN to avoid wasted epochs
    terminate_on_nan = callbacks.TerminateOnNaN()
    callbacks_list.append(terminate_on_nan)
    
    # Model checkpointing (weights only for portability)
    checkpoint_path = MODELS_DIR / 'deepfed_best.weights.h5'
    checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
    
    model_checkpoint = callbacks.ModelCheckpoint(
        filepath=str(checkpoint_path),
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=True,
        mode='max',
        verbose=1
    )
    callbacks_list.append(model_checkpoint)
    
    # Learning rate scheduler
    lr_scheduler = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-5,
        verbose=1
    )
    callbacks_list.append(lr_scheduler)
    
    # CSV logger
    csv_logger = callbacks.CSVLogger(str(MODELS_DIR / 'training_log.csv'), append=False)
    callbacks_list.append(csv_logger)
    
    print("Training configuration:")
    print(f"  Batch size: {BATCH_SIZE}")
    print(f"  Max epochs: {MAX_EPOCHS}")
    print(f"  Early stopping patience: {EARLY_STOPPING_PATIENCE}")
    print(f"  Callbacks: {[cb.__class__.__name__ for cb in callbacks_list]}")
    
    # Calculate class weights for imbalanced data
    unique_classes = np.unique(y_train)
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=unique_classes,
        y=y_train
    )
    class_weights_dict = {int(cls): float(weight) for cls, weight in zip(unique_classes, class_weights)}
    
    print(f"\nClass weights: {class_weights_dict}")
    
    # Quick NaN/infinity check before training
    def assert_finite(name, array):
        if not np.isfinite(array).all():
            raise ValueError(f"{name} contains NaN or infinite values after preprocessing")
    
    assert_finite("X_train", X_train)
    assert_finite("X_val", X_val)
    
    # Train the model
    print(f"\n{'='*80}")
    print("STARTING TRAINING")
    print(f"{'='*80}")
    
    start_time = time.time()
    
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=MAX_EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=callbacks_list,
        class_weight=class_weights_dict,
        verbose=1
    )
    
    training_time = time.time() - start_time
    print(f"\nTraining completed in {training_time:.2f} seconds ({training_time/60:.2f} minutes)")
    
    # Load best weights if available
    if checkpoint_path.exists():
        print(f"\nLoading best weights from: {checkpoint_path}")
        model.load_weights(str(checkpoint_path))
    
    return model, history

# Train the model
model, history = train_model(model, X_train, X_val, y_train, y_val)


TRAINING DEEPFED MODEL
Training configuration:
  Batch size: 128
  Max epochs: 10
  Early stopping patience: 5
  Callbacks: ['EarlyStopping', 'TerminateOnNaN', 'ModelCheckpoint', 'ReduceLROnPlateau', 'CSVLogger']

Class weights: {0: 1.465201465201465, 1: 1.465201465201465, 2: 1.465201465201465, 3: 1.465201465201465, 4: 1.4760147601476015, 5: 28.571428571428573, 6: 12.121212121212121, 7: 0.13322231473771856, 8: 28.571428571428573, 9: 1.465201465201465, 10: 1.465201465201465, 11: 1.340033500837521, 12: 1.465201465201465, 13: 1.0914051841746248, 14: 1.4678899082568808, 15: 1.8433179723502304}

STARTING TRAINING
Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step - accuracy: 0.1106 - loss: 111361.8287
Epoch 1: val_accuracy improved from None to 0.21219, saving model to models/deepfed/deepfed_best.weights.h5

Epoch 1: val_accuracy improved from None to 0.21219, saving model to models/deepfed/deepfed_best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━

## Section 9: Evaluate Model Performance

In this section, we'll evaluate the trained model by:
- Computing performance metrics on the test set
- Generating classification reports and confusion matrices
- Visualizing training history
- Analyzing model predictions

In [None]:
def evaluate_model(model, X_test, y_test, history, metadata):
    """
    Comprehensive model evaluation with metrics, visualizations, and analysis.
    """
    print("\n" + "=" * 80)
    print("EVALUATING MODEL PERFORMANCE")
    print("=" * 80)
    
    # Load label encoder for class names
    label_encoder_path = CACHE_DIR / 'label_encoder.pkl'
    with open(label_encoder_path, 'rb') as f:
        label_encoder = pickle.load(f)
    
    class_names = label_encoder.classes_
    
    # Make predictions
    print("Generating predictions on test set...")
    y_pred_prob = model.predict(X_test, batch_size=BATCH_SIZE, verbose=1)
    y_pred = np.argmax(y_pred_prob, axis=1)
    
    # Basic metrics
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"\nTest Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")
    
    # Detailed classification report
    print(f"\n{'='*80}")
    print("CLASSIFICATION REPORT")
    print(f"{'='*80}")
    
    report = classification_report(y_test, y_pred, target_names=class_names, output_dict=True)
    print(classification_report(y_test, y_pred, target_names=class_names))
    
    # Confusion matrix
    print(f"\n{'='*80}")
    print("CONFUSION MATRIX")
    print(f"{'='*80}")
    
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
               xticklabels=class_names, yticklabels=class_names,
               cbar_kws={'label': 'Number of Samples'})
    plt.title('Confusion Matrix', fontsize=14, fontweight='bold')
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(VISUALIZATIONS_DIR / 'confusion_matrix.png', dpi=150, bbox_inches='tight')
    plt.show()
    
    # Training history visualization
    print(f"\n{'='*80}")
    print("TRAINING HISTORY")
    print(f"{'='*80}")
    
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
    
    # Accuracy
    ax1.plot(history.history['accuracy'], label='Train Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Val Accuracy')
    ax1.set_title('Model Accuracy', fontweight='bold')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax1.grid(alpha=0.3)
    
    # Loss
    ax2.plot(history.history['loss'], label='Train Loss')
    ax2.plot(history.history['val_loss'], label='Val Loss')
    ax2.set_title('Model Loss', fontweight='bold')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    ax2.grid(alpha=0.3)
    
    # Learning rate (if available)
    if 'lr' in history.history:
        ax3.plot(history.history['lr'])
        ax3.set_title('Learning Rate Schedule', fontweight='bold')
        ax3.set_xlabel('Epoch')
        ax3.set_ylabel('Learning Rate')
        ax3.set_yscale('log')
        ax3.grid(alpha=0.3)
    else:
        ax3.text(0.5, 0.5, 'Learning rate data not available', 
                ha='center', va='center', transform=ax3.transAxes)
        ax3.set_title('Learning Rate Schedule', fontweight='bold')
    
    # Per-class accuracy
    class_accuracies = []
    for i, class_name in enumerate(class_names):
        class_mask = (y_test == i)
        if np.sum(class_mask) > 0:
            class_acc = np.mean(y_pred[class_mask] == i)
            class_accuracies.append((class_name, class_acc))
    
    if class_accuracies:
        classes, accs = zip(*class_accuracies)
        bars = ax4.barh(classes, accs, color='skyblue', edgecolor='black')
        ax4.set_title('Per-Class Accuracy', fontweight='bold')
        ax4.set_xlabel('Accuracy')
        ax4.set_xlim(0, 1)
        ax4.grid(axis='x', alpha=0.3)
        
        # Add value labels
        for bar, acc in zip(bars, accs):
            ax4.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height()/2, 
                    f'{acc:.3f}', ha='left', va='center', fontsize=9)
    
    plt.tight_layout()
    plt.savefig(VISUALIZATIONS_DIR / 'training_history.png', dpi=150, bbox_inches='tight')
    plt.show()
    
    # Save model and results
    print(f"\n{'='*80}")
    print("SAVING MODEL AND RESULTS")
    print(f"{'='*80}")
    
    # Save final model
    model_path = MODELS_DIR / 'deepfed_final_model.h5'
    model.save(str(model_path))
    print(f"✓ Model saved: {model_path}")
    
    # Save evaluation results
    results = {
        'test_loss': float(test_loss),
        'test_accuracy': float(test_accuracy),
        'classification_report': report,
        'confusion_matrix': cm.tolist(),
        'class_names': class_names.tolist(),
        'training_epochs': len(history.history['loss']),
        'final_train_accuracy': float(history.history['accuracy'][-1]),
        'final_val_accuracy': float(history.history['val_accuracy'][-1]),
        'best_val_accuracy': float(max(history.history['val_accuracy']))
    }
    
    results_path = MODELS_DIR / 'evaluation_results.json'
    with open(results_path, 'w') as f:
        json.dump(results, f, indent=2)
    print(f"✓ Results saved: {results_path}")
    
    print(f"\n{'='*80}")
    print("EVALUATION COMPLETE")
    print(f"{'='*80}")
    print(f"✓ All results saved to: {VISUALIZATIONS_DIR}")
    print(f"✓ Models saved to: {MODELS_DIR}")
    
    return results

# Evaluate the model
results = evaluate_model(model, X_test, y_test, history, metadata)

## Section 10: Save Artifacts and Summary

In this final section, we'll:
- Save all trained models and preprocessing artifacts
- Generate a comprehensive experiment summary
- Provide next steps and recommendations

In [None]:
def save_artifacts_and_summary(results, metadata):
    """
    Save all artifacts and generate comprehensive experiment summary.
    """
    print("\n" + "=" * 80)
    print("SAVING ARTIFACTS AND GENERATING SUMMARY")
    print("=" * 80)
    
    # Create experiment summary
    experiment_summary = {
        'experiment_name': 'DeepFed_Edge_IIoT_Analysis',
        'timestamp': datetime.now().isoformat(),
        'dataset': {
            'name': 'Edge-IIoTset',
            'source': str(DATA_DIR),
            'total_samples': metadata['total_sequences'],
            'sequence_length': metadata['sequence_length'],
            'num_features': metadata['num_features'],
            'num_classes': metadata['num_classes'],
            'class_names': metadata['class_names']
        },
        'model': {
            'architecture': 'DeepFed (GRU+CNN+MLP)',
            'input_shape': f"({metadata['sequence_length']}, {metadata['num_features']})",
            'parameters': model.count_params(),
            'gru_units': GRU_UNITS,
            'cnn_filters': CNN_FILTERS,
            'mlp_units': MLP_UNITS
        },
        'training': {
            'batch_size': BATCH_SIZE,
            'max_epochs': MAX_EPOCHS,
            'learning_rate': LEARNING_RATE,
            'early_stopping_patience': EARLY_STOPPING_PATIENCE,
            'epochs_completed': len(history.history['loss']),
            'final_train_accuracy': float(history.history['accuracy'][-1]),
            'final_val_accuracy': float(history.history['val_accuracy'][-1]),
            'best_val_accuracy': float(max(history.history['val_accuracy']))
        },
        'evaluation': {
            'test_accuracy': results['test_accuracy'],
            'test_loss': results['test_loss'],
            'precision_macro': results['classification_report']['macro avg']['precision'],
            'recall_macro': results['classification_report']['macro avg']['recall'],
            'f1_macro': results['classification_report']['macro avg']['f1-score']
        },
        'artifacts': {
            'model_path': str(MODELS_DIR / 'deepfed_final_model.h5'),
            'weights_path': str(MODELS_DIR / 'deepfed_best_weights.h5'),
            'scaler_path': str(CACHE_DIR / 'robust_scaler.pkl'),
            'encoder_path': str(CACHE_DIR / 'ordinal_encoder.pkl'),
            'label_encoder_path': str(CACHE_DIR / 'label_encoder.pkl'),
            'training_log': str(MODELS_DIR / 'training_log.csv'),
            'evaluation_results': str(MODELS_DIR / 'evaluation_results.json'),
            'visualizations_dir': str(VISUALIZATIONS_DIR)
        },
        'configuration': {
            'use_cached_data': USE_CACHED_DATA,
            'sample_size': SAMPLE_SIZE,
            'sequence_length': SEQUENCE_LENGTH,
            'batch_size': BATCH_SIZE,
            'learning_rate': LEARNING_RATE,
            'gru_units': GRU_UNITS,
            'gru_dropout': GRU_DROPOUT,
            'cnn_filters': CNN_FILTERS,
            'cnn_kernel_size': CNN_KERNEL_SIZE,
            'mlp_units': MLP_UNITS,
            'mlp_dropout': MLP_DROPOUT,
            'max_epochs': MAX_EPOCHS,
            'early_stopping_patience': EARLY_STOPPING_PATIENCE
        }
    }
    
    # Save experiment summary
    summary_path = MODELS_DIR / 'experiment_summary.json'
    with open(summary_path, 'w') as f:
        json.dump(experiment_summary, f, indent=2)
    print(f"✓ Experiment summary saved: {summary_path}")
    
    # Generate human-readable summary
    print(f"\n{'='*80}")
    print("EXPERIMENT SUMMARY")
    print(f"{'='*80}")
    
    print(f"Dataset: Edge-IIoTset")
    print(f"  - {metadata['total_sequences']:,} sequences")
    print(f"  - {metadata['sequence_length']} timesteps × {metadata['num_features']} features")
    print(f"  - {metadata['num_classes']} classes: {', '.join(metadata['class_names'][:5])}{'...' if len(metadata['class_names']) > 5 else ''}")
    
    print(f"\nModel: DeepFed Architecture")
    print(f"  - Parameters: {model.count_params():,}")
    print(f"  - GRU units: {GRU_UNITS}")
    print(f"  - CNN filters: {CNN_FILTERS}")
    print(f"  - MLP units: {MLP_UNITS}")
    
    print(f"\nTraining Results:")
    print(f"  - Best validation accuracy: {max(history.history['val_accuracy']):.4f}")
    print(f"  - Test accuracy: {results['test_accuracy']:.4f}")
    print(f"  - Test loss: {results['test_loss']:.4f}")
    print(f"  - Macro F1-score: {results['classification_report']['macro avg']['f1-score']:.4f}")
    
    print(f"\nArtifacts saved to:")
    print(f"  - Models: {MODELS_DIR}")
    print(f"  - Cache: {CACHE_DIR}")
    print(f"  - Visualizations: {VISUALIZATIONS_DIR}")
    
    # Performance analysis
    print(f"\n{'='*80}")
    print("PERFORMANCE ANALYSIS")
    print(f"{'='*80}")
    
    test_acc = results['test_accuracy']
    if test_acc > 0.9:
        print("🎉 EXCELLENT performance! Model achieved >90% test accuracy")
    elif test_acc > 0.8:
        print("✅ GOOD performance! Model achieved >80% test accuracy")
    elif test_acc > 0.7:
        print("⚠️  MODERATE performance. Consider hyperparameter tuning or more data")
    else:
        print("❌ POOR performance. Significant improvements needed")
    
    # Class imbalance check
    class_counts = np.bincount(y_test)
    imbalance_ratio = class_counts.max() / class_counts.min()
    if imbalance_ratio > 5:
        print(f"⚠️  High class imbalance detected ({imbalance_ratio:.1f}x). Consider advanced balancing techniques")
    
    # Recommendations
    print(f"\n{'='*80}")
    print("RECOMMENDATIONS")
    print(f"{'='*80}")
    
    recommendations = []
    
    if test_acc < 0.8:
        recommendations.append("- Increase model capacity (more units/filters)")
        recommendations.append("- Try different architectures or ensemble methods")
        recommendations.append("- Implement data augmentation for time series")
    
    if imbalance_ratio > 3:
        recommendations.append("- Use focal loss or class-weighted training")
        recommendations.append("- Implement SMOTE or other oversampling techniques")
        recommendations.append("- Collect more data for minority classes")
    
    if len(history.history['loss']) >= MAX_EPOCHS:
        recommendations.append("- Training stopped due to max epochs. Try increasing MAX_EPOCHS")
    
    if not recommendations:
        recommendations.append("- Model performance is good! Consider deployment")
        recommendations.append("- Experiment with different sequence lengths")
        recommendations.append("- Try transfer learning with similar datasets")
    
    for rec in recommendations:
        print(rec)
    
    print(f"\n{'='*80}")
    print("NEXT STEPS")
    print(f"{'='*80}")
    print("1. Review visualizations in the 'visualizations/' directory")
    print("2. Analyze confusion matrix for misclassification patterns")
    print("3. Consider hyperparameter optimization (grid/random search)")
    print("4. Experiment with different architectures or preprocessing")
    print("5. Deploy model for inference or integrate into production pipeline")
    
    print(f"\n{'='*80}")
    print("EXPERIMENT COMPLETE ✓")
    print(f"{'='*80}")
    
    return experiment_summary

# Save artifacts and generate summary
experiment_summary = save_artifacts_and_summary(results, metadata)