In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import precision_recall_curve, average_precision_score
import time
import pickle
from collections import Counter
import tensorflow as tf
from imblearn.ensemble import BalancedRandomForestClassifier, RUSBoostClassifier
import xgboost as xgb
from sklearn.svm import SVC
import json
import librosa

# Create directories for saving results
os.makedirs('results/tables', exist_ok=True)
os.makedirs('results/plots', exist_ok=True)

# Global storage for results and models
all_results = []
best_models = {}

# Create directories for saving results
os.makedirs('results/tables', exist_ok=True)
os.makedirs('results/plots', exist_ok=True)

# Paths to JSON and audio files
json_folder = 'JSON'
audio_folder = './output/cleaned_wav_files'

# Match audio and JSON files
json_files = {os.path.splitext(f)[0]: os.path.join(json_folder, f) for f in os.listdir(json_folder) if f.endswith('.json')}
audio_files = {os.path.splitext(f)[0]: os.path.join(audio_folder, f) for f in os.listdir(audio_folder) if f.endswith(('.wav', '.m4a', '.mp3'))}
matched_files = {name: (json_files[name], audio_files[name]) for name in json_files if name in audio_files}

# Function to load annotations from a JSON file
def load_annotations(json_file):
    with open(json_file, 'r') as f:
        data = json.load(f)
    annotations = []
    for item in data[0]['annotations'][0]['result']:
        if item['type'] == 'labels':
            start = item['value']['start']
            end = item['value']['end']
            label = item['value']['labels'][0]
            annotations.append((start, end, label))
    return annotations

# Function to extract audio segments based on annotations
def extract_audio_segments(audio_file, annotations, sr=16000):
    y, _ = librosa.load(audio_file, sr=sr)
    segments = []
    for start, end, label in annotations:
        segment = y[int(start * sr):int(end * sr)]
        segments.append((segment, label))
    return segments

# Function to extract features (MFCC) from audio segments with improved handling of short segments
def extract_features(segments, n_mfcc=40, max_length=300, min_segment_length=512):
    features, labels = [], []
    skipped_segments = 0
    
    for i, (segment, label) in enumerate(segments):
        try:
            # Check if the segment is too short for any FFT processing
            if len(segment) < min_segment_length:
                print(f"Skipping segment {i} due to insufficient length: {len(segment)} samples")
                skipped_segments += 1
                continue
                
            # For very short segments, use a smaller n_fft and hop_length
            if len(segment) < 2048:
                n_fft = 512
                hop_length = 128
            else:
                n_fft = 2048
                hop_length = 512
                
            # Extract MFCC features with adjusted parameters
            mfcc = librosa.feature.mfcc(
                y=segment, 
                sr=16000, 
                n_mfcc=n_mfcc, 
                n_fft=n_fft,
                hop_length=hop_length
            )
            
            # Handle variable lengths (pad if short, truncate if long)
            if mfcc.shape[1] < max_length:
                padded_mfcc = np.pad(mfcc, ((0, 0), (0, max_length - mfcc.shape[1])), mode='constant')
            else:
                padded_mfcc = mfcc[:, :max_length]
                
            # Append features and labels
            features.append(padded_mfcc.T)
            labels.append(0 if label == 'Field pause' else 1)
            
        except Exception as e:
            print(f"Error processing segment {i}: {e}")
            skipped_segments += 1
    
    print(f"Total segments skipped: {skipped_segments} out of {len(segments)}")
    return np.array(features), np.array(labels)

# Load the dataset
dataset = []
files_loaded = []

for name, (json_path, audio_path) in matched_files.items():
    annotations = load_annotations(json_path)
    if len(annotations) != 0:
        files_loaded.append(json_path)
        audio_segments = extract_audio_segments(audio_path, annotations)
        dataset.extend(audio_segments)

# Extract features with improved handling of short segments
X, y = extract_features(dataset, min_segment_length=256)  # Lowered minimum segment length

# Display class distribution
print("Class distribution:")
unique, counts = np.unique(y, return_counts=True)
for label, count in zip(unique, counts):
    print(f"Class {label} ({'Field pause' if label == 0 else 'Filled pause'}): {count} samples")

# Plot class distribution
plt.figure(figsize=(8, 6))
sns.countplot(x=y)
plt.title('Class Distribution in Dataset')
plt.xlabel('Class (0: Field pause, 1: Filled pause)')
plt.ylabel('Count')
plt.savefig('results/plots/class_distribution.png')
plt.close()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Make a copy of the original train/test data
X_train_original = X_train.copy()
y_train_original = y_train.copy()
X_test_original = X_test.copy()
y_test_original = y_test.copy()

Skipping segment 166 due to insufficient length: 0 samples
Skipping segment 167 due to insufficient length: 0 samples
Skipping segment 304 due to insufficient length: 0 samples
Skipping segment 401 due to insufficient length: 0 samples
Skipping segment 466 due to insufficient length: 0 samples
Skipping segment 467 due to insufficient length: 0 samples
Skipping segment 468 due to insufficient length: 0 samples
Skipping segment 469 due to insufficient length: 0 samples
Skipping segment 470 due to insufficient length: 0 samples
Skipping segment 471 due to insufficient length: 0 samples
Skipping segment 472 due to insufficient length: 0 samples
Skipping segment 473 due to insufficient length: 0 samples
Skipping segment 474 due to insufficient length: 0 samples
Skipping segment 475 due to insufficient length: 0 samples
Skipping segment 476 due to insufficient length: 0 samples
Skipping segment 477 due to insufficient length: 0 samples
Skipping segment 478 due to insufficient length: 0 sampl

In [2]:
# Function to evaluate models for imbalanced datasets
def evaluate_model_imbalanced(model, X_test, y_test, model_name, shot_config="full"):
    """
    Evaluate a model with metrics suitable for imbalanced datasets
    """
    import gc
    import time
    import numpy as np
    from sklearn.metrics import precision_recall_curve, f1_score, average_precision_score
    from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
    
    start_time = time.time()
    
    # Reshape data if needed
    X_test_reshaped = X_test.reshape(X_test.shape[0], -1) if len(X_test.shape) > 2 else X_test
    
    # For models that return probabilities
    if hasattr(model, "predict_proba"):
        try:
            y_prob = model.predict_proba(X_test_reshaped)[:, 1]
            
            # Calculate precision-recall curve
            precision, recall, thresholds = precision_recall_curve(y_test, y_prob)
            
            # Find threshold that maximizes F1 score - limit to max 100 thresholds for efficiency
            if len(thresholds) > 100:
                # Sample evenly spaced thresholds
                indices = np.linspace(0, len(thresholds)-1, 100, dtype=int)
                thresholds_sample = thresholds[indices]
            else:
                thresholds_sample = thresholds
                
            f1_scores = []
            for t in thresholds_sample:
                y_pred_t = (y_prob >= t).astype(int)
                f1_scores.append(f1_score(y_test, y_pred_t))
                
            best_threshold_idx = np.argmax(f1_scores)
            best_threshold = thresholds_sample[best_threshold_idx]
            
            # Calculate predictions with optimal threshold
            y_pred = (y_prob >= best_threshold).astype(int)
            
            # Calculate AUC-PR (Area Under Precision-Recall Curve)
            average_precision = average_precision_score(y_test, y_prob)
        except Exception as e:
            print(f"Error calculating probabilities: {e}")
            y_pred = model.predict(X_test_reshaped)
            y_pred = np.round(y_pred).astype(int)  # Convert continuous to binary
            average_precision = None
            best_threshold = None
    else:
        # If no probability predictions available
        y_pred = model.predict(X_test_reshaped)
        # Handle Keras models that output continuous values
        if isinstance(y_pred, np.ndarray) and y_pred.ndim > 1 and y_pred.shape[1] == 1:
            y_pred = y_pred.flatten()
        y_pred = np.round(y_pred).astype(int)  # Convert to binary
        average_precision = None
        best_threshold = None
    
    # Calculate standard metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    
    # Create confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Execution time
    execution_time = time.time() - start_time
    
    # Print results
    print(f"\n{model_name} ({shot_config}) Results:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    if average_precision:
        print(f"Average Precision (AP): {average_precision:.4f}")
    if best_threshold:
        print(f"Best threshold: {best_threshold:.4f}")
    print(f"Execution Time: {execution_time:.2f} seconds")
    
    # Return results
    results = {
        'model_name': f"{model_name}_{shot_config}",
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'average_precision': average_precision,
        'best_threshold': best_threshold,
        'confusion_matrix': cm,
        'execution_time': execution_time,
        'y_pred': y_pred
    }
    
    # Force garbage collection
    gc.collect()
    
    return results

# Function to plot and save confusion matrix - FUNCTION KEPT BUT NO LONGER USED
def plot_confusion_matrix(cm, model_name, shot_config):
    """
    Plot and save confusion matrix
    """
    # REMOVED: Plotting and saving confusion matrix
    pass

# Function to save model architecture
def save_model_architecture(model, model_name, shot_config):
    """
    Save the architecture of a model to a text file
    """
    import os
    
    # Create directory if it doesn't exist
    architecture_dir = 'results/model_architecture'
    os.makedirs(architecture_dir, exist_ok=True)
    
    file_path = f'{architecture_dir}/{model_name}_{shot_config}.txt'
    
    try:
        with open(file_path, 'w') as f:
            # Handle different model types
            if 'keras' in str(type(model)).lower() or 'tensorflow' in str(type(model)).lower():
                # For Keras/TensorFlow models
                model.summary(print_fn=lambda x: f.write(x + '\n'))
            elif hasattr(model, 'get_params'):
                # For scikit-learn models
                f.write(f"Model Type: {type(model).__name__}\n")
                f.write("Parameters:\n")
                for param, value in model.get_params().items():
                    f.write(f"  {param}: {value}\n")
            elif isinstance(model, dict) and 'model' in model:
                # For wrapped models
                if 'keras' in str(type(model['model'])).lower() or 'tensorflow' in str(type(model['model'])).lower():
                    model['model'].summary(print_fn=lambda x: f.write(x + '\n'))
                elif hasattr(model['model'], 'get_params'):
                    f.write(f"Model Type: {type(model['model']).__name__}\n")
                    f.write("Parameters:\n")
                    for param, value in model['model'].get_params().items():
                        f.write(f"  {param}: {value}\n")
                else:
                    f.write(f"Model Type: {type(model['model']).__name__}\n")
            else:
                # For other model types
                f.write(f"Model Type: {type(model).__name__}\n")
        
        print(f"Model architecture saved to {file_path}")
    except Exception as e:
        print(f"Error saving model architecture: {e}")

# Function to create balanced datasets
def create_balanced_datasets(X_train, y_train, X_test, y_test):
    """Create balanced datasets for training using different techniques"""
    from collections import Counter
    import numpy as np
    
    # Calculate class distribution
    class_counts = Counter(y_train)
    minority_class = min(class_counts, key=class_counts.get)
    minority_count = class_counts[minority_class]
    
    print(f"\nCreating balanced datasets from {len(y_train)} samples")
    print(f"Original class distribution: {dict(class_counts)}")
    
    balanced_datasets = {}
    
    # 1. Random Undersampling - use all minority samples, randomly select majority samples
    majority_indices = np.where(y_train != minority_class)[0]
    minority_indices = np.where(y_train == minority_class)[0]
    
    # Randomly select majority samples equal to minority count * factor
    undersampling_factor = 2  # Use 2x minority samples from majority class
    selected_majority = np.random.choice(
        majority_indices, 
        size=min(minority_count * undersampling_factor, len(majority_indices)),
        replace=False
    )
    
    # Combine indices and create balanced dataset
    balanced_indices = np.concatenate([minority_indices, selected_majority])
    X_balanced = X_train[balanced_indices]
    y_balanced = y_train[balanced_indices]
    
    balanced_datasets['undersampled'] = (X_balanced, y_balanced)
    
    # Print the new distribution
    print(f"Undersampled dataset: {len(X_balanced)} samples, distribution: {dict(Counter(y_balanced))}")
    
    # 2. Create few-shot learning datasets
    minority_shots = min(minority_count, 20)  # Cap at 20 samples
    shot_configs = [1, 3, 5, 10, minority_shots]
    
    for n_shots in shot_configs:
        if n_shots <= minority_count:
            X_few, y_few = create_few_shot_dataset(X_train, y_train, n_shots)
            balanced_datasets[f'{n_shots}-shot'] = (X_few, y_few)
            print(f"{n_shots}-shot dataset: {len(X_few)} samples, distribution: {dict(Counter(y_few))}")
    
    return balanced_datasets

# Function to create few-shot datasets
def create_few_shot_dataset(X, y, n_shots=5, random_state=42):
    """
    Create a few-shot learning dataset with n examples per class
    """
    import numpy as np
    np.random.seed(random_state)
    
    # Find indices for each class
    unique_labels = np.unique(y)
    few_shot_indices = []
    
    for label in unique_labels:
        indices = np.where(y == label)[0]
        n_available = len(indices)
        selected_n = min(n_shots, n_available)
        
        if selected_n < n_shots:
            print(f"Warning: Only {selected_n} samples available for class {label}")
        
        selected = np.random.choice(indices, selected_n, replace=False)
        few_shot_indices.extend(selected)
    
    return X[few_shot_indices], y[few_shot_indices]

# Function to train specialized models for imbalanced datasets
def train_balanced_models(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train models specifically designed for imbalanced datasets"""
    import gc
    import numpy as np
    from collections import Counter
    import xgboost as xgb
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC
    from imblearn.ensemble import BalancedRandomForestClassifier, RUSBoostClassifier
    
    # Reshape data
    X_train_2d = X_train.reshape(X_train.shape[0], -1)
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    
    results_list = []  # Store results locally
    models_dict = {}   # Store models locally
    
    # 1. Balanced Random Forest
    try:
        print(f"\nTraining Balanced Random Forest ({shot_config})...")
        brf = BalancedRandomForestClassifier(
            n_estimators=100,
            replacement=True,
            sampling_strategy='auto',
            random_state=42
        )
        
        brf.fit(X_train_2d, y_train)
        results = evaluate_model_imbalanced(brf, X_test_2d, y_test, 'BalancedRF', shot_config)
        results_list.append(results)
        
        # Save the model architecture
        save_model_architecture(brf, 'BalancedRF', shot_config)
        
        # Store the model
        models_dict["BalancedRF"] = brf
        
        # Force garbage collection
        gc.collect()
    except Exception as e:
        print(f"Error training Balanced Random Forest: {e}")
    
    # 2. RUSBoost
    try:
        print(f"\nTraining RUSBoost ({shot_config})...")
        rusboost = RUSBoostClassifier(
            n_estimators=100,
            learning_rate=0.1,
            sampling_strategy='auto',
            random_state=42
        )
        
        rusboost.fit(X_train_2d, y_train)
        results = evaluate_model_imbalanced(rusboost, X_test_2d, y_test, 'RUSBoost', shot_config)
        results_list.append(results)
        
        # Save the model architecture
        save_model_architecture(rusboost, 'RUSBoost', shot_config)
        
        # Store the model
        models_dict["RUSBoost"] = rusboost
        
        # Force garbage collection
        gc.collect()
    except Exception as e:
        print(f"Error training RUSBoost: {e}")
    
    # 3. Cost-sensitive SVM
    try:
        print(f"\nTraining Cost-sensitive SVM ({shot_config})...")
        
        # Calculate class weights inversely proportional to class frequencies
        class_counts = Counter(y_train)
        n_samples = len(y_train)
        class_weights = {
            c: n_samples / (len(class_counts) * count)
            for c, count in class_counts.items()
        }
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train_2d)
        X_test_scaled = scaler.transform(X_test_2d)
        
        # Train weighted SVM
        svm_weighted = SVC(
            kernel='rbf',
            class_weight=class_weights,
            probability=True,
            random_state=42
        )
        
        svm_weighted.fit(X_train_scaled, y_train)
        results = evaluate_model_imbalanced(svm_weighted, X_test_scaled, y_test, 'WeightedSVM', shot_config)
        results_list.append(results)
        
        # Save the model architecture
        save_model_architecture(svm_weighted, 'WeightedSVM', shot_config)
        
        # Store the model with its scaler
        models_dict["WeightedSVM"] = {'model': svm_weighted, 'scaler': scaler}
        
        # Force garbage collection
        gc.collect()
    except Exception as e:
        print(f"Error training Weighted SVM: {e}")
    
    # 4. XGBoost with scale_pos_weight
    try:
        print(f"\nTraining Weighted XGBoost ({shot_config})...")
        
        # Calculate positive class weight
        scale_pos_weight = class_counts[0] / class_counts[1] if 1 in class_counts else 1.0
        
        xgb_weighted = xgb.XGBClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=3,
            scale_pos_weight=scale_pos_weight,
            random_state=42,
            use_label_encoder=False,
            eval_metric='logloss'
        )
        
        xgb_weighted.fit(X_train_2d, y_train)
        results = evaluate_model_imbalanced(xgb_weighted, X_test_2d, y_test, 'WeightedXGB', shot_config)
        results_list.append(results)
        
        # Save the model architecture
        save_model_architecture(xgb_weighted, 'WeightedXGB', shot_config)
        
        # Store the model
        models_dict["WeightedXGB"] = xgb_weighted
        
        # Force garbage collection
        gc.collect()
    except Exception as e:
        print(f"Error training Weighted XGBoost: {e}")
    
    # Add results to global list
    all_results.extend(results_list)
    
    # Add models to global dictionary
    for model_name, model in models_dict.items():
        best_models[f"{model_name}_{shot_config}"] = model
    
    return models_dict

# Function to train k-NN model
def train_knn(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate k-NN model"""
    import gc
    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import StandardScaler
    
    print(f"\nTraining k-NN ({shot_config})...")
    
    # Reshape data for k-NN
    X_train_2d = X_train.reshape(X_train.shape[0], -1)
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_2d)
    X_test_scaled = scaler.transform(X_test_2d)
    
    try:
        # Find best k using cross-validation
        best_k = 5  # Default
        if len(X_train) > 10:
            from sklearn.model_selection import GridSearchCV
            param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
            grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=min(5, len(X_train)), scoring='f1')
            try:
                grid_search.fit(X_train_scaled, y_train)
                best_k = grid_search.best_params_['n_neighbors']
                print(f"Best k for {shot_config}: {best_k}")
            except Exception as e:
                print(f"Error finding best k: {e}")
        
        # Train k-NN with best k
        knn = KNeighborsClassifier(n_neighbors=best_k)
        knn.fit(X_train_scaled, y_train)
        
        # Evaluate
        results = evaluate_model_imbalanced(knn, X_test_scaled, y_test, 'kNN', shot_config)
        all_results.append(results)
        
        # Save the model architecture
        save_model_architecture(knn, 'kNN', shot_config)
        
        # Save the model with its scaler
        best_models[f"kNN_{shot_config}"] = {'model': knn, 'scaler': scaler}
        
        # Force garbage collection
        gc.collect()
        
        return knn, results
    except Exception as e:
        print(f"Error in kNN training: {e}")
        gc.collect()
        return None, None

# Function to train Random Forest with RFE
def train_rf_rfe(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate Random Forest with RFE"""
    import gc
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_selection import RFE
    
    print(f"\nTraining Random Forest with RFE ({shot_config})...")
    
    # Reshape data
    X_train_2d = X_train.reshape(X_train.shape[0], -1)
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    
    try:
        # First train a simple random forest to use with RFE
        base_rf = RandomForestClassifier(n_estimators=100, random_state=42)
        
        # Apply RFE to select top features
        # Number of features to select should be adaptive to dataset size
        n_features = min(50, X_train_2d.shape[1])
        
        # Use RFE if we have enough samples
        if len(X_train) >= 10:
            rfe = RFE(estimator=base_rf, n_features_to_select=n_features, step=10)
            X_train_rfe = rfe.fit_transform(X_train_2d, y_train)
            X_test_rfe = rfe.transform(X_test_2d)
            
            # Train RF on selected features
            rf = RandomForestClassifier(n_estimators=100, random_state=42)
            rf.fit(X_train_rfe, y_train)
            
            # Evaluate
            results = evaluate_model_imbalanced(rf, X_test_rfe, y_test, 'RF_RFE', shot_config)
            all_results.append(results)
            
            # Save the model architecture
            save_model_architecture(rf, 'RF_RFE', shot_config)
            
            # Save the model with RFE
            best_models[f"RF_RFE_{shot_config}"] = {'model': rf, 'rfe': rfe}
            
            # Force garbage collection
            gc.collect()
            
            return rf, results
        else:
            # Fall back to regular RF for very small datasets
            raise ValueError("Not enough samples for RFE")
    except Exception as e:
        print(f"Error in RF+RFE: {e}")
        print("Falling back to standard Random Forest")
        
        try:
            # Train standard RF
            rf = RandomForestClassifier(n_estimators=100, random_state=42)
            rf.fit(X_train_2d, y_train)
            
            # Evaluate
            results = evaluate_model_imbalanced(rf, X_test_2d, y_test, 'RF', shot_config)
            all_results.append(results)
            
            # Save the model architecture
            save_model_architecture(rf, 'RF', shot_config)
            
            # Save the model
            best_models[f"RF_{shot_config}"] = rf
            
            # Force garbage collection
            gc.collect()
            
            return rf, results
        except Exception as e2:
            print(f"Error in standard RF: {e2}")
            gc.collect()
            return None, None

# Function to train SVM model
def train_svm(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate SVM model"""
    import gc
    import numpy as np
    from sklearn.svm import SVC
    from sklearn.preprocessing import StandardScaler
    
    print(f"\nTraining SVM ({shot_config})...")
    
    try:
        # Reshape data
        X_train_2d = X_train.reshape(X_train.shape[0], -1)
        X_test_2d = X_test.reshape(X_test.shape[0], -1)
        
        # Scale features for SVM
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train_2d)
        X_test_scaled = scaler.transform(X_test_2d)
        
        # Train SVM with RBF kernel
        svm = SVC(kernel='rbf', probability=True, random_state=42, class_weight='balanced')
        svm.fit(X_train_scaled, y_train)
        
        # Evaluate
        results = evaluate_model_imbalanced(svm, X_test_scaled, y_test, 'SVM', shot_config)
        all_results.append(results)
        
        # Save the model architecture
        save_model_architecture(svm, 'SVM', shot_config)
        
        # Save the model with its scaler
        best_models[f"SVM_{shot_config}"] = {'model': svm, 'scaler': scaler}
        
        # Force garbage collection
        gc.collect()
        
        return svm, results
    except Exception as e:
        print(f"Error in SVM training: {e}")
        gc.collect()
        return None, None

# Function to train MLP model
def train_mlp(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate MLP model"""
    import gc
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import StandardScaler
    from collections import Counter
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout
    from tensorflow.keras.callbacks import EarlyStopping
    
    print(f"\nTraining MLP ({shot_config})...")
    
    try:
        # Reshape data
        X_train_2d = X_train.reshape(X_train.shape[0], -1)
        X_test_2d = X_test.reshape(X_test.shape[0], -1)
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train_2d)
        X_test_scaled = scaler.transform(X_test_2d)
        
        # Build model
        mlp = Sequential([
            Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),  # Reduced complexity
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.3),
            Dense(1, activation='sigmoid')
        ])
        
        # Compile model
        mlp.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        
        # Set epochs based on shot configuration
        if shot_config == "10-shot":
            max_epochs = 20
        elif shot_config == "17-shot":
            max_epochs = 12
        elif shot_config == "5-shot":
            max_epochs = 20
        elif shot_config == "balanced":
            max_epochs = 16
        else:
            max_epochs = 30  # Reduced from 50 to prevent overfitting
        
        # Early stopping
        early_stopping = EarlyStopping(
            monitor='loss',
            patience=5,  # Reduced patience 
            restore_best_weights=True
        )
        
        # Class weights for imbalanced data
        class_weights = None
        if shot_config == "full":
            class_counts = Counter(y_train)
            n_samples = len(y_train)
            class_weights = {
                label: n_samples / (len(class_counts) * count)
                for label, count in class_counts.items()
            }
        
        # Train model
        batch_size = min(32, len(X_train) // 2) if len(X_train) > 1 else 1
        history = mlp.fit(
            X_train_scaled, y_train,
            epochs=max_epochs,
            batch_size=batch_size,
            validation_split=0.1 if len(X_train) > 10 else 0.0,
            callbacks=[early_stopping],
            class_weight=class_weights,
            verbose=0
        )
        
        # Plot training history with low DPI to save memory
        dpi = 72  # Lower DPI for memory efficiency
        plt.figure(figsize=(12, 4), dpi=dpi)
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        if 'val_loss' in history.history:
            plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'MLP Loss ({shot_config})')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        if 'val_accuracy' in history.history:
            plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'MLP Accuracy ({shot_config})')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        
        plt.tight_layout()
        plt.savefig(f'results/plots/mlp_training_{shot_config}.png', dpi=dpi)
        plt.close('all')
        
        # Save the model architecture
        save_model_architecture(mlp, 'MLP', shot_config)
        
        # Create a custom predict function
        def predict_fn(X):
            X_reshaped = X.reshape(X.shape[0], -1)
            X_scaled = scaler.transform(X_reshaped)
            return mlp.predict(X_scaled, verbose=0)
        
        # Evaluate
        results = evaluate_model_imbalanced(mlp, X_test_2d, y_test, 'MLP', shot_config)
        all_results.append(results)
        
        # Save the model
        best_models[f"MLP_{shot_config}"] = {'model': mlp, 'scaler': scaler, 'predict_fn': predict_fn}
        
        # Clear TensorFlow session to free memory
        tf.keras.backend.clear_session()
        gc.collect()
        
        return mlp, results
    except Exception as e:
        print(f"Error in MLP training: {e}")
        tf.keras.backend.clear_session()
        gc.collect()
        return None, None
    
# Function to train CNN model
def train_cnn(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate CNN model"""
    import gc
    import numpy as np
    import matplotlib.pyplot as plt
    from collections import Counter
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
    from tensorflow.keras.callbacks import EarlyStopping
    
    print(f"\nTraining CNN ({shot_config})...")
    
    try:
        # Reshape data for CNN (samples, timesteps, features)
        if len(X_train.shape) == 2:
            # Reshape to (samples, 300, 40) for CNN input
            X_train_cnn = X_train.reshape(X_train.shape[0], 300, 40)
            X_test_cnn = X_test.reshape(X_test.shape[0], 300, 40)
        else:
            X_train_cnn = X_train
            X_test_cnn = X_test
        
        # Build CNN model with reduced complexity
        cnn = Sequential([
            Conv1D(filters=16, kernel_size=3, activation='relu', input_shape=X_train_cnn.shape[1:]),  # Reduced filters
            MaxPooling1D(pool_size=2),
            Conv1D(filters=32, kernel_size=3, activation='relu'),  # Reduced filters
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(32, activation='relu'),  # Reduced neurons
            Dropout(0.3),
            Dense(1, activation='sigmoid')
        ])
        
        # Compile model
        cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        
        # Set epochs based on shot configuration - reduced for all configs
        if shot_config == "10-shot":
            max_epochs = 15  # Reduced
        elif shot_config == "17-shot":
            max_epochs = 8
        elif shot_config == "5-shot":
            max_epochs = 10
        else:
            max_epochs = 25  # Reduced
        
        # Early stopping with specific monitoring based on configuration
        early_stopping = EarlyStopping(
            monitor='loss',
            patience=5,  # Reduced patience
            restore_best_weights=True
        )
        
        # Class weights for imbalanced data
        class_weights = None
        if shot_config == "full":
            class_counts = Counter(y_train)
            n_samples = len(y_train)
            class_weights = {
                label: n_samples / (len(class_counts) * count)
                for label, count in class_counts.items()
            }
        
        # Train model
        batch_size = min(32, len(X_train) // 2) if len(X_train) > 1 else 1
        history = cnn.fit(
            X_train_cnn, y_train,
            epochs=max_epochs,
            batch_size=batch_size,
            validation_split=0.1 if len(X_train) > 10 else 0.0,
            callbacks=[early_stopping],
            class_weight=class_weights,
            verbose=0
        )
        
        # Plot training history with low DPI
        dpi = 72
        plt.figure(figsize=(12, 4), dpi=dpi)
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        if 'val_loss' in history.history:
            plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'CNN Loss ({shot_config})')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        if 'val_accuracy' in history.history:
            plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'CNN Accuracy ({shot_config})')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        
        plt.tight_layout()
        plt.savefig(f'results/plots/cnn_training_{shot_config}.png', dpi=dpi)
        plt.close('all')
        
        # Save the model architecture
        save_model_architecture(cnn, 'CNN', shot_config)
        
        # Create a proper predict function that handles reshaping
        def predict_fn(X):
            if len(X.shape) == 2:
                # Reshape to match the CNN input shape
                X_reshaped = X.reshape(X.shape[0], 300, 40)
            else:
                X_reshaped = X
            return cnn.predict(X_reshaped, verbose=0)
        
        # Use the custom predict function for evaluation
        class ModelWrapper:
            def __init__(self, predict_function):
                self.predict = predict_function
        
        model_wrapper = ModelWrapper(predict_fn)
        
        # Evaluate with the wrapper
        results = evaluate_model_imbalanced(model_wrapper, X_test, y_test, 'CNN', shot_config)
        all_results.append(results)
        
        # Save the model
        best_models[f"CNN_{shot_config}"] = {'model': cnn, 'predict_fn': predict_fn}
        
        # Clear TensorFlow session to free memory
        tf.keras.backend.clear_session()
        gc.collect()
        
        return cnn, results
    except Exception as e:
        print(f"Error in CNN training: {e}")
        tf.keras.backend.clear_session()
        gc.collect()
        return None, None
    
# Function to extract CNN features
def extract_cnn_features(model, X, max_samples=1000):
    """Extract features from CNN for use with XGBoost"""
    import numpy as np
    import tensorflow as tf
    
    try:
        # Limit the number of samples to process at once to avoid memory issues
        if len(X) > max_samples:
            X_subset = X[:max_samples]
            print(f"Warning: Processing only {max_samples} samples for feature extraction to save memory")
        else:
            X_subset = X
            
        # Create a feature extractor model
        feature_layer = -3  # Usually the layer before the final Dense layers
        feature_extractor = tf.keras.Model(
            inputs=model.inputs,
            outputs=model.layers[feature_layer].output
        )
        
        # Ensure X has right shape
        if len(X_subset.shape) == 2:
            # Reshape to match the CNN input shape
            X_reshaped = X_subset.reshape(X_subset.shape[0], 300, 40)
        else:
            X_reshaped = X_subset
        
        # Extract features in batches to save memory
        batch_size = 32
        features = []
        for i in range(0, len(X_reshaped), batch_size):
            batch = X_reshaped[i:i+batch_size]
            batch_features = feature_extractor.predict(batch, verbose=0)
            features.append(batch_features)
        
        # Combine batches
        features = np.concatenate(features, axis=0)
        
        # Reshape if needed
        if len(features.shape) > 2:
            features = features.reshape(features.shape[0], -1)
        
        return features
    except Exception as e:
        print(f"Error extracting CNN features: {e}")
        return np.zeros((len(X), 10))  # Return dummy features on error

# Function to train CNN+XGBoost model
def train_cnn_xgboost(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate CNN+XGBoost model"""
    import gc
    import numpy as np
    from collections import Counter
    import xgboost as xgb
    import tensorflow as tf
    
    print(f"\nTraining CNN+XGBoost ({shot_config})...")
    
    try:
        # Check if we already have a trained CNN
        cnn_key = f"CNN_{shot_config}"
        if cnn_key in best_models:
            cnn = best_models[cnn_key]['model']
        else:
            # Train a new CNN
            cnn, _ = train_cnn(X_train, y_train, X_test, y_test, shot_config)
        
        # Extract CNN features - use a smaller subset if dataset is large
        max_samples_train = min(500, len(X_train))
        max_samples_test = min(200, len(X_test))
        
        X_train_subset = X_train[:max_samples_train]
        y_train_subset = y_train[:max_samples_train]
        X_test_subset = X_test[:max_samples_test]
        
        X_train_features = extract_cnn_features(cnn, X_train_subset)
        X_test_features = extract_cnn_features(cnn, X_test_subset)
        
        # Calculate positive class weight for XGBoost
        class_counts = Counter(y_train_subset)
        scale_pos_weight = class_counts[0] / class_counts[1] if 1 in class_counts else 1.0
        
        # Train XGBoost on CNN features with reduced complexity
        xgb_model = xgb.XGBClassifier(
            n_estimators=50,  # Reduced
            learning_rate=0.1,
            max_depth=3,
            scale_pos_weight=scale_pos_weight,
            random_state=42,
            use_label_encoder=False,
            eval_metric='logloss'
        )
        
        xgb_model.fit(X_train_features, y_train_subset)
        
        # Save the model architecture
        save_model_architecture({'cnn': cnn, 'xgb': xgb_model}, 'CNN_XGBoost', shot_config)
        
        # Create a custom predict function that combines CNN feature extraction with XGBoost
        def predict_fn(X):
            # Limit the number of samples for prediction to avoid memory issues
            if len(X) > 200:
                print(f"Warning: Processing only 200 samples for prediction to save memory")
                X_subset = X[:200]
                features = extract_cnn_features(cnn, X_subset)
                probs = xgb_model.predict_proba(features)[:, 1].reshape(-1, 1)
                # Pad with default predictions for the rest
                pad_size = len(X) - 200
                padding = np.ones((pad_size, 1)) * 0.5  # Default prediction
                return np.vstack([probs, padding])
            else:
                features = extract_cnn_features(cnn, X)
                return xgb_model.predict_proba(features)[:, 1].reshape(-1, 1)
        
        # Use a wrapper for evaluation
        class ModelWrapper:
            def __init__(self, predict_function):
                self.predict = predict_function
        
        model_wrapper = ModelWrapper(predict_fn)
        
        # Evaluate on a subset
        results = evaluate_model_imbalanced(model_wrapper, X_test_subset, y_test[:max_samples_test], 'CNN_XGBoost', shot_config)
        all_results.append(results)
        
        # Save the model
        best_models[f"CNN_XGBoost_{shot_config}"] = {'cnn': cnn, 'xgb': xgb_model, 'predict_fn': predict_fn}
        
        # Clear TensorFlow session and release memory
        tf.keras.backend.clear_session()
        gc.collect()
        
        return (cnn, xgb_model), results
    except Exception as e:
        print(f"Error in CNN+XGBoost training: {e}")
        tf.keras.backend.clear_session()
        gc.collect()
        return None, None
    
# Function to train LSTM model
def train_lstm(X_train, y_train, X_test, y_test, shot_config="full"):
    """Train and evaluate LSTM model"""
    import gc
    import numpy as np
    import matplotlib.pyplot as plt
    from collections import Counter
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout, LSTM as KerasLSTM
    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
    from tensorflow.keras.regularizers import l2
    
    print(f"\nTraining LSTM ({shot_config})...")
    
    try:
        # Reshape data for LSTM (samples, timesteps, features)
        if len(X_train.shape) == 2:
            # Reshape to match the expected LSTM input
            X_train_lstm = X_train.reshape(X_train.shape[0], 300, 40)
            X_test_lstm = X_test.reshape(X_test.shape[0], 300, 40)
        else:
            X_train_lstm = X_train
            X_test_lstm = X_test
        
        # Build LSTM model with simplified architecture to reduce memory usage
        lstm_model = Sequential([
            KerasLSTM(32, return_sequences=True, input_shape=X_train_lstm.shape[1:], 
                    kernel_regularizer=l2(0.001), recurrent_regularizer=l2(0.001)),
            Dropout(0.4),
            KerasLSTM(16, kernel_regularizer=l2(0.001), recurrent_regularizer=l2(0.001)),
            Dropout(0.4),
            Dense(8, activation='relu', kernel_regularizer=l2(0.001)),
            Dense(1, activation='sigmoid')
        ])
        
        # Compile model
        lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        
        # Set epochs and patience based on shot configuration - reduced for all configs
        if shot_config == "5-shot":
            max_epochs = 15  # Reduced
            patience = 10     # Reduced
        else:
            max_epochs = 25   # Reduced
            patience = 5      # Reduced
        
        # Early stopping
        early_stopping = EarlyStopping(
            monitor='loss' if len(X_train) <= 20 else 'val_loss',
            patience=patience,
            restore_best_weights=True
        )
        
        # Add learning rate reduction to help with convergence
        reduce_lr = ReduceLROnPlateau(
            monitor='loss',
            factor=0.5,
            patience=3,
            min_lr=0.0001
        )
        
        # Class weights for imbalanced data
        class_weights = None
        if shot_config == "full":
            class_counts = Counter(y_train)
            n_samples = len(y_train)
            class_weights = {
                label: n_samples / (len(class_counts) * count)
                for label, count in class_counts.items()
            }
        
        # Train model with smaller batch size
        batch_size = min(16, len(X_train) // 2) if len(X_train) > 1 else 1  # Reduced batch size
        history = lstm_model.fit(
            X_train_lstm, y_train,
            epochs=max_epochs,
            batch_size=batch_size,
            validation_split=0.1 if len(X_train) > 10 else 0.0,
            callbacks=[early_stopping, reduce_lr],
            class_weight=class_weights,
            verbose=0
        )
        
        # Plot training history with low DPI
        dpi = 72
        plt.figure(figsize=(12, 4), dpi=dpi)
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        if 'val_loss' in history.history:
            plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'LSTM Loss ({shot_config})')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        if 'val_accuracy' in history.history:
            plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'LSTM Accuracy ({shot_config})')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        
        plt.tight_layout()
        plt.savefig(f'results/plots/lstm_training_{shot_config}.png', dpi=dpi)
        plt.close('all')
        
        # Save the model architecture
        save_model_architecture(lstm_model, 'LSTM', shot_config)
        
        # Create a proper predict function
        def predict_fn(X):
            if len(X.shape) == 2:
                # Reshape to match the LSTM input shape
                X_reshaped = X.reshape(X.shape[0], 300, 40)
            else:
                X_reshaped = X
            return lstm_model.predict(X_reshaped, verbose=0)
        
        # Use a wrapper for evaluation
        class ModelWrapper:
            def __init__(self, predict_function):
                self.predict = predict_function
        
        model_wrapper = ModelWrapper(predict_fn)
        
        # Evaluate on a subset of test data to save memory
        max_test_samples = min(200, len(X_test))
        X_test_subset = X_test[:max_test_samples]
        y_test_subset = y_test[:max_test_samples]
        
        results = evaluate_model_imbalanced(model_wrapper, X_test_subset, y_test_subset, 'LSTM', shot_config)
        all_results.append(results)
        
        # Save the model
        best_models[f"LSTM_{shot_config}"] = {'model': lstm_model, 'predict_fn': predict_fn}
        
        # Clear TensorFlow session to free memory
        tf.keras.backend.clear_session()
        gc.collect()
        
        return lstm_model, results
    except Exception as e:
        print(f"Error in LSTM training: {e}")
        tf.keras.backend.clear_session()
        gc.collect()
        return None, None
    
# Function to create results table
def create_results_table():
    """Create and save a table of all model results with optimized memory usage"""
    import gc
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    
    # Extract metrics from results
    metrics_list = []
    for result in all_results:
        metrics = {
            'model_name': result['model_name'],
            'accuracy': result['accuracy'],
            'precision': result['precision'],
            'recall': result['recall'],
            'f1_score': result['f1_score'],
            'average_precision': result.get('average_precision', float('nan'))
        }
        metrics_list.append(metrics)
    
    # Convert to DataFrame
    results_df = pd.DataFrame(metrics_list)
    
    # Sort by F1 score (descending)
    results_df = results_df.sort_values('f1_score', ascending=False)
    
    # Save to CSV
    results_df.to_csv('results/tables/model_performance.csv', index=False)
    
    # Create a more readable format for display
    display_df = results_df.copy()
    display_df = display_df.round(3)
    
    # Save as HTML for better visualization
    display_df.to_html('results/tables/model_performance.html')
    
    # Create separate tables for each shot configuration with less processing
    shot_configs = ['full', 'balanced'] + [f"{shot}-shot" for shot in [1, 3, 5, 10, 17]]
    for config in shot_configs:
        config_df = display_df[display_df['model_name'].str.endswith(config)]
        if not config_df.empty:
            config_df.to_csv(f'results/tables/model_performance_{config}.csv', index=False)
    
    # Plot performance comparison for top models only
    dpi = 72
    plt.figure(figsize=(14, 8), dpi=dpi)
    
    # Get top 10 models only
    top_models = results_df.head(10)
    
    # Create simpler bar plot
    sns.barplot(x='model_name', y='f1_score', data=top_models)
    plt.title('F1 Score for Top 10 Models')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('results/plots/top_models_f1.png', dpi=dpi)
    plt.close('all')
    
    # Force garbage collection
    gc.collect()
    
    return results_df

def visualize_model(model_info, X_train, y_train, X_test, y_test, model_type, shot_config="full"):
    """Create optimized visualizations for each model type"""
    import gc
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
    
    print(f"\nCreating visualizations for {model_type} ({shot_config})...")
    
    # Create directory for visualizations if it doesn't exist
    os.makedirs('results/plots/model_viz', exist_ok=True)
    
    # Use lower DPI to reduce memory usage
    dpi = 72
    
    # Limit sample size for visualization
    max_samples = min(200, len(X_test))
    X_test_sample = X_test[:max_samples]
    y_test_sample = y_test[:max_samples]
    
    # Reshape data if needed
    X_train_2d = X_train.reshape(X_train.shape[0], -1) if len(X_train.shape) > 2 else X_train
    X_test_2d = X_test_sample.reshape(X_test_sample.shape[0], -1) if len(X_test_sample.shape) > 2 else X_test_sample
    
    try:
        # ROC Curve visualization for all models
        if hasattr(model_info, 'predict_proba') or (isinstance(model_info, dict) and 'predict_fn' in model_info):
            try:
                # Get predictions
                if isinstance(model_info, dict):
                    if 'predict_fn' in model_info:
                        # Use custom predict function
                        y_pred_prob = model_info['predict_fn'](X_test_sample)
                        if isinstance(y_pred_prob, np.ndarray) and y_pred_prob.ndim > 1:
                            if y_pred_prob.shape[1] == 2:  # Binary classification with 2 columns
                                y_pred_prob = y_pred_prob[:, 1]
                            elif y_pred_prob.shape[1] == 1:  # Single column output
                                y_pred_prob = y_pred_prob.flatten()
                    elif 'model' in model_info:
                        # Use model with potential scaler
                        if 'scaler' in model_info:
                            X_test_scaled = model_info['scaler'].transform(X_test_2d)
                            if hasattr(model_info['model'], 'predict_proba'):
                                y_pred_prob = model_info['model'].predict_proba(X_test_scaled)[:, 1]
                            else:
                                y_pred_prob = model_info['model'].predict(X_test_scaled).flatten()
                        else:
                            if hasattr(model_info['model'], 'predict_proba'):
                                y_pred_prob = model_info['model'].predict_proba(X_test_2d)[:, 1]
                            else:
                                y_pred_prob = model_info['model'].predict(X_test_2d).flatten()
                elif hasattr(model_info, 'predict_proba'):
                    y_pred_prob = model_info.predict_proba(X_test_2d)[:, 1]
                else:
                    y_pred_prob = model_info.predict(X_test_2d).flatten()
                
                # Ensure y_pred_prob is of correct shape
                if isinstance(y_pred_prob, np.ndarray) and y_pred_prob.ndim > 1:
                    y_pred_prob = y_pred_prob.flatten()
                
                # Calculate ROC curve
                fpr, tpr, _ = roc_curve(y_test_sample, y_pred_prob)
                roc_auc = auc(fpr, tpr)
                
                # Plot ROC curve
                plt.figure(figsize=(8, 6), dpi=dpi)
                plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
                plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
                plt.xlim([0.0, 1.0])
                plt.ylim([0.0, 1.05])
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title(f'ROC Curve - {model_type} ({shot_config})')
                plt.legend(loc="lower right")
                plt.savefig(f'results/plots/model_viz/roc_{model_type}_{shot_config}.png', dpi=dpi)
                plt.close('all')
                
                # Precision-Recall Curve
                precision, recall, _ = precision_recall_curve(y_test_sample, y_pred_prob)
                avg_precision = average_precision_score(y_test_sample, y_pred_prob)
                
                plt.figure(figsize=(8, 6), dpi=dpi)
                plt.plot(recall, precision, color='blue', lw=2, 
                        label=f'Precision-Recall curve (AP = {avg_precision:.2f})')
                plt.xlabel('Recall')
                plt.ylabel('Precision')
                plt.title(f'Precision-Recall Curve - {model_type} ({shot_config})')
                plt.legend(loc="lower left")
                plt.savefig(f'results/plots/model_viz/pr_curve_{model_type}_{shot_config}.png', dpi=dpi)
                plt.close('all')
                
                # Force garbage collection
                gc.collect()
                
            except Exception as e:
                print(f"Error creating ROC or PR curve for {model_type}: {e}")
                plt.close('all')
                gc.collect()
        
        # Create confusion matrix for all models
        try:
            # Get predictions
            if isinstance(model_info, dict):
                if 'predict_fn' in model_info:
                    y_pred = np.round(model_info['predict_fn'](X_test_sample)).astype(int).flatten()
                elif 'model' in model_info:
                    if 'scaler' in model_info:
                        X_test_scaled = model_info['scaler'].transform(X_test_2d)
                        y_pred = np.round(model_info['model'].predict(X_test_scaled)).astype(int).flatten()
                    else:
                        if model_type in ['CNN', 'LSTM']:
                            X_test_reshaped = X_test_sample.reshape(X_test_sample.shape[0], 300, 40)
                            y_pred = np.round(model_info['model'].predict(X_test_reshaped, verbose=0)).astype(int).flatten()
                        else:
                            y_pred = np.round(model_info['model'].predict(X_test_2d)).astype(int).flatten()
            else:
                y_pred = np.round(model_info.predict(X_test_2d)).astype(int).flatten()
            
            # Create confusion matrix
            from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
            cm = confusion_matrix(y_test_sample, y_pred)
            
            # Plot confusion matrix
            plt.figure(figsize=(8, 6), dpi=dpi)
            disp = ConfusionMatrixDisplay(confusion_matrix=cm, 
                                        display_labels=['Field Pause', 'Filled Pause'])
            disp.plot(cmap=plt.cm.Blues)
            plt.title(f'Confusion Matrix - {model_type} ({shot_config})')
            plt.savefig(f'results/plots/model_viz/cm_{model_type}_{shot_config}.png', dpi=dpi)
            plt.close('all')
            
            # Force garbage collection
            gc.collect()
            
        except Exception as e:
            print(f"Error creating confusion matrix for {model_type}: {e}")
            plt.close('all')
            gc.collect()
            
    except Exception as e:
        print(f"Error in visualization for {model_type}: {e}")
        plt.close('all')
        gc.collect()

    return True

def apply_xai_to_model(model_info, X_train, X_test, y_test, model_type, shot_config="full"):
    """Apply explainable AI techniques to a model (optimized for memory usage)"""
    import gc
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
    
    print(f"\nApplying XAI to {model_type} ({shot_config})...")
    
    # Use a smaller subset of the data for XAI to save memory
    max_samples = min(100, len(X_test))
    X_test_sample = X_test[:max_samples]
    y_test_sample = y_test[:max_samples]
    
    # Prepare data in appropriate format - ensure 2D arrays
    X_train_2d = X_train.reshape(X_train.shape[0], -1) if len(X_train.shape) > 2 else X_train
    X_test_2d = X_test_sample.reshape(X_test_sample.shape[0], -1) if len(X_test_sample.shape) > 2 else X_test_sample
    
    # Get predictions based on model type
    try:
        if isinstance(model_info, dict):
            if 'predict_fn' in model_info:
                # Use the model's custom predict function
                y_pred_prob = model_info['predict_fn'](X_test_sample)
                # Ensure y_pred is properly shaped
                if isinstance(y_pred_prob, np.ndarray):
                    if y_pred_prob.ndim > 1 and y_pred_prob.shape[1] == 1:
                        y_pred_prob = y_pred_prob.flatten()
                    y_pred = (y_pred_prob > 0.5).astype(int)
                else:
                    # Handle case where predict_fn returns something unexpected
                    y_pred = np.zeros(len(X_test_sample))
                    print(f"Warning: Unexpected predict_fn output for {model_type}")
            elif 'model' in model_info:
                # Handle models with scalers
                if 'scaler' in model_info:
                    X_test_scaled = model_info['scaler'].transform(X_test_2d)
                    if hasattr(model_info['model'], 'predict'):
                        try:
                            y_pred = model_info['model'].predict(X_test_scaled)
                            if hasattr(y_pred, 'ndim') and y_pred.ndim > 1 and y_pred.shape[1] == 1:
                                y_pred = y_pred.flatten()
                            y_pred = np.round(y_pred).astype(int)
                        except Exception as e:
                            print(f"Error predicting with {model_type}: {e}")
                            y_pred = np.zeros(len(X_test_sample))
                    else:
                        print(f"Model {model_type} doesn't support predict method")
                        y_pred = np.zeros(len(X_test_sample))
                else:
                    # Handle models without scalers
                    if model_type in ['CNN', 'LSTM', 'MLP']:
                        # For neural networks, we need to reshape
                        try:
                            # For CNN/LSTM, reshape to expected dimensions
                            if model_type in ['CNN', 'LSTM']:
                                X_reshaped = X_test_sample.reshape(X_test_sample.shape[0], 300, 40)
                                y_pred = model_info['model'].predict(X_reshaped, verbose=0)
                            else:
                                y_pred = model_info['model'].predict(X_test_2d, verbose=0)
                            
                            if hasattr(y_pred, 'ndim') and y_pred.ndim > 1 and y_pred.shape[1] == 1:
                                y_pred = y_pred.flatten()
                            y_pred = np.round(y_pred).astype(int)
                        except Exception as e:
                            print(f"Error predicting with {model_type}: {e}")
                            y_pred = np.zeros(len(X_test_sample))
                    else:
                        # For other models
                        try:
                            y_pred = model_info['model'].predict(X_test_2d)
                            if hasattr(y_pred, 'ndim') and y_pred.ndim > 1 and y_pred.shape[1] == 1:
                                y_pred = y_pred.flatten()
                            y_pred = np.round(y_pred).astype(int)
                        except Exception as e:
                            print(f"Error predicting with {model_type}: {e}")
                            y_pred = np.zeros(len(X_test_sample))
        else:
            # Handle the case where model_info is the model itself
            try:
                y_pred = model_info.predict(X_test_2d)
                if hasattr(y_pred, 'ndim') and y_pred.ndim > 1 and y_pred.shape[1] == 1:
                    y_pred = y_pred.flatten()
                y_pred = np.round(y_pred).astype(int)
            except Exception as e:
                print(f"Error predicting with {model_type}: {e}")
                y_pred = np.zeros(len(X_test_sample))
        
        # Ensure y_pred is 1D for comparison
        if hasattr(y_pred, 'ndim') and y_pred.ndim > 1:
            y_pred = y_pred.flatten()
        
        # Convert to numpy arrays if they aren't already
        y_pred = np.array(y_pred)
        y_test_sample = np.array(y_test_sample)
        
        # Calculate and display model performance metrics
        accuracy = accuracy_score(y_test_sample, y_pred)
        precision = precision_score(y_test_sample, y_pred, zero_division=0)
        recall = recall_score(y_test_sample, y_pred, zero_division=0)
        f1 = f1_score(y_test_sample, y_pred, zero_division=0)
        
        print(f"\nPerformance metrics for {model_type} ({shot_config}):")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")
        
        # Create confusion matrix visualization
        cm = confusion_matrix(y_test_sample, y_pred)
        plt.figure(figsize=(8, 6), dpi=72)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                    xticklabels=['Field Pause', 'Filled Pause'],
                    yticklabels=['Field Pause', 'Filled Pause'])
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title(f'Confusion Matrix - {model_type} ({shot_config})')
        plt.tight_layout()
        plt.savefig(f'results/plots/xai_cm_{model_type}_{shot_config}.png', dpi=72)
        plt.close('all')
        gc.collect()
        
        # Apply XAI methods selectively based on model type to save memory
        # Only apply LIME to traditional ML models
        if model_type in ['kNN', 'RF_RFE', 'RF', 'SVM', 'WeightedSVM', 'BalancedRF', 'WeightedXGB', 'RUSBoost']:
            try:
                # Try to import LIME
                import lime
                import lime.lime_tabular
                
                # Find correctly predicted samples for explanation
                correct_indices = np.where(y_pred == y_test_sample)[0]
                
                # If we have correctly predicted samples, use them for LIME
                if len(correct_indices) > 0:
                    # Select one sample from each class if possible
                    sample_indices = []
                    for cls in np.unique(y_test_sample):
                        cls_correct = [i for i in correct_indices if y_test_sample[i] == cls]
                        if cls_correct:
                            sample_indices.append(cls_correct[0])
                    
                    # If no samples found, use the first correct index
                    if not sample_indices:
                        sample_indices = [correct_indices[0]]
                    
                    # Create feature names
                    feature_names = [f'feature_{i}' for i in range(X_test_2d.shape[1])]
                    
                    # Create and save LIME explanations for selected samples
                    for idx in sample_indices[:1]:  # Limit to 1 sample to save memory
                        try:
                            # Initialize LIME explainer
                            explainer = lime.lime_tabular.LimeTabularExplainer(
                                X_train_2d[:min(500, len(X_train_2d))],  # Use subset of training data
                                feature_names=feature_names,
                                class_names=['Field Pause', 'Filled Pause'],
                                mode='classification',
                                random_state=42
                            )
                            
                            # Create explanation function based on model type
                            if isinstance(model_info, dict) and 'model' in model_info:
                                if 'scaler' in model_info:
                                    def predict_fn(x):
                                        x_scaled = model_info['scaler'].transform(x)
                                        return model_info['model'].predict_proba(x_scaled)
                                else:
                                    def predict_fn(x):
                                        return model_info['model'].predict_proba(x)
                            elif hasattr(model_info, 'predict_proba'):
                                def predict_fn(x):
                                    return model_info.predict_proba(x)
                            else:
                                print(f"Model {model_type} doesn't support probability predictions for LIME")
                                continue
                            
                            # Generate explanation with fewer features
                            exp = explainer.explain_instance(
                                X_test_2d[idx],
                                predict_fn,
                                num_features=8  # Reduced number of features
                            )
                            
                            # Save explanation as image only (more memory efficient than HTML)
                            class_name = 'field_pause' if y_test_sample[idx] == 0 else 'filled_pause'
                            plt.figure(figsize=(10, 6), dpi=72)
                            exp.as_pyplot_figure()
                            plt.tight_layout()
                            plt.savefig(f'results/plots/lime_{model_type}_{shot_config}_{class_name}.png', dpi=72)
                            plt.close('all')
                            
                        except Exception as e:
                            print(f"Error creating LIME explanation for sample {idx}: {e}")
                            plt.close('all')
                    
                    print(f"LIME explanations created for {model_type}")
                else:
                    print(f"No correct predictions for LIME visualization in {model_type}")
                
                # Force garbage collection
                gc.collect()
                
            except ImportError:
                print("LIME not installed. Skipping LIME explanations.")
            except Exception as e:
                print(f"Error applying LIME to {model_type}: {e}")
                plt.close('all')
                gc.collect()
        
        # Return performance metrics
        return {
            'model_type': model_type,
            'shot_config': shot_config,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1
        }
    
    except Exception as e:
        print(f"Error in XAI processing for {model_type}: {e}")
        plt.close('all')
        gc.collect()
        return None

# Main execution function
def main():
    """Main execution script to train and evaluate models for filled pause detection"""
    import os
    import gc
    import time
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from collections import Counter
    
    # Create necessary directories
    os.makedirs('results/plots', exist_ok=True)
    os.makedirs('results/tables', exist_ok=True)
    os.makedirs('results/model_architecture', exist_ok=True)
    os.makedirs('results/plots/model_viz', exist_ok=True)
    
    # Initialize global variables to store results
    global all_results, best_models
    all_results = []
    best_models = {}
    
    print("Starting filled pause detection model evaluation with imbalanced data handling...")
    
    # Starting with data loading (this section would be provided but let's stub it)
    # Normally you would load X_train, y_train, X_test, y_test here
    # For this example, we'll assume they're loaded already
    
    # Analyze data imbalance
    print("\nData distribution analysis:")
    class_counts = Counter(y)
    total_samples = len(y)
    for label, count in class_counts.items():
        print(f"Class {label} ({'Field pause' if label == 0 else 'Filled pause'}): {count} samples ({count/total_samples*100:.2f}%)")
    
    # Create balanced datasets
    balanced_datasets = create_balanced_datasets(X_train, y_train, X_test, y_test)
    
    # Use a phased approach to training to avoid memory issues
    
    # Phase 1: Train specialized models for imbalanced data with full dataset
    print("\n\n==== Phase 1: Training Specialized Models for Imbalanced Data ====")
    imbalanced_models = train_balanced_models(X_train, y_train, X_test, y_test, "full")
    gc.collect()  # Force garbage collection
    
    # Phase 2: Train regular models with the undersampled balanced dataset
    print("\n\n==== Phase 2: Training with Undersampled Balanced Dataset ====")
    X_balanced, y_balanced = balanced_datasets['undersampled']
    
    # Machine Learning Models - less memory intensive
    knn_model, _ = train_knn(X_balanced, y_balanced, X_test, y_test, "balanced")
    rf_model, _ = train_rf_rfe(X_balanced, y_balanced, X_test, y_test, "balanced")
    svm_model, _ = train_svm(X_balanced, y_balanced, X_test, y_test, "balanced")
    gc.collect()  # Force garbage collection
    
    # Deep Learning Models - more memory intensive, train them one by one
    print("\nTraining Deep Learning Models one by one to manage memory...")
    mlp_model, _ = train_mlp(X_balanced, y_balanced, X_test, y_test, "balanced")
    gc.collect()
    
    cnn_model, _ = train_cnn(X_balanced, y_balanced, X_test, y_test, "balanced")
    gc.collect()
    
    cnn_xgb_model, _ = train_cnn_xgboost(X_balanced, y_balanced, X_test, y_test, "balanced")
    gc.collect()
    
    lstm_model, _ = train_lstm(X_balanced, y_balanced, X_test, y_test, "balanced")
    gc.collect()
    
    # Phase 3: Train models with few-shot learning
    print("\n\n==== Phase 3: Training with Few-Shot Learning ====")
    
    # Group shot configs to train in batches
    shot_configs = [config for config in balanced_datasets.keys() 
                   if config != 'undersampled' and int(config.split('-')[0]) >= 3]
    
    for shot_config in shot_configs:
        X_few, y_few = balanced_datasets[shot_config]
        print(f"\n--- Training with {shot_config} Learning ---")
        
        # Traditional ML models
        train_knn(X_few, y_few, X_test, y_test, shot_config)
        train_rf_rfe(X_few, y_few, X_test, y_test, shot_config)
        train_svm(X_few, y_few, X_test, y_test, shot_config)
        gc.collect()
        
        # Only train complex models with larger shots
        if int(shot_config.split('-')[0]) >= 5:
            # Train these one by one to manage memory
            train_balanced_models(X_few, y_few, X_test, y_test, shot_config)
            gc.collect()
            
            if int(shot_config.split('-')[0]) >= 10:  # Skip the smallest datasets for deep learning
                train_mlp(X_few, y_few, X_test, y_test, shot_config)
                gc.collect()
                
                train_cnn(X_few, y_few, X_test, y_test, shot_config)
                gc.collect()
                
                train_lstm(X_few, y_few, X_test, y_test, shot_config)
                gc.collect()
    
    # Phase 4: Create results table and find best model
    print("\n\n==== Phase 4: Analyzing Results ====")
    results_df = create_results_table()
    
    # Identify best model based on F1 score
    best_result = results_df.loc[results_df['f1_score'].idxmax()]
    best_model_name = best_result['model_name']
    print(f"\nBest model: {best_model_name} with F1 score: {best_result['f1_score']:.4f}")
    
    # Phase 5: Apply XAI to top models only
    print("\n\n==== Phase 5: Applying XAI to Top Models Only ====")
    
    # Only process top 3 models for XAI to save memory
    top_models = results_df.nlargest(3, 'f1_score')
    xai_results = []
    
    for _, row in top_models.iterrows():
        model_key = row['model_name']
        model_type, shot_config = model_key.split('_', 1)
        
        if model_key in best_models:
            model_info = best_models[model_key]
            
            # Apply visualizations
            visualize_model(model_info, X_train, y_train, X_test, y_test, model_type, shot_config)
            gc.collect()
            
            # Apply XAI with sample limit
            result = apply_xai_to_model(model_info, X_train, X_test, y_test, model_type, shot_config)
            if result:
                xai_results.append(result)
            gc.collect()
    
    # Create a summary of XAI results
    if xai_results:
        xai_df = pd.DataFrame(xai_results)
        
        # Save XAI results
        xai_df.to_csv('results/tables/xai_performance.csv', index=False)
        
        # Plot XAI comparison for top models only
        plt.figure(figsize=(10, 6), dpi=72)
        sns.barplot(x='model_type', y='f1_score', data=xai_df)
        plt.title('F1 Score Comparison for Top Models')
        plt.xlabel('Model Type')
        plt.ylabel('F1 Score')
        plt.ylim(0, 1.0)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig('results/plots/xai_comparison_top.png', dpi=72)
        plt.close('all')
        
        print("\nXAI analysis complete. Results saved to results/tables/xai_performance.csv")
        print("XAI visualizations saved to results/plots/ directory")
    
    # Final cleanup
    gc.collect()
    print("\nAll processing complete!")

# Run the main function if this script is executed directly
if __name__ == "__main__":
    main()

Starting filled pause detection model evaluation with imbalanced data handling...

Data distribution analysis:
Class 0 (Field pause): 21 samples (1.60%)
Class 1 (Filled pause): 1293 samples (98.40%)

Creating balanced datasets from 1051 samples
Original class distribution: {1: 1034, 0: 17}
Undersampled dataset: 51 samples, distribution: {0: 17, 1: 34}
1-shot dataset: 2 samples, distribution: {0: 1, 1: 1}
3-shot dataset: 6 samples, distribution: {0: 3, 1: 3}
5-shot dataset: 10 samples, distribution: {0: 5, 1: 5}
10-shot dataset: 20 samples, distribution: {0: 10, 1: 10}
17-shot dataset: 34 samples, distribution: {0: 17, 1: 17}


==== Phase 1: Training Specialized Models for Imbalanced Data ====

Training Balanced Random Forest (full)...

BalancedRF (full) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9979
Best threshold: 0.2700
Execution Time: 0.04 seconds


  warn(


Model architecture saved to results/model_architecture/BalancedRF_full.txt

Training RUSBoost (full)...

RUSBoost (full) Results:
Accuracy: 0.9924
Precision: 0.9923
Recall: 1.0000
F1 Score: 0.9962
Average Precision (AP): 0.9932
Best threshold: 0.3980
Execution Time: 0.17 seconds
Model architecture saved to results/model_architecture/RUSBoost_full.txt

Training Cost-sensitive SVM (full)...

WeightedSVM (full) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9977
Best threshold: 0.9815
Execution Time: 1.79 seconds
Model architecture saved to results/model_architecture/WeightedSVM_full.txt

Training Weighted XGBoost (full)...


Parameters: { "use_label_encoder" } are not used.




WeightedXGB (full) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9952
Best threshold: 0.4423
Execution Time: 0.13 seconds
Model architecture saved to results/model_architecture/WeightedXGB_full.txt


==== Phase 2: Training with Undersampled Balanced Dataset ====

Training k-NN (balanced)...
Best k for balanced: 11

kNN (balanced) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9925
Best threshold: 0.3636
Execution Time: 0.08 seconds
Model architecture saved to results/model_architecture/kNN_balanced.txt

Training Random Forest with RFE (balanced)...

RF_RFE (balanced) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9924
Best threshold: 0.3100
Execution Time: 0.05 seconds
Model architecture saved to results/model_architecture/RF_RFE_balanced.txt

Training SVM (balanced)...

SVM (balanced) Results:
Accuracy: 0.9848
Precision:

Parameters: { "use_label_encoder" } are not used.




CNN_XGBoost (balanced) Results:
Accuracy: 0.6400
Precision: 1.0000
Recall: 0.6327
F1 Score: 0.7750
Execution Time: 0.21 seconds

Training LSTM (balanced)...
Model architecture saved to results/model_architecture/LSTM_balanced.txt

LSTM (balanced) Results:
Accuracy: 0.9800
Precision: 0.9800
Recall: 1.0000
F1 Score: 0.9899
Execution Time: 0.50 seconds


==== Phase 3: Training with Few-Shot Learning ====

--- Training with 3-shot Learning ---

Training k-NN (3-shot)...

kNN (3-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9833
Best threshold: 0.4000
Execution Time: 0.06 seconds
Model architecture saved to results/model_architecture/kNN_3-shot.txt

Training Random Forest with RFE (3-shot)...
Error in RF+RFE: Not enough samples for RFE
Falling back to standard Random Forest

RF (3-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9975
Best threshold: 0.3100
Execution Time

  warn(


Model architecture saved to results/model_architecture/BalancedRF_5-shot.txt

Training RUSBoost (5-shot)...

RUSBoost (5-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9833
Best threshold: 0.0000
Execution Time: 0.01 seconds
Model architecture saved to results/model_architecture/RUSBoost_5-shot.txt

Training Cost-sensitive SVM (5-shot)...

WeightedSVM (5-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9795
Best threshold: 0.4483
Execution Time: 0.12 seconds
Model architecture saved to results/model_architecture/WeightedSVM_5-shot.txt

Training Weighted XGBoost (5-shot)...

WeightedXGB (5-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9888
Best threshold: 0.2743
Execution Time: 0.01 seconds
Model architecture saved to results/model_architecture/WeightedXGB_5-shot.txt


Parameters: { "use_label_encoder" } are not used.




--- Training with 10-shot Learning ---

Training k-NN (10-shot)...
Best k for 10-shot: 3

kNN (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9887
Execution Time: 0.07 seconds
Model architecture saved to results/model_architecture/kNN_10-shot.txt

Training Random Forest with RFE (10-shot)...

RF_RFE (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9908
Best threshold: 0.1700
Execution Time: 0.05 seconds
Model architecture saved to results/model_architecture/RF_RFE_10-shot.txt

Training SVM (10-shot)...

SVM (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9777
Best threshold: 0.3970
Execution Time: 0.15 seconds
Model architecture saved to results/model_architecture/SVM_10-shot.txt

Training Balanced Random Forest (10-shot)...

BalancedRF (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall:

  warn(


Model architecture saved to results/model_architecture/BalancedRF_10-shot.txt

Training RUSBoost (10-shot)...

RUSBoost (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9822
Best threshold: 0.0000
Execution Time: 0.11 seconds
Model architecture saved to results/model_architecture/RUSBoost_10-shot.txt

Training Cost-sensitive SVM (10-shot)...

WeightedSVM (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9777
Best threshold: 0.3970
Execution Time: 0.16 seconds
Model architecture saved to results/model_architecture/WeightedSVM_10-shot.txt

Training Weighted XGBoost (10-shot)...


Parameters: { "use_label_encoder" } are not used.




WeightedXGB (10-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9864
Best threshold: 0.0975
Execution Time: 0.11 seconds
Model architecture saved to results/model_architecture/WeightedXGB_10-shot.txt

Training MLP (10-shot)...
Model architecture saved to results/model_architecture/MLP_10-shot.txt

MLP (10-shot) Results:
Accuracy: 0.8821
Precision: 0.9872
Recall: 0.8919
F1 Score: 0.9371
Execution Time: 0.06 seconds

Training CNN (10-shot)...
Model architecture saved to results/model_architecture/CNN_10-shot.txt

CNN (10-shot) Results:
Accuracy: 0.7034
Precision: 0.9840
Recall: 0.7104
F1 Score: 0.8251
Execution Time: 0.08 seconds

Training LSTM (10-shot)...
Model architecture saved to results/model_architecture/LSTM_10-shot.txt

LSTM (10-shot) Results:
Accuracy: 0.2850
Precision: 1.0000
Recall: 0.2704
F1 Score: 0.4257
Execution Time: 0.50 seconds

--- Training with 17-shot Learning ---

Training k-NN (17-shot)...
Best k for 17-

  warn(


Model architecture saved to results/model_architecture/BalancedRF_17-shot.txt

Training RUSBoost (17-shot)...

RUSBoost (17-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9941
Best threshold: 0.0001
Execution Time: 0.17 seconds
Model architecture saved to results/model_architecture/RUSBoost_17-shot.txt

Training Cost-sensitive SVM (17-shot)...

WeightedSVM (17-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9957
Best threshold: 0.4282
Execution Time: 0.21 seconds
Model architecture saved to results/model_architecture/WeightedSVM_17-shot.txt

Training Weighted XGBoost (17-shot)...


Parameters: { "use_label_encoder" } are not used.




WeightedXGB (17-shot) Results:
Accuracy: 0.9848
Precision: 0.9848
Recall: 1.0000
F1 Score: 0.9923
Average Precision (AP): 0.9923
Best threshold: 0.0504
Execution Time: 0.13 seconds
Model architecture saved to results/model_architecture/WeightedXGB_17-shot.txt

Training MLP (17-shot)...
Model architecture saved to results/model_architecture/MLP_17-shot.txt

MLP (17-shot) Results:
Accuracy: 0.9582
Precision: 0.9844
Recall: 0.9730
F1 Score: 0.9786
Execution Time: 0.06 seconds

Training CNN (17-shot)...
Model architecture saved to results/model_architecture/CNN_17-shot.txt

CNN (17-shot) Results:
Accuracy: 0.1445
Precision: 1.0000
Recall: 0.1313
F1 Score: 0.2321
Execution Time: 0.08 seconds

Training LSTM (17-shot)...
Model architecture saved to results/model_architecture/LSTM_17-shot.txt

LSTM (17-shot) Results:
Accuracy: 0.2600
Precision: 1.0000
Recall: 0.2449
F1 Score: 0.3934
Execution Time: 0.50 seconds


==== Phase 4: Analyzing Results ====

Best model: RUSBoost_full with F1 score: 0