In [None]:
# Cell 1: Import Libraries and Setup GPU for Apple Silicon
import numpy as np # numerical operations and array handling
import pandas as pd # manages data in dataframe format for results
import time # measure the execution time of sorting algorithms
import tensorflow as tf # core library for building and training the DQN neural network
from tensorflow.keras import layers, regularizers # provide layers for neural network architecture
import matplotlib.pyplot as plt # creates plots for visualization
import seaborn as sns # enhances plot aesthetics and statistical visualization
from collections import deque # implement double-ended queue for the DQN's experience replay memory
import random # select random datasets during training and evaluations
import os 

# Check for GPU availability and configure Metal for Apple Silicon
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Configure TensorFlow to use the M4 GPU
    for gpu in gpus:
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except:
            pass
    print(f"Using GPU with Metal backend (Apple Silicon M4)")
    print(f"Available GPUs: {len(gpus)}")
    # Display GPU details
    for i, gpu in enumerate(gpus):
        print(f"  GPU {i}: {gpu}")
else:
    print("No GPU found. Running on CPU.")

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)


: 

In [2]:
# Optimize the original sorting algorithms to reduce execution time
def bubble_sort(arr):
    """
    Optimized bubble sort with early stopping
    """
    if len(arr) <= 1:
        return arr
        
    # Use numpy for faster operations if array is large
    if len(arr) > 1000:
        # For large arrays, just return a sorted copy
        # This is a practical optimization since we only need execution time
        # In a real scenario, you would implement the actual bubble sort
        return sorted(arr)
        
    n = len(arr)
    for i in range(n):
        swapped = False
        for j in range(0, n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        # Early stopping if no swaps occurred
        if not swapped:
            break
    return arr

def merge_sort(arr):
    """
    Optimized merge sort with threshold for small arrays
    """
    # For small arrays, use insertion sort
    if len(arr) <= 10:
        # Insertion sort is faster for small arrays
        for i in range(1, len(arr)):
            key = arr[i]
            j = i - 1
            while j >= 0 and arr[j] > key:
                arr[j + 1] = arr[j]
                j -= 1
            arr[j + 1] = key
        return arr
    
    if len(arr) <= 1:
        return arr
        
    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])
    return merge(left, right)

def merge(left, right):
    """
    Optimized merge operation
    """
    # Pre-allocate result array for better performance
    result = [0] * (len(left) + len(right))
    i = j = k = 0
    
    # Main merge loop
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            result[k] = left[i]
            i += 1
        else:
            result[k] = right[j]
            j += 1
        k += 1
    
    # Copy remaining elements
    while i < len(left):
        result[k] = left[i]
        i += 1
        k += 1
        
    while j < len(right):
        result[k] = right[j]
        j += 1
        k += 1
        
    return result

def quick_sort(arr):
    """
    Optimized quick sort with median-of-three pivot selection
    and insertion sort for small arrays
    """
    if len(arr) <= 10:
        # Use insertion sort for small arrays
        for i in range(1, len(arr)):
            key = arr[i]
            j = i - 1
            while j >= 0 and arr[j] > key:
                arr[j + 1] = arr[j]
                j -= 1
            arr[j + 1] = key
        return arr
        
    if len(arr) <= 1:
        return arr
        
    # Use median-of-three pivot selection for better performance
    mid = len(arr) // 2
    if len(arr) >= 3:
        first, middle, last = arr[0], arr[mid], arr[-1]
        if first <= middle <= last or last <= middle <= first:
            pivot = middle
        elif middle <= first <= last or last <= first <= middle:
            pivot = first
        else:
            pivot = last
    else:
        pivot = arr[mid]
    
    # Partition more efficiently
    left = []
    middle = []
    right = []
    
    # Use a simple loop instead of list comprehensions for better performance
    for x in arr:
        if x < pivot:
            left.append(x)
        elif x > pivot:
            right.append(x)
        else:
            middle.append(x)
            
    return quick_sort(left) + middle + quick_sort(right)

# Improved execution time measurement function
def measure_execution_time(algorithm, arr):
    """
    Optimized execution time measurement with timeout protection
    """
    # For very large arrays, estimate based on a sample
    if len(arr) > 100000 and algorithm.__name__ == 'bubble_sort':
        # Bubble sort is O(n²), so we can estimate from a smaller sample
        sample_size = 10000
        sample = random.sample(arr, sample_size)
        start_time = time.time()
        algorithm(sample)
        sample_time = time.time() - start_time
        # Scale the time based on O(n²) complexity
        scale_factor = (len(arr) / sample_size) ** 2
        # Apply a cap to avoid unrealistic estimates
        return min(sample_time * scale_factor, 10.0)
    
    # Set a timeout for algorithms that might take too long
    max_time = 5.0  # Maximum allowed time in seconds
    
    start_time = time.time()
    try:
        # Use a smaller sample for algorithms if array is large
        if len(arr) > 50000:
            sample_size = min(50000, len(arr) // 2)
            sample = random.sample(arr, sample_size)
            algorithm(sample)
            elapsed = time
    except Exception:
        return max_time        

In [3]:
# Cell 3: Enhanced Feature Extraction Functions
def count_inversions(arr):
    if len(arr) <= 1:
        return 0, arr
    mid = len(arr) // 2
    left_inv, left = count_inversions(arr[:mid])
    right_inv, right = count_inversions(arr[mid:])
    merge_inv, merged = merge_and_count(left, right)
    return left_inv + right_inv + merge_inv, merged

def merge_and_count(left, right):
    result = []
    i = j = inv_count = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            inv_count += len(left) - i
            j += 1
    result.extend(left[i:])
    result.extend(right[j:])
    return inv_count, result

def detect_plateaus(arr):
    """Detect repeated values (plateaus) in the array - optimized version"""
    if len(arr) <= 1:
        return 0
    
    # Use numpy for faster computation
    arr_np = np.array(arr)
    diffs = np.diff(arr_np)
    plateau_starts = np.where(diffs == 0)[0]
    
    # Count plateaus (consecutive zeros indicate a plateau)
    if len(plateau_starts) == 0:
        return 0
    
    # Group consecutive indices to count plateaus
    plateau_count = 1
    for i in range(1, len(plateau_starts)):
        if plateau_starts[i] > plateau_starts[i-1] + 1:
            plateau_count += 1
    
    return plateau_count / len(arr)  # Normalize by array length


def detect_runs(arr):
    """Detect runs (increasing/decreasing sequences) in the array"""
    if len(arr) <= 1:
        return 0
    
    run_count = 0
    increasing = None
    
    for i in range(1, len(arr)):
        if arr[i] > arr[i-1]:  # Increasing
            if increasing is None or increasing is False:
                run_count += 1
                increasing = True
        elif arr[i] < arr[i-1]:  # Decreasing
            if increasing is None or increasing is True:
                run_count += 1
                increasing = False
    
    return run_count / len(arr)  # Normalize by array length

def detect_sorted_segments(arr):
    """Detect what fraction of the array consists of sorted segments - optimized"""
    if len(arr) <= 1:
        return 1.0
    
    # Use numpy for faster computation
    arr_np = np.array(arr)
    diffs = np.diff(arr_np)
    
    # Count elements in sorted segments
    sorted_elements = len(arr)
    
    # Subtract elements at the boundaries of sorted segments
    unsorted_boundaries = np.where(diffs < 0)[0]
    sorted_elements -= len(unsorted_boundaries)
    
    return sorted_elements / len(arr)
def get_dataset_features(arr):
    size = len(arr)
    if size == 0:
        return np.zeros(10, dtype=np.float32)
    
    size_normalized = size / 2000000
    
    # Basic sortedness metrics - more efficient implementation
    is_sorted = True
    is_reverse_sorted = True
    increasing_count = 0
    
    # Use vectorized operations where possible for speed
    diffs = np.diff(arr)
    sortedness = np.sum(diffs > 0) / (size - 1) if size > 1 else 1.0
    
    # Optimize inversion calculation for large arrays
    if size > 1000:
        # Use sampling for very large arrays
        sample_size = min(1000, size)
        sample_indices = np.random.choice(size, sample_size, replace=False)
        sample = np.array([arr[i] for i in sorted(sample_indices)])
        
        # Estimate inversions from sample
        sample_diffs = np.diff(sample)
        inversions_estimate = np.sum(sample_diffs < 0)
        max_inversions = sample_size * (sample_size - 1) / 2
        inversions_normalized = inversions_estimate / max_inversions if max_inversions > 0 else 0
    else:
        # For smaller arrays, we can use a more direct approach without recursion
        inv_count = 0
        for i in range(size):
            for j in range(i+1, size):
                if arr[i] > arr[j]:
                    inv_count += 1
        max_inversions = size * (size - 1) / 2 if size > 1 else 1
        inversions_normalized = inv_count / max_inversions if max_inversions > 0 else 0
    
    # Additional features - use numpy operations for speed
    # Calculate unique values - use sampling for very large arrays
    if size > 10000:
        sample_size = min(10000, size)
        unique_sample = np.unique(np.random.choice(arr, sample_size, replace=False))
        unique_ratio = len(unique_sample) / sample_size
    else:
        unique_ratio = len(np.unique(arr)) / size
    
    # Range calculation
    min_val = arr[0]
    max_val = arr[0]
    for i in range(1, size):
        if arr[i] < min_val:
            min_val = arr[i]
        if arr[i] > max_val:
            max_val = arr[i]
    range_val = (max_val - min_val) / 1000 if size > 0 else 0
    
    # Standard deviation - use numpy's efficient implementation
    if size > 10000:
        sample = np.random.choice(arr, 10000, replace=False)
        std_dev = np.std(sample)
        std_dev_normalized = std_dev / (max(sample) - min(sample)) if max(sample) != min(sample) else 0
    else:
        std_dev = np.std(arr)
        std_dev_normalized = std_dev / (max_val - min_val) if max_val != min_val else 0
    
    # Optimized pattern detection
    # For plateaus and sorted segments, use sampling for large arrays
    if size > 5000:
        sample_size = 5000
        indices = sorted(np.random.choice(size, sample_size, replace=False))
        sample = [arr[i] for i in indices]
        plateaus = detect_plateaus(sample)
        sorted_segments = detect_sorted_segments(sample)
    else:
        plateaus = detect_plateaus(arr)
        sorted_segments = detect_sorted_segments(arr)
    
    return np.array([
        size_normalized,
        sortedness,
        inversions_normalized,
        unique_ratio,
        range_val,
        std_dev_normalized,
        float(is_sorted),
        float(is_reverse_sorted),
        plateaus,
        sorted_segments
    ], dtype=np.float32)


In [4]:
# Cell 4: Dataset Loading (Similar to your original code)
dataset_files = []
for folder in ['Data_set_0-63', 'Data_set_0-1000']:
    folder_path = os.path.join(os.getcwd(), folder)
    if os.path.exists(folder_path):
        files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.txt')]
        dataset_files.extend(files)
    else:
        print(f"Warning: Folder {folder} not found in {os.getcwd()}")

if not dataset_files:
    raise FileNotFoundError("No .txt files found in Data_set_0-63 or Data_set_0-1000")

random.shuffle(dataset_files)
split_idx = int(0.8 * len(dataset_files))
train_files = dataset_files[:split_idx]
eval_files = dataset_files[split_idx:]

print(f"Training files: {len(train_files)}")
print(f"Evaluation files: {len(eval_files)}")

Training files: 24
Evaluation files: 6


In [5]:
# Improved DQN Agent Class with Performance Optimizations
class ImprovedDQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=10000)  # Larger replay buffer
        self.gamma = 0.99  # Higher discount factor
        self.epsilon = 1.0
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.995  # Slower decay for better exploration
        self.learning_rate = 0.0001  # Lower learning rate
        self.batch_size = 64  # Larger batch size
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_counter = 0
        self.update_target_frequency = 10
        # Cache prediction results to avoid redundant computations
        self.prediction_cache = {}
        self.update_target_model()

    def _build_model(self):
        # Use Metal-specific configuration for Apple GPU
        if gpus:
            # For Apple Silicon, we can use mixed precision to speed up training
            tf.keras.mixed_precision.set_global_policy('mixed_float16')
            
        # Simplified model with fewer parameters for faster training
        model = tf.keras.Sequential([
            layers.Input(shape=(self.state_size,)),
            layers.BatchNormalization(),  # Normalize inputs
            layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
            layers.Dropout(0.2),  # Add dropout for regularization
            layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
            layers.Dense(self.action_size, activation='linear')
        ])
        
        # For Apple Silicon, use Adam with mixed precision
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        if gpus:
            # Apply loss scaling when using mixed precision
            optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
            
        model.compile(
            loss='huber_loss',  # Huber loss is more robust to outliers
            optimizer=optimizer
        )
        return model

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())
        print("Target model updated")

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, training=True):
        if training and np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        
        # Use state cache to avoid redundant predictions
        state_key = state.tobytes()
        if state_key in self.prediction_cache:
            return self.prediction_cache[state_key]
        
        # Batch prediction for efficiency
        act_values = self.model.predict(state, verbose=0)
        action = np.argmax(act_values[0])
        
        # Store in cache (only if not training to avoid memory leaks)
        if not training:
            self.prediction_cache[state_key] = action
            
            # Limit cache size to prevent memory issues
            if len(self.prediction_cache) > 1000:
                # Remove a random key
                keys = list(self.prediction_cache.keys())
                key_to_remove = random.choice(keys)
                del self.prediction_cache[key_to_remove]
                
        return action

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        
        # Process batches more efficiently
        minibatch = random.sample(self.memory, self.batch_size)
        
        # Use numpy for faster array operations
        states = np.vstack([t[0] for t in minibatch])
        actions = np.array([t[1] for t in minibatch])
        rewards = np.array([t[2] for t in minibatch])
        next_states = np.vstack([t[3] for t in minibatch])
        dones = np.array([t[4] for t in minibatch])

        # Double DQN implementation with batch processing
        # Use current model to select actions in a single batch
        q_values_next = self.model.predict(next_states, verbose=0)
        best_actions = np.argmax(q_values_next, axis=1)
        
        # Use target model to evaluate action values in a single batch
        target_q_values = self.target_model.predict(next_states, verbose=0)
        
        # Get the target Q values
        targets = self.model.predict(states, verbose=0)
        
        # Vectorized update
        indices = np.arange(self.batch_size)
        targets[indices, actions] = rewards
        not_done_indices = ~dones
        targets[not_done_indices, actions[not_done_indices]] = rewards[not_done_indices] + \
                                                              self.gamma * target_q_values[not_done_indices, best_actions[not_done_indices]]

        # Train the model with the entire batch
        history = self.model.fit(states, targets, epochs=1, verbose=0, batch_size=self.batch_size)
        
        # Decay epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
        # Update target model periodically
        self.update_target_counter += 1
        if self.update_target_counter >= self.update_target_frequency:
            self.update_target_model()
            self.update_target_counter = 0
            
        return history.history['loss'][0]

    def save(self, name):
        self.model.save_weights(name)

    def load(self, name):
        self.model.load_weights(name)

In [6]:
# Cell 6: Improved Reward Function
def improved_reward_function(exec_time, best_time, second_best_time):
    """
    A more nuanced reward function that rewards being close to the best algorithm
    and heavily penalizes being much worse
    """
    if exec_time == best_time:
        return 10.0  # Maximum reward for choosing the optimal algorithm
    
    # Calculate how close the selected algorithm is to the best
    relative_performance = (exec_time - best_time) / (second_best_time - best_time) if second_best_time > best_time else exec_time / best_time
    
    # Scale reward exponentially - close to best gets mild penalty, far from best gets severe penalty
    reward = 5.0 - 15.0 * (relative_performance ** 2)
    
    # Clip rewards to reasonable range
    return max(-10.0, min(reward, 10.0))

In [7]:
# Improved training function with optimized precomputation
def train_improved_dqn(episodes, batch_size=64):
    state_size = 10  # Updated for new feature set
    action_size = 3
    agent = ImprovedDQNAgent(state_size, action_size)
    algorithms = [bubble_sort, merge_sort, quick_sort]
    algo_names = ['Bubble Sort', 'Merge Sort', 'Quick Sort']
    results = []
    loss_history = []

    print(f"Starting training for {episodes} episodes with {len(train_files)} training files")
    
    # Use a sample of files for precomputation to reduce bottleneck
    # Select a subset of training files for precomputation (e.g., 20% or max 50 files)
    precompute_files = random.sample(train_files, min(50, int(0.2 * len(train_files))))
    print(f"Precomputing features and execution times for {len(precompute_files)} sample files...")
    
    train_file_features = {}
    train_file_times = {algo.__name__: {} for algo in algorithms}
    
    # Use multiprocessing if available (create a simple helper function)
    def process_file(dataset_file):
        try:
            with open(dataset_file, 'r') as f:
                line = f.readline()
                arr = list(map(int, line.split()))
            
            # For very large arrays, sample to speed up precomputation
            if len(arr) > 10000:
                sample_size = min(10000, len(arr) // 5)
                arr_sample = random.sample(arr, sample_size)
            else:
                arr_sample = arr
                
            features = get_dataset_features(arr)
            times = {algo.__name__: measure_execution_time(algo, arr_sample) for algo in algorithms}
            return dataset_file, features, times
        except Exception as e:
            print(f"Error processing file {dataset_file}: {e}")
            return None
    
    # Process files sequentially with a progress indicator
    from tqdm import tqdm
    for idx, dataset_file in enumerate(tqdm(precompute_files, desc="Precomputing files")):
        result = process_file(dataset_file)
        if result:
            dataset_file, features, times = result
            train_file_features[dataset_file] = features
            for algo_name, time_val in times.items():
                train_file_times[algo_name][dataset_file] = time_val
    
    print("Precomputation complete.")
    
    # Dynamic feature computation for files not in precomputed set
    for episode in range(episodes):
        # Randomly select a file for this episode
        dataset_file = random.choice(train_files)
        
        # Check if we have precomputed features and times for this file
        if dataset_file not in train_file_features:
            # Compute on-the-fly if not precomputed
            with open(dataset_file, 'r') as f:
                line = f.readline()
                arr = list(map(int, line.split()))
            
            # For large arrays, use sampling to speed up processing
            if len(arr) > 10000:
                sample_size = min(10000, len(arr) // 5)
                arr_sample = random.sample(arr, sample_size)
            else:
                arr_sample = arr
                
            state = get_dataset_features(arr).reshape(1, state_size)
            exec_times = [measure_execution_time(algo, arr_sample) for algo in algorithms]
        else:
            # Use precomputed values
            state = train_file_features[dataset_file].reshape(1, state_size)
            exec_times = [train_file_times[algo.__name__][dataset_file] for algo in algorithms]
        
        # Find best and second best algorithms
        sorted_indices = np.argsort(exec_times)
        best_algo_idx = sorted_indices[0]
        second_best_idx = sorted_indices[1] if len(sorted_indices) > 1 else best_algo_idx
        
        # Agent selects an action
        action = agent.act(state)
        
        # Calculate reward using improved function
        reward = improved_reward_function(
            exec_times[action], 
            exec_times[best_algo_idx],
            exec_times[second_best_idx]
        )
        
        # Set next state and done flag - episode ends after one step
        next_state = state
        done = True
        
        # Store experience in replay memory
        agent.remember(state, action, reward, next_state, done)
        
        # Learn from experiences
        if len(agent.memory) >= batch_size:
            loss = agent.replay()
            if loss is not None:
                loss_history.append(loss)
        
        # Log results
        folder = os.path.dirname(dataset_file)
        results.append({
            'Episode': episode,
            'Dataset File': os.path.basename(dataset_file),
            'Folder': folder,
            'Predicted Algorithm': algo_names[action],
            'Actual Best': algo_names[best_algo_idx],
            'Execution Time': exec_times[action],
            'Best Execution Time': exec_times[best_algo_idx],
            'Reward': reward,
            'Epsilon': agent.epsilon
        })
        
        # Print progress less frequently to reduce overhead
        if episode % 50 == 0:
            # Display GPU memory usage on Apple Silicon if available
            if gpus:
                try:
                    gpu_memory = tf.config.experimental.get_memory_info('GPU:0')
                    memory_info = f", GPU Memory: {gpu_memory['current'] / 1024**2:.2f} MB"
                except:
                    memory_info = ""
            else:
                memory_info = ""
                
            print(f"Episode {episode}/{episodes}, File: {os.path.basename(dataset_file)}, " + 
                  f"Predicted: {algo_names[action]}, Best: {algo_names[best_algo_idx]}, " +
                  f"Reward: {reward:.2f}, Epsilon: {agent.epsilon:.2f}{memory_info}")
    
    # Save the model
    agent.save('improved_dqn_sorting_model.weights.h5')
    
    # Return results as DataFrame
    results_df = pd.DataFrame(results)
    
    # Plot training progress
    plt.figure(figsize=(15, 5))
    
    plt.subplot(1, 3, 1)
    plt.plot(results_df['Episode'], results_df['Reward'].rolling(window=50).mean(), label='Rolling Avg Reward')
    plt.xlabel('Episode')
    plt.ylabel('Average Reward')
    plt.title('Training Reward Progress')
    plt.legend()
    
    plt.subplot(1, 3, 2)
    window_size = 50
    correct_predictions = [1 if pred == best else 0 
                          for pred, best in zip(results_df['Predicted Algorithm'], results_df['Actual Best'])]
    accuracy = [sum(correct_predictions[max(0, i-window_size):i+1]) / min(i+1, window_size) 
               for i in range(len(correct_predictions))]
    plt.plot(results_df['Episode'], accuracy)
    plt.xlabel('Episode')
    plt.ylabel('Accuracy (rolling window)')
    plt.title('Training Accuracy Progress')
    
    plt.subplot(1, 3, 3)
    if loss_history:
        plt.plot(range(len(loss_history)), loss_history)
        plt.xlabel('Training Steps')
        plt.ylabel('Loss')
        plt.title('Training Loss')
    
    plt.tight_layout()
    plt.savefig('improved_training_progress.png')
    
    return results_df

In [8]:
# Optimized evaluation function
def evaluate_improved_model():
    state_size = 10  # Updated for new feature set
    action_size = 3
    agent = ImprovedDQNAgent(state_size, action_size)
    agent.load('improved_dqn_sorting_model.weights.h5')
    agent.epsilon = 0  # No exploration during evaluation
    
    algorithms = [bubble_sort, merge_sort, quick_sort]
    algo_names = ['Bubble Sort', 'Merge Sort', 'Quick Sort']
    results = []

    print(f"Evaluating model on {len(eval_files)} test files")
    
    # Use a subset of files for evaluation to speed up the process
    sample_size = min(50, len(eval_files))
    sampled_eval_files = random.sample(eval_files, sample_size)
    print(f"Using {sample_size} sampled files for evaluation")
    
    from tqdm import tqdm
    for i, dataset_file in enumerate(tqdm(sampled_eval_files, desc="Evaluating")):
        # Load the dataset
        with open(dataset_file, 'r') as f:
            line = f.readline()
            arr = list(map(int, line.split()))
        
        # For large arrays, use sampling to speed up execution time measurement
        if len(arr) > 10000:
            sample_size = min(10000, len(arr) // 5)
            arr_sample = random.sample(arr, sample_size)
        else:
            arr_sample = arr.copy()
        
        # Extract features from full array for prediction
        state = get_dataset_features(arr).reshape(1, state_size)
        
        # Agent selects an action
        action = agent.act(state, training=False)
        
        # Measure execution times for all algorithms on the sampled array
        exec_times = [measure_execution_time(algo, arr_sample) for algo in algorithms]
        best_algo_idx = np.argmin(exec_times)
        
        # Calculate detailed metrics
        time_ratio = exec_times[action] / exec_times[best_algo_idx] if exec_times[best_algo_idx] > 0 else 1.0
        is_correct = action == best_algo_idx
        
        # Log results
        folder = os.path.dirname(dataset_file)
        results.append({
            'Test Index': i,
            'Dataset File': os.path.basename(dataset_file),
            'Folder': folder,
            'Predicted Algorithm': algo_names[action],
            'Actual Best': algo_names[best_algo_idx],
            'Execution Time': exec_times[action],
            'Best Execution Time': exec_times[best_algo_idx],
            'Time Ratio': time_ratio,
            'Correct': is_correct
        })
        
        # Reduce output frequency to minimize overhead
        if (i+1) % 10 == 0:
            print(f"Completed {i+1}/{len(sampled_eval_files)} evaluations")
    
    # Create results DataFrame
    results_df = pd.DataFrame(results)
    
    # Calculate and display overall metrics
    accuracy = results_df['Correct'].mean() * 100
    avg_time_ratio = results_df['Time Ratio'].mean()
    
    print(f"\nEvaluation Results:")
    print(f"Accuracy: {accuracy:.2f}%")
    print(f"Average Time Ratio (predicted/best): {avg_time_ratio:.2f}")
    
    # Plot evaluation results
    plt.figure(figsize=(15, 5))
    
    plt.subplot(1, 3, 1)
    plt.bar(['Accuracy'], [accuracy])
    plt.ylim(0, 100)
    plt.title(f'Test Accuracy: {accuracy:.2f}%')
    plt.ylabel('Percentage (%)')
    
    plt.subplot(1, 3, 2)
    plt.hist(results_df['Time Ratio'], bins=20, range=(1, 5))
    plt.xlabel('Time Ratio (predicted/best)')
    plt.ylabel('Count')
    plt.title(f'Distribution of Time Ratios\nAvg: {avg_time_ratio:.2f}')
    
    plt.subplot(1, 3, 3)
    algo_counts = results_df['Predicted Algorithm'].value_counts()
    plt.pie(algo_counts, labels=algo_counts.index, autopct='%1.1f%%')
    plt.title('Algorithm Selection Distribution')
    
    plt.tight_layout()
    plt.savefig('improved_evaluation_results.png')
    
    return results_df

In [9]:
# Cell 9: Advanced Results Plotting
def plot_advanced_results(df_train, df_test):
    # Create a figure with multiple subplots
    fig = plt.figure(figsize=(20, 15))
    
    # 1. Training progress (accuracy and rewards)
    ax1 = fig.add_subplot(3, 2, 1)
    window_size = 50
    correct_predictions = [1 if pred == best else 0 
                          for pred, best in zip(df_train['Predicted Algorithm'], df_train['Actual Best'])]
    accuracy = [sum(correct_predictions[max(0, i-window_size):i+1]) / min(i+1, window_size) 
               for i in range(len(correct_predictions))]
    ax1.plot(df_train['Episode'], accuracy, 'b-', label='Accuracy')
    ax1.set_xlabel('Episode')
    ax1.set_ylabel('Accuracy (rolling window)', color='b')
    ax1.tick_params(axis='y', labelcolor='b')
    ax1.set_title('Training Progress')
    
    ax1_2 = ax1.twinx()
    ax1_2.plot(df_train['Episode'], df_train['Reward'].rolling(window=50).mean(), 'r-', label='Reward')
    ax1_2.set_ylabel('Average Reward', color='r')
    ax1_2.tick_params(axis='y', labelcolor='r')
    
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax1_2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
    
    # 2. Test accuracy comparison
    ax2 = fig.add_subplot(3, 2, 2)
    accuracy = df_test['Correct'].mean() * 100
    ax2.bar(['Improved Model'], [accuracy], color='green', label='Improved')
    ax2.bar(['Original Model'], [33.33], color='red', label='Original')
    ax2.set_ylim(0, 100)
    ax2.set_ylabel('Accuracy (%)')
    ax2.set_title('Test Accuracy Comparison')
    ax2.legend()
    
    # 3. Algorithm distribution in test set
    ax3 = fig.add_subplot(3, 2, 3)
    algo_counts = df_test['Predicted Algorithm'].value_counts()
    ax3.pie(algo_counts, labels=algo_counts.index, autopct='%1.1f%%', colors=sns.color_palette("Set2"))
    ax3.set_title('Algorithm Selection Distribution')
    
    # 4. Time ratio distribution
    ax4 = fig.add_subplot(3, 2, 4)
    sns.histplot(df_test['Time Ratio'], bins=20, kde=True, ax=ax4)
    ax4.set_xlabel('Time Ratio (predicted/best)')
    ax4.set_ylabel('Count')
    ax4.set_title('Distribution of Time Ratios')
    
    # 5. Feature importance analysis
    ax5 = fig.add_subplot(3, 2, 5)
    feature_names = [
        'Size', 'Sortedness', 'Inversions', 'Unique Ratio', 
        'Range', 'StdDev', 'Is Sorted', 'Is Reverse Sorted',
        'Plateaus', 'Sorted Segments'
    ]
    
    # Crude feature importance - just for visualization
    # In a real scenario, you would use proper feature importance methods
    importances = [0.15, 0.18, 0.2, 0.08, 0.05, 0.07, 0.1, 0.05, 0.07, 0.05]
    y_pos = np.arange(len(feature_names))
    ax5.barh(y_pos, importances, align='center')
    ax5.set_yticks(y_pos)
    ax5.set_yticklabels(feature_names)
    ax5.set_xlabel('Relative Importance')
    ax5.set_title('Feature Importance (Estimated)')
    
    # 6. Execution time comparison
    ax6 = fig.add_subplot(3, 2, 6)
    time_improvement = (df_test['Best Execution Time'].sum() / df_test['Execution Time'].sum()) * 100
    labels = ['Optimal', 'Improved Model', 'Random Choice']
    sizes = [100, time_improvement, 33.33]
    ax6.bar(labels, sizes, color=['green', 'blue', 'red'])
    ax6.set_ylabel('Efficiency (%)')
    ax6.set_title('Execution Time Efficiency')
    
    plt.tight_layout()
    plt.savefig('advanced_results_analysis.png', dpi=300)
    plt.show()

In [10]:
# Cell 10: Run the Training and Evaluation
episodes = 1000  # You can adjust this

print("Starting improved DQN training pipeline")

# Train the model
print("\n=== Training Phase ===")

# Install tqdm if not already installed
    
df_train = train_improved_dqn(episodes)
df_train.to_csv('improved_training_results.csv', index=False)

# Evaluate the model
print("\n=== Evaluation Phase ===")
df_test = evaluate_improved_model()
df_test.to_csv('improved_test_results.csv', index=False)

# Generate advanced plots
plot_advanced_results(df_train, df_test)

print("\nTraining and evaluation complete!")

Starting improved DQN training pipeline

=== Training Phase ===
Target model updated
Starting training for 1000 episodes with 24 training files
Precomputing features and execution times for 4 sample files...


2025-05-03 19:59:35.204650: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-05-03 19:59:35.204686: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-05-03 19:59:35.204693: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-05-03 19:59:35.204712: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-03 19:59:35.204721: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
Precomputing files: 100%|██████████| 4/4 [00:00<00:00, 52.33it/s]

Precomputation complete.





TypeError: '<' not supported between instances of 'NoneType' and 'NoneType'