In [None]:
import os
import pickle
import random
import shutil
import time
import yaml
from collections import defaultdict
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.decomposition import PCA
from ultralytics import YOLO
from pathlib import Path
import subprocess
from threading import Thread, Event

In [None]:
# =============================================================================
# EXPERIMENT CONFIGURATION
# =============================================================================
# Every name assigned in this cell is read as a module-level global by the
# cells below, so this cell has to run before any of them.

# Project Configuration
result_folder_dir = Path.cwd() / 'results'  # parent folder of the per-strategy result directories
data_dir = Path.cwd() / 'data'              # holds the day<N>/ datasets, ValFolder/ and the training buffers

# Learning Strategy Configuration
# Options: 1 = naive fine-tuning (warm start), 2 = cumulative learning (from scratch), 3 = language-based ER
# The value selects which working directories are prepared and which data
# preparation branch of the main loop runs.
mode = 3

# GPU ID to use for training
# The energy helpers also use this value as the index into the per-GPU list of
# readings returned by nvidia-smi.
GPU_ID = 1  

# Energy Measurement Configuration
# When 1, an idle-power baseline is measured and GPU power is logged on a
# background thread during each day's training.
power_measuring_mode = 0  # 0: no, 1: yes

# Training Parameters
num_days = 9  # Number of training days to simulate; the main loop covers day1 .. day<num_days>

# Model Hyperparameters
conf_th = 0.9        # Confidence threshold passed to both per-image YOLO inference and yolo.val()
num_epochs = 100     # Training epochs per day
num_clusters = 20    # Number of K-means clusters for language-based sampling (mode 3)
buffer_size = 300    # Experience replay buffer size; each cluster contributes up to buffer_size // num_clusters images

In [None]:
# =============================================================================
# INITIALIZATION AND SETUP
# =============================================================================
# Per-day result stores. The main loop fills them with 'day<N>' keys and
# pickles them into the results directory after every day.
daily_f1_scores, daily_precision_scores, daily_recall_scores = {}, {}, {}
days_train_time = {}

# Per-day GPU energy (only filled when power measurement is enabled).
days_energy = {}

# Images the detector missed, accumulated over all days (consumed by mode 3).
lan_accumulated_imgs_path = []


# =============================================================================
# LOAD LANGUAGE EMBEDDINGS
# =============================================================================
print("📚 Loading language embeddings...")

if mode == 3:
    # Both pickles live in the Language_description_generator folder of the
    # working directory. NOTE(review): pickle.load executes arbitrary code from
    # the file, so only load pickles produced by a trusted pipeline.
    embeddings_root = f'{Path.cwd()}/Language_description_generator'
    with open(f'{embeddings_root}/text_embeddings_dict.pickle', 'rb') as text_file:  # path to your text embeddings pickle file
        text_embeddings_dict = pickle.load(text_file)
    with open(f'{embeddings_root}/image_descriptions_dict.pickle', 'rb') as descr_file:  # path to your image descriptions pickle file
        image_descriptions_dict = pickle.load(descr_file)

    # Join the two tables: image path -> description -> embedding.
    image_embeddings_dict = {}
    for img_path, description in image_descriptions_dict.items():
        if description not in text_embeddings_dict:
            print(f"⚠️  Warning: No embedding found for description: {description}")
            continue
        image_embeddings_dict[img_path] = text_embeddings_dict[description]



# =============================================================================
# SETUP WORKING DIRECTORIES
# =============================================================================
def _reset_directory(path):
    """Create ``path`` if it is missing and delete everything currently inside it.

    Sub-directories are removed recursively. Symbolic links are unlinked
    rather than followed, because ``shutil.rmtree`` refuses to operate on a
    symlink and would otherwise abort the whole setup.
    """
    os.makedirs(path, exist_ok=True)
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)


print("📁 Setting up working directories...")

# Each strategy gets a freshly emptied results directory and a freshly emptied
# training buffer containing empty images/ and labels/ sub-folders.
if mode == 3:  # Language-based experience replay setup
    results_dir = f'{result_folder_dir}/language_based_ER'
    _reset_directory(results_dir)

    # Setup language training buffer directory
    experience_dir = f'{data_dir}/training_buffer_language'
    experience_images = f'{experience_dir}/images'
    experience_labels = f'{experience_dir}/labels'

    _reset_directory(experience_dir)
    os.makedirs(experience_images, exist_ok=True)
    os.makedirs(experience_labels, exist_ok=True)

elif mode == 1:  # Naive fine-tuning setup
    results_dir = f'{result_folder_dir}/naive_fine_tuning'
    _reset_directory(results_dir)

    dir_naive_train_temp = f'{data_dir}/training_buffer_naive_ft'
    _reset_directory(dir_naive_train_temp)
    os.makedirs(os.path.join(dir_naive_train_temp, 'images'), exist_ok=True)
    os.makedirs(os.path.join(dir_naive_train_temp, 'labels'), exist_ok=True)

elif mode == 2:  # Cumulative learning setup
    results_dir = f'{result_folder_dir}/cumulative_learning'
    _reset_directory(results_dir)

    cumulative_buffer_dir = f'{data_dir}/training_buffer_cumulative'
    _reset_directory(cumulative_buffer_dir)
    os.makedirs(os.path.join(cumulative_buffer_dir, 'images'), exist_ok=True)
    os.makedirs(os.path.join(cumulative_buffer_dir, 'labels'), exist_ok=True)

else:
    # Without this guard an unknown mode leaves `results_dir` undefined and the
    # notebook only fails later with an unrelated-looking NameError.
    raise ValueError(
        f"Unsupported mode {mode!r}: expected 1 (naive fine-tuning), "
        "2 (cumulative learning) or 3 (language-based ER)"
    )

# Energy bookkeeping: resume from a saved pickle when one is present in the
# results directory, otherwise start with an empty record.
if power_measuring_mode == 1:
    energy_file_path = f'{results_dir}/days_energy.pkl'
    if not os.path.exists(energy_file_path):
        days_energy = {}
        print("📊 Initialized new energy tracking")
    else:
        with open(energy_file_path, 'rb') as energy_file:
            days_energy = pickle.load(energy_file)
        print(f"📊 Loaded existing energy data: {days_energy}")

# Drop a leftover ./runs directory (YOLO's default output location) so old
# outputs do not pile up between experiments.
if os.path.isdir('runs'):
    shutil.rmtree('runs')



# =============================================================================
# INITIAL YOLO MODEL PREPARATION
# =============================================================================
print("🚀 Initializing base YOLO model...")

# Create the `day0` checkpoint that the first iteration of the daily loop loads
# from `<results_dir>/yolo_checkpoints/day0/weights/best.pt`.
# A single epoch with a vanishingly small learning rate is used; presumably the
# intent is to obtain a checkpoint configured for the day1 dataset while leaving
# the pretrained weights essentially untouched — TODO confirm.
yolo = YOLO('yolov8n.pt')
init_results = yolo.train(
    data=f'{data_dir}/day1/data.yaml',
    epochs=1,
    device=GPU_ID,  # train on the GPU selected in the configuration cell (was hard-coded to 1)
    project=results_dir,
    name='yolo_checkpoints/day0',
    lr0=0.0000000000001,
    lrf=0.0000000000001,
    optimizer='Adam',
    verbose=False,  # Suppress detailed training output
)




In [None]:
# =============================================================================
# ENERGY MEASUREMENT FUNCTIONS
# =============================================================================

# Global flag for emergency stop: setting it to True makes every measurement
# loop below exit at its next check.
energy_monitoring_stopped = False


def _read_gpu_power_draw(gpu_id=None):
    """Return the current power draw, in watts, of one GPU as reported by nvidia-smi.

    Args:
        gpu_id: Index into the per-GPU lines printed by nvidia-smi. ``None``
            selects the notebook-level ``GPU_ID``.

    Raises:
        subprocess.TimeoutExpired: nvidia-smi did not answer within 5 seconds.
        IndexError: nvidia-smi printed fewer lines than ``gpu_id`` requires.
        ValueError: the selected reading is not a number (e.g. ``[N/A]``).

    Only the selected line is converted to ``float``, so an unreadable value
    on a different GPU no longer invalidates the sample.
    """
    if gpu_id is None:
        gpu_id = GPU_ID
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=power.draw', '--format=csv,noheader,nounits'],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=5)
    readings = result.stdout.decode('utf-8').strip().split('\n')
    return float(readings[gpu_id])


# Function to measure the total power consumption of the GPU during training
def log_gpu_power_during_training(interval=1.0, stop_event=None, idle_power=0, energy_container=None, gpu_id=None):
    """Sample GPU power until ``stop_event`` is set and accumulate the net energy.

    Intended to run on a background thread while training is in progress.

    Args:
        interval: Target time between two samples, in seconds.
        stop_event: ``threading.Event`` that ends the sampling loop when set.
        idle_power: Baseline power in watts subtracted from every reading.
        energy_container: Optional list; the accumulated energy in joules is
            appended to it when the function exits, whatever the exit reason.
        gpu_id: GPU index to sample; ``None`` selects the notebook-level ``GPU_ID``.

    Each sample is weighted by the time that actually elapsed since the
    previous sample (or since the start for the first one), not by the nominal
    ``interval``. The nvidia-smi call and the polling sleep make an iteration
    longer than ``interval``, so the nominal value under-counts the energy.
    """
    total_energy = 0.0  # joules accumulated so far
    start_time = time.time()
    last_tick = time.monotonic()  # end of the time span already accounted for

    try:
        while not stop_event.is_set() and not energy_monitoring_stopped:  # Check both stop conditions
            try:
                net_power = _read_gpu_power_draw(gpu_id) - idle_power

                # Energy (Joules) = Power (Watts) * Time (Seconds) since the last sample
                now = time.monotonic()
                total_energy += net_power * (now - last_tick)
                last_tick = now

                print(f"# Time: {time.time() - start_time:.2f} seconds, GPU Power Consumption (Net): {[net_power]} Watts")

                # Sleep in 0.1 s slices so a stop request is honoured quickly
                for _ in range(int(interval * 10)):
                    if stop_event.is_set() or energy_monitoring_stopped:
                        break
                    time.sleep(0.1)

            except (subprocess.TimeoutExpired, subprocess.CalledProcessError, ValueError, IndexError) as e:
                # A failed reading is skipped; the next good sample covers the gap.
                print(f"Warning: Error reading GPU power: {e}")
                if stop_event.wait(interval) or energy_monitoring_stopped:  # Wait for interval or until stop event
                    break

    except KeyboardInterrupt:
        print("Energy monitoring interrupted by user")
    except Exception as e:
        print(f"Energy monitoring stopped due to error: {e}")
    finally:
        if energy_container is not None:
            energy_container.append(total_energy)
        print(f"Energy monitoring thread stopped. Total energy: {total_energy:.2f} Joules")


# Measure idle power before starting the training
def measure_idle_gpu_power(duration=10, interval=1.0, gpu_id=None):
    """Return the mean GPU power in watts over ``int(duration / interval)`` samples.

    Args:
        duration: Total measurement window in seconds.
        interval: Pause between samples in seconds.
        gpu_id: GPU index to sample; ``None`` selects the notebook-level ``GPU_ID``.

    Returns:
        The mean of the collected samples, or 0 when no sample was collected
        or a reading failed. An interruption returns the mean of what was
        collected so far.
    """
    idle_power = []
    try:
        for _ in range(int(duration / interval)):
            if energy_monitoring_stopped:
                break
            idle_power.append(_read_gpu_power_draw(gpu_id))
            time.sleep(interval)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, ValueError, IndexError) as e:
        print(f"Warning: Error measuring idle power: {e}")
        return 0  # Return 0 if measurement fails
    except KeyboardInterrupt:
        print("Idle power measurement interrupted")

    return sum(idle_power) / len(idle_power) if idle_power else 0


def measure_gpu_power(duration=10, interval=1, gpu_id=None):
    """Sample GPU power for ``duration`` seconds and return the average as a list.

    Args:
        duration: Measurement window in seconds.
        interval: Pause between samples in seconds.
        gpu_id: GPU index to sample; ``None`` selects the notebook-level ``GPU_ID``.

    Returns:
        A one-element list holding the mean power in watts, or ``[0]`` when no
        sample could be collected. Samples gathered before a failed reading or
        an interruption are still averaged.
    """
    power_measurements = []
    gpu_start_time = time.time()

    try:
        while (time.time() - gpu_start_time) < duration and not energy_monitoring_stopped:
            power_draws = [_read_gpu_power_draw(gpu_id)]  # to track only one GPU
            power_measurements.append(power_draws)
            print(power_draws)

            time.sleep(interval)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, ValueError, IndexError) as e:
        print(f"Warning: Error measuring GPU power: {e}")
    except KeyboardInterrupt:
        print("GPU power measurement interrupted")

    if not power_measurements:
        return [0]  # Return default if no measurements

    # Column-wise mean; every row has one entry, so this yields one value.
    return [sum(gpu_power) / len(gpu_power) for gpu_power in zip(*power_measurements)]
    


# =============================================================================
# ENERGY MONITORING CLEANUP UTILITY
# =============================================================================

import threading
# NOTE(review): everything between the triple quotes below is a bare string
# literal, so none of it is executed: `stop_all_energy_threads`,
# `cleanup_energy_monitoring`, `current_stop_event` and `current_energy_thread`
# are NOT defined at runtime. The text is disabled helper code kept for
# reference. It relies on the private `Thread._target` / `Thread._stop`
# attributes, so review it before re-enabling.
'''
def stop_all_energy_threads():
    """
    Utility function to stop any lingering energy monitoring threads
    Call this if energy monitoring continues after cell interruption
    """
    print("🛑 Stopping all energy monitoring threads...")
    
    for thread in threading.enumerate():
        if thread.name != 'MainThread' and 'log_gpu_power' in str(thread._target) if hasattr(thread, '_target') else False:
            print(f"Found energy monitoring thread: {thread.name}")
            if hasattr(thread, '_stop'):
                thread._stop()
    
    print("✅ Energy monitoring cleanup complete")

current_stop_event = None
current_energy_thread = None

def cleanup_energy_monitoring():
    """Clean up current energy monitoring thread"""
    global current_stop_event, current_energy_thread
    
    if current_stop_event is not None:
        print("Stopping energy monitoring...")
        current_stop_event.set()
        
    if current_energy_thread is not None and current_energy_thread.is_alive():
        current_energy_thread.join(timeout=3)

        
    current_stop_event = None
    current_energy_thread = None
'''

# =============================================================================
# MEASURE IDLE GPU POWER
# =============================================================================
# Establish the idle baseline that is later subtracted from every power
# reading taken during training. With measurement disabled the baseline is 0.
if power_measuring_mode != 1:
    idle_power = 0
    print("⚠️ Power measurement disabled")
else:
    print("🔌 Measuring idle GPU power consumption...")
    # Sample for 10 seconds, one reading per second
    avg_power = measure_gpu_power(duration=10, interval=1)
    for gpu_index, gpu_watts in enumerate(avg_power):
        print(f"Average Power Consumption for GPU {gpu_index}: {gpu_watts:.2f} Watts")

    idle_power = sum(avg_power) / len(avg_power)
    print(f'idle_power: {idle_power:.3f} Watts')


In [None]:
# =============================================================================
# MAIN EXPERIMENT LOOP - INCREMENTAL LEARNING SIMULATION
# =============================================================================

def _label_path_for(image_path):
    """Return the label file that belongs to ``image_path``.

    Relies on the layout used throughout this notebook:
    ``<root>/images/<name>.<ext>`` next to ``<root>/labels/<name>.txt``.
    Only the final directory and the file suffix are swapped, so an
    'images' or 'jpg' substring elsewhere in the path cannot corrupt the
    result, and non-jpg images are handled too.
    """
    image_path = Path(image_path)
    return str(image_path.parent.parent / 'labels' / f'{image_path.stem}.txt')


def _remove_files_in(directory):
    """Delete every regular file directly inside ``directory`` (sub-folders are kept)."""
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)


def _copy_images_with_labels(image_paths, images_dir, labels_dir):
    """Copy each image into ``images_dir`` and its label file into ``labels_dir``."""
    for image_path in image_paths:
        shutil.copy(image_path, images_dir)
        shutil.copy(_label_path_for(image_path), labels_dir)


def _refresh_validation_set(candidates, val_images_dir, val_labels_dir, fraction=0.2):
    """Replace the validation folder content with a random subset of ``candidates``.

    Draws ``max(1, int(len(candidates) * fraction))`` images, empties both
    validation folders and copies the drawn images with their labels.

    Raises:
        ValueError: ``candidates`` is empty, i.e. there is nothing to train on.
    """
    if not candidates:
        raise ValueError("No training images are available for this day, so no validation subset can be drawn")
    val_list_images = random.sample(candidates, max(1, int(len(candidates) * fraction)))
    _remove_files_in(val_images_dir)
    _remove_files_in(val_labels_dir)
    _copy_images_with_labels(val_list_images, val_images_dir, val_labels_dir)
    return val_list_images


def _write_data_yaml(yaml_path, train_dir, val_dir, test_dir):
    """Write the two-class ('avoid', 'suck') dataset description used for training."""
    datayaml = {
        'train': train_dir,
        'val': val_dir,
        'test': test_dir,
        'nc': 2,
        'names': ['avoid', 'suck']
    }
    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(datayaml, yaml_file, default_flow_style=None)


# Each simulated day: evaluate yesterday's model on today's data, collect the
# images it missed, build a training set according to `mode`, then train.
for day_number in range(1, num_days + 1):
    day_key = f'day{day_number}'

    # =========================================================================
    # DAILY MODEL INITIALIZATION
    # =========================================================================

    # Load the model from previous day's training
    yolo = YOLO(f'{results_dir}/yolo_checkpoints/day{day_number-1}/weights/best.pt')

    # Load accumulated performance metrics from previous days (if any)
    if day_number > 1:
        with open(f'{results_dir}/daily_precision_scores.pkl', 'rb') as file:
            daily_precision_scores = pickle.load(file)
        with open(f'{results_dir}/daily_recall_scores.pkl', 'rb') as file:
            daily_recall_scores = pickle.load(file)
        with open(f'{results_dir}/daily_f1_scores.pkl', 'rb') as file:
            daily_f1_scores = pickle.load(file)
        with open(f'{results_dir}/days_train_time.pkl', 'rb') as file:
            days_train_time = pickle.load(file)

        # Load energy data if power measurement is enabled
        if power_measuring_mode == 1:
            with open(f'{results_dir}/days_energy.pkl', 'rb') as file:
                days_energy = pickle.load(file)
    else:
        # Initialize metrics dictionaries for first day
        daily_precision_scores = {}
        daily_recall_scores = {}
        daily_f1_scores = {}
        days_train_time = {}

        # Initialize energy tracking
        if power_measuring_mode == 1:
            days_energy = {}

    # =========================================================================
    # MODEL EVALUATION ON CURRENT DAY
    # =========================================================================

    print(f"\n\n🔬 Day {day_number} - Model Evaluation")
    print("=" * 100)

    # Prepare test data for current day
    test_day_path = f'{data_dir}/day{day_number}/images'
    imgs_path = sorted(os.listdir(test_day_path))

    # Create directory for saving performance visualizations
    performance_dir = f"{results_dir}/vacuum_performance/day{day_number}"
    os.makedirs(performance_dir, exist_ok=True)

    # Run inference on each test image and save results
    yolo_conf_paths = []  # images for which YOLO produced no detection at conf_th
    for img_name in imgs_path:
        source = f'{test_day_path}/{img_name}'
        results = yolo(source, conf=conf_th, verbose=False)  # Suppress inference output

        for result in results:
            # Collect images where YOLO failed to detect any objects
            if result.boxes.cls.numel() == 0:
                yolo_conf_paths.append(source)

            # Save visualization of detection results
            result.save(filename=f"{performance_dir}/{img_name}")

    # Formal validation and metric computation
    yolo_test_on_day = yolo.val(data=f'{data_dir}/day{day_number}/data.yaml', split='test', conf=conf_th, verbose=False)  # Suppress validation output
    dict_metrics = yolo_test_on_day.results_dict

    # Calculate F1 score. The 0.001 in the denominator avoids division by zero;
    # NOTE(review): it also lowers every reported F1 slightly. It is kept so
    # results stay comparable with earlier runs.
    precision = dict_metrics['metrics/precision(B)']
    recall = dict_metrics['metrics/recall(B)']
    f1 = 2 * (precision * recall) / (0.001 + precision + recall)

    # Store metrics for current day
    daily_precision_scores[day_key] = precision
    daily_recall_scores[day_key] = recall
    daily_f1_scores[day_key] = f1

    # Display performance metrics
    print(f"\n\n📊 Performance Metrics:")
    print(f"   Precision: {daily_precision_scores[day_key]:.4f}")
    print(f"   Recall:    {daily_recall_scores[day_key]:.4f}")
    print(f"   F1 Score:  {daily_f1_scores[day_key]:.4f}")

    # Save metrics to files
    with open(f'{results_dir}/daily_precision_scores.pkl', 'wb') as file:
        pickle.dump(daily_precision_scores, file)
    with open(f'{results_dir}/daily_recall_scores.pkl', 'wb') as file:
        pickle.dump(daily_recall_scores, file)
    with open(f'{results_dir}/daily_f1_scores.pkl', 'wb') as file:
        pickle.dump(daily_f1_scores, file)

    # =========================================================================
    # TRAINING STRATEGY SELECTION AND EXECUTION
    # =========================================================================

    # Shared validation folder. It is created on demand so a fresh checkout
    # does not fail when the folder is listed for cleaning.
    # NOTE(review): in every mode the validation images are drawn from the
    # same pool that is used for training.
    val_images_dir = f'{data_dir}/ValFolder/images'
    val_labels_dir = f'{data_dir}/ValFolder/labels'
    os.makedirs(val_images_dir, exist_ok=True)
    os.makedirs(val_labels_dir, exist_ok=True)

    if mode == 1:  # Naive Fine-Tuning with Warm Start
        print(f"\n\n🎯 Day {day_number} - Naive Fine-Tuning Training")
        print("=" * 100)

        # Train only on today's failed detections
        train_images_dir = f'{dir_naive_train_temp}/images'
        train_labels_dir = f'{dir_naive_train_temp}/labels'

        # Clean previous training data
        _remove_files_in(train_images_dir)
        _remove_files_in(train_labels_dir)

        # Copy failed detection images and labels to training directory
        _copy_images_with_labels(yolo_conf_paths, train_images_dir, train_labels_dir)

        # Create validation set (20% of training data)
        _refresh_validation_set(yolo_conf_paths, val_images_dir, val_labels_dir)

        # Create YAML configuration for training
        YAML_DIR = f'{dir_naive_train_temp}/data.yaml'
        _write_data_yaml(YAML_DIR, train_dir=train_images_dir, val_dir=val_images_dir, test_dir=val_images_dir)

    elif mode == 2:  # Cumulative Learning from Scratch
        print(f"\n\n📚 Day {day_number} - Cumulative Learning Training")
        print("=" * 100)

        accumulated_images = f'{data_dir}/training_buffer_cumulative/images'
        accumulated_labels = f'{data_dir}/training_buffer_cumulative/labels'
        YAML_DIR = f'{data_dir}/training_buffer_cumulative/data.yaml'

        # The dataset description never changes, so it is written on day 1 only
        if day_number == 1:
            _write_data_yaml(YAML_DIR, train_dir=accumulated_images, val_dir=val_images_dir, test_dir=accumulated_images)

        # Reset to base model for training from scratch
        yolo = YOLO('yolov8n.pt')

        # Add current day's failed detections to accumulated dataset.
        # NOTE(review): files are stored under their base name, so images from
        # different days that share a file name would overwrite each other —
        # confirm the dataset uses unique names.
        _copy_images_with_labels(yolo_conf_paths, accumulated_images, accumulated_labels)

        # Create validation set from accumulated data
        imgs_path_val_select = [os.path.join(accumulated_images, name) for name in os.listdir(accumulated_images)]
        _refresh_validation_set(imgs_path_val_select, val_images_dir, val_labels_dir)

    elif mode == 3:  # Language-Based Experience Replay
        print(f"\n\n🔤 Day {day_number} - Language-Based Experience Replay")
        print("=" * 100)

        # Setup clustering directory for current day
        dir_clusters = f'{results_dir}/clusters/day{day_number}'
        os.makedirs(dir_clusters, exist_ok=True)
        _remove_files_in(dir_clusters)

        # Accumulate failed detection images across all days
        lan_accumulated_imgs_path.extend(yolo_conf_paths)

        # Create embeddings dictionary for accumulated images.
        # image_embeddings_dict is looked up with paths relative to the working directory.
        image_to_embedding = {}
        for img_path in lan_accumulated_imgs_path:
            relative_img_path = os.path.relpath(img_path, Path.cwd())
            if relative_img_path in image_embeddings_dict:
                image_to_embedding[img_path] = image_embeddings_dict[relative_img_path]
            else:
                print(f"⚠️  Warning: No embedding found for image: {relative_img_path}")

        # Prepare data for K-means clustering
        image_names = list(image_to_embedding.keys())
        embeddings = list(image_to_embedding.values())
        if not embeddings:
            raise ValueError(f"Day {day_number}: no accumulated image has a language embedding, so nothing can be clustered")

        # K-means cannot form more clusters than there are samples, so the
        # configured count is capped on days with very few accumulated images.
        effective_clusters = min(num_clusters, len(embeddings))
        per_cluster_quota = buffer_size // effective_clusters

        # Perform K-means clustering on language embeddings
        mini_kmeans = KMeans(n_clusters=effective_clusters, random_state=42, n_init=10)
        mini_kmeans.fit(embeddings)
        labels = mini_kmeans.labels_

        # Organize images by cluster
        cluster_to_images_dict = defaultdict(list)
        for image_name, label in zip(image_names, labels):
            cluster_to_images_dict[label].append(image_name)

        # Build experience replay buffer
        experience_buffer = []
        for cluster_label, image_list in cluster_to_images_dict.items():

            # Save cluster images for analysis
            for cluster_img_path in image_list:
                cluster_img_name = os.path.basename(cluster_img_path)
                cluster_dest_path = os.path.join(dir_clusters, f'L{cluster_label}_{cluster_img_name}')
                shutil.copy(cluster_img_path, cluster_dest_path)

            # Sample from cluster for experience buffer
            if len(image_list) >= per_cluster_quota:
                selected_images = random.sample(image_list, per_cluster_quota)
            else:
                selected_images = image_list
            experience_buffer.extend(selected_images)

        # Clean language training buffer directories
        _remove_files_in(experience_images)
        _remove_files_in(experience_labels)

        # Copy experience buffer to training directory
        _copy_images_with_labels(experience_buffer, experience_images, experience_labels)

        # Save experience buffer for analysis
        save_experience_images = f'{results_dir}/experience_buffer/day{day_number}'
        os.makedirs(save_experience_images, exist_ok=True)
        _remove_files_in(save_experience_images)
        for buffer_image_path in experience_buffer:
            shutil.copy(buffer_image_path, save_experience_images)

        # Create validation set from experience buffer
        _refresh_validation_set(experience_buffer, val_images_dir, val_labels_dir)

        # Create YAML configuration for language training buffer
        YAML_DIR = f'{experience_dir}/data.yaml'
        _write_data_yaml(
            YAML_DIR,
            train_dir=f'{experience_dir}/images',
            val_dir=val_images_dir,
            test_dir=f'{experience_dir}/images',
        )

    else:
        raise ValueError(f"Unsupported mode {mode!r}: expected 1, 2 or 3")

    # =========================================================================
    # MODEL TRAINING EXECUTION WITH ENERGY MEASUREMENT
    # =========================================================================

    # Remove a stale output directory for this day. It must be the directory
    # the training call writes to (project/name). Otherwise YOLO saves into a
    # renamed folder and the next day would load the old checkpoint.
    check_dir = f'{results_dir}/yolo_checkpoints/day{day_number}'
    if os.path.isdir(check_dir):
        shutil.rmtree(check_dir)

    # Initialize variables for energy measurement
    stop_event = None
    energy_container = []
    log_thread = None
    train_start_time = None  # stays None when training never starts

    try:
        # Start energy measurement if enabled
        if power_measuring_mode == 1:
            stop_event = Event()
            energy_container = []
            log_thread = Thread(target=log_gpu_power_during_training, args=(1.0, stop_event, idle_power, energy_container), daemon=False)
            log_thread.start()

        # Execute YOLO training with configured parameters
        train_start_time = time.time()
        yolo.train(
            data=YAML_DIR,
            epochs=num_epochs,
            project=results_dir,
            name=f'yolo_checkpoints/day{day_number}',
            device=GPU_ID,  # same GPU whose power draw is being logged
            patience=200,
            verbose=False  # Suppress detailed training output
        )

    except KeyboardInterrupt:
        # Re-raise so an interrupt stops the experiment instead of moving on to the next day
        print(f"\n⚠️ Training interrupted by user on day {day_number}")
        raise

    except Exception as e:
        # Re-raise: continuing would evaluate the next day with a missing or half-trained checkpoint
        print(f"\n❌ Training failed on day {day_number}: {e}")
        raise

    finally:
        # Take the end time before waiting for the logger thread
        train_end_time = time.time()

        # Always ensure energy measurement thread is stopped properly
        if power_measuring_mode == 1 and stop_event is not None:
            print("Stopping energy measurement thread...")
            stop_event.set()
            if log_thread is not None and log_thread.is_alive():
                log_thread.join(timeout=5)  # Wait up to 5 seconds for thread to stop
                if log_thread.is_alive():
                    print("⚠️ Energy monitoring thread did not stop gracefully")

            total_energy = energy_container[0] if energy_container else 0
            days_energy[day_key] = total_energy
            print(f'   ⚡ Energy consumed on day {day_number}: {total_energy:.2f} Joules')
            print('   ##############   days_energy: ', days_energy)

            with open(f'{results_dir}/days_energy.pkl', 'wb') as file:
                pickle.dump(days_energy, file)

        # Record training time (in minutes) only if training actually started today
        if train_start_time is not None:
            days_train_time[day_key] = (train_end_time - train_start_time)/60
            with open(f'{results_dir}/days_train_time.pkl', 'wb') as file:
                pickle.dump(days_train_time, file)
            print(f'   ⏱️  Training time for day {day_number}: {days_train_time[day_key]:.2f} minutes')


# Closing report: one line per day plus the overall energy total.
print("\n\n🎉 Experiment Complete!")
if power_measuring_mode == 1:
    print("📊 Final Energy Consumption Summary:")
    per_day_lines = [f"   {day}: {energy:.2f} Joules" for day, energy in days_energy.items()]
    for summary_line in per_day_lines:
        print(summary_line)
    total_energy_consumed = sum(days_energy.values())
    print(f"   Total Energy: {total_energy_consumed:.2f} Joules")

In [None]:
# Plot F1 scores for all available modes
import pickle
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os
%matplotlib inline

# Define mode configurations in desired order: naive, language, cumulative
mode_configs = {
    1: {'name': 'naive fine-tuning', 'color': 'blue', 'path': Path.cwd() / 'results/naive_fine_tuning/daily_f1_scores.pkl', 'marker': 'o'},
    3: {'name': 'language-based ER', 'color': 'green', 'path': Path.cwd() / 'results/language_based_ER/daily_f1_scores.pkl', 'marker': '^'},
    2: {'name': 'cumulative learning', 'color': 'orange', 'path': Path.cwd() / 'results/cumulative_learning/daily_f1_scores.pkl', 'marker': 's'}
}

# Red background shading strength for the 1st, 2nd and 3rd group of three days.
# Any further group falls back to no shading instead of raising IndexError.
DAYS_PER_GROUP = 3
GROUP_ALPHAS = [6/60, 3/60, 0/60]

# Number of trailing days averaged in the printed summary.
LAST_N_DAYS = 6


def day_sort_key(day_label):
    """Return a sort key that orders day labels by their trailing number.

    Plain string sorting places 'day10' before 'day2'. This key splits a label
    into its non-numeric prefix and trailing integer so that labels sort as
    day1, day2, ..., day10. Labels without a trailing number sort by their
    text, ahead of numbered labels that share the same prefix.
    """
    text = str(day_label)
    prefix = text.rstrip('0123456789')
    suffix = text[len(prefix):]
    return (prefix, int(suffix)) if suffix else (text, -1)


# Load every available score file exactly once. The results are kept in a
# cell-local dict so the `daily_f1_scores` tracker used by the training loop
# is not overwritten when this cell is run.
f1_by_mode = {}
for mode_num, config in mode_configs.items():
    if os.path.exists(config['path']):
        # NOTE: pickle.load can execute arbitrary code; only open result files
        # produced by this notebook.
        with open(config['path'], 'rb') as file:
            f1_by_mode[mode_num] = pickle.load(file)
available_modes = list(f1_by_mode)

# Print unavailable modes first
for mode_num, config in mode_configs.items():
    if mode_num not in f1_by_mode:
        print(f"{config['name']} dict not available")

if available_modes:
    fig, ax = plt.subplots(figsize=(8, 4))

    # Collect all unique days across available modes, in numeric day order
    all_days = set()
    for scores in f1_by_mode.values():
        all_days.update(scores.keys())
    days = sorted(all_days, key=day_sort_key)
    x = np.arange(len(days))

    # Add shading to separate sets of three days
    for group_idx, start in enumerate(range(0, len(days), DAYS_PER_GROUP)):
        alpha = GROUP_ALPHAS[group_idx] if group_idx < len(GROUP_ALPHAS) else 0
        ax.axvspan(start - 0.5, start + DAYS_PER_GROUP - 0.5, color='red', alpha=alpha)

    # Plot lines for each available mode and calculate mean for last 6 days
    print(f"\nMean F1 score for last {LAST_N_DAYS} days:")
    for mode_num in available_modes:
        config = mode_configs[mode_num]
        scores = f1_by_mode[mode_num]

        # Align F1 values with all days; a day this mode has no score for
        # becomes NaN so it shows as a gap rather than a misleading F1 of 0.
        f1_values = [scores.get(day, np.nan) for day in days]

        ax.plot(x, f1_values, marker=config['marker'], linestyle='-', label=config['name'],
                markersize=4, linewidth=1, color=config['color'])

        # Average only the scores this mode actually has within the window,
        # and report honestly when the window is not fully covered.
        recent_scores = [scores[day] for day in days[-LAST_N_DAYS:] if day in scores]
        mean_f1 = np.mean(recent_scores) if recent_scores else 0
        if len(recent_scores) >= LAST_N_DAYS:
            print(f"  {config['name']}: {mean_f1:.4f}")
        else:
            print(f"  {config['name']}: {mean_f1:.4f} (only {len(recent_scores)} days available)")

    # Add labels and formatting
    ax.set_ylabel('F$_1$ score')
    ax.set_xticks(x)
    ax.set_xticklabels(days)
    ax.legend()

    ax.grid(True, which='both', linestyle='--', color='lightgray', linewidth=0.5)
    ax.grid(axis='x', color='gray', linewidth=0.7)
    ax.minorticks_on()

    fig.tight_layout()
    plt.show()
else:
    print("No F1 score data files found.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
import os
from pathlib import Path

# Mode configurations
mode_configs = {
    1: {'name': 'Naive FT (warm start)', 'color': '#1f77b4', 'path': Path.cwd() / 'results/naive_fine_tuning/days_energy.pkl'},  # blue
    3: {'name': 'Language-based ER', 'color': '#2ca02c', 'path': Path.cwd() / 'results/language_based_ER/days_energy.pkl'},      # green
    2: {'name': 'Cumulative learning', 'color': '#ff7f0e', 'path': Path.cwd() / 'results/cumulative_learning/days_energy.pkl'}   # orange
}


def energy_day_key(day_label):
    """Return a sort key that orders day labels by their trailing number.

    Plain string sorting places 'day10' before 'day2'. This key splits a label
    into its non-numeric prefix and trailing integer so that labels sort as
    day1, day2, ..., day10. Labels without a trailing number sort by their
    text, ahead of numbered labels that share the same prefix.
    """
    text = str(day_label)
    prefix = text.rstrip('0123456789')
    suffix = text[len(prefix):]
    return (prefix, int(suffix)) if suffix else (text, -1)


# Load every available energy log exactly once and reuse it below.
energy_by_mode = {}
for m, cfg in mode_configs.items():
    if os.path.exists(cfg['path']):
        # NOTE: pickle.load can execute arbitrary code; only open result files
        # produced by this notebook.
        with open(cfg['path'], 'rb') as f:
            energy_by_mode[m] = pickle.load(f)

# Check available data files
available_modes = list(energy_by_mode)

# Optional: compute energy reduction
if 3 in energy_by_mode and 2 in energy_by_mode:
    language_energy = energy_by_mode[3]
    cumulative_energy = energy_by_mode[2]
    total_lang = sum(language_energy.values())
    total_cum = sum(cumulative_energy.values())
    # Skip the percentage when the cumulative total is zero (division by zero).
    if total_cum > 0:
        reduction = ((total_cum - total_lang) / total_cum) * 100
        print(f"Energy reduction (Language-based ER vs Cumulative): {reduction:.1f}%")
        print(f"Language total: {total_lang:.2f} J, Cumulative total: {total_cum:.2f} J")

# Plot
if available_modes:
    fig, ax = plt.subplots(figsize=(8, 4))

    # Clean grid and background
    ax.set_facecolor('white')
    ax.grid(True, which='major', linestyle='--', color='lightgray', linewidth=0.8)
    ax.grid(True, which='minor', linestyle='--', color='lightgray', linewidth=0.5)
    ax.minorticks_on()

    # Collect days across modes, in numeric day order
    all_days = set()
    for data in energy_by_mode.values():
        all_days.update(data.keys())
    days = sorted(all_days, key=energy_day_key)
    x = np.arange(len(days))

    bar_width = 0.25 if len(available_modes) > 1 else 0.6
    max_energy = 0

    for idx, m in enumerate(available_modes):
        cfg = mode_configs[m]
        data = energy_by_mode[m]
        # Stored values are printed as Joules by the training cell; divide by
        # 1000 to plot kJ. Days a mode has no entry for are drawn as zero.
        values = np.array([data.get(day, 0) for day in days]) / 1000
        if values.size:  # np.max raises ValueError on an empty array
            max_energy = max(max_energy, np.max(values))
        if len(available_modes) > 1:
            # Centre the group of bars for each day around its tick position
            xpos = x + (idx - len(available_modes)/2 + 0.5) * bar_width
        else:
            xpos = x
        ax.bar(xpos, values, width=bar_width, label=cfg['name'], color=cfg['color'], zorder=3)

    ax.set_xticks(x)
    ax.set_xticklabels(days)
    ax.set_ylabel('GPU energy consumption (kJ)')
    # Only pin the y-range when there is something to show; set_ylim(0, 0)
    # would request identical limits.
    if max_energy > 0:
        ax.set_ylim(0, max_energy * 1.1)
    ax.legend(frameon=False)  # legend without border

    fig.tight_layout()
    plt.show()
else:
    print("No energy data files found.")
