<center><h1>SaRLVision Reinforcement Learning Training</h1>
<h2>Matthias Bartolo</h2>

</center>

**<h3> Package Imports </h3>**

In [None]:
import SaRLVision as srlv
import gymnasium as gym

from SaRLVision.utils import *
from SaRLVision.agents import *

**<h3> Defining Classes </h3>**

In [None]:
# Object categories iterated over by the training cells in this notebook
# (one agent is trained per entry).
classes = [
    'cat', 'bird', 'motorbike', 'diningtable', 'train',
    'tvmonitor', 'bus', 'horse', 'car', 'pottedplant',
    'person', 'chair', 'boat', 'bottle', 'bicycle',
    'dog', 'aeroplane', 'cow', 'sheep', 'sofa',
]

# Show the categories, followed by how many there are (label in ANSI yellow)
print(classes)
print('\033[93m', 'Number of classes: ', '\033[0m', len(classes), sep='')

**<h3> Training Experiment 1: Exploration and Saliency Ranking </h3>**

**<h4> Normal DQN Agent - Random Exploration - VGG16 - No SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the VGG16 feature extractor, random exploration
# and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Random Explore - VGG16 - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Normal DQN Agent - Random Exploration - VGG16 - With SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the VGG16 feature extractor, random exploration
# and 'use_sara' enabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Random Explore - VGG16 - With SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':True,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Normal DQN Agent - Guided Exploration - VGG16 - No SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the VGG16 feature extractor, guided exploration
# and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Guided Explore - VGG16 - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=GUIDED_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Normal DQN Agent - Guided Exploration - VGG16 - With SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the VGG16 feature extractor, guided exploration
# and 'use_sara' enabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Guided Explore - VGG16 - With SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':True,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=GUIDED_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h3> Training Experiment 2: Feature Learning Architectures </h3>**

**<h4> Normal DQN Agent - Random Exploration - MobileNet - No SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the MobileNetV2 feature extractor, random
# exploration and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Random Explore - MobileNet - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':MobileNetV2FeatureExtractor(),
                'target_size':MOBILENETV2_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Normal DQN Agent - Random Exploration - MobileNet - With SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the MobileNetV2 feature extractor, random
# exploration and 'use_sara' enabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Random Explore - MobileNet - With SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':MobileNetV2FeatureExtractor(),
                'target_size':MOBILENETV2_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':True,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Normal DQN Agent - Random Exploration - ResNet50 - No SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the ResNet50 feature extractor, random
# exploration and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Random Explore - ResNet50 - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':ResNet50FeatureExtractor(),
                'target_size':RESNET50_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Normal DQN Agent - Random Exploration - ResNet50 - With SaRa </h4>**

In [None]:
# Trains one DQNAgent per entry of `classes` on the Pascal VOC 2007+2012
# 'train' image set, using the ResNet50 feature extractor, random
# exploration and 'use_sara' enabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Normal DQN - Random Explore - ResNet50 - With SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':ResNet50FeatureExtractor(),
                'target_size':RESNET50_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':True,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DQN agent
        dqn_agent = DQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h3> Training Experiment 3: DQN Agent Architectures </h3>**

**<h4> Double DQN Agent - Random Exploration - VGG16 - No SaRa </h4>**

In [None]:
# Trains one DoubleDQNAgent per entry of `classes` on the Pascal VOC
# 2007+2012 'train' image set, using the VGG16 feature extractor, random
# exploration and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "DDQN - Random Explore - VGG16 - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the DDQN agent
        dqn_agent = DoubleDQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Dueling DQN Agent - Random Exploration - VGG16 - No SaRa </h4>**

In [None]:
# Trains one DuelingDQNAgent per entry of `classes` on the Pascal VOC
# 2007+2012 'train' image set, using the VGG16 feature extractor, random
# exploration and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "Dueling DQN - Random Explore - VGG16 - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the Dueling DQN agent
        dqn_agent = DuelingDQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info


**<h4> Double Dueling DQN Agent - Random Exploration - VGG16 - No SaRa </h4>**

In [None]:
# Trains one DoubleDuelingDQNAgent per entry of `classes` on the Pascal VOC
# 2007+2012 'train' image set, using the VGG16 feature extractor, random
# exploration and 'use_sara' disabled. Each trained agent is saved to
# "evaluation_models/<name>/<class>".

# Setting name (identical for every class, so it is assigned once up front)
name = "D3QN - Random Explore - VGG16 - No SaRa"

# Iterating over the classes
for current_class in classes:
    # Printing the class name
    print('*'*50)
    print('\033[92m' + 'Executing for class: ' + current_class + '\033[0m')
    print('*'*50)

    # Setting the environment configuration
    # (a fresh feature extractor instance is created for every class)
    env_config = {'dataset': '../Datasets/PascalVOC2007_2012Dataset',
                'dataset_year': '2007+2012',
                'dataset_image_set': 'train',
                'feature_extractor':VGG16FeatureExtractor(),
                'target_size':VGG16_TARGET_SIZE,
                'current_class':current_class,
                'use_sara':False,
                }

    # Creating the environment
    env = gym.make('DetectionEnv-v0-Train', env_config=env_config)

    # Everything that uses the environment runs inside try/finally so the
    # environment is closed even if training or saving raises.
    try:
        # Creating the Replay_Buffer
        replay_buffer = Replay_Buffer(env)

        # Creating the D3QN agent
        dqn_agent = DoubleDuelingDQNAgent(env, replay_buffer, name=name, exploration_mode=RANDOM_EXPLORE)

        # Running the agent
        dqn_agent.run()

        # Retrieving the episode info
        dqn_episode_info = dqn_agent.get_episode_info()

        # Printing the time taken to train the agent
        print("Time taken to train the agent: {:.2f} seconds".format(dqn_episode_info["eps_duration"]))

        # Saving the model
        dqn_agent.save(path="evaluation_models/" + name + '/' + current_class)
    finally:
        # Closing the environment
        env.close()

    # Memory Cleaning (only reached on success, when all four names exist)
    del env, replay_buffer, dqn_agent, dqn_episode_info
