# Exhaustive Search: Depth-First Strategy

In [1]:
# Automatically reload imported modules before each cell runs, so edits to the project code take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from src.environment.ADR_Environment import ADR_Environment
from src.simulator.Simulator import Simulator
import yaml
import pandas as pd
import csv
from tqdm import tqdm

In [3]:
# Sweep configuration file; change this path to run a different sweep.
CONFIG_PATH = "src/config/exhaustive_config_10.yaml"

# Open in binary mode so PyYAML performs the decoding itself (BOM detection,
# UTF-8 otherwise) instead of relying on the platform's default text encoding.
with open(CONFIG_PATH, "rb") as config_file:
    # NOTE(review): FullLoader can construct more than plain data types. If the
    # config only holds scalars, lists and mappings, yaml.safe_load would be the
    # safer choice -- confirm before switching.
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Number of debris objects declared by the configuration; used below as the
# upper bound of the debris IDs that are enumerated.
n_debris = config['environment_parameters']['total_n_debris']

In [4]:
def generate_permutations(nums, length):
    """Return every ordered selection of ``length`` distinct values from ``nums``.

    Results are produced in depth-first order: the first position cycles
    through the values in their original order, then the second position,
    and so on.

    Parameters
    ----------
    nums : iterable of hashable
        Candidate values. The iterable is materialised once, so one-shot
        iterables such as generators are supported. Repeated values are
        collapsed (first occurrence wins), because a value may appear at
        most once in any permutation.
    length : int
        Number of values in each permutation.

    Returns
    -------
    list of list
        One independent, mutable list per permutation. ``[[]]`` when
        ``length`` is 0, and ``[]`` when ``length`` is negative or larger
        than the number of distinct values.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from itertools import permutations as _permutations

    # itertools rejects a negative length; no permutation can have one anyway.
    if length < 0:
        return []

    # dict.fromkeys removes duplicates while preserving first-seen order.
    distinct_values = list(dict.fromkeys(nums))
    return [list(selection) for selection in _permutations(distinct_values, length)]

# Debris IDs available to the sweep: 0, 1, ..., n_debris - 1.
nums_range = range(n_debris)

# How many debris make up each candidate sequence.
combination_length = 5

# Despite the variable name, these are ordered sequences (permutations)
# of distinct IDs, as produced by generate_permutations.
combinations = generate_permutations(nums_range, combination_length)

# Echo the available IDs, one per line.
for debris_id in nums_range:
    print(debris_id)


0
1
2
3
4
5
6
7
8
9


In [5]:
# Per-sequence result accumulators, filled in the same order as `combinations`:
# total reward, fuel consumed and time consumed.
sum_rewards_per_combination, fuel_used, time_used = [], [], []

In [6]:

# Build the environment once; every sequence restarts it through env_start().
env = ADR_Environment()
env.env_init(env_info=config['environment_parameters'])

# Reset the accumulators in this cell so that re-running it on its own cannot
# append to results left over from a previous run (which would make the lists
# longer than `combinations` and break the DataFrame construction below).
sum_rewards_per_combination = []
fuel_used = []
time_used = []

for combination in tqdm(combinations):
    # The first entry selects the starting debris; the remaining entries are
    # the actions stepped through the environment.
    action_key = combination[0]
    # The second element of the action-space entry is not needed here
    # (appears to be a duration in days -- TODO confirm).
    first_debris, _days = env.action_space[action_key]
    env.env_start(first_debris=first_debris)

    sum_rewards = 0.0
    last_state = None
    for action in combination[1:]:
        reward, last_state, term = env.env_step(action)
        sum_rewards += reward
        if term:
            # The environment flagged the episode as finished; stop stepping.
            break

    if last_state is None:
        # No step was taken, so there is no final state to read the budgets
        # from. Fail loudly instead of reusing the previous sequence's state.
        raise ValueError(
            f"combination {combination!r} contains no action after the "
            "starting debris; at least two debris are required"
        )

    sum_rewards_per_combination.append(sum_rewards)
    # NOTE(review): last_state[2] / last_state[3] are presumably the remaining
    # fuel (delta-v) and time budgets -- confirm against ADR_Environment.
    fuel_used.append(env.dv_max_per_mission - last_state[2])
    time_used.append(env.dt_max_per_mission - last_state[3])

# One row per evaluated sequence, in the same order as `combinations`.
df = pd.DataFrame({
    'combination': combinations,
    'sum_rewards': sum_rewards_per_combination,
    'fuel_used': fuel_used,
    'time_used': time_used,
})

  0%|          | 0/30240 [00:00<?, ?it/s]

100%|██████████| 30240/30240 [21:29<00:00, 23.44it/s]


In [7]:
# Rank the sequences by total reward, best first, and display the result.
df = df.sort_values('sum_rewards', ascending=False)
df

Unnamed: 0,combination,sum_rewards,fuel_used,time_used
30239,"[9, 8, 7, 6, 5]",4.0,3.339356,99.148450
21052,"[6, 9, 5, 1, 7]",4.0,3.880008,99.074206
21050,"[6, 9, 5, 1, 3]",4.0,3.770283,99.083039
21048,"[6, 9, 5, 1, 0]",4.0,3.165871,99.068223
9917,"[3, 2, 6, 0, 9]",4.0,3.534433,99.357402
...,...,...,...,...
26104,"[8, 5, 6, 3, 7]",0.0,1.181921,99.376467
26103,"[8, 5, 6, 3, 4]",0.0,1.181921,99.376467
26102,"[8, 5, 6, 3, 2]",0.0,1.181921,99.376467
26101,"[8, 5, 6, 3, 1]",0.0,1.181921,99.376467


In [8]:
# Persist the table as currently held in `df` to a CSV file, without the index column.
df.to_csv(path_or_buf='exhaustive_sweep_results.csv', index=False)

In [9]:
# Keep only the sequences that reach the highest total reward, then display them.
top_reward = df['sum_rewards'].max()
df = df.loc[df['sum_rewards'] == top_reward]
df

Unnamed: 0,combination,sum_rewards,fuel_used,time_used
30239,"[9, 8, 7, 6, 5]",4.0,3.339356,99.148450
21052,"[6, 9, 5, 1, 7]",4.0,3.880008,99.074206
21050,"[6, 9, 5, 1, 3]",4.0,3.770283,99.083039
21048,"[6, 9, 5, 1, 0]",4.0,3.165871,99.068223
9917,"[3, 2, 6, 0, 9]",4.0,3.534433,99.357402
...,...,...,...,...
23604,"[7, 8, 2, 0, 1]",4.0,3.677320,99.207213
54,"[0, 1, 3, 5, 2]",4.0,3.468625,99.269630
29462,"[9, 6, 5, 3, 2]",4.0,3.618847,99.343664
25305,"[8, 3, 2, 5, 6]",4.0,3.865082,99.210043


In [10]:
# Order the remaining sequences by fuel consumption, lowest first;
# the next cell shows the first 10.
df = df.sort_values('fuel_used', ascending=True)

In [11]:
# Display the first 10 rows of the table in its current order.
df.iloc[:10]

Unnamed: 0,combination,sum_rewards,fuel_used,time_used
10995,"[3, 6, 7, 8, 4]",4.0,1.082799,99.142868
23335,"[7, 6, 3, 5, 1]",4.0,1.086994,99.225271
14733,"[4, 8, 7, 6, 3]",4.0,1.087757,99.11955
4473,"[1, 5, 3, 6, 7]",4.0,1.08994,99.108411
23343,"[7, 6, 3, 8, 4]",4.0,1.11209,99.142868
14602,"[4, 8, 3, 6, 7]",4.0,1.114934,99.247235
19395,"[6, 3, 7, 8, 4]",4.0,1.126924,99.099024
14722,"[4, 8, 7, 3, 6]",4.0,1.13177,99.119551
20311,"[6, 7, 3, 5, 1]",4.0,1.152186,99.225271
4479,"[1, 5, 3, 7, 6]",4.0,1.15522,99.10841
