# Exhaustive Search: Depth-First Strategy

In [40]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up automatically, without restarting the kernel.
# Mode 2 reloads all imported modules before each cell is executed.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
import numpy as np
from src.environment.ADR_Environment import ADR_Environment
from src.simulator.Simulator import Simulator
import yaml
import pandas as pd
import csv

In [42]:
# Read the sweep settings from the YAML config file into `config`.
# NOTE(review): FullLoader can build more than plain scalars/lists/dicts; if the
# config only holds plain data, yaml.safe_load would be sufficient — confirm.
with open("src/config/exhaustive_config.yaml") as file: # point this at another YAML file to run a different sweep
        config = yaml.load(file, Loader=yaml.FullLoader)


# Value stored under environment_parameters -> total_n_debris; used below as
# the upper bound of the range of debris ids to permute.
n_debris = config['environment_parameters']['total_n_debris']

In [43]:
def generate_permutations(nums, length):
    """Return every ordered selection of ``length`` distinct values from ``nums``.

    Args:
        nums: Iterable of hashable candidate values. It is materialised once,
            so one-shot iterators/generators are handled correctly. Repeated
            values are collapsed to their first occurrence, so no permutation
            is emitted twice.
        length: Number of values in each permutation.

    Returns:
        A list of lists. Permutations are ordered by the position of their
        elements in ``nums`` (e.g. ``[0, 1], [0, 2], [1, 0], ...``).
        ``[[]]`` is returned for ``length == 0``; ``[]`` is returned when
        ``length`` is negative or larger than the number of distinct values.
    """
    # Local import keeps this cell self-contained.
    from itertools import permutations as _permutations

    # dict.fromkeys de-duplicates while preserving first-seen order.
    candidates = list(dict.fromkeys(nums))

    # itertools raises on a negative length; an empty result is the natural
    # answer (no selection of negative size exists).
    if length < 0:
        return []

    # Convert tuples to lists so callers keep receiving list-of-lists.
    return [list(perm) for perm in _permutations(candidates, length)]

# Every sequence visits this many distinct debris.
combination_length = 4
# Candidate debris ids: 0 .. n_debris - 1.
nums_range = range(n_debris)
# All ordered sequences of `combination_length` distinct debris ids.
combinations = generate_permutations(nums_range, combination_length)

# Sanity check: list the candidate ids, one per line.
for debris_id in nums_range:
    print(debris_id)


0
1
2
3
4
5


In [44]:
# Per-sequence result accumulators, filled in parallel by the evaluation loop:
# total reward, fuel used and time used for each debris sequence.
sum_rewards_per_combination, fuel_used, time_used = [], [], []

In [45]:

# Run one simulator episode per debris sequence and collect the outcomes in `df`.

# Start from empty accumulators so that re-running this cell does not append to
# results left over from a previous run (which would make the lists longer than
# `combinations` and break the DataFrame construction below).
sum_rewards_per_combination = []
fuel_used = []
time_used = []

env = ADR_Environment()
env.env_init(env_info=config['environment_parameters'])
for i, combination in enumerate(combinations):
    # One episode: start at the first debris of the sequence, then step through
    # the remaining ones in order until the environment reports termination.
    print(f"combination {i}")
    sum_rewards = 0.0
    action_key = combination[0]
    # The action space maps an action key to a pair; the second element is
    # printed as "days". A real name is used instead of `_`, which IPython
    # reserves for the last cell output.
    first_debris, days = env.action_space[action_key]
    print('days:', days)
    env.env_start(first_debris = first_debris)
    # Reset per episode so a state from the previous sequence is never reused.
    last_state = None
    for action in combination[1:]:
        (reward, last_state, term) = env.env_step(action)
        sum_rewards += reward
        if term:
            break
    print(f"Combination: {combination}, Reward: {sum_rewards}")
    sum_rewards_per_combination.append(sum_rewards)
    if last_state is None:
        # No step was taken (sequence of length 1): there is no final state to
        # read budgets from, so record the usage as unknown.
        fuel_used.append(float('nan'))
        time_used.append(float('nan'))
    else:
        # Usage = mission budget minus the value stored in the final state.
        # NOTE(review): presumably last_state[2] / last_state[3] hold the
        # remaining delta-v and remaining time — confirm against the environment.
        fuel_used.append(env.dv_max_per_mission - last_state[2])
        time_used.append(env.dt_max_per_mission - last_state[3])

# One row per evaluated sequence.
df = pd.DataFrame({'combination': combinations, 'sum_rewards': sum_rewards_per_combination, 'fuel_used': fuel_used, 'time_used': time_used})

combination 0
days: 30

ENV START


 ----- Starting Episode ---- 


 -----  ENV STEP ----- 

action_key:  1
converted action:  (1, 30)

 -----  ENV STEP ----- 

action_key:  2
converted action:  (2, 30)
max fuel used
Combination: [0, 1, 2, 3], Reward: 1.0
combination 1
days: 30

ENV START


 ----- Starting Episode ---- 


 -----  ENV STEP ----- 

action_key:  1
converted action:  (1, 30)

 -----  ENV STEP ----- 

action_key:  2
converted action:  (2, 30)
max fuel used
Combination: [0, 1, 2, 4], Reward: 1.0
combination 2
days: 30

ENV START


 ----- Starting Episode ---- 


 -----  ENV STEP ----- 

action_key:  1
converted action:  (1, 30)
illegal binary flag
Combination: [0, 1, 2, 5], Reward: 0.0
combination 3
days: 30

ENV START


 ----- Starting Episode ---- 


 -----  ENV STEP ----- 

action_key:  1
converted action:  (1, 30)

 -----  ENV STEP ----- 

action_key:  3
converted action:  (3, 30)

 -----  ENV STEP ----- 

action_key:  2
converted action:  (2, 30)
max fuel used
Combinati

In [46]:
# Order the sequences from highest to lowest total reward and display them.
df = df.sort_values('sum_rewards', ascending=False)
df

Unnamed: 0,combination,sum_rewards,fuel_used,time_used
82,"[1, 2, 5, 3]",3.0,1.581130,9.380690
116,"[1, 5, 3, 4]",3.0,1.590343,1.181990
5,"[0, 1, 3, 5]",3.0,1.590338,0.883874
263,"[4, 1, 5, 3]",3.0,1.364004,0.885143
50,"[0, 5, 1, 4]",3.0,1.400601,1.701084
...,...,...,...,...
60,"[1, 0, 2, 3]",0.0,0.000000,0.000000
234,"[3, 5, 2, 0]",0.0,0.000000,0.000000
95,"[1, 3, 5, 4]",0.0,0.000000,0.000000
183,"[3, 0, 2, 1]",0.0,0.000000,0.000000


In [47]:
# Persist the reward-sorted results; the DataFrame index is not written.
df.to_csv(path_or_buf='exhaustive_sweep_results.csv', index=False)

In [48]:
# Keep only the sequences that reach the highest total reward, then display them.
best_reward = df['sum_rewards'].max()
is_best = df['sum_rewards'] == best_reward
df = df[is_best]
df

Unnamed: 0,combination,sum_rewards,fuel_used,time_used
82,"[1, 2, 5, 3]",3.0,1.58113,9.38069
116,"[1, 5, 3, 4]",3.0,1.590343,1.18199
5,"[0, 1, 3, 5]",3.0,1.590338,0.883874
263,"[4, 1, 5, 3]",3.0,1.364004,0.885143
50,"[0, 5, 1, 4]",3.0,1.400601,1.701084
286,"[4, 3, 5, 1]",3.0,1.540384,1.065997
176,"[2, 5, 3, 4]",3.0,1.590343,1.18199


In [49]:
# Rank the remaining sequences from least to most fuel used.
df = df.sort_values('fuel_used', ascending=True)

In [50]:
# Display at most the first ten rows of the fuel-sorted table.
df.head(n=10)

Unnamed: 0,combination,sum_rewards,fuel_used,time_used
263,"[4, 1, 5, 3]",3.0,1.364004,0.885143
50,"[0, 5, 1, 4]",3.0,1.400601,1.701084
286,"[4, 3, 5, 1]",3.0,1.540384,1.065997
82,"[1, 2, 5, 3]",3.0,1.58113,9.38069
5,"[0, 1, 3, 5]",3.0,1.590338,0.883874
116,"[1, 5, 3, 4]",3.0,1.590343,1.18199
176,"[2, 5, 3, 4]",3.0,1.590343,1.18199
