# Exhaustive Search

In [None]:
# Make sure the libraries are reloaded
%load_ext autoreload
%autoreload 2

### Import

In [None]:
import numpy as np
from src.environment.ADR_Environment import ADR_Environment
from src.simulator.Simulator import Simulator
import yaml
import pandas as pd
import csv
from tqdm import tqdm

### Load config file

In [None]:
# Load the sweep configuration; point the path at another YAML file to run a
# different sweep. The encoding is pinned because open() otherwise falls back
# to the platform's locale encoding.
with open("src/config/exhaustive_config.yaml", encoding="utf-8") as file:
    # NOTE(review): FullLoader resolves more tags than SafeLoader; prefer
    # yaml.safe_load if this file could ever come from an untrusted source.
    config = yaml.load(file, Loader=yaml.FullLoader)

# Total number of debris, read from the environment section of the config.
n_debris = config['environment_parameters']['total_n_debris']

### Generate all possible ordered sequences (permutations) of debris to visit

In [None]:
def generate_permutations(nums, length):
    """Return every ordered selection of ``length`` distinct values from ``nums``.

    Each selection is a fresh list. Selections are produced in lexicographic
    order of the positions their values occupy in ``nums``.

    Args:
        nums: Iterable of hashable candidate values. It is materialised once,
            so one-shot iterators are handled correctly.
        length: Number of values in each selection.

    Returns:
        A list of lists. ``[[]]`` when ``length`` is 0, and ``[]`` when
        ``length`` is negative or larger than the number of distinct values.
    """
    import itertools  # local import keeps this cell self-contained

    if length == 0:
        return [[]]
    if length < 0:
        # itertools.permutations rejects a negative r; there is nothing to select.
        return []

    pool = list(nums)
    orderings = itertools.permutations(pool, length)
    if len(set(pool)) != len(pool):
        # itertools tells elements apart by position, so with repeated values
        # it would emit selections containing the same value twice; drop those.
        orderings = (p for p in orderings if len(set(p)) == length)
    return [list(p) for p in orderings]

# Candidates are the integers 0 .. n_debris - 1.
nums_range = range(n_debris)
# Number of entries in each generated sequence.
combination_length = 5
# Every ordered sequence of `combination_length` distinct candidates.
combinations = generate_permutations(nums=nums_range, length=combination_length)

### Initialize lists to store results

In [None]:
# One entry per evaluated sequence: total reward, fuel used and time used.
sum_rewards_per_combination, fuel_used, time_used = [], [], []

### Run the exhaustive search


In [None]:
# Start from empty result lists so that re-running this cell does not append a
# second batch of results, which would no longer line up with `combinations`.
sum_rewards_per_combination = []
fuel_used = []
time_used = []

env = ADR_Environment()
env.env_init(env_info=config['environment_parameters'])

# Iterating through tqdm advances the progress bar once per sequence.
for combination in tqdm(combinations):
    # The first entry selects the starting debris: it is looked up in the
    # environment's action space and handed to env_start, not to env_step.
    first_debris, _ = env.action_space[combination[0]]
    observation = env.env_start(first_debris=first_debris)
    # NOTE(review): assumes env_start returns a state laid out like the one
    # env_step returns — confirm. Seeding last_state here avoids reading a
    # stale (or undefined) value when the sequence has no further entries.
    last_state = observation

    sum_rewards = 0.0
    for action in combination[1:]:
        reward, last_state, term = env.env_step(action)
        sum_rewards += reward
        if term:
            # Terminal state reached: the remaining entries are not attempted.
            break

    sum_rewards_per_combination.append(sum_rewards)
    # NOTE(review): indices 2 and 3 of the state are presumably the remaining
    # fuel and time budgets — confirm against the environment's state layout.
    fuel_used.append(env.dv_max_per_mission - last_state[2])
    time_used.append(env.dt_max_per_mission - last_state[3])

# One row per evaluated sequence.
df = pd.DataFrame({
    'combination': combinations,
    'sum_rewards': sum_rewards_per_combination,
    'fuel_used': fuel_used,
    'time_used': time_used,
})

In [None]:
# Rank the evaluated sequences from highest to lowest total reward, then show the table.
df = df.sort_values('sum_rewards', ascending=False)
df

In [None]:
# to_csv does not create missing directories, and an error here would only show
# up after the whole sweep has run, so make sure the output folder exists first.
from pathlib import Path

results_path = Path('results/exhaustive_sweep_results.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(results_path, index=False)

In [None]:
# Keep only the rows whose total reward equals the maximum (ties are all kept), then show them.
df = df.loc[df['sum_rewards'].eq(df['sum_rewards'].max())]
df

In [None]:
# Order the rows by fuel used (lowest first) and display the first 10.
df = df.sort_values(by='fuel_used')
# An assignment alone displays nothing in a notebook cell; end on an expression.
df.head(10)