In [1]:
!pip install git+https://github.com/sarah-keren/AI_agents


Collecting git+https://github.com/sarah-keren/AI_agents
  Cloning https://github.com/sarah-keren/AI_agents to /tmp/pip-req-build-j090r_d4
  Running command git clone -q https://github.com/sarah-keren/AI_agents /tmp/pip-req-build-j090r_d4
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone


In [2]:
import gym
from AI_agents.Environments.gym_problem import GymProblem
from AI_agents.Search.best_first_search import best_first_search, breadth_first_search, depth_first_search, a_star
import AI_agents.Search.utils as utils
import AI_agents.Search.defs as defs
import AI_agents.Search.heuristic as heuristic



def main_taxi_bfs():
    """Run best-first search with a FIFO frontier on a fixed Taxi-v3 state and replay the plan.

    The start state is forced to ``encode(0, 3, 4, 1)``; the arguments are
    (taxi row, taxi column, passenger index, destination index). The function
    prints the decoded taxi row, the encoded state and the plan returned by the
    search, and renders the environment once before the search and once after
    every replayed action.
    """
    env = gym.make("Taxi-v3").env
    env.reset()

    # Replace whatever state reset() produced with a hand-picked one.
    start = env.encode(0, 3, 4, 1)
    row, _col, _passenger, _destination = env.decode(start)
    print(row)
    env.unwrapped.s = start
    print("State:", start)
    env.render()

    # Wrap the environment so the search code can work with it.
    problem = GymProblem(env, env.unwrapped.s)

    # Search components, built in the same order the call consumes them.
    frontier = utils.FIFOQueue()
    closed = utils.ClosedListOfKeys()
    stop_when = utils.TerminationCriteriaGoalStateReached()
    evaluate = utils.EvaluationCriteriaGoalCondition()

    # BFS expressed as best-first search over a FIFO queue.
    _value, _node, plan, _explored, _terminated = best_first_search(
        problem=problem,
        frontier=frontier,
        closed_list=closed,
        termination_criteria=stop_when,
        evaluation_criteria=evaluate,
        prune_func=None,
        log=True,
        log_file=None,
        iter_limit=defs.NA,
        time_limit=defs.NA,
    )

    # Show the plan, then replay it action by action, rendering after each one.
    print(plan)
    for step in plan:
        problem.apply_action(step)
        problem.env.render()


def main_taxi_dfs():
    """Run depth-first search on a fixed Taxi-v3 state and replay the resulting plan.

    The start state is forced to ``encode(0, 3, 4, 1)``; the arguments are
    (taxi row, taxi column, passenger index, destination index). The function
    prints the encoded state and the plan returned by ``depth_first_search``,
    and renders the environment once before the search and once after every
    replayed action.
    """
    # define the environment
    taxi_env = gym.make("Taxi-v3").env
    taxi_env.reset()

    # Replace the state chosen by reset() with a hand-picked one.
    init_state = taxi_env.encode(0, 3, 4, 1)  # (taxi row, taxi column, passenger index, destination index)
    taxi_env.unwrapped.s = init_state
    print("State:", init_state)
    taxi_env.render()

    # create a wrapper of the environment to the search
    taxi_p = GymProblem(taxi_env, taxi_env.unwrapped.s)

    # perform DFS; only the plan is used from the five returned values
    _, _, best_plan, _, _ = depth_first_search(problem=taxi_p,
                                               log=True,
                                               log_file=None,
                                               iter_limit=defs.NA,
                                               time_limit=defs.NA,
                                               )

    # Show the plan, then replay it action by action, rendering after each one.
    print(best_plan)
    for action_id in best_plan:
        taxi_p.apply_action(action_id)
        taxi_p.env.render()

def main_taxi_bfs_exp():
    """Run ``breadth_first_search`` on a fixed Taxi-v3 state and replay the resulting plan.

    The start state is forced to ``encode(0, 3, 4, 1)``; the arguments are
    (taxi row, taxi column, passenger index, destination index). The function
    prints the encoded state and the plan returned by the search, and renders
    the environment once before the search and once after every replayed action.
    """
    # define the environment
    taxi_env = gym.make("Taxi-v3").env
    taxi_env.reset()

    # Replace the state chosen by reset() with a hand-picked one.
    init_state = taxi_env.encode(0, 3, 4, 1)  # (taxi row, taxi column, passenger index, destination index)
    taxi_env.unwrapped.s = init_state
    print("State:", init_state)
    taxi_env.render()

    # create a wrapper of the environment to the search
    taxi_p = GymProblem(taxi_env, taxi_env.unwrapped.s)

    # perform BFS; only the plan is used from the five returned values
    _, _, best_plan, _, _ = breadth_first_search(problem=taxi_p,
                                                 log=True,
                                                 log_file=None,
                                                 iter_limit=defs.NA,
                                                 time_limit=defs.NA,
                                                 )

    # Show the plan, then replay it action by action, rendering after each one.
    print(best_plan)
    for action_id in best_plan:
        taxi_p.apply_action(action_id)
        taxi_p.env.render()


def main_taxi_A_star_exp():
    """Run best-first search with a priority-queue frontier on a fixed Taxi-v3 state.

    The frontier is ``utils.PriorityQueue(heuristic.zero_heuristic)``. The start
    state is forced to ``encode(0, 3, 4, 1)``; the arguments are (taxi row, taxi
    column, passenger index, destination index). The function prints the decoded
    taxi row, the encoded state and the returned plan, and renders the
    environment once before the search and once after every replayed action.
    """
    env = gym.make("Taxi-v3").env
    env.reset()

    # Replace whatever state reset() produced with a hand-picked one.
    start = env.encode(0, 3, 4, 1)
    row, _col, _passenger, _destination = env.decode(start)
    print(row)
    env.unwrapped.s = start
    print("State:", start)
    env.render()

    # Wrap the environment so the search code can work with it.
    problem = GymProblem(env, env.unwrapped.s)

    # Search components, built in the same order the call consumes them.
    frontier = utils.PriorityQueue(heuristic.zero_heuristic)
    closed = utils.ClosedListOfKeys()
    stop_when = utils.TerminationCriteriaGoalStateReached()
    evaluate = utils.EvaluationCriteriaGoalCondition()

    # A* spelled out explicitly through the generic best-first search.
    _value, _node, plan, _explored, _terminated = best_first_search(
        problem=problem,
        frontier=frontier,
        closed_list=closed,
        termination_criteria=stop_when,
        evaluation_criteria=evaluate,
        prune_func=None,
        log=True,
        log_file=None,
        iter_limit=defs.NA,
        time_limit=defs.NA,
    )

    # Show the plan, then replay it action by action, rendering after each one.
    print(plan)
    for step in plan:
        problem.apply_action(step)
        problem.env.render()

def main_taxi_A_star():
    """Run ``a_star`` with ``heuristic.zero_heuristic`` on a fixed Taxi-v3 state and replay the plan.

    The start state is forced to ``encode(0, 3, 4, 1)``; the arguments are
    (taxi row, taxi column, passenger index, destination index). The function
    prints the decoded taxi row, the encoded state and the returned plan, and
    renders the environment once before the search and once after every
    replayed action.
    """
    env = gym.make("Taxi-v3").env
    env.reset()

    # Replace whatever state reset() produced with a hand-picked one.
    start = env.encode(0, 3, 4, 1)
    row, _col, _passenger, _destination = env.decode(start)
    print(row)
    env.unwrapped.s = start
    print("State:", start)
    env.render()

    # Wrap the environment so the search code can work with it.
    problem = GymProblem(env, env.unwrapped.s)

    # Only the plan (third of the five returned values) is used below.
    _value, _node, plan, _explored, _terminated = a_star(
        problem=problem,
        heuristic_func=heuristic.zero_heuristic,
        log=True,
    )

    # Show the plan, then replay it action by action, rendering after each one.
    print(plan)
    for step in plan:
        problem.apply_action(step)
        problem.env.render()



if __name__ == "__main__":
    # Run the demos back to back: BFS first, then DFS, then A*.
    for demo in (main_taxi_bfs, main_taxi_dfs, main_taxi_A_star):
        demo()

0
State: 77
+---------+
|R: | :[42m_[0m:[35mG[0m|
| : | : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+

best_first_design(node) node number 1 cur_node:<Node 77>
InMethod best_first_design(node): explored_count:1 cur_node:<Node 77>, node_eval_time:0.000
best_first_design(node) node number 2 cur_node:<Node 177>
InMethod best_first_design(node): explored_count:2 cur_node:<Node 177>, node_eval_time:0.000
best_first_design(node) node number 3 cur_node:<Node 97>
InMethod best_first_design(node): explored_count:3 cur_node:<Node 97>, node_eval_time:0.000
best_first_design(node) node number 4 cur_node:<Node 57>
InMethod best_first_design(node): explored_count:4 cur_node:<Node 57>, node_eval_time:0.000
best_first_design(node) node number 5 cur_node:<Node 277>
InMethod best_first_design(node): explored_count:5 cur_node:<Node 277>, node_eval_time:0.000
best_first_design(node) node number 6 cur_node:<Node 197>
InMethod best_first_design(node): explored_count:6 cur_node:<Node 197>, node_e