In [157]:
class Problem:
    def __init__(self):
        self.goal = None
        
    def get_state(self): pass
    def get_pos_actions(self): pass
    def action(self, actions): pass
    def cost(self, action): pass

In [158]:
class Any:
    def __init__(self, *vals):
        self.vals = vals
        
    def __eq__(self, other):
        if len(self.vals) == 0 or\
            other in self.vals:
            return True
        return False

In [159]:
import random

class Mal_cleaner(Problem):
    def __init__(self):
        Problem.__init__(self)
        self.rooms=['Dirty']*2
        self.loc='A'
        self.goal=(Any('A','B'),'Clean','Clean')
    
    def get_state(self):
        return (self.loc,*self.rooms)
        
    def get_pos_actions(self):
        if self.loc=='B': yield 'Left'
        if self.loc=='A': yield 'Right'
        yield 'Suck'
    
    def action(self, actions):
        for action in actions:
            if action not in self.get_pos_actions():continue
            if action=='Left':
                self.loc='A'
            elif action=='Right':
                self.loc='B'
            elif action=='Suck':
                room_ind=ord(self.loc)-65
                if self.rooms[room_ind]=='Dirty':
                    if random.randint(0,1):
                        self.rooms=['Clean']*2
                    else:
                        self.rooms[room_ind]='Clean'
                elif random.randint(0, 1):
                    self.rooms[room_ind]='Dirty'
    
    def cost(self, action):
        return 1

In [160]:
from typing import Type

class Problem_solver:
    def __init__(self): pass
    def train(self, problem: Type[Problem]): pass
    def solve(self): pass

In [161]:
class And_or_search(Problem_solver):
    def __init__(self):
        Problem_solver.__init__(self)
        
    # states_func: Return belief state (a set of possible states)
    def train(self, problem: Type[Problem], states_func, pos_actions_func):
        Problem_solver.train(self, problem)
        self.problem=problem
        self.states_func=states_func
        self.pos_actions_func=pos_actions_func
        
    def solve(self):
        rules=self.__search()
        return rules
    
    def __search(self):
        return self.__or_search(self.problem.get_state(),[])
        
    def __or_search(self, state, path):
        if state==self.problem.goal:
            return []
        if self.__is_cycle(path):
            return None
        for action in self.pos_actions_func(state):
            plan=self.__and_search(list(self.states_func(state, action)),[state,*path])
            if plan is not None:
                return [action,*plan]
        return None
                
    def __and_search(self, states, path):
        plans=[None]*len(states)
        for i, state in enumerate(states):
            t = self.__or_search(state,path)
            if t is None:
                return None
            plans[i] = {
                'state': state,
                'action': t
            }
        return plans
        
    def __is_cycle(self,path):
        return len(set(path))!=len(path)

In [162]:
def get_pos_actions(state):
    if state[0]=='B':yield 'Left'
    if state[0]=='A':yield 'Right'
    yield 'Suck'

In [163]:
def get_states(state, action):
    loc, rooms=state[0],[state[1],state[2]]
    if action=='Left':
        yield ('A',rooms[0],rooms[1])
    elif action=='Right':
        yield ('B',rooms[0],rooms[1])
    elif action=='Suck':
        room_ind=ord(loc)-65
        if rooms[room_ind]=='Dirty':
            yield (loc,'Clean','Clean')
            rooms[room_ind]='Clean'
            yield (loc,*rooms)
        else:
            yield state
            rooms[room_ind]='Dirty'
            yield (loc,*rooms)

In [164]:
mal_cleaner = Mal_cleaner()
mal_cleaner.get_state()

('A', 'Dirty', 'Dirty')

In [165]:
solver = And_or_search()
solver.train(mal_cleaner, get_states, get_pos_actions)
plan = solver.solve()

In [166]:
print(plan)

['Right', {'state': ('B', 'Dirty', 'Dirty'), 'action': ['Suck', {'state': ('B', 'Clean', 'Clean'), 'action': []}, {'state': ('B', 'Dirty', 'Clean'), 'action': ['Left', {'state': ('A', 'Dirty', 'Clean'), 'action': ['Suck', {'state': ('A', 'Clean', 'Clean'), 'action': []}, {'state': ('A', 'Clean', 'Clean'), 'action': []}]}]}]}]


In [171]:
def get_readable_plan(plan):
    result = ""
    for i, action in enumerate(plan):
        if isinstance(action, str):
            result+=f"Do {action}\n"
        elif isinstance(action, dict):
            state = action['state']
            action_plan = action['action']
            result+="Do " + action_plan[0]
            for action_item in action_plan:
                if isinstance(action_item, dict):
                    next_state = action_item['state']
                    subplan = action_item['action']
                    if next_state == ('B', 'Dirty', 'Clean'):
                        result+="\nif state == ('B', 'Dirty', 'Clean')\n"
                        for action in subplan:
                            if isinstance(action, str):
                                result+=f"    Do {action}\n"
                            elif isinstance(action, dict):
                                sub_actions = action.get('action', [])
                                for action in sub_actions:
                                    if isinstance(action, str):
                                        result+=f"    Do {action}"
                        break
    return result

In [172]:
print(get_readable_plan(plan))

Do Right
Do Suck
if state == ('B', 'Dirty', 'Clean')
    Do Left
    Do Suck
