In [1]:
import numpy as np

In [2]:
class State:
    def __init__(self, name="default", reward = 0):
        self.name = name
        self.reward = reward
        self.value = 0
        self.neighbors = []
        self.p_transitions = []
        
    def next_state(self):
        p_transitions = np.array(self.p_transitions)
        total = np.sum(p_transitions)
        p_transitions /= total
        choice = np.random.choice(self.neighbors,1,p=p_transitions)[0]
        
        index = self.neighbors.index(choice)
        prob = self.p_transitions[index]
        
        return choice, prob
        
    def add_neighbor(self, neighbor, probablility):
        self.neighbors.append(neighbor)
        self.p_transitions.append(probablility)
        
    def __eq__(self, other):
        return self.name == other.name
        
    def __repr__(self):
        return str([self.name, str(self.value)])

class StateMachine:
    def __init__(self):
        self.current_name = None
        self.states = {}
        self.gamma = 0.6
        
    def step(self):
        
        #next state
        new_name,prob = self.states[self.current_name].next_state()
        
        value_choice = prob * (self.states[new_name].reward + self.gamma * self.states[new_name].value)
        value_other = (1-prob) * ( self.gamma * self.states[self.current_name].value)
        
        #update current state
        self.states[self.current_name].value = (value_choice + value_other)
        
        
        """
        #surroundings
        current_value = 0 
        for i in range(len(self.states[self.current_name].neighbors)):
            neighbor = self.states[self.current_name].neighbors[i]
            prob = self.states[self.current_name].p_transitions[i]
            value = self.states[neighbor].value
            reward = self.states[neighbor].reward
            current_value += prob*(reward + (self.gamma*value))
            
            
        self.states[self.current_name].value = current_value
        new_name = self.states[this.current_name].next_state()
        self.current_name = self.states[new_name].name
        """
        
        self.current_name = self.states[new_name].name
        return self.states[self.current_name]
        
    def set_start(self,name):
        self.current_name = name
        
    def add_states(self, *states):
        for state in states:
            self.states[state.name] = state

In [3]:
s1,s2,s3,s4,s5,s6,s7 = State("1",1), State("2"), State("3"), State("4"), State("5"), State("6"), State("7",10)

In [4]:
s1.add_neighbor(s1.name, 0.6)
s1.add_neighbor(s2.name, 0.4)

s2.add_neighbor(s1.name, 0.4)
s2.add_neighbor(s2.name, 0.2)
s2.add_neighbor(s3.name, 0.4)

s3.add_neighbor(s2.name, 0.4)
s3.add_neighbor(s3.name, 0.2)
s3.add_neighbor(s4.name, 0.4)

s4.add_neighbor(s3.name, 0.4)
s4.add_neighbor(s4.name, 0.2)
s4.add_neighbor(s5.name, 0.4)

s5.add_neighbor(s4.name, 0.4)
s5.add_neighbor(s5.name, 0.2)
s5.add_neighbor(s6.name, 0.4)
        
s6.add_neighbor(s5.name, 0.4)
s6.add_neighbor(s6.name, 0.2)
s6.add_neighbor(s7.name, 0.4)

s7.add_neighbor(s6.name, 0.4)
s7.add_neighbor(s7.name, 0.6)

machine = StateMachine()
machine.add_states(s1,s2,s3,s4,s5,s6,s7)
machine.set_start(s4.name)

In [5]:
machine.set_start(s4.name)
count = [0,0,0,0,0,0,0]

for i in range(10000):
    state = machine.step()
    index = int(state.name)
    count[index-1] += 1
    
print(count)
for state in machine.states.values():
    print(state.reward, state.value)

[1530, 1528, 1499, 1399, 1405, 1363, 1276]
1 0.3079117098015186
0 0.7597172747824454
0 0.01461148181457397
0 0.002756947763189201
0 0.012788290562612934
0 1.5287699461736646
10 3.2991877674897445
