In [6]:
from mdp import StudentMDP
from agent import Agent, QLearningAgent
# Build the student MDP environment used by every cell below.
# verbose=True presumably enables the per-step transition table that appears
# in the cell outputs -- confirm against mdp.py.
mdp = StudentMDP(verbose=True)

In [7]:
# Hand-specified default policy, as drawn in images/student-mdp.png.
# Layout: state name -> {action name -> weight}. The weights listed for each
# state add up to 1.
agent = Agent(mdp.action_space)
agent.pi = {
    "Class 1": {
        "Study": 0.5,
        "Go on Facebook": 0.5,
    },
    "Class 2": {
        "Study": 0.8,
        "Fall asleep": 0.2,
    },
    "Class 3": {
        "Study": 0.6,
        "Go to the pub": 0.4,
    },
    "Facebook": {
        "Keep scrolling": 0.9,
        "Close Facebook": 0.1,
    },
    # The remaining states offer a single action each.
    "Pub": {"Have a pint": 1.0},
    "Pass": {"Fall asleep": 1.0},
    "Asleep": {"Stay asleep": 1.0},
}

In [8]:
# Roll out one episode with the current agent: start from the environment's
# reset state and keep stepping until the environment reports `done`.
state, done = mdp.reset(), False
while not done:
    action = agent.act(state)
    # Reward and info are not needed for a plain rollout, so they are discarded.
    state, _, done, _ = mdp.step(action)

| Time  | State    | Action         | Reward | Next state | Done  |
|-------|----------|----------------|--------|------------|-------|
| 0     | Class 1  | Study          | -2.0   | Class 2    | False |
| 1     | Class 2  | Study          | -2.0   | Class 3    | False |
| 2     | Class 3  | Go to the pub  |  1.0   | Pub        | False |
| 3     | Pub      | Have a pint    | -2.0   | Class 2    | False |
| 4     | Class 2  | Study          | -2.0   | Class 3    | False |
| 5     | Class 3  | Study          | 10.0   | Pass       | False |
| 6     | Pass     | Fall asleep    |  0.0   | Asleep     | True  |


In [16]:
# Replace the fixed-policy agent with a Q-learning agent.
# NOTE(review): epsilon / alpha / gamma are presumably the exploration rate,
# learning rate and discount factor -- confirm against agent.py.
agent = QLearningAgent(
    mdp.action_space,
    epsilon=1.0,
    alpha=0.2,
    gamma=0.9,
)
# Zero the environment's episode counter, which the training loop's
# `while mdp.ep < 50` condition reads.
mdp.ep = 0

In [17]:
# Train the Q-learning agent until the environment's episode counter hits 50.
# NOTE(review): nothing in this cell increments mdp.ep, so the environment is
# assumed to advance it itself (e.g. in reset/step) -- confirm in mdp.py.
while mdp.ep < 50:
    # --- one episode -------------------------------------------------------
    state, done = mdp.reset(), False
    while not done:
        action = agent.act(state)
        next_state, reward, done, info = mdp.step(action)
        # Feed the observed transition to the agent before moving on.
        agent.learn(state, action, reward, next_state, done)
        state = next_state

    # --- per-episode report ------------------------------------------------
    print("Value function:", agent.Q, "Policy:", agent.pi, sep="\n")

    # Shrink epsilon by 5% after every episode.
    agent.epsilon = agent.epsilon * 0.95

| Time  | State    | Action         | Reward | Next state | Done  |
|-------|----------|----------------|--------|------------|-------|
| 0     | Class 1  | Go on Facebook | -1.0   | Facebook   | False |
| 1     | Facebook | Close Facebook | -2.0   | Class 1    | False |
| 2     | Class 1  | Study          | -2.0   | Class 2    | False |
| 3     | Class 2  | Study          | -2.0   | Class 3    | False |
| 4     | Class 3  | Go to the pub  |  1.0   | Pub        | False |
| 5     | Pub      | Have a pint    | -2.0   | Class 2    | False |
| 6     | Class 2  | Study          | -2.0   | Class 3    | False |
| 7     | Class 3  | Go to the pub  |  1.0   | Pub        | False |
| 8     | Pub      | Have a pint    | -2.0   | Class 3    | False |
| 9     | Class 3  | Go to the pub  |  1.0   | Pub        | False |
| 10    | Pub      | Have a pint    | -2.0   | Class 3    | False |
| 11    | Class 3  | Study          | 10.0   | Pass       | False |
| 12    | Pass     | Fall asleep    |  0.0   | A