In [16]:
import pandas as pd
import numpy as np
import random
import gym
from gym.spaces import Discrete, Box, Dict

In [49]:
class LifecycleEnv(gym.Env):
    def __init__(self):
        # Here we define the choices on consumption and equity allocation
        self.action_space = Dict({"equity_allocation": Discrete(101),
                                  "consumption": Discrete(200)})
        # Set starting variables
        self.wealth = 0
        self.income = 100
        self.age = 20
        self.retirement_age = 70
        self.terminal_age = 115
        # Here we create our observation space
        self.observation_space = Dict({"age": Discrete(self.terminal_age - self.age),
                                       "obs_wealth": Discrete(1000)})

    def step(self, action):
        # Income reduces when you retire
        if self.age > self.retirement_age:
            self.income = 0
        # define market returns
        risk_return = 0.05
        risk_free_return = 0.02
        # tranformation
        print(action)
        action_equity_allocation = action["equity_allocation"] / 100
        # Apply action
        portfolio_return = risk_return * \
            action_equity_allocation + risk_free_return * \
            (1 - action_equity_allocation)
        self.wealth = (1 + portfolio_return) * (self.wealth -
                                                action["consumption"]) + self.income
        reward = action["consumption"]
        # force wealth to be in grid - need to work on this
        self.wealth = int(self.wealth)
        # Time passes (philosophical consideration here)
        self.age += 1
        # Complete step if agent is older than terminal age
        if self.age > self.terminal_age:
            done = True
        else:
            done = False
        # Placeholder for info
        info = {"age": self.age, "wealth": self.wealth, "consumption": action["consumption"], "equity_allocation": action_equity_allocation}
        state = {"age": self.age, "wealth": self.wealth}
        return state, reward, done, info

    def render(self, mode='human'):
        # no urgent need for this
        pass

    def reset(self):
        # Resetting age and wealth
        self.wealth = 0
        self.age = 20
        self.income = 10000
        state = {"age": self.age, "wealth": self.wealth}
        return state


env = LifecycleEnv()
# Observation and action space
print("The observation space: {}".format(env.observation_space))
print("The action space: {}".format(env.action_space))


The observation space: Dict(age:Discrete(95), obs_wealth:Discrete(1000))
The action space: Dict(consumption:Discrete(200), equity_allocation:Discrete(101))


In [50]:
# Hyperparameters
num_steps = 95
epsilon = 0.1
alpha = 0.1
gamma = 0.6

# env setup
state = env.reset()
info_df = pd.DataFrame(
    columns=("age", "wealth", "consumption", "equity_allocation"))
q_table = np.zeros([env.observation_space["age"].n * env.observation_space["obs_wealth"].n,
                    env.action_space["equity_allocation"].n * env.action_space["consumption"].n])

for step in range(num_steps):
    # take random action, but you can also do something more intelligent
    # action = my_intelligent_agent_fn(obs)
    if random.uniform(0, 1) < epsilon:
        action = env.action_space.sample()  # Explore action space
    else:
        action = np.argmax(q_table[state["age"] * state["wealth"]])  # Exploit learned values

    # apply the action
    next_state, reward, done, info = env.step(action)

    # Render the env
    info_df = info_df.append(info, ignore_index=True)

    old_value = q_table[state["age"] * state["wealth"], action["equity_allocation"] * action["consumption"]]
    next_max = np.max(q_table[next_state["age"] * next_state["wealth"]])
    new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
    q_table[state["age"] * state["wealth"], action["equity_allocation"] * action["consumption"]] = new_value

    # If the episode is up, then start another one
    if done:
        info_df = pd.DataFrame(
            columns=("age", "wealth", "consumption", "equity_allocation"))
        env.reset()

print(info_df.set_index("age"))
info_df.set_index("age").plot(secondary_y=["equity_allocation"])

# Close the env
env.close()


0


IndexError: invalid index to scalar variable.