In [3]:
import numpy as np
import gym
from gym.spaces import Discrete, Box, Dict

In [10]:
class LifecycleEnv(gym.Env):
    def __init__(self):
        # Here we define the choices on consumption and equity allocation
        self.action_space = Dict({"equity_allocation": Box(low=0.0, high=1.0, shape = (1,)),
                                  "consumption": Box(low = 0.0, high = 1000.0, shape = (1,))})
        # 
        self.observation_space = Box(low = np.array([0.0]), 
                                     high = np.array([1.0]))
        # Set starting variables
        self.wealth = 0
        self.income = 10000
        self.age = 20
        self.retirement_age = 70
        self.terminal_age = 115

    def step(self, action):
        # Income reduces when you retire
        if self.age > self.retirement_age:
            self.income = 0
        # define market returns
        risk_return = 0.05
        risk_free_return = 0.02
        # Apply action
        portfolio_return = risk_return * action["equity_allocation"] + risk_free_return * (1 - action["equity_allocation"])
        self.wealth = (1 + portfolio_return) * (self.wealth - action["consumption"]) + self.income
        reward = action
        # Time passes (philosophical consideration here)
        self.age += 1
        # Complete step if agent is older than terminal age
        if self.age > self.terminal_age:
            done = True
        else:
            done = False
        # Placeholder for info
        info = self.wealth
        return self.wealth, reward, done, info 

    def render(self, mode = 'human'):
        # no urgent need for this
        pass

    def reset(self):
        # Resetting age and wealth
        self.wealth = 0
        self.age = 20
        self.income = 10000
        return self.wealth
        
env = LifecycleEnv()
# Observation and action space 
obs_space = env.observation_space
action_space = env.action_space
print("The observation space: {}".format(obs_space))
print("The action space: {}".format(action_space))

The observation space: Box(0.0, 1.0, (1,), float32)
The action space: Dict(consumption:Box(0.0, 1000.0, (1,), float32), equity_allocation:Box(0.0, 1.0, (1,), float32))


In [12]:
# Number of steps you run the agent for 
num_steps = 100

obs = env.reset()

for step in range(num_steps):
    # take random action, but you can also do something more intelligent
    # action = my_intelligent_agent_fn(obs) 
    action = env.action_space.sample()
    #print(action)
    
    # apply the action
    obs, reward, done, info = env.step(action)
    
    # Render the env
    print(info)
    
    # If the epsiode is up, then start another one
    if done:
        env.reset()

# Close the env
env.close()

[9869.489]
[19669.492]
[29483.137]
[39907.344]
[49827.594]
[60386.715]
[72571.53]
[84260.35]
[97559.14]
[111919.55]
[124936.836]
[139869.73]
[154321.7]
[169352.4]
[186271.2]
[205301.22]
[223401.17]
[240806.69]
[258569.33]
[279033.06]
[300465.88]
[320691.2]
[341149.5]
[364358.97]
[382966.03]
[409807.88]
[432372.62]
[458386.2]
[489871.8]
[522103.34]
[547540.3]
[579850.7]
[608780.4]
[644905.5]
[668285.06]
[709455.44]
[742307.8]
[771485.5]
[818935.]
[851128.1]
[889721.8]
[940801.5]
[974674.6]
[1021522.75]
[1076357.4]
[1111743.5]
[1171943.1]
[1219269.4]
[1261887.4]
[1310951.4]
[1350758.6]
[1406771.4]
[1445055.6]
[1507309.4]
[1545269.4]
[1593582.5]
[1652311.2]
[1690021.5]
[1749801.6]
[1803957.1]
[1850976.2]
[1897960.2]
[1991239.]
[2074489.5]
[2123040.8]
[2202694.5]
[2266254.5]
[2317254.5]
[2412418.]
[2506550.]
[2596396.5]
[2705790.2]
[2833428.5]
[2924386.8]
[3067060.2]
[3219511.5]
[3377552.8]
[3450103.]
[3578716.5]
[3730652.8]
[3914777.5]
[4006726.5]
[4090999.]
[4241838.5]
[4385700.5]
[45719