In [7]:
import numpy as np
import gym
from gym.spaces import Discrete, Box

In [35]:
class LifecycleEnv(gym.Env):
    def __init__(self):
        # Here we define the choices on consumption and equity allocation
        self.action_space = Box(low=np.array([0.0, 0.0]), 
                                high=np.array([1.0, 1000.0]))
        # 
        self.observation_space = Box(low = np.array([0.0]), 
                                     high = np.array([1.0]))
        # Set starting variables
        self.wealth = 0
        self.income = 10000
        self.age = 20
        self.retirement_age = 70
        self.terminal_age = 115

    def step(self, action):
        # Income reduces when you retire
        if self.age > self.retirement_age:
            self.income = 0
        # define market returns
        risk_return = 0.05
        risk_free_return = 0.02
        # Apply action
        portfolio_return = risk_return * action[0] + risk_free_return * (1 - action[0])
        self.wealth = (1 + portfolio_return) * (self.wealth - action[1]) + self.income
        reward = action
        # Time passes (philosophical consideration here)
        self.age += 1
        # Complete step if agent is older than terminal age
        if self.age > self.terminal_age:
            done = True
        else:
            done = False
        # Placeholder for info
        info = self.wealth
        return self.wealth, reward, done, info 

    def render(self, mode = 'human'):
        # no urgent need for this
        pass

    def reset(self):
        # Resetting age and wealth
        self.wealth = 0
        self.age = 20
        self.income = 10000
        return self.wealth
        
env = LifecycleEnv()
# Observation and action space 
obs_space = env.observation_space
action_space = env.action_space
print("The observation space: {}".format(obs_space))
print("The action space: {}".format(action_space))

The observation space: Box(0.0, 1.0, (1,), float32)
The action space: Box(0.0, [   1. 1000.], (2,), float32)


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [36]:
# Number of steps you run the agent for 
num_steps = 100

obs = env.reset()

for step in range(num_steps):
    # take random action, but you can also do something more intelligent
    # action = my_intelligent_agent_fn(obs) 
    action = env.action_space.sample()
    
    # apply the action
    obs, reward, done, info = env.step(action)
    
    # Render the env
    print(info)
    
    # If the epsiode is up, then start another one
    if done:
        env.reset()

# Close the env
env.close()

9057.14493852586
19206.96887296818
28969.073035730446
39586.581845295994
50938.136448518715
61478.88896647625
73358.1629605271
84708.11124576058
97311.22750087646
111988.19520345858
124391.97998481145
137378.16777220546
149459.78786557788
162319.4156163746
177565.19275580684
193127.19644526538
207589.42986233195
226927.81211038821
246936.35764018854
266791.5484657632
288466.78498720046
310965.7218069949
334119.12547585793
356318.3704943256
378797.07343646383
401968.9738838571
424704.1416648394
450551.53500185726
474451.0408901771
499287.4832814573
518992.8812343695
544891.411700852
567588.5111190111
592773.4556859152
629992.1570715585
668124.3969644825
696815.052105371
737941.0682011954
775675.8770421004
818487.1047388361
848769.6984417689
885296.889670423
929203.949039305
981645.9720354559
1026227.4957602879
1059891.5301586285
1091319.3542651664
1150696.497316151
1187872.7665000656
1235629.604452645
1291623.6964985006
1317746.638661635
1348138.7373446368
1405818.0462162157
1441553.104