In [4]:
from BankSimEnv import BankSimEnv
from MARL.NaiveA2C.ddpg_agent import Agent
import matplotlib.pyplot as plt
import numpy as np


def MA_obs_to_bank_obs(obs, bank):
    """Flatten one bank's slice of the multi-agent observation into a 1-D array.

    Args:
        obs: Mapping keyed by bank name; the entry for ``bank.BankName`` must be
            indexable, with a mapping holding 'CASH', 'CB' and 'GB' at index 0
            and a scalar at index 3.
        bank: Object exposing a ``BankName`` attribute used as the lookup key.

    Returns:
        ``np.ndarray`` built from ``[CASH, CB, GB, entry[3]]`` in that order.
    """
    entry = obs[bank.BankName]
    prices = entry[0]
    features = [prices[asset] for asset in ('CASH', 'CB', 'GB')]
    # Index 3 of the entry is used as the bank's leverage feature.
    features.append(entry[3])
    return np.asarray(features)

In [5]:
# Simulation environment used by the cells below.
env = BankSimEnv()

# One agent per bank name, all built with the same sizes and seed.
# The loop variables `name` and `agent` are deliberately kept as plain
# notebook-level names: after the loop they stay bound to the last bank.
agent_dict = {}
for name in ('AT01', 'BE04', 'FR09'):
    agent = agent_dict[name] = Agent(state_size=4, action_size=3, random_seed=0, name=name)


In [6]:

# Training loop.
#
# Each episode resets the environment and plays `max_play` rounds. In a round:
#   1. every bank with DaysInsolvent < 2 converts its observation to a flat
#      state vector and asks *its own* agent for an action;
#   2. the environment advances one step with all collected actions;
#   3. every bank that acted hands the transition
#      (state, action, reward, next_state, done) to *its own* agent.
#
# Converted states are kept in a separate `states` dict so that the observation
# dict returned by the environment is never overwritten, and so that the
# pre-step state is still available when the transition is stored.
for episode in range(100000):
    print(f'=========================================Episode {episode}===============================================')
    current_obs = env.reset()
    max_play = 5
    num_default = []
    for play in range(max_play):
        # --- 1. choose actions -------------------------------------------------
        states, actions = {}, {}
        for bank_name, bank in env.allAgentBanks.items():
            # Banks at or beyond two insolvent days no longer act.
            if bank.DaysInsolvent >= 2:
                continue
            print(f'Round {play}. Bank {bank_name}, CB: {bank.BS.Asset["CB"].Quantity}, GB: {bank.BS.Asset["GB"].Quantity}, CASH: {bank.BS.Asset["CASH"].Quantity}, OTHER: {bank.BS.Asset["OTHER"].Quantity}, LEV: {bank.get_leverage_ratio()}')
            state = MA_obs_to_bank_obs(current_obs, bank)
            states[bank_name] = state
            actions[bank_name] = agent_dict[bank_name].act(state, add_noise=True)

        # --- 2. step the environment -------------------------------------------
        # The environment takes one {'CB', 'GB', 'CASH'} mapping per bank.
        actions_dict = {
            name: {'CB': action[0], 'GB': action[1], 'CASH': action[2]}
            for name, action in actions.items()
        }
        new_obs, rewards, dones, infos = env.step(actions_dict)

        # --- 3. learn from the transition --------------------------------------
        for bank_name, bank in env.allAgentBanks.items():
            # Only banks that acted this round have a transition to store; the
            # insolvency check mirrors the one used when choosing actions.
            if bank_name not in actions or bank.DaysInsolvent >= 2:
                continue
            next_state = MA_obs_to_bank_obs(new_obs, bank)
            # Use this bank's own agent and its pre-step state, so the stored
            # transition is (s, a, r, s', done) rather than (s', a, r, s', done).
            agent_dict[bank_name].step(
                states[bank_name],
                actions[bank_name],
                rewards[bank_name],
                next_state,
                dones[bank_name],
            )

        current_obs = new_obs
        num_default.append(infos['NUM_DEFAULT'])

    # plt.plot(num_default)
    # plt.ylabel('Number of defaults')
    # plt.show()


Round 0. Bank AT01, CB: 6546.0, GB: 27695, CASH: 13254.054054054053, OTHER: 217586.027027027, LEV: 0.050540141932813434
Round 0. Bank BE04, CB: 6127.0, GB: 40016, CASH: 12445.723684210527, OTHER: 190325.75, LEV: 0.053189439871996105
Round 0. Bank FR09, CB: 26519.0, GB: 105238, CASH: 56987.275449101806, OTHER: 951001.2335329342, LEV: 0.06247168265709347
Round 1. Bank AT01, CB: 2807.28406226635, GB: 13429.057824015617, CASH: 5527.722153290328, OTHER: 217586.027027027, LEV: -0.04897987527572821
Round 1. Bank BE04, CB: 2986.3330043554306, GB: 20390.49169778824, CASH: 5210.070232551589, OTHER: 190325.75, LEV: -0.07291355757462206
Round 1. Bank FR09, CB: 11533.574274599552, GB: 55813.261807858944, CASH: 23442.13429380486, OTHER: 951001.2335329342, LEV: -0.02368720659231311
Round 0. Bank AT01, CB: 6546.0, GB: 27695, CASH: 13254.054054054053, OTHER: 217586.027027027, LEV: 0.050540141932813434
Round 0. Bank BE04, CB: 6127.0, GB: 40016, CASH: 12445.723684210527, OTHER: 190325.75, LEV: 0.05318943

KeyboardInterrupt: 