In [2]:
#from envs.LinearBertrandInflation_final3 import LinearBertrandEnv
from envs.LinearBertrandInflation_profe import LinearBertrandEnv
from envs.BertrandInflation_profe import BertrandEnv
from agents.sac_moving4 import SACAgent
from replay_buffer_final import ReplayBuffer
from tqdm import tqdm
import numpy as np

# --- Experiment configuration ------------------------------------------------
N = 2                    # number of agents; passed to the environment as `N`
k = 10                   # passed to the environment as `k`; also sizes the observation vector
rho = 5e-3               # passed to the environment as `rho`
total_timesteps = 500    # environment steps per episode (an earlier setting was 30_000)
episodes = 1             # number of episodes; metrics are averaged over them afterwards
inflation_start = 0      # passed to the environment as `inflation_start`

# --- Environment, agents and replay buffer -----------------------------------
# Alternative environment, kept for reference:
# env = LinearBertrandEnv(N=N, k=k, rho=rho, v=int(k * 1.5), timesteps=total_timesteps, moving_dim=200, use_moving_avg=True, inflation_start=inflation_start, max_var=2.0)
env = BertrandEnv(
    N=N,
    k=k,
    rho=rho,
    v=int(k * 1.5),
    timesteps=total_timesteps,
    use_moving_avg=True,
    inflation_start=inflation_start,
    max_var=2.0,
)

# Size of the observation handed to every agent (same value as N * k + k + 1).
dim_states = k * (N + 1) + 1

# One independent SAC learner per agent in the environment.
agents = [SACAgent(dim_states, 1, gamma=0.95) for _ in range(env.N)]

buffer = ReplayBuffer(dim_states=dim_states, N=env.N, sample_size=256, buffer_size=1_000_000)

# --- Pre-allocated metric storage --------------------------------------------
# Per-agent metrics carry a trailing agent axis; market-level metrics do not.
per_agent_shape = (episodes, total_timesteps, N)
market_shape = (episodes, total_timesteps)

prices_history = np.zeros(per_agent_shape)
actions_history = np.zeros(per_agent_shape)
costs_history = np.zeros(market_shape)
monopoly_history = np.zeros(market_shape)
nash_history = np.zeros(market_shape)
rewards_history = np.zeros(per_agent_shape)
delta_history = np.zeros(market_shape)
quantities_history = np.zeros(per_agent_shape)
pi_N_history = np.zeros(market_shape)
pi_M_history = np.zeros(market_shape)
A_history = np.zeros(market_shape)

# Each pre-allocated array paired with the name of the environment log that
# fills it once an episode has finished.
env_logs = (
    (prices_history, 'prices_history'),
    (actions_history, 'action_history'),
    (costs_history, 'costs_history'),
    (monopoly_history, 'monopoly_history'),
    (nash_history, 'nash_history'),
    (rewards_history, 'rewards_history'),
    (delta_history, 'metric_history'),
    (quantities_history, 'quantities_history'),
    (pi_N_history, 'pi_N_history'),
    (pi_M_history, 'pi_M_history'),
    (A_history, 'A_history'),
)

for episode in range(episodes):
    ob_t = env.reset()
    for timestep in tqdm(range(total_timesteps)):
        # All agents choose their action from the same observation.
        actions = [learner.select_action(ob_t) for learner in agents]
        ob_t1, rewards, done, _ = env.step(actions)
        buffer.store_transition(ob_t, actions, rewards, ob_t1, done)

        # Start updating only after more than `sample_size` steps of the
        # current episode have been played.
        if timestep > buffer.sample_size:
            for agent_idx in range(N):
                agents[agent_idx].update(*buffer.sample(agent_idx))

        ob_t = ob_t1

    # Copy the last `total_timesteps` entries of every environment log into
    # this episode's row.
    # NOTE(review): the tail slice suggests the logs may be longer than one
    # episode (e.g. accumulate across resets) - confirm against the env.
    for target, attr in env_logs:
        target[episode] = np.array(getattr(env, attr))[-total_timesteps:]
    
# Collapse the episode axis (axis 0): every history becomes its mean across
# episodes, leaving one row per timestep (plus the agent axis where present).
prices_history = prices_history.mean(axis=0)
actions_history = actions_history.mean(axis=0)
costs_history = costs_history.mean(axis=0)
monopoly_history = monopoly_history.mean(axis=0)
nash_history = nash_history.mean(axis=0)
rewards_history = rewards_history.mean(axis=0)
delta_history = delta_history.mean(axis=0)
quantities_history = quantities_history.mean(axis=0)
pi_N_history = pi_N_history.mean(axis=0)
pi_M_history = pi_M_history.mean(axis=0)
# Original note on A: "equal disposition to pay" - presumably a willingness-to-pay
# parameter shared by the agents; confirm against the environment.
A_history = A_history.mean(axis=0)

100%|██████████| 500/500 [00:04<00:00, 104.78it/s]


In [None]:
import pandas as pd

# Market-level series: one value per timestep.
columns = {
    'costs': costs_history,
    'pi_N': pi_N_history,
    'pi_M': pi_M_history,
    'delta': delta_history,
    'p_nash': nash_history,
    'p_monopoly': monopoly_history,
    'A': A_history,
}

# Agent-level series: one column per agent and metric, suffixed with the agent index.
for agent_idx in range(env.N):
    columns[f'actions_{agent_idx}'] = actions_history[:, agent_idx]
    columns[f'prices_{agent_idx}'] = prices_history[:, agent_idx]
    columns[f'quantities_{agent_idx}'] = quantities_history[:, agent_idx]
    columns[f'rewards_{agent_idx}'] = rewards_history[:, agent_idx]

results = pd.DataFrame(columns)

# Persist as a ';'-separated CSV (UTF-8 with BOM); the plotting cells read it back.
results.to_csv('test.csv', index=False, sep=';', encoding='utf-8-sig')
results

In [None]:
from utils.plot_metrics import get_rolling

# NOTE(review): the window (1000) is larger than the 500 timesteps configured
# in the first cell; depending on how get_rolling treats short series the
# smoothed columns may come out mostly or entirely NaN - confirm.
window_size = 1000
df_plot = pd.read_csv('test.csv', sep=';', encoding='utf-8-sig')

# Group the per-agent columns by metric using the substring in their name.
metrics = ('actions', 'prices', 'rewards', 'quantities')
cols_by_metric = {
    metric: [name for name in df_plot.columns if metric in name]
    for metric in metrics
}
actions_cols = cols_by_metric['actions']
price_cols = cols_by_metric['prices']
rewards_cols = cols_by_metric['rewards']
quantities_cols = cols_by_metric['quantities']

n_agents = len(actions_cols)

# Cross-agent average of each metric, stored as avg_<metric>.
for metric, cols in cols_by_metric.items():
    df_plot[f'avg_{metric}'] = df_plot[cols].mean(axis=1)
avg_cols = [name for name in df_plot.columns if 'avg' in name]

# Smooth everything that gets plotted; the raw per-agent action columns are
# intentionally left out, exactly as before (their average is still included).
window_cols = [*price_cols, *rewards_cols, *quantities_cols, *avg_cols, 'delta']
for name in window_cols:
    df_plot[name] = get_rolling(df_plot[name], window_size=window_size)

df_plot.tail()

In [None]:
import matplotlib.pyplot as plt

# Price path of every agent (as processed by get_rolling in the previous cell)
# against the monopoly and Nash price benchmarks.
plt.figure(figsize=(12, 4))
for agent in range(n_agents):
    plt.plot(df_plot[f'prices_{agent}'], label=f'Agent {agent}')
plt.plot(df_plot['p_monopoly'], color='red', label='Monopoly price')
plt.plot(df_plot['p_nash'], color='green', label='Nash price')
plt.title('Prices per agent')
plt.xlabel('Timesteps')
plt.ylabel('Prices')
plt.legend()
# Saved under its own name: the average-price cell that follows also writes
# 'plot.png', which previously overwrote this figure on every run.
plt.savefig('plot_prices_per_agent.png')

In [None]:
import matplotlib.pyplot as plt

# Cross-agent average price against the monopoly and Nash price benchmarks,
# drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(df_plot['avg_prices'], label='Average prices')
ax.plot(df_plot['p_monopoly'], label='Monopoly price', color='red')
ax.plot(df_plot['p_nash'], label='Nash price', color='green')
ax.set_xlabel('Timesteps')
ax.set_ylabel('Prices')
ax.legend()
fig.savefig('plot.png')

In [None]:
# Average realised profits against the Nash and monopoly profit benchmarks.
plt.figure(figsize=(12, 4))

# (column, line options) in drawing order; the first line keeps the default colour.
profit_series = (
    ('avg_rewards', {'label': 'Average profits'}),
    ('pi_N', {'label': 'Nash profits', 'color': 'green'}),
    ('pi_M', {'label': 'Monopoly profits', 'color': 'red'}),
)
for column, line_kwargs in profit_series:
    plt.plot(df_plot[column], **line_kwargs)

plt.xlabel('Timesteps')
plt.ylabel('Profits')
plt.legend()

In [None]:
# Delta metric over time with its two reference levels.
# The legend labels were swapped relative to the colour convention used by the
# other plots (red = monopoly, green = Nash), and the series itself carried the
# label of the profits plot; both are corrected here.
# NOTE(review): assumes delta is normalised so that 0 corresponds to Nash
# profits and 1 to monopoly profits - confirm against env.metric_history.
plt.figure(figsize=(12, 4))
plt.plot(df_plot['delta'], label='Delta')
plt.axhline(1, color='red', label='Monopoly profits')
plt.axhline(0, color='green', label='Nash profits')
plt.xlabel('Timesteps')
plt.ylabel('Delta')
plt.legend()