### Simple Q-learning for making offers to a seller with unknown reservation price. 

In [86]:
import numpy as np

# --- Environment -------------------------------------------------------------
# One-shot bargaining: the buyer posts a bid and the trade happens whenever the
# bid is at least the seller's reservation price. The learner never reads
# value_seller directly; it only sees the reward each bid produces.
value_buyer = 1
value_seller = 0.2

# --- Learning hyperparameters ------------------------------------------------
num_actions = 21        # Number of points on the bid grid
num_episodes = 1000     # One bid per episode
alpha = 0.1             # Learning rate
gamma = 0               # Discount factor; 0 switches the bootstrap term off
initial_epsilon = 0.99  # Starting exploration probability
epsilon_decay = 0.995   # Multiplicative decay applied after every episode
min_epsilon = 0.01      # Floor for the exploration probability

# One Q-value per bid (there is no state). Despite its name, bid2action is
# indexed by the action and yields the bid: 0.00, 0.05, ..., 1.00.
q_table = np.zeros(num_actions)
bid2action = np.linspace(0, 1, num_actions)

epsilon = initial_epsilon
for episode in range(num_episodes):

    # Epsilon-greedy choice: a uniformly random bid with probability epsilon,
    # otherwise the bid with the highest current estimate.
    exploring = np.random.uniform(0, 1) < epsilon
    action = np.random.randint(0, num_actions) if exploring else np.argmax(q_table)
    bid = bid2action[action]

    # An accepted bid earns the buyer's surplus; a rejected one earns nothing.
    reward = value_buyer - bid if bid >= value_seller else 0

    # Move the estimate for the chosen bid a fraction alpha towards the target.
    td_target = reward + gamma * np.max(q_table)
    q_table[action] += alpha * (td_target - q_table[action])

    # Shrink the exploration rate, but never below min_epsilon.
    epsilon *= epsilon_decay
    if epsilon < min_epsilon:
        epsilon = min_epsilon

    # Per-episode trace (epsilon shown is the value after this episode's decay).
    print("Bid:", round(bid, 2), "Reward:", round(reward, 2), "Epsilon:", round(epsilon, 2))


Bid: 0.05 Reward: 0 Epsilon: 0.99
Bid: 0.25 Reward: 0.75 Epsilon: 0.98
Bid: 0.85 Reward: 0.15 Epsilon: 0.98
Bid: 0.85 Reward: 0.15 Epsilon: 0.97
Bid: 0.45 Reward: 0.55 Epsilon: 0.97
Bid: 0.15 Reward: 0 Epsilon: 0.96
Bid: 0.3 Reward: 0.7 Epsilon: 0.96
Bid: 0.25 Reward: 0.75 Epsilon: 0.95
Bid: 0.8 Reward: 0.2 Epsilon: 0.95
Bid: 0.75 Reward: 0.25 Epsilon: 0.94
Bid: 0.7 Reward: 0.3 Epsilon: 0.94
Bid: 0.5 Reward: 0.5 Epsilon: 0.93
Bid: 0.25 Reward: 0.75 Epsilon: 0.93
Bid: 0.95 Reward: 0.05 Epsilon: 0.92
Bid: 0.1 Reward: 0 Epsilon: 0.92
Bid: 0.35 Reward: 0.65 Epsilon: 0.91
Bid: 0.25 Reward: 0.75 Epsilon: 0.91
Bid: 0.5 Reward: 0.5 Epsilon: 0.9
Bid: 1.0 Reward: 0.0 Epsilon: 0.9
Bid: 0.35 Reward: 0.65 Epsilon: 0.9
Bid: 0.15 Reward: 0 Epsilon: 0.89
Bid: 0.25 Reward: 0.75 Epsilon: 0.89
Bid: 0.5 Reward: 0.5 Epsilon: 0.88
Bid: 0.25 Reward: 0.75 Epsilon: 0.88
Bid: 0.0 Reward: 0 Epsilon: 0.87
Bid: 0.95 Reward: 0.05 Epsilon: 0.87
Bid: 0.45 Reward: 0.55 Epsilon: 0.86
Bid: 0.25 Reward: 0.75 Epsilon: 0.8

In [87]:
# Show the Q-values left by the training loop that ran before this cell; entry i
# is the value estimate for the i-th bid on the grid (bid2action[i]).
print(q_table)

[0.         0.         0.         0.         0.61698566 0.75
 0.36519217 0.33910701 0.1626     0.28693671 0.32566078 0.23476639
 0.1874236  0.24016629 0.21527114 0.1423832  0.11390656 0.07825546
 0.0468559  0.03587852 0.        ]


### Q-learning against seller with random reservation price

In [92]:
import numpy as np

# Parameters
value_buyer = 1          # Buyer's valuation of the item
avg_value_seller = 0.2   # Mean of the seller's reservation price
std_value_seller = 0.05  # Standard deviation of the seller's reservation price

# Hyperparameters
num_actions = 21
num_episodes = 10000
alpha = 0.1  # Learning rate
gamma = 0  # No discounting: every episode is a single offer
initial_epsilon = 0.99
epsilon_decay = 0.9995  # Decay factor for epsilon
min_epsilon = 0.01
log_every = 1  # Print every N-th episode; 1 logs all of them, raise it to shorten the output


def format_episode_log(bid, value_seller, reward, epsilon):
    """Return the one-line trace for an episode, each number rounded to 2 decimals.

    Each quantity appears exactly once, in the order bid, seller value, reward,
    epsilon.
    """
    return (
        f"Bid: {round(bid, 2)} "
        f"Seller value: {round(value_seller, 2)} "
        f"Reward: {round(reward, 2)} "
        f"Epsilon: {round(epsilon, 2)}"
    )


# Q-table initialization: one value per bid, there is no state.
q_table = np.zeros((num_actions,))
# Note: despite its name this array maps an action index to its bid value.
bid2action = np.linspace(0, 1, num_actions)

# Training loop
epsilon = initial_epsilon
for episode in range(num_episodes):

    # Seller draws a fresh reservation price every episode (scalar draw).
    value_seller = np.random.normal(avg_value_seller, std_value_seller)

    # Select action using epsilon-greedy strategy
    if np.random.uniform(0, 1) < epsilon:
        action = np.random.randint(0, num_actions)  # Exploration: random bid
    else:
        action = np.argmax(q_table)  # Exploitation: best bid so far

    bid = bid2action[action]  # Convert action index to bid value

    if bid >= value_seller:
        reward = value_buyer - bid  # Trade happens: buyer keeps the surplus
    else:
        reward = 0  # Bid rejected: no trade, no reward

    # Q-value update; with gamma = 0 the target is just the observed reward.
    q_table[action] += alpha * (reward + gamma * np.max(q_table) - q_table[action])

    # Decay epsilon to reduce exploration over time
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

    # Per-episode trace (epsilon shown is the value after this episode's decay).
    if episode % log_every == 0:
        print(format_episode_log(bid, value_seller, reward, epsilon))



Bid: 0.1 Seller value: 0.15 0.1 Reward: 0 Epsilon: 0.99
Bid: 0.8 Seller value: 0.2 0.8 Reward: 0.2 Epsilon: 0.99
Bid: 0.05 Seller value: 0.18 0.05 Reward: 0 Epsilon: 0.99
Bid: 1.0 Seller value: 0.18 1.0 Reward: 0.0 Epsilon: 0.99
Bid: 0.35 Seller value: 0.16 0.35 Reward: 0.65 Epsilon: 0.99
Bid: 0.15 Seller value: 0.22 0.15 Reward: 0 Epsilon: 0.99
Bid: 0.7 Seller value: 0.16 0.7 Reward: 0.3 Epsilon: 0.99
Bid: 0.1 Seller value: 0.19 0.1 Reward: 0 Epsilon: 0.99
Bid: 0.65 Seller value: 0.15 0.65 Reward: 0.35 Epsilon: 0.99
Bid: 0.75 Seller value: 0.19 0.75 Reward: 0.25 Epsilon: 0.99
Bid: 0.2 Seller value: 0.19 0.2 Reward: 0.8 Epsilon: 0.98
Bid: 0.9 Seller value: 0.17 0.9 Reward: 0.1 Epsilon: 0.98
Bid: 0.7 Seller value: 0.21 0.7 Reward: 0.3 Epsilon: 0.98
Bid: 0.1 Seller value: 0.16 0.1 Reward: 0 Epsilon: 0.98
Bid: 0.1 Seller value: 0.16 0.1 Reward: 0 Epsilon: 0.98
Bid: 0.1 Seller value: 0.28 0.1 Reward: 0 Epsilon: 0.98
Bid: 0.45 Seller value: 0.14 0.45 Reward: 0.55 Epsilon: 0.98
Bid: 0.0 Sell

Bid: 0.35 Seller value: 0.17 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.6 Seller value: 0.13 0.6 Reward: 0.4 Epsilon: 0.33
Bid: 0.05 Seller value: 0.23 0.05 Reward: 0 Epsilon: 0.33
Bid: 0.35 Seller value: 0.15 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.35 Seller value: 0.18 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.35 Seller value: 0.17 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.35 Seller value: 0.19 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.35 Seller value: 0.25 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.35 Seller value: 0.18 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.35 Seller value: 0.13 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.65 Seller value: 0.14 0.65 Reward: 0.35 Epsilon: 0.33
Bid: 0.35 Seller value: 0.19 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.1 Seller value: 0.17 0.1 Reward: 0 Epsilon: 0.33
Bid: 0.35 Seller value: 0.23 0.35 Reward: 0.65 Epsilon: 0.33
Bid: 0.85 Seller value: 0.18 0.85 Reward: 0.15 Epsilon: 0.33
Bid: 0.7 Seller value: 0.19 0.7 Reward: 0.3 Epsilon: 0.33
Bid: 0.35 Seller value: 0.27 0.35 Rewa

Bid: 0.4 Seller value: 0.19 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.19 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.24 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.2 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.23 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.21 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.15 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.13 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.17 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.25 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.17 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.2 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.21 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.25 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.23 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.24 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller value: 0.13 0.4 Reward: 0.6 Epsilon: 0.01
Bid: 0.4 Seller 

### Q-learning when buyer value changes and is known, but seller value is unknown. 

In [94]:
import numpy as np

# Parameters
avg_value_seller = 0.7   # Mean of the seller's reservation price
std_value_seller = 0.05  # Standard deviation of the seller's reservation price

# Hyperparameters
num_actions = 21
num_episodes = 50000
alpha = 0.1  # Learning rate
gamma = 0  # No discounting: every episode is a single offer
initial_epsilon = 0.99
epsilon_decay = 0.99995  # Decay factor for epsilon
min_epsilon = 0.01
log_every = 1  # Print every N-th episode; 1 logs all of them, raise it to shorten the output

# The state is the buyer's own (known) value, discretised on an evenly spaced
# grid of num_states points covering [0.5, 1.0]. The grid size is kept equal to
# the number of actions, as before, but is now named explicitly.
num_states = num_actions
buyer_value_divisions = np.linspace(0.5, 1.0, num_states)

# Q-table initialization: one row per buyer value, one column per bid.
q_table = np.zeros((num_states, num_actions))
action2bid = np.linspace(0, 1, num_actions)  # Mapping of action index to bid value

# Training loop
epsilon = initial_epsilon
for episode in range(num_episodes):

    # Seller realizes value
    value_seller = np.random.normal(avg_value_seller, std_value_seller, 1)[0]

    # Buyer realizes value: draw the grid index uniformly and read the value
    # from the grid, so the state is known without searching for it.
    state = np.random.randint(0, num_states)
    value_buyer = buyer_value_divisions[state]

    # Select action using epsilon-greedy strategy
    if np.random.uniform(0, 1) < epsilon:
        action = np.random.randint(0, num_actions)  # Exploration: random bid
    else:
        action = np.argmax(q_table[state])  # Exploitation: best bid for this buyer value

    bid = action2bid[action]  # Convert action index to bid value

    if bid >= value_seller:
        # Trade happens. The surplus is negative when the bid exceeds the
        # buyer's value, which is what teaches the agent not to overpay.
        reward = value_buyer - bid
    else:
        reward = 0  # Bid rejected: no trade, no reward

    # Q-value update; with gamma = 0 the target is just the observed reward.
    q_table[state, action] += alpha * (reward + gamma * np.max(q_table[state]) - q_table[state, action])

    # Decay epsilon to reduce exploration over time
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

    # Per-episode trace (epsilon shown is the value after this episode's decay).
    if episode % log_every == 0:
        print("Seller value:", round(value_seller, 2), "Buyer value:", round(value_buyer, 2), "Bid:", round(bid, 2), "Reward:", round(reward, 2), "Epsilon:", round(epsilon, 2))


Seller value: 0.73 Buyer value: 0.68 Bid: 0.95 Reward: -0.28 Epsilon: 0.99
Seller value: 0.72 Buyer value: 0.85 Bid: 0.45 Reward: 0 Epsilon: 0.99
Seller value: 0.7 Buyer value: 0.82 Bid: 0.7 Reward: 0 Epsilon: 0.99
Seller value: 0.76 Buyer value: 0.85 Bid: 0.05 Reward: 0 Epsilon: 0.99
Seller value: 0.75 Buyer value: 0.75 Bid: 0.3 Reward: 0 Epsilon: 0.99
Seller value: 0.64 Buyer value: 0.62 Bid: 0.3 Reward: 0 Epsilon: 0.99
Seller value: 0.61 Buyer value: 0.9 Bid: 0.15 Reward: 0 Epsilon: 0.99
Seller value: 0.61 Buyer value: 0.92 Bid: 0.3 Reward: 0 Epsilon: 0.99
Seller value: 0.71 Buyer value: 0.55 Bid: 0.2 Reward: 0 Epsilon: 0.99
Seller value: 0.67 Buyer value: 0.65 Bid: 0.0 Reward: 0 Epsilon: 0.99
Seller value: 0.77 Buyer value: 0.8 Bid: 0.75 Reward: 0 Epsilon: 0.99
Seller value: 0.72 Buyer value: 0.85 Bid: 1.0 Reward: -0.15 Epsilon: 0.99
Seller value: 0.76 Buyer value: 0.52 Bid: 0.55 Reward: 0 Epsilon: 0.99
Seller value: 0.85 Buyer value: 0.7 Bid: 0.1 Reward: 0 Epsilon: 0.99
Seller val

Seller value: 0.72 Buyer value: 0.57 Bid: 0.1 Reward: 0 Epsilon: 0.9
Seller value: 0.77 Buyer value: 0.9 Bid: 0.25 Reward: 0 Epsilon: 0.9
Seller value: 0.69 Buyer value: 0.82 Bid: 0.6 Reward: 0 Epsilon: 0.9
Seller value: 0.69 Buyer value: 0.78 Bid: 0.55 Reward: 0 Epsilon: 0.9
Seller value: 0.69 Buyer value: 0.85 Bid: 0.95 Reward: -0.1 Epsilon: 0.9
Seller value: 0.71 Buyer value: 0.95 Bid: 0.05 Reward: 0 Epsilon: 0.9
Seller value: 0.81 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.9
Seller value: 0.76 Buyer value: 0.6 Bid: 0.4 Reward: 0 Epsilon: 0.9
Seller value: 0.76 Buyer value: 0.78 Bid: 0.85 Reward: -0.08 Epsilon: 0.9
Seller value: 0.65 Buyer value: 0.8 Bid: 0.2 Reward: 0 Epsilon: 0.9
Seller value: 0.68 Buyer value: 0.52 Bid: 0.85 Reward: -0.33 Epsilon: 0.9
Seller value: 0.76 Buyer value: 0.52 Bid: 0.6 Reward: 0 Epsilon: 0.9
Seller value: 0.69 Buyer value: 0.72 Bid: 0.75 Reward: -0.03 Epsilon: 0.9
Seller value: 0.73 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.9
Seller value: 0.

Seller value: 0.69 Buyer value: 0.52 Bid: 0.55 Reward: 0 Epsilon: 0.76
Seller value: 0.7 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.76
Seller value: 0.69 Buyer value: 0.6 Bid: 0.8 Reward: -0.2 Epsilon: 0.76
Seller value: 0.63 Buyer value: 0.62 Bid: 0.4 Reward: 0 Epsilon: 0.76
Seller value: 0.68 Buyer value: 0.65 Bid: 0.3 Reward: 0 Epsilon: 0.76
Seller value: 0.72 Buyer value: 0.5 Bid: 0.15 Reward: 0 Epsilon: 0.76
Seller value: 0.68 Buyer value: 1.0 Bid: 0.6 Reward: 0 Epsilon: 0.76
Seller value: 0.65 Buyer value: 0.65 Bid: 0.05 Reward: 0 Epsilon: 0.76
Seller value: 0.66 Buyer value: 0.52 Bid: 0.8 Reward: -0.28 Epsilon: 0.76
Seller value: 0.72 Buyer value: 0.62 Bid: 0.8 Reward: -0.18 Epsilon: 0.76
Seller value: 0.72 Buyer value: 0.78 Bid: 0.2 Reward: 0 Epsilon: 0.76
Seller value: 0.64 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.76
Seller value: 0.69 Buyer value: 0.55 Bid: 0.7 Reward: -0.15 Epsilon: 0.76
Seller value: 0.73 Buyer value: 0.82 Bid: 0.0 Reward: 0 Epsilon: 0.76
Sel

Seller value: 0.65 Buyer value: 0.75 Bid: 0.4 Reward: 0 Epsilon: 0.7
Seller value: 0.73 Buyer value: 0.75 Bid: 0.45 Reward: 0 Epsilon: 0.7
Seller value: 0.83 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.7
Seller value: 0.72 Buyer value: 0.72 Bid: 0.75 Reward: -0.03 Epsilon: 0.7
Seller value: 0.82 Buyer value: 0.55 Bid: 0.85 Reward: -0.3 Epsilon: 0.7
Seller value: 0.64 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.7
Seller value: 0.8 Buyer value: 0.95 Bid: 0.0 Reward: 0 Epsilon: 0.7
Seller value: 0.72 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.7
Seller value: 0.67 Buyer value: 0.55 Bid: 0.45 Reward: 0 Epsilon: 0.7
Seller value: 0.67 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.7
Seller value: 0.69 Buyer value: 0.52 Bid: 0.95 Reward: -0.43 Epsilon: 0.7
Seller value: 0.68 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.7
Seller value: 0.73 Buyer value: 0.5 Bid: 1.0 Reward: -0.5 Epsilon: 0.69
Seller value: 0.7 Buyer value: 1.0 Bid: 0.0 Reward: 0 Epsilon: 0.69
Seller v

Seller value: 0.74 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.63
Seller value: 0.66 Buyer value: 0.52 Bid: 0.7 Reward: -0.18 Epsilon: 0.63
Seller value: 0.67 Buyer value: 0.85 Bid: 0.2 Reward: 0 Epsilon: 0.63
Seller value: 0.64 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.63
Seller value: 0.72 Buyer value: 0.72 Bid: 0.65 Reward: 0 Epsilon: 0.63
Seller value: 0.61 Buyer value: 1.0 Bid: 0.4 Reward: 0 Epsilon: 0.63
Seller value: 0.77 Buyer value: 0.92 Bid: 0.55 Reward: 0 Epsilon: 0.63
Seller value: 0.69 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.63
Seller value: 0.72 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.63
Seller value: 0.69 Buyer value: 0.57 Bid: 0.7 Reward: -0.13 Epsilon: 0.63
Seller value: 0.73 Buyer value: 0.68 Bid: 0.0 Reward: 0 Epsilon: 0.63
Seller value: 0.75 Buyer value: 0.65 Bid: 0.7 Reward: 0 Epsilon: 0.63
Seller value: 0.67 Buyer value: 0.95 Bid: 1.0 Reward: -0.05 Epsilon: 0.63
Seller value: 0.75 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.63
S

Seller value: 0.73 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.59
Seller value: 0.72 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.59
Seller value: 0.69 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.59
Seller value: 0.7 Buyer value: 0.8 Bid: 0.5 Reward: 0 Epsilon: 0.59
Seller value: 0.69 Buyer value: 0.82 Bid: 0.7 Reward: 0.12 Epsilon: 0.59
Seller value: 0.71 Buyer value: 0.9 Bid: 0.8 Reward: 0.1 Epsilon: 0.59
Seller value: 0.75 Buyer value: 0.8 Bid: 1.0 Reward: -0.2 Epsilon: 0.59
Seller value: 0.66 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.59
Seller value: 0.8 Buyer value: 0.68 Bid: 0.6 Reward: 0 Epsilon: 0.59
Seller value: 0.65 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.59
Seller value: 0.72 Buyer value: 0.88 Bid: 0.1 Reward: 0 Epsilon: 0.59
Seller value: 0.69 Buyer value: 0.92 Bid: 0.85 Reward: 0.07 Epsilon: 0.59
Seller value: 0.63 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.59
Seller value: 0.68 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.59
Seller va

Seller value: 0.7 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.54
Seller value: 0.8 Buyer value: 0.65 Bid: 1.0 Reward: -0.35 Epsilon: 0.54
Seller value: 0.69 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.54
Seller value: 0.73 Buyer value: 0.57 Bid: 0.65 Reward: 0 Epsilon: 0.54
Seller value: 0.69 Buyer value: 0.88 Bid: 0.55 Reward: 0 Epsilon: 0.54
Seller value: 0.64 Buyer value: 0.6 Bid: 0.95 Reward: -0.35 Epsilon: 0.54
Seller value: 0.76 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.54
Seller value: 0.67 Buyer value: 0.8 Bid: 0.95 Reward: -0.15 Epsilon: 0.54
Seller value: 0.85 Buyer value: 0.8 Bid: 0.75 Reward: 0 Epsilon: 0.54
Seller value: 0.63 Buyer value: 1.0 Bid: 0.15 Reward: 0 Epsilon: 0.54
Seller value: 0.7 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.54
Seller value: 0.69 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.54
Seller value: 0.73 Buyer value: 0.78 Bid: 0.05 Reward: 0 Epsilon: 0.54
Seller value: 0.68 Buyer value: 0.85 Bid: 0.4 Reward: 0 Epsilon

Seller value: 0.66 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.47
Seller value: 0.73 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.47
Seller value: 0.72 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.47
Seller value: 0.66 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.47
Seller value: 0.63 Buyer value: 0.92 Bid: 0.2 Reward: 0 Epsilon: 0.47
Seller value: 0.6 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.47
Seller value: 0.74 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.47
Seller value: 0.75 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.47
Seller value: 0.7 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.47
Seller value: 0.74 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.47
Seller value: 0.74 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.47
Seller value: 0.7 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.47
Seller value: 0.65 Buyer value: 0.8 Bid: 0.55 Reward: 0 Epsilon: 0.47
Seller value: 0.66 Buyer value: 0.78 Bid: 0.65 Reward: 0 Epsilon: 0.47

Seller value: 0.78 Buyer value: 0.9 Bid: 0.75 Reward: 0 Epsilon: 0.43
Seller value: 0.67 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.43
Seller value: 0.71 Buyer value: 0.82 Bid: 0.7 Reward: 0 Epsilon: 0.43
Seller value: 0.72 Buyer value: 0.88 Bid: 0.45 Reward: 0 Epsilon: 0.43
Seller value: 0.74 Buyer value: 0.5 Bid: 0.75 Reward: -0.25 Epsilon: 0.43
Seller value: 0.67 Buyer value: 0.6 Bid: 0.2 Reward: 0 Epsilon: 0.43
Seller value: 0.76 Buyer value: 0.78 Bid: 0.35 Reward: 0 Epsilon: 0.43
Seller value: 0.62 Buyer value: 0.9 Bid: 0.1 Reward: 0 Epsilon: 0.43
Seller value: 0.68 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.43
Seller value: 0.82 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.43
Seller value: 0.63 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.43
Seller value: 0.75 Buyer value: 0.9 Bid: 0.55 Reward: 0 Epsilon: 0.43
Seller value: 0.7 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.43
Seller value: 0.65 Buyer value: 0.52 Bid: 0.35 Reward: 0 Epsilon: 0.43
Selle

Seller value: 0.67 Buyer value: 0.85 Bid: 0.7 Reward: 0.15 Epsilon: 0.4
Seller value: 0.75 Buyer value: 0.8 Bid: 0.4 Reward: 0 Epsilon: 0.4
Seller value: 0.66 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.4
Seller value: 0.69 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.4
Seller value: 0.76 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.4
Seller value: 0.66 Buyer value: 0.88 Bid: 1.0 Reward: -0.12 Epsilon: 0.4
Seller value: 0.72 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.4
Seller value: 0.72 Buyer value: 0.65 Bid: 0.4 Reward: 0 Epsilon: 0.4
Seller value: 0.72 Buyer value: 0.85 Bid: 0.6 Reward: 0 Epsilon: 0.4
Seller value: 0.61 Buyer value: 0.68 Bid: 0.65 Reward: 0.03 Epsilon: 0.4
Seller value: 0.71 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.4
Seller value: 0.64 Buyer value: 0.55 Bid: 1.0 Reward: -0.45 Epsilon: 0.4
Seller value: 0.7 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.4
Seller value: 0.7 Buyer value: 0.57 Bid: 0.85 Reward: -0.28 Epsilon: 0.4
Seller 

Seller value: 0.71 Buyer value: 0.72 Bid: 1.0 Reward: -0.28 Epsilon: 0.38
Seller value: 0.75 Buyer value: 0.82 Bid: 0.05 Reward: 0 Epsilon: 0.38
Seller value: 0.69 Buyer value: 0.85 Bid: 0.7 Reward: 0.15 Epsilon: 0.38
Seller value: 0.54 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.38
Seller value: 0.77 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.38
Seller value: 0.77 Buyer value: 0.92 Bid: 0.7 Reward: 0 Epsilon: 0.38
Seller value: 0.71 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.38
Seller value: 0.65 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.38
Seller value: 0.75 Buyer value: 0.55 Bid: 0.9 Reward: -0.35 Epsilon: 0.38
Seller value: 0.75 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.38
Seller value: 0.65 Buyer value: 0.85 Bid: 0.7 Reward: 0.15 Epsilon: 0.38
Seller value: 0.76 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.38
Seller value: 0.59 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.38
Seller value: 0.7 Buyer value: 0.57 Bid: 0.05 Reward: 0 Epsilon: 0

Seller value: 0.65 Buyer value: 0.85 Bid: 0.7 Reward: 0.15 Epsilon: 0.36
Seller value: 0.78 Buyer value: 0.98 Bid: 0.7 Reward: 0 Epsilon: 0.36
Seller value: 0.65 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.36
Seller value: 0.69 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.36
Seller value: 0.75 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.36
Seller value: 0.71 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.36
Seller value: 0.81 Buyer value: 0.78 Bid: 0.75 Reward: 0 Epsilon: 0.36
Seller value: 0.78 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.36
Seller value: 0.74 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.36
Seller value: 0.65 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.36
Seller value: 0.68 Buyer value: 0.52 Bid: 0.85 Reward: -0.33 Epsilon: 0.36
Seller value: 0.87 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.36
Seller value: 0.7 Buyer value: 0.6 Bid: 1.0 Reward: -0.4 Epsilon: 0.36
Seller value: 0.68 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 

Seller value: 0.73 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.35
Seller value: 0.74 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.35
Seller value: 0.73 Buyer value: 0.98 Bid: 0.45 Reward: 0 Epsilon: 0.35
Seller value: 0.82 Buyer value: 1.0 Bid: 0.8 Reward: 0 Epsilon: 0.35
Seller value: 0.76 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.35
Seller value: 0.66 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.35
Seller value: 0.67 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.35
Seller value: 0.66 Buyer value: 0.78 Bid: 1.0 Reward: -0.22 Epsilon: 0.35
Seller value: 0.72 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.35
Seller value: 0.71 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.35
Seller value: 0.68 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.35
Seller value: 0.66 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.35
Seller value: 0.66 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.35
Seller value: 0.63 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilo

Seller value: 0.63 Buyer value: 0.8 Bid: 0.65 Reward: 0.15 Epsilon: 0.34
Seller value: 0.65 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.34
Seller value: 0.68 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.34
Seller value: 0.64 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.34
Seller value: 0.58 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.34
Seller value: 0.77 Buyer value: 0.52 Bid: 0.25 Reward: 0 Epsilon: 0.34
Seller value: 0.75 Buyer value: 0.8 Bid: 0.65 Reward: 0 Epsilon: 0.34
Seller value: 0.67 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.34
Seller value: 0.7 Buyer value: 0.7 Bid: 1.0 Reward: -0.3 Epsilon: 0.34
Seller value: 0.75 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.34
Seller value: 0.78 Buyer value: 0.88 Bid: 0.75 Reward: 0 Epsilon: 0.34
Seller value: 0.77 Buyer value: 0.57 Bid: 0.85 Reward: -0.28 Epsilon: 0.34
Seller value: 0.71 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.34
Seller value: 0.66 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsil

Seller value: 0.65 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.31
Seller value: 0.75 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.31
Seller value: 0.69 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.31
Seller value: 0.79 Buyer value: 0.88 Bid: 0.75 Reward: 0 Epsilon: 0.31
Seller value: 0.67 Buyer value: 0.55 Bid: 0.2 Reward: 0 Epsilon: 0.31
Seller value: 0.69 Buyer value: 0.5 Bid: 0.6 Reward: 0 Epsilon: 0.31
Seller value: 0.71 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.31
Seller value: 0.75 Buyer value: 0.8 Bid: 0.7 Reward: 0 Epsilon: 0.31
Seller value: 0.67 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.31
Seller value: 0.76 Buyer value: 0.92 Bid: 0.1 Reward: 0 Epsilon: 0.31
Seller value: 0.73 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.31
Seller value: 0.74 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.31
Seller value: 0.74 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.31
Seller value: 0.7 Buyer value: 0.55 Bid: 0.15 Reward: 0 Epsilon: 0.31
Seller value

Seller value: 0.69 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.29
Seller value: 0.75 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.29
Seller value: 0.66 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.29
Seller value: 0.74 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.29
Seller value: 0.76 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.29
Seller value: 0.77 Buyer value: 0.9 Bid: 0.75 Reward: 0 Epsilon: 0.29
Seller value: 0.57 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.29
Seller value: 0.8 Buyer value: 0.6 Bid: 0.05 Reward: 0 Epsilon: 0.29
Seller value: 0.67 Buyer value: 0.68 Bid: 0.85 Reward: -0.18 Epsilon: 0.29
Seller value: 0.67 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.29
Seller value: 0.71 Buyer value: 0.88 Bid: 0.15 Reward: 0 Epsilon: 0.29
Seller value: 0.72 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.29
Seller value: 0.7 Buyer value: 0.7 Bid: 0.2 Reward: 0 Epsilon: 0.29
Seller value: 0.68 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.29
Se

Seller value: 0.67 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.28
Seller value: 0.69 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.28
Seller value: 0.64 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.28
Seller value: 0.7 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.28
Seller value: 0.73 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.28
Seller value: 0.66 Buyer value: 0.82 Bid: 0.1 Reward: 0 Epsilon: 0.28
Seller value: 0.7 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.28
Seller value: 0.62 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.28
Seller value: 0.78 Buyer value: 0.95 Bid: 0.0 Reward: 0 Epsilon: 0.28
Seller value: 0.76 Buyer value: 0.88 Bid: 0.75 Reward: 0 Epsilon: 0.28
Seller value: 0.7 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.28
Seller value: 0.64 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.28
Seller value: 0.77 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.28
Seller value: 0.73 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.28
Seller 

Seller value: 0.76 Buyer value: 0.8 Bid: 0.75 Reward: 0 Epsilon: 0.27
Seller value: 0.71 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.27
Seller value: 0.76 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.27
Seller value: 0.68 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.27
Seller value: 0.66 Buyer value: 1.0 Bid: 0.75 Reward: 0.25 Epsilon: 0.27
Seller value: 0.65 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.27
Seller value: 0.67 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.27
Seller value: 0.7 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.27
Seller value: 0.74 Buyer value: 0.88 Bid: 0.6 Reward: 0 Epsilon: 0.27
Seller value: 0.7 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.27
Seller value: 0.76 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.27
Seller value: 0.65 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.27
Seller value: 0.65 Buyer value: 0.62 Bid: 0.45 Reward: 0 Epsilon: 0.27
Seller value: 0.69 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.27
Seller

Seller value: 0.78 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.25
Seller value: 0.63 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.25
Seller value: 0.69 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.25
Seller value: 0.73 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.25
Seller value: 0.7 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.25
Seller value: 0.6 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.25
Seller value: 0.7 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.25
Seller value: 0.69 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.25
Seller value: 0.7 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.25
Seller value: 0.73 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.25
Seller value: 0.76 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.25
Seller value: 0.71 Buyer value: 0.7 Bid: 0.1 Reward: 0 Epsilon: 0.25
Seller value: 0.62 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.25
Seller value: 0.64 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.25
Seller

Seller value: 0.79 Buyer value: 0.78 Bid: 0.8 Reward: -0.03 Epsilon: 0.23
Seller value: 0.71 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.23
Seller value: 0.65 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.23
Seller value: 0.73 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.23
Seller value: 0.71 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.23
Seller value: 0.66 Buyer value: 0.65 Bid: 0.9 Reward: -0.25 Epsilon: 0.23
Seller value: 0.66 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.23
Seller value: 0.74 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.23
Seller value: 0.72 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.23
Seller value: 0.6 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.23
Seller value: 0.76 Buyer value: 0.78 Bid: 0.75 Reward: 0 Epsilon: 0.23
Seller value: 0.77 Buyer value: 0.92 Bid: 0.95 Reward: -0.03 Epsilon: 0.23
Seller value: 0.77 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.23
Seller value: 0.75 Buyer value: 0.5 Bid: 0.0 Reward: 0 Eps

Seller value: 0.69 Buyer value: 0.78 Bid: 0.8 Reward: -0.03 Epsilon: 0.23
Seller value: 0.75 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.23
Seller value: 0.62 Buyer value: 0.98 Bid: 0.9 Reward: 0.08 Epsilon: 0.23
Seller value: 0.74 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.23
Seller value: 0.75 Buyer value: 0.72 Bid: 0.4 Reward: 0 Epsilon: 0.23
Seller value: 0.71 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.23
Seller value: 0.72 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.23
Seller value: 0.6 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.23
Seller value: 0.66 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.23
Seller value: 0.8 Buyer value: 0.85 Bid: 0.75 Reward: 0 Epsilon: 0.23
Seller value: 0.71 Buyer value: 0.98 Bid: 0.3 Reward: 0 Epsilon: 0.23
Seller value: 0.7 Buyer value: 0.98 Bid: 0.05 Reward: 0 Epsilon: 0.23
Seller value: 0.72 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.23
Seller value: 0.77 Buyer value: 0.78 Bid: 0.75 Reward: 0 Epsilon: 0.23
Seller va

Seller value: 0.74 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.22
Seller value: 0.79 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.61 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.77 Buyer value: 0.88 Bid: 0.55 Reward: 0 Epsilon: 0.22
Seller value: 0.61 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.73 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.22
Seller value: 0.62 Buyer value: 1.0 Bid: 0.25 Reward: 0 Epsilon: 0.22
Seller value: 0.69 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.7 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.22
Seller value: 0.68 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.22
Seller value: 0.69 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.22
Se

Seller value: 0.63 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.8 Buyer value: 0.78 Bid: 0.75 Reward: 0 Epsilon: 0.22
Seller value: 0.72 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.22
Seller value: 0.7 Buyer value: 0.52 Bid: 0.2 Reward: 0 Epsilon: 0.22
Seller value: 0.63 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.7 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.22
Seller value: 0.71 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.68 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.22
Seller value: 0.74 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.22
Seller value: 0.7 Buyer value: 0.62 Bid: 0.5 Reward: 0 Epsilon: 0.22
Seller value: 0.65 Buyer value: 0.68 Bid: 0.5 Reward: 0 Epsilon: 0.22
Seller value: 0.69 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.22
Seller

Seller value: 0.71 Buyer value: 0.75 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.75 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.65 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.22
Seller value: 0.73 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.8 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.22
Seller value: 0.72 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.76 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.22
Seller value: 0.71 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.73 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.22
Seller value: 0.68 Buyer value: 0.8 Bid: 1.0 Reward: -0.2 Epsilon: 0.22
Seller value: 0.67 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.22
Seller value: 0.76 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.22
Sell

Seller value: 0.72 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.7 Buyer value: 0.78 Bid: 0.75 Reward: 0.03 Epsilon: 0.22
Seller value: 0.75 Buyer value: 0.78 Bid: 0.75 Reward: 0 Epsilon: 0.22
Seller value: 0.79 Buyer value: 0.92 Bid: 0.75 Reward: 0 Epsilon: 0.22
Seller value: 0.8 Buyer value: 0.8 Bid: 0.75 Reward: 0 Epsilon: 0.22
Seller value: 0.76 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.22
Seller value: 0.76 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.69 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.22
Seller value: 0.76 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.79 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.22
Seller value: 0.73 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.22
Seller value: 0.68 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.22
Seller value: 0.71 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.22
Seller value: 0.69 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.22
Sell

Seller value: 0.73 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.2
Seller value: 0.64 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.2
Seller value: 0.62 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.2
Seller value: 0.72 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.2
Seller value: 0.7 Buyer value: 0.88 Bid: 0.5 Reward: 0 Epsilon: 0.2
Seller value: 0.68 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.2
Seller value: 0.67 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.2
Seller value: 0.65 Buyer value: 0.7 Bid: 0.65 Reward: 0.05 Epsilon: 0.2
Seller value: 0.62 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.2
Seller value: 0.71 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.2
Seller value: 0.71 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.2
Seller value: 0.7 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.2
Seller value: 0.67 Buyer value: 0.72 Bid: 0.05 Reward: 0 Epsilon: 0.2
Seller value: 0.73 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.2
Seller value: 0.67 Buye

Seller value: 0.75 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.19
Seller value: 0.58 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.19
Seller value: 0.74 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.19
Seller value: 0.75 Buyer value: 0.8 Bid: 0.75 Reward: 0 Epsilon: 0.19
Seller value: 0.69 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.19
Seller value: 0.76 Buyer value: 0.92 Bid: 0.75 Reward: 0 Epsilon: 0.19
Seller value: 0.73 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.19
Seller value: 0.69 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.19
Seller value: 0.71 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.19
Seller value: 0.68 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.19
Seller value: 0.65 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.19
Seller value: 0.74 Buyer value: 1.0 Bid: 0.55 Reward: 0 Epsilon: 0.19
Seller value: 0.74 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.19
Seller value: 0.68 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.19

Seller value: 0.67 Buyer value: 0.72 Bid: 0.3 Reward: 0 Epsilon: 0.18
Seller value: 0.62 Buyer value: 0.98 Bid: 0.5 Reward: 0 Epsilon: 0.18
Seller value: 0.73 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.18
Seller value: 0.74 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.18
Seller value: 0.68 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.18
Seller value: 0.67 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.18
Seller value: 0.7 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.18
Seller value: 0.73 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.18
Seller value: 0.62 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.18
Seller value: 0.74 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0.18
Seller value: 0.7 Buyer value: 0.7 Bid: 0.25 Reward: 0 Epsilon: 0.18
Seller value: 0.61 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.18
Seller value: 0.77 Buyer value: 0.88 Bid: 0.75 Reward: 0 Epsilon: 0.18
Seller value: 0.71 Buyer value: 0.9 Bid: 0.95 Reward: -0.05 Epsilon: 0.18

Seller value: 0.65 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.17
Seller value: 0.65 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.17
Seller value: 0.67 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.17
Seller value: 0.75 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.17
Seller value: 0.7 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.17
Seller value: 0.78 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.17
Seller value: 0.64 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.17
Seller value: 0.72 Buyer value: 0.57 Bid: 0.2 Reward: 0 Epsilon: 0.17
Seller value: 0.68 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.17
Seller value: 0.75 Buyer value: 0.6 Bid: 0.1 Reward: 0 Epsilon: 0.17
Seller value: 0.68 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.17
Seller value: 0.7 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.17
Seller value: 0.69 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.17
Seller value: 0.71 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.17
Seller value

Seller value: 0.67 Buyer value: 0.68 Bid: 1.0 Reward: -0.32 Epsilon: 0.16
Seller value: 0.69 Buyer value: 0.6 Bid: 0.35 Reward: 0 Epsilon: 0.16
Seller value: 0.68 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.16
Seller value: 0.54 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.16
Seller value: 0.73 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.16
Seller value: 0.69 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.16
Seller value: 0.73 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.16
Seller value: 0.72 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.16
Seller value: 0.72 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.16
Seller value: 0.67 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.16
Seller value: 0.71 Buyer value: 0.78 Bid: 0.55 Reward: 0 Epsilon: 0.16
Seller value: 0.76 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.16
Seller value: 0.79 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.16
Seller value: 0.69 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.16
Selle

Seller value: 0.69 Buyer value: 0.5 Bid: 0.6 Reward: 0 Epsilon: 0.15
Seller value: 0.72 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.15
Seller value: 0.7 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.15
Seller value: 0.68 Buyer value: 0.52 Bid: 0.55 Reward: 0 Epsilon: 0.15
Seller value: 0.75 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.15
Seller value: 0.63 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.15
Seller value: 0.71 Buyer value: 0.78 Bid: 0.9 Reward: -0.12 Epsilon: 0.15
Seller value: 0.63 Buyer value: 0.5 Bid: 0.6 Reward: 0 Epsilon: 0.15
Seller value: 0.71 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.15
Seller value: 0.68 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.15
Seller value: 0.74 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.15
Seller value: 0.71 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.15
Seller value: 0.73 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.15
Seller value: 0.63 Buyer value: 0.85 Bid: 0.4 Reward: 0 Epsilon: 0.15
Se

Seller value: 0.66 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.14
Seller value: 0.67 Buyer value: 0.6 Bid: 0.65 Reward: 0 Epsilon: 0.14
Seller value: 0.67 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.14
Seller value: 0.65 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.14
Seller value: 0.74 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.14
Seller value: 0.73 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.14
Seller value: 0.74 Buyer value: 0.62 Bid: 0.0 Reward: 0 Epsilon: 0.14
Seller value: 0.64 Buyer value: 0.68 Bid: 0.65 Reward: 0.03 Epsilon: 0.14
Seller value: 0.64 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.14
Seller value: 0.8 Buyer value: 0.92 Bid: 0.8 Reward: 0 Epsilon: 0.14
Seller value: 0.74 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.14
Seller value: 0.72 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.14
Seller value: 0.78 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.14
Seller value: 0.75 Buyer value: 0.9 Bid: 0.75 Reward: 0.15 Epsilon: 0.14
Seller v

Seller value: 0.69 Buyer value: 0.88 Bid: 0.55 Reward: 0 Epsilon: 0.13
Seller value: 0.67 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.13
Seller value: 0.74 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.13
Seller value: 0.65 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.13
Seller value: 0.83 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.13
Seller value: 0.65 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.13
Seller value: 0.72 Buyer value: 0.9 Bid: 0.8 Reward: 0.1 Epsilon: 0.13
Seller value: 0.76 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.13
Seller value: 0.71 Buyer value: 0.75 Bid: 0.7 Reward: 0 Epsilon: 0.13
Seller value: 0.7 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.13
Seller value: 0.77 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.13
Seller value: 0.66 Buyer value: 0.72 Bid: 0.7 Reward: 0.02 Epsilon: 0.13
Seller value: 0.68 Buyer value: 0.7 Bid: 1.0 Reward: -0.3 Epsilon: 0.13
Seller value: 0.65 Buyer value: 0.82 Bid: 0.6 Reward: 0 Epsilon: 0.13
Selle

Seller value: 0.69 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.12
Seller value: 0.75 Buyer value: 0.52 Bid: 0.95 Reward: -0.43 Epsilon: 0.12
Seller value: 0.58 Buyer value: 0.62 Bid: 0.6 Reward: 0.02 Epsilon: 0.12
Seller value: 0.62 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.12
Seller value: 0.68 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.12
Seller value: 0.8 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.12
Seller value: 0.66 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.12
Seller value: 0.86 Buyer value: 0.7 Bid: 0.15 Reward: 0 Epsilon: 0.12
Seller value: 0.71 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.12
Seller value: 0.69 Buyer value: 0.9 Bid: 0.8 Reward: 0.1 Epsilon: 0.12
Seller value: 0.74 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.12
Seller value: 0.69 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.12
Seller value: 0.79 Buyer value: 0.65 Bid: 1.0 Reward: -0.35 Epsilon: 0.12
Seller value: 0.69 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.1

Seller value: 0.72 Buyer value: 0.62 Bid: 0.75 Reward: -0.12 Epsilon: 0.11
Seller value: 0.72 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.11
Seller value: 0.76 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.11
Seller value: 0.68 Buyer value: 0.88 Bid: 0.9 Reward: -0.03 Epsilon: 0.11
Seller value: 0.7 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.11
Seller value: 0.61 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.11
Seller value: 0.68 Buyer value: 0.78 Bid: 0.0 Reward: 0 Epsilon: 0.11
Seller value: 0.74 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.11
Seller value: 0.7 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.11
Seller value: 0.69 Buyer value: 0.55 Bid: 0.95 Reward: -0.4 Epsilon: 0.11
Seller value: 0.79 Buyer value: 0.98 Bid: 0.8 Reward: 0.18 Epsilon: 0.11
Seller value: 0.68 Buyer value: 0.72 Bid: 0.65 Reward: 0 Epsilon: 0.11
Seller value: 0.73 Buyer value: 0.8 Bid: 0.75 Reward: 0.05 Epsilon: 0.11
Seller value: 0.64 Buyer value: 0.75 Bid: 0.65 Reward: 0.1 Ep

Seller value: 0.75 Buyer value: 0.95 Bid: 0.8 Reward: 0.15 Epsilon: 0.1
Seller value: 0.75 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.1
Seller value: 0.77 Buyer value: 0.8 Bid: 0.75 Reward: 0 Epsilon: 0.1
Seller value: 0.71 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.1
Seller value: 0.67 Buyer value: 0.95 Bid: 0.1 Reward: 0 Epsilon: 0.1
Seller value: 0.69 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.1
Seller value: 0.66 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.1
Seller value: 0.69 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.1
Seller value: 0.69 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.1
Seller value: 0.71 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.1
Seller value: 0.7 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.1
Seller value: 0.64 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.1
Seller value: 0.66 Buyer value: 0.95 Bid: 0.25 Reward: 0 Epsilon: 0.1
Seller value: 0.74 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.1
Seller value:

Seller value: 0.71 Buyer value: 0.7 Bid: 0.65 Reward: 0 Epsilon: 0.1
Seller value: 0.65 Buyer value: 0.9 Bid: 0.8 Reward: 0.1 Epsilon: 0.1
Seller value: 0.8 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.1
Seller value: 0.63 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.1
Seller value: 0.82 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.1
Seller value: 0.68 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.1
Seller value: 0.64 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.1
Seller value: 0.8 Buyer value: 0.57 Bid: 0.25 Reward: 0 Epsilon: 0.1
Seller value: 0.59 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.1
Seller value: 0.77 Buyer value: 0.72 Bid: 0.7 Reward: 0 Epsilon: 0.1
Seller value: 0.67 Buyer value: 0.6 Bid: 0.65 Reward: 0 Epsilon: 0.1
Seller value: 0.62 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.1
Seller value: 0.66 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.1
Seller value: 0.76 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.1
Seller value: 0.67 Buyer value: 

Seller value: 0.8 Buyer value: 0.57 Bid: 0.0 Reward: 0 Epsilon: 0.09
Seller value: 0.7 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.09
Seller value: 0.71 Buyer value: 0.52 Bid: 0.25 Reward: 0 Epsilon: 0.09
Seller value: 0.71 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.09
Seller value: 0.65 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.09
Seller value: 0.71 Buyer value: 0.82 Bid: 0.25 Reward: 0 Epsilon: 0.09
Seller value: 0.68 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.09
Seller value: 0.71 Buyer value: 0.92 Bid: 0.45 Reward: 0 Epsilon: 0.09
Seller value: 0.66 Buyer value: 0.98 Bid: 0.75 Reward: 0.23 Epsilon: 0.09
Seller value: 0.68 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.09
Seller value: 0.73 Buyer value: 0.92 Bid: 0.8 Reward: 0.12 Epsilon: 0.09
Seller value: 0.69 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.09
Seller value: 0.73 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.09
Seller value: 0.72 Buyer value: 0.82 Bid: 0.75 Reward: 0.07 Epsilon: 0

Seller value: 0.69 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.09
Seller value: 0.72 Buyer value: 1.0 Bid: 0.8 Reward: 0.2 Epsilon: 0.09
Seller value: 0.7 Buyer value: 0.6 Bid: 0.0 Reward: 0 Epsilon: 0.09
Seller value: 0.68 Buyer value: 0.95 Bid: 0.75 Reward: 0.2 Epsilon: 0.09
Seller value: 0.69 Buyer value: 0.75 Bid: 0.7 Reward: 0.05 Epsilon: 0.09
Seller value: 0.71 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.09
Seller value: 0.79 Buyer value: 0.85 Bid: 0.4 Reward: 0 Epsilon: 0.09
Seller value: 0.68 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.09
Seller value: 0.7 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.09
Seller value: 0.65 Buyer value: 0.5 Bid: 0.0 Reward: 0 Epsilon: 0.09
Seller value: 0.75 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.09
Seller value: 0.8 Buyer value: 0.85 Bid: 0.75 Reward: 0 Epsilon: 0.09
Seller value: 0.73 Buyer value: 0.78 Bid: 0.7 Reward: 0 Epsilon: 0.09
Seller value: 0.66 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.09
S

Seller value: 0.66 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.08
Seller value: 0.65 Buyer value: 0.78 Bid: 0.7 Reward: 0.07 Epsilon: 0.08
Seller value: 0.78 Buyer value: 0.82 Bid: 0.75 Reward: 0 Epsilon: 0.08
Seller value: 0.72 Buyer value: 0.65 Bid: 0.6 Reward: 0 Epsilon: 0.08
Seller value: 0.68 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.08
Seller value: 0.63 Buyer value: 0.62 Bid: 0.6 Reward: 0 Epsilon: 0.08
Seller value: 0.79 Buyer value: 0.68 Bid: 0.65 Reward: 0 Epsilon: 0.08
Seller value: 0.71 Buyer value: 0.52 Bid: 0.0 Reward: 0 Epsilon: 0.08
Seller value: 0.7 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.08
Seller value: 0.68 Buyer value: 0.92 Bid: 0.75 Reward: 0.18 Epsilon: 0.08
Seller value: 0.63 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.08
Seller value: 0.7 Buyer value: 0.85 Bid: 0.75 Reward: 0.1 Epsilon: 0.08
Seller value: 0.64 Buyer value: 0.55 Bid: 0.0 Reward: 0 Epsilon: 0.08
Seller value: 0.65 Buyer value: 0.88 Bid: 0.75 Reward: 0.12 Epsilon: 0.08

In [95]:
# Show the learned greedy policy next to the buyer-value grid.
# First output line: the buyer-value divisions.
# Second output line: for every row of q_table, the bid whose Q-value is largest
# (argmax along axis=1 picks the best action index per row, which is then mapped
# to a bid via action2bid). Presumably row i corresponds to
# buyer_value_divisions[i] -- confirm against the training cell.
print(
    buyer_value_divisions,
    action2bid[np.argmax(q_table, axis=1)],
    sep="\n",
)

[0.5   0.525 0.55  0.575 0.6   0.625 0.65  0.675 0.7   0.725 0.75  0.775
 0.8   0.825 0.85  0.875 0.9   0.925 0.95  0.975 1.   ]
[0.   0.   0.   0.   0.   0.6  0.6  0.65 0.65 0.7  0.7  0.7  0.75 0.75
 0.75 0.75 0.8  0.75 0.75 0.75 0.8 ]
