In [1]:
# Environment setup. The two apt-get commands below are commented out (disabled).
#!sudo apt-get update
#!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
# Force-reinstall setuptools, constrained to a version below 70 (-v: verbose log).
!pip install --force-reinstall -v "setuptools<70"
# Install h5py from source (--no-binary) with HDF5_DIR set to /usr/local.
!HDF5_DIR=/usr/local pip install --no-binary=h5py h5py
# Force-reinstall the remaining Python dependencies (versions are not pinned,
# apart from the tensorflow lower bound).
!HDF5_DIR=/usr/local pip install tf-agents tf-keras "tensorflow>=2.15" numpy pandas pandas-ta --force-reinstall


Using pip 23.3 from /opt/conda/lib/python3.11/site-packages/pip (python 3.11)
Collecting setuptools<70
  Obtaining dependency information for setuptools<70 from https://files.pythonhosted.org/packages/f7/29/13965af254e3373bceae8fb9a0e6ea0d0e571171b80d6646932131d6439b/setuptools-69.5.1-py3-none-any.whl.metadata
  Downloading setuptools-69.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading setuptools-69.5.1-py3-none-any.whl (894 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m894.6/894.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: setuptools
  Attempting uninstall: setuptools
    Found existing installation: setuptools 74.1.2
    Uninstalling setuptools-74.1.2:
      Removing file or directory /opt/conda/lib/python3.11/site-packages/_distutils_hack/
      Removing file or directory /opt/conda/lib/python3.11/site-packages/distutils-precedence.pth
      Removing file or directory /opt/conda/lib/python3.11/site-packa

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Actor Network
class Actor(tf.keras.Model):
    """Policy network: a shared two-layer trunk feeding two logit heads.

    One head scores the action type and the other scores the number-of-shares
    option. Both heads are linear, so the outputs are raw (un-normalised)
    logits.
    """

    def __init__(self, num_action_types, num_shares_options):
        """Create the trunk and the two output heads.

        Args:
            num_action_types: Number of units in the action-type head.
            num_shares_options: Number of units in the number-of-shares head.
        """
        super().__init__()
        # Shared trunk: two 128-unit ReLU layers.
        self.fc1 = layers.Dense(units=128, activation='relu')
        self.fc2 = layers.Dense(units=128, activation='relu')
        # Output heads without activation.
        self.action_type_logits = layers.Dense(units=num_action_types)
        self.num_shares_logits = layers.Dense(units=num_shares_options)

    def call(self, inputs):
        """Return ``(action_type_logits, num_shares_logits)`` for ``inputs``."""
        hidden = self.fc2(self.fc1(inputs))
        return self.action_type_logits(hidden), self.num_shares_logits(hidden)


# Critic Network
class Critic(tf.keras.Model):
    """State-value network: two 128-unit ReLU layers and a single linear output."""

    def __init__(self):
        """Create the two hidden layers and the one-unit value head."""
        super(Critic, self).__init__()
        self.fc1 = layers.Dense(128, activation='relu')
        self.fc2 = layers.Dense(128, activation='relu')
        self.value = layers.Dense(1)

    def call(self, inputs):
        """Return the value head's output for ``inputs``.

        Args:
            inputs: Tensor fed to the first dense layer.

        Returns:
            The output of the one-unit value head applied to the hidden
            features (last dimension of size 1).
        """
        x = self.fc1(inputs)
        x = self.fc2(x)
        # Apply the head to the features. Returning the bare `self.value`
        # attribute would hand callers the Dense layer object, not a tensor.
        return self.value(x)


In [3]:
class A3CAgent:
    """Actor-critic agent with a two-headed discrete policy and a value network.

    The actor outputs logits for an action type and for a number-of-shares
    option; the critic outputs a state value. Each network has its own Adam
    optimizer, and both are updated from one combined loss in ``train_step``.
    """

    def __init__(self, action_spec, observation_spec, lr=1e-4,
                 num_action_types=3, num_shares_options=3):
        """Build the networks and optimizers.

        Args:
            action_spec: Stored on the agent as ``self.action_spec``.
            observation_spec: Stored on the agent as ``self.observation_spec``.
            lr: Learning rate used for both Adam optimizers.
            num_action_types: Size of the actor's action-type head.
            num_shares_options: Size of the actor's number-of-shares head.
        """
        self.num_action_types = num_action_types
        self.num_shares_options = num_shares_options

        # Actor requires both head sizes; the defaults of 3 match the option
        # count this class previously hard-coded when sampling.
        self.actor = Actor(num_action_types, num_shares_options)
        self.critic = Critic()

        # Optimizers for both actor and critic
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.critic_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

        # Action and observation specs
        self.action_spec = action_spec
        self.observation_spec = observation_spec

    def policy(self, observation):
        """Return ``(action_type_probs, num_shares_probs)`` for ``observation``.

        Both results are the softmax of the corresponding actor logits.
        """
        action_type_logits, num_shares_logits = self.actor(observation)
        action_type_probs = tf.nn.softmax(action_type_logits)
        num_shares_probs = tf.nn.softmax(num_shares_logits)
        return action_type_probs, num_shares_probs

    @staticmethod
    def _sample_index(probs):
        """Draw one index from the categorical distribution ``probs``.

        Accepts anything exposing ``.numpy()`` as well as array-likes. A
        leading batch axis is allowed only when it holds a single row.
        """
        raw = probs.numpy() if hasattr(probs, "numpy") else probs
        # float64 plus renormalisation: float32 softmax output can miss the
        # sum-to-one tolerance that np.random.choice enforces.
        p = np.asarray(raw, dtype=np.float64)
        if p.ndim > 1:
            if p.size != p.shape[-1]:
                raise ValueError(
                    "sample_actions expects probabilities for a single observation"
                )
            p = p.reshape(-1)
        p = p / p.sum()
        return np.random.choice(p.size, p=p)

    def sample_actions(self, action_type_probs, num_shares_probs):
        """Sample one ``(action_type, num_shares)`` index pair.

        Args:
            action_type_probs: Probabilities over action types, shaped
                ``(k,)`` or ``(1, k)``.
            num_shares_probs: Probabilities over share options, shaped
                ``(m,)`` or ``(1, m)``.

        Returns:
            Tuple of two integer indices.

        Raises:
            ValueError: If either input holds more than one row.
        """
        action_type = self._sample_index(action_type_probs)
        num_shares = self._sample_index(num_shares_probs)
        return action_type, num_shares

    def value(self, observation):
        """Return the critic's output for ``observation``."""
        return self.critic(observation)

    def compute_loss(self, rewards, values, log_probs, entropy, gamma=0.99, targets=None):
        """Combine actor, critic and entropy terms into one scalar loss.

        Args:
            rewards: Reward sequence; used only when ``targets`` is None, in
                which case discounted returns are accumulated from the end.
            values: Critic outputs, one per sample.
            log_probs: Log-probability of the taken action, one per sample.
            entropy: Policy entropy, one per sample.
            gamma: Discount factor for the reward accumulation.
            targets: Optional precomputed return estimates. When given they
                replace the discounted rewards.

        Returns:
            Scalar tensor: mean of
            ``actor_loss + 0.5 * critic_loss - 0.01 * entropy``.
        """
        # Flatten every per-sample quantity to shape (N,). The critic emits
        # (N, 1), and subtracting that from an (N,) vector would silently
        # broadcast to (N, N).
        values = tf.reshape(tf.cast(values, tf.float32), [-1])
        log_probs = tf.reshape(tf.cast(log_probs, tf.float32), [-1])
        entropy = tf.reshape(tf.cast(entropy, tf.float32), [-1])

        if targets is None:
            # Calculate discounted rewards, walking backwards through the sequence.
            reward_seq = np.asarray(rewards, dtype=np.float32).reshape(-1)
            discounted = np.zeros_like(reward_seq)
            running_add = 0.0
            for idx in range(reward_seq.size - 1, -1, -1):
                running_add = reward_seq[idx] + gamma * running_add
                discounted[idx] = running_add
            returns = tf.convert_to_tensor(discounted)
        else:
            returns = tf.reshape(tf.cast(targets, tf.float32), [-1])
        # The return estimate is a fixed regression target for the critic.
        returns = tf.stop_gradient(returns)

        # Compute critic loss
        advantages = returns - values
        critic_loss = tf.square(advantages)

        # Compute actor loss; the advantage is treated as a constant weight.
        actor_loss = -log_probs * tf.stop_gradient(advantages)
        entropy_loss = -0.01 * entropy  # Encourage exploration
        total_loss = actor_loss + 0.5 * critic_loss + entropy_loss

        return tf.reduce_mean(total_loss)

    def train_step(self, observations, actions, rewards, next_observations, dones, gamma=0.99):
        """Run one gradient update on the actor and the critic.

        Accepts either a batch of transitions or a single transition: a 1-D
        ``actions`` pair, a scalar reward and a scalar done flag are reshaped
        to a batch of one.

        Args:
            observations: Input passed unchanged to the actor and critic.
                NOTE(review): the networks are Dense stacks, so this
                presumably needs a leading batch axis — confirm against the
                environment.
            actions: ``(action_type, num_shares)`` index pairs, shaped
                ``(2,)`` or ``(N, 2)``.
            rewards: Scalar or length-N rewards.
            next_observations: Input passed unchanged to the critic.
            dones: Scalar or length-N episode-termination flags.
            gamma: Discount factor for the one-step bootstrap target.

        Returns:
            The scalar loss tensor that was minimised.
        """
        actions = tf.reshape(tf.cast(actions, tf.int32), [-1, 2])
        rewards = tf.reshape(tf.cast(rewards, tf.float32), [-1])
        dones = tf.reshape(tf.cast(dones, tf.float32), [-1])
        batch_index = tf.range(tf.shape(actions)[0])

        with tf.GradientTape() as tape:
            values = tf.reshape(self.value(observations), [-1])
            action_type_logits, num_shares_logits = self.actor(observations)

            # log_softmax avoids log(0) when a probability underflows.
            action_type_log_probs = tf.nn.log_softmax(action_type_logits)
            num_shares_log_probs = tf.nn.log_softmax(num_shares_logits)
            action_type_probs = tf.exp(action_type_log_probs)
            num_shares_probs = tf.exp(num_shares_log_probs)
            entropy = -tf.reduce_sum(action_type_probs * action_type_log_probs, axis=-1) - \
                      tf.reduce_sum(num_shares_probs * num_shares_log_probs, axis=-1)

            # Sampled actions log_probs
            action_type_log_prob = tf.gather_nd(
                action_type_log_probs, tf.stack([batch_index, actions[:, 0]], axis=1))
            num_shares_log_prob = tf.gather_nd(
                num_shares_log_probs, tf.stack([batch_index, actions[:, 1]], axis=1))

            # Compute one-step bootstrap targets; no gradient flows through the
            # next-state value.
            next_values = tf.stop_gradient(tf.reshape(self.value(next_observations), [-1]))
            targets = rewards + (1.0 - dones) * gamma * next_values

            # Calculate total loss. The targets are passed through so the
            # bootstrap estimate is actually used instead of being discarded.
            loss = self.compute_loss(
                rewards, values, action_type_log_prob + num_shares_log_prob, entropy,
                gamma=gamma, targets=targets)

        # Backpropagation: one gradient list, split between the two optimizers.
        actor_vars = self.actor.trainable_variables
        critic_vars = self.critic.trainable_variables
        gradients = tape.gradient(loss, actor_vars + critic_vars)
        n_actor = len(actor_vars)
        self.actor_optimizer.apply_gradients(zip(gradients[:n_actor], actor_vars))
        self.critic_optimizer.apply_gradients(zip(gradients[n_actor:], critic_vars))
        return loss



In [4]:
import threading

class Worker(threading.Thread):
    """Thread that runs episodes on an environment and trains a shared agent.

    All workers draw from one shared episode budget: each episode is claimed
    under a class-level lock, so the workers together run ``max_episodes``
    episodes in total rather than that many per thread.

    Constructing a Worker resets the shared count to the value passed as
    ``global_episode_counter``, so create every worker before starting any.
    """

    # Shared across all Worker instances; guarded by _counter_lock.
    _counter_lock = threading.Lock()
    _episodes_claimed = 0

    def __init__(self, env, agent, global_episode_counter, max_episodes=None):
        """Store collaborators and seed the shared episode count.

        Args:
            env: Environment exposing ``reset()`` and ``step(actions)``, where
                ``step`` returns a 4-tuple
                ``(next_observations, reward, done, info)``.
            agent: Object exposing ``policy``, ``sample_actions`` and
                ``train_step``.
            global_episode_counter: Starting value of the shared count.
            max_episodes: Total episode budget. When None, the module-level
                ``MAX_EPISODES`` is looked up when ``run`` starts.
        """
        super().__init__()
        self.env = env
        self.agent = agent
        self.max_episodes = max_episodes
        self.global_episode_counter = global_episode_counter
        # An int argument is copied per instance, so the truly shared count
        # lives on the class.
        with Worker._counter_lock:
            Worker._episodes_claimed = global_episode_counter

    def _claim_episode(self, limit):
        """Atomically reserve the next episode number, or None when the budget is spent."""
        with Worker._counter_lock:
            if Worker._episodes_claimed >= limit:
                return None
            Worker._episodes_claimed += 1
            self.global_episode_counter = Worker._episodes_claimed
            return Worker._episodes_claimed

    def run(self):
        """Run and train on episodes until the shared budget is exhausted."""
        limit = self.max_episodes if self.max_episodes is not None else MAX_EPISODES

        while True:
            episode = self._claim_episode(limit)
            if episode is None:
                break

            observations = self.env.reset()
            done = False
            total_reward = 0

            while not done:
                # Get the action probabilities for action type and number of shares
                action_type_probs, num_shares_probs = self.agent.policy(observations)

                # Sample an action tuple (action type, number of shares)
                action_type, num_shares = self.agent.sample_actions(action_type_probs, num_shares_probs)

                # Create action tuple
                actions = np.array([action_type, num_shares])

                next_observations, reward, done, _ = self.env.step(actions)

                # Train step
                self.agent.train_step(observations, actions, reward, next_observations, done)

                observations = next_observations
                total_reward += reward

            print(f"Episode {episode} Total Reward: {total_reward}")



In [5]:
from utils import load_dataset
from envs.combined_env import CombinedEnv

## Helper functions
# Environment type handed to the env factories below; 'com' is the only value
# that create_env accepts.
PARAM_ENV_TYPE = 'com'

def create_env(env_type: str, df, window_size, frame_bound):
    """Build an environment of the requested type.

    Args:
        env_type: Environment kind; only 'com' is supported.
        df: Forwarded to ``CombinedEnv`` as ``df``.
        window_size: Forwarded to ``CombinedEnv`` as ``window_size``.
        frame_bound: Forwarded to ``CombinedEnv`` as ``frame_bound``.

    Returns:
        A ``CombinedEnv`` instance.

    Raises:
        NotImplementedError: If ``env_type`` is anything other than 'com'.
    """
    # Guard clause: reject unsupported kinds before constructing anything.
    if env_type != 'com':
        raise NotImplementedError('unknown type')

    return CombinedEnv(df=df, window_size=window_size, frame_bound=frame_bound)


def create_training_envs(env_type: str):
    """Build the training environments: five frame ranges for each of five datasets.

    Args:
        env_type: Environment kind forwarded to ``create_env``.

    Returns:
        List of 25 environments ordered KO, AMZN, AMD, PYPL, NFLX, each
        dataset contributing its frame ranges in ascending order.
    """
    window_size = 10
    dataset_paths = (
        './resources/KO.csv',
        './resources/AMZN.csv',
        './resources/AMD.csv',
        './resources/PYPL.csv',
        './resources/NFLX.csv',
    )
    frame_bounds = (
        (10, 120),
        (120, 230),
        (350, 470),
        (1000, 1120),
        (1700, 1820),
    )

    # Load every dataset up front, before any environment is constructed.
    frames = [load_dataset(path) for path in dataset_paths]

    # Dataset-major order: all frame ranges of one dataset, then the next.
    return [
        create_env(env_type, frame, window_size, bounds)
        for frame in frames
        for bounds in frame_bounds
    ]


def create_testing_env(env_type: str):
    """Build the single evaluation environment.

    Loads the KO dataset and creates one environment of ``env_type`` over
    frames (2000, 2300) with a window size of 10.
    """
    eval_frame = load_dataset('./resources/KO.csv')
    eval_window = 10
    eval_bounds = (2000, 2300)
    return create_env(env_type, eval_frame, eval_window, eval_bounds)



# ====================================== Create environments ======================================

# Build the training environments and wrap the first one as a TF environment.
# NOTE(review): `tf_py_environment` is not imported in any visible cell —
# presumably `from tf_agents.environments import tf_py_environment` is
# intended; confirm and add it to the imports.
train_py_envs = create_training_envs(PARAM_ENV_TYPE)
train_env_sample = tf_py_environment.TFPyEnvironment(train_py_envs[0])

# Build the evaluation environment and wrap it the same way.
eval_py_env = create_testing_env(PARAM_ENV_TYPE)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

ImportError: cannot import name 'tarfile' from 'backports' (/opt/conda/lib/python3.11/site-packages/backports/__init__.py)

In [None]:
# Start workers
# NOTE(review): `YourPyEnvironment` and `num_workers` are not defined in any
# visible cell; they look like placeholders that must be filled in before this
# cell can run.
env = YourPyEnvironment()
# Plain int handed to every Worker constructor.
global_episode_counter = 0
agent = A3CAgent(env.action_spec(), env.observation_spec())
# Every worker is given the same `env` and `agent` objects.
workers = [Worker(env, agent, global_episode_counter) for _ in range(num_workers)]

# Launch all worker threads.
for worker in workers:
    worker.start()

# Block until every worker thread has finished.
for worker in workers:
    worker.join()