<a href="https://colab.research.google.com/github/sabuniemmanuelc/Autoencoder_DeepLearning/blob/main/DRLPortfolioAllocationWithRiskMitigation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import gym
import yfinance as yf

# Function to fetch data from Yahoo Finance
def fetch_yahoo_finance_data(tickers, start_date, end_date):
    data = yf.download(tickers, start=start_date, end=end_date)
    return data['Adj Close']

# Function to calculate returns and covariances
def calculate_returns_and_covariances(data):
    returns = data.pct_change().dropna()
    covariances = returns.cov()
    return returns.mean(), covariances

# Define the environment for portfolio allocation
class PortfolioAllocationEnv(gym.Env):
    def __init__(self, initial_capital, risk_free_rate, asset_returns, asset_covariances):
        super(PortfolioAllocationEnv, self).__init__()
        self.initial_capital = initial_capital
        self.current_capital = initial_capital
        self.risk_free_rate = risk_free_rate
        self.asset_returns = asset_returns
        self.asset_covariances = asset_covariances
        self.num_assets = len(asset_returns)
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(self.num_assets,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(self.num_assets+1,), dtype=np.float32)
        self.reset()

    def reset(self):
        self.current_capital = self.initial_capital
        self.portfolio_weights = np.ones(self.num_assets) / self.num_assets
        self.time_step = 0
        return self._get_observation()

    def _get_observation(self):
        return np.concatenate([self.portfolio_weights, [self.current_capital]])

    def step(self, action):
        # Update portfolio weights based on action
        self.portfolio_weights = action / np.sum(action)

        # Calculate portfolio return and update capital
        portfolio_return = np.dot(self.portfolio_weights, self.asset_returns)
        self.current_capital *= (1 + portfolio_return)

        # Calculate risk-adjusted performance metric (e.g., Sharpe ratio)
        sharpe_ratio = (portfolio_return - self.risk_free_rate) / np.std(self.asset_returns)

        # Reward function: maximize Sharpe ratio
        reward = sharpe_ratio

        # Implement risk management techniques
        # For example, apply constraint on maximum drawdown
        max_drawdown = np.max(np.maximum.accumulate(self.current_capital) - self.current_capital)
        max_drawdown_threshold = 0.2  # Example threshold
        violation_penalty = -10  # Penalty for violating constraint
        if max_drawdown > max_drawdown_threshold:
            reward += violation_penalty
            done = True  # End episode due to constraint violation
        else:
            done = False

        # Update time step
        self.time_step += 1

        # Return observation, reward, done, info
        return self._get_observation(), reward, done, {}

# Define the deep reinforcement learning model
def build_drl_model(input_shape, num_actions):
    input_layer = Input(shape=input_shape)
    hidden_layer = Dense(64, activation='relu')(input_layer)
    output_layer = Dense(num_actions, activation='softmax')(hidden_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

# Main training loop
def train_drl_model(env, num_episodes, batch_size):
    input_shape = env.observation_space.shape
    num_actions = env.action_space.shape[0]
    drl_model = build_drl_model(input_shape, num_actions)
    drl_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            # Sample action from the model
            action = drl_model.predict(np.array([state]))[0]
            next_state, reward, done, _ = env.step(action)
            # Train the model using experience replay or other techniques
            # Update the DRL model based on the observed reward
            # Implement exploration-exploitation strategy
            state = next_state

# Example usage
if __name__ == "__main__":
    # Fetch data from Yahoo Finance
    tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']  # Example tickers
    start_date = '2020-01-01'
    end_date = '2022-01-01'
    data = fetch_yahoo_finance_data(tickers, start_date, end_date)

    # Calculate returns and covariances
    asset_returns, covariances = calculate_returns_and_covariances(data)

    # Define other parameters for the environment
    initial_capital = 1000000
    risk_free_rate = 0.02

    # Create environment
    env = PortfolioAllocationEnv(initial_capital, risk_free_rate, asset_returns, covariances)

    # Define training parameters
    num_episodes = 1000
    batch_size = 32

    # Train DRL model
    train_drl_model(env, num_episodes, batch_size)


[*********************100%%**********************]  5 of 5 completed




TypeError: cannot accumulate on a scalar