# Part 1: Data Processing - Stock Trading with Yahoo Finance

Following the NeurIPS 2018 paper structure, this notebook focuses on data download and preprocessing.

In [11]:
# Install required packages
!pip install yfinance
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

Collecting git+https://github.com/AI4Finance-Foundation/FinRL.git
  Cloning https://github.com/AI4Finance-Foundation/FinRL.git to /tmp/pip-req-build-cy0957t8
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/FinRL.git /tmp/pip-req-build-cy0957t8
Collecting git+https://github.com/AI4Finance-Foundation/FinRL.git
  Cloning https://github.com/AI4Finance-Foundation/FinRL.git to /tmp/pip-req-build-cy0957t8
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/FinRL.git /tmp/pip-req-build-cy0957t8
  Resolved https://github.com/AI4Finance-Foundation/FinRL.git to commit 69776b349ee4e63efe3826f318aef8e5c5f59648
  Installing build dependencies ... [?25l  Resolved https://github.com/AI4Finance-Foundation/FinRL.git to commit 69776b349ee4e63efe3826f318aef8e5c5f59648
  Installing build dependencies ... [?25l-done
[?25h  Getting requirements to build wheel ... [?25done
[?25h  Getting requirements to build whee

In [12]:
import pandas as pd
import numpy as np
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.config import INDICATORS

# Define parameters
TRAIN_START_DATE = '2009-01-01'  # Following NeurIPS 2018 paper timeframe
TRAIN_END_DATE = '2019-01-01'
TEST_START_DATE = '2019-01-01'
TEST_END_DATE = '2021-01-01'

# Define stock symbols (you can modify this list)
TICKER_LIST = ['AAPL', 'MSFT', 'JPM', 'BA', 'GE', 'T']

def process_data(start_date, end_date):
    """Download and process data for a given date range"""
    print(f"Downloading data from {start_date} to {end_date}...")
    df = YahooDownloader(
        start_date=start_date,
        end_date=end_date,
        ticker_list=TICKER_LIST
    ).fetch_data()
    
    print("Processing features...")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=INDICATORS,
        use_turbulence=True
    )
    
    processed = fe.preprocess_data(df)
    return processed

# Process training data
train_data = process_data(TRAIN_START_DATE, TRAIN_END_DATE)
print("\nTraining data shape:", train_data.shape)

# Process testing data
test_data = process_data(TEST_START_DATE, TEST_END_DATE)
print("Testing data shape:", test_data.shape)

# Save processed data
train_data.to_csv('train.csv', index=False)
test_data.to_csv('trade.csv', index=False)

print("\nData has been processed and saved to train.csv and trade.csv")

# Display sample of processed data
print("\nSample of processed training data:")
print(train_data.head())

# Display feature information
print("\nFeatures included:")
print(train_data.columns.tolist())

Downloading data from 2009-01-01 to 2019-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed



Shape of DataFrame:  (15096, 8)
Processing features...
Successfully added technical indicators
Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed

Successfully added turbulence index

Training data shape: (15096, 17)
Downloading data from 2019-01-01 to 2021-01-01...



[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed



Shape of DataFrame:  (3030, 8)
Processing features...
Successfully added technical indicators
Successfully added technical indicators
Successfully added turbulence index
Testing data shape: (3030, 17)
Successfully added turbulence index
Testing data shape: (3030, 17)

Data has been processed and saved to train.csv and trade.csv

Sample of processed training data:
         date      close       high        low       open     volume   tic  \
0  2009-01-02   2.730994   3.251429   3.041429   3.067143  746015200  AAPL   
1  2009-01-02  33.941101  45.560001  42.779999  42.799999    7010200    BA   
2  2009-01-02  56.576836  81.806908  77.972954  79.123138   11917130    GE   
3  2009-01-02  20.973110  31.639999  30.469999  31.190001   32494900   JPM   
4  2009-01-02  14.924775  20.400000  19.370001  19.530001   50084000  MSFT   

   day  macd   boll_ub   boll_lb  rsi_30     cci_30  dx_30  close_30_sma  \
0    4   0.0  2.951623  2.625623   100.0  66.666667  100.0      2.730994   
1    4   0.0 

# Part 2: Model Training

This section implements the training process following the NeurIPS 2018 paper approach.

In [13]:
from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Load the preprocessed training data
train_data = pd.read_csv('train.csv')

# Set up environment parameters
stock_dimension = len(train_data.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension

# Convert data to arrays for training
price_array = train_data[['close']].values
tech_array = train_data[INDICATORS].values
turbulence_array = train_data['turbulence'].values

# Environment configuration
env_config = {
    "price_array": price_array,
    "tech_array": tech_array,
    "turbulence_array": turbulence_array,
    "if_train": True,
    "initial_amount": 1000000,  # $1M initial capital
    "buy_cost_pct": 0.001,    # 0.1% trading cost
    "sell_cost_pct": 0.001,   # 0.1% trading cost
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

# Create training environment
env_train = StockTradingEnv(config=env_config)

# Initialize DRL agent
agent = DRLAgent(env=env_train)

# PPO model parameters following NeurIPS 2018
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128
}

# Get and train the model
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# Train the model
trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name="ppo_train",
    total_timesteps=100000  # Increase for better performance
)

# Save the trained model
trained_ppo.save("trained_ppo_model")
print("Model training completed and saved as 'trained_ppo_model'")

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




----------------------------------
| time/              |           |
|    fps             | 1437      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 2.6158385 |
----------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1258         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0044814735 |
|    clip_fraction        | 0.0344       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 0.000857     |
|    learning_rate        | 0.00025      |
|    loss                 | 21.8         |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00327    

## Model Evaluation

Test the trained model on the test dataset

In [None]:
# Load test data
test_data = pd.read_csv('trade.csv')

# Prepare test data arrays
price_array_test = test_data[['close']].values
tech_array_test = test_data[INDICATORS].values
turbulence_array_test = test_data['turbulence'].values

# Configure test environment
env_config_test = env_config.copy()
env_config_test.update({
    "price_array": price_array_test,
    "tech_array": tech_array_test,
    "turbulence_array": turbulence_array_test,
    "if_train": False
})

env_test = StockTradingEnv(config=env_config_test)

# Run test episodes
state = env_test.reset()
done = False
episode_returns = []

try:
    while not done:
        # Convert state to numpy array if it's not already
        if isinstance(state, tuple):
            state = state[0]  # Get the first element if it's a tuple
        state_array = np.array(state)
        
        # Ensure state is 2D array with shape (1, state_dim)
        if state_array.ndim == 1:
            state_array = state_array.reshape(1, -1)
            
        # Get action from model
        action, _ = trained_ppo.predict(state_array)
        
        # Execute action in environment
        step_result = env_test.step(action)
        
        # Handle different step() return formats
        if len(step_result) == 4:
            next_state, reward, done, info = step_result
        elif len(step_result) == 5:
            next_state, reward, terminated, truncated, info = step_result
            done = terminated or truncated
        else:
            raise ValueError(f"Unexpected step() return format with {len(step_result)} values")
            
        state = next_state
        episode_returns.append(float(reward))  # Convert reward to float
        
except Exception as e:
    print(f"An error occurred during evaluation: {str(e)}")
    print(f"Error type: {type(e)}")
    done = True

# Print results
print(f"\nEvaluation Results:")
if episode_returns:
    total_return = sum(episode_returns)
    print(f"Test Episode Return: {float(total_return):.2f}")  # Convert to float before formatting
    print(f"Initial Portfolio Value: ${env_config_test['initial_amount']:,.2f}")
    try:
        print(f"Final Portfolio Value: ${float(env_test.portfolio_value):,.2f}")  # Convert to float
    except Exception as e:
        print(f"Could not get final portfolio value: {str(e)}")


Evaluation Results:


TypeError: unsupported format string passed to numpy.ndarray.__format__