In [None]:
!pip install yfinance tensorflow pandas numpy


In [5]:
import yfinance as yf
import pandas as pd
import numpy as np
import tensorflow as tf
from google.colab import files

from agent import Agent
from trading_env import TradingEnvironment
from data_processing import get_data, preprocess_data, normalize_features
from tqdm.notebook import tqdm

In [None]:
# Hardware discovery: report GPU visibility, then choose a distribution strategy.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    print('GPU device not found, using the CPU instead.')

# Try to attach to a TPU first. Every step stays inside the `try` so that a
# ValueError raised by any of them falls back to TensorFlow's current
# (default) strategy, which covers the CPU and GPU cases.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print("Running on TPU")
except ValueError:
    strategy = tf.distribute.get_strategy()
    print("Running on CPU or GPU")

In [None]:
# Symbols to run the experiment on; each one is passed to train_and_test(),
# which forwards it to get_data(). Presumably Yahoo Finance tickers, since
# yfinance is imported above -- confirm against data_processing.get_data.
assets = ["GC=F", "CL=F", "^GSPC", "^IXIC", "^TNX", "EURUSD=X", "JPYUSD=X", "^VIX"]

# Overall date range: passed to train_and_test() as start_date / end_date.
train_start = "2007-01-01"
train_end = "2024-01-01"
# Passed to train_and_test() as test_start / test_end. NOTE: that function
# reassigns both names for every fold from its own training cut-offs, so these
# two values do not influence how the data is split.
test_start = "2012-01-01"
test_end = "2024-01-01"
# Forwarded to TradingEnvironment and used as the first dimension of the
# agent's input shape.
window_size = 30
# Spacing, in years, between successive training cut-off dates
# (overrides the function default of 4).
train_years = 5
# Length, in years, of the test window that follows each training cut-off.
test_years = 4

def _last_row_on_or_before(index, when):
    """Return the position of the last entry of `index` dated on or before `when`.

    Unlike `index.get_loc(when)`, this does not require `when` to be present
    in the index, so weekend/holiday boundaries resolve to the preceding row.
    Returns -1 when every entry is later than `when`.

    Assumes `index` is a sorted (monotonic increasing) DatetimeIndex.
    """
    when = pd.Timestamp(when)
    index_tz = getattr(index, "tz", None)
    if index_tz is not None and when.tzinfo is None:
        # Compare like with like when the price index is timezone-aware.
        when = when.tz_localize(index_tz)
    return int(index.searchsorted(when, side="right")) - 1


def _build_env(prices, features, first, last, window_size):
    """Create a TradingEnvironment over rows `first`..`last` (inclusive)."""
    return TradingEnvironment(
        prices=prices.iloc[first:last + 1],
        features=features[first:last + 1],
        window_size=window_size,
        portfolio_ret_w=1.0,
        sharpe_w=0.1,
        drawdown_w=0.1,
        transaction_cost_w=0.1,
    )


def _run_episode(agent, env):
    """Step `agent` through `env` until it reports done; return the summed reward."""
    state = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = agent.select_action(state)
        state, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward


def train_and_test(symbol, start_date, end_date, test_start, test_end, window_size, train_years=4, test_years=4, strategy=None):
    """Walk-forward train/evaluate an Agent on one symbol and return the summed test reward.

    Training cut-offs are the year-end dates generated every `train_years`
    years between `start_date` and `end_date` (the first one is dropped).
    For each cut-off a fresh Agent is trained on all rows from the start of
    the data up to the cut-off, then evaluated on the rows from the cut-off
    up to `test_years` later (capped at `end_date`).

    Cut-off and test-end dates that are not present in the price index
    (weekends, holidays, `end_date` itself) resolve to the last available row
    on or before that date instead of raising KeyError.

    Args:
        symbol: Identifier forwarded to `get_data`.
        start_date: Start of the data range; also the start of every training window.
        end_date: End of the data range; test windows never extend past it.
        test_start: Ignored; kept for backward compatibility. The per-fold
            test start is always the training cut-off.
        test_end: Ignored; kept for backward compatibility. The per-fold
            test end is derived from the cut-off and `test_years`.
        window_size: Forwarded to TradingEnvironment and used as the first
            dimension of the agent's input shape.
        train_years: Spacing in years between successive training cut-offs.
        test_years: Length in years of each test window.
        strategy: A tf.distribute strategy. When None, TensorFlow's current
            default strategy is used.

    Returns:
        The sum of the test-episode rewards over all folds.

    NOTE(review): features are normalized once over the full history before
    the data is split, and feature rows are sliced with positions computed
    from `prices.index`. Whether that leaks future information, and whether
    features stay row-aligned with prices, depends on `normalize_features` /
    `preprocess_data` -- confirm.
    """
    if strategy is None:
        # The default of None used to crash on `strategy.scope()`.
        strategy = tf.distribute.get_strategy()

    prices = get_data(symbol, start_date, end_date)
    features = preprocess_data(prices)
    normalized_features = normalize_features(features)

    final_date = pd.Timestamp(end_date)
    # YearEnd(n) is the offset the "{n}Y" alias stood for; passing the object
    # avoids the alias, which is deprecated in recent pandas releases.
    cutoffs = pd.date_range(
        start=start_date, end=end_date, freq=pd.offsets.YearEnd(train_years)
    )[1:]

    overall_reward = 0
    for train_end in cutoffs:
        train_start = start_date
        test_start = train_end
        test_end = min(test_start + pd.DateOffset(years=test_years), final_date)

        print(f"\nTraining from {train_start} to {train_end}, Testing from {test_start} to {test_end}")

        train_end_idx = _last_row_on_or_before(prices.index, train_end)
        test_start_idx = train_end_idx
        test_end_idx = _last_row_on_or_before(prices.index, test_end)

        if train_end_idx < 0:
            print(f"No price data on or before {train_end}; skipping this fold.")
        else:
            train_env = _build_env(prices, normalized_features, 0, train_end_idx, window_size)

            with strategy.scope():
                input_shape = (window_size, normalized_features.shape[1])
                agent = Agent(strategy=strategy, input_shape=input_shape, num_actions=3)
                agent.train(train_env, num_episodes=500)

                test_env = _build_env(
                    prices, normalized_features, test_start_idx, test_end_idx, window_size
                )
                overall_reward += _run_episode(agent, test_env)

        # Once a test window reaches the end of the data there is nothing left to evaluate.
        if test_end >= final_date:
            break

    print(f"\nOverall Reward for {symbol}: {overall_reward}")
    return overall_reward

# Run the walk-forward experiment once per configured asset.
for symbol in assets:
    train_and_test(
        symbol=symbol,
        start_date=train_start,
        end_date=train_end,
        test_start=test_start,
        test_end=test_end,
        window_size=window_size,
        train_years=train_years,
        test_years=test_years,
        strategy=strategy,
    )
