In [41]:
# SETUP: Imports & Paths ===========================
import jupyter
from src.utils.system import boot, Notify

boot()
import os
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


from tqdm import tqdm

from src.data.feature_pipeline import basic_chart_features,load_base_dataframe
from src.predictability.easiness import rolling_sharpe, rolling_r2, rolling_info_ratio, rolling_autocorr
from src.predictability.pipeline import generate_universe_easiness_report
from IPython import display

from src.experiments.experiment_tracker import ExperimentTracker
from src.config import TOP2_STOCK_BY_SECTOR


from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from scipy.stats import skew, kurtosis, entropy
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import acf, acovf
from src.defaults import TOP2_STOCK_BY_SECTOR
from src.agent.base_model import TransformerPpo

from tqdm import tqdm
from sklearn.preprocessing import  RobustScaler
from IPython.display import display

# FRAMEWORK STUFF =========================

from src.data.feature_pipeline import load_base_dataframe
from src.experiments.experiment_tracker import ExperimentTracker
from src.env.base_trading_env import CumulativeTradingEnv,AlphaTradingEnv
from src.env.base_timeseries_trading_env import SequenceAwareAlphaTradingEnv,SequenceAwareCumulativeTradingEnv
from src.agent.base_model import TransformerPpo
from src.defaults import RANDOM_SEEDS
from src.utils.db import ConfigurableMixin

import warnings
warnings.filterwarnings("ignore")


In [42]:

EXPERIENCE_NAME = "episode_learning_transferability_v2"
FEATURE_COLS= ["return_1d", "volume", "vix"]
# Prepare your dataframes for two consecutive episodes:
SEEDS = RANDOM_SEEDS
EPISODE_LENGTH = 21
WINDOW_LENGTH = EPISODE_LENGTH*2
N_UPDATES = 10

excluded_tickers=['CEG', 'GEHC', 'GEV', 'KVUE', 'SOLV']
excluded_tickers.sort()

config={
    "excluded_tickers": excluded_tickers,
    "episode_length":EPISODE_LENGTH,
    "window_length":WINDOW_LENGTH,
    "n_updates":N_UPDATES,
    "agent":"TransformerPpoAgent",
    "environment":"SequenceAwareCumulativeTradingEnv"
}
run_settings={
    "excluded_tickers": excluded_tickers,
    "start_date": '2024-01-01',
    "end_date":"2026-01-01",
    "episode_length":EPISODE_LENGTH,
    "window_length":WINDOW_LENGTH,
    "n_updates":N_UPDATES
}

# Config section


In [43]:
# LOAD OHLCV ==========================================


ohlcv_df = load_base_dataframe()
ohlcv_df['date'] = pd.to_datetime(ohlcv_df['date'])
ohlcv_df = ohlcv_df[(ohlcv_df['date'] >= run_settings["start_date"]) & (ohlcv_df['date'] < run_settings["end_date"])]
ohlcv_df['month'] = ohlcv_df['date'].dt.to_period('M')
ohlcv_df['return_1d'] = ohlcv_df['return_1d'].fillna(0)
ohlcv_df['sector_id'] = ohlcv_df['sector_id'].fillna('unknown')
ohlcv_df['industry_id'] = ohlcv_df['industry_id'].fillna('unknown')

In [44]:
# SETUP ===================================
import jupyter
import warnings

from src.utils.system import boot, Notify



In [45]:
ohlcv_df = load_base_dataframe()
notification = Notify(EXPERIENCE_NAME)

In [48]:
# --- Imports & Setup ----------------------------------------------------
import os
import time
import json
import hashlib
import numpy as np
import pandas as pd
from datetime import datetime
from tqdm import tqdm
from scipy.stats import ttest_ind, mannwhitneyu, skew, kurtosis, entropy
import matplotlib.pyplot as plt

# Your custom imports:
from src.utils.system import boot, Notify
from src.data.feature_pipeline import load_base_dataframe, basic_chart_features
from src.predictability.easiness import rolling_sharpe, rolling_r2, rolling_info_ratio, rolling_autocorr
from src.experiments.experiment_tracker import ExperimentTracker
from src.env.base_trading_env import CumulativeTradingEnv, AlphaTradingEnv
from src.env.base_timeseries_trading_env import SequenceAwareAlphaTradingEnv, SequenceAwareCumulativeTradingEnv
from src.agent.base_model import TransformerPpo
from src.defaults import RANDOM_SEEDS, TOP2_STOCK_BY_SECTOR
from src.utils.db import ConfigurableMixin
import torch
import torch.nn as nn
import numpy as np
import gymnasium as gym
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
from sb3_contrib.common.recurrent.policies import RecurrentActorCriticPolicy
import warnings
warnings.filterwarnings("ignore")
boot()

import torch
import torch.nn as nn
import numpy as np
import gymnasium as gym
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
from sb3_contrib.common.recurrent.policies import RecurrentActorCriticPolicy

import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

def generate_causal_mask(seq_len):
    return torch.triu(torch.ones((seq_len, seq_len), dtype=torch.bool), diagonal=1)

def sinusoidal_positional_encoding(seq_len, d_model, device):
    """Returns a (seq_len, d_model) matrix with classic Transformer sin-cos encoding."""
    pe = torch.zeros(seq_len, d_model, device=device)
    position = torch.arange(0, seq_len, dtype=torch.float, device=device).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2, device=device).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe

class TransformerFeatureExtractor(BaseFeaturesExtractor):
    def __init__(self, observation_space, d_model=64, n_heads=4, n_layers=2):
        super().__init__(observation_space, features_dim=d_model)
        self.d_model = d_model
        input_dim = observation_space.shape[-1]
        self.input_proj = nn.Linear(input_dim, d_model)

        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

    def forward(self, obs):
        # obs: (batch, seq_len, input_dim)
        x = self.input_proj(obs)
        seq_len = x.size(1)
        device = x.device
        pe = sinusoidal_positional_encoding(seq_len, self.d_model, device)
        x = x + pe.unsqueeze(0)  # (1, seq_len, d_model) for broadcasting
        causal_mask = generate_causal_mask(seq_len).to(device)
        x = self.transformer(x, mask=causal_mask)
        pooled_output = x[:, -1]
        return pooled_output

# Transformer Policy ===================================
class TransformerPolicy(RecurrentActorCriticPolicy):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs,
                         features_extractor_class=TransformerFeatureExtractor,
                         features_extractor_kwargs=dict(
                             d_model=64, n_heads=4, n_layers=2
                         ))
        #self._build(self.lr_schedule)

# Regime Augmentation Wrapper ===========================
class RegimeAugmentingWrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.regime_dim = 3  # One-hot: bull, bear, sideways
        obs_shape = self.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(obs_shape[0], obs_shape[1] + self.regime_dim),
            dtype=np.float32
        )

    def observation(self, obs):
        regime = self.env.get_current_regime()  # 0,1,2 -> bull,bear,sideways
        one_hot = np.zeros(self.regime_dim)
        one_hot[regime] = 1.0
        one_hot = np.repeat(one_hot[None, :], obs.shape[0], axis=0)
        return np.concatenate([obs, one_hot], axis=-1)
# --- Config -------------------------------------------------------------

EXPERIENCE_NAME = "episode_learning_transferability_v2"
RESULTS_CSV = EXPERIENCE_NAME + ".csv"
FEATURE_COLS = ["return_1d", "volume", "vix"]

EPISODE_LENGTH = 21
WINDOW_LENGTH = EPISODE_LENGTH * 2
N_UPDATES = 10
MAX_LAG = 3   # How many months ahead to test (multi-lag transfer)
RANDOM_SEEDS = [7, 314, 42]
excluded_tickers = ['CEG', 'GEHC', 'GEV', 'KVUE', 'SOLV']
excluded_tickers.sort()
tickers = [t for t in TOP2_STOCK_BY_SECTOR if t not in excluded_tickers]

run_settings = {
    "excluded_tickers": excluded_tickers,
    "start_date": '2023-01-01',
    "end_date": "2026-01-01",
    "episode_length": EPISODE_LENGTH,
    "window_length": WINDOW_LENGTH,
    "n_updates": N_UPDATES,
    "max_lag": MAX_LAG,
    "random_seeds": RANDOM_SEEDS
}

notification = Notify(EXPERIENCE_NAME)

# --- Utilities ----------------------------------------------------------

def encode_hash(run_config: dict) -> str:
    xx = json.dumps(run_config, sort_keys=True)
    return hashlib.sha256(xx.encode()).hexdigest()

def month_ranges(start_date_str):
    start = pd.Timestamp(start_date_str).replace(day=1)
    today = pd.Timestamp.today().replace(hour=0, minute=0, second=0, microsecond=0)
    next_month = (today + pd.offsets.MonthBegin(1)).replace(day=1)
    final_month = (next_month + pd.offsets.MonthBegin(1)).replace(day=1)
    ranges = []
    d = start
    while d < final_month:
        next_d = (d + pd.offsets.MonthBegin(1)).replace(day=1)
        ranges.append([d.strftime('%Y-%m-%d'), next_d.strftime('%Y-%m-%d')])
        d = next_d
    return ranges

def compute_meta_features(episode_df, returns_col='return_1d'):
    """Compute meta-features for a window/episode."""
    r = episode_df[returns_col].values

    features = {
        'volatility': np.std(r),
        'mean_return': np.mean(r),
        'skew': skew(r),
        'kurtosis': kurtosis(r),
        'entropy': entropy(np.histogram(r, bins=10, density=True)[0] + 1e-8),
        'sharpe':rolling_sharpe(episode_df['return_1d'], window=EPISODE_LENGTH),
        'autocorr':rolling_autocorr(episode_df['return_1d'], window=EPISODE_LENGTH),
        'info_ratio': rolling_info_ratio(
    episode_df['return_1d'],
    episode_df['market_return_1d'],  # replace with actual column
    window=EPISODE_LENGTH
).iloc[-1],
        'rolling_r2':rolling_r2(episode_df['return_1d'], window=EPISODE_LENGTH),
        #'sharpe': rolling_sharpe(episode_df[['return_1d']], window=EPISODE_LENGTH).iloc[-1],
        #' rolling_autocorr(episode_df[['return_1d']], window=EPISODE_LENGTH).iloc[-1],
        #' rolling_info_ratio(episode_df[['return_1d']], window=EPISODE_LENGTH).iloc[-1],
        #' rolling_r2(episode_df['return_1d'], window=EPISODE_LENGTH).iloc[-1],
        'drawdown': episode_df['close'].cummax() - episode_df['return_1d'],
    }
    # Optionally: add regime label if available
    if "regime" in episode_df.columns:
        features["regime"] = episode_df["regime"].iloc[-1]
    else:
        features["regime"] = 0  # Or compute via your regime classifier
    return features

# --- Environment & Agent Factory ----------------------------------------

def env_maker(seed, ohlcv_df, feature_cols, episode_length, window_length):
    return SequenceAwareCumulativeTradingEnv(
        ohlcv_df,
        feature_cols=feature_cols,
        episode_length=episode_length,
        transaction_cost=0,
        seed=seed,
        window_length=window_length
    )

def agent_maker(environment, seed, policy_class, agent_class=TransformerPpo, **kwargs):
    agent = agent_class(
        RecurrentPPO,
        TransformerPolicy,
        environment,
        model_config={
            "n_steps": EPISODE_LENGTH,
            "batch_size": EPISODE_LENGTH,
        },
        run_config={"seed": seed, "verbose": 0},
        **kwargs
    )
    return agent

# --- Transferability Test Function --------------------------------------

def test_transferability(
    env_maker, agent_maker, ohlcv_df,
    episode_sequence_train, episode_sequence_test, feature_cols, episode_length,
    episode_id_train, episode_id_test, n_updates=8, window_length=10,
    random_seeds=[7, 314], policy_class=None, agent_class=None,
    ablation_tag=None, verbose=True, meta_train=None, meta_test=None
):
    agent_rewards_B, random_rewards_B = [], []
    total_timesteps = episode_length * n_updates
    run_results = []

    run_hash = encode_hash({
        "policy_class": str(policy_class),
        "agent_class": str(agent_class),
        "n_updates": n_updates,
        "train_episode_id": episode_id_train,
        "test_episode_id": episode_id_test,
        "ablation": ablation_tag
    })

    # --- CSV result caching -----------
    if os.path.exists(RESULTS_CSV):
        df = pd.read_csv(RESULTS_CSV)
        if not df.empty and (df['hash'] == run_hash).any():
            return [], [], 0, 0, {}
    else:
        df = pd.DataFrame()

    for run, seed in enumerate(random_seeds):
        np.random.seed(seed)
        # Train on Episode A
        envA = env_maker(seed, ohlcv_df, feature_cols, episode_length, window_length)
        envA.set_episode_sequence(episode_sequence_train)
        print(envA)
        model = agent_maker(envA, seed, policy_class, agent_class)
        agent = model.agent
        agent.learn(total_timesteps=total_timesteps)

        # Test trained agent on Episode B
        envB = env_maker(seed, ohlcv_df, feature_cols, episode_length, window_length)
        envB.set_episode_sequence(episode_sequence_test)
        obs, _ = envB.reset()
        rewards = []
        done = False
        while not done:
            action, _ = agent.predict(obs)
            obs, reward, done, truncated, info = envB.step(action)
            rewards.append(reward)
        agent_rewards_B.append(np.sum(rewards))

        # Random policy on Episode B
        envB_rand = env_maker(seed, ohlcv_df, feature_cols, episode_length, window_length)
        envB_rand.set_episode_sequence(episode_sequence_test)
        obs, _ = envB_rand.reset()
        rand_rewards = []
        done = False
        while not done:
            action = envB_rand.action_space.sample()
            obs, reward, done, truncated, info = envB_rand.step(action)
            rand_rewards.append(reward)
        random_rewards_B.append(np.sum(rand_rewards))

    # --- Statistical testing on Episode B
    agent_rewards_B = np.array(agent_rewards_B)
    random_rewards_B = np.array(random_rewards_B)

    t_stat, t_pval = ttest_ind(agent_rewards_B, random_rewards_B, equal_var=False)
    mw_stat, mw_pval = mannwhitneyu(agent_rewards_B, random_rewards_B, alternative='greater')

    # --- Save meta-features, report, and results
    study_result = {
        "experience_name": EXPERIENCE_NAME,
        "policy_class": str(policy_class),
        "agent_class": str(agent_class),
        "ablation": ablation_tag,
        "train_meta": json.dumps(meta_train),
        "test_meta": json.dumps(meta_test),
        "train_episode_id": episode_id_train,
        "test_episode_id": episode_id_test,
        "hash": run_hash,
        "agent_rewards_mean": np.median(agent_rewards_B),
        "agent_rewards_std": agent_rewards_B.std(),
        "random_rewards_mean": np.median(random_rewards_B),
        "random_rewards_std": random_rewards_B.std(),
        "t_pval": float(t_pval),
        "mw_pval": float(mw_pval),
        "advantage": np.median(agent_rewards_B) - np.median(random_rewards_B),
        "agent_rewards": json.dumps(agent_rewards_B.tolist()),
        "random_rewards": json.dumps(random_rewards_B.tolist()),
        "train_date": meta_train.get("date") if meta_train else "",
        "test_date": meta_test.get("date") if meta_test else "",
    }
    df = pd.concat([df, pd.DataFrame([study_result])], ignore_index=True)
    print(RESULTS_CSV)
    df.to_csv(RESULTS_CSV, index=False)

    # Optionally: Save a per-run markdown/HTML report here for later
    with open(f"report_{run_hash}.md", "w") as f:
        f.write(f"# Transferability Run: {run_hash}\n")
        f.write(f"## Train Meta\n{json.dumps(meta_train, indent=2)}\n")
        f.write(f"## Test Meta\n{json.dumps(meta_test, indent=2)}\n")
        f.write(f"### Results\n")
        f.write(json.dumps(study_result, indent=2))

    return agent_rewards_B, random_rewards_B, t_pval, mw_pval, study_result

# --- Multi-Lag Transfer, Meta-Features, NxN Matrix ----------------------
def safe_get_episode(env, ticker, start):
    try:
        return env.get_episode_by_start_date(ticker, start)
    except Exception as e:
        print(f"[SKIP] get_episode_by_start_date failed for {ticker} {start}: {e}")
        return None, None, None
    
def main():
    ohlcv_df = load_base_dataframe()
    ohlcv_df['date'] = pd.to_datetime(ohlcv_df['date'])
    ohlcv_df = ohlcv_df[
        (ohlcv_df['date'] >= run_settings["start_date"]) &
        (ohlcv_df['date'] < run_settings["end_date"])
    ]
    ohlcv_df['month'] = ohlcv_df['date'].dt.to_period('M')
    ohlcv_df['return_1d'] = ohlcv_df['return_1d'].fillna(0)
    ohlcv_df['sector_id'] = ohlcv_df['sector_id'].fillna('unknown')
    ohlcv_df['industry_id'] = ohlcv_df['industry_id'].fillna('unknown')

    month_pairs = month_ranges(run_settings['start_date'])[:-3]
    ablation_settings = [
        {"policy_class": TransformerPolicy, "agent_class": TransformerPpo, "ablation_tag": "full"},
        # Add ablation variants here, e.g., no regime, different policies, etc.
        # {"policy_class": "SimpleMLPPolicy", "agent_class": MlpPpo, "ablation_tag": "mlp"},
    ]

    # For each ticker, perform NxN transfer, meta-feature logging, and multi-lag evaluation
    for ticker in tqdm(tickers):
        run_start_time = time.time()
        notification.info(f"Train start for {ticker} | {datetime.now().strftime('%Y-%m-%d %H:%M')}")
        # --- Precompute episodes for all months for this ticker
        episodes = []
        train_env = SequenceAwareCumulativeTradingEnv(
            ohlcv_df,
            feature_cols=FEATURE_COLS,
            episode_length=EPISODE_LENGTH,
            transaction_cost=0,
            seed=RANDOM_SEEDS[0],
            window_length=WINDOW_LENGTH
        )
        skipped = 0
        for p in month_pairs:
            try:
                _, start_train, id_train = train_env.get_episode_by_start_date(ticker, p[0])
                episodes.append({"ticker": ticker, "start": start_train, "id": id_train, "date": p[0]})
            except ValueError as e:
                # Optionally print or log this skip for traceability
                print(f"[SKIP] {ticker} {p[0]}: {e}")
                skipped += 1
                print(f"{ticker}: Skipped {skipped}/{len(month_pairs)} episodes due to insufficient lookback.")

                continue
        # --- NxN transfer matrix loop
        for ablation in ablation_settings:
            valid_train_episodes = 0
            valid_test_episodes = 0
            for i, train_ep in enumerate(episodes):
                try:
                    print(train_ep)
                    train_env.set_episode_sequence([[ticker, train_ep["start"]]])
               
                    train_env.reset()
                    train_df = train_env.episode_df.copy()
                    #train_df, _, _ = train_env.get_episode_by_start_date(ticker, train_ep["start"])
                    #valid_train_episodes += 1
                except Exception as e:
                    print(f"[SKIP] Train episode failed for {ticker} {train_ep['start']}: {e}")
                    continue
                for lag in range(1, MAX_LAG+1):
                    test_idx = i + lag
                    if test_idx >= len(episodes):
                        continue
                    test_ep = episodes[test_idx]
                    print(episodes[test_idx])
                    try:
                        #test_df, _, _ = train_env.get_episode_by_start_date(ticker, test_ep["start"])
                        train_env.set_episode_sequence([[ticker, test_ep["start"]]])
                        valid_test_episodes += 1
                        train_env.reset()
                        test_df = train_env.episode_df.copy()
                    except Exception as e:
                        print(f"[SKIP] Test episode failed for {ticker} {test_ep['start']}: {e}")
                        continue
                    # ... rest of logic ...
                    
                    print('will test')
                    meta_test = compute_meta_features(test_df)
                    meta_train = compute_meta_features(test_df)
                    meta_test["date"] = test_ep["date"]
                    meta_train["date"] = train_ep["date"]
                    #meta_test["date"] = test_ep["date"]

                    train_sequence = [[ticker, train_ep["start"]]]
                    test_sequence = [[ticker, test_ep["start"]]]
                    agent_rewards, random_rewards, t_pval, mw_pval, results = test_transferability(
                        env_maker,
                        agent_maker,
                        ohlcv_df,
                        train_sequence,
                        test_sequence,
                        feature_cols=FEATURE_COLS,
                        episode_length=EPISODE_LENGTH,
                        random_seeds=RANDOM_SEEDS,
                        n_updates=N_UPDATES,
                        window_length=WINDOW_LENGTH,
                        episode_id_train=train_ep["id"],
                        episode_id_test=test_ep["id"],
                        policy_class=ablation["policy_class"],
                        agent_class=ablation["agent_class"],
                        ablation_tag=ablation["ablation_tag"],
                        meta_train=meta_train,
                        meta_test=meta_test
                    )
                print(f"{ticker}: Valid train episodes: {valid_train_episodes}, Valid test episodes: {valid_test_episodes}")
        elapsed = time.time() - run_start_time
        notification.info(f"Train complete for {ticker} | {datetime.now().strftime('%Y-%m-%d %H:%M')} | Exec time: {elapsed:.1f}s")

if __name__ == "__main__":
    main()

# --- Visualization Example: Transfer Matrix -----------------------------
def plot_transfer_matrix(results_csv=RESULTS_CSV, ticker=None):
    df = pd.read_csv(results_csv)
    if ticker is not None:
        df = df[df["ticker"] == ticker]
    # Pivot: rows=train_date, cols=test_date, values=advantage or t_pval
    matrix = df.pivot_table(index="train_date", columns="test_date", values="advantage")
    plt.figure(figsize=(12, 10))
    sns.heatmap(matrix, annot=True, fmt=".2f", cmap="coolwarm")
    plt.title(f"Transferability Matrix ({ticker})")
    plt.ylabel("Train Month")
    plt.xlabel("Test Month")
    plt.show()


  0%|          | 0/22 [00:00<?, ?it/s]

[SKIP] AAPL 2023-01-01: Not enough lookback: start_idx=0 < window_length=42
AAPL: Skipped 1/28 episodes due to insufficient lookback.
[SKIP] AAPL 2023-02-01: Not enough lookback: start_idx=20 < window_length=42
AAPL: Skipped 2/28 episodes due to insufficient lookback.
[SKIP] AAPL 2023-03-01: Not enough lookback: start_idx=39 < window_length=42
AAPL: Skipped 3/28 episodes due to insufficient lookback.
{'ticker': 'AAPL', 'start': 62, 'id': 126, 'date': '2023-04-01'}
{'ticker': 'AAPL', 'start': 81, 'id': 127, 'date': '2023-05-01'}
will test
<SequenceAwareCumulativeTradingEnv instance>
<SequenceAwareCumulativeTradingEnv instance>
<SequenceAwareCumulativeTradingEnv instance>


  0%|          | 0/22 [02:21<?, ?it/s]


TypeError: Object of type Series is not JSON serializable

In [None]:
plot_transfer_matrix("episode_learning_transferability_v2.csv")

In [None]:
e= env_maker(1,ohlcv_df,FEATURE_COLS,EPISODE_LENGTH,WINDOW_LENGTH)

In [None]:
e.set_episode_sequence([['AAPL',100]])
e.reset()
e.episode_df

In [None]:
e.set_episode_sequence([['AAPL',200]])
e.reset()
e.episode_df
rolling_sharpe(e.episode_df['return_1d'], window=EPISODE_LENGTH).dropna().iloc[-1]

In [None]:
e.episode_df