In [1]:
import polars as pl
import numpy as np
from datetime import datetime, timedelta
import json
import os

from data.cons_data import get_cons
from data.market_data import market_data

from utils.market_time import market_hours
from utils.params import PARAMS
from utils.clustering_methods import Clustering_methods

from pairs_finding.pairs_identification import cointegration_pairs
from pairs_finding.clustering import Clustering

from trade.pairs_trader import PairsTrader
from trade.optimizer import optimizer

import warnings

# NOTE(review): this silences every warning category for the whole kernel
# session, including deprecation and numerical warnings from the libraries
# imported above. Narrow the filter if a specific warning is the target.
warnings.filterwarnings("ignore")

In [2]:
# Hand-written parameter sets keyed by (ticker, ticker) pair. Both pairs share
# the same beta_win / z_win / z_entry / z_exit values and differ only in
# trade_freq.
params = {
    pair: {
        PARAMS.beta_win: 100,
        PARAMS.z_win: 10,
        PARAMS.z_entry: 2,
        PARAMS.z_exit: 1,
        PARAMS.trade_freq: freq,
    }
    for pair, freq in (
        (("GOOGL", "GOOG"), "1m"),
        (("GOOGL", "AAPL"), "5m"),
    )
}

In [3]:
# Constituents of the chosen ETF. cons_date is indexed by "YYYY-MM-DD" strings
# (its keys are parsed with that format below and it is looked up by date
# string in the training loops).
etf = "QQQ"
cons = get_cons(etf=etf)
cons_date = cons.read()

# The parquet glob defaults to the original machine-specific location but can
# be redirected on another machine through the QTS_DATA_GLOB environment
# variable, without editing the notebook.
data = market_data(
    file_path=os.environ.get(
        "QTS_DATA_GLOB",
        "C:/Users/edmun/OneDrive/Desktop/Quantitative Trading Strategies/Project/qts/data/polygon/*.parquet",
    )
)
out_path = "output/polygon"

# Keep only constituent dates on or after the cutoff; the cutoff is parsed once
# instead of once per key.
cutoff_date = datetime.strptime("2020-06-30", "%Y-%m-%d").date()
earliest_date_year = [
    i
    for i in cons_date.keys()
    if datetime.strptime(i, "%Y-%m-%d").date() >= cutoff_date
]

# Number of dates per walk-forward chunk.
periods = 30

# Last date of every chunk of `periods` consecutive dates, as "YYYY-MM-DD"
# strings. group_by(..., maintain_order=True) with .last() relies on the keys
# of cons_date already being in chronological order.
# NOTE(review): rank() starts at 1, so ranks 1..periods-1 land in chunk 0 and
# the first chunk is one date shorter than the others. Left unchanged because
# these boundaries feed the result file names used for the skip-if-exists check.
period_ends = (
    pl.DataFrame(earliest_date_year, schema=["Date"])
    .with_columns(
        pl.all().cast(pl.Date),
    )
    .with_columns((pl.col("Date").rank() // periods).alias("Chunk"))
    .group_by("Chunk", maintain_order=True)
    .agg(pl.col("Date").last())["Date"]
    .dt.strftime("%Y-%m-%d")
    .to_list()
)

In [4]:
# Collect, for every training window, the top pairs reported by the
# cointegration search. Results are keyed by "<train_start>_<train_end>".
output = {}
feature_cols = pl.all().exclude(["date", "time"])

for i in range(10, len(period_ends)):  # range(2, len(period_ends))
    # Window bounds are the period ends 10, 2, 1 and 0 steps back from i.
    warm_start, train_start, train_end, trade_end = (
        period_ends[i - back] for back in (10, 2, 1, 0)
    )
    print(warm_start, train_start, train_end, trade_end)

    # Calendar day after the training window (not necessarily a trading day).
    last_date = datetime.strptime(train_end, "%Y-%m-%d")
    next_day = (last_date + timedelta(days=1)).strftime("%Y-%m-%d")

    # Windows whose backtest result already exists on disk are skipped.
    result_file = f"{out_path}/result/result_{next_day}_{trade_end}.csv"
    if os.path.isfile(result_file):
        continue

    # Load the constituents as of train_end over the training window and
    # resample to 15-minute bars within market hours.
    data.read(cons=cons_date[train_end], start=train_start, end=train_end)
    train = data.filter(resample_freq="15m", hours=market_hours.MARKET)

    # Cluster on everything except the date/time columns.
    c = Clustering(df=train.select(feature_cols))
    c.run_clustering(method=Clustering_methods.agnes, min_clusters=2, max_clusters=5)

    # Search for cointegrated pairs, restricted to the pairs the clustering produced.
    find_pairs = cointegration_pairs(
        df=train.select(feature_cols),
        p_val_cutoff=0.005,
        cluster_pairs=c.cluster_pairs,
    )
    find_pairs.identify_pairs()

    for_j = find_pairs.get_top_pairs()
    output[f"{train_start}_{train_end}"] = for_j

2020-08-10 2021-07-23 2021-09-03 2021-10-18
2020-09-22 2021-09-03 2021-10-18 2021-11-30
2020-11-03 2021-10-18 2021-11-30 2022-01-12
2020-12-16 2021-11-30 2022-01-12 2022-02-25
2021-02-01 2022-01-12 2022-02-25 2022-04-08
2021-03-16 2022-02-25 2022-04-08 2022-05-23
2021-04-28 2022-04-08 2022-05-23 2022-07-07
2021-06-10 2022-05-23 2022-07-07 2022-08-18
2021-07-23 2022-07-07 2022-08-18 2022-09-30
2021-09-03 2022-08-18 2022-09-30 2022-11-11
2021-10-18 2022-09-30 2022-11-11 2022-12-27
2021-11-30 2022-11-11 2022-12-27 2023-02-09
2022-01-12 2022-12-27 2023-02-09 2023-03-24
2022-02-25 2023-02-09 2023-03-24 2023-05-08
2022-04-08 2023-03-24 2023-05-08 2023-06-21
2022-05-23 2023-05-08 2023-06-21 2023-08-03
2022-07-07 2023-06-21 2023-08-03 2023-09-15
2022-08-18 2023-08-03 2023-09-15 2023-10-27
2022-09-30 2023-09-15 2023-10-27 2023-12-11
2022-11-11 2023-10-27 2023-12-11 2024-01-25
2022-12-27 2023-12-11 2024-01-25 2024-03-08
2023-02-09 2024-01-25 2024-03-08 2024-04-22
2023-03-24 2024-03-08 2024-04-22

In [6]:
# Echo the dict of top pairs per training window built by the loop above.
output

{'2021-07-23_2021-09-03': [('AMD', 'ATVI'),
  ('CSX', 'IDXX'),
  ('SIRI', 'VRTX'),
  ('BIDU', 'INTC'),
  ('MTCH', 'SWKS'),
  ('ANSS', 'INTC'),
  ('SBUX', 'SWKS'),
  ('ALGN', 'ATVI'),
  ('MAR', 'SWKS'),
  ('ADBE', 'INTU'),
  ('CTSH', 'SWKS'),
  ('GILD', 'SWKS'),
  ('CPRT', 'INTC'),
  ('ADBE', 'AEP'),
  ('FB', 'FOX'),
  ('FOXA', 'IDXX'),
  ('AAPL', 'AEP'),
  ('BIIB', 'SWKS'),
  ('CMCSA', 'CTAS'),
  ('KLAC', 'TXN')],
 '2021-09-03_2021-10-18': [('CERN', 'KHC'),
  ('ASML', 'KHC'),
  ('MRNA', 'REGN'),
  ('IDXX', 'KHC'),
  ('INCY', 'KHC'),
  ('CDNS', 'KHC'),
  ('ADBE', 'KHC'),
  ('ANSS', 'EXC'),
  ('AMZN', 'DOCU'),
  ('FOXA', 'KHC'),
  ('FOX', 'KHC'),
  ('ILMN', 'KHC'),
  ('BIIB', 'KHC'),
  ('AMZN', 'COST'),
  ('GOOG', 'NVDA'),
  ('GILD', 'KHC'),
  ('EXC', 'VRSN'),
  ('CERN', 'VRTX'),
  ('GOOG', 'HON'),
  ('AMZN', 'WBA'),
  ('DLTR', 'MTCH'),
  ('LULU', 'MTCH')],
 '2021-10-18_2021-11-30': [('FOX', 'FOXA'),
  ('KDP', 'PCAR'),
  ('DOCU', 'ZM'),
  ('MSFT', 'PEP'),
  ('CDNS', 'SNPS'),
  ('HON', 'T

In [5]:
# Persist the collected pairs as JSON. default=str stringifies any value the
# json module cannot serialise natively.
with open("C:/Users/edmun/Downloads/j.json", "w") as json_file:
    json_file.write(json.dumps(output, default=str))

In [4]:
# Walk-forward run. For every window: find cointegrated pairs on the training
# period, tune their parameters with the optimizer, backtest the tuned pairs on
# the following trading period, and persist the results and the parameters.

# Every artefact below is written into a sub-directory of out_path. Create them
# up front so a run cannot fail on a missing directory after the expensive
# optimisation has already finished.
for sub_dir in ("result", "db", "trials", "params"):
    os.makedirs(f"{out_path}/{sub_dir}", exist_ok=True)

for i in range(10, len(period_ends)):  # range(2, len(period_ends))
    warm_start, train_start, train_end, trade_end = (
        period_ends[i - 10],
        period_ends[i - 2],
        period_ends[i - 1],
        period_ends[i],
    )

    print(warm_start, train_start, train_end, trade_end)
    # Calendar day after the training window (not necessarily a trading day).
    # It is the start bound of the backtest and part of the output file names.
    last_date = datetime.strptime(train_end, "%Y-%m-%d")
    next_day = (last_date + timedelta(days=1)).strftime("%Y-%m-%d")

    # An existing result file marks the window as done, so it is skipped.
    if os.path.isfile(f"{out_path}/result/result_{next_day}_{trade_end}.csv"):
        continue

    # TRAINING PERIOD FINDING OPTIMAL PARAMS #
    data.read(cons=cons_date[train_end], start=train_start, end=train_end)

    train = data.filter(resample_freq="15m", hours=market_hours.MARKET)

    c = Clustering(df=train.select(pl.all().exclude(["date", "time"])))

    # c.run_clustering(method=Clustering_methods.kmeans, min_clusters=2, max_clusters=6)

    c.run_clustering(method=Clustering_methods.agnes, min_clusters=2, max_clusters=5)

    find_pairs = cointegration_pairs(
        df=train.select(pl.all().exclude(["date", "time"])),
        p_val_cutoff=0.005,
        cluster_pairs=c.cluster_pairs,
    )
    find_pairs.identify_pairs()

    # Element 0 of every entry in cluster_sorted_pairs is the pair itself.
    potential_pairs = [
        pair[0]
        for sublist in find_pairs.cluster_sorted_pairs.values()
        for pair in sublist
    ]
    # Reload only the candidate tickers, starting at warm_start (earlier than
    # train_start) so extra history precedes the optimisation window.
    data.read(
        cons={item for pair in potential_pairs for item in pair},
        start=warm_start,
        end=train_end,
    )

    opt = optimizer(
        data=data,
        find_pairs=find_pairs,  # list(params.keys()), # pairs_to_trade
        start=pl.lit(train_start).str.strptime(pl.Date, "%Y-%m-%d"),
        end=pl.lit(train_end).str.strptime(pl.Date, "%Y-%m-%d"),
    )

    study = opt.optimize(
        study_name="PAIRS_TRADING",
        output_file_name=f"{out_path}/db/result_{next_day}_{trade_end}.db",
        n_trials=10,
    )
    p = study.best_params

    study.trials_dataframe().to_csv(
        f"{out_path}/trials/trials_{train_start}_{train_end}.csv"
    )

    # Regroup the flat "<ticker1>_<ticker2>_<param name>" keys by pair. The two
    # portfolio-level keys are not per-pair parameters and are skipped here.
    optimal_params = {}
    for key, value in p.items():
        if key in ("pairs_to_trade", "buffer_capital"):
            continue
        parts = key.split("_")
        pair = (parts[0], parts[1])
        param_name = "_".join(parts[2:])
        optimal_params.setdefault(pair, {})[param_name] = value

    # TRADING PERIOD USING PARAMS

    # Load only the traded tickers, from train_start through trade_end. The
    # backtest itself starts at next_day; the earlier rows are presumably
    # warm-up history for the rolling windows (TODO confirm).
    pairs_to_trade = list(optimal_params.keys())
    data.read(
        cons={item for pair in pairs_to_trade for item in pair},
        start=train_start,
        end=trade_end,
    )

    trader = PairsTrader(
        data=data,
        pairs=pairs_to_trade,  # list(params.keys()),  # pairs_to_trade
        params=optimal_params,
        trade_hour=market_hours.MARKET,
    )

    pl_next_day = pl.lit(next_day).str.strptime(pl.Date, "%Y-%m-%d")
    pl_trade_end = pl.lit(trade_end).str.strptime(pl.Date, "%Y-%m-%d")
    returns = trader.backtest(
        start=pl_next_day,
        end=pl_trade_end,
        cost=0.0005,
        stop_loss=None,
        # np.array(
        #     [optimal_params[(p1, p2)][PARAMS.stop_loss] for p1, p2 in pairs_to_trade]
        # ),
    )

    # Add the period-over-period change of CAPITAL as PORT_RET (first row 0)
    # and store the frame; this file is what the skip check above looks for.
    returns.with_columns(
        pl.col("CAPITAL").pct_change().fill_null(0).alias("PORT_RET")
    ).write_csv(f"{out_path}/result/result_{next_day}_{trade_end}.csv")

    # JSON keys must be strings, so pair tuples become "<ticker1>_<ticker2>".
    convert_json = {
        f"{p1}_{p2}": pair_params for (p1, p2), pair_params in optimal_params.items()
    }
    convert_json["pairs_to_trade"] = p["pairs_to_trade"]
    convert_json["buffer_capital"] = p["buffer_capital"]
    with open(
        f"{out_path}/params/optimal_params_{next_day}_{trade_end}.json", "w"
    ) as json_file:
        json.dump(convert_json, json_file, default=str)

    del c, opt, find_pairs, trader  # free ram

2020-06-30 2020-07-23 2020-07-27 2020-07-29
2020-07-02 2020-07-27 2020-07-29 2020-07-31


[I 2025-06-14 18:33:05,345] A new study created in RDB with name: PAIRS_TRADING


  0%|          | 0/10 [00:00<?, ?it/s]

[I 2025-06-14 18:33:09,671] Trial 0 finished with value: -5.684916362775381 and parameters: {'pairs_to_trade': 3, 'AMGN_COST_beta_win': 336, 'AMGN_COST_z_win': 498, 'AMGN_COST_z_entry': 3.5, 'AMGN_COST_z_exit': 2.0, 'AMGN_COST_trade_freq': '3m', 'AMGN_COST_stop_loss': 0.02, 'PCAR_VRSN_beta_win': 874, 'PCAR_VRSN_z_win': 390, 'PCAR_VRSN_z_entry': 1.0, 'PCAR_VRSN_z_exit': 3.0, 'PCAR_VRSN_trade_freq': '3m', 'PCAR_VRSN_stop_loss': 0.005, 'FOXA_VRSN_beta_win': 1000, 'FOXA_VRSN_z_win': 464, 'FOXA_VRSN_z_entry': 2.5, 'FOXA_VRSN_z_exit': 2.0, 'FOXA_VRSN_trade_freq': '1m', 'FOXA_VRSN_stop_loss': 0.005, 'AMGN_COST_z_stop_scaler': 1.3, 'PCAR_VRSN_z_stop_scaler': 1.3, 'FOXA_VRSN_z_stop_scaler': 0.6, 'buffer_capital': 0.5}. Best is trial 0 with value: -5.684916362775381.
[I 2025-06-14 18:33:09,924] Trial 1 finished with value: -7.068437654392485 and parameters: {'pairs_to_trade': 3, 'AMGN_COST_beta_win': 320, 'AMGN_COST_z_win': 314, 'AMGN_COST_z_entry': 4.0, 'AMGN_COST_z_exit': 1.5, 'AMGN_COST_trade

KeyboardInterrupt: 

In [7]:
# Load the parameter file saved for the 2020-07-30 .. 2020-07-31 trading window
# and echo it.
with open(
    "C:/Users/edmun/OneDrive/Desktop/Quantitative Trading Strategies/Project/qts/output/polygon/params/optimal_params_2020-07-30_2020-07-31.json"
) as r:
    p = json.loads(r.read())
p

{'AMGN_COST': {'beta_win': 264,
  'z_win': 44,
  'z_entry': 1.0,
  'z_exit': 1.5,
  'trade_freq': '4m',
  'stop_loss': 0.02,
  'z_stop_scaler': 0.5},
 'PCAR_VRSN': {'beta_win': 978,
  'z_win': 696,
  'z_entry': 1.5,
  'z_exit': 4.0,
  'trade_freq': '3m',
  'stop_loss': 0.015,
  'z_stop_scaler': 0.7},
 'FOXA_VRSN': {'beta_win': 568,
  'z_win': 382,
  'z_entry': 1.5,
  'z_exit': 3.5,
  'trade_freq': '4m',
  'stop_loss': 0.02,
  'z_stop_scaler': 2.0},
 'KHC_VRSN': {'beta_win': 26,
  'z_win': 224,
  'z_entry': 3.0,
  'z_exit': 3.5,
  'trade_freq': '2m',
  'stop_loss': 0.01,
  'z_stop_scaler': 0.8},
 'FOX_VRSN': {'beta_win': 102,
  'z_win': 140,
  'z_entry': 1.0,
  'z_exit': 4.0,
  'trade_freq': '4m',
  'stop_loss': 0.005,
  'z_stop_scaler': 1.6},
 'EBAY_VRSN': {'beta_win': 918,
  'z_win': 354,
  'z_entry': 2.5,
  'z_exit': 1.0,
  'trade_freq': '3m',
  'stop_loss': 0.01,
  'z_stop_scaler': 1.3},
 'MDLZ_VRSN': {'beta_win': 668,
  'z_win': 544,
  'z_entry': 2.0,
  'z_exit': 2.5,
  'trade_freq

In [None]:
# Re-run the backtest over next_day..trade_end with the optimal_params left in
# the kernel, this time with zero transaction cost and per-pair stop losses.
pairs_to_trade = [pair for pair in optimal_params if len(pair) == 2]

trader = PairsTrader(
    data=data,
    pairs=pairs_to_trade,
    params=optimal_params,
    trade_hour=market_hours.MARKET,
)

pl_next_day = pl.lit(next_day).str.strptime(pl.Date, "%Y-%m-%d")
pl_trade_end = pl.lit(trade_end).str.strptime(pl.Date, "%Y-%m-%d")

# One stop-loss value per pair, in the same order as pairs_to_trade.
stop_loss_by_pair = np.array(
    [optimal_params[(p1, p2)][PARAMS.stop_loss] for p1, p2 in pairs_to_trade]
)
returns = trader.backtest(
    start=pl_next_day,
    end=pl_trade_end,
    cost=0.0,
    stop_loss=stop_loss_by_pair,
)
returns

date,time,PRICE_INCY_INCY_ON_VRTX,PRICE_VRTX_INCY_ON_VRTX,PRICE_SNPS_SNPS_ON_VRTX,PRICE_VRTX_SNPS_ON_VRTX,PRICE_KLAC_KLAC_ON_MU,PRICE_MU_KLAC_ON_MU,PRICE_BIDU_BIDU_ON_CTAS,PRICE_CTAS_BIDU_ON_CTAS,BETA_BIDU_ON_CTAS,SPREAD_BIDU_ON_CTAS,Z_BIDU_ON_CTAS,BETA_KLAC_ON_MU,SPREAD_KLAC_ON_MU,Z_KLAC_ON_MU,BETA_SNPS_ON_VRTX,SPREAD_SNPS_ON_VRTX,Z_SNPS_ON_VRTX,BETA_INCY_ON_VRTX,SPREAD_INCY_ON_VRTX,Z_INCY_ON_VRTX,market_close,SIGNAL_BIDU_ON_CTAS,SIGNAL_KLAC_ON_MU,SIGNAL_SNPS_ON_VRTX,SIGNAL_INCY_ON_VRTX,POS_BIDU_ON_CTAS,POS_KLAC_ON_MU,POS_SNPS_ON_VRTX,POS_INCY_ON_VRTX,POS_BETA_BIDU_ON_CTAS,POS_BETA_KLAC_ON_MU,POS_BETA_SNPS_ON_VRTX,POS_BETA_INCY_ON_VRTX,PAIR_BASE_BIDU_ON_CTAS,PAIR_BASE_KLAC_ON_MU,PAIR_BASE_SNPS_ON_VRTX,PAIR_BASE_INCY_ON_VRTX,PAIR_RET_BIDU_ON_CTAS,PAIR_RET_KLAC_ON_MU,PAIR_RET_SNPS_ON_VRTX,PAIR_RET_INCY_ON_VRTX,SL_BIDU_ON_CTAS,SL_KLAC_ON_MU,SL_SNPS_ON_VRTX,SL_INCY_ON_VRTX,LOSS_BIDU_ON_CTAS,LOSS_KLAC_ON_MU,LOSS_SNPS_ON_VRTX,LOSS_INCY_ON_VRTX,COOLDOWN_BIDU_ON_CTAS,COOLDOWN_KLAC_ON_MU,COOLDOWN_SNPS_ON_VRTX,COOLDOWN_INCY_ON_VRTX,CAPITAL_BIDU_ON_CTAS,CAPITAL_KLAC_ON_MU,CAPITAL_SNPS_ON_VRTX,CAPITAL_INCY_ON_VRTX,REMAINING_CAPITAL,CAPITAL
date,time,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2020-07-28,13:29:00,101.61,283.58,197.09,283.58,195.19,51.28,115.9,74.57,0.494573,79.019721,-0.888826,4.747673,-48.270665,-0.368773,0.836923,-40.244557,-0.298453,0.5351,-50.133699,-0.466753,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2020-07-28,13:30:00,102.3185,283.29,196.18,283.29,195.49,50.975,115.455,74.6975,,,,,,,,,,,,,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2020-07-28,13:31:00,102.28,283.29,196.18,283.29,195.665,51.01,115.2479,74.68,0.56917,72.742291,-1.039516,,,,,,,,,,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,157.753509,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9,0.0,0.0,0.0,0.1,1.0
2020-07-28,13:32:00,101.88,282.67,195.01,282.67,196.49,51.01,115.2,74.6975,,,,4.493096,-32.702852,-0.185836,0.837024,-41.591467,-0.334879,0.535919,-49.608169,-0.453081,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.56917,0.0,0.0,0.0,157.715569,0.0,0.0,0.0,-0.000367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.89967,0.0,0.0,0.0,0.1,0.99967
2020-07-28,13:33:00,101.87,282.27,195.23,282.27,195.05,51.02,114.82,74.5425,1.208492,24.735977,-2.217233,,,,,,,,,,0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.56917,0.0,0.0,0.0,157.247348,0.0,0.0,0.0,-0.00185,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.000367,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.996344,0.996344
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2020-07-29,19:55:00,97.92,278.63,197.98,278.63,194.77,50.36,119.8,76.4163,-0.023162,121.569947,1.462547,,,,,,,,,,0,-1.0,0.0,0.0,0.0,1.0,-1.0,-1.0,0.0,1.96616,0.498624,0.676442,0.0,270.046653,219.880699,386.457032,0.0,0.000876,0.000988,0.000215,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.004378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.892232,0.0,0.099244,0.991476
2020-07-29,19:56:00,98.04,278.63,198.03,278.63,194.86,50.365,119.895,76.57,,,,1.13444,137.723908,0.835137,0.367203,95.716262,3.341027,0.732454,-106.043651,-2.147847,0,0.0,0.0,0.0,0.0,1.0,-1.0,-1.0,0.0,1.96616,0.498624,0.676442,0.0,270.443852,219.973192,386.507032,0.0,-0.000767,-0.000398,-0.000129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.004163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.892116,0.0,0.099244,0.991361
2020-07-29,19:57:00,98.08,278.3901,197.85,278.3901,194.67,50.3,119.88,76.4775,-0.122914,129.280138,1.641743,,,,,,,,,,0,-1.0,0.0,0.0,0.0,1.0,-1.0,-1.0,0.0,1.96616,0.498624,0.676442,0.0,270.246982,219.750782,386.164754,0.0,0.000617,0.000716,0.000046,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.004292,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.892157,0.0,0.099244,0.991402
2020-07-29,19:58:00,98.07,278.6,197.83,278.6,194.64,50.285,119.87,76.5363,,,,,,,,,,,,,0,0.0,0.0,0.0,0.0,1.0,-1.0,-1.0,0.0,1.96616,0.498624,0.676442,0.0,270.352592,219.713302,386.286739,0.0,-0.000465,0.000102,0.000419,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.004247,0.0,20.0,30.0,30.0,0.0,0.0,0.0,0.0,0.0,0.991401,0.991401


In [None]:
# Hand-picked flat parameter set in the optimizer's key format
# "<ticker1>_<ticker2>_<param name>", plus the fixed dates used by the manual
# experiments in the following cells.
p = dict(
    pairs_to_trade=1,
    # CPRT / SBUX
    CPRT_SBUX_beta_win=30,
    CPRT_SBUX_hurst_win=50,
    CPRT_SBUX_z_win=45,
    CPRT_SBUX_z_entry=3.0,
    CPRT_SBUX_z_exit=-1.7,
    CPRT_SBUX_trade_freq="7m",
    CPRT_SBUX_stop_loss=0.01,
    # MRNA / TSLA
    MRNA_TSLA_beta_win=40,
    MRNA_TSLA_hurst_win=10,
    MRNA_TSLA_z_win=5,
    MRNA_TSLA_z_entry=2.8,
    MRNA_TSLA_z_exit=-2.5,
    MRNA_TSLA_trade_freq="1m",
    MRNA_TSLA_stop_loss=0.004,
)

train_start = "2020-06-30"
train_end = "2020-07-31"
trade_end = "2020-08-31"

In [None]:
# Regroup the flat "<ticker1>_<ticker2>_<param name>" keys of p by pair. Both
# portfolio-level keys are skipped, matching the parser in the walk-forward
# loop; otherwise "buffer_capital" would be read as a pair ("buffer", "capital").
optimal_params = {}
for key, value in p.items():
    if key in ("pairs_to_trade", "buffer_capital"):
        continue
    parts = key.split("_")
    pair = (parts[0], parts[1])
    param_name = "_".join(parts[2:])
    optimal_params.setdefault(pair, {})[param_name] = value

# TRADING PERIOD USING PARAMS
# Calendar day after train_end. It is not used by the backtest in this cell
# but stays in the kernel namespace.
last_date = datetime.strptime(train_end, "%Y-%m-%d")
next_day = (last_date + timedelta(days=1)).strftime("%Y-%m-%d")

# Load only the tickers of the chosen pairs over train_start..train_end.
pairs_to_trade = list(optimal_params.keys())

data.read(
    cons={item for pair in pairs_to_trade for item in pair},
    start=train_start,
    end=train_end,
)

trader = PairsTrader(
    data=data,
    pairs=pairs_to_trade,  # list(params.keys()),  # pairs_to_trade
    params=optimal_params,
    trade_hour=market_hours.MARKET,
)

# NOTE: despite their names, these two bounds hold train_start and train_end,
# so this backtest runs over the training window itself.
pl_next_day = pl.lit(train_start).str.strptime(pl.Date, "%Y-%m-%d")
pl_trade_end = pl.lit(train_end).str.strptime(pl.Date, "%Y-%m-%d")
returns = trader.backtest(
    start=pl_next_day,
    end=pl_trade_end,
    cost=0.0005,
    # one stop-loss value per pair, in the same order as pairs_to_trade
    stop_loss=np.array(
        [optimal_params[(p1, p2)][PARAMS.stop_loss] for p1, p2 in pairs_to_trade]
    ),
)

In [None]:
# Echo whatever trader.generate_backtest_df() returns (the notebook displays the
# value of the last expression in the cell).
trader.generate_backtest_df()

In [None]:
# Build a fresh trader from the pairs_to_trade / optimal_params currently in
# the kernel and repeat the backtest over train_start..train_end.
trader = PairsTrader(
    data=data,
    pairs=pairs_to_trade,
    params=optimal_params,
    trade_hour=market_hours.MARKET,
)

# Despite their names, both bounds are taken from the training window.
pl_next_day, pl_trade_end = (
    pl.lit(bound).str.strptime(pl.Date, "%Y-%m-%d")
    for bound in (train_start, train_end)
)

# One stop-loss value per pair, in the same order as pairs_to_trade.
stop_loss_per_pair = [
    optimal_params[(p1, p2)][PARAMS.stop_loss] for p1, p2 in pairs_to_trade
]
returns = trader.backtest(
    start=pl_next_day,
    end=pl_trade_end,
    cost=0.0005,
    stop_loss=np.array(stop_loss_per_pair),
)

In [None]:
# Dump the latest backtest frame to check.csv in the current working directory
# (presumably for manual inspection).
returns.write_csv("check.csv")

In [None]:
# Keep the frame returned by trader.generate_backtest_df(); the cells below
# select its Z_* / BETA_* columns from it.
df = trader.generate_backtest_df()

In [None]:
# Echo the frame.
df

In [None]:
# Manual experiment: feed the z-score and beta columns of two pairs straight
# into trader.compute_pos with hand-picked entry/exit thresholds.

# Column order used by every array in this cell: MRNA/TSLA first, CPRT/SBUX second.
Z_arr = df.select(
    # select reorders the columns
    ["Z_MRNA_ON_TSLA", "Z_CPRT_ON_SBUX"]
).to_numpy()  # shape: n rows, n pairs

# The second column must be the CPRT/SBUX beta. The original selected
# "Z_CPRT_ON_SBUX" here, which passed z-scores to compute_pos as betas.
beta_arr = df.select(
    # select reorders the columns
    ["BETA_MRNA_ON_TSLA", "BETA_CPRT_ON_SBUX"]
).to_numpy()  # shape: n rows, n pairs

# hurst_arr = df.select(
#     # select reorders the columns
#     [f"HURST_MRNA_ON_TSLA" for p1, p2 in self.pairs]
# ).to_numpy()  # shape: n rows, n pairs

# market_close_flag = df.select("market_close").to_numpy().flatten()

# One threshold per pair, in the column order above.
z_entry_arr = np.array([1, 1])
z_exit_arr = np.array([0.6, 2])

signal_arr, pos_arr, pos_beta_arr = trader.compute_pos(
    Z_arr=Z_arr,
    beta_arr=beta_arr,
    # hurst_arr=hurst_arr,
    n_pairs=2,
    z_entry_arr=z_entry_arr,
    z_exit_arr=z_exit_arr,
    # all-zero flag: no row is marked as a market close in this experiment
    market_close_flag=np.zeros(len(df)),
)



In [None]:
# Echo the position array returned by compute_pos.
pos_arr

In [None]:
# Dump the position array, as returned by compute_pos, to hmm.csv in the
# current working directory.
pl.DataFrame(pos_arr).write_csv("hmm.csv")

In [None]:
# Forward-fill NaN positions down the time axis (rows), independently for each
# pair (column), in place. The original filled along axis 1, which copied the
# neighbouring pair's position into a NaN slot instead of carrying the same
# pair's last position forward.
# NOTE(review): assumes pos_arr uses the same (n rows, n pairs) layout as the
# Z_arr / beta_arr passed to compute_pos — confirm.
mask = np.isnan(pos_arr)
# Row index of every valid entry, 0 where the entry is NaN.
idx = np.where(~mask, np.arange(mask.shape[0])[:, None], 0)
# Running maximum down each column = row of the most recent valid entry.
np.maximum.accumulate(idx, axis=0, out=idx)
# NaNs before the first valid row of a column point at row 0 and stay NaN.
pos_arr[mask] = pos_arr[idx[mask], np.nonzero(mask)[1]]
pl.DataFrame(pos_arr).write_csv("wfill.csv")

In [None]:
# Plot pos_arr through pandas: one line per column of the array.
pl.DataFrame(pos_arr).to_pandas().plot()

In [None]:
# Show the pos_beta_arr returned by compute_pos as a frame.
pl.DataFrame(pos_beta_arr)

In [None]:
# Forward-fill NaN positions down the time axis (rows), independently for each
# pair (column), then treat whatever is still NaN as flat (0). All in place.
# The original filled along axis 1, which copied the neighbouring pair's
# position into a NaN slot instead of carrying the same pair's last position
# forward.
# NOTE(review): assumes pos_arr uses the same (n rows, n pairs) layout as the
# Z_arr / beta_arr passed to compute_pos — confirm.
mask = np.isnan(pos_arr)
# Row index of every valid entry, 0 where the entry is NaN.
idx = np.where(~mask, np.arange(mask.shape[0])[:, None], 0)
# Running maximum down each column = row of the most recent valid entry.
np.maximum.accumulate(idx, axis=0, out=idx)
pos_arr[mask] = pos_arr[idx[mask], np.nonzero(mask)[1]]
# Only NaNs before a column's first valid row remain at this point.
pos_arr[np.isnan(pos_arr)] = 0

pos_arr