In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, date, timedelta, time
os.environ['TZ'] ='America/New_York'

import yfinance as yf

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

from ib_async import *
util.startLoop()

### Data Prep for Inferencing

In [54]:
ib = IB()
ib.connect(port=4002, clientId=3)

<IB connected to 127.0.0.1:4002 clientId=3>

In [3]:
contract = Contract(symbol='META', secType='STK', exchange='SMART', currency='USD')
ib.qualifyContracts(contract)

[Contract(secType='STK', conId=107113386, symbol='META', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='META', tradingClass='NMS')]

In [4]:
contract_vix = Contract(symbol= 'VIX', secType = 'IND',exchange = 'CBOE', currency='USD')
ib.qualifyContracts(contract_vix)

[Contract(secType='IND', conId=13455763, symbol='VIX', exchange='CBOE', currency='USD', localSymbol='VIX')]

In [5]:
contract_nas = Contract(symbol= 'TQQQ', secType = 'STK',exchange = 'SMART', currency='USD')
ib.qualifyContracts(contract_nas)

[Contract(secType='STK', conId=72539702, symbol='TQQQ', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='TQQQ', tradingClass='NMS')]

In [6]:
class Data_Request :

    """
    Returns PD data containning equity price with specific time control

    Returns:
        pd: information regarding the contract of interst.
    """
    
    def __init__(self, list_days, contract, barsize, duration, *,specific_time= None, specific_pick_time= None):

        self.list_days = list_days
        self.contract = contract
        #self.init_date = init_date
        self.barsize = barsize
        self.duration = duration
        self.specific_time = specific_time # cut off time . Time object: time(14, 30, 0)
        self.specific_pick_time= specific_pick_time

    def D_request(self):

        his_df =pd.DataFrame()
        
        for i in range(len(self.list_days)):
            init_date=self.list_days[i]
            bars = ib.reqHistoricalData(self.contract, init_date, barSizeSetting=self.barsize, durationStr=self.duration, whatToShow="TRADES", useRTH=True)
            add_his_df = pd.DataFrame(bars)
            add_his_df['date'] = pd.to_datetime(add_his_df['date'])
            print(f'retriveing data for {init_date}')

            if self.specific_time is not None:
                cut_off = datetime.combine(self.list_days[i],self.specific_time)  # need logic if specific_time is defined
                pd_cut_off = pd.to_datetime(cut_off).tz_localize('US/Eastern')
            #datetime64_eastern = pd.Timestamp(date_time_str).tz_localize('US/Eastern')
                add_his_df = add_his_df[add_his_df['date'] > pd_cut_off]
            
            if self.specific_pick_time is not None:
                pick_time = datetime.combine(self.list_days[i],self.specific_pick_time)  # need logic if specific_time is defined
                pd_pick_time = pd.to_datetime(pick_time).tz_localize('US/Eastern')
            #datetime64_eastern = pd.Timestamp(date_time_str).tz_localize('US/Eastern')
                add_his_df = add_his_df[add_his_df['date'] == pd_pick_time]

            
            his_df = pd.concat([his_df, add_his_df], ignore_index=True)
            his_df['Date_Only'] = his_df['date'].dt.date
            print(len(his_df))

        
        return his_df


In [7]:
# picking out Friday and before specific-time data

#specific_time = time(14, 30, 0)
def data_filter(df_initial, list_days, specific_time_before = None, specific_time_after= None, on_time= None):
    his_df = pd.DataFrame()
    for i in range(len(list_days)):
        init_date=list_days[i]
        if specific_time_before is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time < specific_time_before)]
           
            
        if specific_time_after is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time >= specific_time_after)]

        if on_time is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time == on_time)]

        if (specific_time_before is None and specific_time_after is None) and on_time is None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date)]
        
        print(len(his_df))
        print(f'retrieving {init_date}')
        per_proc = np.round(i+1/len(list_days)*100, decimals=1)
        print(f'{per_proc} percent is done')
        his_df = pd.concat([his_df, add_his_df], ignore_index=True)
        his_df.drop_duplicates(subset=None, keep='first', inplace=True)
        his_df['Date_Only'] = his_df['date'].dt.date
    return his_df

In [14]:
from datetime import date, timedelta

def calculate_business_days(start_date, end_date):
    """
    Calculates the number of business days between two dates, excluding weekends.

    Args:
        start_date (date): The start date.
        end_date (date): The end date.

    Returns:
        int: The number of business days.
    """
    business_days = 0
    current_date = start_date
    while current_date <= end_date:
        if current_date.weekday() < 5:  # Monday to Friday
            business_days += 1
        current_date += timedelta(days=1)
    return business_days

# Example usage:
start_date = date(2025, 2, 1)  # April 7, 2025 (Monday)
end_date = date(2025, 4, 3)  # April 18, 2025 (Friday)
num_business_days = calculate_business_days(start_date, end_date)
print(f"Number of business days between {start_date} and {end_date}: {num_business_days}")

Number of business days between 2025-02-01 and 2025-04-03: 44


In [47]:
def get_fridays_in_range(start_date, end_date):
    """
    Returns a list of dates that are Fridays within the given date range.

    Args:
        start_date (date): The start date of the range.
        end_date (date): The end date of the range.

    Returns:
        list: A list of date objects representing Fridays within the range.
    """
    fridays = []
    all_dates = []
    current_date = start_date
    all_current_date = start_date

    # Adjust to the first Friday
    days_until_friday = (4 - current_date.weekday()) % 7
    current_date += timedelta(days=days_until_friday)

    while current_date <= end_date:
        fridays.append(current_date)
        current_date += timedelta(days=7) # Move to the next Friday

    while all_current_date <= end_date:
        all_dates.append(all_current_date)
        all_current_date += timedelta(days=1)
    
    return fridays, all_dates

# Example Usage
start_date = date(2024, 2, 1)
end_date = date(2025, 4, 1)

fridays, all_dates = get_fridays_in_range(start_date, end_date)
# removing Non-trading Holidays

fridays.remove(date(2024, 3, 29))
fridays.remove(date(2024, 11, 29))

#all_dates.remove(date(2024, 1, 15))
all_dates.remove(date(2024, 2, 19))
all_dates.remove(date(2024, 3, 29))
all_dates.remove(date(2024, 5, 27))
all_dates.remove(date(2024, 6, 19))
all_dates.remove(date(2024, 7, 4))
all_dates.remove(date(2024, 9, 2))
all_dates.remove(date(2024, 11, 28))
all_dates.remove(date(2024, 11, 29))
all_dates.remove(date(2024, 12, 25))

all_dates.remove(date(2025, 1, 1))
all_dates.remove(date(2025, 1, 20))
all_dates.remove(date(2025, 2, 17))

# Wednesday, January 1, 2025: New Year's Day
# Monday, January 20, 2025: Martin Luther King Jr. Day
# Monday, February 17, 2025: Presidents' Day
# Friday, April 18, 2025: Good Friday
# Monday, May 26, 2025: Memorial Day
# Thursday, June 19, 2025: Juneteenth National Independence Day
# Friday, July 4, 2025: Independence Day
# Monday, September 1, 2025: Labor Day
# Thursday, November 27, 2025: Thanksgiving Day
# Thursday, December 25, 2025: Christmas Day

In [48]:
biz_dates = [ d for d in all_dates if d.weekday()<5]

In [10]:
now =datetime.now()
print(now.date())

2025-04-03


### Trainning Data Set Collection ###

In [49]:
barsize ="1 min"
duration = "1 D"
DR_stock_meta = Data_Request(biz_dates, contract, barsize, duration)
df_all_days_stock = DR_stock_meta.D_request()

retriveing data for 2024-02-01
390
retriveing data for 2024-02-02
780
retriveing data for 2024-02-05
1170
retriveing data for 2024-02-06
1560
retriveing data for 2024-02-07
1950
retriveing data for 2024-02-08
2340
retriveing data for 2024-02-09
2730
retriveing data for 2024-02-12
3120
retriveing data for 2024-02-13
3510
retriveing data for 2024-02-14
3900
retriveing data for 2024-02-15
4290
retriveing data for 2024-02-16
4680
retriveing data for 2024-02-20
5070
retriveing data for 2024-02-21
5460
retriveing data for 2024-02-22
5850
retriveing data for 2024-02-23
6240
retriveing data for 2024-02-26
6630
retriveing data for 2024-02-27
7020
retriveing data for 2024-02-28
7410
retriveing data for 2024-02-29
7800
retriveing data for 2024-03-01
8190
retriveing data for 2024-03-04
8580
retriveing data for 2024-03-05
8970
retriveing data for 2024-03-06
9360
retriveing data for 2024-03-07
9750
retriveing data for 2024-03-08
10140
retriveing data for 2024-03-11
10530
retriveing data for 2024-03-

In [55]:
DR_IND_VIX = Data_Request(biz_dates, contract_vix, barsize, duration)
DR_STK_TQQQ = Data_Request(biz_dates, contract_nas, barsize, duration)
df_all_days_VIX = DR_IND_VIX.D_request()


retriveing data for 2024-02-01
810
retriveing data for 2024-02-02
1620
retriveing data for 2024-02-05
2430
retriveing data for 2024-02-06
3240
retriveing data for 2024-02-07
4050
retriveing data for 2024-02-08
4860
retriveing data for 2024-02-09
5670
retriveing data for 2024-02-12
6480
retriveing data for 2024-02-13
7290
retriveing data for 2024-02-14
8100
retriveing data for 2024-02-15
8910
retriveing data for 2024-02-16
9720
retriveing data for 2024-02-20
10530
retriveing data for 2024-02-21
11340
retriveing data for 2024-02-22
12150
retriveing data for 2024-02-23
12960
retriveing data for 2024-02-26
13770
retriveing data for 2024-02-27
14580
retriveing data for 2024-02-28
15390
retriveing data for 2024-02-29
16200
retriveing data for 2024-03-01
17010
retriveing data for 2024-03-04
17820
retriveing data for 2024-03-05
18630
retriveing data for 2024-03-06
19440
retriveing data for 2024-03-07
20250
retriveing data for 2024-03-08
21060
retriveing data for 2024-03-11
21870
retriveing dat

In [56]:
df_all_days_TQQQ = DR_STK_TQQQ.D_request()

retriveing data for 2024-02-01
390
retriveing data for 2024-02-02
780
retriveing data for 2024-02-05
1170
retriveing data for 2024-02-06
1560
retriveing data for 2024-02-07
1950
retriveing data for 2024-02-08
2340
retriveing data for 2024-02-09
2730
retriveing data for 2024-02-12
3120
retriveing data for 2024-02-13
3510
retriveing data for 2024-02-14
3900
retriveing data for 2024-02-15
4290
retriveing data for 2024-02-16
4680
retriveing data for 2024-02-20
5070
retriveing data for 2024-02-21
5460
retriveing data for 2024-02-22
5850
retriveing data for 2024-02-23
6240
retriveing data for 2024-02-26
6630
retriveing data for 2024-02-27
7020
retriveing data for 2024-02-28
7410
retriveing data for 2024-02-29
7800
retriveing data for 2024-03-01
8190
retriveing data for 2024-03-04
8580
retriveing data for 2024-03-05
8970
retriveing data for 2024-03-06
9360
retriveing data for 2024-03-07
9750
retriveing data for 2024-03-08
10140
retriveing data for 2024-03-11
10530
retriveing data for 2024-03-

In [65]:
df_TQQQ = df_all_days_TQQQ

In [66]:
df_VIX = df_all_days_VIX[(df_all_days_VIX['date'].dt.time >= time(9,30,0))].copy()

In [67]:
df_all_days_stock.to_csv('dev_master_stock_info.csv', index=False)
df_all_days_stock.to_pickle('dev_master_stock_info.pkl')

df_VIX.to_csv('dev_master_vix_info.csv', index=False)
df_VIX.to_pickle('dev_master_vix_info.pkl')

df_TQQQ.to_csv('dev_master_tqqq_info.csv', index=False)
df_TQQQ.to_pickle('dev_master_tqqq_info.pkl')

In [78]:
df_all_days_stock = pd.read_pickle('dev_master_stock_info.pkl')
df_VIX = pd.read_pickle('dev_master_vix_info.pkl')
df_TQQQ = pd.read_pickle('dev_master_tqqq_info.pkl')

In [68]:
cut_time = time(14, 30, 0)

In [61]:

# def calculate_rsi(prices, period=14):
#     """
#     Calculates the Relative Strength Index (RSI) for a given price series.

#     Args:
#         prices (pd.Series): A Pandas Series representing the price data.
#         period (int, optional): The lookback period for RSI calculation. Defaults to 14.

#     Returns:
#         pd.Series: A Pandas Series containing the RSI values.
#     """

#     delta = prices.diff()
#     gain = delta.where(delta > 0, 0)
#     loss = -delta.where(delta < 0, 0)

#     avg_gain = gain.rolling(window=period, min_periods=period).mean()
#     avg_loss = loss.rolling(window=period, min_periods=period).mean()

#     rs = avg_gain / avg_loss
#     rsi = 100 - (100 / (1 + rs))

#     return rsi



In [62]:
# Example usage:
# #data = {'Close': [45, 48, 50, 47, 49, 52, 55, 53, 51, 54]}
# #df = pd.DataFrame(data)
# df = pd.DataFrame()
# rsi_period = 14 
# df_all_days_stock['RSI_14'] = calculate_rsi(df_all_days_stock['close'], period=rsi_period)
# rsi_period = 30 
# df_all_days_stock['RSI_30'] = calculate_rsi(df_all_days_stock['close'], period=rsi_period)
# rsi_period = 60 
# df_all_days_stock['RSI_60'] = calculate_rsi(df_all_days_stock['close'], period=rsi_period)

# df_all_days_stock

In [120]:
df_filter_after.groupby('Date_Only')['open'].max()

Date_Only
2024-02-01    397.36
2024-02-02    480.83
2024-02-05    468.59
2024-02-06    457.25
2024-02-07    470.24
               ...  
2025-03-26    611.81
2025-03-27    608.14
2025-03-28    579.96
2025-03-31    578.09
2025-04-01    586.51
Name: open, Length: 288, dtype: float64

In [121]:
df_filter_after.groupby('Date_Only')['close'].min()

Date_Only
2024-02-01    394.10
2024-02-02    472.81
2024-02-05    459.38
2024-02-06    454.09
2024-02-07    467.10
               ...  
2025-03-26    606.91
2025-03-27    601.90
2025-03-28    574.27
2025-03-31    573.25
2025-04-01    579.44
Name: close, Length: 288, dtype: float64

In [122]:

def data_sparcing_construc(df, sp_day, *, cut_time = None):
    #df_group = df.groupby("Date_Only").std(numeric_only=True)
    df_filter_prior = data_filter(df, sp_day, specific_time_before = cut_time )
    df_group_prior = df_filter_prior.groupby("Date_Only").std(numeric_only=True)
    
    df_filter_after = data_filter(df, sp_day, specific_time_after = cut_time )
    df_group_after = df_filter_after.groupby("Date_Only").std(numeric_only=True)


    df_group_train =pd.DataFrame()
    df_group_train['Target_std'] = pd.DataFrame(df_group_after[['open','high','low','close']].max(axis=1))
    df_group_train['Target_spread'] = df_filter_after.groupby('Date_Only')['open'].max() - df_filter_after.groupby('Date_Only')['open'].min()
    
    df_group_train['prior_std'] = pd.DataFrame(df_group_prior[['open','high','low','close']].max(axis=1))
    df_group_train['prior_Vol'] = pd.DataFrame(df_filter_prior.groupby("Date_Only").mean()['volume'])
    df_group_train['prior_spread'] = df_filter_prior.groupby('Date_Only')['open'].max() - df_filter_prior.groupby('Date_Only')['open'].min()

    df_group_train['prior_range'] =pd.DataFrame(df_filter_prior.groupby('Date_Only')['close'].max() -df_filter_prior.groupby('Date_Only')['close'].min() )
    
    df_initial = df_filter_prior.copy()
    new_time = time(cut_time.hour, cut_time.minute -1)
    df_group_train['cut_open'] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values
   # df_group_train['RSI_14'] = df_initial[df_initial['date'].dt.time == new_time]['RSI_14'].values
   # df_group_train['RSI_30'] = df_initial[df_initial['date'].dt.time == new_time]['RSI_30'].values
   # df_group_train['RSI_60'] = df_initial[df_initial['date'].dt.time == new_time]['RSI_60'].values
    
    cut_name =['cut_1hr','cut_2hr','cut_3hr']
    for i in range(3):
        print(i)
        prior = time(cut_time.hour - (i+1), cut_time.minute)
        df_group_train[cut_name[i]] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values- df_initial[(df_initial['date'].dt.time == prior)]['open'].values

    #df_group_train['Class_target'] = df_filter_after[(df_filter_after['date'].dt.time == time(15, 49, 0))]['close'].values- df_initial[(df_initial['date'].dt.time == new_time)]['open'].values
    
    return df_group_train, df_filter_prior, df_filter_after, df_group_prior, df_group_after
    

In [123]:
df_group_train, df_filter_prior, df_filter_after, df_group_prior, df_group_after = data_sparcing_construc(df_all_days_stock, all_dates, cut_time= cut_time)

0
retrieving 2024-02-01
0.2 percent is done
300
retrieving 2024-02-02
1.2 percent is done
600
retrieving 2024-02-03
2.2 percent is done
600
retrieving 2024-02-04
3.2 percent is done
600
retrieving 2024-02-05
4.2 percent is done
900
retrieving 2024-02-06
5.2 percent is done
1200
retrieving 2024-02-07
6.2 percent is done
1500
retrieving 2024-02-08
7.2 percent is done
1800
retrieving 2024-02-09
8.2 percent is done
2100
retrieving 2024-02-10
9.2 percent is done
2100
retrieving 2024-02-11
10.2 percent is done
2100
retrieving 2024-02-12
11.2 percent is done
2400
retrieving 2024-02-13
12.2 percent is done
2700
retrieving 2024-02-14
13.2 percent is done
3000
retrieving 2024-02-15
14.2 percent is done
3300
retrieving 2024-02-16
15.2 percent is done
3600
retrieving 2024-02-17
16.2 percent is done
3600
retrieving 2024-02-18
17.2 percent is done
3600
retrieving 2024-02-20
18.2 percent is done
3900
retrieving 2024-02-21
19.2 percent is done
4200
retrieving 2024-02-22
20.2 percent is done
4500
retri

In [70]:
df_all_days_stock

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only
0,2024-02-01 09:30:00-05:00,393.96,396.03,393.66,395.48,305515.0,394.495,760,2024-02-01
1,2024-02-01 09:31:00-05:00,395.41,396.38,394.68,396.38,83112.0,395.372,435,2024-02-01
2,2024-02-01 09:32:00-05:00,396.34,397.79,396.26,397.20,131978.0,397.086,786,2024-02-01
3,2024-02-01 09:33:00-05:00,397.21,398.23,397.10,397.25,122000.0,397.734,666,2024-02-01
4,2024-02-01 09:34:00-05:00,397.22,399.45,397.00,399.20,154192.0,398.259,784,2024-02-01
...,...,...,...,...,...,...,...,...,...
113515,2025-04-01 15:55:00-04:00,586.51,587.37,585.49,585.74,51718.0,586.359,386,2025-04-01
113516,2025-04-01 15:56:00-04:00,585.78,586.14,585.44,586.02,55195.0,585.823,399,2025-04-01
113517,2025-04-01 15:57:00-04:00,586.10,586.20,585.74,585.93,43672.0,585.957,339,2025-04-01
113518,2025-04-01 15:58:00-04:00,585.95,586.35,585.82,586.31,63359.0,586.068,517,2025-04-01


In [105]:
df_all_days_stock.shape

(112320, 9)

In [106]:
df_VIX.shape

(112320, 9)

In [107]:
df_TQQQ.shape

(112320, 9)

In [88]:
for d in biz_dates:
    for df in df_TQQQ:
        if len(df_TQQQ[df_TQQQ['Date_Only']==d]) !=390:
            print(d)

2024-07-03
2024-07-03
2024-07-03
2024-07-03
2024-07-03
2024-07-03
2024-07-03
2024-07-03
2024-07-03
2024-12-24
2024-12-24
2024-12-24
2024-12-24
2024-12-24
2024-12-24
2024-12-24
2024-12-24
2024-12-24
2025-01-08
2025-01-08
2025-01-08
2025-01-08
2025-01-08
2025-01-08
2025-01-08
2025-01-08
2025-01-08
2025-01-09
2025-01-09
2025-01-09
2025-01-09
2025-01-09
2025-01-09
2025-01-09
2025-01-09
2025-01-09


In [124]:
df_VIX_clean = df_VIX[(df_VIX['Date_Only'] !=date(2024,7,3)) &  (df_VIX['Date_Only'] != date(2024,12,24)) & (df_VIX['Date_Only'] != date(2025,1,8)) &(df_VIX['Date_Only'] != date(2025,1,9)) ]

In [125]:
df_stock_clean = df_all_days_stock[(df_all_days_stock['Date_Only'] !=date(2024,7,3)) &  (df_all_days_stock['Date_Only'] != date(2024,12,24)) & (df_all_days_stock['Date_Only'] != date(2025,1,8)) &(df_all_days_stock['Date_Only'] != date(2025,1,9)) ]

In [128]:
df_TQQQ_clean = df_TQQQ[(df_TQQQ['Date_Only'] !=date(2024,7,3)) &  (df_TQQQ['Date_Only'] != date(2024,12,24)) & (df_TQQQ['Date_Only'] != date(2025,1,8)) &(df_TQQQ['Date_Only'] != date(2025,1,9)) ]

In [129]:
df_TQQQ.shape

(112320, 9)

In [130]:
# switching
df_VIX = pd.DataFrame()
df_VIX = df_VIX_clean.copy()

df_TQQQ = pd.DataFrame()
df_TQQQ = df_TQQQ_clean.copy()

df_all_days_stock = pd.DataFrame()
df_all_days_stock = df_stock_clean.copy()

In [131]:
df_group_train_vix, *_  = data_sparcing_construc(df_VIX, all_dates, cut_time= cut_time)
df_group_train_tqqq, *_ = data_sparcing_construc(df_TQQQ, all_dates, cut_time= cut_time)

0
retrieving 2024-02-01
0.2 percent is done
300
retrieving 2024-02-02
1.2 percent is done
600
retrieving 2024-02-03
2.2 percent is done
600
retrieving 2024-02-04
3.2 percent is done
600
retrieving 2024-02-05
4.2 percent is done
900
retrieving 2024-02-06
5.2 percent is done
1200
retrieving 2024-02-07
6.2 percent is done
1500
retrieving 2024-02-08
7.2 percent is done
1800
retrieving 2024-02-09
8.2 percent is done
2100
retrieving 2024-02-10
9.2 percent is done
2100
retrieving 2024-02-11
10.2 percent is done
2100
retrieving 2024-02-12
11.2 percent is done
2400
retrieving 2024-02-13
12.2 percent is done
2700
retrieving 2024-02-14
13.2 percent is done
3000
retrieving 2024-02-15
14.2 percent is done
3300
retrieving 2024-02-16
15.2 percent is done
3600
retrieving 2024-02-17
16.2 percent is done
3600
retrieving 2024-02-18
17.2 percent is done
3600
retrieving 2024-02-20
18.2 percent is done
3900
retrieving 2024-02-21
19.2 percent is done
4200
retrieving 2024-02-22
20.2 percent is done
4500
retri

In [133]:
df_group_train_tqqq

Unnamed: 0_level_0,Target_std,Target_spread,prior_std,prior_Vol,prior_spread,prior_range,cut_open,cut_1hr,cut_2hr,cut_3hr
Date_Only,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-02-01,0.157610,0.63,0.329090,179495.303333,1.30,1.29,54.12,0.21,0.48,1.06
2024-02-02,0.079812,0.30,0.611902,229504.093333,2.70,2.71,57.40,0.33,-0.04,0.68
2024-02-05,0.165655,0.65,0.410078,169410.313333,1.69,1.68,57.09,0.30,0.22,0.96
2024-02-06,0.162608,0.57,0.365961,168546.906667,1.92,1.92,55.96,-0.14,-0.10,-0.60
2024-02-07,0.073213,0.29,0.338455,162977.396667,1.45,1.45,58.33,-0.07,0.22,0.12
...,...,...,...,...,...,...,...,...,...,...
2025-03-26,0.239224,0.95,0.975800,180336.183333,3.84,3.83,63.70,0.19,-0.64,-1.32
2025-03-27,0.278571,1.37,0.511152,167367.360000,2.36,2.34,63.02,0.60,-0.07,-0.59
2025-03-28,0.168169,0.68,1.230583,216589.716667,4.57,4.56,57.51,0.24,-0.20,-1.13
2025-03-31,0.284283,1.24,0.734290,228802.953333,3.40,3.44,56.40,0.97,0.64,1.49


In [134]:
df_final_jointed_vix=pd.DataFrame()
df_final_jointed_all=pd.DataFrame()

df_group_train_vix.columns =['vix_std','vix_spread','vix_prior_std','vix_Vol', 'vix_pr_spread','vix_prior_range','vix_cut_open','vix_cut_1hr','vix_cut_2hr','vix_cut_3hr']
df_final_jointed_vix = pd.concat([df_group_train,df_group_train_vix], axis=1)
df_group_train_tqqq.columns =['tqqq_std','tqqq_spread','tqqq_prior_std','tqqq_Vol','tqqq_pr_spread','tqqq_prior_range','tqqq_cut_open','tqqq_cut_1hr','tqqq_cut_2hr','tqqq_cut_3hr']
df_final_jointed_all = pd.concat([df_final_jointed_vix,df_group_train_tqqq], axis=1)
df_final_jointed_all.drop(['vix_std','vix_Vol','tqqq_std'], axis=1, inplace=True)

In [135]:
df_final_jointed_all

Unnamed: 0_level_0,Target_std,Target_spread,prior_std,prior_Vol,prior_spread,prior_range,cut_open,cut_1hr,cut_2hr,cut_3hr,...,vix_cut_3hr,tqqq_spread,tqqq_prior_std,tqqq_Vol,tqqq_pr_spread,tqqq_prior_range,tqqq_cut_open,tqqq_cut_1hr,tqqq_cut_2hr,tqqq_cut_3hr
Date_Only,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-02-01,0.844811,3.26,1.691430,31344.016667,6.71,6.67,395.19,0.09,-0.37,-0.47,...,-0.21,0.63,0.329090,179495.303333,1.30,1.29,54.12,0.21,0.48,1.06
2024-02-02,2.322338,8.01,4.712763,156749.593333,31.04,31.27,478.94,2.13,-2.85,1.92,...,-0.11,0.30,0.611902,229504.093333,2.70,2.71,57.40,0.33,-0.04,0.68
2024-02-05,2.048324,7.94,1.724566,68771.400000,10.29,10.36,462.41,1.89,-0.45,1.97,...,-0.18,0.65,0.410078,169410.313333,1.69,1.68,57.09,0.30,0.22,0.96
2024-02-06,0.910985,3.12,2.308883,36062.643333,12.75,12.71,456.89,-0.49,-0.80,-1.43,...,-0.32,0.57,0.365961,168546.906667,1.92,1.92,55.96,-0.14,-0.10,-0.60
2024-02-07,0.765408,3.14,1.859712,40304.960000,13.58,13.72,467.23,-1.60,-1.36,-0.61,...,-0.17,0.29,0.338455,162977.396667,1.45,1.45,58.33,-0.07,0.22,0.12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-26,1.298835,4.85,3.493051,19297.573333,15.58,15.59,611.47,-0.67,-3.43,-5.52,...,0.45,0.95,0.975800,180336.183333,3.84,3.83,63.70,0.19,-0.64,-1.32
2025-03-27,1.403355,6.24,2.809916,13900.750000,12.78,12.78,606.49,0.99,-1.25,-4.01,...,-0.28,1.37,0.511152,167367.360000,2.36,2.34,63.02,0.60,-0.07,-0.59
2025-03-28,1.229460,5.72,5.344685,21354.313333,23.49,23.51,578.25,-3.48,-4.16,-7.86,...,0.33,0.68,1.230583,216589.716667,4.57,4.56,57.51,0.24,-0.20,-1.13
2025-03-31,1.129441,4.91,5.330998,27184.223333,21.80,22.41,575.47,4.25,3.75,10.77,...,0.06,1.24,0.734290,228802.953333,3.40,3.44,56.40,0.97,0.64,1.49


In [137]:
df_final_jointed_all.columns

Index(['Target_std', 'Target_spread', 'prior_std', 'prior_Vol', 'prior_spread',
       'prior_range', 'cut_open', 'cut_1hr', 'cut_2hr', 'cut_3hr',
       'vix_spread', 'vix_prior_std', 'vix_pr_spread', 'vix_prior_range',
       'vix_cut_open', 'vix_cut_1hr', 'vix_cut_2hr', 'vix_cut_3hr',
       'tqqq_spread', 'tqqq_prior_std', 'tqqq_Vol', 'tqqq_pr_spread',
       'tqqq_prior_range', 'tqqq_cut_open', 'tqqq_cut_1hr', 'tqqq_cut_2hr',
       'tqqq_cut_3hr'],
      dtype='object')

In [139]:
df_final_jointed_all.drop(['vix_spread','tqqq_spread'], axis=1, inplace =True)

In [141]:
df_final_jointed_all.columns

Index(['Target_std', 'Target_spread', 'prior_std', 'prior_Vol', 'prior_spread',
       'prior_range', 'cut_open', 'cut_1hr', 'cut_2hr', 'cut_3hr',
       'vix_prior_std', 'vix_pr_spread', 'vix_prior_range', 'vix_cut_open',
       'vix_cut_1hr', 'vix_cut_2hr', 'vix_cut_3hr', 'tqqq_prior_std',
       'tqqq_Vol', 'tqqq_pr_spread', 'tqqq_prior_range', 'tqqq_cut_open',
       'tqqq_cut_1hr', 'tqqq_cut_2hr', 'tqqq_cut_3hr'],
      dtype='object')

In [140]:
df_final_jointed_all.to_pickle('dev_df_final_jointed_all_train_beta.pkl')