In [None]:
!pip install ib_async

In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, date, timedelta, time
os.environ['TZ'] ='America/New_York'

#import yfinance as yf

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

from ib_async import *
util.startLoop()

### Data Prep for Inferencing

In [2]:
ib = IB()
ib.connect(port=7496, clientId=1)

<IB connected to 127.0.0.1:7496 clientId=1>

In [3]:
contract = Contract(symbol='META', secType='STK', exchange='SMART', currency='USD')
ib.qualifyContracts(contract)

[Contract(secType='STK', conId=107113386, symbol='META', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='META', tradingClass='NMS')]

In [4]:
contract_vix = Contract(symbol= 'VIX', secType = 'IND',exchange = 'CBOE', currency='USD')
ib.qualifyContracts(contract_vix)

[Contract(secType='IND', conId=13455763, symbol='VIX', exchange='CBOE', currency='USD', localSymbol='VIX')]

In [5]:
contract_nas = Contract(symbol= 'TQQQ', secType = 'STK',exchange = 'SMART', currency='USD')
ib.qualifyContracts(contract_nas)

[Contract(secType='STK', conId=72539702, symbol='TQQQ', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='TQQQ', tradingClass='NMS')]

In [6]:
def get_fridays_in_range(start_date, end_date):
    """
    Returns a list of dates that are Fridays within the given date range.

    Args:
        start_date (date): The start date of the range.
        end_date (date): The end date of the range.

    Returns:
        list: A list of date objects representing Fridays within the range.
    """
    fridays = []
    all_dates = []
    current_date = start_date
    all_current_date = start_date

    # Adjust to the first Friday
    days_until_friday = (4 - current_date.weekday()) % 7
    current_date += timedelta(days=days_until_friday)

    while current_date <= end_date:
        fridays.append(current_date)
        current_date += timedelta(days=7) # Move to the next Friday

    while all_current_date <= end_date:
        all_dates.append(all_current_date)
        all_current_date += timedelta(days=1)

    biz_dates = [ d for d in all_dates if d.weekday()<5]
    
    return fridays, biz_dates

# Example Usage
start_date = date(2024, 2, 1)
end_date = date(2025, 4, 8)

fridays, all_dates = get_fridays_in_range(start_date, end_date)
# removing Non-trading Holidays

fridays.remove(date(2024, 3, 29))
fridays.remove(date(2024, 11, 29))

#all_dates.remove(date(2024, 1, 15))
all_dates.remove(date(2024, 2, 19))
all_dates.remove(date(2024, 3, 29))
all_dates.remove(date(2024, 5, 27))
all_dates.remove(date(2024, 6, 19))
all_dates.remove(date(2024, 7, 3))
all_dates.remove(date(2024, 7, 4))
all_dates.remove(date(2024, 9, 2))
all_dates.remove(date(2024, 11, 28))
all_dates.remove(date(2024, 11, 29))
all_dates.remove(date(2024, 12, 24))
all_dates.remove(date(2024, 12, 25))

all_dates.remove(date(2025, 1, 1))
all_dates.remove(date(2025, 1, 8))
all_dates.remove(date(2025, 1, 20))
all_dates.remove(date(2025, 2, 17))


In [17]:
# duration is fixed for an year
def simple_data_req( contract, end_date , barsize ="1 min", data_clean = True):

   
    df_acc=pd.DataFrame()
    df_hist=pd.DataFrame()

    for i in range(17):
        print(f'fetching data for {i}')
        bars = ib.reqHistoricalData(contract= contract, endDateTime=end_date, barSizeSetting= barsize, durationStr= "20 D", whatToShow="TRADES", useRTH=True)
        df = util.df(bars)
        end_date = end_date - timedelta(20+2)
        df_acc = pd.concat([df, df_acc], ignore_index=True)
        print(f"head date {df_acc.head(1)['date']}")
        print(f"tail date {df_acc.tail(1)['date']}")
        #print(df)

        df_acc['Date_Only'] = df_acc['date'].dt.date
    
    if data_clean == True:
        df_hist = df_acc[(df_acc['date'].dt.time >= time(9, 30)) & (df_acc['date'].dt.time < time(16, 00))]
    else:
        df_hist = df_acc

    #df_hist.drop_duplicates(inplace=True)
    
    return df_hist

In [18]:
# picking out Friday and before specific-time data

#specific_time = time(14, 30, 0)
def data_filter_v46(df_initial, list_days, specific_time_before = None, specific_time_after= None, on_time= None):
    his_df = pd.DataFrame()

    df_trimmed = df_initial[df_initial['date'].dt.date.isin(list_days)]

    if specific_time_before is not None:

        his_df = df_trimmed[(df_trimmed['date'].dt.time < specific_time_before)]
        
            
    if specific_time_after is not None:

        his_df = df_trimmed[(df_trimmed['date'].dt.time >= specific_time_after)]

    if on_time is not None:
        his_df = df_trimmed[(df_trimmed['date'].dt.time == on_time)]

    if (specific_time_before is None and specific_time_after is None) and on_time is None:

        his_df = df_trimmed
        
    return his_df

In [19]:
from datetime import date, timedelta

def calculate_business_days(start_date, end_date):
    """
    Calculates the number of business days between two dates, excluding weekends.

    Args:
        start_date (date): The start date.
        end_date (date): The end date.

    Returns:
        int: The number of business days.
    """
    business_days = 0
    current_date = start_date
    while current_date <= end_date:
        if current_date.weekday() < 5:  # Monday to Friday
            business_days += 1
        current_date += timedelta(days=1)
    return business_days

# Example usage:
# start_date = date(2025, 2, 1)  # April 7, 2025 (Monday)
# end_date = date(2025, 4, 3)  # April 18, 2025 (Friday)
# num_business_days = calculate_business_days(start_date, end_date)
# print(f"Number of business days between {start_date} and {end_date}: {num_business_days}")

In [None]:
df_META = pd.read_pickle('META_master_stock_info.pkl')
df_VIX = pd.read_pickle('VIX_master_info.pkl')
df_TQQQ = pd.read_pickle('TQQQ_master_info.pkl')

### Trainning Data Set Collection ###

In [10]:
barsize ="5 mins"

df_all_days_stock = simple_data_req(contract = contract, end_date = end_date, barsize= barsize )

fetching data for 0
                          date    open    high     low   close    volume  \
0    2025-03-12 09:30:00-04:00  629.33  631.75  625.00  629.90  933023.0   
1    2025-03-12 09:35:00-04:00  629.89  632.33  628.00  629.36  390419.0   
2    2025-03-12 09:40:00-04:00  629.28  633.33  626.26  627.11  503426.0   
3    2025-03-12 09:45:00-04:00  627.10  631.90  625.88  626.86  275863.0   
4    2025-03-12 09:50:00-04:00  626.90  627.73  623.21  623.74  209251.0   
...                        ...     ...     ...     ...     ...       ...   
1555 2025-04-08 15:35:00-04:00  509.61  509.61  504.73  505.55  345326.0   
1556 2025-04-08 15:40:00-04:00  505.56  508.10  504.56  504.63  405264.0   
1557 2025-04-08 15:45:00-04:00  504.53  506.00  502.86  503.28  228753.0   
1558 2025-04-08 15:50:00-04:00  503.29  511.34  503.00  509.80  557441.0   
1559 2025-04-08 15:55:00-04:00  509.65  511.00  505.24  510.62  735689.0   

      average  barCount  
0     629.200      4554  
1     630.099  

KeyboardInterrupt: 

In [32]:
#df_META = pd.read_pickle('META_master_stock_info.pkl')
df_VIX = pd.read_pickle('VIX_master_info.pkl')
df_TQQQ = pd.read_pickle('TQQQ_master_info.pkl')

In [57]:

def calculate_macd(data, fast_period=12, slow_period=26, signal_period=9):
    """
    Calculates the MACD indicator for a given time series data.

    Args:
        data (pd.Series): Time series data (e.g., closing prices).
        fast_period (int): Period for the fast EMA (default: 12).
        slow_period (int): Period for the slow EMA (default: 26).
        signal_period (int): Period for the signal line EMA (default: 9).

    Returns:
        pd.DataFrame: DataFrame containing MACD, signal line, and histogram.
    """
    fast_ema = data.ewm(span=fast_period, adjust=False).mean()
    slow_ema = data.ewm(span=slow_period, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
    histogram = macd_line - signal_line

    macd_df = pd.DataFrame({
        'MACD': macd_line,
        'Signal_Line': signal_line,
        'Histogram': histogram
    })
    return macd_df

# Example usage with sample data
#data = pd.Series(np.random.rand(100))
data = df_META['close']
df_META['MACD']= calculate_macd(data)['MACD']
df_META['diff_MACD'] = df_META['MACD'].diff()
df_META['MACD_Sig'] =calculate_macd(data)['Signal_Line']
df_META['diff_M_Sig'] = df_META['MACD_Sig'].diff()
#print(macd_result)

In [58]:
df_META

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only,MACD,MACD_Sig,RSI_12,diff_MACD,diff_M_Sig
0,2024-03-22 09:30:00-04:00,507.00,507.38,505.52,506.03,99707.0,506.833,172,2024-03-22,0.000000,0.000000,,,
1,2024-03-22 09:31:00-04:00,506.00,506.30,505.33,505.86,18062.0,505.705,115,2024-03-22,-0.013561,-0.002712,,-0.013561,-0.002712
2,2024-03-22 09:32:00-04:00,505.65,505.89,504.61,504.84,25951.0,505.025,176,2024-03-22,-0.105399,-0.023250,,-0.091838,-0.020537
3,2024-03-22 09:33:00-04:00,504.65,505.63,504.50,505.25,15252.0,505.088,117,2024-03-22,-0.143444,-0.047289,,-0.038045,-0.024039
4,2024-03-22 09:34:00-04:00,505.28,505.31,504.52,504.60,20420.0,504.865,110,2024-03-22,-0.223469,-0.082525,,-0.080025,-0.035236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132055,2025-04-08 15:55:00-04:00,509.65,509.65,505.24,508.04,126395.0,507.204,864,2025-04-08,-0.191831,-0.766965,53.701016,0.096203,0.143784
132056,2025-04-08 15:56:00-04:00,508.09,508.98,507.59,508.88,85229.0,508.218,688,2025-04-08,-0.047263,-0.623025,67.375307,0.144568,0.143940
132057,2025-04-08 15:57:00-04:00,508.93,509.71,508.76,509.15,125106.0,509.145,1088,2025-04-08,0.088079,-0.480804,66.071429,0.135342,0.142221
132058,2025-04-08 15:58:00-04:00,509.25,509.75,508.76,509.57,95853.0,509.303,830,2025-04-08,0.226617,-0.339320,71.653543,0.138538,0.141484


In [29]:
import pandas as pd

def calculate_rsi(data, period=12):
    """
    Calculates the Relative Strength Index (RSI) for a given dataset.

    Args:
        data (pd.Series): A Pandas Series representing the price data.
        period (int, optional): The lookback period for calculating RSI. Defaults to 14.

    Returns:
        pd.Series: A Pandas Series containing the RSI values.
    """
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)
    
    avg_gain = gain.rolling(window=period, min_periods=period).mean()
    avg_loss = loss.rolling(window=period, min_periods=period).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Example usa#ge:
#data = pd.Series([45.49, 45.71, 46.14, 46.13, 45.53, 45.81, 46.44, 46.09, 46.08, 46.71])
#rsi_values = calculate_rsi(data)
#print(rsi_values)

In [30]:
df_META['RSI_12'] = calculate_rsi(df_META['close'])

In [34]:
### data cleaning ## step
# clean duplicate
# is in the days of interest
# within windows

def data_cleaning(df, in_days):
    df_his = pd.DataFrame()
    df_return = pd.DataFrame()
    
    df.drop_duplicates(inplace = True)
    df_his = df[df["Date_Only"].isin(in_days)]
    df_return = df_his[(df_his['date'].dt.time > time(9,29)) & (df_his['date'].dt.time < time(16,00))]

    for i in df_return['Date_Only'].unique():
        if df_return[df_return['Date_Only'] ==i].shape[0] % 390 != 0:
            print(i)
    
    return df_return

In [35]:
df_META = data_cleaning(df_META, all_dates)

df_META.shape

(100620, 12)

In [36]:
df_TQQQ_trim = data_cleaning(df_TQQQ, all_dates)

df_TQQQ_trim.shape

(100620, 9)

In [37]:
df_VIX_trim = data_cleaning(df_VIX, all_dates)

df_VIX_trim.shape

2025-04-08


(100597, 9)

In [48]:
df_META

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only,MACD,MACD_Sig,RSI_12
0,2024-03-22 09:30:00-04:00,507.00,507.38,505.52,506.03,99707.0,506.833,172,2024-03-22,0.000000,0.000000,
1,2024-03-22 09:31:00-04:00,506.00,506.30,505.33,505.86,18062.0,505.705,115,2024-03-22,-0.013561,-0.002712,
2,2024-03-22 09:32:00-04:00,505.65,505.89,504.61,504.84,25951.0,505.025,176,2024-03-22,-0.105399,-0.023250,
3,2024-03-22 09:33:00-04:00,504.65,505.63,504.50,505.25,15252.0,505.088,117,2024-03-22,-0.143444,-0.047289,
4,2024-03-22 09:34:00-04:00,505.28,505.31,504.52,504.60,20420.0,504.865,110,2024-03-22,-0.223469,-0.082525,
...,...,...,...,...,...,...,...,...,...,...,...,...
132055,2025-04-08 15:55:00-04:00,509.65,509.65,505.24,508.04,126395.0,507.204,864,2025-04-08,-0.191831,-0.766965,53.701016
132056,2025-04-08 15:56:00-04:00,508.09,508.98,507.59,508.88,85229.0,508.218,688,2025-04-08,-0.047263,-0.623025,67.375307
132057,2025-04-08 15:57:00-04:00,508.93,509.71,508.76,509.15,125106.0,509.145,1088,2025-04-08,0.088079,-0.480804,66.071429
132058,2025-04-08 15:58:00-04:00,509.25,509.75,508.76,509.57,95853.0,509.303,830,2025-04-08,0.226617,-0.339320,71.653543


In [38]:
cut_time = time(14, 30, 0)

In [49]:

def data_sparcing_construc(df, sp_day, *, cut_time = None):
    #df_group = df.groupby("Date_Only").std(numeric_only=True)
    df_filter_prior = data_filter_v46(df, sp_day, specific_time_before = cut_time )
    df_group_prior = df_filter_prior.groupby("Date_Only").std(numeric_only=True)
    
    df_filter_after = data_filter_v46(df, sp_day, specific_time_after = cut_time )
    df_group_after = df_filter_after.groupby("Date_Only").std(numeric_only=True)


    df_group_train =pd.DataFrame()
    df_group_train['Target_std'] = pd.DataFrame(df_group_after[['open','high','low','close']].max(axis=1))
    df_group_train['Target_spread'] = df_filter_after.groupby('Date_Only')['open'].max() - df_filter_after.groupby('Date_Only')['open'].min()

    
    
    df_group_train['prior_std'] = pd.DataFrame(df_group_prior[['open','high','low','close']].max(axis=1))
    df_group_train['prior_Vol'] = pd.DataFrame(df_filter_prior.groupby("Date_Only").mean()['volume'])
    df_group_train['prior_spread'] = df_filter_prior.groupby('Date_Only')['open'].max() - df_filter_prior.groupby('Date_Only')['open'].min()

    df_group_train['prev_day_spred'] = df_group_train["Target_spread"].shift(1)
    df_group_train = df_group_train.fillna(df_group_train['prior_spread'].iloc[0])

    
    df_group_train['prior_range'] =pd.DataFrame(df_filter_prior.groupby('Date_Only')['close'].max() -df_filter_prior.groupby('Date_Only')['close'].min() )
    
    df_initial = df_filter_prior.copy()
    new_time = time(cut_time.hour, cut_time.minute -1)
    df_group_train['cut_open'] = df_filter_prior[(df_filter_prior['date'].dt.time == new_time)]['open'].values
    df_group_train['MACD'] = df_filter_prior[(df_filter_prior['date'].dt.time == new_time)]['MACD'].values
    df_group_train['MACD_Sig'] = df_filter_prior[(df_filter_prior['date'].dt.time == new_time)]['MACD_Sig'].values
    df_group_train['RSI_12'] = df_filter_prior[(df_filter_prior['date'].dt.time == new_time)]['RSI_12'].values

    cut_name =['cut_1hr','cut_2hr','cut_3hr']
    for i in range(3):
        print(i)
        prior = time(cut_time.hour - (i+1), cut_time.minute)
        df_group_train[cut_name[i]] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values- df_initial[(df_initial['date'].dt.time == prior)]['open'].values


    
    return df_group_train, df_filter_prior, df_filter_after, df_group_prior, df_group_after
    

In [50]:
df_group_train, df_filter_prior, df_filter_after, df_group_prior, df_group_after = data_sparcing_construc(df_META, all_dates, cut_time= cut_time)

0
1
2


In [None]:
# df_VIX_check = df_all_days_VIX[(df_all_days_VIX['date'].dt.time > time(9,29)) & (df_all_days_VIX['date'].dt.time < time(16,00))]

In [None]:
# for d in biz_dates:
#     for df in df_TQQQ_clean:
#         if len(df_TQQQ_clean[df_TQQQ_clean['Date_Only']==d]) !=390:
#             print(d)

In [None]:
# # switching
# df_VIX = pd.DataFrame()
# df_VIX = df_VIX_clean.copy()

# df_TQQQ = pd.DataFrame()
# df_TQQQ = df_TQQQ_clean.copy()

# df_all_days_stock = pd.DataFrame()
# df_all_days_stock = df_stock_clean.copy()

In [41]:
df_group_train_vix, *_  = data_sparcing_construc(df_VIX, all_dates, cut_time= cut_time)
df_group_train_tqqq, *_ = data_sparcing_construc(df_TQQQ, all_dates, cut_time= cut_time)

0
1
2
0
1
2


In [None]:
df_group_train_vix

In [51]:
df_final_jointed_vix=pd.DataFrame()
df_final_jointed_all=pd.DataFrame()

df_group_train_vix.columns =['vix_std','vix_spread','vix_prior_std','vix_Vol', 'vix_pr_spread', 'vix_pr_day_spread','vix_prior_range','vix_cut_open','vix_cut_1hr','vix_cut_2hr','vix_cut_3hr']
df_final_jointed_vix = pd.concat([df_group_train,df_group_train_vix], axis=1)
df_group_train_tqqq.columns =['tqqq_std','tqqq_spread','tqqq_prior_std','tqqq_Vol','tqqq_pr_spread', 'tqqq_pr_day_spread','tqqq_prior_range','tqqq_cut_open','tqqq_cut_1hr','tqqq_cut_2hr','tqqq_cut_3hr']
df_final_jointed_all = pd.concat([df_final_jointed_vix,df_group_train_tqqq], axis=1)
df_final_jointed_all.drop(['vix_std','vix_Vol','tqqq_std'], axis=1, inplace=True)

In [52]:
df_final_jointed_all

Unnamed: 0_level_0,Target_std,Target_spread,prior_std,prior_Vol,prior_spread,prev_day_spred,prior_range,cut_open,MACD,MACD_Sig,...,tqqq_spread,tqqq_prior_std,tqqq_Vol,tqqq_pr_spread,tqqq_pr_day_spread,tqqq_prior_range,tqqq_cut_open,tqqq_cut_1hr,tqqq_cut_2hr,tqqq_cut_3hr
Date_Only,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-03-22,0.552147,2.40,1.383638,9426.510000,4.91,4.91,4.80,509.45,0.099820,0.024722,...,0.60,0.337054,99536.866667,1.20,1.20,1.21,63.02,0.12,0.56,0.57
2024-03-25,0.559834,2.53,0.994936,10644.700000,6.71,2.40,6.72,505.42,-0.022815,-0.002432,...,0.62,0.290495,98000.976667,1.44,0.60,1.44,62.52,0.16,0.36,0.31
2024-03-26,3.155713,10.01,1.519994,12899.953333,5.29,2.53,5.29,505.00,-0.019230,-0.014251,...,1.23,0.173323,95270.283333,0.74,0.62,0.75,62.52,0.11,0.00,-0.09
2024-03-27,1.003206,3.52,1.475265,14056.196667,10.81,10.01,10.42,492.60,0.288658,0.250866,...,1.04,0.276448,117766.133333,1.82,1.23,1.73,61.42,0.24,0.16,0.49
2024-03-28,0.788424,2.86,1.639355,16885.163333,7.64,3.52,6.14,487.52,-0.118541,-0.052319,...,0.56,0.166780,88527.400000,0.72,1.04,0.73,61.65,0.12,-0.11,-0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-02,1.558082,6.06,3.046880,18833.313333,18.69,7.29,18.34,581.33,-0.895225,-0.775861,...,1.57,0.970570,234781.373333,4.97,1.92,4.95,58.82,-1.86,-1.29,-0.63
2025-04-03,3.467484,10.38,4.404153,51662.170000,16.74,6.06,16.76,539.36,-0.857748,-1.039109,...,1.46,0.636954,302669.146667,3.06,1.57,3.08,51.02,-0.68,-1.87,-0.52
2025-04-04,3.046787,10.48,3.366649,58130.603333,22.60,10.38,22.77,508.59,-1.015509,-1.013688,...,2.45,1.186273,524172.183333,4.63,1.46,4.62,42.14,-0.37,-0.73,-2.17
2025-04-07,2.658004,11.86,9.973768,57493.520000,53.49,10.48,53.93,518.33,-0.609162,-0.126619,...,2.19,1.860110,940899.706667,11.38,2.45,11.38,41.75,0.00,1.62,3.08


In [53]:
df_final_jointed_all.columns

Index(['Target_std', 'Target_spread', 'prior_std', 'prior_Vol', 'prior_spread',
       'prev_day_spred', 'prior_range', 'cut_open', 'MACD', 'MACD_Sig',
       'RSI_12', 'cut_1hr', 'cut_2hr', 'cut_3hr', 'vix_spread',
       'vix_prior_std', 'vix_pr_spread', 'vix_pr_day_spread',
       'vix_prior_range', 'vix_cut_open', 'vix_cut_1hr', 'vix_cut_2hr',
       'vix_cut_3hr', 'tqqq_spread', 'tqqq_prior_std', 'tqqq_Vol',
       'tqqq_pr_spread', 'tqqq_pr_day_spread', 'tqqq_prior_range',
       'tqqq_cut_open', 'tqqq_cut_1hr', 'tqqq_cut_2hr', 'tqqq_cut_3hr'],
      dtype='object')

In [54]:
df_final_jointed_all.drop(['vix_spread','tqqq_spread'], axis=1, inplace =True)

In [55]:
df_final_jointed_all.columns

Index(['Target_std', 'Target_spread', 'prior_std', 'prior_Vol', 'prior_spread',
       'prev_day_spred', 'prior_range', 'cut_open', 'MACD', 'MACD_Sig',
       'RSI_12', 'cut_1hr', 'cut_2hr', 'cut_3hr', 'vix_prior_std',
       'vix_pr_spread', 'vix_pr_day_spread', 'vix_prior_range', 'vix_cut_open',
       'vix_cut_1hr', 'vix_cut_2hr', 'vix_cut_3hr', 'tqqq_prior_std',
       'tqqq_Vol', 'tqqq_pr_spread', 'tqqq_pr_day_spread', 'tqqq_prior_range',
       'tqqq_cut_open', 'tqqq_cut_1hr', 'tqqq_cut_2hr', 'tqqq_cut_3hr'],
      dtype='object')

In [56]:
df_final_jointed_all.to_pickle('META_trainning_master_data.pkl')