In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, date, timedelta, time
os.environ['TZ'] ='America/New_York'

import yfinance as yf

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

from ib_async import *
util.startLoop()

### Data Prep for Inferencing

In [None]:
ib = IB()
ib.connect(port=4002, clientId=2)

In [None]:
contract = Contract(symbol='META', secType='STK', exchange='SMART', currency='USD')
ib.qualifyContracts(contract)

In [None]:
contract_vix = Contract(symbol= 'VIX', secType = 'IND',exchange = 'CBOE', currency='USD')
ib.qualifyContracts(contract_vix)

In [None]:
contract_nas = Contract(symbol= 'TQQQ', secType = 'STK',exchange = 'SMART', currency='USD')
ib.qualifyContracts(contract_nas)

In [None]:
class Data_Request :

    """
    Returns PD data containning equity price with specific time control

    Returns:
        pd: information regarding the contract of interst.
    """
    
    def __init__(self, list_days, contract, barsize, duration, *,specific_time= None, specific_pick_time= None):

        self.list_days = list_days
        self.contract = contract
        #self.init_date = init_date
        self.barsize = barsize
        self.duration = duration
        self.specific_time = specific_time # cut off time . Time object: time(14, 30, 0)
        self.specific_pick_time= specific_pick_time

    def D_request(self):

        his_df =pd.DataFrame()
        
        for i in range(len(self.list_days)):
            init_date=self.list_days[i]
            bars = ib.reqHistoricalData(self.contract, init_date, barSizeSetting=self.barsize, durationStr=self.duration, whatToShow="TRADES", useRTH=True)
            add_his_df = pd.DataFrame(bars)
            add_his_df['date'] = pd.to_datetime(add_his_df['date'])
            print(f'retriveing data for {init_date}')

            if self.specific_time is not None:
                cut_off = datetime.combine(self.list_days[i],self.specific_time)  # need logic if specific_time is defined
                pd_cut_off = pd.to_datetime(cut_off).tz_localize('US/Eastern')
            #datetime64_eastern = pd.Timestamp(date_time_str).tz_localize('US/Eastern')
                add_his_df = add_his_df[add_his_df['date'] > pd_cut_off]
            
            if self.specific_pick_time is not None:
                pick_time = datetime.combine(self.list_days[i],self.specific_pick_time)  # need logic if specific_time is defined
                pd_pick_time = pd.to_datetime(pick_time).tz_localize('US/Eastern')
            #datetime64_eastern = pd.Timestamp(date_time_str).tz_localize('US/Eastern')
                add_his_df = add_his_df[add_his_df['date'] == pd_pick_time]

            
            his_df = pd.concat([his_df, add_his_df], ignore_index=True)
            his_df['Date_Only'] = his_df['date'].dt.date
            print(len(his_df))

        
        return his_df


In [2]:
# picking out Friday and before specific-time data

#specific_time = time(14, 30, 0)
def data_filter(df_initial, list_days, specific_time_before = None, specific_time_after= None, on_time= None):
    his_df = pd.DataFrame()
    for i in range(len(list_days)):
        init_date=list_days[i]
        if specific_time_before is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time < specific_time_before)]
           
            
        if specific_time_after is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time >= specific_time_after)]

        if on_time is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time == on_time)]

        if (specific_time_before is None and specific_time_after is None) and on_time is None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date)]
        
        print(len(his_df))
        print(f'retrieving {init_date}')
        per_proc = np.round(i+1/len(list_days)*100, decimals=1)
        print(f'{per_proc} percent is done')
        his_df = pd.concat([his_df, add_his_df], ignore_index=True)
        his_df.drop_duplicates(subset=None, keep='first', inplace=True)
        his_df['Date_Only'] = his_df['date'].dt.date
    return his_df

In [3]:
def get_fridays_in_range(start_date, end_date):
    """
    Returns a list of dates that are Fridays within the given date range.

    Args:
        start_date (date): The start date of the range.
        end_date (date): The end date of the range.

    Returns:
        list: A list of date objects representing Fridays within the range.
    """
    fridays = []
    all_dates = []
    current_date = start_date
    all_current_date = start_date

    # Adjust to the first Friday
    days_until_friday = (4 - current_date.weekday()) % 7
    current_date += timedelta(days=days_until_friday)

    while current_date <= end_date:
        fridays.append(current_date)
        current_date += timedelta(days=7) # Move to the next Friday

    while all_current_date <= end_date:
        all_dates.append(all_current_date)
        all_current_date += timedelta(days=1)
    
    return fridays, all_dates

# Example Usage
start_date = date(2024, 1, 1)
end_date = date(2025, 4, 1)

fridays, all_dates = get_fridays_in_range(start_date, end_date)
# removing Non-trading Holidays
fridays.remove(date(2024, 3, 29))
fridays.remove(date(2024, 11, 29))

In [None]:
now =datetime.now()
print(now.date())

### Trainning Data Set Collection ###

In [None]:
barsize ="1 min"
duration = "1 D"
DR_stock_meta = Data_Request(all_dates, contract, barsize, duration)

In [None]:
DR_IND_VIX = Data_Request(all_dates, contract_vix, barsize, duration)
DR_STK_TQQQ = Data_Request(all_dates, contract_nas, barsize, duration)
df_all_days_VIX = DR_IND_VIX.D_request()
df_all_days_TQQQ = DR_STK_TQQQ.D_request()

In [None]:
df_all_days_stock = DR_stock_meta.D_request()

In [None]:
df_all_days_stock.to_csv('master_stock_info.csv', index=False)
df_all_days_stock.to_pickle('master_stock_info.pkl')

df_all_days_VIX.to_csv('master_vix_info.csv', index=False)
df_all_days_VIX.to_pickle('master_vix_info.pkl')

df_all_days_TQQQ.to_csv('master_tqqq_info.csv', index=False)
df_all_days_TQQQ.to_pickle('master_tqqq_info.pkl')

In [88]:
df_all_days_stock = pd.read_pickle('master_stock_info.pkl')
df_all_days_VIX = pd.read_pickle('master_vix_info.pkl')
df_all_days_TQQQ = pd.read_pickle('master_tqqq_info.pkl')

In [89]:
cut_time = time(14, 30, 0)

In [6]:

def calculate_rsi(prices, period=14):
    """
    Calculates the Relative Strength Index (RSI) for a given price series.

    Args:
        prices (pd.Series): A Pandas Series representing the price data.
        period (int, optional): The lookback period for RSI calculation. Defaults to 14.

    Returns:
        pd.Series: A Pandas Series containing the RSI values.
    """

    delta = prices.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=period, min_periods=period).mean()
    avg_loss = loss.rolling(window=period, min_periods=period).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))

    return rsi



In [34]:
# Example usage:
# #data = {'Close': [45, 48, 50, 47, 49, 52, 55, 53, 51, 54]}
# #df = pd.DataFrame(data)
# df = pd.DataFrame()
# rsi_period = 14 
# df_all_days_stock['RSI_14'] = calculate_rsi(df_all_days_stock['close'], period=rsi_period)
# rsi_period = 30 
# df_all_days_stock['RSI_30'] = calculate_rsi(df_all_days_stock['close'], period=rsi_period)
# rsi_period = 60 
# df_all_days_stock['RSI_60'] = calculate_rsi(df_all_days_stock['close'], period=rsi_period)

# df_all_days_stock

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only,RSI_14,RSI_30,RSI_60
0,2023-12-29 09:30:00-05:00,358.99,360.00,358.99,359.88,155021.0,359.308,510,2023-12-29,,,
1,2023-12-29 09:31:00-05:00,359.82,359.82,359.27,359.71,38832.0,359.594,254,2023-12-29,,,
2,2023-12-29 09:32:00-05:00,359.69,359.72,358.33,358.48,67898.0,359.113,374,2023-12-29,,,
3,2023-12-29 09:33:00-05:00,358.62,358.72,357.66,357.94,30127.0,358.207,180,2023-12-29,,,
4,2023-12-29 09:34:00-05:00,357.83,357.89,357.25,357.28,24586.0,357.559,153,2023-12-29,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
176965,2025-04-01 15:55:00-04:00,586.51,587.37,585.49,585.74,51718.0,586.359,386,2025-04-01,58.218126,58.355438,62.526316
176966,2025-04-01 15:56:00-04:00,585.78,586.14,585.44,586.02,55195.0,585.823,399,2025-04-01,61.666667,60.563380,63.333333
176967,2025-04-01 15:57:00-04:00,586.10,586.20,585.74,585.93,43672.0,585.957,339,2025-04-01,60.837070,59.628975,62.414338
176968,2025-04-01 15:58:00-04:00,585.95,586.35,585.82,586.31,63359.0,586.068,517,2025-04-01,62.731152,61.149228,64.046122


In [33]:
df_filter_prior[df_filter_prior['date'].dt.time == time(cut_time.hour, cut_time.minute-1)]

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only,RSI_14,RSI_30
299,2024-01-05 14:29:00-05:00,351.89,352.00,351.88,351.95,4951.0,351.937,34,2024-01-05,61.068702,61.068702
613,2024-01-12 14:29:00-05:00,375.53,375.55,375.39,375.41,15956.0,375.493,79,2024-01-12,55.263158,55.263158
927,2024-01-19 14:29:00-05:00,383.97,384.05,383.27,383.58,48131.0,383.787,312,2024-01-19,50.000000,50.000000
1241,2024-01-26 14:29:00-05:00,395.97,396.02,395.94,395.94,11952.0,395.984,58,2024-01-26,53.614458,53.614458
1555,2024-02-02 14:29:00-05:00,478.94,479.06,478.69,478.98,69241.0,478.892,368,2024-02-02,28.089888,28.089888
...,...,...,...,...,...,...,...,...,...,...,...
18511,2025-02-28 14:29:00-05:00,657.29,657.53,657.22,657.49,5951.0,657.369,51,2025-02-28,63.763608,63.763608
18825,2025-03-07 14:29:00-05:00,626.00,627.30,626.00,626.86,25475.0,626.577,169,2025-03-07,66.885246,66.885246
19139,2025-03-14 14:29:00-04:00,605.71,605.83,605.32,605.45,3589.0,605.533,26,2025-03-14,59.183673,59.183673
19453,2025-03-21 14:29:00-04:00,593.88,593.97,593.65,593.79,4003.0,593.809,31,2025-03-21,44.857143,44.857143


In [66]:
df_filter_prior.head(400)

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only,RSI_14,RSI_30,RSI_60
0,2024-01-05 09:30:00-05:00,346.99,348.60,346.55,347.56,181933.0,347.322,416,2024-01-05,57.239057,57.692308,52.637890
1,2024-01-05 09:31:00-05:00,347.60,347.95,347.28,347.85,38864.0,347.628,282,2024-01-05,60.802469,57.961783,54.292343
2,2024-01-05 09:32:00-05:00,347.89,347.99,347.17,347.21,59818.0,347.493,266,2024-01-05,52.393617,51.412429,50.108932
3,2024-01-05 09:33:00-05:00,347.18,347.18,346.26,346.79,31038.0,346.691,174,2024-01-05,48.402948,46.808511,48.677249
4,2024-01-05 09:34:00-05:00,346.73,347.74,346.62,347.62,33355.0,347.253,129,2024-01-05,62.780269,52.456418,52.775073
...,...,...,...,...,...,...,...,...,...,...,...,...
395,2024-01-12 10:05:00-05:00,372.80,372.98,372.63,372.85,30768.0,372.761,170,2024-01-12,36.601307,57.497782,57.120169
396,2024-01-12 10:06:00-05:00,372.85,373.50,372.75,373.50,38029.0,373.086,251,2024-01-12,37.419355,58.954584,58.418891
397,2024-01-12 10:07:00-05:00,373.50,374.23,373.40,374.13,72656.0,373.821,296,2024-01-12,39.121339,59.509721,59.398496
398,2024-01-12 10:08:00-05:00,374.13,374.38,373.90,373.92,37672.0,374.156,217,2024-01-12,39.702760,57.301452,59.102244


In [65]:
df_filter_prior.groupby('Date_Only')['open'].min()

Date_Only
2024-01-05    346.73
2024-01-12    370.16
2024-01-19    378.17
2024-01-26    391.85
2024-02-02    454.71
               ...  
2025-02-28    644.70
2025-03-07    601.18
2025-03-14    596.61
2025-03-21    581.10
2025-03-28    578.00
Name: open, Length: 63, dtype: float64

In [67]:
df_filter_prior.groupby('Date_Only')['open'].max()

Date_Only
2024-01-05    353.46
2024-01-12    376.89
2024-01-19    383.97
2024-01-26    396.75
2024-02-02    485.75
               ...  
2025-02-28    665.70
2025-03-07    634.19
2025-03-14    607.78
2025-03-21    597.47
2025-03-28    601.49
Name: open, Length: 63, dtype: float64

In [90]:

def data_sparcing_construc(df, sp_day, *, cut_time = None):
    #df_group = df.groupby("Date_Only").std(numeric_only=True)
    df_filter_prior = data_filter(df, sp_day, specific_time_before = cut_time )
    df_group_prior = df_filter_prior.groupby("Date_Only").std(numeric_only=True)
    
    df_filter_after = data_filter(df, sp_day, specific_time_after = cut_time )
    df_group_after = df_filter_after.groupby("Date_Only").std(numeric_only=True)


    df_group_train =pd.DataFrame()
    df_group_train['Target_std'] = pd.DataFrame(df_group_after[['open','high','low','close']].max(axis=1))
    df_group_train['prior_std'] = pd.DataFrame(df_group_prior[['open','high','low','close']].max(axis=1))
    df_group_train['prior_Vol'] = pd.DataFrame(df_filter_prior.groupby("Date_Only").mean()['volume'])

    df_group_train['prior_range'] =pd.DataFrame(df_filter_prior.groupby('Date_Only')['close'].max() -df_filter_prior.groupby('Date_Only')['close'].min() )
    
    df_initial = df_filter_prior.copy()
    new_time = time(cut_time.hour, cut_time.minute -1)
    df_group_train['cut_open'] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values
   # df_group_train['RSI_14'] = df_initial[df_initial['date'].dt.time == new_time]['RSI_14'].values
   # df_group_train['RSI_30'] = df_initial[df_initial['date'].dt.time == new_time]['RSI_30'].values
   # df_group_train['RSI_60'] = df_initial[df_initial['date'].dt.time == new_time]['RSI_60'].values
    
    cut_name =['cut_1hr','cut_2hr','cut_3hr']
    for i in range(3):
        prior = time(cut_time.hour - (i+1), cut_time.minute)
        df_group_train[cut_name[i]] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values- df_initial[(df_initial['date'].dt.time == prior)]['open'].values

    #df_group_train['Class_target'] = df_filter_after[(df_filter_after['date'].dt.time == time(15, 49, 0))]['close'].values- df_initial[(df_initial['date'].dt.time == new_time)]['open'].values
    
    return df_group_train, df_filter_prior, df_filter_after, df_group_prior, df_group_after
    

In [91]:
df_group_train, df_filter_prior, df_filter_after, df_group_prior, df_group_after = data_sparcing_construc(df_all_days_stock, fridays, cut_time= cut_time)

0
retrieving 2024-01-05
1.6 percent is done
300
retrieving 2024-01-12
2.6 percent is done
600
retrieving 2024-01-19
3.6 percent is done
900
retrieving 2024-01-26
4.6 percent is done
1200
retrieving 2024-02-02
5.6 percent is done
1500
retrieving 2024-02-09
6.6 percent is done
1800
retrieving 2024-02-16
7.6 percent is done
2100
retrieving 2024-02-23
8.6 percent is done
2400
retrieving 2024-03-01
9.6 percent is done
2700
retrieving 2024-03-08
10.6 percent is done
3000
retrieving 2024-03-15
11.6 percent is done
3300
retrieving 2024-03-22
12.6 percent is done
3600
retrieving 2024-04-05
13.6 percent is done
3900
retrieving 2024-04-12
14.6 percent is done
4200
retrieving 2024-04-19
15.6 percent is done
4500
retrieving 2024-04-26
16.6 percent is done
4800
retrieving 2024-05-03
17.6 percent is done
5100
retrieving 2024-05-10
18.6 percent is done
5400
retrieving 2024-05-17
19.6 percent is done
5700
retrieving 2024-05-24
20.6 percent is done
6000
retrieving 2024-05-31
21.6 percent is done
6300
re

In [92]:
df_group_train_vix, *_  = data_sparcing_construc(df_all_days_VIX, fridays, cut_time= cut_time)
df_group_train_tqqq, *_ = data_sparcing_construc(df_all_days_TQQQ, fridays, cut_time= cut_time)

0
retrieving 2024-01-05
1.6 percent is done
720
retrieving 2024-01-12
2.6 percent is done
1440
retrieving 2024-01-19
3.6 percent is done
2160
retrieving 2024-01-26
4.6 percent is done
2880
retrieving 2024-02-02
5.6 percent is done
3600
retrieving 2024-02-09
6.6 percent is done
4320
retrieving 2024-02-16
7.6 percent is done
5040
retrieving 2024-02-23
8.6 percent is done
5760
retrieving 2024-03-01
9.6 percent is done
6480
retrieving 2024-03-08
10.6 percent is done
7200
retrieving 2024-03-15
11.6 percent is done
7920
retrieving 2024-03-22
12.6 percent is done
8640
retrieving 2024-04-05
13.6 percent is done
9360
retrieving 2024-04-12
14.6 percent is done
10080
retrieving 2024-04-19
15.6 percent is done
10800
retrieving 2024-04-26
16.6 percent is done
11520
retrieving 2024-05-03
17.6 percent is done
12240
retrieving 2024-05-10
18.6 percent is done
12960
retrieving 2024-05-17
19.6 percent is done
13680
retrieving 2024-05-24
20.6 percent is done
14400
retrieving 2024-05-31
21.6 percent is don

In [96]:
df_group_train_vix

Unnamed: 0_level_0,vix_std,vix_prior_std,vix_Vol,vix_prior_range,vix_cut_open,vix_cut_1hr,vix_cut_2hr,vix_cut_3hr
Date_Only,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-05,0.071025,0.463383,0.0,1.29,13.53,0.10,0.03,-0.17
2024-01-12,0.074357,0.109005,0.0,0.56,12.55,-0.02,-0.03,-0.15
2024-01-19,0.059231,0.097359,0.0,1.18,13.41,-0.28,-0.22,-0.08
2024-01-26,0.021562,0.176661,0.0,0.53,13.30,0.02,0.01,-0.04
2024-02-02,0.031420,0.108060,0.0,0.50,13.81,-0.24,-0.22,-0.11
...,...,...,...,...,...,...,...,...
2025-02-28,0.460062,0.465128,0.0,2.45,20.65,-0.91,-1.07,-0.07
2025-03-07,0.151596,0.575226,0.0,3.19,23.47,-0.26,-1.38,-2.53
2025-03-14,0.078107,0.894907,0.0,2.78,21.74,-0.48,-0.61,-0.98
2025-03-21,0.193971,0.371233,0.0,1.70,19.82,0.14,0.00,0.10


In [97]:
df_final_jointed_vix=pd.DataFrame()
df_final_jointed_all=pd.DataFrame()

df_group_train_vix.columns =['vix_std','vix_prior_std','vix_Vol','vix_prior_range','vix_cut_open','vix_cut_1hr','vix_cut_2hr','vix_cut_3hr']
df_final_jointed_vix = pd.concat([df_group_train,df_group_train_vix], axis=1)
df_group_train_tqqq.columns =['tqqq_std','tqqq_prior_std','tqqq_Vol','tqqq_prior_range','tqqq_cut_open','tqqq_cut_1hr','tqqq_cut_2hr','tqqq_cut_3hr']
df_final_jointed_all = pd.concat([df_final_jointed_vix,df_group_train_tqqq], axis=1)
df_final_jointed_all.drop(['vix_std','vix_Vol','tqqq_std'], axis=1, inplace=True)

In [98]:
df_final_jointed_all

Unnamed: 0_level_0,Target_std,prior_std,prior_Vol,prior_range,cut_open,cut_1hr,cut_2hr,cut_3hr,vix_prior_std,vix_prior_range,...,vix_cut_1hr,vix_cut_2hr,vix_cut_3hr,tqqq_prior_std,tqqq_Vol,tqqq_prior_range,tqqq_cut_open,tqqq_cut_1hr,tqqq_cut_2hr,tqqq_cut_3hr
Date_Only,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-05,0.650053,1.058044,18218.376667,6.68,351.89,0.70,0.97,-0.11,0.463383,1.29,...,0.10,0.03,-0.17,0.301283,199340.133333,1.18,46.10,0.05,0.18,-0.34
2024-01-12,0.629948,0.993937,29812.156667,6.51,375.53,0.89,1.75,-0.75,0.109005,0.56,...,-0.02,-0.03,-0.15,0.223571,156363.693333,1.09,50.40,0.06,0.29,-0.05
2024-01-19,0.509282,1.115984,28760.476667,5.66,383.97,0.83,2.83,2.65,0.097359,1.18,...,-0.28,-0.22,-0.08,0.651070,206204.083333,2.31,54.46,0.71,1.27,1.55
2024-01-26,0.813494,1.238336,18011.023333,4.88,395.97,0.11,-0.05,-0.30,0.176661,0.53,...,0.02,0.01,-0.04,0.356931,146601.786667,1.13,55.44,-0.11,-0.63,-0.75
2024-02-02,2.322338,4.712763,156749.593333,31.27,478.94,2.13,-2.85,1.92,0.108060,0.50,...,-0.24,-0.22,-0.11,0.611902,229504.093333,2.71,57.40,0.33,-0.04,0.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-28,2.284799,3.457627,19624.563333,20.68,657.29,-1.04,-3.43,-1.62,0.465128,2.45,...,-0.91,-1.07,-0.07,0.767429,200316.643333,3.21,71.61,0.12,-0.97,-0.34
2025-03-07,1.311185,8.718339,35741.483333,33.05,626.00,10.33,19.82,18.71,0.575226,3.19,...,-0.26,-1.38,-2.53,1.292159,314475.686667,4.91,67.48,1.73,3.52,3.54
2025-03-14,0.758964,2.965842,18741.093333,11.18,605.71,0.88,0.58,2.23,0.894907,2.78,...,-0.48,-0.61,-0.98,0.581809,234059.553333,2.22,61.26,-0.16,0.19,-0.10
2025-03-21,0.708330,3.255008,28634.216667,16.37,593.88,1.38,1.20,3.91,0.371233,1.70,...,0.14,0.00,0.10,0.657912,184808.116667,2.47,61.25,0.22,0.01,1.19


In [99]:
df_final_jointed_all.to_pickle('df_final_jointed_all_train_beta.pkl')