In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, date, timedelta, time
os.environ['TZ'] ='America/New_York'

import yfinance as yf

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

from ib_async import *
util.startLoop()

### Data Prep for Inferencing

In [2]:
ib = IB()
ib.connect(port=4002, clientId=5)

<IB connected to 127.0.0.1:4002 clientId=5>

In [3]:
contract = Contract(symbol='META', secType='STK', exchange='SMART', currency='USD')
ib.qualifyContracts(contract)

[Contract(secType='STK', conId=107113386, symbol='META', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='META', tradingClass='NMS')]

In [4]:
contract_vix = Contract(symbol= 'VIX', secType = 'IND',exchange = 'CBOE', currency='USD')
ib.qualifyContracts(contract_vix)

[Contract(secType='IND', conId=13455763, symbol='VIX', exchange='CBOE', currency='USD', localSymbol='VIX')]

In [5]:
contract_nas = Contract(symbol= 'TQQQ', secType = 'STK',exchange = 'SMART', currency='USD')
ib.qualifyContracts(contract_nas)

[Contract(secType='STK', conId=72539702, symbol='TQQQ', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='TQQQ', tradingClass='NMS')]

In [6]:
class Data_Request :

    """
    Returns PD data containning equity price with specific time control

    Returns:
        pd: information regarding the contract of interst.
    """
    
    def __init__(self, list_days, contract, barsize, duration, *,specific_time= None, specific_pick_time= None):

        self.list_days = list_days
        self.contract = contract
        #self.init_date = init_date
        self.barsize = barsize
        self.duration = duration
        self.specific_time = specific_time # cut off time . Time object: time(14, 30, 0)
        self.specific_pick_time= specific_pick_time

    def D_request(self):

        his_df =pd.DataFrame()
        
        for i in range(len(self.list_days)):
            init_date=self.list_days[i]
            bars = ib.reqHistoricalData(self.contract, init_date, barSizeSetting=self.barsize, durationStr=self.duration, whatToShow="TRADES", useRTH=True)
            add_his_df = pd.DataFrame(bars)
            add_his_df['date'] = pd.to_datetime(add_his_df['date'])
            print(f'retriveing data for {init_date}')

            if self.specific_time is not None:
                cut_off = datetime.combine(self.list_days[i],self.specific_time)  # need logic if specific_time is defined
                pd_cut_off = pd.to_datetime(cut_off).tz_localize('US/Eastern')
            #datetime64_eastern = pd.Timestamp(date_time_str).tz_localize('US/Eastern')
                add_his_df = add_his_df[add_his_df['date'] > pd_cut_off]
            
            if self.specific_pick_time is not None:
                pick_time = datetime.combine(self.list_days[i],self.specific_pick_time)  # need logic if specific_time is defined
                pd_pick_time = pd.to_datetime(pick_time).tz_localize('US/Eastern')
            #datetime64_eastern = pd.Timestamp(date_time_str).tz_localize('US/Eastern')
                add_his_df = add_his_df[add_his_df['date'] == pd_pick_time]

            
            his_df = pd.concat([his_df, add_his_df], ignore_index=True)
            his_df['Date_Only'] = his_df['date'].dt.date
            print(len(his_df))

        
        return his_df


In [30]:
# picking out Friday and before specific-time data

#specific_time = time(14, 30, 0)
def data_filter(df_initial, list_days, specific_time_before = None, specific_time_after= None, on_time= None):
    his_df = pd.DataFrame()
    for i in range(len(list_days)):
        init_date=list_days[i]
        if specific_time_before is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time < specific_time_before)]
           
            
        if specific_time_after is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time >= specific_time_after)]

        if on_time is not None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date) & (df_initial['date'].dt.time == on_time)]

        if (specific_time_before is None and specific_time_after is None) and on_time is None:
            add_his_df = df_initial[(df_initial['date'].dt.date == init_date)]
        
        print(len(his_df))
        print(f'retrieving {init_date}')
        per_proc = np.round(i+1/len(list_days)*100, decimals=1)
        print(f'{per_proc} percent is done')
        his_df = pd.concat([his_df, add_his_df], ignore_index=True)
        his_df.drop_duplicates(subset=None, keep='first', inplace=True)
        his_df['Date_Only'] = his_df['date'].dt.date
    return his_df

In [8]:
def get_fridays_in_range(start_date, end_date):
    """
    Returns a list of dates that are Fridays within the given date range.

    Args:
        start_date (date): The start date of the range.
        end_date (date): The end date of the range.

    Returns:
        list: A list of date objects representing Fridays within the range.
    """
    fridays = []
    all_dates = []
    current_date = start_date
    all_current_date = start_date

    # Adjust to the first Friday
    days_until_friday = (4 - current_date.weekday()) % 7
    current_date += timedelta(days=days_until_friday)

    while current_date <= end_date:
        fridays.append(current_date)
        current_date += timedelta(days=7) # Move to the next Friday

    while all_current_date <= end_date:
        all_dates.append(all_current_date)
        all_current_date += timedelta(days=1)
    
    return fridays, all_dates

# Example Usage
start_date = date(2024, 1, 1)
end_date = date(2025, 4, 1)

fridays, all_dates = get_fridays_in_range(start_date, end_date)
# removing Non-trading Holidays
fridays.remove(date(2024, 3, 29))
fridays.remove(date(2024, 11, 29))

In [51]:
now =datetime.now()
#print(now.date())
#print(now.time().minute)
infer_time = time(now.time().hour, now.time().minute -15 )
inferencing_date = now.date()
#infer_time = time(14,30,0)

### Inferencing Data Set Collection ###

In [52]:
barsize ="30 secs"
duration = "1 D"
#DR_stock_meta = Data_Request( " ",contract, barsize, duration)
DR_stock_meta = Data_Request([inferencing_date], contract, barsize, duration)
df_all_days_stock = DR_stock_meta.D_request()

retriveing data for 2025-04-03
196


In [53]:
DR_IND_VIX = Data_Request([inferencing_date], contract_vix, barsize, duration)
DR_STK_TQQQ = Data_Request([inferencing_date], contract_nas, barsize, duration)
df_all_days_VIX = DR_IND_VIX.D_request()
df_all_days_TQQQ = DR_STK_TQQQ.D_request()

retriveing data for 2025-04-03
916
retriveing data for 2025-04-03
196


In [54]:
print(df_all_days_stock.tail(1))
print(df_all_days_VIX.tail(1))
print(df_all_days_TQQQ.tail(1))

                         date   open    high     low  close   volume  average  \
195 2025-04-03 11:07:30-04:00  537.6  537.61  537.16  537.3  10024.0  537.358   

     barCount   Date_Only  
195        83  2025-04-03  
                         date   open   high    low  close  volume  average  \
915 2025-04-03 10:07:30-05:00  29.23  29.23  29.21  29.21     0.0      0.0   

     barCount   Date_Only  
915         2  2025-04-03  
                         date   open  high    low  close    volume  average  \
195 2025-04-03 11:07:30-04:00  50.65  50.7  50.61  50.62  207280.0   50.652   

     barCount   Date_Only  
195       575  2025-04-03  


In [13]:
#df_all_days_stock = DR_stock_meta.D_request()

In [55]:

def data_sparcing_construc(df, sp_day, *, cut_time = None):
    #df_group = df.groupby("Date_Only").std(numeric_only=True)
    df_filter_prior = data_filter(df, sp_day, specific_time_before = cut_time )
    df_group_prior = df_filter_prior.groupby("Date_Only").std(numeric_only=True)
    
    #df_filter_after = data_filter(df, sp_day, specific_time_after = cut_time )
    #df_group_after = df_filter_after.groupby("Date_Only").std(numeric_only=True)


    df_group_train =pd.DataFrame()
    #df_group_train['Target_std'] = pd.DataFrame(df_group_after[['open','high','low','close']].max(axis=1))
    df_group_train['prior_std'] = pd.DataFrame(df_group_prior[['open','high','low','close']].max(axis=1))
    df_group_train['prior_Vol'] = pd.DataFrame(df_filter_prior.groupby("Date_Only").mean()['volume'])
    
    df_initial = df_filter_prior
    new_time = time(cut_time.hour, cut_time.minute -1)
    df_group_train['cut_open'] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values
    
    cut_name =['cut_1hr','cut_2hr','cut_3hr']
    for i in range(3):
        prior = time(cut_time.hour - (i+1), cut_time.minute)
        df_group_train[cut_name[i]] = df_initial[(df_initial['date'].dt.time == new_time)]['open'].values- df_initial[(df_initial['date'].dt.time == prior)]['open'].values


    
    return df_group_train, df_filter_prior, df_group_prior
    

In [60]:
#print(now.time().minute)
infer_time = time(now.time().hour, now.time().minute -15 )

In [61]:
df_group_train, df_filter_prior, df_group_prior = data_sparcing_construc(df_all_days_stock, [inferencing_date], cut_time= infer_time)

0
retrieving 2025-04-03
100.0 percent is done


ValueError: Length of values (0) does not match length of index (1)

In [29]:
df_all_days_VIX

Unnamed: 0,date,open,high,low,close,volume,average,barCount,Date_Only
0,2025-04-03 02:15:00-05:00,26.38,26.38,26.37,26.37,0.0,0.0,2,2025-04-03
1,2025-04-03 02:15:30-05:00,26.27,26.27,26.22,26.22,0.0,0.0,2,2025-04-03
2,2025-04-03 02:16:00-05:00,26.25,26.28,26.25,26.28,0.0,0.0,2,2025-04-03
3,2025-04-03 02:16:30-05:00,26.26,26.26,26.26,26.26,0.0,0.0,1,2025-04-03
4,2025-04-03 02:17:00-05:00,26.23,26.29,26.23,26.29,0.0,0.0,2,2025-04-03
...,...,...,...,...,...,...,...,...,...
894,2025-04-03 09:57:00-05:00,28.41,28.42,28.41,28.42,0.0,0.0,2,2025-04-03
895,2025-04-03 09:57:30-05:00,28.41,28.41,28.37,28.37,0.0,0.0,2,2025-04-03
896,2025-04-03 09:58:00-05:00,28.38,28.49,28.38,28.49,0.0,0.0,2,2025-04-03
897,2025-04-03 09:58:30-05:00,28.50,28.52,28.50,28.52,0.0,0.0,2,2025-04-03


In [None]:
df_group_train_vix, *_  = data_sparcing_construc(df_all_days_VIX, [inferencing_date], cut_time= infer_time)
df_group_train_tqqq, *_ = data_sparcing_construc(df_all_days_TQQQ, [inferencing_date], cut_time= infer_time)

In [None]:
df_group_train_vix

In [None]:
df_final_joninted_vix=pd.DataFrame()
df_final_joninted_all=pd.DataFrame()

df_group_train_vix.columns =['vix_prior_std','vix_Vol','vix_cut_open','vix_cut_1hr','vix_cut_2hr','vix_cut_3hr']
df_final_jointed_vix = pd.concat([df_group_train,df_group_train_vix], axis=1)
df_group_train_tqqq.columns =['tqqq_prior_std','tqqq_Vol','tqqq_cut_open','tqqq_cut_1hr','tqqq_cut_2hr','tqqq_cut_3hr']
df_final_jointed_all = pd.concat([df_final_jointed_vix,df_group_train_tqqq], axis=1)
df_final_jointed_all.drop(['vix_Vol'], axis=1, inplace=True)

In [None]:
df_final_jointed_all

### INFERENCING###

In [None]:
df_final_jointed_all.reset_index(inplace=True)
inference_data = df_final_jointed_all.drop('Date_Only', axis=1)

In [None]:
inference_data

In [None]:
import joblib
# Assuming 'model' is your trained model object
filename = 'xgb_model.joblib'
loaded_model = joblib.load(filename)

In [None]:
yhat = loaded_model.predict(inference_data)

In [None]:
yhat

In [None]:
df_all_days_stock.iloc[300]

In [None]:
y_value = df_all_days_stock.iloc[300]['close']
y_value_p = df_all_days_stock.iloc[300]['close'] + yhat[0]*2.5
y_value_m = df_all_days_stock.iloc[300]['close'] - yhat[0]*2.5

In [None]:
yhat*2.5

In [None]:
df_all_days_stock['close'].plot(xlim=(300,400), ylim=(575,587))
plt.xlabel('time')
plt.axhline(y=y_value, color='r', linestyle='--', label=f'y = {y_value}')
plt.axhline(y=y_value_p, color='black', linestyle='--', label=f'y = {y_value_p}')
plt.axhline(y=y_value_m, color='black', linestyle='--', label=f'y = {y_value_m}')