In [12]:
# importing required packages
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pytz
from pandas.tseries.holiday import USFederalHolidayCalendar, AbstractHolidayCalendar, Holiday, nearest_workday
from pandas.tseries.offsets import CustomBusinessDay

# Defining Required Functions

In [9]:
def get_NYSEMarketHours(date):
    """
    Build the NYSE session open and close for a single calendar day.

    Parameters:
        - date (str): Day of interest, formatted as 'YYYY-MM-DD'.

    Returns:
        - start_time: 09:30 on that day, localized to America/New_York
        - end_time: 16:00 on that day, localized to America/New_York

    Raises:
        - ValueError: if `date` does not match the 'YYYY-MM-DD' format
    """
    new_york = pytz.timezone('America/New_York')

    # A date-only format parses to midnight, so only hour and minute have to be
    # filled in for each session boundary.
    midnight = datetime.strptime(date, '%Y-%m-%d')
    session_open = midnight.replace(hour=9, minute=30)
    session_close = midnight.replace(hour=16, minute=0)

    # localize() attaches the zone to the naive wall-clock times.
    return new_york.localize(session_open), new_york.localize(session_close)

def get_HKEXMarketHours(date):
    """
    Build the HKEX session open and close for a single calendar day.

    Parameters:
        - date (str): Day of interest, formatted as 'YYYY-MM-DD'.

    Returns:
        - start_time: 09:15 on that day, localized to Asia/Hong_Kong
        - end_time: 16:00 on that day, localized to Asia/Hong_Kong

    Raises:
        - ValueError: if `date` does not match the 'YYYY-MM-DD' format
    """
    hong_kong = pytz.timezone('Asia/Hong_Kong')

    # A date-only format parses to midnight, so only hour and minute have to be
    # filled in for each session boundary.
    midnight = datetime.strptime(date, '%Y-%m-%d')
    session_open = midnight.replace(hour=9, minute=15)
    session_close = midnight.replace(hour=16, minute=0)

    # localize() attaches the zone to the naive wall-clock times.
    return hong_kong.localize(session_open), hong_kong.localize(session_close)

In [6]:
# Quick check: NYSE session bounds for 2 January 2024 (expect 09:30 and 16:00 New York time).
get_NYSEMarketHours("2024-01-02")

(datetime.datetime(2024, 1, 2, 9, 30, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
 datetime.datetime(2024, 1, 2, 16, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>))

In [11]:
# Quick check: HKEX session bounds for 2 January 2024 (expect 09:15 and 16:00 Hong Kong time).
get_HKEXMarketHours("2024-01-02")

(datetime.datetime(2024, 1, 2, 9, 15, tzinfo=<DstTzInfo 'Asia/Hong_Kong' HKT+8:00:00 STD>),
 datetime.datetime(2024, 1, 2, 16, 0, tzinfo=<DstTzInfo 'Asia/Hong_Kong' HKT+8:00:00 STD>))

In [None]:
class MarketAnalyzer:
    """
    Builds daily realised-volatility rows (HSI night, HSI day, ES night) and
    combines them into a date-indexed DataFrame.

    Data is fetched through `hds`, which is not defined in the visible cells of
    this notebook and must already exist in the kernel namespace.
    """

    @staticmethod
    def _session_bounds(date, tz_name, open_hm, close_hm):
        """Return tz-aware (open, close) datetimes for `date` in the zone `tz_name`."""
        zone = pytz.timezone(tz_name)
        # A date-only format parses to midnight; fill in hour/minute per boundary.
        midnight = datetime.strptime(date, '%Y-%m-%d')
        session_open = zone.localize(midnight.replace(hour=open_hm[0], minute=open_hm[1]))
        session_close = zone.localize(midnight.replace(hour=close_hm[0], minute=close_hm[1]))
        return session_open, session_close

    def get_previous_weekday(self, date):
        """
        Gets the previous weekday (Mon-Fri) for a given date.

        Only weekends are skipped; exchange holidays are not taken into account.

        Parameters:
            - date (str): Date in 'YYYY-MM-DD' format

        Returns:
            - str: Previous weekday in 'YYYY-MM-DD' format

        Raises:
            - ValueError: if `date` does not match the 'YYYY-MM-DD' format
        """
        one_day = timedelta(days=1)
        prev_day = datetime.strptime(date, '%Y-%m-%d') - one_day

        # weekday(): Monday = 0 ... Saturday = 5, Sunday = 6
        while prev_day.weekday() >= 5:
            prev_day -= one_day

        return prev_day.strftime('%Y-%m-%d')

    def get_NYSEMarketHours(self, date):
        """
        Takes a date and returns the NYSE opening (09:30) and closing (16:00)
        times as datetimes localized to America/New_York.

        Parameters:
            - date (str): Date in 'YYYY-MM-DD' format

        Returns:
            - (start_time, end_time): tuple of tz-aware datetime objects
        """
        return self._session_bounds(date, 'America/New_York', (9, 30), (16, 0))

    def get_HKEXMarketHours(self, date):
        """
        Takes a date and returns the HKEX opening (09:15) and closing (16:00)
        times as datetimes localized to Asia/Hong_Kong.

        Parameters:
            - date (str): Date in 'YYYY-MM-DD' format

        Returns:
            - (start_time, end_time): tuple of tz-aware datetime objects
        """
        return self._session_bounds(date, 'Asia/Hong_Kong', (9, 15), (16, 0))

    def analyse_Volatility(self, date):
        """
        Analyses volatility data for a given date and returns a DataFrame row with the results.
        Returns None if any data is missing or errors occur while fetching/extracting it.

        Parameters:
            - date (str): Date in 'YYYY-MM-DD' format for TODAY's date

        Returns:
            - pd.DataFrame: Single row DataFrame (columns HSI_RVOL_NIGHT, HSI_RVOL_DAY,
              ES_RVOL_NIGHT; index = `date` as a Timestamp), or None if data is missing

        Raises:
            - ValueError: if `date` does not match the 'YYYY-MM-DD' format
        """
        # The "night" readings are taken from the previous weekday's data.
        prev_date = self.get_previous_weekday(date)

        try:
            hsi_rvol_night = hds(prev_date)  # yesterday's data
            # NOTE(review): identical call to the HSI night fetch above; presumably a
            # placeholder until the ES query is wired up. Confirm before relying on it.
            es_rvol_night = hds(prev_date)   # yesterday's data
            hsi_rvol_day = hds(date)         # today's data

            # A day is only usable when all three sources returned rows.
            if any(frame.empty for frame in (hsi_rvol_night, es_rvol_night, hsi_rvol_day)):
                return None

            # The last observation of each frame is the reading for that session.
            return pd.DataFrame({
                'HSI_RVOL_NIGHT': [hsi_rvol_night['INTRADAY_RVOL'].iat[-1]],
                'HSI_RVOL_DAY': [hsi_rvol_day['INTRADAY_RVOL'].iat[-1]],
                'ES_RVOL_NIGHT': [es_rvol_night['INTRADAY_RVOL'].iat[-1]]
            }, index=[pd.to_datetime(date)])

        except Exception:
            # Deliberate best-effort: any failure (no data, holiday, missing column, ...)
            # turns this day into a gap instead of aborting a multi-day run.
            # NOTE(review): this also hides a NameError if `hds` is not defined.
            return None

    def analyse_VolatilityRange(self, start_date, end_date):
        """
        Analyses volatility data for a range of dates and returns a DataFrame with results.
        Weekends are not visited; days for which analyse_Volatility returns None are skipped.

        Parameters:
            - start_date (str): Start date in 'YYYY-MM-DD' format (inclusive)
            - end_date (str): End date in 'YYYY-MM-DD' format (inclusive)

        Returns:
            - pd.DataFrame: DataFrame with volatility metrics indexed by date,
              or None when no day in the range produced data
        """
        # freq='B' yields Monday-Friday dates only.
        business_days = pd.date_range(
            start=pd.to_datetime(start_date),
            end=pd.to_datetime(end_date),
            freq='B',
        )

        daily_rows = []
        for day in business_days:
            row = self.analyse_Volatility(day.strftime('%Y-%m-%d'))
            if row is not None:
                daily_rows.append(row)

        if not daily_rows:
            return None

        # Single concat at the end instead of growing a frame inside the loop.
        return pd.concat(daily_rows)

# Collecting Historical Data for Future Analysis

## Retrieving Complete Dataset from HDS

In [None]:
# get realised volatility data for HKEX HSI overnight
#hsi_rvol_night = hds()
# get realised volatility data for ES overnight (or day trading session NY time)
#es_rvol_night = hds()

# get realised volatility data for HKEX HSI day trading hours
#hsi_rvol_day = hds()

## Filtering for Intraday Realised Volatility

In [None]:
# TODO: confirm the actual column name in the HDS data ('INTRADAY_RVOL' below is assumed)

#hsi_intraday_rvol_night = hsi_rvol_night['INTRADAY_RVOL'].iat[-1] if not hsi_rvol_night.empty else None
#hsi_intraday_rvol_day = hsi_rvol_day['INTRADAY_RVOL'].iat[-1] if not hsi_rvol_day.empty else None
#es_intraday_rvol_night = es_rvol_night['INTRADAY_RVOL'].iat[-1] if not es_rvol_night.empty else None

In [None]:
def analyse_VolatilityRange(self, start_date, end_date):
    """
    Runs the single-day volatility analysis over every business day in a date
    range and stacks the per-day rows into one DataFrame.

    Weekends are never visited; business days for which
    `self.analyse_Volatility` returns None are left out of the result.

    NOTE(review): this is a module-level function that takes `self`; it is not
    bound to any class here, so the analyzer must be passed explicitly unless
    the function is also defined on (or attached to) the class.

    Parameters:
        - self: object exposing `analyse_Volatility(date_str)`
        - start_date (str): Start date in 'YYYY-MM-DD' format (inclusive)
        - end_date (str): End date in 'YYYY-MM-DD' format (inclusive)

    Returns:
        - pd.DataFrame: per-day rows concatenated in date order,
          or None when no day produced a row
    """
    # freq='B' restricts the range to Monday-Friday.
    business_days = pd.date_range(
        start=pd.to_datetime(start_date),
        end=pd.to_datetime(end_date),
        freq='B',
    )

    # Evaluate each day in order, then drop the days that produced nothing.
    per_day = (
        self.analyse_Volatility(day.strftime('%Y-%m-%d'))
        for day in business_days
    )
    frames = [frame for frame in per_day if frame is not None]

    if not frames:
        return None
    return pd.concat(frames)

In [None]:
# Build the January 2025 volatility time series.
analyzer = MarketAnalyzer()
volatility_timeseries = analyzer.analyse_VolatilityRange('2025-01-01', '2025-01-31')

# The range analysis returns None when no day produced data, so guard before printing.
if volatility_timeseries is not None:
    print(volatility_timeseries)