In [72]:
''' 
Development Jupyter Notebook for the Benchmark Calculation Over Time.
''' 

from abc import abstractmethod, ABC 
import pandas as pd 
from typing import Iterable, List, Optional
from datetime import datetime
import numpy as np


In [None]:
# Pull the cleaned data into the notebook.
df = (
    pd.read_csv("bloomberg_data_cleaned.csv")
    # Convert the date column to datetime objects before it becomes an index level.
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    # Rows are addressed by (date, ticker).
    .set_index(["date", "ticker"])
)

# Show floats with thousands separators and four decimals.
pd.set_option("display.float_format", "{:,.4f}".format)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,market_cap,volume,price_to_book,price_to_sales,pe_ratio,current_cap_share_class,close_price
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-03,A,47257.0847,554098.0,8.7692,7.5274,42.0279,47257.0847,156.48
2022-01-03,AA,11293.546,1033749.0,2.3785,0.9249,8.9225,11293.546,60.36
2022-01-03,AAL,12140.8973,8786970.0,,0.4051,,12140.8973,18.75
2022-01-03,AAON,4106.6876,83790.0,8.8262,7.6801,63.5297,4106.6876,52.22
2022-01-03,AAPL,2986128.318,20242137.0,41.3474,7.9614,30.1565,2986128.318,182.01


In [None]:


''' 
Benchmark Interface/Abstract Class
'''
# Maybe this doesn't need to be an abstract class; a plain
# superclass may be enough.


class BenchmarkA(ABC):
    """Abstract interface for a benchmark that is walked through trading dates.

    Every method, including ``__init__``, is abstract, so a concrete subclass
    must override all three before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, data: pd.DataFrame):
        """Declare the attributes a benchmark is expected to carry.

        The lines below are annotations only; nothing is assigned here and
        ``data`` is not stored by this base implementation.
        """
        self.data: pd.DataFrame
        self.price_history: pd.Series
        self.cur_date: Optional[datetime]
        self.cur_constituents: List[str]  # all current members of the index

    @abstractmethod
    def is_reconstitution_date(self) -> bool:
        """Report whether the current date is a reconstitution date.

        The base implementation always answers ``False``.
        """
        return False

    @abstractmethod
    def next_date(self) -> Optional[datetime]:
        """Advance to the next date and return it.

        The base implementation does nothing and returns ``None``.
        """
        return None






In [None]:



class Benchmark:
    """Aggregate-market-cap benchmark stepped forward one trading date at a time.

    The benchmark level on a date is the summed ``market_cap`` of the current
    constituents divided by ``benchmark_divisor``.

    Typical use: build the object, call ``next_date()`` once to position it on
    the first date, assign ``cur_constituents`` (and ``benchmark_divisor``,
    e.g. from ``calc_benchmark_divisor``), then call ``next_benchmark_price()``
    repeatedly.

    Parameters
    ----------
    data:
        Frame indexed by a MultiIndex with levels ``"date"`` and ``"ticker"``
        and containing a ``market_cap`` column.

    Attributes
    ----------
    data: the frame passed in (not copied).
    dates: sorted unique values of the ``"date"`` index level.
    cur_date: date the benchmark is positioned on; ``None`` before the first
        ``next_date()`` call and again once the dates are exhausted.
    cur_constituents: tickers currently in the benchmark (starts empty).
    benchmark_divisor: divisor applied to the aggregate market cap. Starts at
        the neutral value ``1.0`` until the caller assigns a real one.
    price_history: benchmark levels produced so far, in order.
    quarterly_recon_dates: last available date of each January, April, July
        and October present in ``dates``.
    """

    # Hard-coded annual reconstitution dates.
    ANNUAL_RECON_DATES = (
        datetime(2021, 5, 25),
        datetime(2022, 5, 24),
        datetime(2023, 5, 23),
        datetime(2024, 5, 28),
        datetime(2025, 5, 27),
    )
    # Months whose last available trading date triggers a quarterly reconstitution.
    QUARTERLY_RECON_MONTHS = (1, 4, 7, 10)

    def __init__(self, data: pd.DataFrame):
        self.data: pd.DataFrame = data
        # Every attribute gets a real value here so that no method can hit an
        # AttributeError on first use.
        self.price_history: List[float] = []
        self.cur_constituents: List[str] = []  # all current members of the index
        self.cur_date: Optional[datetime] = None
        # Sorted so that dates[0] / dates[-1] and "last date of a month" do not
        # depend on the row order of the input frame.
        self.dates: List[datetime] = (
            self.data.index.get_level_values("date").unique().sort_values().to_list()
        )
        self._i: int = 0  # position in self.dates of the NEXT date to hand out
        self.benchmark_divisor: float = 1.0
        self.quarterly_recon_dates: List[datetime] = []
        self._get_quarterly_reconst_dates()

    def _constituent_market_cap(self) -> float:
        """Sum ``market_cap`` over ``cur_constituents`` on ``cur_date``.

        NaN values and constituents that have no row on the date contribute 0.

        Raises
        ------
        ValueError
            If ``cur_date`` is ``None`` (not started yet, or dates exhausted).
        """
        if self.cur_date is None:
            raise ValueError(
                "No current date: call next_date() first, or the date range is exhausted."
            )
        caps_on_date = self.data["market_cap"].xs(self.cur_date, level="date")
        # reindex yields NaN for tickers absent on this date; sum() skips NaN.
        return float(caps_on_date.reindex(self.cur_constituents).sum())

    def next_benchmark_price(self) -> float:
        """Compute the benchmark level for ``cur_date`` and step to the next date.

        The level is appended to ``price_history`` and also returned. After the
        last date has been priced, ``cur_date`` becomes ``None``.

        Raises
        ------
        ValueError
            If ``cur_date`` is ``None`` or ``benchmark_divisor`` is zero.
        """
        # TODO: check whether cur_date is a reconstitution date and perform the
        # reconstitution before pricing.
        if self.benchmark_divisor == 0:
            raise ValueError("benchmark_divisor must be non-zero.")
        next_benchmark = self._constituent_market_cap() / self.benchmark_divisor
        self.price_history.append(next_benchmark)
        self.next_date()  # increment the day count forward
        return next_benchmark

    def calc_benchmark_divisor(self, start_val: float) -> float:
        """Return the divisor that makes the benchmark equal ``start_val`` today.

        divisor = aggregate market cap of ``cur_constituents`` on ``cur_date``
        divided by ``start_val``. The value is returned only; the caller
        decides whether to store it in ``benchmark_divisor``.

        Raises
        ------
        ValueError
            If ``start_val`` is zero or ``cur_date`` is ``None``.
        """
        if start_val == 0:
            raise ValueError("start_val must be non-zero.")
        return self._constituent_market_cap() / start_val

    #========================================================
    #           Date Handling Methods:
    #========================================================

    def _get_quarterly_reconst_dates(self) -> None:
        """Rebuild ``quarterly_recon_dates``.

        For each January, April, July and October that has at least one date
        in ``self.dates``, keeps the last such date. Months with no data are
        skipped instead of raising. The list is replaced, so repeated calls do
        not accumulate duplicates.

        NOTE(review): if the data ends part-way through one of these months,
        the last available date is used even though it may not be the true
        month end.
        """
        if not self.dates:
            self.quarterly_recon_dates = []
            return
        s_dates: pd.Series = pd.Series(self.dates)
        quarter_dates = s_dates[s_dates.dt.month.isin(self.QUARTERLY_RECON_MONTHS)]  # type: ignore
        last_per_month = quarter_dates.groupby(
            [
                quarter_dates.dt.year.rename("year"),  # type: ignore
                quarter_dates.dt.month.rename("month"),  # type: ignore
            ]
        ).max()
        # groupby sorts its keys, so the result is chronological.
        self.quarterly_recon_dates = last_per_month.to_list()

    def is_reconstitution_date(self) -> bool:
        """Return True when ``cur_date`` is an annual or quarterly reconstitution date."""
        return (
            self.cur_date in self.ANNUAL_RECON_DATES
            or self.cur_date in self.quarterly_recon_dates
        )

    def next_date(self) -> Optional[datetime]:
        """Advance to the next date in ``self.dates`` and return it.

        Returns ``None`` (and sets ``cur_date`` to ``None``) once every date
        has been handed out.
        """
        if self._i >= len(self.dates):
            self.cur_date = None
            return None
        self.cur_date = self.dates[self._i]
        self._i += 1
        return self.cur_date


In [88]:
# Build a benchmark and position it on its first date.
# (The former leading `df.index.get_level_values("date")[0]` expression was
# discarded: only a cell's last expression is displayed.)
bench = Benchmark(df)
bench.next_date()




Timestamp('2022-01-03 00:00:00')

In [None]:
bench.next_date()

# All benchmark dates as a Series, displayed for inspection.
dates: pd.Series = pd.Series(bench.dates)
dates

0      2022-01-03
1      2022-01-04
2      2022-01-05
3      2022-01-06
4      2022-01-07
          ...    
998    2025-12-24
999    2025-12-26
1000   2025-12-29
1001   2025-12-30
1002   2025-12-31
Length: 1003, dtype: datetime64[ns]

In [56]:
d = bench.dates[-1]  # final entry of bench.dates

In [58]:
# Single .loc call (row key, column) instead of chained indexing.
bench.data.loc[(d, "Z"), "market_cap"]

np.float64(16497.162)

In [71]:
import numpy as np  # already imported in the first cell; kept so this cell also runs on its own

# price_to_book of ticker "Z" on date `d`, fetched with a single .loc call.
val = bench.data.loc[(d, "Z"), "price_to_book"]

# np.isnan already yields a boolean; comparing it with True is redundant.
np.isnan(val)

np.True_

In [76]:
# TODO: original note was left unfinished: "index history should be a ..."
# Calendar years covered by bench.dates, first to last.
list(
    range(
        bench.dates[0].year,
        bench.dates[-1].year + 1,  # +1 so the final year is included
    )
)

[2022, 2023, 2024, 2025]

In [84]:

# Last entry of bench.dates that falls in October 2025.
s_dates = pd.Series(bench.dates)
year_month_mask = s_dates.dt.year.eq(2025) & s_dates.dt.month.eq(10)
s_dates[year_month_mask].iloc[-1]



Timestamp('2025-10-31 00:00:00')

In [98]:
# Benchmark.__init__ already fills quarterly_recon_dates through
# _get_quarterly_reconst_dates(); the class has no
# `_get_quarterly_reconst_dates_2`, so that call raised AttributeError.
bench = Benchmark(df)

# 2025-10-31 should be one of the quarterly reconstitution dates.
d = datetime(2025, 10, 31)
d in bench.quarterly_recon_dates

True

In [101]:
# Rebuild the benchmark from df: construction resets the date cursor (_i = 0)
# and recomputes the quarterly reconstitution dates.
bench = Benchmark(df)