In [None]:
from abc import abstractmethod, ABC 
import pandas as pd 
from typing import Iterable, List, Optional
from datetime import datetime
import numpy as np

df = pd.read_csv("bloomberg_data_cleaned.csv")

#set date to datetime object 
df["date"] = pd.to_datetime(df["date"])
df.set_index(["date","ticker"],inplace= True)
pd.set_option('display.float_format', '{:,.4f}'.format)
#df.head()

In [None]:
''' 
========================================================
        Class for Updating and Improving our Dataframe
========================================================
Class Functionality: 
    (1): Add growth on a rolling basis: 
    (2): 

:::: Rolling Basis Methodology Explained::::

unfortunately, we are unable to calculated a 5 year rolling basis 
because we do not have five years of lookback data (we are going to
work on expanding are data include this). Instead we propose the 
below lookback logic as an "best-effort alternative. 

(1) 1st approach that's complicated and not consistent.  
start the benchmark price window on a date such that there is half a year of lookback
data. approximately 151 trading days. The lookback window will continue to expand 
until that rolling window is 2 years of 504 trading days approximately. At that

(2) 2nd approach. use a year lookback window. 

'''

def rolling_growth_rate(price_to_sales_series : pd.Series) -> float:  
    lookback_days = 252 #one trading year 
    """
    Returns: Na if No 
    """
    #see if there are NA values:
    NA_mask = price_to_sales_series.isna()
    if price_to_sales_series.loc[NA_mask].__len__() > 0:
        return np.nan
    print("being read")
    # else slope of linear fit is growth rate
    x_arbitrary = range(price_to_sales_series.__len__())
    slope, intercept = np.polyfit(x_arbitrary, price_to_sales_series,1) 
    return slope 


#df["rolling_test"] = df["price_to_sales"].rolling(252,min_periods=252).apply(rolling_growth_rate)
#df["rolling_test"].loc[df["rolling_test"].isna()]



In [None]:
class DataframeHelper:
    DATAOG: pd.DataFrame
    def __init__(self, data: pd.DataFrame) -> None:
        DataframeHelper.DATAOG = data 
        self.data = data 
    
    def slice_by_ticker(self,ticker: str) -> pd.DataFrame:
        ticker_mask = self.data.index.get_level_values("ticker") == ticker
        return self.data.loc[ticker_mask]

    def slice_by_day(self, day: datetime) -> pd.DataFrame: 
        day_mask = self.data.index.get_level_values("date") == day
        return self.data.loc[day_mask]
    
    def slice_any_row_with_na(self) -> pd.DataFrame:
        NA_mask = self.data.isna() 
        return self.data[NA_mask.any(axis=1)]
    
    def slice_complete_na_rows(self) -> pd.DataFrame: 
        #todo: 
        return pd.DataFrame()
    def slice_by_day_range(self, start_date: datetime, end_date: datetime) -> pd.DataFrame: 
        #todo 
        return pd.DataFrame() 
    
    @staticmethod
    def _rolling_growth_rate(price_to_sales_series : pd.Series) -> float:  
        lookback_days = 252 #one trading year 
        #see if there are NA values, there are growth is NA 
        NA_mask = price_to_sales_series.isna()
        if price_to_sales_series.loc[NA_mask].__len__() > 0:
            return np.nan
        # else slope of linear fit is growth rate
        x_arbitrary = range(price_to_sales_series.__len__())
        slope, intercept = np.polyfit(x_arbitrary, price_to_sales_series,1) 
        return slope*1000
        
    def add_roling_sales_growth_col(self):
        # add rolling sales growth , not very performant 
        self.data["1year_PtoS_growth"] = 0.0
        for ticker in self.data.index.get_level_values("ticker").unique():
            df_slice = self.slice_by_ticker(ticker)
            df_slice["rolling_growth"] = df_slice["price_to_sales"].rolling(252,min_periods= 252).apply(self._rolling_growth_rate)
            ticker_mask = df.index.get_level_values("ticker") == ticker
            self.data.loc[ticker_mask,"1year_PtoS_growth"] = df_slice["rolling_growth"]



In [27]:
"""===============================================
        How to Use DataHelper Example 
==============================================="""
 
"""(1): I want just data on ticker AAL """
dhelp = DataframeHelper(df) 
dhelp.slice_by_ticker("AAL")

"""(2): Get data just from 2022,1,5 """
dhelp = DataframeHelper(df) 
dhelp.slice_by_day(datetime(2022,1,5))

"""(3): all rows with atleast one NA value """
dhelp.slice_any_row_with_na() 



Unnamed: 0_level_0,Unnamed: 1_level_0,market_cap,volume,price_to_book,price_to_sales,pe_ratio,current_cap_share_class,close_price
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-03,AAL,12140.8973,8786970.0000,,0.4051,,12140.8973,18.7500
2022-01-03,ADT,7574.3542,430701.0000,2.1948,1.3097,,7113.4053,8.4200
2022-01-03,AFRM,26754.5374,1807912.0000,10.9242,22.4449,,19822.2759,95.2100
2022-01-03,ALAB,,,,,,,
2022-01-03,ALK,6775.5478,590412.0000,1.7910,1.0961,,6775.5478,54.0700
...,...,...,...,...,...,...,...,...
2025-12-31,WYNN,12511.2491,351075.0000,,1.7768,21.5143,12511.2491,120.3300
2025-12-31,YUM,42003.3200,448630.0000,,5.2416,26.1948,42003.3200,151.2800
2025-12-31,Z,16497.1620,1002015.0000,,,,12686.5149,68.2200
2025-12-31,ZG,16497.1620,495003.0000,3.3081,6.6120,,3386.4618,68.2300


In [None]:
""" 
Function development, final funtion in class above! 
"""

def rolling_growth_rate(price_to_sales_series : pd.Series) -> float:  
    lookback_days = 252 #one trading year 
    """
    Returns: Na if No 
    """
    #see if there are NA values:
    NA_mask = price_to_sales_series.isna()
    if price_to_sales_series.loc[NA_mask].__len__() > 0:
        return np.nan
    # else slope of linear fit is growth rate
    x_arbitrary = range(price_to_sales_series.__len__())
    slope, intercept = np.polyfit(x_arbitrary, price_to_sales_series,1) 
    return slope*1000

dh = DataframeHelper(df)
df2 = dh.slice_by_ticker("XP") 
df2["rolling_test2"] = df2["price_to_sales"].rolling(252,min_periods=252).apply(rolling_growth_rate)
print(df2["rolling_test2"].iloc[500:620])

ticker_mask = df.index.get_level_values("ticker") == "XP"
df.loc[ticker_mask,"rolling_window"] = df2["rolling_test2"]
df.loc[ticker_mask]

date        ticker
2023-12-29  XP        8.3189
2024-01-02  XP        8.3416
2024-01-03  XP        8.3353
2024-01-04  XP        8.2929
2024-01-05  XP        8.2701
                       ...  
2024-06-14  XP       -4.4743
2024-06-17  XP       -4.5989
2024-06-18  XP       -4.7101
2024-06-20  XP       -4.7816
2024-06-21  XP       -4.8457
Name: rolling_test2, Length: 120, dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["rolling_test2"] = df2["price_to_sales"].rolling(252,min_periods=252).apply(rolling_growth_rate)


Unnamed: 0_level_0,Unnamed: 1_level_0,market_cap,volume,price_to_book,price_to_sales,pe_ratio,current_cap_share_class,close_price,rolling window,rolling_window
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-01-03,XP,16221.3175,593088.0000,6.3717,7.2133,24.9346,12296.2168,28.9900,0,
2022-01-04,XP,15740.1056,861274.0000,6.2063,7.0260,24.2872,11931.4446,28.1300,0,
2022-01-05,XP,14962.3329,1781434.0000,5.8933,6.6717,23.0623,11341.8709,26.7400,0,
2022-01-06,XP,15158.1749,1011833.0000,5.9892,6.7803,23.4378,11490.3247,27.0900,0,
2022-01-07,XP,15521.8816,1107360.0000,6.0625,6.8633,23.7246,11766.0246,27.7400,0,
...,...,...,...,...,...,...,...,...,...,...
2025-12-24,XP,8829.8668,674270.0000,2.0317,2.7051,9.6522,7131.4036,16.4300,0,1.7623
2025-12-26,XP,8835.2410,941425.0000,2.0338,2.7079,9.6621,7135.7441,16.4400,0,1.7129
2025-12-29,XP,8840.6153,1071219.0000,2.0500,2.7295,9.7392,7140.0846,16.4500,0,1.6652
2025-12-30,XP,8888.9834,1545768.0000,2.0257,2.6972,9.6238,7179.1489,16.5400,0,1.6146


In [90]:
dh = DataframeHelper(df)
dh.add_roling_sales_growth_col()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_slice["rolling_growth"] = df_slice["price_to_sales"].rolling(252,min_periods= 252).apply(self._rolling_growth_rate)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_slice["rolling_growth"] = df_slice["price_to_sales"].rolling(252,min_periods= 252).apply(self._rolling_growth_rate)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#

In [None]:
dh.data