In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [9]:
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

response = requests.get(url, headers=headers)

# --- DIAGNOSTIC CHECK ---
# Check if the request was successful (should be 200)
print(f"Request Status Code: {response.status_code}")

soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', {'id': 'constituents'})
if table:
    print("\nSuccess! Found the table with id='constituents'.")
    ticker_list = []
    
    # Get all rows from the table body, skip the header row [1:]
    for row in table.find('tbody').find_all('tr')[1:]: 
        
        # Get all data cells ('td') in the row
        cols = row.find_all('td')
        
        if cols:
            # The ticker is the text in the very first cell (index 0)
            ticker = cols[0].text.strip()
            ticker_list.append(ticker)

    print(f"Total tickers: {len(ticker_list)}")
    # Change "." to "-". This is because the Wikipedia list uses "BRK.B" but yfinance uses "BRK-B"
    ticker_list = [ticker.replace('.', '-') for ticker in ticker_list]
    print(ticker_list)
else:
    print("\nError: Could not find table with id='constituents' even with headers.")
    print("This is strange, the page structure may have changed.")


Request Status Code: 200

Success! Found the table with id='constituents'.
Total tickers: 502
['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'AON', 'APA', 'APO', 'AAPL', 'AMAT', 'APP', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BAX', 'BDX', 'BRK-B', 'BBY', 'TECH', 'BIIB', 'BLK', 'BX', 'XYZ', 'BK', 'BA', 'BKNG', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF-B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CPT', 'CPB', 'COF', 'CAH', 'CCL', 'CARR', 'CAT', 'CBOE', 'CBRE', 'CDW', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'COIN', 'CL', 'CMCSA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTV

In [10]:
factors = ["momentum", "defensive", "quality", "value"]

columns = pd.MultiIndex.from_product(
    [ticker_list, factors],
    names=["ticker", "factor"]
)

dates = pd.date_range(start="2022-02-01", end="2025-12-31", freq="M")

df_global = pd.DataFrame(index=dates, columns=columns, dtype=float)

  dates = pd.date_range(start="2022-02-01", end="2025-12-31", freq="M")


In [11]:
df_global.head()

ticker,MMM,MMM,MMM,MMM,AOS,AOS,AOS,AOS,ABT,ABT,...,ZBRA,ZBRA,ZBH,ZBH,ZBH,ZBH,ZTS,ZTS,ZTS,ZTS
factor,momentum,defensive,quality,value,momentum,defensive,quality,value,momentum,defensive,...,quality,value,momentum,defensive,quality,value,momentum,defensive,quality,value
2022-02-28,,,,,,,,,,,...,,,,,,,,,,
2022-03-31,,,,,,,,,,,...,,,,,,,,,,
2022-04-30,,,,,,,,,,,...,,,,,,,,,,
2022-05-31,,,,,,,,,,,...,,,,,,,,,,
2022-06-30,,,,,,,,,,,...,,,,,,,,,,


In [12]:
def get_sector_map(tickers):
    """
    Fetches sector information for a list of tickers.
    Note: Fetching info one-by-one can be slow.
    """
    sector_map = {}
    print("Fetching sector data (this may take a moment)...")
    for i, ticker in enumerate(tickers, 1):
        try:
            # In a production environment, cache this data
            info = yf.Ticker(ticker).info
            sector_map[ticker] = info.get('sector', 'Unknown')
            if i % 50 == 0:
                print(f"Processed {i}/{len(tickers)} tickers...")
        except Exception as e:
            print(f"Could not fetch sector for {ticker}: {e}")
            sector_map[ticker] = 'Unknown'
    return sector_map

In [13]:
sector_map = get_sector_map(ticker_list)

Fetching sector data (this may take a moment)...
Processed 50/502 tickers...
Processed 100/502 tickers...
Processed 150/502 tickers...
Processed 200/502 tickers...
Processed 250/502 tickers...
Processed 300/502 tickers...
Processed 350/502 tickers...
Processed 400/502 tickers...
Processed 450/502 tickers...
Processed 500/502 tickers...


In [14]:
def calculate_momentum_factor(tickers, lookback_years=5):
    # 1. FETCH DATA
    print("Downloading price history...")
    print(f"Attempting to download {len(tickers)} tickers...")
    
    try:
        data = yf.download(
            tickers, 
            period=f"{lookback_years}y", 
            interval="1mo", 
            progress=True,
            auto_adjust=True,
            threads=True
        )
        
        # When auto_adjust=True, yfinance returns adjusted prices directly
        # For multiple tickers, columns are multi-level: (Price Type, Ticker)
        # We need to extract just the Close prices
        if isinstance(data.columns, pd.MultiIndex):
            # Multi-ticker case: select 'Close' from the multi-level columns
            if 'Close' in data.columns.get_level_values(0):
                data = data['Close']
            else:
                print("Available columns:", data.columns.get_level_values(0).unique().tolist())
                print("Error: Could not find 'Close' prices in data")
                return None, None, None
        else:
            # Single ticker or already the right format
            if 'Close' in data.columns:
                data = data['Close']
            
    except Exception as e:
        print(f"Error downloading data: {e}")
        return None, None, None
    
    # Drop columns with insufficient history (less than 12 months)
    # We require > 12 non-NaN values
    data = data.dropna(axis=1, thresh=12)
    valid_tickers = data.columns.tolist()
    
    if len(valid_tickers) == 0:
        print("ERROR: No valid tickers with sufficient data!")
        print("This may be due to network issues or yfinance rate limiting.")
        print("Try running the cell again or reduce the number of tickers.")
        return None, None, None
    
    # 2. CALCULATE RAW MOMENTUM
    # Formula: P(t-2) / P(t-12) - 1
    # logic: shift(2) moves the price from 2 months ago to current row
    mom_raw = data.shift(2) / data.shift(12) - 1
    
    # Drop the first 12 months as they will be NaN due to the shift
    mom_raw = mom_raw.dropna(how='all')

    # 3. PREPARE FOR CROSS-SECTIONAL STANDARDIZATION
    # Convert from Wide (Tickers as columns) to Long (Date/Ticker rows)
    df_long = mom_raw.stack().reset_index()
    df_long.columns = ['Date', 'Ticker', 'Raw_Momentum']
    
    # Map Sectors
    sector_map = get_sector_map(valid_tickers)
    df_long['Sector'] = df_long['Ticker'].map(sector_map)

    # 4. WINSORIZATION (Cross-sectional per Date)
    # We clip outliers at the 5th and 95th percentiles for each month
    def winsorize_group(group):
        lower = group.quantile(0.05)
        upper = group.quantile(0.95)
        return group.clip(lower, upper)

    df_long['Mom_Winsorized'] = df_long.groupby('Date')['Raw_Momentum'] \
                                       .transform(winsorize_group)

    # 5. SECTOR Z-SCORES
    # Calculate Z-Score per Date and Sector
    # Formula: (x - mean) / std
    def calc_zscore(group):
        if len(group) < 2: 
            return 0.0 # Neutral score if not enough peers in sector
        sigma = group.std()
        if sigma == 0:
            return 0.0
        return (group - group.mean()) / sigma

    df_long['Z_Momentum'] = df_long.groupby(['Date', 'Sector'])['Mom_Winsorized'] \
                                   .transform(calc_zscore)
    
    # 6. ADD QUINTILES
    # Group by date and assign quintiles (1 = lowest momentum, 5 = highest momentum)
    df_long['Momentum_Quintile'] = df_long.groupby('Date')['Z_Momentum'].transform(
        lambda x: pd.qcut(x, q=5, labels=[1, 2, 3, 4, 5], duplicates='drop')
    )
    
    # 7. CALCULATE FORWARD RETURNS
    # Convert price_data to long format
    price_long = data.stack().reset_index()
    price_long.columns = ['Date', 'Ticker', 'Price']
    price_long = price_long.sort_values(['Ticker', 'Date'])
    
    # Calculate forward return: P(t+1)/P(t) - 1
    price_long['Forward_Return'] = price_long.groupby('Ticker')['Price'].transform(
        lambda x: x.shift(-1) / x - 1
    )
    
    # Merge forward returns with detailed_df
    df_long = df_long.merge(
        price_long[['Date', 'Ticker', 'Forward_Return']],
        on=['Date', 'Ticker'],
        how='left'
    )
    
    # Pivot back to wide format for easy viewing/trading matrix
    final_factor = df_long.pivot(index='Date', columns='Ticker', values='Z_Momentum')
    
    return final_factor, df_long, data

# --- EXECUTION ---

# Run calculation
z_score_matrix, detailed_df, price_data = calculate_momentum_factor(ticker_list)

# Check if calculation was successful
if z_score_matrix is not None and not z_score_matrix.empty:
    # Display quintile distribution
    print("MOMENTUM QUINTILE DISTRIBUTION")
    print("\nQuintile Distribution:")
    print(detailed_df['Momentum_Quintile'].value_counts().sort_index())
    print(f"\nSample of data with quintiles:")
    print(detailed_df[['Date', 'Ticker', 'Z_Momentum', 'Momentum_Quintile']].head(20))
    
    # Calculate backtest statistics
    print("MOMENTUM STRATEGY BACKTEST - Monthly Rebalancing")
    
    quintile_stats = detailed_df.groupby('Momentum_Quintile')['Forward_Return'].agg([
        ('Mean_Monthly_Return', 'mean'),
        ('Median_Monthly_Return', 'median'),
        ('Std_Dev', 'std'),
        ('Count', 'count')
    ])
    
    # Convert to percentage
    quintile_stats['Mean_Monthly_Return'] = quintile_stats['Mean_Monthly_Return'] * 100
    quintile_stats['Median_Monthly_Return'] = quintile_stats['Median_Monthly_Return'] * 100
    quintile_stats['Std_Dev'] = quintile_stats['Std_Dev'] * 100
    
    print("\nAverage Returns by Momentum Quintile (%):")
    print(quintile_stats)
    
    # Calculate spread (Q5 - Q1)
    if 5 in quintile_stats.index and 1 in quintile_stats.index:
        spread = quintile_stats.loc[5, 'Mean_Monthly_Return'] - quintile_stats.loc[1, 'Mean_Monthly_Return']
        print(f"\nMomentum Spread (Q5 - Q1): {spread:.2f}% per month")
        print(f"Annualized Spread: {spread * 12:.2f}%")
    
    # Output the most recent Momentum Z-Scores
    print("Most Recent Momentum Z-Scores (Top 10):")
    latest_date = z_score_matrix.index[-1]
    print(z_score_matrix.loc[latest_date].sort_values(ascending=False).head(10))
else:
    print("\nCalculation failed. Please check the errors above and try again.")


[                       1%                       ]  3 of 502 completed

Downloading price history...
Attempting to download 502 tickers...


[*********************100%***********************]  502 of 502 completed


Fetching sector data (this may take a moment)...
Processed 50/500 tickers...
Processed 100/500 tickers...
Processed 150/500 tickers...
Processed 200/500 tickers...
Processed 250/500 tickers...
Processed 300/500 tickers...
Processed 350/500 tickers...
Processed 400/500 tickers...
Processed 450/500 tickers...
Processed 500/500 tickers...
MOMENTUM QUINTILE DISTRIBUTION

Quintile Distribution:
Momentum_Quintile
1    4772
2    4751
3    4754
4    4753
5    4763
Name: count, dtype: int64

Sample of data with quintiles:
         Date Ticker  Z_Momentum Momentum_Quintile
0  2021-12-01      A    0.476929                 4
1  2021-12-01   AAPL   -0.521990                 2
2  2021-12-01   ABBV   -0.553334                 2
3  2021-12-01    ABT   -0.178007                 3
4  2021-12-01   ACGL   -1.023012                 1
5  2021-12-01    ACN    0.622695                 4
6  2021-12-01   ADBE    0.232221                 4
7  2021-12-01    ADI   -0.247786                 3
8  2021-12-01    ADM  

  quintile_stats = detailed_df.groupby('Momentum_Quintile')['Forward_Return'].agg([


In [15]:
def calculate_defensive_factor(tickers, sector_map, lookback_years=5, min_daily_obs=40):
    """
    Calculate Low Volatility (Defensive) Factor
    
    Methodology:
    1. Download daily price data
    2. Calculate 60-day realized volatility using log returns
    3. Annualize volatility: sqrt(252) * std(log returns over 60 days)
    4. Invert volatility (lower vol = better): x_low = -sigma
    5. Winsorize across stocks
    6. Calculate sector-neutral Z-scores
    
    Parameters:
    - tickers: List of stock tickers
    - sector_map: Dictionary mapping tickers to sectors (to avoid re-fetching)
    - lookback_years: Years of historical data to fetch
    - min_daily_obs: Minimum daily observations required (default 40 out of 60)
    
    Returns:
    - z_score_matrix: Wide format (Date x Ticker) of defensive Z-scores
    - detailed_df: Long format with all intermediate calculations
    """
    
    
    # 1. FETCH DAILY DATA
    print(f"\nDownloading daily price data for {len(tickers)} tickers...")
    
    try:
        data = yf.download(
            tickers,
            period=f"{lookback_years}y",
            interval="1d",
            progress=True,
            auto_adjust=True,
            threads=True
        )
        
        # Extract Close prices
        if isinstance(data.columns, pd.MultiIndex):
            if 'Close' in data.columns.get_level_values(0):
                prices = data['Close']
            else:
                print("Error: Could not find 'Close' prices")
                return None, None
        else:
            if 'Close' in data.columns:
                prices = data['Close']
            else:
                prices = data
                
    except Exception as e:
        print(f"Error downloading data: {e}")
        return None, None
    
    print(f"Data shape: {prices.shape}")
    
    # 2. CALCULATE DAILY LOG RETURNS
    print("\nCalculating daily log returns...")
    log_returns = np.log(prices / prices.shift(1))
    
    # 3. CALCULATE 60-DAY ROLLING VOLATILITY (ANNUALIZED)
    print("Calculating 60-day rolling realized volatility...")
    window = 60
    
    # Rolling standard deviation of log returns
    rolling_vol = log_returns.rolling(window=window, min_periods=min_daily_obs).std()
    
    # Annualize: multiply by sqrt(252 trading days)
    annualized_vol = rolling_vol * np.sqrt(252)
    
    # 4. RESAMPLE TO MONTHLY (END OF MONTH)
    print("Resampling to monthly frequency...")
    monthly_vol = annualized_vol.resample('M').last()
    
    # Drop tickers with insufficient data
    monthly_vol = monthly_vol.dropna(axis=1, thresh=12)
    valid_tickers = monthly_vol.columns.tolist()
    
    print(f"Valid tickers with sufficient data: {len(valid_tickers)}")
    
    if len(valid_tickers) == 0:
        print("ERROR: No valid tickers with sufficient data!")
        return None, None
    
    # 5. INVERT VOLATILITY (LOWER VOL IS BETTER)
    # Define x_low = -sigma
    low_vol_signal = -monthly_vol
    
    # Drop rows with all NaN
    low_vol_signal = low_vol_signal.dropna(how='all')
    
    # 6. PREPARE FOR CROSS-SECTIONAL STANDARDIZATION
    # Convert to long format
    df_long = low_vol_signal.stack().reset_index()
    df_long.columns = ['Date', 'Ticker', 'Low_Vol_Raw']
    
    # Add original volatility for reference
    vol_long = monthly_vol.stack().reset_index()
    vol_long.columns = ['Date', 'Ticker', 'Realized_Vol']
    df_long = df_long.merge(vol_long, on=['Date', 'Ticker'], how='left')
    
    # Map Sectors (reuse sector map from momentum calculation)
    print("\nMapping sectors from existing sector map...")
    df_long['Sector'] = df_long['Ticker'].map(sector_map)
    
    # 7. WINSORIZATION (Cross-sectional per Date)
    print("Applying winsorization...")
    def winsorize_group(group):
        lower = group.quantile(0.05)
        upper = group.quantile(0.95)
        return group.clip(lower, upper)
    
    df_long['Low_Vol_Winsorized'] = df_long.groupby('Date')['Low_Vol_Raw'] \
                                           .transform(winsorize_group)
    
    # 8. SECTOR Z-SCORES
    print("Calculating sector-neutral Z-scores...")
    def calc_zscore(group):
        if len(group) < 2:
            return 0.0
        sigma = group.std()
        if sigma == 0:
            return 0.0
        return (group - group.mean()) / sigma
    
    df_long['Z_LowVol'] = df_long.groupby(['Date', 'Sector'])['Low_Vol_Winsorized'] \
                                 .transform(calc_zscore)
    
    # Pivot to wide format
    final_factor = df_long.pivot(index='Date', columns='Ticker', values='Z_LowVol')
    
    return final_factor, df_long

# --- EXECUTION ---
# Extract sector map from detailed_df to reuse it
sector_map = detailed_df[['Ticker', 'Sector']].drop_duplicates().set_index('Ticker')['Sector'].to_dict()

print("\nRunning defensive factor calculation...")
lowvol_matrix, defensive_df = calculate_defensive_factor(ticker_list, sector_map)

# Check if calculation was successful
if lowvol_matrix is not None and not lowvol_matrix.empty:
    print("\n" + "="*70)
    print("CALCULATION SUCCESSFUL")
    print("="*70)
    print(f"\nData shape: {defensive_df.shape}")
    print(f"Date range: {defensive_df['Date'].min()} to {defensive_df['Date'].max()}")
    print(f"\nMost Recent Low Volatility Z-Scores (Top 10):")
    latest_date = lowvol_matrix.index[-1]
    print(lowvol_matrix.loc[latest_date].sort_values(ascending=False).head(10))
else:
    print("\nCalculation failed. Please check the errors above and try again.")

[                       0%                       ]


Running defensive factor calculation...

Downloading daily price data for 502 tickers...


[*********************100%***********************]  502 of 502 completed
  monthly_vol = annualized_vol.resample('M').last()


Data shape: (1256, 502)

Calculating daily log returns...
Calculating 60-day rolling realized volatility...
Resampling to monthly frequency...
Valid tickers with sufficient data: 500

Mapping sectors from existing sector map...
Applying winsorization...
Calculating sector-neutral Z-scores...

CALCULATION SUCCESSFUL

Data shape: (29294, 7)
Date range: 2021-01-31 00:00:00 to 2025-11-30 00:00:00

Most Recent Low Volatility Z-Scores (Top 10):
Ticker
DAY     1.719144
BR      1.659116
JNJ     1.629319
TJX     1.519044
MCD     1.514995
MSFT    1.450254
LIN     1.423737
CVX     1.415749
ABT     1.367711
GEN     1.356906
Name: 2025-11-30 00:00:00, dtype: float64


In [16]:
# Align dates to Month End to ensure matching with df_global
def align_to_month_end(df):
    df_aligned = df.copy()
    # Convert to period 'M' and back to timestamp 'M' (Month End)
    df_aligned.index = pd.to_datetime(df_aligned.index).to_period('M').to_timestamp('M')
    return df_aligned

# Align the source matrices
z_score_aligned = align_to_month_end(z_score_matrix)
lowvol_aligned = align_to_month_end(lowvol_matrix)

print("Updating df_global with Momentum and Defensive scores...")

# 1. Update Momentum
# Iterate through tickers that exist in both
common_tickers_mom = z_score_aligned.columns.intersection(df_global.columns.get_level_values('ticker').unique())
print(common_tickers_mom)
for ticker in common_tickers_mom:
    # Reindex the source series to match df_global's index
    # This aligns dates and handles any missing/extra dates
    series_aligned = z_score_aligned[ticker].reindex(df_global.index)
    df_global.loc[:, (ticker, "momentum")] = series_aligned

# 2. Update Defensive (Low Vol)
common_tickers_def = lowvol_aligned.columns.intersection(df_global.columns.get_level_values('ticker').unique())

for ticker in common_tickers_def:
    series_aligned = lowvol_aligned[ticker].reindex(df_global.index)
    df_global.loc[:, (ticker, "defensive")] = series_aligned

print("Update complete.")

# Verify
print("\nSample of df_global (Momentum & Defensive):")
# Pick a ticker that likely has data
if len(common_tickers_mom) > 0:
    sample_ticker = common_tickers_mom[30]
    print(f"Ticker: {sample_ticker}")
    print(df_global.loc[:, (sample_ticker, ["momentum", "defensive"])].dropna().head())
else:
    print("No common tickers found.")

Updating df_global with Momentum and Defensive scores...
Index(['A', 'AAPL', 'ABBV', 'ABNB', 'ABT', 'ACGL', 'ACN', 'ADBE', 'ADI', 'ADM',
       ...
       'WY', 'WYNN', 'XEL', 'XOM', 'XYL', 'XYZ', 'YUM', 'ZBH', 'ZBRA', 'ZTS'],
      dtype='object', length=500)
Update complete.

Sample of df_global (Momentum & Defensive):
Ticker: AMT
ticker           AMT          
factor      momentum defensive
2022-02-28 -0.087586  0.039027
2022-03-31 -1.059595 -0.012202
2022-04-30 -1.266129  0.474757
2022-05-31 -1.019809 -0.023569
2022-06-30 -1.401087 -0.227043


In [17]:
from pandas.tseries.offsets import MonthEnd
import pandas as pd

df_quality_value = pd.read_csv("spx_quality_value.csv")

df_quality_value["Date"] = pd.to_datetime(
    df_quality_value["Date"],
    format="%m/%d/%Y"
)

df_quality_value["Date"] = df_quality_value["Date"] + MonthEnd(0)

df_quality_value["EV/EBIT"] = pd.to_numeric(df_quality_value["EV/EBIT"], errors="coerce")
df_quality_value["ROIC"]    = pd.to_numeric(df_quality_value["ROIC"],    errors="coerce")
df_quality_value["EV/EBIT"] = 1.0 / df_quality_value["EV/EBIT"] 
df_quality_value.rename(columns={"EV/EBIT" : "EBIT/EV"})
df_quality_value

Unnamed: 0,Date,Ticker,EV/EBIT,ROIC
0,2021-12-31,A,0.029270,12.3997
1,2022-01-31,A,0.028335,12.3997
2,2022-02-28,A,0.031962,12.4219
3,2022-03-31,A,0.035243,12.4219
4,2022-04-30,A,0.034802,12.4219
...,...,...,...,...
23836,2025-07-31,ZTS,0.045290,15.0786
23837,2025-08-31,ZTS,0.048661,15.0786
23838,2025-09-30,ZTS,0.047390,15.0786
23839,2025-10-31,ZTS,0.050205,14.4794


In [18]:
df = df_quality_value.copy()
df["Sector"] = df["Ticker"].map(sector_map).fillna("Unknown")

# Decide which columns are factors (everything numeric that is not Date/Ticker/Sector)
exclude_cols = {"Date", "Ticker", "Sector"}
factor_cols = [
    c for c in df.columns
    if c not in exclude_cols and np.issubdtype(df[c].dtype, np.number)
]

print("Factors to normalise:", factor_cols)

# --------- 2) Winsorise cross-sectionally by month ---------
def winsorise_month(group, lower_q=0.01, upper_q=0.99):
    for col in factor_cols:
        p1  = group[col].quantile(lower_q)
        p99 = group[col].quantile(upper_q)
        group[col + "_win"] = group[col].clip(lower=p1, upper=p99)
    return group

df = df.groupby("Date", group_keys=False).apply(winsorise_month)

# --------- 3) Sector z-score within (Date, Sector) ---------
def sector_z_scores(group):
    for col in factor_cols:
        win_col = col + "_win"
        z_col   = col + "_z"

        mu = group[win_col].mean()
        sigma = group[win_col].std(ddof=1)

        if sigma == 0 or np.isnan(sigma):
            group[z_col] = np.nan
        else:
            group[z_col] = (group[win_col] - mu) / sigma
    return group

df = df.groupby(["Date", "Sector"], group_keys=False).apply(sector_z_scores)

# If you only want the normalised (z-scored) values and not the winsorised columns, you can drop them:
# df = df.drop(columns=[c for c in df.columns if c.endswith("_win")])

df_normalised = df
df_normalised.head()


Factors to normalise: ['EV/EBIT', 'ROIC']


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtrac

Unnamed: 0,Date,Ticker,EV/EBIT,ROIC,Sector,EV/EBIT_win,ROIC_win,EV/EBIT_z,ROIC_z
0,2021-12-31,A,0.02927,12.3997,Healthcare,0.02927,12.3997,-0.832565,-0.000117
1,2022-01-31,A,0.028335,12.3997,Healthcare,0.028335,12.3997,-0.784013,-0.048445
2,2022-02-28,A,0.031962,12.4219,Healthcare,0.031962,12.4219,-0.728157,-0.004601
3,2022-03-31,A,0.035243,12.4219,Healthcare,0.035243,12.4219,-0.554917,-0.013044
4,2022-04-30,A,0.034802,12.4219,Healthcare,0.034802,12.4219,-0.566935,0.041856


In [19]:
df_normalised

Unnamed: 0,Date,Ticker,EV/EBIT,ROIC,Sector,EV/EBIT_win,ROIC_win,EV/EBIT_z,ROIC_z
0,2021-12-31,A,0.029270,12.3997,Healthcare,0.029270,12.3997,-0.832565,-0.000117
1,2022-01-31,A,0.028335,12.3997,Healthcare,0.028335,12.3997,-0.784013,-0.048445
2,2022-02-28,A,0.031962,12.4219,Healthcare,0.031962,12.4219,-0.728157,-0.004601
3,2022-03-31,A,0.035243,12.4219,Healthcare,0.035243,12.4219,-0.554917,-0.013044
4,2022-04-30,A,0.034802,12.4219,Healthcare,0.034802,12.4219,-0.566935,0.041856
...,...,...,...,...,...,...,...,...,...
23836,2025-07-31,ZTS,0.045290,15.0786,Healthcare,0.045290,15.0786,-0.369071,0.518135
23837,2025-08-31,ZTS,0.048661,15.0786,Healthcare,0.048661,15.0786,-0.358546,0.517568
23838,2025-09-30,ZTS,0.047390,15.0786,Healthcare,0.047390,15.0786,-0.358267,0.517568
23839,2025-10-31,ZTS,0.050205,14.4794,Healthcare,0.050205,14.4794,-0.256269,0.439994


In [20]:
df_qv = df_normalised[["Date", "Ticker", "EV/EBIT_z", "ROIC_z"]].copy()
df_qv = df_qv.rename(columns={
    "EV/EBIT_z": "value",
    "ROIC_z": "quality"
})

df_qv = df_qv.set_index(["Date", "Ticker"])

df_qv_wide = df_qv.unstack("Ticker")      # columns: (factor, Ticker)
df_qv_wide = df_qv_wide.swaplevel(axis=1) # now: (Ticker, factor)
df_qv_wide = df_qv_wide.sort_index(axis=1)

# Make sure column names match df_global
df_qv_wide.columns.set_names(["ticker", "factor"], inplace=True)

# 4) Update df_global in place (only matching index/columns are filled)
df_global.update(df_qv_wide)

# df_global now has quality & value filled where available
df_global.head()


ticker,MMM,MMM,MMM,MMM,AOS,AOS,AOS,AOS,ABT,ABT,...,ZBRA,ZBRA,ZBH,ZBH,ZBH,ZBH,ZTS,ZTS,ZTS,ZTS
factor,momentum,defensive,quality,value,momentum,defensive,quality,value,momentum,defensive,...,quality,value,momentum,defensive,quality,value,momentum,defensive,quality,value
2022-02-28,-0.951138,0.833597,-0.222626,0.891645,1.020438,0.547649,0.704285,-0.045489,-0.426678,0.607635,...,0.612203,-0.312895,-1.811571,-0.389831,-0.985093,-0.426869,1.317383,0.47558,0.119167,-0.842493
2022-03-31,-1.302146,0.881599,-0.230946,1.134598,0.520869,0.684724,0.695043,0.184541,-0.350766,0.481924,...,0.589274,-0.13066,-1.963255,-0.179312,-0.968841,-0.482919,0.905216,0.326307,0.107607,-0.671347
2022-04-30,-1.623111,1.187283,-0.372789,1.000279,0.199596,0.567783,0.79898,0.240995,-0.33002,0.512633,...,0.641488,-0.118717,-1.812422,-0.34554,-1.00357,-0.49414,0.300224,0.539838,0.149127,-0.685685
2022-05-31,-1.429615,1.162275,-0.369093,0.828876,-0.523202,0.312245,0.810383,0.588815,-0.390014,0.535799,...,0.575922,-0.064808,-1.684447,0.370928,-1.004145,-0.468246,-0.143121,0.303216,0.153191,-0.607311
2022-06-30,-1.163771,1.117635,-0.365133,0.66743,-0.729656,0.371013,0.811978,0.379333,-0.025424,0.575285,...,0.576111,-0.067514,-1.094248,0.302247,-1.004145,-0.420942,-0.202688,0.394114,0.153191,-0.524816


In [21]:
# Calculate the composite signal by averaging the 4 factors
# We take the mean across the 'factor' level (level 1) of the columns
# This will result in a DataFrame with Date index and Ticker columns
df_signal = df_global.groupby(level='ticker', axis=1).mean()

print("Signal DataFrame created.")
print(f"Shape: {df_signal.shape}")
print("\nSample of df_signal (First 5 rows, first 5 columns):")
print(df_signal.iloc[:5, :5])

# Optional: Check for missing values
print(f"\nTotal missing values: {df_signal.isna().sum().sum()}")
print(f"Percentage of missing values: {df_signal.isna().sum().sum() / df_signal.size * 100:.2f}%")

Signal DataFrame created.
Shape: (47, 502)

Sample of df_signal (First 5 rows, first 5 columns):
ticker             A      AAPL      ABBV      ABNB       ABT
2022-02-28 -0.127266  1.015874  0.155431 -1.537786 -0.195561
2022-03-31 -0.216583  1.242434  0.324296 -1.454247 -0.180915
2022-04-30 -0.299053  1.044159  0.438667 -0.853759 -0.120947
2022-05-31 -0.361356  1.053965  0.526917 -0.330173 -0.110247
2022-06-30 -0.394775  0.976640  0.453757 -0.557573 -0.006353

Total missing values: 699
Percentage of missing values: 2.96%


  df_signal = df_global.groupby(level='ticker', axis=1).mean()


In [22]:
df_signal

ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WY,WYNN,XEL,XOM,XYL,XYZ,YUM,ZBH,ZBRA,ZTS
2022-02-28,-0.127266,1.015874,0.155431,-1.537786,-0.195561,0.622659,0.531822,-0.235388,-0.514558,0.309244,...,1.602215,-1.167619,-0.105715,0.353229,-0.770741,-1.429756,0.67007,-0.903341,0.035739,0.267409
2022-03-31,-0.216583,1.242434,0.324296,-1.454247,-0.180915,0.681576,0.462696,-0.074534,-0.490462,0.529643,...,1.729321,-1.361995,-0.034978,0.37389,-0.733623,-1.383953,0.634205,-0.898582,0.057756,0.166946
2022-04-30,-0.299053,1.044159,0.438667,-0.853759,-0.120947,0.773699,0.294812,-0.16996,-0.424111,0.352259,...,1.792802,-1.306048,-0.155388,0.412694,-0.938487,-1.40531,0.651151,-0.913918,-0.102298,0.075876
2022-05-31,-0.361356,1.053965,0.526917,-0.330173,-0.110247,0.820084,0.371029,-0.143537,-0.376741,0.485153,...,1.7242,-1.263754,-0.282241,0.555464,-0.826223,-1.475392,0.586229,-0.696478,-0.1194,-0.073506
2022-06-30,-0.394775,0.97664,0.453757,-0.557573,-0.006353,0.881137,0.312577,-0.264623,-0.283991,0.388828,...,2.221922,-1.354326,0.01666,0.461742,-0.862049,-1.49928,0.732945,-0.554272,-0.13178,-0.04505
2022-07-31,-0.362548,0.824132,0.478551,-0.562206,0.082998,0.925276,0.25455,-0.233734,-0.042505,0.596125,...,1.961746,-1.307513,-0.021647,0.402165,-0.845077,-1.519614,0.5748,-0.471845,-0.271263,-0.174339
2022-08-31,-0.441758,0.96108,0.538591,-0.812392,0.085228,0.945577,0.35836,-0.244044,-0.095061,0.287566,...,1.667377,-1.226934,0.01669,0.468997,-0.793363,-1.560363,0.627349,-0.422918,-0.622878,-0.105069
2022-09-30,-0.35023,0.964234,0.403675,-0.692899,0.093242,0.823012,0.357806,-0.578307,-0.114471,0.465829,...,1.437913,-1.166235,0.040922,0.475759,-0.809627,-1.590419,0.708031,-0.490226,-0.614172,-0.053049
2022-10-31,-0.161847,0.880491,0.378622,-0.516753,-0.073713,0.381195,0.287606,-0.559218,-0.212977,0.433096,...,1.840741,-1.307457,0.050812,0.558204,-0.76128,-1.56333,0.421036,-0.342047,-0.573819,-0.097237
2022-11-30,-0.197656,0.70478,0.430994,-0.621669,-0.028737,0.533114,0.220293,-0.499171,-0.195604,0.759893,...,1.496351,-0.945669,0.011152,0.65095,-0.594492,-1.385883,0.549389,-0.212187,-0.656513,-0.407887


In [32]:
df_signal['A']['2022-02-28']

np.float64(-0.12726631059094)

In [42]:
df_signal

ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WY,WYNN,XEL,XOM,XYL,XYZ,YUM,ZBH,ZBRA,ZTS
2022-02-28,-0.127266,1.015874,0.155431,-1.537786,-0.195561,0.622659,0.531822,-0.235388,-0.514558,0.309244,...,1.602215,-1.167619,-0.105715,0.353229,-0.770741,-1.429756,0.67007,-0.903341,0.035739,0.267409
2022-03-31,-0.216583,1.242434,0.324296,-1.454247,-0.180915,0.681576,0.462696,-0.074534,-0.490462,0.529643,...,1.729321,-1.361995,-0.034978,0.37389,-0.733623,-1.383953,0.634205,-0.898582,0.057756,0.166946
2022-04-30,-0.299053,1.044159,0.438667,-0.853759,-0.120947,0.773699,0.294812,-0.16996,-0.424111,0.352259,...,1.792802,-1.306048,-0.155388,0.412694,-0.938487,-1.40531,0.651151,-0.913918,-0.102298,0.075876
2022-05-31,-0.361356,1.053965,0.526917,-0.330173,-0.110247,0.820084,0.371029,-0.143537,-0.376741,0.485153,...,1.7242,-1.263754,-0.282241,0.555464,-0.826223,-1.475392,0.586229,-0.696478,-0.1194,-0.073506
2022-06-30,-0.394775,0.97664,0.453757,-0.557573,-0.006353,0.881137,0.312577,-0.264623,-0.283991,0.388828,...,2.221922,-1.354326,0.01666,0.461742,-0.862049,-1.49928,0.732945,-0.554272,-0.13178,-0.04505
2022-07-31,-0.362548,0.824132,0.478551,-0.562206,0.082998,0.925276,0.25455,-0.233734,-0.042505,0.596125,...,1.961746,-1.307513,-0.021647,0.402165,-0.845077,-1.519614,0.5748,-0.471845,-0.271263,-0.174339
2022-08-31,-0.441758,0.96108,0.538591,-0.812392,0.085228,0.945577,0.35836,-0.244044,-0.095061,0.287566,...,1.667377,-1.226934,0.01669,0.468997,-0.793363,-1.560363,0.627349,-0.422918,-0.622878,-0.105069
2022-09-30,-0.35023,0.964234,0.403675,-0.692899,0.093242,0.823012,0.357806,-0.578307,-0.114471,0.465829,...,1.437913,-1.166235,0.040922,0.475759,-0.809627,-1.590419,0.708031,-0.490226,-0.614172,-0.053049
2022-10-31,-0.161847,0.880491,0.378622,-0.516753,-0.073713,0.381195,0.287606,-0.559218,-0.212977,0.433096,...,1.840741,-1.307457,0.050812,0.558204,-0.76128,-1.56333,0.421036,-0.342047,-0.573819,-0.097237
2022-11-30,-0.197656,0.70478,0.430994,-0.621669,-0.028737,0.533114,0.220293,-0.499171,-0.195604,0.759893,...,1.496351,-0.945669,0.011152,0.65095,-0.594492,-1.385883,0.549389,-0.212187,-0.656513,-0.407887


In [44]:
try:
    prices = yf.download(
        ticker_list, 
        period="5y", 
        interval="1mo",
        progress=True,
        auto_adjust=True,
        threads=True
    )
    
    # When auto_adjust=True, yfinance returns adjusted prices directly
    # For multiple tickers, columns are multi-level: (Price Type, Ticker)
    # We need to extract just the Close prices
    if isinstance(prices.columns, pd.MultiIndex):
        # Multi-ticker case: select 'Close' from the multi-level columns
        if 'Close' in prices.columns.get_level_values(0):
            prices = prices['Close']
        else:
            print("Available columns:", prices.columns.get_level_values(0).unique().tolist())
            print("Error: Could not find 'Close' prices in data")
    else:
        # Single ticker or already the right format
        if 'Close' in prices.columns:
            prices = prices['Close']
        
except Exception as e:
    print(f"Error downloading data: {e}")

[*********************100%***********************]  502 of 502 completed


In [45]:
prices

Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WY,WYNN,XEL,XOM,XYL,XYZ,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-01,114.531624,129.167389,88.171532,,100.034485,34.298908,243.131653,500.119995,135.255081,43.853817,...,27.248526,109.669479,56.950058,33.689522,96.035034,217.639999,99.369385,143.75676,384.329987,158.226669
2021-01-01,116.155495,128.456757,84.328674,183.630005,112.916809,29.867723,225.176697,458.769989,135.486008,43.50584,...,25.496939,96.742027,55.02261,36.648193,91.129028,215.960007,92.897919,143.596237,387.829987,147.471054
2021-02-01,118.185547,118.040833,89.693939,206.350006,109.876717,34.061184,234.333237,459.670013,143.293518,49.221367,...,27.687756,128.04007,50.379356,44.437145,93.931137,230.029999,94.765228,152.370667,499.429993,148.647629
2021-03-01,123.0942,119.084885,90.093536,187.940002,109.931755,36.485977,258.00946,475.369995,143.223953,49.933369,...,29.101976,121.858231,57.189465,46.405334,99.514893,227.050003,99.49617,149.586029,485.179993,150.792465
2021-04-01,129.38736,128.16124,92.824165,172.710007,110.151909,37.760181,270.823486,508.339996,141.450745,55.303402,...,31.854872,124.803345,61.765518,47.577309,104.690269,244.820007,109.92588,165.798386,487.73999,165.682129
2021-05-01,133.937988,121.483154,95.385223,140.399994,107.398399,37.931343,264.344696,504.579987,152.016144,58.281876,...,31.18935,128.176147,61.401661,48.516548,111.757919,222.520004,110.339752,157.534653,497.049988,169.429077
2021-06-01,143.324219,133.750076,94.913345,153.139999,106.735504,37.027988,276.177338,585.640015,159.670456,53.379051,...,28.28075,118.874214,57.070293,53.195286,113.771637,243.800003,106.237152,150.506287,529.48999,178.711853
2021-07-01,148.579712,142.441437,97.997368,144.009995,111.384995,37.085041,297.62204,621.630005,155.274323,52.603905,...,27.839159,95.575645,59.515339,48.548702,119.357796,247.259995,121.346611,153.163147,552.47998,194.381241
2021-08-01,170.368362,148.27153,102.906906,154.990005,116.790443,39.081928,316.200043,663.700012,151.128571,52.85054,...,29.712708,98.841537,59.95134,45.97665,129.278198,268.070007,121.014137,141.007217,587.169983,196.413116
2021-09-01,152.949982,138.390686,91.907356,167.75,109.175079,36.305305,300.566711,575.719971,155.986221,53.173233,...,29.357811,82.376045,54.501228,50.353607,117.545418,239.839996,113.382378,137.17392,515.419983,186.408112


In [47]:
import pandas as pd
import numpy as np
from math import ceil
from typing import Optional
from collections import defaultdict

# --------------------------------------------------------
# 1. Function: from factor df -> (date, buy, sell) rows
# --------------------------------------------------------
def monthly_buy_sell_rows(df: pd.DataFrame, pct: float = 0.10, fill_value: Optional[str]=None) -> pd.DataFrame:
    """
    For each date (row in df), select exactly ceil(N*pct) tickers for buy (top) and sell (bottom),
    then expand into rows so each row contains one buy ticker and one sell ticker for that date.

    Parameters:
    - df: DataFrame with dates as index (EOM rows) and tickers as numeric columns.
    - pct: fraction (e.g. 0.10 for top/bottom 10%).
    - fill_value: value used to fill missing buy/sell when counts differ (default None).
   
    Returns:
    - DataFrame with columns ['date', 'buy', 'sell'] and index = date repeated.
    """
    df = df.copy()
    # ensure datetime index
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)

    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if len(numeric_cols) == 0:
        raise ValueError("No numeric columns found. Ensure ticker columns are numeric.")

    rows = []
    for date, row in df.iterrows():
        s = row[numeric_cols].dropna()
        n = len(s)
        if n == 0:
            continue
        n_pick = max(1, ceil(n * pct))   # exact count per side
        top = s.nlargest(n_pick).index.tolist()
        bottom = s.nsmallest(n_pick).index.tolist()

        # pair them into rows; if one side shorter, fill with fill_value
        m = max(len(top), len(bottom))
        top_extended = top + [fill_value] * (m - len(top))
        bottom_extended = bottom + [fill_value] * (m - len(bottom))

        for b, s_ in zip(top_extended, bottom_extended):
            rows.append((date, b, s_))

    out = pd.DataFrame(rows, columns=["date", "buy", "sell"])
    out = out.set_index("date")
    return out


# --------------------------------------------------------
# 2. Build instructions_df from your signal DataFrame
# --------------------------------------------------------
# df_signal: factor values by date (index) x ticker (cols)
instructions_df = monthly_buy_sell_rows(df_signal, pct=0.10)

# Standardise column names to match backtest code
instructions_df = instructions_df.rename(columns={"buy": "Buy", "sell": "Sell"})

# Ensure datetime index and add Month column
instructions_df.index = pd.to_datetime(instructions_df.index)
instructions_df["Month"] = instructions_df.index   # signal date (month-end)


# --------------------------------------------------------
# 3. Backtest using prices DataFrame
#    prices: DateTimeIndex (month-start), columns = tickers, values = prices
# --------------------------------------------------------
prices = prices.copy()
prices.index = pd.to_datetime(prices.index)
prices = prices.sort_index()

# Map each YEAR-MONTH period to its row position in prices
price_periods = prices.index.to_period("M")
period_to_pos = {p: i for i, p in enumerate(price_periods)}

returns_per_month = defaultdict(list)

for _, row in instructions_df.iterrows():
    signal_date = row["Month"]
    buy_stock = row["Buy"]
    sell_stock = row["Sell"]

    # Map month-end signal to the same year-month as prices
    period = signal_date.to_period("M")

    # Skip if we don't have prices for that period
    if period not in period_to_pos:
        continue

    curr_pos = period_to_pos[period]

    # Skip if there's no "next month" price
    if curr_pos >= len(prices.index) - 1:
        continue

    price_date = prices.index[curr_pos]        # e.g. 2022-02-01
    next_price_date = prices.index[curr_pos+1] # next month, e.g. 2022-03-01

    # Skip if tickers not in prices columns
    if (buy_stock not in prices.columns) or (sell_stock not in prices.columns):
        continue

    # BUY leg: multiplier (1 + r)
    price_buy_now = prices.loc[price_date, buy_stock]
    price_buy_next = prices.loc[next_price_date, buy_stock]
    return_buy_mult = price_buy_next / price_buy_now

    # SELL leg: multiplier (1 + r)
    price_sell_now = prices.loc[price_date, sell_stock]
    price_sell_next = prices.loc[next_price_date, sell_stock]
    return_sell_mult = price_sell_next / price_sell_now

    # Store both legs for that month (equal weight later)
    trade_month = period.to_timestamp("M")  # normalise month key (month-end)
    returns_per_month[trade_month].append(return_buy_mult)
    returns_per_month[trade_month].append(return_sell_mult)

# --------------------------------------------------------
# 4. Convert to monthly equal-weight returns
# --------------------------------------------------------
if len(returns_per_month) == 0:
    raise RuntimeError("No trades generated – check date alignment or ticker overlap.")

monthly_mult = pd.Series(
    {month: np.mean(mult_list) for month, mult_list in returns_per_month.items()}
).sort_index()

# Simple monthly returns
monthly_ret = monthly_mult - 1.0

# Cumulative multiplier and total cumulative return
cum_mult = monthly_mult.cumprod()
total_cumulative_return = float(cum_mult.iloc[-1] - 1.0)

results_df = pd.DataFrame({
    "ReturnMult": monthly_mult,
    "Return": monthly_ret,
    "CumMult": cum_mult,
})

# --------------------------------------------------------
# 5. Sharpe ratio (annualised)
# --------------------------------------------------------
rf_annual = 0.0          # change if you want a non-zero risk-free
rf_monthly = (1 + rf_annual) ** (1 / 12) - 1

excess_ret = monthly_ret - rf_monthly

sharpe_monthly = excess_ret.mean() / excess_ret.std(ddof=1)
sharpe_annual = float(sharpe_monthly * np.sqrt(12))

print("Monthly returns:")
print(monthly_ret)
print("\nCumulative multiplier:")
print(cum_mult)

print(f"\nTotal cumulative return: {total_cumulative_return:.4%}")
print(f"Annualised Sharpe ratio: {sharpe_annual:.3f}")


Monthly returns:
2022-02-28    0.032431
2022-03-31   -0.083750
2022-04-30   -0.000522
2022-05-31   -0.116452
2022-06-30    0.108192
2022-07-31   -0.015427
2022-08-31   -0.082940
2022-09-30    0.115957
2022-10-31    0.059404
2022-11-30   -0.067712
2022-12-31    0.114900
2023-01-31   -0.020180
2023-02-28    0.005805
2023-03-31    0.000940
2023-04-30    0.020346
2023-05-31    0.097779
2023-06-30    0.057377
2023-07-31   -0.032309
2023-08-31   -0.053526
2023-09-30   -0.038910
2023-10-31    0.125185
2023-11-30    0.074358
2023-12-31   -0.007431
2024-01-31    0.045659
2024-02-29    0.059872
2024-03-31   -0.052097
2024-04-30    0.038298
2024-05-31   -0.018074
2024-06-30    0.043922
2024-07-31    0.021852
2024-08-31    0.031933
2024-09-30   -0.012092
2024-10-31    0.073625
2024-11-30   -0.055078
2024-12-31    0.037528
2025-01-31   -0.021249
2025-02-28   -0.041120
2025-03-31   -0.007381
2025-04-30    0.056136
2025-05-31    0.047821
2025-06-30    0.014790
2025-07-31    0.015034
2025-08-31    0.0