#### **Objective**

The goal is to benchmark different portfolio selection strategies. The idea is to simulate investing 10,000 USD at the beginning of each month using different portfolio allocation models, and then compare which strategy delivers the best performance over time.

The comparison will be based on:

- The evolution of the portfolio's total value over time.
- The allocation method used each month (e.g., Markowitz, equal-weight, etc.).
- Key metrics such as total return, volatility, and Sharpe ratio.

#### **1. Imports**

In [1]:
# %pip install requests beautifulsoup4 pandas numpy yfinance python-dateutil

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import yfinance as yf
from datetime import datetime
from dateutil.relativedelta import relativedelta
import numpy as np

#### **2. Variables**

In [3]:
# Path of the CSV file used to cache the downloaded adjusted close prices.
adjusted_close_prices_csv = 'data/adjusted_close_prices.csv'


# When True, the Nasdaq/NYSE ticker lists are re-scraped from Finviz before
# the ticker CSVs are read; when False only the CSVs on disk are used.
search_all_tickers_flag = False
# When True, adjusted close prices are re-downloaded with yfinance and the
# cached CSV is overwritten; when False only the cached CSV is read.
adjusted_closes_flag = False

#####################################

# Uncomment the lines below to force a refresh of the corresponding dataset.
# search_all_tickers_flag = True
# adjusted_closes_flag = True

##### **2. Download - Asset Dataset**

In [4]:
# Number of tries allowed for a single screener page before giving up.
MAX_ATTEMPTS = 10
# Finviz screener endpoint queried for every page.
BASE_URL = "https://finviz.com/screener.ashx"

# Platform tokens used to build the pool of User-Agent headers; the scraper
# picks one header at random for each request.
_UA_PLATFORMS = (
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
    "Windows NT 6.1; Win64; x64",
    "Macintosh; Intel Mac OS X 12_4",
)
USER_AGENTS = [f"Mozilla/5.0 ({platform})" for platform in _UA_PLATFORMS]

def scrape_finviz_by_exchange_requests(exchange: str):
    """Scrape the Finviz screener listing for one exchange, page by page.

    Each page is requested with a random User-Agent and retried up to
    MAX_ATTEMPTS times with a growing pause between attempts. Scraping stops
    when a page yields no ticker that has not been collected already, or when
    a page keeps failing; in both cases the rows gathered so far are handed to
    ``finalize``.

    Args:
        exchange: ``"nasdaq"`` or ``"nyse"`` (case-insensitive).

    Returns:
        Whatever ``finalize(all_data, exchange)`` returns for the collected
        rows ``[ticker, company, sector, industry, country, market_cap, volume]``.

    Raises:
        ValueError: if ``exchange`` is not one of the supported names.
    """
    exchange_map = {
        "nasdaq": "exch_nasd",
        "nyse": "exch_nyse"
    }

    exchange_key = exchange.lower()
    if exchange_key not in exchange_map:
        raise ValueError("Invalid exchange. Use 'nasdaq' or 'nyse'.")

    exchange_code = exchange_map[exchange_key]
    all_data = []
    seen_tickers = set()  # tickers already collected, used to detect the end of the listing
    r = 1  # row offset of the page being requested (advanced by 20 per page)

    while True:
        success = False
        attempts = 0

        while not success and attempts < MAX_ATTEMPTS:
            try:
                headers = {
                    "User-Agent": random.choice(USER_AGENTS)
                }
                params = {
                    "v": "111",
                    "f": exchange_code,
                    "r": r
                }
                response = requests.get(BASE_URL, headers=headers, params=params, timeout=10)
                if response.status_code == 429:
                    raise Exception("HTTP 429 Too Many Requests")
                response.raise_for_status()

                soup = BeautifulSoup(response.text, "html.parser")

                # The results table is recognised by its first data row:
                # at least 11 cells, the first of which is a row number.
                target_table = None
                for table in soup.find_all("table"):
                    rows = table.find_all("tr")
                    if len(rows) < 2:
                        continue
                    cols = rows[1].find_all("td")
                    if len(cols) >= 11 and cols[0].text.strip().isdigit():
                        target_table = table
                        break

                if not target_table:
                    raise ValueError("No valid table found.")

                page_data = []
                for row in target_table.find_all("tr")[1:]:
                    cols = row.find_all("td")
                    if len(cols) < 11:
                        continue
                    ticker = cols[1].text.strip()
                    if ticker in seen_tickers:
                        # Already collected on an earlier page; do not duplicate it.
                        continue
                    seen_tickers.add(ticker)
                    page_data.append([
                        ticker,
                        cols[2].text.strip(),   # company
                        cols[3].text.strip(),   # sector
                        cols[4].text.strip(),   # industry
                        cols[5].text.strip(),   # country
                        cols[6].text.strip(),   # market cap
                        cols[10].text.strip(),  # volume
                    ])

                # A page that contributes nothing new means the listing is
                # exhausted (or the site is repeating itself); stop here rather
                # than looping forever or burning through the retry budget.
                if not page_data:
                    print(f"No more data on page starting at {r}. Stopping.")
                    return finalize(all_data, exchange)

                all_data.extend(page_data)

                print(f"Page {r} OK")
                success = True
                r += 20

                # Pause like a human visitor would between pages.
                time.sleep(random.uniform(3, 6))

            except Exception as e:
                # Deliberately broad: any failure on this page (network, HTTP,
                # parsing) is retried until the attempt budget is spent.
                attempts += 1
                print(f"Error on page {r}, attempt {attempts}: {e}")
                if attempts < MAX_ATTEMPTS:
                    # Growing backoff; skipped after the last attempt because
                    # there is nothing left to wait for.
                    time.sleep(5 * attempts + random.uniform(1, 3))

        if not success:
            print(f"\n⚠️  Page {r} failed {MAX_ATTEMPTS} times. Finalizing with partial data.")
            return finalize(all_data, exchange)

def finalize(all_data, exchange):
    """Turn scraped rows into a DataFrame, save it as CSV and return it.

    Args:
        all_data: list of rows, each
            ``[ticker, company, sector, industry, country, market_cap, volume]``.
        exchange: exchange name; its lower-cased form is used in the file name
            ``data/tickers_<exchange>.csv``.

    Returns:
        The DataFrame that was written, without the rows whose ``Volume``
        is the string ``"0"`` and with a fresh 0..n-1 index.
    """
    import os  # local import: not part of the notebook's import cell

    df = pd.DataFrame(all_data, columns=[
        "Ticker", "Company", "Sector", "Industry", "Country", "Market Cap", "Volume"
    ])
    # Volume is still text at this point, hence the comparison with "0".
    df = df[df["Volume"] != "0"].reset_index(drop=True)
    filename = f"data/tickers_{exchange.lower()}.csv"
    # Create the output folder if needed so a long scrape is not lost to a
    # missing directory when writing the CSV.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    df.to_csv(filename, index=False)
    print(f"\n✅ Scraping finished. Saved {len(df)} tickers to '{filename}'")
    return df


##### **2.1 Nasdaq Tickers**

In [5]:
if search_all_tickers_flag:
    # The scraper saves its result to data/tickers_nasdaq.csv, which is read
    # back below, so the return value is not needed here.
    scrape_finviz_by_exchange_requests("nasdaq")

# Only empty fields are treated as missing: with pandas' default NA markers,
# literal text such as a ticker "NA" would silently become NaN.
nasdaq = pd.read_csv("data/tickers_nasdaq.csv", keep_default_na=False, na_values=[""])
nasdaq

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,AACB,Artius II Acquisition Inc,Financial,Shell Companies,USA,298.12M,1
1,AACG,ATA Creativity Global ADR,Consumer Defensive,Education & Training Services,China,67.45M,254981
2,AACI,Armada Acquisition Corp. II,Financial,Shell Companies,USA,321.12M,3
3,AADR,AdvisorShares Dorsey Wright ADR ETF,Financial,Exchange Traded Fund,USA,-,20
4,AAL,American Airlines Group Inc,Industrials,Airlines,USA,7.28B,24043012
...,...,...,...,...,...,...,...
4231,ZVSA,ZyVersa Therapeutics Inc,Healthcare,Biotechnology,USA,0.93M,18248
4232,ZYBT,Zhengye Biotechnology Holding Ltd,Healthcare,Drug Manufacturers - Specialty & Generic,China,256.85M,6705
4233,ZYME,Zymeworks BC Inc,Healthcare,Biotechnology,USA,879.92M,402780
4234,ZYXI,Zynex Inc,Healthcare,Medical Distribution,USA,42.03M,840704


##### **2.2 NYSE Tickers**


In [6]:
if search_all_tickers_flag:
    # The scraper saves its result to data/tickers_nyse.csv, which is read
    # back below, so the return value is not needed here.
    scrape_finviz_by_exchange_requests("nyse")

# Only empty fields are treated as missing: with pandas' default NA markers,
# literal text such as a ticker "NA" would silently become NaN.
nyse = pd.read_csv("data/tickers_nyse.csv", keep_default_na=False, na_values=[""])
nyse

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,A,Agilent Technologies Inc,Healthcare,Diagnostics & Research,USA,32.14B,682198
1,AA,Alcoa Corp,Basic Materials,Aluminum,USA,7.44B,3594093
2,AAA,Alternative Access First Priority CLO Bond ETF,Financial,Exchange Traded Fund,USA,-,6048
3,AACT,Ares Acquisition Corporation II,Financial,Shell Companies,USA,705.51M,209977
4,AAM,AA Mission Acquisition Corp,Financial,Shell Companies,USA,459.97M,2413
...,...,...,...,...,...,...,...
4669,ZTO,ZTO Express (Cayman) Inc ADR,Industrials,Integrated Freight & Logistics,China,12.03B,2439145
4670,ZTR,Virtus Total Return Fund Inc,Financial,Closed-End Fund - Equity,USA,342.65M,128272
4671,ZTS,Zoetis Inc,Healthcare,Drug Manufacturers - Specialty & Generic,USA,65.41B,1582264
4672,ZVIA,Zevia PBC,Consumer Defensive,Beverages - Non-Alcoholic,USA,219.33M,239408


In [7]:
# Stack both exchange listings into a single frame with a fresh 0..n-1 index.
exchange_frames = [nasdaq, nyse]
tickers = pd.concat(exchange_frames, ignore_index=True)
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,AACB,Artius II Acquisition Inc,Financial,Shell Companies,USA,298.12M,1
1,AACG,ATA Creativity Global ADR,Consumer Defensive,Education & Training Services,China,67.45M,254981
2,AACI,Armada Acquisition Corp. II,Financial,Shell Companies,USA,321.12M,3
3,AADR,AdvisorShares Dorsey Wright ADR ETF,Financial,Exchange Traded Fund,USA,-,20
4,AAL,American Airlines Group Inc,Industrials,Airlines,USA,7.28B,24043012
...,...,...,...,...,...,...,...
8905,ZTO,ZTO Express (Cayman) Inc ADR,Industrials,Integrated Freight & Logistics,China,12.03B,2439145
8906,ZTR,Virtus Total Return Fund Inc,Financial,Closed-End Fund - Equity,USA,342.65M,128272
8907,ZTS,Zoetis Inc,Healthcare,Drug Manufacturers - Specialty & Generic,USA,65.41B,1582264
8908,ZVIA,Zevia PBC,Consumer Defensive,Beverages - Non-Alcoholic,USA,219.33M,239408


#### **3. Process data**


In [8]:
# Volume may be text with thousands separators ("254,981") or already numeric,
# depending on how read_csv inferred the column. Going through str first makes
# the conversion work in both cases; regex=False treats ',' as a plain character.
tickers['Volume'] = (
    tickers['Volume']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)
# Most actively traded assets first.
tickers = tickers.sort_values(by="Volume", ascending=False)
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
2236,LOBO,Lobo Ev Technologies Ltd,Consumer Cyclical,Auto Manufacturers,China,13.39M,277221913.0
3138,RAYA,Erayak Power Solution Group Inc,Industrials,Electrical Equipment & Parts,China,4.35M,229545591.0
3724,TNON,Tenon Medical Inc,Healthcare,Medical Devices,USA,12.22M,211413022.0
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0
2010,IXHL,Incannex Healthcare Inc,Healthcare,Drug Manufacturers - Specialty & Generic,Australia,38.40M,137451462.0
...,...,...,...,...,...,...,...
6555,JDVI,John Hancock Disciplined Value International S...,Financial,Exchange Traded Fund,USA,-,1.0
6656,KBUF,KraneShares 90% KWEB Defined Outcome January 2...,Financial,Exchange Traded Fund,USA,-,1.0
4490,AUSM,Allspring Ultra Short Municipal ETF,Financial,Exchange Traded Fund,USA,-,1.0
7753,ROPE,Coastal Compass 100 ETF,Financial,Exchange Traded Fund,USA,-,1.0


#### **4. Filter assets**

##### **4.1 Liquidity: Remove low liquidity**
- Filters out assets with insufficient daily trading volume.

In [9]:
# Keep only assets whose volume is strictly above 500,000.
is_liquid = tickers['Volume'].gt(500_000)
tickers = tickers.loc[is_liquid]
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
2236,LOBO,Lobo Ev Technologies Ltd,Consumer Cyclical,Auto Manufacturers,China,13.39M,277221913.0
3138,RAYA,Erayak Power Solution Group Inc,Industrials,Electrical Equipment & Parts,China,4.35M,229545591.0
3724,TNON,Tenon Medical Inc,Healthcare,Medical Devices,USA,12.22M,211413022.0
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0
2010,IXHL,Incannex Healthcare Inc,Healthcare,Drug Manufacturers - Specialty & Generic,Australia,38.40M,137451462.0
...,...,...,...,...,...,...,...
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0
8065,SPBO,SPDR Portfolio Corporate Bond ETF,Financial,Exchange Traded Fund,USA,-,500751.0
4586,BC,Brunswick Corp,Consumer Cyclical,Recreational Vehicles,USA,3.70B,500656.0


##### **4.2 Market Cap: Remove small or undefined market capitalization**
- Excludes microcaps and assets without valid market capitalization data to reduce exposure to volatility, manipulation, and poor fundamentals.

In [10]:
# A market cap of '-' marks a missing value; keep only rows with a real figure.
# The copy makes the result an independent frame that can receive new columns.
has_market_cap = tickers['Market Cap'].ne('-')
tickers = tickers.loc[has_market_cap].copy()

# Convert Market Cap to numeric format
def convert_market_cap(val):
    """Convert a market-cap label such as ``"1.49B"`` into a float.

    ``$`` signs, thousands separators and surrounding whitespace are ignored.
    A trailing ``K``, ``M``, ``B`` or ``T`` (any case) scales the number by
    1e3, 1e6, 1e9 or 1e12; a value without suffix is converted as is.
    Values that are already numeric are simply cast to ``float``.

    Raises:
        ValueError: if the text cannot be parsed as a number (e.g. ``"-"``).
    """
    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}

    if not isinstance(val, str):
        # Already numeric (e.g. the column was parsed as numbers upstream).
        return float(val)

    text = val.replace('$', '').replace(',', '').strip()
    suffix = text[-1:].upper()  # '' for an empty string, which float() rejects below
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)

# Numeric version of the textual 'Market Cap' column.
tickers['market_cap'] = tickers['Market Cap'].map(convert_market_cap)

# Keep only assets whose market cap is strictly above 500 million.
above_cap_floor = tickers['market_cap'].gt(500_000_000)
tickers = tickers.loc[above_cap_floor]
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume,market_cap
2754,OPEN,Opendoor Technologies Inc,Real Estate,Real Estate Services,USA,1.49B,126981253.0,1.490000e+09
2649,NVDA,NVIDIA Corp,Technology,Semiconductors,USA,4306.36B,106136868.0,4.306360e+12
212,AMZN,Amazon.com Inc,Consumer Cyclical,Internet Retail,USA,2294.14B,57251389.0,2.294140e+12
7153,NIO,NIO Inc ADR,Consumer Cyclical,Auto Manufacturers,China,9.45B,56365932.0,9.450000e+09
8547,VALE,Vale S.A. ADR,Basic Materials,Other Industrial Metals & Mining,Brazil,41.58B,55509604.0,4.158000e+10
...,...,...,...,...,...,...,...,...
6706,KODK,Eastman Kodak Co,Industrials,Specialty Business Services,USA,525.21M,503893.0,5.252100e+08
4359,ALLE,Allegion plc,Industrials,Security & Protection Services,Ireland,14.19B,503835.0,1.419000e+10
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0,1.530000e+09
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0,5.094000e+10


In [11]:
# Countries present before filtering.
print(tickers['Country'].unique())

# Whitelist of countries whose assets stay in the universe.
allowed_countries = [
    'USA', 'Canada', 'United Kingdom', 'Switzerland',
    'Germany', 'France', 'Netherlands', 'Japan',
    'Australia', 'Denmark', 'Sweden', 'Norway',
    'Finland', 'Ireland', 'Belgium', 'Spain',
    'Israel', 'Brazil', 'India',
]

# Drop every asset whose country is not on the whitelist.
in_allowed_country = tickers['Country'].isin(allowed_countries)
tickers = tickers.loc[in_allowed_country]

# Countries remaining after filtering.
print(tickers['Country'].unique())

tickers

['USA' 'China' 'Brazil' 'Canada' 'Denmark' 'Cayman Islands' 'Switzerland'
 'United Kingdom' 'Singapore' 'Australia' 'Finland' 'Netherlands' 'Taiwan'
 'Israel' 'India' 'Mexico' 'Sweden' 'South Africa' 'Belgium' 'Bermuda'
 'Spain' 'Ireland' 'Japan' 'Luxembourg' 'Germany' 'Hong Kong' 'Norway'
 'Colombia' 'Monaco' 'France' 'Italy' 'Turkey' 'Cyprus' 'Argentina']
['USA' 'Brazil' 'Canada' 'Denmark' 'Switzerland' 'United Kingdom'
 'Australia' 'Finland' 'Netherlands' 'Israel' 'India' 'Sweden' 'Belgium'
 'Spain' 'Ireland' 'Japan' 'Germany' 'Norway' 'France']


Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume,market_cap
2754,OPEN,Opendoor Technologies Inc,Real Estate,Real Estate Services,USA,1.49B,126981253.0,1.490000e+09
2649,NVDA,NVIDIA Corp,Technology,Semiconductors,USA,4306.36B,106136868.0,4.306360e+12
212,AMZN,Amazon.com Inc,Consumer Cyclical,Internet Retail,USA,2294.14B,57251389.0,2.294140e+12
8547,VALE,Vale S.A. ADR,Basic Materials,Other Industrial Metals & Mining,Brazil,41.58B,55509604.0,4.158000e+10
3486,SOFI,SoFi Technologies Inc,Financial,Credit Services,USA,24.12B,53230046.0,2.412000e+10
...,...,...,...,...,...,...,...,...
6706,KODK,Eastman Kodak Co,Industrials,Specialty Business Services,USA,525.21M,503893.0,5.252100e+08
4359,ALLE,Allegion plc,Industrials,Security & Protection Services,Ireland,14.19B,503835.0,1.419000e+10
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0,1.530000e+09
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0,5.094000e+10


#### **5. Download - Adjusted Close Prices**

In [12]:
if adjusted_closes_flag:
    # Download window: from 31 October, eleven calendar years back, to today.
    end_date = datetime.today()
    start_date = datetime(year=end_date.year - 11, month=10, day=31)

    # Distinct ticker symbols, in order of first appearance.
    tickers_list = tickers['Ticker'].drop_duplicates().tolist()

    # One request for all symbols; columns are grouped per ticker and
    # auto_adjust=False keeps the separate 'Adj Close' column.
    data = yf.download(
        tickers=tickers_list,
        start=f"{start_date:%Y-%m-%d}",
        end=f"{end_date:%Y-%m-%d}",
        progress=True,
        group_by='ticker',
        auto_adjust=False
    )

    # Adjusted close series for every ticker present in the download.
    adjusted_closes = {
        ticker: data[ticker]['Adj Close']
        for ticker in tickers_list
        if ticker in data
    }

    # One column per ticker, then cache the table on disk.
    df = pd.DataFrame(adjusted_closes)
    df.to_csv(adjusted_close_prices_csv)

# Load the cached prices: the first CSV column becomes the index and is
# parsed as dates.
df = pd.read_csv(
    adjusted_close_prices_csv,
    index_col=0,
    parse_dates=True,
)

df

Unnamed: 0_level_0,OPEN,NVDA,AMZN,VALE,SOFI,TSLA,LCID,BBAI,INTC,F,...,EXLS,WSM,CELC,EXG,LFST,KODK,ALLE,QDEL,MPC,BC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-10-31,,0.466931,15.273000,5.329594,,16.113333,,,25.923483,8.019918,...,5.598000,25.175978,,3.543701,,21.570000,47.039684,28.549999,32.622379,39.420841
2014-11-03,,0.474817,15.286000,5.186979,,16.172667,,,26.152164,7.962998,...,5.682000,25.148884,,3.565114,,21.670000,47.340923,28.209999,32.880772,39.648262
2014-11-04,,0.481030,15.140500,5.039082,,15.928667,,,26.327475,7.940230,...,5.794000,24.904984,,3.529426,,20.870001,47.066261,27.719999,32.525494,39.631420
2014-11-05,,0.481030,14.826000,5.049646,,15.398000,,,25.901663,7.934539,...,5.774000,25.059843,,3.561545,,23.480000,47.004250,28.290001,32.554176,40.254749
2014-11-06,,0.483180,14.832000,4.827801,,16.081333,,,25.947691,8.076838,...,5.786000,25.524416,,3.543701,,22.900000,47.332066,28.680000,33.422676,40.499020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-28,2.34,176.750000,232.789993,9.840000,21.020000,325.589996,2.79,7.15,20.680000,11.280000,...,42.220001,188.229996,36.790001,8.840000,4.17,6.930000,163.669998,26.090000,174.800003,60.310001
2025-07-29,2.05,175.509995,231.009995,9.890000,22.400000,321.200012,2.56,6.62,20.410000,11.080000,...,42.200001,187.110001,38.500000,8.810000,4.13,6.920000,164.110001,25.440001,175.429993,61.189999
2025-07-30,2.07,179.270004,230.190002,9.630000,21.870001,319.040009,2.50,6.41,20.340000,10.870000,...,42.790001,190.529999,40.299999,8.800000,4.23,6.860000,164.820007,24.290001,170.779999,59.160000
2025-07-31,1.84,177.869995,234.110001,9.530000,22.580000,308.269989,2.46,6.35,19.799999,11.070000,...,43.430000,187.050003,39.165001,8.750000,3.98,6.700000,165.919998,23.020000,170.190002,58.290001


In [13]:
# One flag per column: True when the column holds at least one missing price.
has_nulls = df.isnull().any(axis=0)

# How many columns are affected.
null_counts = has_nulls.sum()
print(f"{null_counts} columns contain at least one null value.")

# Which columns are affected.
cols_with_nulls = df.columns[has_nulls].tolist()
print("Columns with null values:", cols_with_nulls)

# Keep only the columns without any missing value.
df = df.loc[:, ~has_nulls].copy()
df

425 columns contain at least one null value.
Columns with null values: ['OPEN', 'SOFI', 'LCID', 'BBAI', 'RKT', 'HOOD', 'AVTR', 'PLTR', 'JOBY', 'RIOT', 'APLD', 'QBTS', 'RGTI', 'SNAP', 'RIVN', 'RDDT', 'QS', 'HIMS', 'ACHR', 'SMR', 'SOUN', 'COIN', 'NVTS', 'ENVX', 'TLRY', 'IREN', 'OKLO', 'BITF', 'OSCR', 'FUBO', 'IONQ', 'RXRX', 'BE', 'HLN', 'RKLB', 'DOW', 'MP', 'CIFR', 'BTBT', 'AUR', 'MRNA', 'CLSK', 'CFLT', 'ROKU', 'CRCL', 'KHC', 'ADT', 'UBER', 'EOSE', 'PTON', 'RBLX', 'IR', 'CCCS', 'MIR', 'UWMC', 'NBIS', 'LYFT', 'CORZ', 'PYPL', 'COMP', 'SLDP', 'CRWV', 'PATH', 'OWL', 'HPE', 'USAR', 'PR', 'VRT', 'NFE', 'WSC', 'PINS', 'TMC', 'U', 'BULL', 'FSM', 'ZETA', 'SNOW', 'SHOP', 'GLXY', 'REPL', 'ASTS', 'KVUE', 'TOST', 'TTD', 'AMPX', 'TDOC', 'NET', 'XYZ', 'CPNG', 'TEM', 'RUN', 'LAC', 'VICI', 'AI', 'ARM', 'DJT', 'RBRK', 'ONON', 'ATAI', 'LUNR', 'RZLV', 'UPST', 'NVT', 'SG', 'ARRY', 'PL', 'ALAB', 'VST', 'PK', 'TNGX', 'NVST', 'MS', 'PCOR', 'IBRX', 'APP', 'DKNG', 'MBLY', 'DELL', 'S', 'GENI', 'MVST', 'FRSH', 'GDY

Unnamed: 0_level_0,NVDA,AMZN,VALE,TSLA,INTC,F,AAPL,AMD,PLUG,LUMN,...,GRMN,ASB,EXLS,WSM,EXG,KODK,ALLE,QDEL,MPC,BC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-10-31,0.466931,15.273000,5.329594,16.113333,25.923483,8.019918,23.889521,2.800000,4.71,20.505371,...,40.026833,13.219654,5.598000,25.175978,3.543701,21.570000,47.039684,28.549999,32.622379,39.420841
2014-11-03,0.474817,15.286000,5.186979,16.172667,26.152164,7.962998,24.199198,2.870000,5.14,20.668507,...,40.185570,13.212624,5.682000,25.148884,3.565114,21.670000,47.340923,28.209999,32.880772,39.648262
2014-11-04,0.481030,15.140500,5.039082,15.928667,26.327475,7.940230,24.022238,2.830000,5.35,20.579521,...,40.286579,13.170435,5.794000,24.904984,3.529426,20.870001,47.066261,27.719999,32.525494,39.631420
2014-11-05,0.481030,14.826000,5.049646,15.398000,25.901663,7.934539,24.079760,2.800000,4.96,20.589418,...,40.113419,13.233721,5.774000,25.059843,3.561545,23.480000,47.004250,28.290001,32.554176,40.254749
2014-11-06,0.483180,14.832000,4.827801,16.081333,25.947691,8.076838,24.148623,2.760000,5.12,19.279404,...,39.637249,13.353255,5.786000,25.524416,3.543701,22.900000,47.332066,28.680000,33.422676,40.499020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-28,176.750000,232.789993,9.840000,325.589996,20.680000,11.280000,214.050003,173.660004,1.77,4.470000,...,236.360001,25.410000,42.220001,188.229996,8.840000,6.930000,163.669998,26.090000,174.800003,60.310001
2025-07-29,175.509995,231.009995,9.890000,321.200012,20.410000,11.080000,211.270004,177.440002,1.59,4.590000,...,239.300003,25.230000,42.200001,187.110001,8.810000,6.920000,164.110001,25.440001,175.429993,61.189999
2025-07-30,179.270004,230.190002,9.630000,319.040009,20.340000,10.870000,209.050003,179.509995,1.55,4.460000,...,221.490005,25.170000,42.790001,190.529999,8.800000,6.860000,164.820007,24.290001,170.779999,59.160000
2025-07-31,177.869995,234.110001,9.530000,308.269989,19.799999,11.070000,207.570007,176.309998,1.50,4.450000,...,218.759995,24.740000,43.430000,187.050003,8.750000,6.700000,165.919998,23.020000,170.190002,58.290001


#### **6. Calculate Monthly Log Returns**
- Portfolio models (Markowitz, Equal Weight, etc.) work on returns, not raw prices.
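- Monthly returns match the monthly investment schedule and are standard for multi-asset backtesting; lookahead bias is avoided only as long as each allocation uses returns observed before its investment date.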
- Log returns are more robust, easier to analyze statistically, and preferred for multi-period backtests and portfolio analysis.

In [14]:
# Month-end price series: the last available price within each month.
monthly_prices = df.resample('ME').last()

# Log return per month, log(P_t / P_{t-1}); rows containing any NaN
# (such as the first month, which has no predecessor) are dropped.
previous_prices = monthly_prices.shift(1)
log_returns = np.log(monthly_prices.div(previous_prices)).dropna()

# Display the resulting table of monthly log returns.
log_returns


Unnamed: 0_level_0,NVDA,AMZN,VALE,TSLA,INTC,F,AAPL,AMD,PLUG,LUMN,...,GRMN,ASB,EXLS,WSM,EXG,KODK,ALLE,QDEL,MPC,BC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-11-30,0.074852,0.103119,-0.113210,0.011600,0.097533,0.110104,0.100731,-0.003578,-0.209438,-0.004045,...,0.032279,-0.011902,0.001428,0.136755,0.020146,-0.059206,0.014214,-0.024465,-0.003524,0.062329
2014-12-31,-0.044864,-0.087237,-0.096643,-0.094775,-0.026110,-0.014730,-0.074606,-0.043963,-0.241638,-0.029623,...,-0.072446,0.008084,0.023970,0.014910,-0.049160,0.065675,0.030937,0.037341,0.001885,0.031308
2015-01-31,-0.043319,0.133092,-0.151505,-0.088365,-0.093823,-0.042160,0.059612,-0.038173,-0.116534,-0.062822,...,-0.008936,-0.102800,0.023069,0.037620,-0.022390,-0.186846,-0.026493,-0.205832,0.025487,0.057245
2015-02-28,0.142699,0.069799,0.053992,-0.001278,0.013483,0.105089,0.096016,0.190717,0.142851,0.018393,...,-0.053547,0.108707,0.172173,0.027727,0.069591,0.046116,0.066608,0.087011,0.130723,0.001545
2015-03-31,-0.052583,-0.021430,-0.272523,-0.074350,-0.061395,-0.012316,-0.031874,-0.148806,-0.173272,-0.077053,...,-0.033705,-0.002148,0.063822,-0.009241,0.000104,0.006869,0.059594,0.049383,-0.025171,-0.052808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-30,0.004970,-0.031176,-0.069494,0.085029,-0.122085,-0.001996,-0.044321,-0.053889,-0.439367,-0.101965,...,-0.150083,-0.021082,0.026546,-0.018421,0.001088,-0.004758,0.064845,-0.229814,-0.058512,-0.156475
2025-05-31,0.215624,0.105843,-0.019523,0.205293,-0.027745,0.050782,-0.055073,0.128782,0.011429,0.101965,...,0.082645,0.049092,-0.052727,0.046114,0.068956,-0.102010,0.024833,0.098609,0.162469,0.102733
2025-06-30,0.156364,0.067792,0.061591,-0.086786,0.136086,0.044284,0.021281,0.248028,0.526610,0.110957,...,0.032416,0.061291,-0.049019,0.009904,0.026088,-0.005296,0.013586,-0.062215,0.032862,0.087341
2025-07-31,0.118521,0.064940,-0.018712,-0.030006,-0.123379,0.020074,0.011630,0.217121,0.006689,0.015855,...,0.046982,0.014248,-0.008255,0.139244,0.007520,0.170452,0.140859,-0.224706,0.024265,0.053743
