#### **Objective**

The goal is to benchmark different portfolio selection strategies. The idea is to simulate investing 10,000 USD at the beginning of each month using different portfolio allocation models, and then compare which strategy delivers the best performance over time.

The comparison will be based on:

- The evolution of the portfolio's total value over time.
- The allocation method used each month (e.g., Markowitz, equal-weight, etc.).
- Key metrics such as total return, volatility, and Sharpe ratio.


#### **1. Imports**

In [15]:
# %pip install selenium pandas

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import yfinance as yf
from datetime import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
from datetime import datetime

#### **2. Variables**

In [17]:
adjusted_close_prices_csv = 'data/adjusted_close_prices.csv'

end_date = datetime.today()
start_date = datetime(end_date.year - 12, 10, 31)


search_all_tickers_flag = False
adjusted_closes_flag = False

#####################################

# search_all_tickers_flag = True
# adjusted_closes_flag = True

##### **2. Download - Asset Dataset**

In [18]:
MAX_ATTEMPTS = 10
BASE_URL = "https://finviz.com/screener.ashx"

USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "Mozilla/5.0 (X11; Linux x86_64)",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_4)"
]

def scrape_finviz_by_exchange_requests(exchange: str):
    exchange_map = {
        "nasdaq": "exch_nasd",
        "nyse": "exch_nyse"
    }

    if exchange.lower() not in exchange_map:
        raise ValueError("Invalid exchange. Use 'nasdaq' or 'nyse'.")

    exchange_code = exchange_map[exchange.lower()]
    all_data = []
    r = 1  # page offset

    while True:
        success = False
        attempts = 0

        while not success and attempts < MAX_ATTEMPTS:
            try:
                headers = {
                    "User-Agent": random.choice(USER_AGENTS)
                }
                params = {
                    "v": "111",
                    "f": exchange_code,
                    "r": r
                }
                response = requests.get(BASE_URL, headers=headers, params=params, timeout=10)
                if response.status_code == 429:
                    raise Exception("HTTP 429 Too Many Requests")
                response.raise_for_status()

                soup = BeautifulSoup(response.text, "html.parser")
                tables = soup.find_all("table")

                target_table = None
                for table in tables:
                    rows = table.find_all("tr")
                    if len(rows) < 2:
                        continue
                    cols = rows[1].find_all("td")
                    if len(cols) >= 11 and cols[0].text.strip().isdigit():
                        target_table = table
                        break

                if not target_table:
                    raise ValueError("No valid table found.")

                rows = target_table.find_all("tr")[1:]
                if not rows:
                    print(f"No more data on page starting at {r}. Stopping.")
                    return finalize(all_data, exchange)

                for row in rows:
                    cols = row.find_all("td")
                    if len(cols) >= 11:
                        ticker = cols[1].text.strip()
                        company = cols[2].text.strip()
                        sector = cols[3].text.strip()
                        industry = cols[4].text.strip()
                        country = cols[5].text.strip()
                        market_cap = cols[6].text.strip()
                        volume = cols[10].text.strip()
                        all_data.append([ticker, company, sector, industry, country, market_cap, volume])

                print(f"Page {r} OK")
                success = True
                r += 20

                # Simula navegação humana
                time.sleep(random.uniform(3, 6))

            except Exception as e:
                attempts += 1
                print(f"Error on page {r}, attempt {attempts}: {e}")
                time.sleep(5 * attempts + random.uniform(1, 3))  # backoff crescente

        if not success:
            print(f"\n⚠️  Page {r} failed {MAX_ATTEMPTS} times. Finalizing with partial data.")
            return finalize(all_data, exchange)

def finalize(all_data, exchange):
    df = pd.DataFrame(all_data, columns=[
        "Ticker", "Company", "Sector", "Industry", "Country", "Market Cap", "Volume"
    ])
    df = df[df["Volume"] != "0"].reset_index(drop=True)
    filename = f"data/tickers_{exchange.lower()}.csv"
    df.to_csv(filename, index=False)
    print(f"\n✅ Scraping finished. Saved {len(df)} tickers to '{filename}'")
    return df


##### **2.1 Nasdaq Tickers**

In [19]:
if search_all_tickers_flag:
    nasdaq = scrape_finviz_by_exchange_requests("nasdaq")
    nasdaq

nasdaq = pd.read_csv("data/tickers_nasdaq.csv")
nasdaq

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,AACB,Artius II Acquisition Inc,Financial,Shell Companies,USA,298.12M,1
1,AACG,ATA Creativity Global ADR,Consumer Defensive,Education & Training Services,China,67.45M,254981
2,AACI,Armada Acquisition Corp. II,Financial,Shell Companies,USA,321.12M,3
3,AADR,AdvisorShares Dorsey Wright ADR ETF,Financial,Exchange Traded Fund,USA,-,20
4,AAL,American Airlines Group Inc,Industrials,Airlines,USA,7.28B,24043012
...,...,...,...,...,...,...,...
4231,ZVSA,ZyVersa Therapeutics Inc,Healthcare,Biotechnology,USA,0.93M,18248
4232,ZYBT,Zhengye Biotechnology Holding Ltd,Healthcare,Drug Manufacturers - Specialty & Generic,China,256.85M,6705
4233,ZYME,Zymeworks BC Inc,Healthcare,Biotechnology,USA,879.92M,402780
4234,ZYXI,Zynex Inc,Healthcare,Medical Distribution,USA,42.03M,840704


##### **2.2 Nyse Tickers**


In [20]:
if search_all_tickers_flag:
    nyse = scrape_finviz_by_exchange_requests("nyse")
    nyse

nyse = pd.read_csv("data/tickers_nyse.csv")
nyse

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,A,Agilent Technologies Inc,Healthcare,Diagnostics & Research,USA,32.14B,682198
1,AA,Alcoa Corp,Basic Materials,Aluminum,USA,7.44B,3594093
2,AAA,Alternative Access First Priority CLO Bond ETF,Financial,Exchange Traded Fund,USA,-,6048
3,AACT,Ares Acquisition Corporation II,Financial,Shell Companies,USA,705.51M,209977
4,AAM,AA Mission Acquisition Corp,Financial,Shell Companies,USA,459.97M,2413
...,...,...,...,...,...,...,...
4669,ZTO,ZTO Express (Cayman) Inc ADR,Industrials,Integrated Freight & Logistics,China,12.03B,2439145
4670,ZTR,Virtus Total Return Fund Inc,Financial,Closed-End Fund - Equity,USA,342.65M,128272
4671,ZTS,Zoetis Inc,Healthcare,Drug Manufacturers - Specialty & Generic,USA,65.41B,1582264
4672,ZVIA,Zevia PBC,Consumer Defensive,Beverages - Non-Alcoholic,USA,219.33M,239408


In [21]:
tickers = pd.concat([nasdaq, nyse], ignore_index=True)
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,AACB,Artius II Acquisition Inc,Financial,Shell Companies,USA,298.12M,1
1,AACG,ATA Creativity Global ADR,Consumer Defensive,Education & Training Services,China,67.45M,254981
2,AACI,Armada Acquisition Corp. II,Financial,Shell Companies,USA,321.12M,3
3,AADR,AdvisorShares Dorsey Wright ADR ETF,Financial,Exchange Traded Fund,USA,-,20
4,AAL,American Airlines Group Inc,Industrials,Airlines,USA,7.28B,24043012
...,...,...,...,...,...,...,...
8905,ZTO,ZTO Express (Cayman) Inc ADR,Industrials,Integrated Freight & Logistics,China,12.03B,2439145
8906,ZTR,Virtus Total Return Fund Inc,Financial,Closed-End Fund - Equity,USA,342.65M,128272
8907,ZTS,Zoetis Inc,Healthcare,Drug Manufacturers - Specialty & Generic,USA,65.41B,1582264
8908,ZVIA,Zevia PBC,Consumer Defensive,Beverages - Non-Alcoholic,USA,219.33M,239408


#### **3. Process data**


In [22]:
tickers['Volume'] = tickers['Volume'].str.replace(',', '').astype(float)
tickers.sort_values(by="Volume", inplace=True, ascending=False)
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
2236,LOBO,Lobo Ev Technologies Ltd,Consumer Cyclical,Auto Manufacturers,China,13.39M,277221913.0
3138,RAYA,Erayak Power Solution Group Inc,Industrials,Electrical Equipment & Parts,China,4.35M,229545591.0
3724,TNON,Tenon Medical Inc,Healthcare,Medical Devices,USA,12.22M,211413022.0
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0
2010,IXHL,Incannex Healthcare Inc,Healthcare,Drug Manufacturers - Specialty & Generic,Australia,38.40M,137451462.0
...,...,...,...,...,...,...,...
6555,JDVI,John Hancock Disciplined Value International S...,Financial,Exchange Traded Fund,USA,-,1.0
6656,KBUF,KraneShares 90% KWEB Defined Outcome January 2...,Financial,Exchange Traded Fund,USA,-,1.0
4490,AUSM,Allspring Ultra Short Municipal ETF,Financial,Exchange Traded Fund,USA,-,1.0
7753,ROPE,Coastal Compass 100 ETF,Financial,Exchange Traded Fund,USA,-,1.0


#### **4. Filter assets**

##### **4.1 Liquidity: Remove low liquidity**
- Filters out assets with insufficient daily trading volume.

In [23]:
tickers = tickers[tickers['Volume'] > 500_000]
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
2236,LOBO,Lobo Ev Technologies Ltd,Consumer Cyclical,Auto Manufacturers,China,13.39M,277221913.0
3138,RAYA,Erayak Power Solution Group Inc,Industrials,Electrical Equipment & Parts,China,4.35M,229545591.0
3724,TNON,Tenon Medical Inc,Healthcare,Medical Devices,USA,12.22M,211413022.0
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0
2010,IXHL,Incannex Healthcare Inc,Healthcare,Drug Manufacturers - Specialty & Generic,Australia,38.40M,137451462.0
...,...,...,...,...,...,...,...
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0
8065,SPBO,SPDR Portfolio Corporate Bond ETF,Financial,Exchange Traded Fund,USA,-,500751.0
4586,BC,Brunswick Corp,Consumer Cyclical,Recreational Vehicles,USA,3.70B,500656.0


##### **4.2 Market Cap: Remove small or undefined market capitalization**
- Excludes microcaps and assets without valid market capitalization data to reduce exposure to volatility, manipulation, and poor fundamentals.

In [24]:
# Convert Market Cap to numeric format
def convert_market_cap(val):
    val = val.replace('$', '').replace(',', '').strip()
    if val == '-':
        return np.nan  
    elif val.endswith('B'):
        return float(val.replace('B', '')) * 1e9
    elif val.endswith('M'):
        return float(val.replace('M', '')) * 1e6
    elif val.endswith('K'):
        return float(val.replace('K', '')) * 1e3
    else:
        return float(val)

tickers['market_cap'] = tickers['Market Cap'].apply(convert_market_cap)

# Filter by Volume and Market Cap
tickers = tickers[
    (tickers['Market Cap'] == '-') 
    | (tickers['market_cap'] > 500_000_000).copy()
]
tickers

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tickers['market_cap'] = tickers['Market Cap'].apply(convert_market_cap)


Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume,market_cap
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0,
2754,OPEN,Opendoor Technologies Inc,Real Estate,Real Estate Services,USA,1.49B,126981253.0,1.490000e+09
3534,SQQQ,ProShares UltraPro Short QQQ -3x Shares,Financial,Exchange Traded Fund,USA,-,115254308.0,
2649,NVDA,NVIDIA Corp,Technology,Semiconductors,USA,4306.36B,106136868.0,4.306360e+12
3776,TSLL,Direxion Daily TSLA Bull 2X Shares,Financial,Exchange Traded Fund,USA,-,105416065.0,
...,...,...,...,...,...,...,...,...
5387,DXD,ProShares UltraShort Dow 30-2X Shares,Financial,Exchange Traded Fund,USA,-,503040.0,
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0,1.530000e+09
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0,5.094000e+10
8065,SPBO,SPDR Portfolio Corporate Bond ETF,Financial,Exchange Traded Fund,USA,-,500751.0,


In [25]:
print("Tickers with valid Market Cap:")
print(tickers['Country'].unique())

allowed_countries = [
    'USA', 'Canada', 'United Kingdom', 'Switzerland', 'Germany', 'France',
    'Netherlands', 'Japan', 'Australia', 'Denmark', 'Sweden', 'Norway',
    'Finland', 'Ireland', 'Belgium', 'Spain', 'Israel', 'Brazil', 'India'
]

# Filter tickers to include only allowed countries
tickers = tickers[tickers['Country'].isin(allowed_countries)]

print("\nAllowed countries:")
print(tickers['Country'].unique())

tickers

Tickers with valid Market Cap:
['USA' 'China' 'Brazil' 'Canada' 'Denmark' 'Cayman Islands' 'Switzerland'
 'United Kingdom' 'Singapore' 'Australia' 'Finland' 'Netherlands' 'Taiwan'
 'Israel' 'India' 'Mexico' 'Sweden' 'South Africa' 'Belgium' 'Bermuda'
 'Spain' 'Ireland' 'Japan' 'Luxembourg' 'Germany' 'Hong Kong' 'Norway'
 'Colombia' 'Monaco' 'France' 'Italy' 'Turkey' 'Cyprus' 'Argentina']

Allowed countries:
['USA' 'Brazil' 'Canada' 'Denmark' 'Switzerland' 'United Kingdom'
 'Australia' 'Finland' 'Netherlands' 'Israel' 'India' 'Sweden' 'Belgium'
 'Spain' 'Ireland' 'Japan' 'Germany' 'Norway' 'France']


Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume,market_cap
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0,
2754,OPEN,Opendoor Technologies Inc,Real Estate,Real Estate Services,USA,1.49B,126981253.0,1.490000e+09
3534,SQQQ,ProShares UltraPro Short QQQ -3x Shares,Financial,Exchange Traded Fund,USA,-,115254308.0,
2649,NVDA,NVIDIA Corp,Technology,Semiconductors,USA,4306.36B,106136868.0,4.306360e+12
3776,TSLL,Direxion Daily TSLA Bull 2X Shares,Financial,Exchange Traded Fund,USA,-,105416065.0,
...,...,...,...,...,...,...,...,...
5387,DXD,ProShares UltraShort Dow 30-2X Shares,Financial,Exchange Traded Fund,USA,-,503040.0,
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0,1.530000e+09
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0,5.094000e+10
8065,SPBO,SPDR Portfolio Corporate Bond ETF,Financial,Exchange Traded Fund,USA,-,500751.0,


#### **5. Download - Adjusted Close Prices**

In [26]:
if adjusted_closes_flag:
    # Get the list of tickers
    tickers_list = tickers['Ticker'].unique().tolist()

    # Download adjusted close prices
    data = yf.download(
        tickers=tickers_list,
        start=start_date.strftime('%Y-%m-%d'),
        end=end_date.strftime('%Y-%m-%d'),
        progress=True,
        group_by='ticker',
        auto_adjust=False
    )

    # Extract only the adjusted close prices
    adjusted_closes = {
        ticker: data[ticker]['Adj Close']
        for ticker in tickers_list if ticker in data
    }

    # Combine into a single DataFrame
    df = pd.DataFrame(adjusted_closes)

    # Save to CSV (optional)
    df.to_csv(adjusted_close_prices_csv)

# Read the CSV file containing adjusted close prices
# index_col=0 => Use the first column (dates) as the DataFrame index
# parse_dates=True => Parse the index column as datetime objects
df_adjusted_close_prices = pd.read_csv(adjusted_close_prices_csv, index_col=0, parse_dates=True)

df_adjusted_close_prices

Unnamed: 0_level_0,OPEN,NVDA,AMZN,VALE,SOFI,TSLA,LCID,BBAI,INTC,F,...,EXLS,WSM,CELC,EXG,LFST,KODK,ALLE,QDEL,MPC,BC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-10-31,,0.356181,18.201500,8.126354,,10.662667,,,18.028404,9.434296,...,5.782000,19.883125,,3.203766,,26.00,,24.660000,25.192692,37.642387
2013-11-01,,0.357823,17.950001,8.308969,,10.811333,,,17.925257,9.312988,...,5.812000,19.716303,,3.187554,,26.25,,24.290001,24.911442,37.667408
2013-11-04,,0.347505,17.937000,8.664054,,11.680000,,,17.873684,9.373639,...,5.814000,20.133369,,3.200524,,26.90,,24.590000,25.203234,38.443108
2013-11-05,,0.347037,17.944500,8.552455,,11.787333,,,17.869974,9.423265,...,4.672000,20.322952,,3.181068,,26.90,,24.400000,24.862213,38.526524
2013-11-06,,0.349381,17.809000,8.658980,,10.077333,,,18.033562,9.324013,...,4.804000,20.231955,,3.187554,,26.00,,24.430000,24.749727,38.201221
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-28,2.34,176.750000,232.789993,9.840000,21.020000,325.589996,2.79,7.15,20.680000,11.280000,...,42.220001,188.229996,36.790001,8.840000,4.17,6.93,163.669998,26.090000,174.800003,60.310001
2025-07-29,2.05,175.509995,231.009995,9.890000,22.400000,321.200012,2.56,6.62,20.410000,11.080000,...,42.200001,187.110001,38.500000,8.810000,4.13,6.92,164.110001,25.440001,175.429993,61.189999
2025-07-30,2.07,179.270004,230.190002,9.630000,21.870001,319.040009,2.50,6.41,20.340000,10.870000,...,42.790001,190.529999,40.299999,8.800000,4.23,6.86,164.820007,24.290001,170.779999,59.160000
2025-07-31,1.84,177.869995,234.110001,9.530000,22.580000,308.269989,2.46,6.35,19.799999,11.070000,...,43.430000,187.050003,39.165001,8.750000,3.98,6.70,165.919998,23.020000,170.190002,58.290001


In [27]:
# Count the number of columns with at least one null value
null_counts = df_adjusted_close_prices.isnull().any(axis=0).sum()
print(f"{null_counts} columns contain at least one null value.")

# List columns that contain any null values
cols_with_nulls = df_adjusted_close_prices.columns[df_adjusted_close_prices.isnull().any()].tolist()
print("Columns with null values:", cols_with_nulls)

# Remove columns that contain any null values
df_adjusted_close_prices = df_adjusted_close_prices.loc[:, ~df_adjusted_close_prices.isnull().any()].copy()
df_adjusted_close_prices


456 columns contain at least one null value.
Columns with null values: ['OPEN', 'SOFI', 'LCID', 'BBAI', 'RKT', 'HOOD', 'AVTR', 'PLTR', 'JOBY', 'RIOT', 'APLD', 'QBTS', 'RGTI', 'SNAP', 'RIVN', 'RDDT', 'QS', 'HIMS', 'ACHR', 'SMR', 'SOUN', 'COIN', 'NVTS', 'ENVX', 'TLRY', 'IREN', 'OKLO', 'BITF', 'OSCR', 'FUBO', 'IONQ', 'RXRX', 'BE', 'HLN', 'RKLB', 'DOW', 'MP', 'CIFR', 'BTBT', 'AUR', 'MRNA', 'CLSK', 'CFLT', 'ROKU', 'CRCL', 'KHC', 'ADT', 'UBER', 'EOSE', 'PTON', 'RBLX', 'IR', 'CCCS', 'MIR', 'UWMC', 'NBIS', 'LYFT', 'CORZ', 'PYPL', 'COMP', 'ANET', 'SLDP', 'CRWV', 'PATH', 'OWL', 'HPE', 'USAR', 'PR', 'VRT', 'NFE', 'WSC', 'PINS', 'TMC', 'U', 'BULL', 'ZETA', 'SNOW', 'SHOP', 'GLXY', 'AMC', 'REPL', 'ASTS', 'KVUE', 'TOST', 'TTD', 'AMPX', 'TDOC', 'NET', 'XYZ', 'CPNG', 'TEM', 'RUN', 'LAC', 'VICI', 'AI', 'ARM', 'DJT', 'RBRK', 'ONON', 'ATAI', 'LUNR', 'RZLV', 'UPST', 'NVT', 'SG', 'W', 'ARRY', 'PL', 'ALAB', 'VST', 'PK', 'TNGX', 'NVST', 'AUPH', 'SABR', 'PCOR', 'IBRX', 'APP', 'DKNG', 'MBLY', 'DELL', 'S', 'GENI

Unnamed: 0_level_0,NVDA,AMZN,VALE,TSLA,INTC,F,AAPL,AMD,PLUG,LUMN,...,CRI,GRMN,ASB,EXLS,WSM,EXG,KODK,QDEL,MPC,BC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-10-31,0.356181,18.201500,8.126354,10.662667,18.028404,9.434296,16.152670,3.340000,0.58,15.736377,...,52.909382,32.577412,11.197347,5.782000,19.883125,3.203766,26.00,24.660000,25.192692,37.642387
2013-11-01,0.357823,17.950001,8.308969,10.811333,17.925257,9.312988,16.070154,3.310000,0.58,15.606244,...,53.544483,32.479847,11.183576,5.812000,19.716303,3.187554,26.25,24.290001,24.911442,37.667408
2013-11-04,0.347505,17.937000,8.664054,11.680000,17.873684,9.373639,16.277822,3.320000,0.57,15.689898,...,53.919376,33.385738,11.259325,5.814000,20.133369,3.200524,26.90,24.590000,25.203234,38.443108
2013-11-05,0.347037,17.944500,8.552455,11.787333,17.869974,9.423265,16.237646,3.330000,0.55,15.513291,...,53.605667,33.441490,11.293754,4.672000,20.322952,3.181068,26.90,24.400000,24.862213,38.526524
2013-11-06,0.349381,17.809000,8.658980,10.077333,18.033562,9.324013,16.191641,3.320000,0.48,15.750313,...,53.138935,33.413620,11.335073,4.804000,20.231955,3.187554,26.00,24.430000,24.749727,38.201221
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-28,176.750000,232.789993,9.840000,325.589996,20.680000,11.280000,214.050003,173.660004,1.77,4.470000,...,26.530001,236.360001,25.410000,42.220001,188.229996,8.840000,6.93,26.090000,174.800003,60.310001
2025-07-29,175.509995,231.009995,9.890000,321.200012,20.410000,11.080000,211.270004,177.440002,1.59,4.590000,...,25.590000,239.300003,25.230000,42.200001,187.110001,8.810000,6.92,25.440001,175.429993,61.189999
2025-07-30,179.270004,230.190002,9.630000,319.040009,20.340000,10.870000,209.050003,179.509995,1.55,4.460000,...,24.660000,221.490005,25.170000,42.790001,190.529999,8.800000,6.86,24.290001,170.779999,59.160000
2025-07-31,177.869995,234.110001,9.530000,308.269989,19.799999,11.070000,207.570007,176.309998,1.50,4.450000,...,24.240000,218.759995,24.740000,43.430000,187.050003,8.750000,6.70,23.020000,170.190002,58.290001


#### **6. Calculate Monthly Log Returns**
- Portfolio models (Markowitz, Equal Weight, etc.) work on returns, not raw prices.
- Using monthly log returns avoids lookahead bias and is standard for multi-asset backtesting.
- Log returns are more robust, easier to analyze statistically, and preferred for multi-period backtests and portfolio analysis.

In [29]:
# Resample prices to monthly frequency (use last available price of each month)
df_monthly_prices = df_adjusted_close_prices.resample('ME').last()

# Calculate log returns: log(P_t / P_{t-1})
df_log_returns = np.log(df_monthly_prices / df_monthly_prices.shift(1)).dropna()
# df_log_returns.index = df_log_returns.index.strftime('%Y-%m-%d')

# Use the most recent date from df_adjusted_close_prices as the last date in df_log_returns
last_adj_close_date = df_adjusted_close_prices.index.max()#.strftime('%Y-%m-%d')
dates = list(df_log_returns.index)
dates[-1] = last_adj_close_date
df_log_returns.index = dates

df_log_returns.sort_index(ascending=False)


Unnamed: 0,NVDA,AMZN,VALE,TSLA,INTC,F,AAPL,AMD,PLUG,LUMN,...,CRI,GRMN,ASB,EXLS,WSM,EXG,KODK,QDEL,MPC,BC
2025-08-01,-0.023608,-0.086317,0.018712,-0.018465,-0.025059,-0.022842,-0.025322,-0.026495,-0.068993,-0.181872,...,-0.024641,-0.000137,-0.026625,-0.029679,0.003309,-0.023122,-0.038027,-0.009164,-0.027763,-0.028715
2025-07-31,0.118521,0.064940,-0.018712,-0.030006,-0.123379,0.020074,0.011630,0.217121,0.006689,0.015855,...,-0.217517,0.046982,0.014248,-0.008255,0.139244,0.007520,0.170452,-0.224706,0.024265,0.053743
2025-06-30,0.156364,0.067792,0.061591,-0.086786,0.136086,0.044284,0.021281,0.248028,0.526610,0.110957,...,-0.032329,0.032416,0.061291,-0.049019,0.009904,0.026088,-0.005296,-0.062215,0.032862,0.087341
2025-05-31,0.215624,0.105843,-0.019523,0.205293,-0.027745,0.050782,-0.055073,0.128782,0.011429,0.101965,...,-0.052169,0.082645,0.049092,-0.052727,0.046114,0.068956,-0.102010,0.098609,0.162469,0.102733
2025-04-30,0.004970,-0.031176,-0.069494,0.085029,-0.122085,-0.001996,-0.044321,-0.053889,-0.439367,-0.101965,...,-0.213109,-0.150083,-0.021082,0.026546,-0.018420,0.001088,-0.004758,-0.229814,-0.058512,-0.156475
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-03-31,-0.025904,-0.073709,-0.024287,-0.160783,0.041532,0.013553,0.019756,0.077759,0.418936,0.066402,...,0.032786,0.037739,0.078890,0.099232,0.134733,-0.004785,0.108359,-0.026032,0.035551,0.011101
2014-02-28,0.162107,0.009462,0.041057,0.299723,0.018416,0.028338,0.055918,0.078472,0.435902,0.079883,...,0.113462,0.174895,0.018749,0.106989,0.066005,0.049903,0.061939,-0.053504,-0.030746,0.079576
2014-01-31,-0.020177,-0.105977,-0.114509,0.187262,-0.056253,-0.022951,-0.113949,-0.120694,0.667002,-0.098581,...,-0.065328,-0.024990,-0.054929,-0.093682,-0.060930,-0.011302,-0.208770,-0.044010,-0.052367,-0.105216
2013-12-31,0.026567,0.013049,-0.004580,0.167109,0.085192,-0.101595,0.008862,0.061271,0.739360,0.036775,...,0.015724,-0.040657,0.009818,0.046692,-0.014310,0.018135,0.270638,0.205177,0.103146,0.007846


#### **7. Models**

#### **Date to Benchmark**


In [30]:
# Find the first January after start_date
first_january_year = start_date.year + 1
first_january = pd.Timestamp(year=first_january_year, month=1, day=31)  # Last business day of January

# Adjust in case there's no price exactly on this day (get first index >= first_january)
benchmark_start_date = df_monthly_prices.index[df_monthly_prices.index >= first_january][0]


print("First date in df_adjusted_close_prices:", df_adjusted_close_prices.index.min().strftime("%Y-%m-%d"))
print("First date in df_log_returns:          ", df_log_returns.index.min())
print("Benchmark start date:                  ", benchmark_start_date.strftime("%Y-%m-%d"))


First date in df_adjusted_close_prices: 2013-10-31
First date in df_log_returns:           2013-11-30 00:00:00
Benchmark start date:                   2014-01-31


In [31]:
# Slice the data from this date onward
df_monthly_prices = df_monthly_prices.loc[benchmark_start_date:]
df_log_returns = df_log_returns.loc[benchmark_start_date:]

In [32]:
df_log_returns

Unnamed: 0,NVDA,AMZN,VALE,TSLA,INTC,F,AAPL,AMD,PLUG,LUMN,...,CRI,GRMN,ASB,EXLS,WSM,EXG,KODK,QDEL,MPC,BC
2014-01-31,-0.020177,-0.105977,-0.114509,0.187262,-0.056253,-0.022951,-0.113949,-0.120694,0.667002,-0.098581,...,-0.065328,-0.024990,-0.054929,-0.093682,-0.060930,-0.011302,-0.208770,-0.044010,-0.052367,-0.105216
2014-02-28,0.162107,0.009462,0.041057,0.299723,0.018416,0.028338,0.055918,0.078472,0.435902,0.079883,...,0.113462,0.174895,0.018749,0.106989,0.066005,0.049903,0.061939,-0.053504,-0.030746,0.079576
2014-03-31,-0.025904,-0.073709,-0.024287,-0.160783,0.041532,0.013553,0.019756,0.077759,0.418936,0.066402,...,0.032786,0.037739,0.078890,0.099232,0.134733,-0.004785,0.108359,-0.026032,0.035551,0.011101
2014-04-30,0.030789,-0.100757,-0.017300,-0.002690,0.033527,0.042434,0.094761,0.019754,-0.423228,0.061126,...,-0.052752,0.032755,-0.028646,-0.088218,-0.053751,0.017857,-0.111701,-0.241162,0.065694,-0.119468
2014-05-31,0.032886,0.027309,-0.036199,-0.000577,0.031886,0.017797,0.075765,-0.022251,-0.066691,0.076091,...,-0.018404,0.031207,-0.013177,0.001412,0.063224,0.025358,-0.040655,0.056640,-0.034325,0.072299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-30,0.004970,-0.031176,-0.069494,0.085029,-0.122085,-0.001996,-0.044321,-0.053889,-0.439367,-0.101965,...,-0.213109,-0.150083,-0.021082,0.026546,-0.018420,0.001088,-0.004758,-0.229814,-0.058512,-0.156475
2025-05-31,0.215624,0.105843,-0.019523,0.205293,-0.027745,0.050782,-0.055073,0.128782,0.011429,0.101965,...,-0.052169,0.082645,0.049092,-0.052727,0.046114,0.068956,-0.102010,0.098609,0.162469,0.102733
2025-06-30,0.156364,0.067792,0.061591,-0.086786,0.136086,0.044284,0.021281,0.248028,0.526610,0.110957,...,-0.032329,0.032416,0.061291,-0.049019,0.009904,0.026088,-0.005296,-0.062215,0.032862,0.087341
2025-07-31,0.118521,0.064940,-0.018712,-0.030006,-0.123379,0.020074,0.011630,0.217121,0.006689,0.015855,...,-0.217517,0.046982,0.014248,-0.008255,0.139244,0.007520,0.170452,-0.224706,0.024265,0.053743


#### **7.1 Equal Weight (baseline)**

The Equal Weight model allocates the same weight to every asset in the available universe, regardless of liquidity, size, risk, or performance.

In [None]:
# def equal_weight(monthly_prices, initial_capital=10_000):
#     """
#     Allocate initial capital equally across all assets at the first available date.
#     Returns a DataFrame with tickers, units bought, amount invested per asset, and weight (%).
#     Applies recommended decimal places for each field.
#     """
#     # Get the first available date
#     first_date = monthly_prices.index[0]
#     prices_first_date = monthly_prices.loc[first_date]
    
#     # Remove any assets with missing price on first date
#     prices_first_date = prices_first_date.dropna()
    
#     n_assets = len(prices_first_date)
#     equal_investment = initial_capital / n_assets
    
#     # Calculate number of units bought per asset
#     units_bought = equal_investment / prices_first_date
    
#     # DataFrame with initial allocation
#     df_alloc = pd.DataFrame({
#         'ticker': prices_first_date.index,
#         'price_first_date': prices_first_date.values.round(5),
#         'units_bought': units_bought.values.round(5),
#         'amount_invested': np.round(equal_investment, 2),
#         'weight_percent': np.round(100 / n_assets, 5)
#     })

#     return df_alloc, first_date

# df_equal_weight, first_date = equal_weight(monthly_prices)

# print(f"Equal Weight Allocation on {first_date.date()}")
# df_equal_weight

In [None]:
# import pandas as pd
# import numpy as np

# def equal_weight(monthly_prices, initial_capital=10_000):
#     """
#     Allocate initial capital equally across all assets at the first available date.
#     Returns a DataFrame with tickers, units bought, amount invested per asset, and weight (%).
#     """
#     # Get the first available date
#     first_date = monthly_prices.index[0]
#     prices_first_date = monthly_prices.loc[first_date]
#     prices_first_date = prices_first_date.dropna()  # Remove any assets with missing price
    
#     n_assets = len(prices_first_date)
#     equal_investment = initial_capital / n_assets
#     units_bought = equal_investment / prices_first_date

#     df_alloc = pd.DataFrame({
#         'ticker': prices_first_date.index,
#         'price_first_date': prices_first_date.values.round(5),
#         'units_bought': units_bought.values.round(5),
#         'amount_invested': np.round(equal_investment, 2),
#         'weight_percent': np.round(100 / n_assets, 5)
#     })
#     return df_alloc, first_date

# # === Equal Weight initial allocation ===
# df_equal_weight, first_date = equal_weight(monthly_prices)
# print(f"Equal Weight Allocation on {first_date.date()}")

# # === Final portfolio analysis ===
# last_date = monthly_prices.index[-1]
# prices_last_date = monthly_prices.loc[last_date]
# prices_last_date = prices_last_date[df_equal_weight['ticker']].values

# df_equal_weight_final = pd.DataFrame({
#     'ticker': df_equal_weight['ticker'],
#     'price_last_date': np.round(prices_last_date, 5),
#     'units_bought': df_equal_weight['units_bought'],
#     'amount_invested': df_equal_weight['amount_invested'],
#     'weight_percent': df_equal_weight['weight_percent'],
# })

# df_equal_weight_final['final_value'] = (df_equal_weight_final['units_bought'] * df_equal_weight_final['price_last_date']).round(2)
# total_final_portfolio_value = df_equal_weight_final['final_value'].sum()
# df_equal_weight_final['weight_percent_final'] = (df_equal_weight_final['final_value'] / total_final_portfolio_value * 100).round(5)
# df_equal_weight_final['return_percent'] = ((df_equal_weight_final['final_value'] / df_equal_weight_final['amount_invested']) - 1) * 100
# df_equal_weight_final['return_percent'] = df_equal_weight_final['return_percent'].round(2)

# print(f"Equal Weight Final Positions on {last_date.date()}")
# print("\nStarted portfolio value: ${:,.2f}".format(df_equal_weight_final['amount_invested'].sum()))
# print(f"Total final portfolio value: ${total_final_portfolio_value:,.2f}")

# # === Portfolio return and period calculation ===
# total_initial_value = df_equal_weight_final['amount_invested'].sum()
# total_return_percent = ((total_final_portfolio_value / total_initial_value) - 1) * 100

# # Calculate period length in months and years
# n_months = len(monthly_prices.loc[first_date:last_date])
# n_years = n_months / 12

# print(f"\nPortfolio total return: {total_return_percent:.2f}%")
# print(f"Period: {n_months} months ({n_years:.2f} years)")

# # === Mean Monthly Return Calculation ===
# tickers = df_equal_weight_final['ticker'].values
# units_bought = df_equal_weight_final['units_bought'].values
# monthly_prices_selected = monthly_prices[tickers]
# portfolio_value_over_time = (monthly_prices_selected * units_bought).sum(axis=1)
# monthly_returns = portfolio_value_over_time.pct_change().dropna()
# mean_monthly_return_percent = (monthly_returns.mean() * 100).round(4)

# print(f"\nMean monthly portfolio return: {mean_monthly_return_percent}%")

# df_equal_weight_final.sort_values(by='return_percent', ascending=False, inplace=True)
# df_equal_weight_final


In [None]:
# def equal_weight_final_analysis(monthly_prices, df_alloc):
#     """
#     Calculates the final value and performance for each asset in the equal weight portfolio.
#     """
#     last_date = monthly_prices.index[-1]
#     prices_last_date = monthly_prices.loc[last_date]

#     # Align tickers (in case some delisted/missing)
#     prices_last_date = prices_last_date[df_alloc['ticker']]
#     df_alloc = df_alloc.copy()
#     df_alloc['price_last_date'] = prices_last_date.values.round(5)
#     df_alloc['final_value'] = (df_alloc['units_bought'] * df_alloc['price_last_date']).round(2)
#     df_alloc['return_percent'] = ((df_alloc['final_value'] / df_alloc['amount_invested']) - 1) * 100
#     df_alloc['return_percent'] = df_alloc['return_percent'].round(2)

#     total_final_value = df_alloc['final_value'].sum()
#     return df_alloc, last_date, total_final_value

# df_equal_weight, first_date = equal_weight(monthly_prices)
# df_equal_weight_final, last_date, total_final_value = equal_weight_final_analysis(monthly_prices, df_equal_weight)

# print(f"\nEqual Weight Buy & Hold: {first_date.date()} to {last_date.date()}")
# print(f"Final portfolio value: ${total_final_value:,.2f}\n")

# # Show to


In [None]:
# import numpy as np
# import pandas as pd

# def buy_and_hold_equal_weight(log_returns, initial_capital=10_000):
#     """
#     Simulate buy & hold equal weight strategy.
#     Invests equal weight in all assets at the first date and holds until the last date.
#     Returns:
#         - final_portfolio_value (float)
#         - asset_final_values (Series)
#         - asset_returns_percent (Series)
#     """
#     # Get the first and last date in log_returns
#     first_date = log_returns.index[0]
#     last_date = log_returns.index[-1]
#     assets = log_returns.columns

#     n_assets = len(assets)
#     weights = np.repeat(1 / n_assets, n_assets)
    
#     # Calculate cumulative log return for each asset over the period
#     cumulative_log_returns = log_returns.sum(axis=0)
#     # Final price multiplier for each asset
#     asset_growth = np.exp(cumulative_log_returns)
    
#     # Initial investment per asset
#     initial_allocation = initial_capital * weights
    
#     # Final value per asset
#     asset_final_values = initial_allocation * asset_growth
    
#     # Total portfolio value at the end
#     final_portfolio_value = asset_final_values.sum()
    
#     # Percent return per asset
#     asset_returns_percent = (asset_final_values / initial_allocation - 1) * 100
    
#     return {
#         "first_date": first_date,
#         "last_date": last_date,
#         "final_portfolio_value": final_portfolio_value,
#         "asset_final_values": pd.Series(asset_final_values, index=assets),
#         "asset_returns_percent": pd.Series(asset_returns_percent, index=assets)
#     }

# # Run the simulation
# result = buy_and_hold_equal_weight(log_returns)

# print(f"Buy & Hold Equal Weight from {result['first_date'].date()} to {result['last_date'].date()}:")
# print(f"Final portfolio value: ${result['final_portfolio_value']:,.2f}\n")
# print("Final values per asset (USD):")
# print(result["asset_final_values"].head())  # Show first 5 assets

# print("\nPercent return per asset (%):")
# print(result["asset_returns_percent"].head())  # Show first 5 assets


In [None]:
# # Cria DataFrame com os resultados individuais
# summary_df = pd.DataFrame({
#     'final_value_usd': result['asset_final_values'],
#     'return_percent': result['asset_returns_percent']
# })

# # Ordena do maior para o menor retorno percentual
# summary_df_sorted = summary_df.sort_values(by='return_percent', ascending=False)

# # Exibe as 10 maiores e 10 menores valorizações
# print("Top 10 assets (percent return):")
# print(summary_df_sorted.head(10))

# print("\nBottom 10 assets (percent return):")
# print(summary_df_sorted.tail(10))


In [None]:
# summary_df_sorted

#### **7.2 Markowitz (Max Sharpe)**

#### **7.3 Minimum Volatility**

#### **7.4 Momentum**

#### **7.5 Random (baseline)**