#### **1. Imports**

In [1]:
# %pip install requests beautifulsoup4 pandas yfinance python-dateutil

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import yfinance as yf
from datetime import datetime
from dateutil.relativedelta import relativedelta

#### **2. Variables**

In [3]:
# File used to cache the downloaded adjusted-close price table.
adjusted_close_prices_csv = "adjusted_close_prices.csv"

# Refresh switches. While False, the cells below skip the scrape / download
# step and only read the CSV files that are already on disk.
search_all_tickers_flag = adjusted_closes_flag = False

#####################################

# Uncomment to force a refresh:
# search_all_tickers_flag = True
# adjusted_closes_flag = True

#### **2. Download - Asset Dataset**

In [4]:
# Retry budget for a single screener page before the scrape gives up.
MAX_ATTEMPTS = 10

# Finviz screener endpoint requested for every page.
BASE_URL = "https://finviz.com/screener.ashx"

# Pool of User-Agent header values; the scraper draws one at random per request.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "Mozilla/5.0 (X11; Linux x86_64)",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_4)",
]

def _extract_screener_rows(html: str) -> list:
    """Parse one screener page into ``[ticker, company, sector, industry, country, market_cap, volume]`` rows.

    The results table is taken to be the first ``<table>`` whose second row
    has at least 11 cells and starts with a purely numeric cell.

    Raises:
        ValueError: if no table on the page matches that shape.
    """
    soup = BeautifulSoup(html, "html.parser")

    target_table = None
    for table in soup.find_all("table"):
        rows = table.find_all("tr")
        if len(rows) < 2:
            continue
        cols = rows[1].find_all("td")
        if len(cols) >= 11 and cols[0].text.strip().isdigit():
            target_table = table
            break

    if target_table is None:
        raise ValueError("No valid table found.")

    records = []
    # Skip the first <tr>, which the detection above treats as the header row.
    for row in target_table.find_all("tr")[1:]:
        cols = row.find_all("td")
        if len(cols) >= 11:
            # Cell 0 is the numeric cell used for detection; cells 7-9 are not collected.
            records.append([cols[i].text.strip() for i in (1, 2, 3, 4, 5, 6, 10)])
    return records


def scrape_finviz_by_exchange_requests(exchange: str):
    """Scrape the Finviz screener for one exchange, page by page.

    Each page is requested up to ``MAX_ATTEMPTS`` times with a growing pause
    between attempts. The scrape ends when a page contributes no ticker that
    was not already collected, or when a page exhausts its retry budget; in
    both cases whatever was gathered is handed to ``finalize``, which writes
    the CSV file.

    Args:
        exchange: ``'nasdaq'`` or ``'nyse'`` (case-insensitive).

    Returns:
        The value returned by ``finalize(all_data, exchange)``.

    Raises:
        ValueError: if ``exchange`` is not one of the supported names.
    """
    exchange_map = {
        "nasdaq": "exch_nasd",
        "nyse": "exch_nyse"
    }

    exchange_key = exchange.lower()
    if exchange_key not in exchange_map:
        raise ValueError("Invalid exchange. Use 'nasdaq' or 'nyse'.")

    exchange_code = exchange_map[exchange_key]
    all_data = []
    seen_tickers = set()
    r = 1  # offset sent as the "r" query parameter; advanced by 20 per page

    while True:
        page_rows = None
        attempts = 0

        while page_rows is None and attempts < MAX_ATTEMPTS:
            try:
                headers = {
                    "User-Agent": random.choice(USER_AGENTS)
                }
                params = {
                    "v": "111",  # presumably the screener view code -- TODO confirm
                    "f": exchange_code,
                    "r": r
                }
                response = requests.get(BASE_URL, headers=headers, params=params, timeout=10)
                if response.status_code == 429:
                    raise Exception("HTTP 429 Too Many Requests")
                response.raise_for_status()

                page_rows = _extract_screener_rows(response.text)

            except Exception as e:
                # Deliberately broad: any failure (network, HTTP status, parsing)
                # is reported and retried rather than aborting the whole scrape.
                attempts += 1
                print(f"Error on page {r}, attempt {attempts}: {e}")
                if attempts < MAX_ATTEMPTS:
                    # Increasing backoff; pointless after the final attempt.
                    time.sleep(5 * attempts + random.uniform(1, 3))

        if page_rows is None:
            print(f"\n⚠️  Page {r} failed {MAX_ATTEMPTS} times. Finalizing with partial data.")
            return finalize(all_data, exchange)

        new_rows = []
        for record in page_rows:
            if record[0] not in seen_tickers:
                seen_tickers.add(record[0])
                new_rows.append(record)

        # A page with nothing new means the offset ran past the end of the
        # listing (or the site repeated a page). Requesting further offsets
        # cannot add data, so stop here instead of looping indefinitely.
        if not new_rows:
            print(f"No more data on page starting at {r}. Stopping.")
            return finalize(all_data, exchange)

        all_data.extend(new_rows)
        print(f"Page {r} OK")
        r += 20

        # Pause between pages to mimic human browsing.
        time.sleep(random.uniform(3, 6))

def finalize(all_data, exchange):
    """Build the ticker table, drop zero-volume rows and save it as CSV.

    Args:
        all_data: list of 7-item rows in the column order listed below.
        exchange: exchange name; its lower-cased form goes into the file name.

    Returns:
        The filtered DataFrame, re-indexed from 0, that was written to
        ``tickers_<exchange>.csv``.
    """
    column_names = [
        "Ticker", "Company", "Sector", "Industry", "Country", "Market Cap", "Volume"
    ]
    frame = pd.DataFrame(all_data, columns=column_names)

    # Volume is still text at this point, hence the comparison against "0".
    traded = frame.loc[frame["Volume"].ne("0")].reset_index(drop=True)

    filename = f"tickers_{exchange.lower()}.csv"
    traded.to_csv(filename, index=False)
    print(f"\n✅ Scraping finished. Saved {len(traded)} tickers to '{filename}'")
    return traded


##### **2.1 Nasdaq Tickers**

In [5]:
# Re-scrape only on request. The scraper saves 'tickers_nasdaq.csv' itself
# (through finalize), so its return value is not needed here: the table is
# always loaded from that file below, whether or not a scrape just ran.
if search_all_tickers_flag:
    scrape_finviz_by_exchange_requests("nasdaq")

nasdaq = pd.read_csv("tickers_nasdaq.csv")
nasdaq

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,AACB,Artius II Acquisition Inc,Financial,Shell Companies,USA,298.12M,1
1,AACG,ATA Creativity Global ADR,Consumer Defensive,Education & Training Services,China,67.45M,254981
2,AACI,Armada Acquisition Corp. II,Financial,Shell Companies,USA,321.12M,3
3,AADR,AdvisorShares Dorsey Wright ADR ETF,Financial,Exchange Traded Fund,USA,-,20
4,AAL,American Airlines Group Inc,Industrials,Airlines,USA,7.28B,24043012
...,...,...,...,...,...,...,...
4231,ZVSA,ZyVersa Therapeutics Inc,Healthcare,Biotechnology,USA,0.93M,18248
4232,ZYBT,Zhengye Biotechnology Holding Ltd,Healthcare,Drug Manufacturers - Specialty & Generic,China,256.85M,6705
4233,ZYME,Zymeworks BC Inc,Healthcare,Biotechnology,USA,879.92M,402780
4234,ZYXI,Zynex Inc,Healthcare,Medical Distribution,USA,42.03M,840704


##### **2.2 NYSE Tickers**


In [6]:
# Re-scrape only on request. The scraper saves 'tickers_nyse.csv' itself
# (through finalize), so its return value is not needed here: the table is
# always loaded from that file below, whether or not a scrape just ran.
if search_all_tickers_flag:
    scrape_finviz_by_exchange_requests("nyse")

nyse = pd.read_csv("tickers_nyse.csv")
nyse

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,A,Agilent Technologies Inc,Healthcare,Diagnostics & Research,USA,32.14B,682198
1,AA,Alcoa Corp,Basic Materials,Aluminum,USA,7.44B,3594093
2,AAA,Alternative Access First Priority CLO Bond ETF,Financial,Exchange Traded Fund,USA,-,6048
3,AACT,Ares Acquisition Corporation II,Financial,Shell Companies,USA,705.51M,209977
4,AAM,AA Mission Acquisition Corp,Financial,Shell Companies,USA,459.97M,2413
...,...,...,...,...,...,...,...
4669,ZTO,ZTO Express (Cayman) Inc ADR,Industrials,Integrated Freight & Logistics,China,12.03B,2439145
4670,ZTR,Virtus Total Return Fund Inc,Financial,Closed-End Fund - Equity,USA,342.65M,128272
4671,ZTS,Zoetis Inc,Healthcare,Drug Manufacturers - Specialty & Generic,USA,65.41B,1582264
4672,ZVIA,Zevia PBC,Consumer Defensive,Beverages - Non-Alcoholic,USA,219.33M,239408


In [7]:
# Stack both exchange listings into one table with a fresh 0..n-1 index.
tickers = pd.concat(objs=(nasdaq, nyse), axis=0, ignore_index=True)
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
0,AACB,Artius II Acquisition Inc,Financial,Shell Companies,USA,298.12M,1
1,AACG,ATA Creativity Global ADR,Consumer Defensive,Education & Training Services,China,67.45M,254981
2,AACI,Armada Acquisition Corp. II,Financial,Shell Companies,USA,321.12M,3
3,AADR,AdvisorShares Dorsey Wright ADR ETF,Financial,Exchange Traded Fund,USA,-,20
4,AAL,American Airlines Group Inc,Industrials,Airlines,USA,7.28B,24043012
...,...,...,...,...,...,...,...
8905,ZTO,ZTO Express (Cayman) Inc ADR,Industrials,Integrated Freight & Logistics,China,12.03B,2439145
8906,ZTR,Virtus Total Return Fund Inc,Financial,Closed-End Fund - Equity,USA,342.65M,128272
8907,ZTS,Zoetis Inc,Healthcare,Drug Manufacturers - Specialty & Generic,USA,65.41B,1582264
8908,ZVIA,Zevia PBC,Consumer Defensive,Beverages - Non-Alcoholic,USA,219.33M,239408


#### **3. Process data**


In [8]:
# Normalise Volume to float. Thousands separators are stripped only when the
# column is still text: if pandas already parsed it as numbers (or this cell is
# re-run after the conversion) the `.str` accessor would raise AttributeError.
volume = tickers['Volume']
if not pd.api.types.is_numeric_dtype(volume):
    volume = volume.str.replace(',', '', regex=False)
tickers['Volume'] = volume.astype(float)

# Most actively traded first. Reassigning rather than sorting in place keeps
# the step explicit and avoids mutating a frame an earlier cell displayed.
tickers = tickers.sort_values(by="Volume", ascending=False)
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
2236,LOBO,Lobo Ev Technologies Ltd,Consumer Cyclical,Auto Manufacturers,China,13.39M,277221913.0
3138,RAYA,Erayak Power Solution Group Inc,Industrials,Electrical Equipment & Parts,China,4.35M,229545591.0
3724,TNON,Tenon Medical Inc,Healthcare,Medical Devices,USA,12.22M,211413022.0
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0
2010,IXHL,Incannex Healthcare Inc,Healthcare,Drug Manufacturers - Specialty & Generic,Australia,38.40M,137451462.0
...,...,...,...,...,...,...,...
6555,JDVI,John Hancock Disciplined Value International S...,Financial,Exchange Traded Fund,USA,-,1.0
6656,KBUF,KraneShares 90% KWEB Defined Outcome January 2...,Financial,Exchange Traded Fund,USA,-,1.0
4490,AUSM,Allspring Ultra Short Municipal ETF,Financial,Exchange Traded Fund,USA,-,1.0
7753,ROPE,Coastal Compass 100 ETF,Financial,Exchange Traded Fund,USA,-,1.0


#### **4. Filter assets**

##### **4.1 Liquidity: Remove low liquidity**
- Filters out assets with insufficient daily trading volume.

In [9]:
# Liquidity floor: keep rows whose Volume is strictly above 500,000.
tickers = tickers.loc[tickers['Volume'].gt(500_000)]
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume
2236,LOBO,Lobo Ev Technologies Ltd,Consumer Cyclical,Auto Manufacturers,China,13.39M,277221913.0
3138,RAYA,Erayak Power Solution Group Inc,Industrials,Electrical Equipment & Parts,China,4.35M,229545591.0
3724,TNON,Tenon Medical Inc,Healthcare,Medical Devices,USA,12.22M,211413022.0
8060,SOXS,Direxion Daily Semiconductor Bear 3X Shares,Financial,Exchange Traded Fund,USA,-,200627421.0
2010,IXHL,Incannex Healthcare Inc,Healthcare,Drug Manufacturers - Specialty & Generic,Australia,38.40M,137451462.0
...,...,...,...,...,...,...,...
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0
8065,SPBO,SPDR Portfolio Corporate Bond ETF,Financial,Exchange Traded Fund,USA,-,500751.0
4586,BC,Brunswick Corp,Consumer Cyclical,Recreational Vehicles,USA,3.70B,500656.0


##### **4.2 Market Cap: Remove small or undefined market capitalization**
- Excludes microcaps and assets without valid market capitalization data to reduce exposure to volatility, manipulation, and poor fundamentals.

In [10]:
# Rows whose Market Cap is the '-' placeholder carry no usable value; drop them
# and take a copy so later column assignments target an independent frame.
tickers = tickers.loc[tickers['Market Cap'].ne('-')].copy()

# Convert Market Cap to numeric format
def convert_market_cap(val):
    """Convert a market-cap string such as ``'7.28B'`` into a float.

    ``$`` signs, thousands separators and surrounding whitespace are ignored.
    A trailing ``K``, ``M``, ``B`` or ``T`` (any case) scales the number by
    1e3, 1e6, 1e9 or 1e12; without a suffix the text is parsed as a plain number.

    Args:
        val: the market-cap text.

    Returns:
        The value as a float.

    Raises:
        ValueError: if the numeric part cannot be parsed (e.g. ``'-'`` or ``''``).
    """
    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}

    text = val.replace('$', '').replace(',', '').strip()
    # Slicing (not indexing) keeps an empty string from raising IndexError, so
    # it falls through to float('') and reports ValueError as before.
    suffix = text[-1:].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)

# Numeric counterpart of the 'Market Cap' text column.
tickers['market_cap'] = tickers['Market Cap'].map(convert_market_cap)

# Market-cap floor: keep rows strictly above 500 million.
tickers = tickers.loc[tickers['market_cap'].gt(500_000_000)]
tickers

Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume,market_cap
2754,OPEN,Opendoor Technologies Inc,Real Estate,Real Estate Services,USA,1.49B,126981253.0,1.490000e+09
2649,NVDA,NVIDIA Corp,Technology,Semiconductors,USA,4306.36B,106136868.0,4.306360e+12
212,AMZN,Amazon.com Inc,Consumer Cyclical,Internet Retail,USA,2294.14B,57251389.0,2.294140e+12
7153,NIO,NIO Inc ADR,Consumer Cyclical,Auto Manufacturers,China,9.45B,56365932.0,9.450000e+09
8547,VALE,Vale S.A. ADR,Basic Materials,Other Industrial Metals & Mining,Brazil,41.58B,55509604.0,4.158000e+10
...,...,...,...,...,...,...,...,...
6706,KODK,Eastman Kodak Co,Industrials,Specialty Business Services,USA,525.21M,503893.0,5.252100e+08
4359,ALLE,Allegion plc,Industrials,Security & Protection Services,Ireland,14.19B,503835.0,1.419000e+10
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0,1.530000e+09
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0,5.094000e+10


In [11]:
# Countries present before the filter, for comparison with the list below.
print(tickers['Country'].unique())

# Allow-list of accepted Country values; every other row is dropped.
allowed_countries = [
    'USA', 'Canada', 'United Kingdom', 'Switzerland', 'Germany', 'France',
    'Netherlands', 'Japan', 'Australia', 'Denmark', 'Sweden', 'Norway',
    'Finland', 'Ireland', 'Belgium', 'Spain', 'Israel', 'Brazil', 'India',
]

country_allowed = tickers['Country'].isin(allowed_countries)
tickers = tickers.loc[country_allowed]

# Countries that remain after the filter.
print(tickers['Country'].unique())

tickers

['USA' 'China' 'Brazil' 'Canada' 'Denmark' 'Cayman Islands' 'Switzerland'
 'United Kingdom' 'Singapore' 'Australia' 'Finland' 'Netherlands' 'Taiwan'
 'Israel' 'India' 'Mexico' 'Sweden' 'South Africa' 'Belgium' 'Bermuda'
 'Spain' 'Ireland' 'Japan' 'Luxembourg' 'Germany' 'Hong Kong' 'Norway'
 'Colombia' 'Monaco' 'France' 'Italy' 'Turkey' 'Cyprus' 'Argentina']
['USA' 'Brazil' 'Canada' 'Denmark' 'Switzerland' 'United Kingdom'
 'Australia' 'Finland' 'Netherlands' 'Israel' 'India' 'Sweden' 'Belgium'
 'Spain' 'Ireland' 'Japan' 'Germany' 'Norway' 'France']


Unnamed: 0,Ticker,Company,Sector,Industry,Country,Market Cap,Volume,market_cap
2754,OPEN,Opendoor Technologies Inc,Real Estate,Real Estate Services,USA,1.49B,126981253.0,1.490000e+09
2649,NVDA,NVIDIA Corp,Technology,Semiconductors,USA,4306.36B,106136868.0,4.306360e+12
212,AMZN,Amazon.com Inc,Consumer Cyclical,Internet Retail,USA,2294.14B,57251389.0,2.294140e+12
8547,VALE,Vale S.A. ADR,Basic Materials,Other Industrial Metals & Mining,Brazil,41.58B,55509604.0,4.158000e+10
3486,SOFI,SoFi Technologies Inc,Financial,Credit Services,USA,24.12B,53230046.0,2.412000e+10
...,...,...,...,...,...,...,...,...
6706,KODK,Eastman Kodak Co,Industrials,Specialty Business Services,USA,525.21M,503893.0,5.252100e+08
4359,ALLE,Allegion plc,Industrials,Security & Protection Services,Ireland,14.19B,503835.0,1.419000e+10
3060,QDEL,QuidelOrtho Corporation,Healthcare,Medical Devices,USA,1.53B,502153.0,1.530000e+09
7009,MPC,Marathon Petroleum Corp,Energy,Oil & Gas Refining & Marketing,USA,50.94B,500904.0,5.094000e+10


#### **5. Download - Adjusted Close Prices**

In [12]:
if adjusted_closes_flag:
    # Download window: 1 January of the year five years back, up to today.
    end_date = datetime.today()
    start_date = datetime(end_date.year - 5, 1, 1)

    # Unique symbols that survived the filters above.
    tickers_list = tickers['Ticker'].unique().tolist()

    # One bulk request, grouped per ticker. auto_adjust=False is passed so the
    # 'Adj Close' field read below is expected in the result.
    data = yf.download(
        tickers=tickers_list,
        start=f"{start_date:%Y-%m-%d}",
        end=f"{end_date:%Y-%m-%d}",
        progress=True,
        group_by='ticker',
        auto_adjust=False,
    )

    # Keep just the adjusted close series of every ticker present in the result.
    adjusted_closes = {
        symbol: data[symbol]['Adj Close']
        for symbol in tickers_list
        if symbol in data
    }

    # One column per ticker, written to the cache file.
    df = pd.DataFrame(adjusted_closes)
    df.to_csv(adjusted_close_prices_csv)

# The price table is always (re)loaded from the cache file, so the rest of the
# notebook sees the same layout whether or not a download just ran.
df = pd.read_csv(adjusted_close_prices_csv)
df

Unnamed: 0,Date,OPEN,NVDA,AMZN,VALE,SOFI,TSLA,LCID,BBAI,INTC,...,EXLS,WSM,CELC,EXG,LFST,KODK,ALLE,QDEL,MPC,BC
0,2020-01-02,,5.971746,94.900497,8.399636,,28.684000,,,53.666462,...,14.000000,32.270748,11.680000,5.354258,,4.03,116.488945,74.800003,51.894016,54.151794
1,2020-01-03,,5.876163,93.748497,8.299714,,29.534000,,,53.013718,...,14.100000,32.035309,11.510000,5.312002,,4.03,115.048988,74.510002,48.809380,53.249271
2,2020-01-06,,5.900805,95.143997,8.206038,,30.102667,,,52.863762,...,14.032000,32.155247,11.330000,5.305964,,3.75,114.835320,75.650002,48.800983,52.491142
3,2020-01-07,,5.972244,95.343002,8.262243,,31.270666,,,51.981670,...,13.946000,32.768318,11.010000,5.342184,,3.27,113.460365,76.089996,49.572155,51.705940
4,2020-01-08,,5.983445,94.598503,8.255999,,32.809334,,,52.016960,...,14.100000,32.986012,10.740000,5.378401,,3.21,113.943459,77.260002,49.052467,52.139149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397,2025-07-25,2.54,173.500000,231.440002,10.000000,21.200001,316.059998,2.92,7.39,20.700001,...,42.860001,181.710007,13.770000,8.850000,4.14,6.80,165.389999,27.389999,171.570007,60.790001
1398,2025-07-28,2.34,176.750000,232.789993,9.840000,21.020000,325.589996,2.79,7.15,20.680000,...,42.220001,188.229996,36.790001,8.840000,4.17,6.93,163.669998,26.090000,174.800003,60.310001
1399,2025-07-29,2.05,175.509995,231.009995,9.890000,22.400000,321.200012,2.56,6.62,20.410000,...,42.200001,187.110001,38.500000,8.810000,4.13,6.92,164.110001,25.440001,175.429993,61.189999
1400,2025-07-30,2.07,179.270004,230.190002,9.630000,21.870001,319.040009,2.50,6.41,20.340000,...,42.790001,190.529999,40.299999,8.800000,4.23,6.86,164.820007,24.290001,170.779999,59.160000


In [13]:
# True for every column that has at least one missing value.
null_mask = df.isna().any(axis=0)

null_counts = null_mask.sum()
print(f"{null_counts} columns contain at least one null value.")

cols_with_nulls = df.columns[null_mask].tolist()
print(cols_with_nulls)

246 columns contain at least one null value.
['OPEN', 'SOFI', 'LCID', 'BBAI', 'RKT', 'HOOD', 'PLTR', 'JOBY', 'APLD', 'QBTS', 'RGTI', 'RIVN', 'RDDT', 'QS', 'ACHR', 'SMR', 'SOUN', 'COIN', 'NVTS', 'ENVX', 'IREN', 'OKLO', 'OSCR', 'IONQ', 'RXRX', 'HLN', 'RKLB', 'MP', 'CIFR', 'AUR', 'CFLT', 'CRCL', 'EOSE', 'RBLX', 'CCCS', 'MIR', 'UWMC', 'NBIS', 'CORZ', 'COMP', 'SLDP', 'CRWV', 'PATH', 'OWL', 'USAR', 'TMC', 'U', 'BULL', 'ZETA', 'SNOW', 'GLXY', 'KVUE', 'TOST', 'AMPX', 'CPNG', 'TEM', 'LAC', 'AI', 'ARM', 'DJT', 'RBRK', 'ONON', 'ATAI', 'LUNR', 'RZLV', 'UPST', 'SG', 'ARRY', 'PL', 'ALAB', 'TNGX', 'PCOR', 'APP', 'MBLY', 'S', 'GENI', 'FRSH', 'ABCL', 'ACI', 'ZIM', 'BTSG', 'TIC', 'COUR', 'DNUT', 'SERV', 'IOT', 'AFRM', 'VG', 'RYAN', 'ULCC', 'PRME', 'CLOV', 'LION', 'ALIT', 'SANA', 'GEV', 'RSI', 'RDW', 'CARR', 'HTZ', 'VSCO', 'ETWO', 'CRGY', 'DEFT', 'NNE', 'PCT', 'GTLB', 'EVGO', 'NUVB', 'RELY', 'SAIL', 'MQ', 'OTIS', 'ASAN', 'FLNC', 'PSNY', 'AS', 'GEHC', 'BROS', 'AEVA', 'SYM', 'OGN', 'HAYW', 'GTM', 'DOCN', '

In [14]:
# Keep only the columns without any missing value.
df = df.dropna(axis='columns', how='any').copy()

# Parse the Date text into timestamps, then use it as the index.
df = df.assign(Date=pd.to_datetime(df['Date']))
# df['year'] = df['Date'].dt.year
# df['month'] = df['Date'].dt.month
df = df.set_index('Date')
df

Unnamed: 0_level_0,NVDA,AMZN,VALE,TSLA,INTC,F,AAPL,AMD,AVTR,PLUG,...,ASB,EXLS,WSM,CELC,EXG,KODK,ALLE,QDEL,MPC,BC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,5.971746,94.900497,8.399636,28.684000,53.666462,7.204662,72.620819,49.099998,18.450001,3.24,...,17.530243,14.000000,32.270748,11.680000,5.354258,4.03,116.488945,74.800003,51.894016,54.151794
2020-01-03,5.876163,93.748497,8.299714,29.534000,53.013718,7.044048,71.914833,48.599998,18.400000,3.23,...,17.394409,14.100000,32.035309,11.510000,5.312002,4.03,115.048988,74.510002,48.809380,53.249271
2020-01-06,5.900805,95.143997,8.206038,30.102667,52.863762,7.005807,72.487862,48.389999,18.490000,3.82,...,17.010885,14.032000,32.155247,11.330000,5.305964,3.75,114.835320,75.650002,48.800983,52.491142
2020-01-07,5.972244,95.343002,8.262243,31.270666,51.981670,7.074641,72.146942,48.250000,18.879999,3.81,...,16.851078,13.946000,32.768318,11.010000,5.342184,3.27,113.460365,76.089996,49.572155,51.705940
2020-01-08,5.983445,94.598503,8.255999,32.809334,52.016960,7.074641,73.307510,47.830002,18.670000,4.08,...,16.915001,14.100000,32.986012,10.740000,5.378401,3.21,113.943459,77.260002,49.052467,52.139149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-25,173.500000,231.440002,10.000000,316.059998,20.700001,11.470000,213.880005,166.470001,14.210000,1.84,...,25.600000,42.860001,181.710007,13.770000,8.850000,6.80,165.389999,27.389999,171.570007,60.790001
2025-07-28,176.750000,232.789993,9.840000,325.589996,20.680000,11.280000,214.050003,173.660004,13.990000,1.77,...,25.410000,42.220001,188.229996,36.790001,8.840000,6.93,163.669998,26.090000,174.800003,60.310001
2025-07-29,175.509995,231.009995,9.890000,321.200012,20.410000,11.080000,211.270004,177.440002,14.010000,1.59,...,25.230000,42.200001,187.110001,38.500000,8.810000,6.92,164.110001,25.440001,175.429993,61.189999
2025-07-30,179.270004,230.190002,9.630000,319.040009,20.340000,10.870000,209.050003,179.509995,13.800000,1.55,...,25.170000,42.790001,190.529999,40.299999,8.800000,6.86,164.820007,24.290001,170.779999,59.160000


In [None]:
# import numpy as np

# # Remove columns with any missing values and make a safe copy
# df = df[df.columns[~df.isnull().any()]].copy()

# # Convert all remaining columns to numeric just in case
# df = df.apply(pd.to_numeric, errors='coerce')

# # Calculate daily returns
# returns = df.pct_change().dropna()

# # Create insights DataFrame
# insights = pd.DataFrame({
#     'mean_return_annual': returns.mean() * 252,
#     'volatility_annual': returns.std() * np.sqrt(252),
#     'sharpe_ratio': (returns.mean() / returns.std()) * np.sqrt(252),
#     'max_drawdown': (df / df.cummax() - 1).min(),
#     'positive_days_pct': (returns > 0).sum() / len(returns),
#     'amplitude_pct': (df.max() - df.min()) / df.min()
# }).sort_values(by='sharpe_ratio', ascending=False)

# # Show insights
# insights

Unnamed: 0,mean_return_annual,volatility_annual,sharpe_ratio,max_drawdown,positive_days_pct,amplitude_pct
NVDA,0.757926,0.543763,1.393853,-0.663351,0.547466,35.648265
CLS,0.727233,0.553832,1.313092,-0.690372,0.521056,70.522971
PWR,0.485959,0.377345,1.287839,-0.424008,0.538187,16.951053
COOP,0.570395,0.472332,1.207614,-0.627923,0.533904,30.292051
AVGO,0.518668,0.435784,1.190197,-0.483000,0.533191,19.878321
...,...,...,...,...,...,...
XRAY,-0.158090,0.379519,-0.416554,-0.805048,0.506781,4.129458
IART,-0.176434,0.421116,-0.418969,-0.854757,0.492505,5.884995
HPP,-0.290086,0.559156,-0.518792,-0.939603,0.459672,15.557039
NEOG,-0.243624,0.454760,-0.535720,-0.909176,0.482512,10.010251
