### How about selecting the portfolio by comparing each ticker's past Sharpe ratios (3, 6, 12 months) with the Sharpe ratios of VGT?

### Portfolio Selection
- Ask Google AI to return the top 10 holdings of these index ETFs
- Ask Google AI to combine all the tickers into a unique list
- Plot the tickers vs. the benchmark VGT
- Keep tickers that outperform VGT over the 6- and 12-month periods

```python
etf_list = [
    # U.S. Technology Sector ETFs
    'VGT',
    'FTEC',

    # U.S. Large-Cap Growth ETFs
    'QQQ',
    'VUG',
    'IWF',
    'SCHG',
    'SPYG',

    # U.S. Mid-Cap Growth ETFs
    'VOT',
    'IWP',
    'IJK',

    # Global & International ETFs
    'VT',
    'VXUS',
    'VEA',

    # Single-Country & Thematic Index ETFs
    'EWC',
    'PNQI'
]
```

In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pprint
import inspect  # <--- ADD THIS LINE
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR
ROOT_DIR = NOTEBOOK_DIR.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils
import plotting_utils

# --- 4. INITIAL_CAPITAL ---
INITIAL_CAPITAL = 100000

# --- 5. RISK FREE ANNUAL RATE ---
RISK_FREE_ANNUAL_RATE = 0.04

# --- 6. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


In [2]:
# Load the OHLCV price history (stocks and ETFs) from the project data folder.
df_OHLCV = pd.read_parquet(path=DATA_DIR / 'df_OHLCV_clean_stocks_etfs.parquet')

In [3]:
# Build a wide price table: take the 'Adj Close' column (indexed by Ticker and Date)
# and pivot the 'Ticker' index level into columns, leaving one row per date.
df = df_OHLCV.loc[:, 'Adj Close'].unstack('Ticker')

# Quick structural check of the pivoted table, followed by its first rows.
print("--- New df info ---")
df.info()
print("\n--- Sample Data ---")
print(df.head(n=5))

--- New df info ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2024-08-13 to 2025-08-12
Columns: 1510 entries, A to ZWS
dtypes: float64(1510)
memory usage: 2.9 MB

--- Sample Data ---
Ticker              A        AA       AAL      AAON       AAPL       ABBV     ABEV       ABNB        ABT      ACGL     ACHR       ACI       ACM        ACN       ACWI      ACWX       ADBE       ADC        ADI       ADM        ADP       ADSK      ADT       AEE      AEG       AEP       AER       AES        AFG        AFL      AFRM      AGCO       AGG       AGI     AGNC       AHR       AIG       AIQ      AIRR        AIT        AIZ        AJG       AKAM        AL      ALAB       ALB       ALC      ALGM       ALGN       ALK        ALL       ALLE       ALNY      ALSN       ALV        AM       AMAT      AMCR        AMD        AME       AMGN       AMH      AMLP        AMP        AMT       AMX       AMZN         AN      ANET        AON       AOS       APA        APD       APG       APH       

In [4]:
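# Disabled alternative: load `df` from a saved parquet file instead of building it
# from df_OHLCV in the previous cell.
# df = pd.read_parquet(DATA_DIR / 'df.parquet')
# print(f'df:\n{df}')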

In [5]:
# Screening history, kept for reference. Only the final assignment at the bottom is active.
#
# Stage 1 - initial unique ticker list on 2025-07-30, from the top 10 holdings in the ETF list:
# ticker_list = [
# 'MSFT', 'AAPL', 'NVDA', 'GOOG', 'AMZN',
# 'META', 'LLY', 'TSM', 'AVGO', 'NVO',
# 'V', 'TSLA', 'ASML', 'LVMUY', 'ORCL',
# 'TM', 'CRM', 'COST', 'ADBE', 'NSRGY',
# 'SHEL', 'AMD', 'NFLX', 'ACN', 'AZN',
# 'RHHBY', 'CSCO', 'TCEHY', 'ORLY', 'BKNG',
# 'MELI', 'RY', 'TD', 'SHOP', 'TDG', 'APH',
# 'SNPS', 'BN', 'IDXX', 'CDNS', 'ENB', 'BMO',
# 'AJG', 'CP', 'ANET', 'CRWD', 'ABNB', 'CNQ',
# 'BNS', 'SQ', 'DECK', 'TTD', 'ANSS', 'COIN',
# 'VRT', 'FDS', 'TRGP', 'ON', 'BLDR', 'WSM',
# 'ROKU', 'DKNG', 'CSU.TO', 'EME', 'STLD',
# 'RBLX', 'U', 'PATH', 'MSTR',
# ]
#
# Stage 2 - plotted 10 tickers at a time vs VGT and kept the tickers that
# outperformed VGT over the last 3, 6, 12 months:
# ticker_list = [
# 'MSFT', 'NVDA', 'TSM', 'AVGO', 'ORCL',
# 'SHEL', 'AMD', 'NFLX', 'CSCO', 'BKNG',
# 'MELI', 'TD', 'TDG', 'APH','IDXX',
# 'CDNS', 'BMO', 'CRWD', 'COIN','VRT',
# 'EME', 'RBLX', 'U', 'MSTR',
# ]
#
# Stage 3 - the final 10 tickers.
ticker_list = [
    'NVDA',
    'AVGO',
    'ORCL',
    'NFLX',
    'APH',
    'CRWD',
    'EME',
    'RBLX',
    'U',
    'MSTR',
]

In [6]:
# Compare the selected tickers against the VGT benchmark.
plotting_utils.plot_comparative_performance(
    df,
    ticker_list,
    benchmark='VGT',
)

In [7]:
# Collect the merged finviz snapshot files found in DATA_DIR.
# NOTE(review): judging by the recorded output the helper returns file names newest-first;
# prefix=None / count=None presumably mean "no prefix filter" / "no limit" - confirm in utils.
# The un-merged snapshots would use contains_pattern='df_finviz_stocks_etfs' instead.
file_list = utils.get_recent_files(
    directory_path=DATA_DIR,
    extension='parquet',
    prefix=None,
    contains_pattern='df_finviz_merged_stocks_etfs',
    count=None,
)

print(f'file_list:\n{file_list}')

file_list:
['2025-08-12_df_finviz_merged_stocks_etfs.parquet', '2025-08-11_df_finviz_merged_stocks_etfs.parquet', '2025-08-08_df_finviz_merged_stocks_etfs.parquet', '2025-08-07_df_finviz_merged_stocks_etfs.parquet', '2025-08-06_df_finviz_merged_stocks_etfs.parquet', '2025-08-05_df_finviz_merged_stocks_etfs.parquet', '2025-08-04_df_finviz_merged_stocks_etfs.parquet', '2025-08-01_df_finviz_merged_stocks_etfs.parquet', '2025-07-31_df_finviz_merged_stocks_etfs.parquet', '2025-07-30_df_finviz_merged_stocks_etfs.parquet', '2025-07-29_df_finviz_merged_stocks_etfs.parquet', '2025-07-28_df_finviz_merged_stocks_etfs.parquet', '2025-07-25_df_finviz_merged_stocks_etfs.parquet', '2025-07-24_df_finviz_merged_stocks_etfs.parquet', '2025-07-23_df_finviz_merged_stocks_etfs.parquet', '2025-07-22_df_finviz_merged_stocks_etfs.parquet', '2025-07-21_df_finviz_merged_stocks_etfs.parquet', '2025-07-18_df_finviz_merged_stocks_etfs.parquet', '2025-07-17_df_finviz_merged_stocks_etfs.parquet', '2025-07-16_df_finv

In [8]:
# Load the first file in file_list (the most recent snapshot in the recorded run).
df_finviz = pd.read_parquet(path=DATA_DIR / file_list[0])
print(f'df_finviz.head(15):\n{df_finviz.head(n=15)}')

df_finviz.head(15):
       No.                                 Company               Index                  Sector                        Industry Country Exchange                                               Info  MktCap AUM, M  Rank  Market Cap, M        P/E    Fwd P/E       PEG       P/S       P/B        P/C      P/FCF    Book/sh    Cash/sh  Dividend %  Dividend TTM Dividend Ex Date  Payout Ratio %       EPS  EPS next Q  EPS this Y %  EPS next Y %  EPS past 5Y %  EPS next 5Y %  Sales past 5Y %  Sales Q/Q %  EPS Q/Q %  EPS YoY TTM %  Sales YoY TTM %      Sales, M     Income, M  EPS Surprise %  Revenue Surprise %  Outstanding, M     Float, M   Float %  Insider Own %  Insider Trans %  Inst Own %  Inst Trans %  Short Float %  Short Ratio  Short Interest, M     ROA %      ROE %    ROIC %   Curr R  Quick R  LTDebt/Eq  Debt/Eq  Gross M %  Oper M %  Profit M %  Perf 3D %  Perf Week %  Perf Month %  Perf Quart %  Perf Half %  Perf Year %  Perf YTD %     Beta       ATR  ATR/Price %  Volatili

In [9]:

# Pull the finviz rows for the selected tickers (df_finviz is indexed by ticker symbol).
_df = df_finviz.loc[ticker_list, :]
print(_df)

      No.                   Company               Index                  Sector                        Industry Country Exchange                                               Info  MktCap AUM, M  Rank  Market Cap, M        P/E   Fwd P/E      PEG        P/S        P/B         P/C      P/FCF    Book/sh   Cash/sh  Dividend %  Dividend TTM Dividend Ex Date  Payout Ratio %       EPS  EPS next Q  EPS this Y %  EPS next Y %  EPS past 5Y %  EPS next 5Y %  Sales past 5Y %  Sales Q/Q %   EPS Q/Q %  EPS YoY TTM %  Sales YoY TTM %      Sales, M    Income, M  EPS Surprise %  Revenue Surprise %  Outstanding, M     Float, M   Float %  Insider Own %  Insider Trans %  Inst Own %  Inst Trans %  Short Float %  Short Ratio  Short Interest, M      ROA %       ROE %     ROIC %   Curr R  Quick R  LTDebt/Eq  Debt/Eq  Gross M %   Oper M %  Profit M %  Perf 3D %  Perf Week %  Perf Month %  Perf Quart %  Perf Half %  Perf Year %  Perf YTD %     Beta       ATR  ATR/Price %  Volatility W %  Volatility M %   SMA20 

In [10]:
# Keep only the columns of interest for the selected tickers.
column_list = [
    'Company',
    'Info',
    'Rank',
    'MktCap AUM, M',
    'ATR/Price %',
    'Price',
]
_df = _df.loc[:, column_list]
print(f"df:\n{_df}")

df:
                       Company                                               Info  Rank  MktCap AUM, M  ATR/Price %       Price
NVDA               NVIDIA Corp                         Technology, Semiconductors     1 4469100.000000     2.364053  183.160000
AVGO              Broadcom Inc                         Technology, Semiconductors     8 1471390.000000     2.771473  312.830000
ORCL               Oracle Corp              Technology, Software - Infrastructure    16  713050.000000     2.788939  253.860000
NFLX               Netflix Inc              Communication Services, Entertainment    21  520650.000000     2.241120 1225.280000
APH              Amphenol Corp                  Technology, Electronic Components   116  136560.000000     2.360304  111.850000
CRWD  Crowdstrike Holdings Inc              Technology, Software - Infrastructure   144  108620.000000     3.448830  435.800000
EME           Emcor Group, Inc            Industrials, Engineering & Construction   517   28320.0000

In [11]:
# Working table for the weight calculation: description, size, volatility and price.
df_buy = _df.loc[
    :,
    ['Info', 'MktCap AUM, M', 'ATR/Price %', 'Price'],
]
print(f'df_buy:\n{df_buy}')

df_buy:
                                                   Info  MktCap AUM, M  ATR/Price %       Price
NVDA                         Technology, Semiconductors 4469100.000000     2.364053  183.160000
AVGO                         Technology, Semiconductors 1471390.000000     2.771473  312.830000
ORCL              Technology, Software - Infrastructure  713050.000000     2.788939  253.860000
NFLX              Communication Services, Entertainment  520650.000000     2.241120 1225.280000
APH                   Technology, Electronic Components  136560.000000     2.360304  111.850000
CRWD              Technology, Software - Infrastructure  108620.000000     3.448830  435.800000
EME             Industrials, Engineering & Construction   28320.000000     2.698516  632.570000
RBLX  Communication Services, Electronic Gaming & Mu...   89800.000000     4.832857  129.530000
U                    Technology, Software - Application   15760.000000     6.436042   37.290000
MSTR                 Technology,

In [12]:
# ------------------------------------------------------------------
# Blended portfolio weight per ticker, written into df_buy:
#   'MktCap Weight'        - share of the total 'MktCap AUM, M'
#   'Inv ATR/Price Weight' - share of the total 1 / 'ATR/Price %'
#                            (smaller ATR/Price % -> larger weight)
#   'Portfolio Weight'     - the two weights summed, then re-normalized to 1.0
# ------------------------------------------------------------------

# 0. Fail fast on unusable inputs. A zero or missing value would otherwise turn into
#    inf/NaN and, after normalization, silently corrupt the weight of every ticker.
#    `~(x > 0)` is True for NaN as well as for non-positive numbers.
bad_mcap = df_buy.index[~(df_buy['MktCap AUM, M'] > 0)].tolist()
bad_atr = df_buy.index[~(df_buy['ATR/Price %'] > 0)].tolist()
if bad_mcap or bad_atr:
    raise ValueError(
        "Cannot compute weights: missing or non-positive 'MktCap AUM, M' for "
        f"{bad_mcap}; missing or non-positive 'ATR/Price %' for {bad_atr}"
    )

# 1. Market-capitalisation weight
tot_mcap = df_buy['MktCap AUM, M'].sum()
df_buy['MktCap Weight'] = df_buy['MktCap AUM, M'] / tot_mcap

# 2. Inverse-ATR weight
inv_atr = 1 / df_buy['ATR/Price %']
tot_inv = inv_atr.sum()
df_buy['Inv ATR/Price Weight'] = inv_atr / tot_inv

# 3. Sum the two weights, then normalize so the blended weights total 1.0
df_buy['Portfolio Weight'] = (
    df_buy['MktCap Weight'] + df_buy['Inv ATR/Price Weight']
)
df_buy['Portfolio Weight'] /= df_buy['Portfolio Weight'].sum()

print(f'df_buy:\n{df_buy}')

df_buy:
                                                   Info  MktCap AUM, M  ATR/Price %       Price  MktCap Weight  Inv ATR/Price Weight  Portfolio Weight
NVDA                         Technology, Semiconductors 4469100.000000     2.364053  183.160000       0.583047              0.129814          0.356430
AVGO                         Technology, Semiconductors 1471390.000000     2.771473  312.830000       0.191960              0.110731          0.151345
ORCL              Technology, Software - Infrastructure  713050.000000     2.788939  253.860000       0.093026              0.110037          0.101531
NFLX              Communication Services, Entertainment  520650.000000     2.241120 1225.280000       0.067925              0.136935          0.102430
APH                   Technology, Electronic Components  136560.000000     2.360304  111.850000       0.017816              0.130020          0.073918
CRWD              Technology, Software - Infrastructure  108620.000000     3.448830  4

In [13]:
# Column totals of the three weight columns, printed as a sanity check.
sum_MktCap_Weight = df_buy.loc[:, 'MktCap Weight'].sum()
sum_Inv_ATR_Price_Weight = df_buy.loc[:, 'Inv ATR/Price Weight'].sum()
sum_portfolio_weights = df_buy.loc[:, 'Portfolio Weight'].sum()

# Same table, most expensive share price first.
df_buy_sorted_by_price = df_buy.sort_values(by='Price', ascending=False)

print(f'Sum MktCap Weights: {sum_MktCap_Weight:.2f}')
print(f'sum Inv ATR/Price Weights: {sum_Inv_ATR_Price_Weight:.2f}')
print(f'Sum of Portfolio Weights: {sum_portfolio_weights:.2f}')
print(f'\ndf_buy_sorted_by_price:\n{df_buy_sorted_by_price}')


Sum MktCap Weights: 1.00
sum Inv ATR/Price Weights: 1.00
Sum of Portfolio Weights: 1.00

df_buy_sorted_by_price:
                                                   Info  MktCap AUM, M  ATR/Price %       Price  MktCap Weight  Inv ATR/Price Weight  Portfolio Weight
NFLX              Communication Services, Entertainment  520650.000000     2.241120 1225.280000       0.067925              0.136935          0.102430
EME             Industrials, Engineering & Construction   28320.000000     2.698516  632.570000       0.003695              0.113724          0.058709
CRWD              Technology, Software - Infrastructure  108620.000000     3.448830  435.800000       0.014171              0.088983          0.051577
MSTR                 Technology, Software - Application  111830.000000     4.475266  394.390000       0.014590              0.068574          0.041582
AVGO                         Technology, Semiconductors 1471390.000000     2.771473  312.830000       0.191960              0.110731

In [14]:
# ETFs to plot alongside the individual tickers.
# 'VGT' is left out of this list (presumably because it is passed separately as the
# benchmark); 'EWC' and 'PNQI' (single-country & thematic index ETFs) are disabled.
etf_list = [
    # U.S. Technology Sector ETFs
    'FTEC',

    # U.S. Large-Cap Growth ETFs
    'QQQ',
    'VUG',
    'IWF',
    'SCHG',
    'SPYG',

    # U.S. Mid-Cap Growth ETFs
    'VOT',
    'IWP',
    'IJK',

    # Global & International ETFs
    'VT',
    'VXUS',
    'VEA',
]

In [15]:
# Plot the ETFs together with the selected tickers against the VGT benchmark.
plotting_utils.plot_comparative_performance(
    df,
    etf_list + ticker_list,
    benchmark='VGT',
)

In [16]:
# Tickers of the first 10 rows of df_finviz.
# NOTE(review): this is only the "top 10 by market cap" if df_finviz is already ordered by
# market-cap rank - the recorded output is consistent with that, but confirm. The result can
# contain several share classes of one company (GOOG and GOOGL in the recorded output).
top10_mktcap_list = df_finviz.head(10).index.tolist()
print(f'top10_mktcap_list: {top10_mktcap_list}')

top10_mktcap_list: ['NVDA', 'MSFT', 'AAPL', 'GOOG', 'GOOGL', 'AMZN', 'META', 'AVGO', 'TSM', 'TSLA']


In [17]:
# Plot the ETFs together with the first-10 finviz tickers against the VGT benchmark.
plotting_utils.plot_comparative_performance(
    df,
    etf_list + top10_mktcap_list,
    benchmark='VGT',
)

In [18]:
# Pull the finviz rows for the ETFs in etf_list.
_df_etf = df_finviz.loc[etf_list, :]
print(_df_etf)

      No.                                         Company Index     Sector              Industry Country Exchange                                               Info  MktCap AUM, M  Rank  Market Cap, M  P/E  Fwd P/E  PEG  P/S  P/B  P/C  P/FCF  Book/sh  Cash/sh  Dividend %  Dividend TTM Dividend Ex Date  Payout Ratio %  EPS  EPS next Q  EPS this Y %  EPS next Y %  EPS past 5Y %  EPS next 5Y %  Sales past 5Y %  Sales Q/Q %  EPS Q/Q %  EPS YoY TTM %  Sales YoY TTM %  Sales, M  Income, M  EPS Surprise %  Revenue Surprise %  Outstanding, M  Float, M  Float %  Insider Own %  Insider Trans %  Inst Own %  Inst Trans %  Short Float %  Short Ratio  Short Interest, M  ROA %  ROE %  ROIC %  Curr R  Quick R  LTDebt/Eq  Debt/Eq  Gross M %  Oper M %  Profit M %  Perf 3D %  Perf Week %  Perf Month %  Perf Quart %  Perf Half %  Perf Year %  Perf YTD %     Beta      ATR  ATR/Price %  Volatility W %  Volatility M %   SMA20 %  SMA50 %  SMA200 %  50D High %  50D Low %  52W High %  52W Low %        52W Range

### Put the buy list here; the output is each ticker's blended Market-Cap and Inverse-Volatility weight, sorted by descending ticker price

## Bought on 2025-08-13

In [19]:
# Bought on 2025-08-13.
# Earlier candidate lists, kept for reference ('COIN' and then 'HOOD' were dropped):
# buy_ticker_list = ['META', 'AVGO', 'NVDA', 'GOOG',     'SHOP', 'COIN', 'HOOD', 'APP', 'U',]
# buy_ticker_list = ['META', 'AVGO', 'NVDA', 'GOOG',     'SHOP', 'HOOD', 'APP', 'U',]
buy_ticker_list = [
    'META',
    'AVGO',
    'NVDA',
    'GOOG',
    'SHOP',
    'APP',
    'U',
]

In [20]:
def compute_blended_weights(fundamentals: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``fundamentals`` with three weight columns appended.

    Added columns (each sums to 1.0 over the rows):
      * 'MktCap Weight'        - row's share of the total 'MktCap AUM, M'
      * 'Inv ATR/Price Weight' - row's share of the total 1 / 'ATR/Price %'
                                 (smaller ATR/Price % -> larger weight)
      * 'Portfolio Weight'     - the two weights summed, then re-normalized

    Parameters
    ----------
    fundamentals : pandas.DataFrame
        One row per ticker; must contain numeric 'MktCap AUM, M' and
        'ATR/Price %' columns. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the input columns plus the three weight columns.

    Raises
    ------
    ValueError
        If any row has a missing or non-positive 'MktCap AUM, M' or
        'ATR/Price %'. Such values would otherwise become inf/NaN and,
        after normalization, corrupt the weight of every ticker.
    """
    mcap = fundamentals['MktCap AUM, M']
    atr_pct = fundamentals['ATR/Price %']

    # `~(x > 0)` is True for NaN as well as for non-positive numbers.
    bad_mcap = fundamentals.index[~(mcap > 0)].tolist()
    bad_atr = fundamentals.index[~(atr_pct > 0)].tolist()
    if bad_mcap or bad_atr:
        raise ValueError(
            "Cannot compute weights: missing or non-positive 'MktCap AUM, M' for "
            f"{bad_mcap}; missing or non-positive 'ATR/Price %' for {bad_atr}"
        )

    inv_atr = 1 / atr_pct
    # assign() works on a copy, so the caller's frame is left untouched.
    weighted = fundamentals.assign(**{
        'MktCap Weight': mcap / mcap.sum(),
        'Inv ATR/Price Weight': inv_atr / inv_atr.sum(),
    })
    blended = weighted['MktCap Weight'] + weighted['Inv ATR/Price Weight']
    weighted['Portfolio Weight'] = blended / blended.sum()
    return weighted


# Finviz rows and columns of interest for the tickers on the buy list.
column_list = ['Company', 'Info', 'Rank', 'MktCap AUM, M', 'ATR/Price %', 'Price']
_df = df_finviz.loc[buy_ticker_list, column_list]

# Weight table for the buy list, plus the same table ordered by descending share price.
df_buy = compute_blended_weights(
    _df.loc[:, ['Info', 'MktCap AUM, M', 'ATR/Price %', 'Price']]
)
df_buy_sorted_by_price = df_buy.sort_values('Price', ascending=False)

# Sanity check: every weight column should total 1.00.
sum_MktCap_Weight = df_buy['MktCap Weight'].sum()
sum_Inv_ATR_Price_Weight  = df_buy['Inv ATR/Price Weight'].sum()
sum_portfolio_weights = df_buy['Portfolio Weight'].sum()
print(f'Sum MktCap Weights: {sum_MktCap_Weight:.2f}')
print(f'sum Inv ATR/Price Weights: {sum_Inv_ATR_Price_Weight:.2f}')
print(f'Sum of Portfolio Weights: {sum_portfolio_weights:.2f}')
print(f'\ndf_buy_sorted_by_price:\n{df_buy_sorted_by_price}')


Sum MktCap Weights: 1.00
sum Inv ATR/Price Weights: 1.00
Sum of Portfolio Weights: 1.00

df_buy_sorted_by_price:
                                                   Info  MktCap AUM, M  ATR/Price %      Price  MktCap Weight  Inv ATR/Price Weight  Portfolio Weight
META  Communication Services, Internet Content & Inf... 1984450.000000     2.426582 790.000000       0.184470              0.178897          0.181683
APP        Communication Services, Advertising Agencies  157960.000000     4.717345 467.000000       0.014684              0.092024          0.053354
AVGO                         Technology, Semiconductors 1471390.000000     2.771473 312.830000       0.136777              0.156634          0.146706
GOOG  Communication Services, Internet Content & Inf... 2464860.000000     2.013127 204.160000       0.229128              0.215638          0.222383
NVDA                         Technology, Semiconductors 4469100.000000     2.364053 183.160000       0.415438              0.183628      

In [21]:
# Plot the tickers on the buy list against the VGT benchmark.
plotting_utils.plot_comparative_performance(
    df,
    buy_ticker_list,
    benchmark='VGT',
)