### How about portfolio selection based on ticker's past sharpe ratios (3, 6, 12 months) vs sharpe ratios of VGT? 

### Portfolio Selection
- Ask Google AI return the top 10 holdings of these index ETFs
- Ask Google AI to combine all the tickers into a unique list
- Plot the tickers vs benchmark VGT
- Keep tickers that outperform VGT in 6 & 12 months period

etf_list = [  
    # U.S. Technology Sector ETFs  
    'VGT',
    'FTEC',
    
    # U.S. Large-Cap Growth ETFs
    'QQQ',
    'VUG',
    'IWF',
    'SCHG',
    'SPYG',
    
    # U.S. Mid-Cap Growth ETFs
    'VOT',
    'IWP',
    'IJK',
    
    # Global & International ETFs
    'VT',
    'VXUS',
    'VEA',
    
    # Single-Country & Thematic Index ETFs
    'EWC',
    'PNQI'
]

In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pprint
import inspect  # <--- ADD THIS LINE
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR
ROOT_DIR = NOTEBOOK_DIR.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils
import plotting_utils

# --- 4. INITIAL_CAPITAL ---
INITIAL_CAPITAL = 100000

# --- 5. RISK FREE ANNUAL RATE ---
RISK_FREE_ANNUAL_RATE = 0.04

# --- 6. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


In [2]:
df = pd.read_parquet(DATA_DIR / 'df_adj_close.parquet')
print(f'df:\n{df}')

df:
Ticker              A        AA       AAL      AAON       AAPL       ABBV     ABEV        ABT      ACGL        ACM        ACN       ACWI      ACWX       ADBE       ADC        ADI       ADM        ADP       ADSK        AEE      AEG        AEM        AEP        AER       AES        AFG        AFL       AGCO       AGG       AGI     AGNC       AIG      AIRR        AIT        AIZ        AJG      AKAM        AL       ALB       ALK        ALL       ALLE      ALLY      ALSN        ALV       AMAT     AMCR        AMD        AME       AMGN       AMH      AMLP        AMP        AMT       AMX       AMZN         AN       ANET        AON       AOS        APD        APH        APO      APTV        AR      ARCC       ARE       ARES      ARKK      ARMK        ARW       ASML        ASR      ASX       ATI        ATO        ATR        AU       AVAV        AVB       AVGO        AVY        AWI        AWK       AXON        AXP       AXS      AXTA        AYI       AZN         AZO         B         BA      

In [3]:
# Initial unique ticker list on 2025-07-30 from the top 10 holdings in the ETF list
# ticker_list = [
# 'MSFT', 'AAPL', 'NVDA', 'GOOG', 'AMZN',
# 'META', 'LLY', 'TSM', 'AVGO', 'NVO',
# 'V', 'TSLA', 'ASML', 'LVMUY', 'ORCL',
# 'TM', 'CRM', 'COST', 'ADBE', 'NSRGY',
# 'SHEL', 'AMD', 'NFLX', 'ACN', 'AZN',
# 'RHHBY', 'CSCO', 'TCEHY', 'ORLY', 'BKNG',
# 'MELI', 'RY', 'TD', 'SHOP', 'TDG', 'APH',
# 'SNPS', 'BN', 'IDXX', 'CDNS', 'ENB', 'BMO',
# 'AJG', 'CP', 'ANET', 'CRWD', 'ABNB', 'CNQ',
# 'BNS', 'SQ', 'DECK', 'TTD', 'ANSS', 'COIN',
# 'VRT', 'FDS', 'TRGP', 'ON', 'BLDR', 'WSM',
# 'ROKU', 'DKNG', 'CSU.TO', 'EME', 'STLD',
# 'RBLX', 'U', 'PATH', 'MSTR',
# ]

# Plot 10 tickers vs VGT, and keep tickers that outperform VGT over the last 3, 6, 12 months 
# ticker_list = [
# 'MSFT', 'NVDA', 'TSM', 'AVGO', 'ORCL',
# 'SHEL', 'AMD', 'NFLX', 'CSCO', 'BKNG',
# 'MELI', 'TD', 'TDG', 'APH','IDXX', 
# 'CDNS', 'BMO', 'CRWD', 'COIN','VRT', 
# 'EME', 'RBLX', 'U', 'MSTR',
# ]

# The final 10 tickers
ticker_list = [
'NVDA', 'AVGO', 'ORCL', 'NFLX', 'APH',
'CRWD', 'EME', 'RBLX', 'U', 'MSTR',
]

In [4]:
# plot_comparative_performance(df, ticker_list)
plotting_utils.plot_comparative_performance(df, ticker_list, benchmark='VGT')

In [None]:
file_list = utils.get_recent_files(
    directory_path = DATA_DIR,
    extension = 'parquet', 
    prefix = None,
    contains_pattern = 'df_finviz_merged_stocks_etfs',
    # contains_pattern = 'df_finviz_stocks_etfs',    
    count = None
)

print(f'file_list:\n{file_list}')

file_list:
['2025-08-08_df_finviz_stocks_etfs.parquet', '2025-08-07_df_finviz_stocks_etfs.parquet', '2025-08-06_df_finviz_stocks_etfs.parquet', '2025-08-05_df_finviz_stocks_etfs.parquet', '2025-08-04_df_finviz_stocks_etfs.parquet', '2025-08-01_df_finviz_stocks_etfs.parquet', '2025-07-31_df_finviz_stocks_etfs.parquet', '2025-07-30_df_finviz_stocks_etfs.parquet', '2025-07-29_df_finviz_stocks_etfs.parquet', '2025-07-28_df_finviz_stocks_etfs.parquet', '2025-07-25_df_finviz_stocks_etfs.parquet', '2025-07-24_df_finviz_stocks_etfs.parquet', '2025-07-23_df_finviz_stocks_etfs.parquet', '2025-07-22_df_finviz_stocks_etfs.parquet', '2025-07-21_df_finviz_stocks_etfs.parquet', '2025-07-18_df_finviz_stocks_etfs.parquet', '2025-07-17_df_finviz_stocks_etfs.parquet', '2025-07-16_df_finviz_stocks_etfs.parquet', '2025-07-15_df_finviz_stocks_etfs.parquet', '2025-07-14_df_finviz_stocks_etfs.parquet', '2025-07-11_df_finviz_stocks_etfs.parquet', '2025-07-10_df_finviz_stocks_etfs.parquet', '2025-07-09_df_finvi

In [18]:
df_finviz = pd.read_parquet(DATA_DIR / file_list[0])
print(f'df_finviz:\n{df_finviz}')

df_finviz:
        No.                                      Company               Index                  Sector                        Industry Country Exchange  Market Cap, M       P/E   Fwd P/E      PEG       P/S       P/B       P/C     P/FCF   Book/sh   Cash/sh  Dividend %  Dividend TTM Dividend Ex Date  Payout Ratio %       EPS  EPS next Q  EPS This Y %  EPS Next Y %  EPS Past 5Y %  EPS Next 5Y %  Sales Past 5Y %  Sales Q/Q %  EPS Q/Q %  EPS YoY TTM %  Sales YoY TTM %      Sales, M     Income, M  EPS Surprise %  Revenue Surprise %  Outstanding, M     Float, M   Float %  Insider Own %  Insider Trans %  Inst Own %  Inst Trans %  Short Float %  Short Ratio  Short Interest, M     ROA %      ROE %    ROIC %   Curr R  Quick R  LTDebt/Eq  Debt/Eq  Gross M %  Oper M %  Profit M %  Perf Week %  Perf Month %  Perf Quart %  Perf Half %  Perf Year %  Perf YTD %     Beta      ATR  Volatility W %  Volatility M %   SMA20 %   SMA50 %  SMA200 %  50D High %  50D Low %  52W High %  52W Low %        5

In [19]:

_df = df_finviz.loc[ticker_list]
print(_df)

        No.                   Company               Index                  Sector                        Industry Country Exchange  Market Cap, M        P/E   Fwd P/E      PEG        P/S        P/B         P/C      P/FCF    Book/sh   Cash/sh  Dividend %  Dividend TTM Dividend Ex Date  Payout Ratio %       EPS  EPS next Q  EPS This Y %  EPS Next Y %  EPS Past 5Y %  EPS Next 5Y %  Sales Past 5Y %  Sales Q/Q %   EPS Q/Q %  EPS YoY TTM %  Sales YoY TTM %      Sales, M    Income, M  EPS Surprise %  Revenue Surprise %  Outstanding, M     Float, M   Float %  Insider Own %  Insider Trans %  Inst Own %  Inst Trans %  Short Float %  Short Ratio  Short Interest, M      ROA %       ROE %     ROIC %   Curr R  Quick R  LTDebt/Eq  Debt/Eq  Gross M %   Oper M %  Profit M %  Perf Week %  Perf Month %  Perf Quart %  Perf Half %  Perf Year %  Perf YTD %     Beta       ATR  Volatility W %  Volatility M %   SMA20 %    SMA50 %  SMA200 %  50D High %  50D Low %  52W High %  52W Low %         52W Range  All-Ti

In [20]:
column_list = ['Company', 'Info', 'Rank', 'MktCap AUM, M', 'ATR/Price %', 'Price']
_df = _df.loc[:, column_list]
print(f"df:\n{_df}")

KeyError: "['ATR/Price %'] not in index"

In [None]:
df_buy = _df.loc[:, ['Info', 'MktCap AUM, M', 'ATR/Price %', 'Price']]
print(f'df_buy:\n{df_buy}')

In [None]:
# ------------------------------------------------------------------
# 1.  Market-Capitalisation weight
# ------------------------------------------------------------------
tot_mcap = df_buy['MktCap AUM, M'].sum()
df_buy['MktCap Weight'] = df_buy['MktCap AUM, M'] / tot_mcap

# ------------------------------------------------------------------
# 2.  Inverse-ATR weight (smaller ATR/Price % → larger weight)
# ------------------------------------------------------------------
inv_atr = 1 / df_buy['ATR/Price %']          # inverse
tot_inv = inv_atr.sum()
df_buy['Inv ATR/Price Weight'] = inv_atr / tot_inv

# 1. Sum the two raw weights
df_buy['Portfolio Weight'] = (
        df_buy['MktCap Weight'] + df_buy['Inv ATR/Price Weight']
)

# 2. Normalize so the summed weights equal 1.0
df_buy['Portfolio Weight'] /= df_buy['Portfolio Weight'].sum()

print(f'df_buy:\n{df_buy}')
# print(df_buy[['MktCap AUM, M', 'MkrCap Weight',
#               'ATR/Price %', 'Inv ATR/Price Weight']])

In [None]:
df_buy_sorted_by_price = df_buy.sort_values('Price', ascending=False)
sum_MktCap_Weight = df_buy['MktCap Weight'].sum()
sum_Inv_ATR_Price_Weight  = df_buy['Inv ATR/Price Weight'].sum()
sum_portfolio_weights = df_buy['Portfolio Weight'].sum()
print(f'Sum MktCap Weights: {sum_MktCap_Weight:.2f}')
print(f'sum Inv ATR/Price Weights: {sum_Inv_ATR_Price_Weight:.2f}')
print(f'Sum of Portfolio Weights: {sum_portfolio_weights:.2f}')
print(f'\ndf_buy_sorted_by_price:\n{df_buy_sorted_by_price}')
