## Pipeline flow 

**Get User Information -> Match Portfolio -> .... -> Optimize portfolio -> Save to Database**

Return values from LLM after calling match_portfolio node

```
{
    "name": "Coffeehouse Portfolio",
    "allocation": {
        "Large Cap Blend": 0.1,
        "Large Cap Value": 0.1,
        "Small Cap Blend": 0.1,
        "Small Cap Value": 0.1,
        "International Large Cap Blend": 0.1,
        "Intermediate Bonds": 0.4,
        "REITs": 0.1
    },
    "description": "Focuses on diversification across several asset classes including stocks and bonds, with a tilt towards value investing. Suitable for investors looking for a mix of growth and income with a moderate risk tolerance."
}
```

In [1]:
import sys
import os

api_parent_dir = os.path.abspath(
    os.path.join(os.getcwd(), ".."))  # Move up twice
sys.path.append(api_parent_dir)  # Add parent of `api` to sys.path

print("Current sys.path:")
for path in sys.path:
    print(path)

Current sys.path:
/Users/bryantan/.pyenv/versions/3.12.8/lib/python312.zip
/Users/bryantan/.pyenv/versions/3.12.8/lib/python3.12
/Users/bryantan/.pyenv/versions/3.12.8/lib/python3.12/lib-dynload

/Users/bryantan/.pyenv/versions/3.12.8/envs/new_fyp_env/lib/python3.12/site-packages
/Users/bryantan/Documents/HengOngHuat/backend


In [2]:
from api.utils.StockAnalyzer import StockAnalyzer
from api.constants import CATEGORY_TO_ASSET_CLASS, BOND_CATEGORIES

from yahooquery import Screener 
import pandas as pd

from typing import List, Dict, TypedDict

In [3]:
matched_portfolio: dict = {
    "name": "Coffeehouse Portfolio",
    "allocation": {
        "Large Cap Blend": 0.1,
        "Large Cap Value": 0.1,
        "Small Cap Blend": 0.1,
        "Small Cap Value": 0.1,
        "International Large Cap Blend": 0.1,
        "Intermediate Bonds": 0.4,
        "REITs": 0.1
    },
    "description": "Focuses on diversification across several asset classes including stocks and bonds, with a tilt towards value investing. Suitable for investors looking for a mix of growth and income with a moderate risk tolerance."
}

# matched_portfolio: dict = {
#     "name": "Swensen Portfolio",
#     "allocation": {
#         "Large Cap Blend": 0.3,
#         "International Large Cap Blend": 0.15,
#         "Emerging Markets": 0.05,
#         "Intermediate Bonds": 0.30,
#         "REITs": 0.2
#     },
#     "description": "Focuses on diversification across several asset classes including stocks and bonds, with a tilt towards value investing. Suitable for investors looking for a mix of growth and income with a moderate risk tolerance."
# }

In [4]:
class MatchedPortfolio(TypedDict): 
    name: str
    allocation: Dict[str, float]
    description: str

## Helper methods needed

In [5]:
""" Pull ETFs based on allocations """
def classify_etfs(etfs: Dict[str, str]) -> Dict[str, str]:
    """
    Classify ETFs based on their category

    Args:
        etfs (dict): ETFs pulled from yfinance
        
    Returns: 
        dict: ETFs classified by asset class
    """
    classified_etfs = {}
    
    for etf, category in etfs.items():
        classified_etfs[etf] = CATEGORY_TO_ASSET_CLASS.get(category, "Other")
        
    return classified_etfs

def retrieve_etfs(limit=120) -> List[str]:
    """
    Retrieve ETFs from the Screener API
    
    Args:
        limit (int): Number of ETFs to retrieve
        
    Returns:
        list[str]: List of ETF symbols
    """
    screen = Screener()
    
    try:
        etf_data = screen.get_screeners('top_performing_etfs', count=limit)
        
        if not isinstance(etf_data, Dict):
            raise ValueError("Screener API did not return a dictionary")
        
        symbols = [
            quote["symbol"]
            for quote in etf_data.get("top_performing_etfs", {}).get("quotes", [])
        ]
        
    except Exception as e:
        print(f"Error retrieving ETFs: {e}")
        return []
    
    return symbols

def process_etf(portfolio: MatchedPortfolio, symbols: List[str]) -> Dict[str, List[str]]:
    """
    Process ETFs based on matched portfolio allocation

    Args:
        portfolio (MatchedPortfolio): Matched portfolio allocation
        symbols (List[str]): List of ETF symbols

    Returns:
        Dict[str, List[str]]: Top 10 ETFs and bond ETFs for each matched portfolio category
    """
    etf_categories = {} 
    
    for symbol in symbols:
        stock_analyzer = StockAnalyzer(symbol)
        category = stock_analyzer.asset_info.get('category', 'Others')
        etf_categories[symbol] = category
    
    mapped_etfs = classify_etfs(etf_categories)
        
    # Create filtered list with keys as categories of matched portfolio and value as empty list
    filtered_etfs = {
        category: []
        for category in portfolio['allocation'].keys()
    }
        
    # Append to filtered_etfs
    for symbol, category in mapped_etfs.items():
        if category in filtered_etfs:
            filtered_etfs[category].append(symbol)
            
    # Get the top 10 Etfs for each category
    top_10_etfs = {category: etfs[:10] for category, etfs in filtered_etfs.items() if category not in BOND_CATEGORIES}
    
    # Filter bond ETFs based on matched portfolio allocation 
    bond_etfs = {category: etfs[:10] for category, etfs in filtered_etfs.items() if category in BOND_CATEGORIES}
    
    return top_10_etfs, bond_etfs

def filter_etfs_dataframe(etf_df, bond_df):
    etf_df_filtered = etf_df.dropna(subset=[
        "Expense_Ratio", "AUM", "Average Volume", "3Y_Return", "5Y_Return", "Sharpe_1Y"])

    bond_df_filtered = bond_df.dropna(subset=[
        "Expense_Ratio", "AUM", "Average Volume", "3Y_Return", "5Y_Return", "Sharpe_1Y"])
    
    stocks_df = etf_df_filtered[
        # Stock ETFs filtering
        (etf_df_filtered["Category"] != "Intermediate Bonds") &
        (etf_df_filtered["3Y_Return"] > 0) &
        (etf_df_filtered["5Y_Return"] > 0) &
        (etf_df_filtered["AUM"] > 500_000_000)
    ]
    
    bonds_df = bond_df_filtered[
        # Bond ETFs filtering
        (bond_df_filtered["Category"] == "Intermediate Bonds") &
        # Allow slightly negative return
        (bond_df_filtered["3Y_Return"] > -0.02) &
        (bond_df_filtered["AUM"] > 500_000_000) &
        (bond_df_filtered["Expense_Ratio"] <= 0.20)
    ]
    
    # Combine stocks and bonds
    stocks_df_sorted = stocks_df.sort_values(
        by=["Sharpe_1Y", "AUM", "Average Volume"], ascending=[False, False, False]
    )

    bonds_df_sorted = bonds_df.sort_values(
        by=["Sharpe_1Y", "AUM", "Average Volume"], ascending=[False, False, False]
    )

    return stocks_df_sorted, bonds_df_sorted

def overlap_check(selected_etfs, holdings_data):
    overlap_count = {}
    
    for etf1 in selected_etfs:
        for etf2 in selected_etfs:
            if etf1 != etf2:
                common_holdings = set(holdings_data.get(etf1, [])) & set(
                    holdings_data.get(etf2, []))
                # % of overlapping holdings
                overlap_percentage = (len(common_holdings) / 10) * 100

                overlap_count[(etf1, etf2)] = overlap_percentage
                
    return overlap_count

## Pipeline for portfolio creation

In [6]:
def pipeline():
    symbols = retrieve_etfs()
    top_10_etfs, bond_etfs = process_etf(matched_portfolio, symbols)
    
    etf_tickers = [StockAnalyzer(ticker).get_etf_metrics() for tickers in top_10_etfs.values() for ticker in tickers]
    bond_tickers = [StockAnalyzer(ticker).get_etf_metrics() for tickers in bond_etfs.values() for ticker in tickers]
    
    etf_df = pd.DataFrame([etf for etf in etf_tickers if etf])
    bond_df = pd.DataFrame([etf for etf in bond_tickers if etf])
    
    # Filter DF
    stocks_df_sorted, bonds_df_sorted = filter_etfs_dataframe(etf_df, bond_df)
    
    # Data prep for ETF holding
    selected_etfs = stocks_df_sorted["Ticker"].tolist()
    holdings_data = {etf: StockAnalyzer(etf).get_etf_holdings_top_10() for etf in selected_etfs}
   
    # Check ETF holding overlap
    overlap_count = overlap_check(selected_etfs, holdings_data)
    overlap_df = pd.DataFrame(overlap_count.items(), columns=["ETF Pair", "Overlap Percentage"])
    overlap_df = overlap_df.sort_values(by="Overlap Percentage", ascending=False)
    
    # Filter ETFs with >30% overlap
    overlapping_pairs = overlap_df[overlap_df["Overlap Percentage"] > 10]["ETF Pair"].tolist()
    filtered_etfs = set(selected_etfs)
    for etf1, etf2 in overlapping_pairs:
        if etf1 in filtered_etfs and etf2 in filtered_etfs:
            filtered_etfs.remove(etf2)
    
    final_selected_etfs = list(filtered_etfs)
    bond_etfs = bonds_df_sorted["Ticker"].tolist()
    
    return final_selected_etfs.extend(bond_etfs)

In [14]:
created_portfolio = pipeline()

In [15]:
print(created_portfolio)

None


## Individual Function Testing

In [7]:
symbols = retrieve_etfs()
top_10_etfs, bond_etfs = process_etf(matched_portfolio, symbols)

etf_tickers = [StockAnalyzer(ticker).get_etf_metrics() for tickers in top_10_etfs.values() for ticker in tickers]
bond_tickers = [StockAnalyzer(ticker).get_etf_metrics() for tickers in bond_etfs.values() for ticker in tickers]

In [8]:
etf_df = pd.DataFrame([etf for etf in etf_tickers if etf])
bond_df = pd.DataFrame([etf for etf in bond_tickers if etf])

In [9]:
etf_df

Unnamed: 0,Ticker,Name,AUM,Average Volume,Category,3Y_Return,5Y_Return,YTD_Return,Dividend_Yield_Forward,Dividend_Yield_Trailing,3Y_Beta,Expense_Ratio,Turnover_Ratio,1Y_Return,1Y_Sharpe,Tracking_Error_1Y,1Y_Return_calc,Sharpe_1Y,Tracking_Error
0,BBUS,JPMorgan BetaBuilders U.S. Equity ETF,4356184064,160037,Large Cap Blend,0.098182,0.204517,1.45006,0.0119,,1.01,0.0002,0.03,,0.475233,0.007864,0.110243,0.967955,0.014399
1,SPLG,SPDR Portfolio S&P 500 ETF,58660720640,8093069,Large Cap Blend,0.100188,0.205414,1.44004,0.0126,,1.0,0.0002,0.03,,0.478809,0.007246,0.109972,0.986032,0.008246
2,IVV,iShares Core S&P 500 ETF,594077155328,6448811,Large Cap Blend,0.100271,0.205422,1.43806,0.0128,0.010014,1.0,0.0003,0.03,,0.47497,0.006597,0.116578,0.982043,0.00749
3,ITOT,iShares Core S&P Total U.S. Stock Market ETF,65149771776,1604227,Large Cap Blend,0.090799,0.202982,1.12074,0.0121,0.010079,1.03,0.0003,0.04,,0.41222,0.014706,0.10766,0.938448,0.019746
4,SCHX,Schwab U.S. Large-Cap ETF,52639002624,10420372,Large Cap Blend,0.097387,0.205403,1.3805,0.012,,1.01,0.0003,0.03,,0.503744,0.012748,0.13106,1.057542,0.017584
5,SPTM,SPDR Portfolio S&P 1500 Composite Stock Market...,9818044416,651981,Large Cap Blend,0.094901,0.20484,1.2156,0.0127,0.010554,1.01,0.0003,0.03,,0.42817,0.011485,0.102682,0.974156,0.013105
6,PBUS,Invesco MSCI USA ETF,5994743808,191969,Large Cap Blend,0.098629,0.201877,1.4276,0.0118,,1.01,0.0004,0.03,,0.477745,0.010659,0.110284,0.950899,0.037073
7,VOO,Vanguard S&P 500 ETF,1399085203456,6132138,Large Cap Blend,0.10002,0.20536,1.43889,0.0123,0.010429,1.0,0.0003,0.02,,0.474777,0.006768,0.112585,0.983947,0.007542
8,VV,Vanguard Large Cap Index Fund,59214147584,383006,Large Cap Blend,0.099378,0.205207,1.48706,0.0122,0.010051,1.01,0.0004,0.02,,0.472617,0.013356,0.113851,0.969854,0.013825
9,VONE,Vanguard Russell 1000 Index Fund ETF Shares,7934553088,103796,Large Cap Blend,0.095962,0.204659,1.36616,0.0119,0.0,1.01,0.0007,0.03,,0.453982,0.013254,0.109547,0.962457,0.015429


In [10]:
bond_df

Unnamed: 0,Ticker,Name,AUM,Average Volume,Category,3Y_Return,5Y_Return,YTD_Return,Dividend_Yield_Forward,Dividend_Yield_Trailing,3Y_Beta,Expense_Ratio,Turnover_Ratio,1Y_Return,1Y_Sharpe,Tracking_Error_1Y,1Y_Return_calc,Sharpe_1Y,Tracking_Error
0,BIV,Vanguard Intermediate-Term Bond Index Fund,44127584256,1364142,Intermediate Bonds,0.004405,0.012986,2.87795,0.0374,0.0,1.06,0.0003,0.55,,0.356093,0.141507,0.098002,-0.500287,0.183872


In [11]:
stocks_df_sorted, bonds_df_sorted = filter_etfs_dataframe(etf_df, bond_df)

In [12]:
stocks_df_sorted

Unnamed: 0,Ticker,Name,AUM,Average Volume,Category,3Y_Return,5Y_Return,YTD_Return,Dividend_Yield_Forward,Dividend_Yield_Trailing,3Y_Beta,Expense_Ratio,Turnover_Ratio,1Y_Return,1Y_Sharpe,Tracking_Error_1Y,1Y_Return_calc,Sharpe_1Y,Tracking_Error
4,SCHX,Schwab U.S. Large-Cap ETF,52639002624,10420372,Large Cap Blend,0.097387,0.205403,1.3805,0.012,,1.01,0.0003,0.03,,0.503744,0.012748,0.13106,1.057542,0.017584
17,MGV,Vanguard Mega Cap Value Index Fund,9028613120,292401,Large Cap Value,0.092285,0.187063,6.21994,0.0217,,0.79,0.0007,0.13,,0.724944,0.089702,0.144603,1.010839,0.088007
13,DIVB,iShares Core Dividend ETF,683180928,105566,Large Cap Value,0.085024,0.207653,6.43324,0.0245,,0.92,0.0005,0.36,,0.849858,0.099866,0.176557,1.006866,0.073836
14,VTV,Vanguard Value Index Fund ETF Shares,195238002688,2635659,Large Cap Value,0.083591,0.186453,5.2103,0.022,0.017494,0.83,0.0004,0.09,,0.589934,0.088819,0.12866,0.98753,0.087529
1,SPLG,SPDR Portfolio S&P 500 ETF,58660720640,8093069,Large Cap Blend,0.100188,0.205414,1.44004,0.0126,,1.0,0.0002,0.03,,0.478809,0.007246,0.109972,0.986032,0.008246
7,VOO,Vanguard S&P 500 ETF,1399085203456,6132138,Large Cap Blend,0.10002,0.20536,1.43889,0.0123,0.010429,1.0,0.0003,0.02,,0.474777,0.006768,0.112585,0.983947,0.007542
2,IVV,iShares Core S&P 500 ETF,594077155328,6448811,Large Cap Blend,0.100271,0.205422,1.43806,0.0128,0.010014,1.0,0.0003,0.03,,0.47497,0.006597,0.116578,0.982043,0.00749
19,EPS,WisdomTree U.S. LargeCap Fund,1077484928,57671,Large Cap Value,0.097076,0.200505,2.46328,0.0144,,0.96,0.0008,1.02,,0.529643,0.030807,0.11897,0.979843,0.031802
5,SPTM,SPDR Portfolio S&P 1500 Composite Stock Market...,9818044416,651981,Large Cap Blend,0.094901,0.20484,1.2156,0.0127,0.010554,1.01,0.0003,0.03,,0.42817,0.011485,0.102682,0.974156,0.013105
8,VV,Vanguard Large Cap Index Fund,59214147584,383006,Large Cap Blend,0.099378,0.205207,1.48706,0.0122,0.010051,1.01,0.0004,0.02,,0.472617,0.013356,0.113851,0.969854,0.013825


In [13]:
bonds_df_sorted

Unnamed: 0,Ticker,Name,AUM,Average Volume,Category,3Y_Return,5Y_Return,YTD_Return,Dividend_Yield_Forward,Dividend_Yield_Trailing,3Y_Beta,Expense_Ratio,Turnover_Ratio,1Y_Return,1Y_Sharpe,Tracking_Error_1Y,1Y_Return_calc,Sharpe_1Y,Tracking_Error
0,BIV,Vanguard Intermediate-Term Bond Index Fund,44127584256,1364142,Intermediate Bonds,0.004405,0.012986,2.87795,0.0374,0.0,1.06,0.0003,0.55,,0.356093,0.141507,0.098002,-0.500287,0.183872


In [14]:
# Data prep for ETF holding
selected_etfs = stocks_df_sorted["Ticker"].tolist()
holdings_data = {etf: StockAnalyzer(etf).get_etf_holdings_top_10() for etf in selected_etfs}

In [15]:
holdings_data

{'SCHX': ['Apple Inc',
  'NVIDIA Corp',
  'Microsoft Corp',
  'Amazon.com Inc',
  'Meta Platforms Inc Class A',
  'Alphabet Inc Class A',
  'Broadcom Inc',
  'Berkshire Hathaway Inc Class B',
  'Alphabet Inc Class C',
  'Tesla Inc'],
 'MGV': ['JPMorgan Chase & Co',
  'Berkshire Hathaway Inc Class B',
  'Exxon Mobil Corp',
  'Broadcom Inc',
  'UnitedHealth Group Inc',
  'Walmart Inc',
  'Procter & Gamble Co',
  'Johnson & Johnson',
  'The Home Depot Inc',
  'AbbVie Inc'],
 'DIVB': ['Cisco Systems Inc',
  'International Business Machines Corp',
  'Accenture PLC Class A',
  'JPMorgan Chase & Co',
  'Texas Instruments Inc',
  'Exxon Mobil Corp',
  'Wells Fargo & Co',
  'Chevron Corp',
  'Automatic Data Processing Inc',
  'Verizon Communications Inc'],
 'VTV': ['Berkshire Hathaway Inc Class B',
  'JPMorgan Chase & Co',
  'Exxon Mobil Corp',
  'Broadcom Inc',
  'UnitedHealth Group Inc',
  'Walmart Inc',
  'Procter & Gamble Co',
  'Johnson & Johnson',
  'The Home Depot Inc',
  'AbbVie Inc'],


In [16]:
holdings_df = pd.DataFrame.from_dict(holdings_data, orient="index").T
holdings_df

Unnamed: 0,SCHX,MGV,DIVB,VTV,SPLG,VOO,IVV,EPS,SPTM,VV,...,ITOT,ILCV,IVOV,VBR,SCHD,VB,IDEV,VEA,USRT,BBRE
0,Apple Inc,JPMorgan Chase & Co,Cisco Systems Inc,Berkshire Hathaway Inc Class B,Apple Inc,Apple Inc,Apple Inc,Apple Inc,Apple Inc,Apple Inc,...,Apple Inc,Apple Inc,Expand Energy Corp Ordinary Shares - New,Smurfit WestRock PLC,AbbVie Inc,Smurfit WestRock PLC,Novo Nordisk AS Class B,SAP SE,Prologis Inc,Prologis Inc
1,NVIDIA Corp,Berkshire Hathaway Inc Class B,International Business Machines Corp,JPMorgan Chase & Co,NVIDIA Corp,NVIDIA Corp,NVIDIA Corp,Alphabet Inc Class A,NVIDIA Corp,Microsoft Corp,...,NVIDIA Corp,Berkshire Hathaway Inc Class B,Fidelity National Financial Inc,Williams-Sonoma Inc,Coca-Cola Co,Williams-Sonoma Inc,SAP SE,Novo Nordisk AS Class B,Welltower Inc,Equinix Inc
2,Microsoft Corp,Exxon Mobil Corp,Accenture PLC Class A,Exxon Mobil Corp,Microsoft Corp,Microsoft Corp,Microsoft Corp,Amazon.com Inc,Microsoft Corp,NVIDIA Corp,...,Microsoft Corp,JPMorgan Chase & Co,US Foods Holding Corp,Atmos Energy Corp,Amgen Inc,Atmos Energy Corp,ASML Holding NV,ASML Holding NV,Equinix Inc,Welltower Inc
3,Amazon.com Inc,Broadcom Inc,JPMorgan Chase & Co,Broadcom Inc,Amazon.com Inc,Amazon.com Inc,Amazon.com Inc,NVIDIA Corp,Amazon.com Inc,Amazon.com Inc,...,Amazon.com Inc,Exxon Mobil Corp,Reliance Inc,Expand Energy Corp Ordinary Shares - New,Cisco Systems Inc,Expand Energy Corp Ordinary Shares - New,Nestle SA,Nestle SA,Simon Property Group Inc,Simon Property Group Inc
4,Meta Platforms Inc Class A,UnitedHealth Group Inc,Texas Instruments Inc,UnitedHealth Group Inc,Meta Platforms Inc Class A,Meta Platforms Inc Class A,Meta Platforms Inc Class A,Microsoft Corp,Meta Platforms Inc Class A,Meta Platforms Inc Class A,...,Meta Platforms Inc Class A,UnitedHealth Group Inc,Flex Ltd,NRG Energy Inc,Pfizer Inc,NRG Energy Inc,AstraZeneca PLC,Roche Holding AG,Digital Realty Trust Inc,Public Storage
5,Alphabet Inc Class A,Walmart Inc,Exxon Mobil Corp,Walmart Inc,Alphabet Inc Class A,Alphabet Inc Class A,Alphabet Inc Class A,Meta Platforms Inc Class A,Alphabet Inc Class A,Alphabet Inc Class A,...,Alphabet Inc Class A,Walmart Inc,W.P. Carey Inc,First Citizens BancShares Inc Class A,Verizon Communications Inc,First Citizens BancShares Inc Class A,Roche Holding AG,AstraZeneca PLC,Realty Income Corp,Realty Income Corp
6,Broadcom Inc,Procter & Gamble Co,Wells Fargo & Co,Procter & Gamble Co,Berkshire Hathaway Inc Class B,Berkshire Hathaway Inc Class B,Berkshire Hathaway Inc Class B,JPMorgan Chase & Co,Berkshire Hathaway Inc Class B,Broadcom Inc,...,Berkshire Hathaway Inc Class B,Procter & Gamble Co,Unum Group,Tapestry Inc,Bristol-Myers Squibb Co,Deckers Outdoor Corp,Novartis AG Registered Shares,Novartis AG Registered Shares,Public Storage,Digital Realty Trust Inc
7,Berkshire Hathaway Inc Class B,Johnson & Johnson,Chevron Corp,Johnson & Johnson,Broadcom Inc,Broadcom Inc,Broadcom Inc,Berkshire Hathaway Inc Class B,Broadcom Inc,Berkshire Hathaway Inc Class B,...,Broadcom Inc,Alphabet Inc Class A,Biomarin Pharmaceutical Inc,EMCOR Group Inc,Chevron Corp,Liberty Formula One Group Registered Shs Serie...,HSBC Holdings PLC,HSBC Holdings PLC,VICI Properties Inc Ordinary Shares,Extra Space Storage Inc
8,Alphabet Inc Class C,The Home Depot Inc,Automatic Data Processing Inc,The Home Depot Inc,Alphabet Inc Class C,Alphabet Inc Class C,Alphabet Inc Class C,Exxon Mobil Corp,Alphabet Inc Class C,Alphabet Inc Class C,...,Alphabet Inc Class C,Johnson & Johnson,Reinsurance Group of America Inc,Equitable Holdings Inc,Texas Instruments Inc,DraftKings Inc Ordinary Shares - Class A,Shell PLC,Toyota Motor Corp,AvalonBay Communities Inc,AvalonBay Communities Inc
9,Tesla Inc,AbbVie Inc,Verizon Communications Inc,AbbVie Inc,Tesla Inc,Tesla Inc,Tesla Inc,Eli Lilly and Co,Tesla Inc,Tesla Inc,...,Tesla Inc,The Home Depot Inc,Performance Food Group Co,F5 Inc,PepsiCo Inc,Tapestry Inc,Lvmh Moet Hennessy Louis Vuitton SE,Shell PLC,Extra Space Storage Inc,VICI Properties Inc Ordinary Shares


In [17]:
 # Check ETF holding overlap
overlap_count = overlap_check(selected_etfs, holdings_data)
overlap_df = pd.DataFrame(overlap_count.items(), columns=["ETF Pair", "Overlap Percentage"])
overlap_df = overlap_df.sort_values(by="Overlap Percentage", ascending=False)

In [18]:
overlap_df

Unnamed: 0,ETF Pair,Overlap Percentage
351,"(BBUS, VONE)",100.0
79,"(VTV, MGV)",100.0
163,"(IVV, SPTM)",100.0
164,"(IVV, VV)",100.0
168,"(IVV, BBUS)",100.0
...,...,...
361,"(BBUS, VEA)",0.0
360,"(BBUS, IDEV)",0.0
359,"(BBUS, VB)",0.0
358,"(BBUS, SCHD)",0.0


In [19]:
# Filter ETFs with >30% overlap
overlapping_pairs = overlap_df[overlap_df["Overlap Percentage"] > 30]["ETF Pair"].tolist()
filtered_etfs = set(selected_etfs)
for etf1, etf2 in overlapping_pairs:
    if etf1 in filtered_etfs and etf2 in filtered_etfs:
        filtered_etfs.remove(etf2)

final_selected_etfs = list(filtered_etfs)

In [20]:
final_selected_etfs

['IDEV', 'EPS', 'VYM', 'USRT', 'VB', 'IVOV', 'SCHD']