## Import Required Libraries

In [7]:
import pandas as pd
import yfinance as yf
from datetime import datetime
import time

## Define Sector ETFs and Their Details

In [8]:
# Sector ETF ticker -> human-readable sector name.
# Every other cell keys off this mapping, so add new sectors here.
sector_etfs = {
    "XLK": "Technology",
    "XLV": "Healthcare",
    "XLF": "Financial",
    "XLI": "Industrial",
    "XLY": "Consumer Discretionary",
}

## Function to Get ETF Holdings from Yahoo Finance
This function retrieves the current holdings of a given ETF using yfinance - completely free and no API key needed!

In [9]:
def get_etf_holdings(etf_ticker):
    """
    Get the holding symbols for a given ETF ticker using yfinance.

    Parameters:
    etf_ticker (str): The ticker symbol of the ETF

    Returns:
    list: List of holding symbols (str). Empty when no holdings data is
          available or when the lookup fails for any reason (the error is
          printed, never raised).
    """
    try:
        etf = yf.Ticker(etf_ticker)

        # yfinance exposes fund constituents through
        # Ticker.funds_data.top_holdings; a get_holdings() method is only
        # honoured when the Ticker object actually provides one.
        # NOTE(review): top_holdings presumably lists only the fund's
        # largest positions, not every constituent - confirm against the
        # yfinance documentation before relying on completeness.
        fetch = getattr(etf, 'get_holdings', None)
        if callable(fetch):
            holdings = fetch()
        else:
            holdings = etf.funds_data.top_holdings

        if holdings is None or holdings.empty:
            print(f"No holdings data available for {etf_ticker} via yfinance")
            return []

        # Symbols live in a 'Symbol'/'symbol' column when present,
        # otherwise in the dataframe index.
        column = next(
            (name for name in ('Symbol', 'symbol') if name in holdings.columns),
            None,
        )
        raw_symbols = holdings[column] if column is not None else holdings.index

        # Drop missing/blank entries so downstream sorting and CSV output
        # only ever see real ticker strings.
        symbols = [
            str(symbol).strip()
            for symbol in raw_symbols.tolist()
            if pd.notna(symbol) and str(symbol).strip()
        ]

        if not symbols:
            print(f"No holdings data available for {etf_ticker} via yfinance")
            return []

        print(f"Found {len(symbols)} holdings for {etf_ticker}")
        return symbols

    except Exception as e:
        # Best-effort boundary: network and parsing failures inside
        # yfinance surface as many exception types, so report and fall
        # back to "no holdings" rather than aborting the whole update.
        print(f"Error fetching holdings for {etf_ticker}: {str(e)}")
        return []

## Automated Update Function Using Yahoo Finance
This function automatically fetches and updates holdings for a specific sector ETF.

In [None]:
def update_sector_from_yahoo(etf_code):
    """
    Build a holdings dataframe for one sector ETF from Yahoo Finance data.

    Parameters:
    etf_code (str): The ETF ticker (e.g., 'XLK'); must be a key of sector_etfs

    Returns:
    pd.DataFrame: Columns 'Symbol', 'Sector', 'Sector_Code', one row per
                  holding; None if the ETF code is unknown or no holdings
                  could be fetched
    """
    # Guard: only ETFs registered in sector_etfs can be refreshed.
    if etf_code not in sector_etfs:
        print(f"Unknown ETF code: {etf_code}")
        return None

    sector_name = sector_etfs[etf_code]
    print(f"\nFetching holdings for {etf_code} ({sector_name})...")

    fetched = get_etf_holdings(etf_code)
    if not fetched:
        print(f"Failed to fetch holdings for {etf_code}")
        return None

    # The scalar sector name/code are broadcast across every symbol row.
    frame = pd.DataFrame({
        'Symbol': fetched,
        'Sector': sector_name,
        'Sector_Code': etf_code
    })
    print(f"Successfully created dataframe with {len(frame)} stocks")
    return frame

## Update All Sectors at Once
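Fetch holdings for every sector ETF from Yahoo Finance and combine them into a single dataframe. Saving the result to the CSV file is a separate step (see "Save Updated CSV").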

In [10]:
def update_all_sectors_from_yahoo():
    """
    Fetch holdings for every ETF in sector_etfs and merge them into one dataframe.

    Returns:
    pd.DataFrame: All fetched holdings sorted by 'Sector_Code' then
                  'Symbol', or None if no sector could be fetched
    """
    frames = []

    for code in sector_etfs:
        sector_frame = update_sector_from_yahoo(code)
        if sector_frame is not None:
            frames.append(sector_frame)
        # Pause between requests to be polite to Yahoo Finance servers
        time.sleep(1)

    if not frames:
        print("Failed to fetch any holdings data")
        return None

    combined = pd.concat(frames, ignore_index=True)
    combined = combined.sort_values(['Sector_Code', 'Symbol'])
    combined = combined.reset_index(drop=True)

    # Summary report framed by separator lines
    print(f"\n{'='*60}")
    print(f"Successfully fetched {len(combined)} total stocks")
    print(f"\nBreakdown by sector:")
    print(combined.groupby(['Sector_Code', 'Sector']).size())
    print(f"{'='*60}")

    return combined

## Alternative: Manual Input Function
Since ETF holdings data might not be readily available via API, this function allows manual updates.

In [4]:
def update_sector_holdings_manual(etf_code, sector_name, symbols_list):
    """
    Build a holdings dataframe for one sector from a hand-supplied symbol list.

    Parameters:
    etf_code (str): The ETF ticker (e.g., 'XLK')
    sector_name (str): The sector name (e.g., 'Technology')
    symbols_list (list): List of stock symbols

    Returns:
    pd.DataFrame: One row per symbol with columns 'Symbol', 'Sector',
                  'Sector_Code'; sector name and code repeat on every row
    """
    # Scalars are broadcast by pandas to the length of the symbol list.
    columns = {
        'Symbol': symbols_list,
        'Sector': sector_name,
        'Sector_Code': etf_code,
    }
    return pd.DataFrame(columns)

## Web Scraping Alternative
Use web scraping to get holdings from ETF provider websites or financial data sites.

In [5]:
import requests
from bs4 import BeautifulSoup

def scrape_etf_holdings(etf_ticker, timeout=10):
    """
    Scrape ETF holdings from the fund's page on ssga.com.

    This is a scaffold: the page is fetched and parsed into a
    BeautifulSoup tree, but no holdings-extraction logic exists yet, so
    the function currently always returns an empty list.

    Parameters:
    etf_ticker (str): The ETF ticker symbol
    timeout (float): Seconds to wait for the HTTP response before giving up

    Returns:
    list: List of stock symbols (currently always empty)
    """
    try:
        # Using ssga.com (SPDR fund pages); this can be modified to other sources
        # Note: Always check the website's robots.txt and terms of service
        # NOTE(review): confirm this URL pattern actually resolves for each ETF.
        url = f"https://www.ssga.com/us/en/institutional/etfs/funds/{etf_ticker.lower()}-spdr-select-sector-fund"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        # Without a timeout, requests can block indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            # Parse the HTML to extract holdings
            # This will vary by website
            print(f"Successfully accessed page for {etf_ticker}")
            # TODO: add parsing logic here that walks `soup` and collects symbols
            return []
        else:
            print(f"Failed to access page for {etf_ticker}: Status {response.status_code}")
            return []

    except Exception as e:
        # Best-effort: connection errors, timeouts and parser failures are
        # reported and treated as "no holdings".
        print(f"Error scraping holdings for {etf_ticker}: {str(e)}")
        return []



## Using Polygon.io API (Recommended)
If you have a Polygon.io API key, you can use their comprehensive data.

In [6]:
def get_holdings_from_polygon(etf_ticker, api_key, timeout=10):
    """
    Get ETF holdings using Polygon.io API.

    This is a scaffold: it queries Polygon's ticker reference endpoint
    for active common stocks (type=CS), which is not filtered by ETF, and
    the response is not processed yet. The function therefore currently
    always returns an empty list and `etf_ticker` is not used.

    Parameters:
    etf_ticker (str): The ETF ticker symbol (currently unused)
    api_key (str): Your Polygon.io API key
    timeout (float): Seconds to wait for the HTTP response before giving up

    Returns:
    list: List of stock symbols (currently always empty)
    """
    try:
        # Polygon.io ticker reference endpoint; query values are passed via
        # `params` so requests handles URL-encoding (including the API key).
        url = "https://api.polygon.io/v3/reference/tickers"
        params = {
            'type': 'CS',
            'market': 'stocks',
            'active': 'true',
            'limit': 1000,
            'apiKey': api_key,
        }

        # Without a timeout, requests can block indefinitely on a stalled server.
        response = requests.get(url, params=params, timeout=timeout)

        if response.status_code == 200:
            # Decoding still runs so a malformed payload is reported below.
            data = response.json()
            # TODO: process `data` to extract relevant holdings
            # Note: You may need to filter based on sector or other criteria
            return []
        else:
            print(f"API request failed with status {response.status_code}")
            return []

    except Exception as e:
        # Best-effort: network and JSON-decoding failures are reported and
        # treated as "no holdings".
        print(f"Error fetching from Polygon.io: {str(e)}")
        return []

# Example usage (uncomment and add your API key):
# POLYGON_API_KEY = 'your_api_key_here'
# holdings = get_holdings_from_polygon('XLK', POLYGON_API_KEY)

## Load Current CSV File

In [None]:
# Location of the sector/stock mapping maintained by this notebook
csv_path = 'sector_stocks.csv'

try:
    current_df = pd.read_csv(csv_path)
except FileNotFoundError:
    # No file yet: start from an empty table with the expected columns
    print(f"CSV file not found at {csv_path}")
    current_df = pd.DataFrame(columns=['Symbol', 'Sector', 'Sector_Code'])
else:
    # File loaded: report its size, per-sector counts and a preview
    print(f"Current CSV loaded successfully with {len(current_df)} stocks")
    print(f"\nStocks per sector:")
    print(current_df.groupby('Sector_Code').size())
    print(f"\nFirst few rows:")
    display(current_df.head())

## Manual Update Example
Use this cell to manually add or update stocks for a specific sector.

In [None]:
# Example: Manually update Technology sector (XLK)
# Uncomment and modify as needed

# new_xlk_symbols = ['AAPL', 'MSFT', 'NVDA', 'AVGO', 'CRM', 'ORCL', 'CSCO']
# xlk_update = update_sector_holdings_manual('XLK', 'Technology', new_xlk_symbols)
# display(xlk_update)

## Update Specific Sector
Function to update holdings for a specific sector ETF.

In [None]:
def update_sector_in_csv(df, new_holdings_df, sector_code):
    """
    Replace one sector's rows in the holdings dataframe with new holdings.

    Parameters:
    df (pd.DataFrame): The current dataframe; must have a 'Sector_Code' column
    new_holdings_df (pd.DataFrame): New holdings dataframe for the sector,
        or None when fetching the holdings failed
    sector_code (str): The sector code to update (e.g., 'XLK')

    Returns:
    pd.DataFrame: New dataframe sorted by 'Sector_Code' then 'Symbol' with
        a fresh 0..n-1 index. When new_holdings_df is None, a copy of `df`
        is returned unchanged so a failed fetch never wipes out the
        sector's existing rows. `df` itself is never modified.
    """
    # The fetch helpers signal failure with None; pd.concat would silently
    # drop it, deleting the sector's rows before len(None) raised.
    if new_holdings_df is None:
        print(f"No new holdings provided for {sector_code}; existing entries left unchanged")
        return df.copy()

    # Remove old entries for this sector
    df_filtered = df[df['Sector_Code'] != sector_code]

    # Add new holdings
    df_updated = pd.concat([df_filtered, new_holdings_df], ignore_index=True)

    # Sort by sector code and symbol
    df_updated = df_updated.sort_values(['Sector_Code', 'Symbol']).reset_index(drop=True)

    print(f"Updated {sector_code}: {len(new_holdings_df)} stocks")

    return df_updated

## Save Updated CSV
Save the updated dataframe back to the CSV file.

In [None]:
def save_updated_csv(df, filepath='sector_stocks.csv', backup=True):
    """
    Save the updated dataframe to CSV, optionally backing up the old file.

    The backup is a byte-for-byte copy written next to `filepath` and named
    '<stem>_backup_<YYYYmmdd_HHMMSS><suffix>' (for the default path:
    'sector_stocks_backup_<timestamp>.csv').

    Parameters:
    df (pd.DataFrame): The dataframe to save; must have 'Sector_Code' and
        'Sector' columns for the printed breakdown
    filepath (str): Path to save the CSV
    backup (bool): Whether to create a backup of the old file

    Raises:
    OSError: If an existing file cannot be copied to the backup location.
        The original file is not overwritten in that case.
    """
    import shutil
    from pathlib import Path

    target = Path(filepath)

    if backup:
        if target.is_file():
            # Create backup with timestamp
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            backup_path = target.with_name(
                f'{target.stem}_backup_{timestamp}{target.suffix}'
            )
            # Copy the raw file instead of re-serialising it through pandas
            # so the backup is exact even if the CSV is malformed.
            shutil.copy2(target, backup_path)
            print(f"Backup created: {backup_path}")
        else:
            print("No existing file to backup")

    # Save updated file
    df.to_csv(filepath, index=False)
    print(f"Updated CSV saved to {filepath}")
    print(f"Total stocks: {len(df)}")
    print(f"\nBreakdown by sector:")
    print(df.groupby(['Sector_Code', 'Sector']).size())

## Complete Update Workflow Example
Example workflow to update a single sector and save the result; repeat the steps for each sector you want to refresh.

In [None]:
# Example: Update a specific sector
# Uncomment and modify with actual data

# Step 1: Get new holdings (using your preferred method)
# new_symbols = ['AAPL', 'MSFT', 'NVDA']  # Replace with actual holdings
# new_holdings = update_sector_holdings_manual('XLK', 'Technology', new_symbols)

# Step 2: Update the dataframe
# updated_df = update_sector_in_csv(current_df, new_holdings, 'XLK')

# Step 3: Save to CSV
# save_updated_csv(updated_df)

## Verification
Verify the updated CSV file.

In [None]:
# Re-read the saved CSV from disk and summarise its contents
verify_df = pd.read_csv('sector_stocks.csv')

print(f"Total stocks in updated file: {len(verify_df)}")

# Row count for each (sector code, sector name) pair
print(f"\nStocks by sector:")
sector_counts = verify_df.groupby(['Sector_Code', 'Sector']).size()
print(sector_counts)

# First three rows of every sector as a quick preview
print(f"\nSample data:")
sample_rows = verify_df.groupby('Sector_Code').head(3)
display(sample_rows)

## Notes and Recommendations

### Data Sources for ETF Holdings:
1. **SSGA (State Street) Website**: Official source for SPDR ETFs - [ssga.com/etfs](https://www.ssga.com/us/en/institutional/etfs)
2. **ETF Provider APIs**: Some providers offer API access
3. **Financial Data APIs**: 
   - Polygon.io (mentioned in your ReadMe)
   - Alpha Vantage
   - IEX Cloud
4. **Manual Download**: Most reliable - download holdings CSV from official ETF pages

### Recommended Workflow:
1. Download holdings CSV files from SSGA website for each ETF
2. Read the downloaded files in this notebook
3. Extract ticker symbols
4. Update the sector_stocks.csv file

### Update Frequency:
- ETF holdings typically change quarterly
- Check official ETF websites for rebalancing dates
- Major changes announced in advance