In [18]:
import pandas as pd

In [20]:
# Load only the 'Ticker' column from the dataset
file_path = 'IWV_holdings_2024.csv'  # replace with your actual file path
symbol_data = pd.read_csv(file_path, usecols=['Ticker'])

In [21]:
# Extract unique tickers
unique_tickers = symbol_data['Ticker'].unique()

In [22]:
# Convert to a DataFrame or list for easier handling
tickers_df = pd.DataFrame(unique_tickers, columns=['Ticker'])

In [23]:
# Optionally, save the unique tickers to a new file if needed
tickers_df.to_csv('IWV_holdings_2024_tickers.csv', index=False)

In [24]:
# Print the count of unique tickers to verify
print(f"Number of unique tickers: {len(unique_tickers)}")

Number of unique tickers: 2676


In [26]:
# Identify duplicates in the 'symbol' column
duplicate_symbols = symbol_data[symbol_data.duplicated(subset=['Ticker'], keep=False)]

# Display unique duplicated symbols
unique_duplicates = duplicate_symbols['Ticker'].unique()
print("Duplicate Tickers:", unique_duplicates)

Duplicate Tickers: ['ADRO']


In [27]:
# Load the Asset Class column along with Symbol, if not already loaded
symbol_data = pd.read_csv(file_path, usecols=['Asset Class'])

# Check for unique values in the 'Asset Class' column
unique_asset_classes = symbol_data['Asset Class'].unique()
print("Unique Asset Classes:", unique_asset_classes)

Unique Asset Classes: ['Equity' 'Money Market' 'Cash' 'Cash Collateral and Margins']


In [28]:
# Load your data into a DataFrame
# Replace 'your_data.csv' with the actual path to your file
df = pd.read_csv('IWV_holdings_2024.csv')

# Select only the desired columns
selected_columns = ['Ticker', 'Name', 'Sector', 'Price', 'Location', 'Exchange']
df_filtered = df[selected_columns]

# To save to a new CSV file
df_filtered.to_csv('lWV_holdings_2024_filtered.csv', index=False)

In [29]:
# Load the Asset Class column along with Symbol, if not already loaded
location_data = pd.read_csv(file_path, usecols=['Location'])

# Check for unique values in the 'Asset Class' column
unique_asset_classes = location_data['Location'].unique()
print("Unique Location Classes:", unique_asset_classes)

Unique Location Classes: ['United States' 'Brazil' 'Canada' 'Germany' 'Peru' 'Norway'
 'United Kingdom' 'India' 'Panama' 'Israel' 'Netherlands' 'Bermuda'
 'Ireland' 'Monaco' 'Argentina' 'Jersey' 'Guernsey' 'Australia' 'China']


In [31]:
# Load the files
sp500_df = pd.read_csv('S&P_500_2024_tickers.csv')  # Replace with the actual file name for S&P 500 data
ticker_df = pd.read_csv('IWV_holdings_2024_tickers.csv')  # Replace with the actual file name for the file with only tickers

# Extract sets of tickers from each file
sp500_tickers = set(sp500_df['Symbol'])
ticker_list = set(ticker_df['Ticker'])

# Find missing tickers in ticker_list
missing_tickers = sp500_tickers - ticker_list

# Output the result
if not missing_tickers:
    print("All S&P 500 tickers are included in your list.")
else:
    print("The following S&P 500 tickers are missing from your list:")
    print(missing_tickers)

The following S&P 500 tickers are missing from your list:
{'STX', 'NXPI', 'BRK.B', 'ERIE', 'TEL', 'BF.B'}


In [32]:
# Load the IWV holdings data
iwv_df = pd.read_csv('IWV_holdings_2024.csv')

# Select and rename the necessary columns
master_data = iwv_df[['Ticker', 'Name', 'Sector']].copy()
master_data.rename(columns={
    'Ticker': 'Symbol',
    'Name': 'Asset Name',
    'Sector': 'Sector'
}, inplace=True)

# Add empty columns for Sub Industry, S&P Inclusion Date, and Foundation Date
master_data['Sub Industry'] = ""
master_data['S&P Inclusion Date'] = ""
master_data['Foundation Date'] = ""

# Save the result to master_data.csv
master_data.to_csv('master_data.csv', index=False)

print("master_data.csv has been created with the specified columns and mappings.")


master_data.csv has been created with the specified columns and mappings.


In [36]:
import pandas as pd

# File paths
spy_data_file = 'SPY_data.csv'  # Replace with your SPY data CSV file path
master_data_file = 'master_data.csv'  # Replace with your master data file path
output_file_path = 'updated_master_data.csv'  # Replace with path to save updated master

# Read both files
spy_data = pd.read_csv(spy_data_file)
master_data = pd.read_csv(master_data_file)

# Select the relevant columns from SPY data
spy_data = spy_data[['Symbol', 'Sub Industry', 'S&P Inclusion Date', 'Foundation Date']]

# Merge the SPY data into the master data based on the 'Symbol' column
updated_master = master_data.merge(spy_data, on='Symbol', how='left', suffixes=('', '_SPY'))

# Fill in missing values in the master data columns with data from SPY data
for col in ['Sub Industry', 'S&P Inclusion Date', 'Foundation Date']:
    updated_master[col] = updated_master[col].combine_first(updated_master[f"{col}_SPY"])

# Drop the additional columns used for merging
updated_master = updated_master.drop(columns=[f"{col}_SPY" for col in ['Sub Industry', 'S&P Inclusion Date', 'Foundation Date']])

# Save the updated master file
updated_master.to_csv(output_file_path, index=False)

print("Data has been successfully updated and saved to the new master file.")



Data has been successfully updated and saved to the new master file.


In [38]:
import yfinance as yf
import pandas as pd

# Load the updated master data file
master_data = pd.read_csv('updated_master_data.csv')  # replace with the path to your file

# Function to get the company name using yfinance
def get_company_name(ticker):
    try:
        stock = yf.Ticker(ticker)
        company_name = stock.info.get('longName')  # 'longName' contains the company name
        return company_name
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Iterate through each row to check for missing company names
for index, row in master_data.iterrows():
    if pd.isna(row['Asset Name']):  # Check if 'Asset Name' is missing
        ticker = row['Symbol']
        company_name = get_company_name(ticker)
        
        if company_name:  # If a company name is found, update the DataFrame
            master_data.at[index, 'Asset Name'] = company_name
            print(f"Updated company name for {ticker}: {company_name}")
        else:
            print(f"No company name found for {ticker}")

# Save the updated master data
master_data.to_csv('master_data2.csv', index=False)

print("Company name updates complete and saved to 'updated_master_with_company_names.csv'.")


Updated company name for STX: Seagate Technology Holdings plc
Updated company name for NXPI: NXP Semiconductors N.V.
Updated company name for ERIE: Erie Indemnity Company
Updated company name for TEL: TE Connectivity plc
Company name updates complete and saved to 'updated_master_with_company_names.csv'.


In [39]:
#Add company name from ticker symbol using yfinance
import yfinance as yf
import pandas as pd

# Load the updated master data file
master_data = pd.read_csv('master_data2.csv')  # replace with the path to your file

# Function to get company details including foundation date using yfinance
def get_company_details(ticker):
    try:
        stock = yf.Ticker(ticker)
        company_info = {
            'name': stock.info.get('longName'),
            'sector': stock.info.get('sector'),
            'sub_industry': stock.info.get('industry'),  # 'industry' in yfinance often refers to Sub Industry
            'foundation_date': stock.info.get('startDate')  # 'startDate' may approximate the foundation date
        }
        return company_info
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Iterate through each row to check for missing company details
for index, row in master_data.iterrows():
    ticker = row['Symbol']
    needs_update = False  # Track if we make any updates for this row

    # Check if any of the columns are missing and need data
    if pd.isna(row['Asset Name']) or pd.isna(row['Sector']) or pd.isna(row['Sub Industry']) or pd.isna(row['Foundation Date']):
        company_details = get_company_details(ticker)
        
        if company_details:
            # Update Asset Name if missing
            if pd.isna(row['Asset Name']) and company_details['name']:
                master_data.at[index, 'Asset Name'] = company_details['name']
                print(f"Updated company name for {ticker}: {company_details['name']}")
                needs_update = True
            
            # Update Sector if missing
            if pd.isna(row['Sector']) and company_details['sector']:
                master_data.at[index, 'Sector'] = company_details['sector']
                print(f"Updated sector for {ticker}: {company_details['sector']}")
                needs_update = True
            
            # Update Sub Industry if missing
            if pd.isna(row['Sub Industry']) and company_details['sub_industry']:
                master_data.at[index, 'Sub Industry'] = company_details['sub_industry']
                print(f"Updated sub industry for {ticker}: {company_details['sub_industry']}")
                needs_update = True
            
            # Update Foundation Date if missing
            if pd.isna(row['Foundation Date']) and company_details['foundation_date']:
                master_data.at[index, 'Foundation Date'] = company_details['foundation_date']
                print(f"Updated foundation date for {ticker}: {company_details['foundation_date']}")
                needs_update = True
            
        if not needs_update:
            print(f"No additional data found for {ticker}")

# Save the updated master data
master_data.to_csv('master_data3.csv', index=False)

print("Data updates complete and saved to 'updated_master_with_foundation_dates.csv'.")


Updated sub industry for APO: Asset Management
Updated sub industry for MRVL: Semiconductors


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/XTSLA?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=XTSLA&crumb=1HVtqFbGSPb


No additional data found for XTSLA
Updated sub industry for CRH: Building Materials
Updated sub industry for SPOT: Internet Content & Information
Updated sub industry for DASH: Internet Content & Information
Updated sub industry for TTD: Software - Application
Updated sub industry for WDAY: Software - Application
Updated sub industry for NU: Banks - Regional
Updated sub industry for APP: Software - Application
Updated sub industry for LNG: Oil & Gas Midstream
Updated sub industry for SQ: Software - Infrastructure
Updated sub industry for FERG: Industrial Distribution
Updated sub industry for VRT: Electrical Equipment & Parts
Updated sub industry for MSTR: Software - Application
Updated sub industry for DDOG: Software - Application
Updated sub industry for SNOW: Software - Application
Updated sub industry for COIN: Financial Data & Stock Exchanges
Updated sub industry for ALNY: Biotechnology
Updated sub industry for VEEV: Health Information Services
Updated sub industry for ARES: Asset 

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/MSFUT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=MSFUT&crumb=1HVtqFbGSPb


No additional data found for MSFUT
Updated sub industry for SKY: Residential Construction
Updated sub industry for RH: Specialty Retail
Updated sub industry for AVT: Electronics & Computer Distribution
Updated sub industry for ITRI: Scientific & Technical Instruments
Updated sub industry for ROIV: Biotechnology
Updated sub industry for ZETA: Software - Infrastructure
Updated sub industry for IBP: Residential Construction
Updated sub industry for CIVI: Oil & Gas E&P
Updated sub industry for CRNX: Biotechnology
Updated sub industry for BIPC: Utilities - Regulated Gas
Updated sub industry for OZK: Banks - Regional
Updated sub industry for NSIT: Electronics & Computer Distribution
Updated sub industry for OGN: Drug Manufacturers - General
Updated sub industry for HCP: Software - Infrastructure
Updated sub industry for AM: Oil & Gas Midstream
Updated sub industry for RARE: Biotechnology
Updated sub industry for SLM: Credit Services
Updated sub industry for BDC: Communication Equipment
Updat

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/LENB?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=LENB&crumb=1HVtqFbGSPb


No additional data found for LENB
Updated sub industry for PD: Software - Application
Updated sub industry for THS: Packaged Foods
Updated sub industry for UPBD: Software - Application
Updated sub industry for AVPT: Software - Infrastructure
Updated sub industry for MCY: Insurance - Property & Casualty
Updated sub industry for STC: Insurance - Property & Casualty
Updated sub industry for MSGE: Leisure
Updated sub industry for CASH: Banks - Regional
Updated sub industry for VSEC: Aerospace & Defense
Updated sub industry for IMVT: Biotechnology
Updated sub industry for KNTK: Oil & Gas Midstream
Updated sub industry for MLKN: Furnishings, Fixtures & Appliances
Updated sub industry for OCUL: Biotechnology
Updated sub industry for KOS: Oil & Gas E&P
Updated sub industry for NEO: Diagnostics & Research
Updated sub industry for INSW: Oil & Gas Midstream
Updated sub industry for OSW: Leisure
Updated sub industry for WKC: Oil & Gas Refining & Marketing
Updated sub industry for BEAM: Biotechnolo

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/GEFB?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=GEFB&crumb=3KaTt8NSNpm


No additional data found for GEFB
Updated sub industry for MLNK: Software—Application
Updated sub industry for ODC: Specialty Chemicals
Updated sub industry for OBT: Banks—Regional
Updated sub industry for YMAB: Biotechnology
Updated sub industry for BATRA: Entertainment
Updated sub industry for EBTC: Banks—Regional
Updated sub industry for ESPR: Drug Manufacturers—Specialty & Generic
Updated sub industry for HYLN: Auto Parts
Updated sub industry for MCB: Banks—Regional
Updated sub industry for PGY: Software—Infrastructure
Updated sub industry for RVNC: Biotechnology
Updated sub industry for TRTX: REIT—Mortgage
Updated sub industry for BBUC: Asset Management
Updated sub industry for BCAL: Banks—Regional
Updated sub industry for CTO: REIT—Diversified
Updated sub industry for MAX: Internet Content & Information
Updated sub industry for RYAM: Chemicals
Updated sub industry for SENEA: Packaged Foods
Updated sub industry for CCNE: Banks—Regional
Updated sub industry for TRDA: Biotechnology


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/METCV?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=METCV&crumb=3KaTt8NSNpm


No additional data found for METCV
Updated sub industry for SHYF: Farm & Heavy Construction Machinery
Updated sub industry for URGN: Biotechnology
Updated sub industry for ALRS: Banks—Regional
Updated sub industry for REFI: REIT—Mortgage
Updated sub industry for CLW: Paper & Paper Products
Updated sub industry for GOGO: Telecom Services
Updated sub industry for SLP: Health Information Services
Updated sub industry for XPER: Software—Application
Updated sub industry for ADTN: Communication Equipment
Updated sub industry for CCCC: Biotechnology
Updated sub industry for CWCO: Utilities—Regulated Water
Updated sub industry for DENN: Restaurants
Updated sub industry for FLGT: Diagnostics & Research
Updated sub industry for LESL: Specialty Retail
No additional data found for LGFA
Updated sub industry for LOVE: Furnishings, Fixtures & Appliances
Updated sub industry for NLOP: REIT—Office
Updated sub industry for RLAY: Biotechnology
Updated sub industry for WRLD: Credit Services
Updated sub in

In [40]:
import yfinance as yf
import pandas as pd

# Load the updated master data file
master_data = pd.read_csv('master_data3.csv')  # replace with your file path

# Function to get company details including foundation and IPO dates
def get_company_dates(ticker):
    try:
        stock = yf.Ticker(ticker)
        company_info = {
            'ipo_date': stock.info.get('ipoDate'),  # IPO date
            'foundation_date': stock.info.get('startDate')  # Approximation for foundation date
        }
        return company_info
    except Exception as e:
        print(f"Error fetching dates for {ticker}: {e}")
        return None

# Iterate through each row to check for missing dates
for index, row in master_data.iterrows():
    ticker = row['Symbol']
    needs_update = False

    # Check if foundation or IPO date is missing and retrieve if needed
    if pd.isna(row['Foundation Date']) or 'IPO Date' not in master_data.columns or pd.isna(row.get('IPO Date', None)):
        company_dates = get_company_dates(ticker)
        
        if company_dates:
            # Update Foundation Date if missing
            if pd.isna(row['Foundation Date']) and company_dates['foundation_date']:
                master_data.at[index, 'Foundation Date'] = company_dates['foundation_date']
                print(f"Updated foundation date for {ticker}: {company_dates['foundation_date']}")
                needs_update = True
            
            # Update IPO Date if missing
            if 'IPO Date' not in master_data.columns:
                master_data['IPO Date'] = None  # add IPO Date column if it doesn’t exist
            
            if pd.isna(row.get('IPO Date', None)) and company_dates['ipo_date']:
                master_data.at[index, 'IPO Date'] = company_dates['ipo_date']
                print(f"Updated IPO date for {ticker}: {company_dates['ipo_date']}")
                needs_update = True

        if not needs_update:
            print(f"No additional date data found for {ticker}")

# Save the updated master data with IPO and foundation dates
master_data.to_csv('master_data4.csv', index=False)

print("Date updates complete and saved to 'master_data4.csv'.")


No additional date data found for AAPL
No additional date data found for NVDA
No additional date data found for MSFT
No additional date data found for AMZN
No additional date data found for META
No additional date data found for GOOGL
No additional date data found for GOOG
No additional date data found for BRK-A
No additional date data found for BRK-B
No additional date data found for AVGO
No additional date data found for TSLA
No additional date data found for LLY
No additional date data found for JPM
No additional date data found for XOM
No additional date data found for UNH
No additional date data found for V
No additional date data found for MA
No additional date data found for HD
No additional date data found for PG
No additional date data found for COST
No additional date data found for JNJ
No additional date data found for ABBV
No additional date data found for WMT
No additional date data found for NFLX
No additional date data found for BAC
No additional date data found for CRM


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/XTSLA?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=XTSLA&crumb=1HVtqFbGSPb


No additional date data found for XTSLA
No additional date data found for HCA
No additional date data found for BDX
No additional date data found for CRWD
No additional date data found for ORLY
No additional date data found for CSX
No additional date data found for CRH
No additional date data found for MCK
No additional date data found for FCX
No additional date data found for WMB
No additional date data found for FDX
No additional date data found for EMR
No additional date data found for COF
No additional date data found for ECL
No additional date data found for ADSK
No additional date data found for AJG
No additional date data found for CARR
No additional date data found for MAR
No additional date data found for ABNB
No additional date data found for AFL
No additional date data found for HLT
No additional date data found for DLR
No additional date data found for SLB
No additional date data found for TFC
No additional date data found for ROP
No additional date data found for GM
No add

KeyboardInterrupt: 

In [43]:
import yfinance as yf
import pandas as pd

# Load the master data file
master_data = pd.read_csv('master_data4.csv')  # replace with your file path

# Function to get IPO date using yfinance
def get_ipo_date(ticker):
    try:
        stock = yf.Ticker(ticker)
        ipo_date = stock.info.get('ipoDate')
        
        # Fallback to firstTradeDateEpoch if ipoDate is not available
        if not ipo_date:
            first_trade = stock.info.get('firstTradeDateEpoch')
            if first_trade:
                ipo_date = pd.to_datetime(first_trade, unit='s').date()
        
        # Further fallback: Use the earliest date from historical data
        if not ipo_date:
            history = stock.history(period="max")
            if not history.empty:
                ipo_date = history.index[0].date()  # First available date
        
        return ipo_date
    except Exception as e:
        print(f"Error fetching IPO date for {ticker}: {e}")
        return None
        
# Iterate through each row to check for missing IPO dates
for index, row in master_data.iterrows():
    ticker = row['Symbol']
    
    # Check if IPO Date is missing and retrieve if needed
    if pd.isna(row['IPO Date']):
        ipo_date = get_ipo_date(ticker)
        
        if ipo_date:  # If IPO date is found, update the DataFrame
            master_data.at[index, 'IPO Date'] = ipo_date
            print(f"Updated IPO date for {ticker}: {ipo_date}")
        else:
            print(f"No IPO date found for {ticker}")

# Save the updated master data with IPO dates filled in
master_data.to_csv('master_data5.csv', index=False)

print("IPO date updates complete and saved to 'updated_master_with_ipo_dates.csv'.")


  master_data.at[index, 'IPO Date'] = ipo_date


Updated IPO date for AAPL: 1980-12-12
Updated IPO date for NVDA: 1999-01-22
Updated IPO date for MSFT: 1986-03-13
Updated IPO date for AMZN: 1997-05-15
Updated IPO date for META: 2012-05-18
Updated IPO date for GOOGL: 2004-08-19
Updated IPO date for GOOG: 2004-08-19
Updated IPO date for BRK-A: 1980-03-17
Updated IPO date for BRK-B: 1996-05-09
Updated IPO date for AVGO: 2009-08-06
Updated IPO date for TSLA: 2010-06-29
Updated IPO date for LLY: 1972-06-01
Updated IPO date for JPM: 1980-03-17
Updated IPO date for XOM: 1962-01-02
Updated IPO date for UNH: 1984-10-17
Updated IPO date for V: 2008-03-19
Updated IPO date for MA: 2006-05-25
Updated IPO date for HD: 1981-09-22
Updated IPO date for PG: 1962-01-02
Updated IPO date for COST: 1986-07-09
Updated IPO date for JNJ: 1962-01-02
Updated IPO date for ABBV: 2013-01-02
Updated IPO date for WMT: 1972-08-25
Updated IPO date for NFLX: 2002-05-23
Updated IPO date for BAC: 1973-02-21
Updated IPO date for CRM: 2004-06-23
Updated IPO date for ORCL:

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/XTSLA?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=XTSLA&crumb=1HVtqFbGSPb
$XTSLA: possibly delisted; no timezone found


No IPO date found for XTSLA
Updated IPO date for HCA: 2011-03-10
Updated IPO date for BDX: 1973-02-21
Updated IPO date for CRWD: 2019-06-12
Updated IPO date for ORLY: 1993-04-23
Updated IPO date for CSX: 1980-11-03
Updated IPO date for CRH: 1989-07-13
Updated IPO date for MCK: 1994-11-10
Updated IPO date for FCX: 1995-07-10
Updated IPO date for WMB: 1981-12-31
Updated IPO date for FDX: 1978-04-12
Updated IPO date for EMR: 1972-06-01
Updated IPO date for COF: 1994-11-16
Updated IPO date for ECL: 1973-02-21
Updated IPO date for ADSK: 1985-06-28
Updated IPO date for AJG: 1984-06-20
Updated IPO date for CARR: 2020-03-19
Updated IPO date for MAR: 1998-03-23
Updated IPO date for ABNB: 2020-12-10
Updated IPO date for AFL: 1980-03-17
Updated IPO date for HLT: 2013-12-12
Updated IPO date for DLR: 2004-10-29
Updated IPO date for SLB: 1981-12-31
Updated IPO date for TFC: 1980-03-18
Updated IPO date for ROP: 1992-02-13
Updated IPO date for GM: 2010-11-18
Updated IPO date for NSC: 1982-06-02
Update

$HEIA: possibly delisted; no timezone found


No IPO date found for HEIA
Updated IPO date for EG: 1995-10-03
Updated IPO date for PSTG: 2015-10-06
Updated IPO date for APTV: 2011-11-17
Updated IPO date for KIM: 1991-11-22
Updated IPO date for ALGN: 2001-01-30
Updated IPO date for TXT: 1973-02-21
Updated IPO date for GWRE: 2012-01-25
Updated IPO date for OC: 2006-11-01
Updated IPO date for LNT: 1973-02-21
Updated IPO date for NTNX: 2016-09-30
Updated IPO date for AKAM: 1999-10-29
Updated IPO date for TW: 2019-04-04
Updated IPO date for AVTR: 2019-05-17
Updated IPO date for TOL: 1986-07-08
Updated IPO date for SSNC: 2010-03-31
Updated IPO date for JBHT: 1983-11-22
Updated IPO date for CF: 2005-08-11
Updated IPO date for EQH: 2018-05-10
Updated IPO date for VRSN: 1998-01-30
Updated IPO date for THC: 1980-03-17
Updated IPO date for XPO: 2003-10-07
Updated IPO date for RVTY: 1973-02-21
Updated IPO date for CASY: 1983-10-20
Updated IPO date for TRMB: 1990-07-20
Updated IPO date for DPZ: 2004-07-13
Updated IPO date for SMCI: 2007-03-29
U

$BFB: possibly delisted; no timezone found


No IPO date found for BFB
Updated IPO date for SEIC: 1981-03-25
Updated IPO date for AAON: 1992-12-16
Updated IPO date for ADC: 1994-04-15
Updated IPO date for ACI: 2020-06-26
Updated IPO date for BFAM: 2013-01-25
Updated IPO date for HII: 2011-03-22
Updated IPO date for UFPI: 1993-11-10
Updated IPO date for AR: 2013-10-10
Updated IPO date for CBSH: 1980-03-17
Updated IPO date for VKTX: 2015-04-28
Updated IPO date for RGEN: 1986-04-29
Updated IPO date for HQY: 2014-07-31
Updated IPO date for VNO: 1980-03-17
Updated IPO date for COLD: 2018-01-19
Updated IPO date for SAIC: 2013-09-16
Updated IPO date for RHI: 1980-03-17
Updated IPO date for VFC: 1980-03-17
Updated IPO date for XP: 2019-12-11
Updated IPO date for GNTX: 1981-12-22
Updated IPO date for S: 2021-06-30
Updated IPO date for WH: 2018-05-21
Updated IPO date for AGNC: 2008-05-15
Updated IPO date for WEX: 2005-02-16
Updated IPO date for DVA: 1995-10-31
Updated IPO date for RRC: 1980-06-19
Updated IPO date for JXN: 2021-09-01
Update

$UHALB: possibly delisted; no timezone found


No IPO date found for UHALB
Updated IPO date for EXLS: 2006-10-20
Updated IPO date for JAZZ: 2007-06-01
Updated IPO date for WBA: 1980-03-17
Updated IPO date for PB: 1998-11-12
Updated IPO date for FR: 1994-06-24
Updated IPO date for ANF: 1996-09-26
Updated IPO date for BSY: 2020-09-23
Updated IPO date for MAT: 1976-06-17
Updated IPO date for G: 2007-08-02
Updated IPO date for RLI: 1980-03-17
Updated IPO date for AZPN: 1994-10-26
Updated IPO date for KEX: 1980-03-17
Updated IPO date for CVLT: 2006-09-22
Updated IPO date for MTH: 1988-07-26
Updated IPO date for PLNT: 2015-08-06
Updated IPO date for COKE: 1973-02-21
Updated IPO date for SIRI: 1994-09-13
Updated IPO date for FAF: 2010-05-28
Updated IPO date for RYAN: 2021-07-22
Updated IPO date for SPXC: 1980-03-17
Updated IPO date for CROX: 2006-02-08
Updated IPO date for ESNT: 2013-10-31
Updated IPO date for GKOS: 2015-06-25
Updated IPO date for R: 1980-01-02
Updated IPO date for MKSI: 1999-03-30
Updated IPO date for AZEK: 2020-06-12
Up

$MOGA: possibly delisted; no timezone found


No IPO date found for MOGA
Updated IPO date for ALTM: 2018-10-11
Updated IPO date for FOX: 2019-03-13
Updated IPO date for PVH: 1980-03-17
Updated IPO date for NXST: 2003-11-25
Updated IPO date for RITM: 2013-05-02
Updated IPO date for JHG: 2017-05-30
Updated IPO date for VNOM: 2014-06-18
Updated IPO date for HOMB: 2006-06-23
Updated IPO date for KBH: 1986-08-01
Updated IPO date for KRG: 2004-08-12
Updated IPO date for MMS: 1997-06-13
Updated IPO date for CELH: 2007-01-22
Updated IPO date for ZWS: 2012-03-29
Updated IPO date for RMBS: 1997-05-14
Updated IPO date for THG: 1995-10-11
Updated IPO date for FIVE: 2012-07-19
Updated IPO date for ST: 2010-03-11
Updated IPO date for THO: 1984-01-10
Updated IPO date for TKR: 1973-02-21
Updated IPO date for DOCS: 2021-06-24
Updated IPO date for RDN: 1992-10-30
Updated IPO date for UMBF: 1980-03-17
Updated IPO date for VVV: 2016-09-23
Updated IPO date for OLN: 1973-02-21
Updated IPO date for SON: 1980-03-17
Updated IPO date for UBSI: 1987-06-04
U

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/MSFUT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=MSFUT&crumb=3KaTt8NSNpm
$MSFUT: possibly delisted; no timezone found


No IPO date found for MSFUT
Updated IPO date for SKY: 1973-05-03
Updated IPO date for RH: 2012-11-02
Updated IPO date for AVT: 1973-05-03
Updated IPO date for ITRI: 1993-11-05
Updated IPO date for ROIV: 2020-12-08
Updated IPO date for ZETA: 2021-06-10
Updated IPO date for IBP: 2014-02-13
Updated IPO date for CIVI: 2011-12-15
Updated IPO date for CRNX: 2018-07-18
Updated IPO date for BIPC: 2020-03-31
Updated IPO date for OZK: 1997-07-17
Updated IPO date for NSIT: 1995-01-24
Updated IPO date for OGN: 2021-05-14
Updated IPO date for HCP: 2021-12-09
Updated IPO date for AM: 2017-05-04
Updated IPO date for RARE: 2014-01-31
Updated IPO date for SLM: 1983-09-23
Updated IPO date for BDC: 1993-11-24
Updated IPO date for STRL: 1991-07-12
Updated IPO date for AUR: 2021-05-10
Updated IPO date for CRDO: 2022-01-27
Updated IPO date for PECO: 2021-02-25
Updated IPO date for POR: 2006-03-31
Updated IPO date for EXPO: 1990-08-17
Updated IPO date for CADE: 1985-10-16
Updated IPO date for SLG: 1997-08-15

$BFA: possibly delisted; no timezone found


No IPO date found for BFA
Updated IPO date for DYN: 2020-09-17
Updated IPO date for AKR: 1993-05-27
Updated IPO date for HAYW: 2021-03-12
Updated IPO date for PRK: 1990-08-31
Updated IPO date for MBC: 2022-12-09
Updated IPO date for RPD: 2015-07-17
Updated IPO date for FCPT: 2015-11-10
Updated IPO date for IDYA: 2019-05-23
Updated IPO date for IPAR: 1988-02-04
Updated IPO date for PSMT: 1997-09-02
Updated IPO date for MTX: 1992-10-23
Updated IPO date for TWST: 2018-10-31
Updated IPO date for LAUR: 2017-02-01
Updated IPO date for LGIH: 2013-11-07
Updated IPO date for OII: 1975-10-31
Updated IPO date for ZD: 1999-07-23
Updated IPO date for OSIS: 1997-10-02
Updated IPO date for WERN: 1986-06-20
Updated IPO date for ABR: 2004-04-07
Updated IPO date for GT: 1962-01-02
Updated IPO date for CPRI: 2011-12-15
Updated IPO date for HWKN: 1980-03-18
Updated IPO date for KYMR: 2020-08-21
Updated IPO date for VERX: 2020-07-29
Updated IPO date for AZTA: 1995-02-02
Updated IPO date for ARWR: 1993-12-1

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/LENB?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=LENB&crumb=3KaTt8NSNpm
$LENB: possibly delisted; no timezone found


No IPO date found for LENB
Updated IPO date for PD: 2019-04-11
Updated IPO date for THS: 2005-06-28
Updated IPO date for UPBD: 1995-01-25
Updated IPO date for AVPT: 2019-11-05
Updated IPO date for MCY: 1985-11-20
Updated IPO date for STC: 1973-02-21
Updated IPO date for MSGE: 2023-05-02
Updated IPO date for CASH: 1993-09-20
Updated IPO date for VSEC: 1982-10-22
Updated IPO date for IMVT: 2019-06-21
Updated IPO date for KNTK: 2018-11-13
Updated IPO date for MLKN: 1980-03-17
Updated IPO date for OCUL: 2014-07-25
Updated IPO date for KOS: 2011-05-11
Updated IPO date for NEO: 2004-03-16
Updated IPO date for INSW: 2016-11-16
Updated IPO date for OSW: 2017-11-17
Updated IPO date for WKC: 1986-08-28
Updated IPO date for BEAM: 2020-02-06
Updated IPO date for PRG: 1982-11-04
Updated IPO date for AGM: 1994-02-10
Updated IPO date for ALKT: 2021-04-14
Updated IPO date for BHE: 1990-06-27
Updated IPO date for JBLU: 2002-04-12
Updated IPO date for PZZA: 1993-06-08
Updated IPO date for PDCO: 1992-10-

$LGFB: possibly delisted; no timezone found


No IPO date found for LGFB
Updated IPO date for MEG: 2020-07-23
Updated IPO date for DESP: 2017-09-20
Updated IPO date for SEZL: 2023-09-13
Updated IPO date for ASAN: 2020-09-30
Updated IPO date for DFH: 2021-01-21
Updated IPO date for PHR: 2019-07-18
Updated IPO date for CDRE: 2021-11-04
Updated IPO date for CLB: 1995-09-21
Updated IPO date for PRO: 2007-07-26
Updated IPO date for SHLS: 2021-01-28
Updated IPO date for RWT: 1995-08-04
Updated IPO date for EFC: 2010-10-08
Updated IPO date for MNRO: 1991-07-30
Updated IPO date for THR: 2011-05-05
Updated IPO date for UTI: 2003-12-18
Updated IPO date for BTSG: 2024-01-26
Updated IPO date for BV: 2018-06-28
Updated IPO date for CRGX: 2023-11-13
Updated IPO date for MBUU: 2014-01-31
Updated IPO date for MDXG: 2008-02-12
Updated IPO date for PGRE: 2014-11-19
Updated IPO date for CXM: 2021-06-23
Updated IPO date for EMBC: 2022-03-22
Updated IPO date for LQDA: 2018-07-26
Updated IPO date for NG: 2003-12-04
Updated IPO date for AVNS: 2014-10-21

$CWENA: possibly delisted; no timezone found


No IPO date found for CWENA
Updated IPO date for HCSG: 1983-11-29
Updated IPO date for TREE: 2008-08-12
Updated IPO date for MRVI: 2020-11-20
Updated IPO date for TMP: 1986-06-02
Updated IPO date for ARLO: 2018-08-03
Updated IPO date for AVBP: 2024-01-26
Updated IPO date for CPF: 1987-08-28
Updated IPO date for NABL: 2021-07-19
Updated IPO date for OBK: 2018-05-08
Updated IPO date for PFC: 1993-07-20
Updated IPO date for TGI: 1996-10-25
Updated IPO date for ATEN: 2014-03-21
Updated IPO date for CWH: 2016-10-07
Updated IPO date for CTLP: 1999-06-07
Updated IPO date for MRTN: 1986-09-25
Updated IPO date for WNC: 1991-11-08
Updated IPO date for EXPI: 2018-02-07
Updated IPO date for WSR: 2010-08-26
Updated IPO date for MATV: 1995-11-09
Updated IPO date for EYE: 2017-10-27
Updated IPO date for VZIO: 2021-03-26
Updated IPO date for DX: 1988-02-10
Updated IPO date for HLF: 2004-12-16
Updated IPO date for PDFS: 2001-07-31
Updated IPO date for TTGT: 2007-05-17
Updated IPO date for UWMC: 2020-05

Failed to get ticker 'TALK' reason: HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)
$TALK: possibly delisted; no timezone found


No IPO date found for TALK
Updated IPO date for AMTB: 2018-10-18
Updated IPO date for PSFE: 2020-10-09
Updated IPO date for SEI: 2017-05-12
Updated IPO date for BMBL: 2021-02-11
Updated IPO date for DMRC: 1999-12-02
Updated IPO date for RUSHB: 1996-06-07
Updated IPO date for SMP: 1980-03-17
Updated IPO date for BBW: 2004-10-28
Updated IPO date for CMPO: 2020-11-19
Updated IPO date for EVGO: 2020-11-20
Updated IPO date for JBSS: 1991-12-04
Updated IPO date for NRIM: 1990-11-15
Updated IPO date for PLSE: 2016-05-18
Updated IPO date for QTRX: 2017-12-07
Updated IPO date for RGR: 1973-05-03
Updated IPO date for AHH: 2013-05-08
Updated IPO date for GETY: 2020-09-21
Updated IPO date for GDOT: 2010-07-22
Updated IPO date for HTBK: 1998-07-20
Updated IPO date for NPK: 1980-03-17
Updated IPO date for ORIC: 2020-04-24
Updated IPO date for SWI: 2018-10-19
Updated IPO date for CCBG: 1994-06-06
Updated IPO date for IRWD: 2010-02-03
Updated IPO date for LXU: 1980-01-02
Updated IPO date for AIOT: 199

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/GEFB?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=GEFB&crumb=3KaTt8NSNpm
$GEFB: possibly delisted; no timezone found


No IPO date found for GEFB
Updated IPO date for MLNK: 2021-07-28
Updated IPO date for ODC: 1980-03-19
Updated IPO date for OBT: 2003-10-15
Updated IPO date for YMAB: 2018-09-21
Updated IPO date for BATRA: 2016-04-18
Updated IPO date for EBTC: 2005-02-14
Updated IPO date for ESPR: 2013-06-26
Updated IPO date for HYLN: 2020-01-02
Updated IPO date for MCB: 2017-11-08
Updated IPO date for PGY: 2021-04-21
Updated IPO date for RVNC: 2014-02-06
Updated IPO date for TRTX: 2017-07-20
Updated IPO date for BBUC: 2022-03-07
Updated IPO date for BCAL: 2005-11-10
Updated IPO date for CTO: 1980-03-17
Updated IPO date for MAX: 2020-10-28
Updated IPO date for RYAM: 2014-06-16
Updated IPO date for SENEA: 1998-06-03
Updated IPO date for CCNE: 1994-04-04
Updated IPO date for TRDA: 2021-10-29
Updated IPO date for GRNT: 2020-11-06
Updated IPO date for HTLD: 1986-11-05
Updated IPO date for RICK: 1995-10-13
Updated IPO date for TWKS: 2021-09-15
Updated IPO date for UHT: 1986-12-26
Updated IPO date for VREX: 2

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/METCV?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=METCV&crumb=3KaTt8NSNpm
$METCV: possibly delisted; no timezone found


No IPO date found for METCV
Updated IPO date for SHYF: 1984-06-19
Updated IPO date for URGN: 2017-05-04
Updated IPO date for ALRS: 2003-03-20
Updated IPO date for REFI: 2021-12-08
Updated IPO date for CLW: 2008-12-05
Updated IPO date for GOGO: 2013-06-21
Updated IPO date for SLP: 1997-06-18
Updated IPO date for XPER: 2022-09-20
Updated IPO date for ADTN: 1994-08-10
Updated IPO date for CCCC: 2020-10-02
Updated IPO date for CWCO: 1995-01-25
Updated IPO date for DENN: 1998-01-08
Updated IPO date for FLGT: 2016-09-29
Updated IPO date for LESL: 2020-10-29


$LGFA: possibly delisted; no timezone found


No IPO date found for LGFA
Updated IPO date for LOVE: 2018-06-27
Updated IPO date for NLOP: 2023-11-06
Updated IPO date for RLAY: 2020-07-16
Updated IPO date for WRLD: 1991-11-26
Updated IPO date for CVGW: 2002-07-22
Updated IPO date for CMP: 2003-12-12
Updated IPO date for FISI: 1999-06-25
Updated IPO date for SD: 2016-10-04
Updated IPO date for SFST: 1999-10-28
Updated IPO date for ZUMZ: 2005-05-06
Updated IPO date for CZNC: 1994-04-05
Updated IPO date for HBT: 2019-10-11
Updated IPO date for INDI: 2019-10-04
Updated IPO date for JAKK: 1996-05-02
Updated IPO date for KRNY: 2005-02-24
Updated IPO date for OIS: 2001-02-09
Updated IPO date for PANL: 2013-12-19
Updated IPO date for PAYS: 2007-10-10
Updated IPO date for FRST: 2006-11-01
Updated IPO date for PACK: 2018-03-13
Updated IPO date for RBB: 2017-07-26
Updated IPO date for TSBK: 1998-01-13
Updated IPO date for ARIS: 2021-10-22
Updated IPO date for CHCT: 2015-05-20
Updated IPO date for FWRG: 2021-10-01
Updated IPO date for KE: 2014

$ADRO: possibly delisted; no timezone found


No IPO date found for ADRO
Updated IPO date for AVTE: 2021-06-30
Updated IPO date for IPSC: 2021-06-18
Updated IPO date for CHRS: 2014-11-06
Updated IPO date for RAPT: 2019-10-31
Updated IPO date for HLVX: 2022-04-29
Updated IPO date for CPS: 2010-05-25


$INH: possibly delisted; no price data found  (1d 1925-11-29 -> 2024-11-04)


No IPO date found for INH
Updated IPO date for BLUE: 2013-06-19
Updated IPO date for WLLBW: 2020-09-04
Updated IPO date for STX: 2002-12-11
Updated IPO date for NXPI: 2010-08-06
Updated IPO date for ERIE: 1995-10-02
Updated IPO date for TEL: 2007-06-14
IPO date updates complete and saved to 'updated_master_with_ipo_dates.csv'.


In [44]:
import yfinance as yf
import pandas as pd

# Load the master data file
master_data = pd.read_csv('master_data6.csv')  # replace with your file path

# Function to get the first traded date using firstTradeDateEpoch
def get_first_traded_date(ticker):
    try:
        stock = yf.Ticker(ticker)
        first_trade = stock.info.get('firstTradeDateEpoch')
        
        if first_trade:
            # Convert from Unix timestamp to date format
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
            return first_traded_date
        else:
            print(f"No first trade data found for {ticker}")
            return None
    except Exception as e:
        print(f"Error fetching first traded date for {ticker}: {e}")
        return None

# Check for blank rows in First Traded column and update only those
for index, row in master_data[master_data['First Traded'].isna()].iterrows():
    ticker = row['Symbol']
    first_traded_date = get_first_traded_date(ticker)
    
    if first_traded_date:
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Updated first traded date for {ticker}: {first_traded_date}")

# Save the updated data back to a CSV file
master_data.to_csv('master_data7.csv', index=False)

print("Only blank entries in 'First Traded' column have been updated and saved.")


No first trade data found for HEI-A
No first trade data found for BF-B
No first trade data found for UHAL-B
No first trade data found for MOG-A
No first trade data found for BF-A
No first trade data found for LEN-B
No first trade data found for LGF-B
No first trade data found for CWEN-A
No first trade data found for TALK
No first trade data found for GEF-B
No first trade data found for METC
No first trade data found for LGF-A
No first trade data found for INBX
Only blank entries in 'First Traded' column have been updated and saved.


In [45]:
import yfinance as yf
import pandas as pd

# Load the master data file
master_data = pd.read_csv('master_data6.csv')  # replace with your file path

# Function to get the first traded date using firstTradeDateEpoch
def get_first_traded_date(ticker):
    try:
        stock = yf.Ticker(ticker)
        first_trade = stock.info.get('firstTradeDateEpoch')
        
        if first_trade:
            # Convert from Unix timestamp to date format
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
            return first_traded_date
        else:
            print(f"No first trade data found for {ticker}")
            return None
    except Exception as e:
        print(f"Error fetching first traded date for {ticker}: {e}")
        return None

# Iterate only through rows where 'First Traded' is NaN
for index, row in master_data[master_data['First Traded'].isna()].iterrows():
    ticker = row['Symbol']
    first_traded_date = get_first_traded_date(ticker)
    
    if first_traded_date:
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Updated first traded date for {ticker}: {first_traded_date}")

# Save the updated data back to a CSV file
master_data.to_csv('master_data7.csv', index=False)

print("Only blank entries in 'First Traded' column have been updated and saved to 'updated_master_with_first_traded.csv'.")


No first trade data found for HEI-A
No first trade data found for BF-B
No first trade data found for UHAL-B
No first trade data found for MOG-A
No first trade data found for BF-A
No first trade data found for LEN-B
No first trade data found for LGF-B
No first trade data found for CWEN-A
No first trade data found for TALK
No first trade data found for GEF-B
No first trade data found for METC
No first trade data found for LGF-A
No first trade data found for INBX
Only blank entries in 'First Traded' column have been updated and saved to 'updated_master_with_first_traded.csv'.


In [46]:
import yfinance as yf
import pandas as pd

# Load the master data file
master_data = pd.read_csv('master_data6.csv')  # replace with your file path

# Function to get the first traded date using firstTradeDateEpoch
def get_first_traded_date(ticker):
    try:
        stock = yf.Ticker(ticker)
        print(f"Fetching data for {ticker}...")  # Debug log
        
        # Print stock.info to manually check for firstTradeDateEpoch presence
        print(stock.info)  # Temporary debugging step

        first_trade = stock.info.get('firstTradeDateEpoch')
        
        if first_trade:
            # Convert from Unix timestamp to date format
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
            return first_traded_date
        else:
            print(f"No first trade data found for {ticker}")
            return None
    except Exception as e:
        print(f"Error fetching first traded date for {ticker}: {e}")
        return None

# Check a sample of blank First Traded rows to verify filtering
print(master_data[master_data['First Traded'].isna()].head())

# Iterate only through rows where 'First Traded' is NaN
for index, row in master_data[master_data['First Traded'].isna()].iterrows():
    ticker = row['Symbol']
    first_traded_date = get_first_traded_date(ticker)
    
    if first_traded_date:
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Updated first traded date for {ticker}: {first_traded_date}")

# Save the updated data back to a CSV file
master_data.to_csv('master_data7.csv', index=False)

print("Only blank entries in 'First Traded' column have been updated and saved to 'updated_master_with_first_traded.csv'.")


      Symbol                  Asset Name            Sector Industry  \
464    HEI-A          HEICO CORP CLASS A       Industrials      NaN   
717     BF-B   BROWN FORMAN CORP CLASS B  Consumer Staples      NaN   
761   UHAL-B  U HAUL NON VOTING SERIES N       Industrials      NaN   
884    MOG-A            MOOG INC CLASS A       Industrials      NaN   
1337    BF-A   BROWN FORMAN CORP CLASS A  Consumer Staples      NaN   

     First Traded S&P Inclusion  
464           NaN           NaN  
717           NaN           NaN  
761           NaN           NaN  
884           NaN           NaN  
1337          NaN           NaN  
Fetching data for HEI-A...
{'address1': '3000 Taft Street', 'city': 'Hollywood', 'state': 'FL', 'zip': '33021', 'country': 'United States', 'phone': '954 987 4000', 'fax': '954 987 8228', 'website': 'https://www.heico.com', 'industry': 'Aerospace & Defense', 'industryKey': 'aerospace-defense', 'industryDisp': 'Aerospace & Defense', 'sector': 'Industrials', 'sectorKey

In [47]:
import yfinance as yf
import pandas as pd

# Replace this with the path to your CSV file containing the final list of companies
company_data = pd.DataFrame({
    'Symbol': [
        'HEI-A', 'BF-B', 'UHAL-B', 'MOG-A', 'BF-A', 'LEN-B', 
        'LGF-B', 'CWEN-A', 'TALK', 'GEF-B', 'METC', 'LGF-A', 'INBX'
    ],
    'First Traded': [None] * 13  # Placeholder for First Traded dates
})

# Function to get first traded date
def get_first_traded_date(ticker):
    try:
        stock = yf.Ticker(ticker)
        first_trade = stock.info.get('firstTradeDateEpoch')
        
        if first_trade:
            # Convert from Unix timestamp to date format
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
            return first_traded_date
        else:
            # Fall back on earliest available historical data if no firstTradeDateEpoch
            history = stock.history(period="max")
            if not history.empty:
                first_traded_date = history.index[0].date()
                return first_traded_date
            else:
                print(f"No data available for {ticker}")
                return None
    except Exception as e:
        print(f"Error fetching first traded date for {ticker}: {e}")
        return None

# Loop through the symbols and fetch the First Traded date
for index, row in company_data.iterrows():
    ticker = row['Symbol']
    first_traded_date = get_first_traded_date(ticker)
    
    if first_traded_date:
        company_data.at[index, 'First Traded'] = first_traded_date
        print(f"Updated first traded date for {ticker}: {first_traded_date}")

# Save the results to a new CSV file
company_data.to_csv('final_companies_with_first_traded.csv', index=False)

print("First traded date data saved to 'final_companies_with_first_traded.csv'")


Updated first traded date for HEI-A: 1998-04-27
Updated first traded date for BF-B: 1980-03-17
Updated first traded date for UHAL-B: 2022-11-10
Updated first traded date for MOG-A: 1980-05-29
Updated first traded date for BF-A: 1973-05-03
Updated first traded date for LEN-B: 2003-04-23
Updated first traded date for LGF-B: 2016-12-09
Updated first traded date for CWEN-A: 2013-07-17
Updated first traded date for TALK: 2020-07-30
Updated first traded date for GEF-B: 2003-10-07
Updated first traded date for METC: 2017-02-03
Updated first traded date for LGF-A: 1998-11-17
Updated first traded date for INBX: 2024-06-04
First traded date data saved to 'final_companies_with_first_traded.csv'


In [48]:
import yfinance as yf
import pandas as pd

# Load the master_data6 file
master_data = pd.read_csv('master_data6.csv')  # replace with the actual path to your master_data6 file

# Function to get first traded date
def get_first_traded_date(ticker):
    try:
        stock = yf.Ticker(ticker)
        first_trade = stock.info.get('firstTradeDateEpoch')
        
        if first_trade:
            # Convert from Unix timestamp to date format
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
            return first_traded_date
        else:
            # Fall back on earliest available historical data if no firstTradeDateEpoch
            history = stock.history(period="max")
            if not history.empty:
                first_traded_date = history.index[0].date()
                return first_traded_date
            else:
                print(f"No data available for {ticker}")
                return None
    except Exception as e:
        print(f"Error fetching first traded date for {ticker}: {e}")
        return None

# Iterate through each row with a missing First Traded date in master_data
for index, row in master_data[master_data['First Traded'].isna()].iterrows():
    ticker = row['Symbol']
    first_traded_date = get_first_traded_date(ticker)
    
    if first_traded_date:
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Updated first traded date for {ticker}: {first_traded_date}")

# Save the updated master_data6 file with First Traded dates filled in
master_data.to_csv('master_data7.csv', index=False)

print("First traded date updates complete and saved to 'updated_master_data6.csv'.")


Updated first traded date for HEI-A: 1998-04-27
Updated first traded date for BF-B: 1980-03-17
Updated first traded date for UHAL-B: 2022-11-10
Updated first traded date for MOG-A: 1980-05-29
Updated first traded date for BF-A: 1973-05-03
Updated first traded date for LEN-B: 2003-04-23
Updated first traded date for LGF-B: 2016-12-09
Updated first traded date for CWEN-A: 2013-07-17
Updated first traded date for TALK: 2020-07-30
Updated first traded date for GEF-B: 2003-10-07
Updated first traded date for METC: 2017-02-03
Updated first traded date for LGF-A: 1998-11-17
Updated first traded date for INBX: 2024-06-04
First traded date updates complete and saved to 'updated_master_data6.csv'.


In [50]:
import yfinance as yf
import pandas as pd

# Load master_data7 file
master_data = pd.read_csv('master_data7.csv')  # replace with the actual path to your master_data7 file

# Function to fetch sector, industry, and first traded date from yfinance
def fetch_yfinance_data(ticker):
    try:
        stock = yf.Ticker(ticker)
        info = stock.info
        
        # Retrieve required fields
        sector = info.get('sector')
        industry = info.get('industry')
        
        # Get first traded date, using firstTradeDateEpoch or earliest historical date as fallback
        first_trade = info.get('firstTradeDateEpoch')
        if first_trade:
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
        else:
            # Fall back to earliest historical date if no firstTradeDateEpoch
            history = stock.history(period="max")
            first_traded_date = history.index[0].date() if not history.empty else None
        
        return sector, industry, first_traded_date
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None, None, None

# Loop through each row in master_data and update incorrect or missing data
for index, row in master_data.iterrows():
    ticker = row['Symbol']
    
    # Fetch data from yfinance
    sector, industry, first_traded_date = fetch_yfinance_data(ticker)
    
    # Check and update sector if necessary
    if pd.isna(row['Sector']) or row['Sector'] != sector:
        master_data.at[index, 'Sector'] = sector
        print(f"Updated Sector for {ticker}: {sector}")
    
    # Check and update industry if necessary
    if pd.isna(row['Industry']) or row['Industry'] != industry:
        master_data.at[index, 'Industry'] = industry
        print(f"Updated Industry for {ticker}: {industry}")
    
    # Check and update first traded date if necessary
    if pd.isna(row['First Traded']) or row['First Traded'] != first_traded_date:
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Updated First Traded date for {ticker}: {first_traded_date}")

# Save the updated master_data7 to a new file
master_data.to_csv('master_data8.csv', index=False)

print("Sector, Industry, and First Traded updates complete and saved to 'master_data8.csv'.")


Updated Sector for AAPL: Technology
Updated Industry for AAPL: Consumer Electronics
Updated First Traded date for AAPL: 1980-12-12
Updated Sector for NVDA: Technology
Updated First Traded date for NVDA: 1999-01-22
Updated Sector for MSFT: Technology
Updated Industry for MSFT: Software - Infrastructure
Updated First Traded date for MSFT: 1986-03-13
Updated Sector for AMZN: Consumer Cyclical
Updated Industry for AMZN: Internet Retail
Updated First Traded date for AMZN: 1997-05-15
Updated Sector for META: Communication Services
Updated Industry for META: Internet Content & Information
Updated First Traded date for META: 2012-05-18
Updated Sector for GOOGL: Communication Services
Updated Industry for GOOGL: Internet Content & Information
Updated First Traded date for GOOGL: 2004-08-19
Updated Sector for GOOG: Communication Services
Updated Industry for GOOG: Internet Content & Information
Updated First Traded date for GOOG: 2004-08-19
Updated Sector for BRK-A: Financial Services
Updated In

KeyboardInterrupt: 

In [51]:
import yfinance as yf
import pandas as pd

# Load master_data7 file
master_data = pd.read_csv('master_data7.csv')  # replace with the actual path to your master_data7 file

# Function to fetch sector, industry, and first traded date from yfinance
def fetch_yfinance_data(ticker):
    try:
        stock = yf.Ticker(ticker)
        info = stock.info
        
        # Retrieve required fields
        sector = info.get('sector')
        industry = info.get('industry')
        
        # Get first traded date, using firstTradeDateEpoch or earliest historical date as fallback
        first_trade = info.get('firstTradeDateEpoch')
        if first_trade:
            first_traded_date = pd.to_datetime(first_trade, unit='s').date()
        else:
            # Fall back to earliest available historical date if no firstTradeDateEpoch
            history = stock.history(period="max")
            first_traded_date = history.index[0].date() if not history.empty else None
        
        return sector, industry, first_traded_date
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None, None, None

# Loop through each row in master_data and update incorrect or missing data
for index, row in master_data.iterrows():
    ticker = row['Symbol']
    
    # Fetch data from yfinance
    sector, industry, first_traded_date = fetch_yfinance_data(ticker)
    
    # Check and update sector if necessary
    if pd.isna(row['Sector']) or row['Sector'] != sector:
        master_data.at[index, 'Sector'] = sector
        print(f"Updated Sector for {ticker}: {sector}")
    
    # Check and update industry if necessary
    if pd.isna(row['Industry']) or row['Industry'] != industry:
        master_data.at[index, 'Industry'] = industry
        print(f"Updated Industry for {ticker}: {industry}")
    
    # Check and update first traded date if it's missing or differs by more than 1 day
    current_first_traded = row['First Traded']
    if pd.isna(current_first_traded):
        # Update if missing
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Filled missing First Traded date for {ticker}: {first_traded_date}")
    elif first_traded_date and abs((pd.to_datetime(current_first_traded) - pd.to_datetime(first_traded_date)).days) > 1:
        # Update if there's a discrepancy of more than 1 day
        master_data.at[index, 'First Traded'] = first_traded_date
        print(f"Corrected First Traded date for {ticker}: {first_traded_date}")

# Save the updated master_data7 to a new file
master_data.to_csv('master_data8.csv', index=False)

print("Sector, Industry, and First Traded updates complete and saved to 'master_data8.csv'.")


Updated Sector for AAPL: Technology
Updated Industry for AAPL: Consumer Electronics
Updated Sector for NVDA: Technology
Updated Sector for MSFT: Technology
Updated Industry for MSFT: Software - Infrastructure
Updated Sector for AMZN: Consumer Cyclical
Updated Industry for AMZN: Internet Retail
Updated Sector for META: Communication Services
Updated Industry for META: Internet Content & Information
Updated Sector for GOOGL: Communication Services
Updated Industry for GOOGL: Internet Content & Information
Updated Sector for GOOG: Communication Services
Updated Industry for GOOG: Internet Content & Information
Updated Sector for BRK-A: Financial Services
Updated Industry for BRK-A: Insurance - Diversified
Updated Sector for BRK-B: Financial Services
Updated Industry for BRK-B: Insurance - Diversified
Updated Sector for AVGO: Technology
Updated Sector for TSLA: Consumer Cyclical
Updated Industry for TSLA: Auto Manufacturers
Updated Sector for LLY: Healthcare
Updated Industry for LLY: Drug 

$CBZ: possibly delisted; no price data found  (1d 1925-11-29 -> 2024-11-04)


Error fetching data for SMG: HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Max retries exceeded with url: /v10/finance/quoteSummary/SMG?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=SMG&crumb=3KaTt8NSNpm (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1000)')))
Updated Sector for SMG: None
Updated Industry for SMG: None
Error fetching data for ABM: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Updated Sector for ABM: None
Updated Industry for ABM: None
Error fetching data for GVA: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Updated Sector for GVA: None
Updated Industry for GVA: None
Updated Sector for NEU: Basic Materials
Updated Sector for CCOI: Communication Services
Updated Sector for HCC: Basic Materials
Upda

In [52]:
import pandas as pd

# Load the Nasdaq-100 symbols CSV file and the master data file
nasdaq_data = pd.read_csv('Nasdaq 100 companies by weight_ 11_5_24 - Sheet1.csv')  # Replace with the actual path to your Nasdaq-100 file
master_data = pd.read_csv('master_data8.csv')  # Replace with the actual path to your master data file

# Ensure both files have standardized column names
nasdaq_data.rename(columns={'Symbol': 'Nasdaq Symbol'}, inplace=True)  # Adjust if needed
master_data.rename(columns={'Symbol': 'Master Symbol', 'Index Inclusion': 'Index Inclusion'}, inplace=True)

# Find Nasdaq-100 symbols missing an Index Inclusion date in the master data
# First, filter the master data for rows where Index Inclusion is missing
missing_inclusion_data = master_data[
    master_data['Master Symbol'].isin(nasdaq_data['Nasdaq Symbol']) & master_data['Index Inclusion'].isna()
]

# Output the rows with missing Index Inclusion dates for manual review
missing_inclusion_data.to_csv('nasdaq_missing_index_inclusion.csv', index=False)

print("Missing Index Inclusion date data saved to 'nasdaq_missing_index_inclusion.csv'.")


Missing Index Inclusion date data saved to 'nasdaq_missing_index_inclusion.csv'.


In [53]:
import pandas as pd

# Load the nasdaq_index_companies file and the master_data8 file
nasdaq_data = pd.read_csv('nasdaq_index_companies.csv')  # Replace with the actual path to your Nasdaq file
master_data = pd.read_csv('master_data8.csv')  # Replace with the actual path to your master data file

# Merge the Index Inclusion date from nasdaq_data into master_data based on Symbol
# We use a left join to ensure we retain all rows in master_data, updating only where a match is found
master_data_updated = master_data.merge(
    nasdaq_data[['Symbol', 'Index Inclusion']],  # Selecting necessary columns from nasdaq_data
    on='Symbol',
    how='left',
    suffixes=('', '_Nasdaq')  # Avoids overwriting if 'Index Inclusion' already exists in master_data
)

# Update Index Inclusion in master_data with the new data from Nasdaq file where available
master_data_updated['Index Inclusion'] = master_data_updated['Index Inclusion'].combine_first(master_data_updated['Index Inclusion_Nasdaq'])

# Drop the auxiliary column used for merging
master_data_updated = master_data_updated.drop(columns=['Index Inclusion_Nasdaq'])

# Save the updated master data to a new CSV file
master_data_updated.to_csv('master_data9.csv', index=False)

print("Index Inclusion dates have been added to 'updated_master_data8.csv'.")


Index Inclusion dates have been added to 'updated_master_data8.csv'.
