In [None]:
pip install bs4 selenium yfinance pandas webdriver_manager

In [None]:
import requests
from bs4 import BeautifulSoup

def fetch_sp500_tickers():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    table = soup.find('table', {'id': 'constituents'})
    tickers = [row.find_all('td')[0].text.strip() for row in table.find_all('tr')[1:]]
    return tickers

def download_ticker_cik_mapping():
    url = "https://www.sec.gov/include/ticker.txt"
    response = requests.get(url)
    ticker_cik_mapping = {line.split('\t')[0]: line.split('\t')[1] for line in response.text.splitlines()}
    return ticker_cik_mapping

def format_cik(cik):
    return str(cik).zfill(10)  # Pad the CIK to ensure it is 10 digits long

def map_sp500_tickers_to_cik():
    sp500_tickers = fetch_sp500_tickers()
    ticker_cik_mapping = download_ticker_cik_mapping()

    sp500_ticker_cik_mapping = {}
    for ticker in sp500_tickers:
        cik = ticker_cik_mapping.get(ticker.lower())
        if cik:
            formatted_cik = format_cik(cik)
            sp500_ticker_cik_mapping[ticker] = formatted_cik

    return sp500_ticker_cik_mapping
"""
# Fetch and print the data
sp500_ticker_cik_mapping = map_sp500_tickers_to_cik()
print(sp500_ticker_cik_mapping)
"""

In [69]:
import requests

def fetch_company_xbrl_facts(cik):
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    headers = {'User-Agent': 'ADVANCE INDUSTRIES LLC: MR. MALCOLM JAMES RUTLEDGE SKINNER'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch data for CIK {cik}. Status code: {response.status_code}")
        return None

def get_historical_data(data, concept_tags):
    historical_values = {}
    
    for tag in concept_tags:
        if tag in data['facts']['us-gaap']:
            print(f"Found tag: {tag}")
            tag_data = data['facts']['us-gaap'][tag]
            for unit in tag_data['units']:
                dates_seen = set()
                try:
                    # Reverse the list to start with the most recent entry
                    for item in reversed(tag_data['units'][unit]):
                        date = item.get('end', item.get('instant'))
                        if date not in dates_seen:
                            historical_values[(tag, unit, date)] = item['val']
                            dates_seen.add(date)
                except KeyError as e:
                    print(f"KeyError for tag {tag} under unit {unit}: {e}")

    # Convert the dictionary to a list of tuples for output
    return [(tag, unit, date, value) for (tag, unit, date), value in historical_values.items()]

def fetch_financial_historicals(cik):
    xbrl_data = fetch_company_xbrl_facts(cik)
    if xbrl_data:
        concepts = {
            'TotalLiabilities': [
                'Liabilities'
            ],
            'ShareholdersEquity': [
                'StockholdersEquity'
            ],
            'NetIncome': [
                'NetIncomeLoss'
            ],
            'Revenue': [
                'RevenueFromContractWithCustomerExcludingAssessedTax'
            ],
            'EarningsPerShareDiluted': [
                'EarningsPerShareBasic'
            ],
            'WeightedAverageNumberOfDilutedSharesOutstanding': [
                'CommonStockSharesOutstanding'
            ],
            'Assets':[
                'Assets'
            ]
        }
        historicals = {}
        for concept_name, concept_tags in concepts.items():
            historicals[concept_name] = get_historical_data(xbrl_data, concept_tags)
        return historicals
"""
# Example usage for a specific company
cik = '0000789019'  # Example: CIK for Microsoft
financial_historicals = fetch_financial_historicals(cik)
"""
"""
The below code can be used to print the collected data, as needed.

for concept, data in financial_historicals.items():
    print(f"\n{concept}:")
    for tag, unit, value, date in data:  # Corrected to handle four elements
        print(f"Tag: {tag}, Unit: {unit}, Date: {date}, Value: {value}")

"""
# DISCLAIMER
# This code assumes that within a series of duplicate dates,
# the last entry (as ordered in the data source) is the most accurate.
# This may not be appropriate for all data sets or analyses.
# Users should verify the assumption against their data and use case.

Found tag: Liabilities
Found tag: StockholdersEquity
Found tag: NetIncomeLoss
Found tag: RevenueFromContractWithCustomerExcludingAssessedTax
Found tag: EarningsPerShareBasic
Found tag: CommonStockSharesOutstanding
Found tag: Assets


'\nThe below code can be used to print the collected data, as needed.\n\nfor concept, data in financial_historicals.items():\n    print(f"\n{concept}:")\n    for tag, unit, value, date in data:  # Corrected to handle four elements\n        print(f"Tag: {tag}, Unit: {unit}, Date: {date}, Value: {value}")\n\n'

In [None]:
# This returns all of the XBRL tags and the date ranges that are used.
import requests

def fetch_company_xbrl_tags(cik):
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    headers = {'User-Agent': 'Your User Agent Info'}
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch data for CIK {cik}. Status code: {response.status_code}")
        return None

def extract_xbrl_tags_with_dates(xbrl_data):
    tag_dates = {}
    if xbrl_data and 'facts' in xbrl_data and 'us-gaap' in xbrl_data['facts']:
        for tag, data in xbrl_data['facts']['us-gaap'].items():
            dates = []
            for unit in data['units'].values():
                for item in unit:
                    date = item.get('end', item.get('instant'))
                    if date:
                        dates.append(date)
            if dates:
                tag_dates[tag] = (min(dates), max(dates))
    return tag_dates
"""
cik = '0000789019'  # Example: CIK for Microsoft
xbrl_data = fetch_company_xbrl_tags(cik)

if xbrl_data:
    xbrl_tags_dates = extract_xbrl_tags_with_dates(xbrl_data)
    for tag, dates in sorted(xbrl_tags_dates.items()):
        print(f"Tag: {tag}, Date Range: {dates[0]} to {dates[1]}")
else:
    print("No XBRL data found.")
"""

In [73]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

def get_splits_with_selenium(ticker):
    options = webdriver.ChromeOptions()
    options.headless = True
    service = Service(ChromeDriverManager().install())
    browser = webdriver.Chrome(service=service, options=options)

    url = f'https://seekingalpha.com/symbol/{ticker}/splits'
    browser.get(url)
    browser.implicitly_wait(10)

    # Scroll the page to ensure all elements are loaded
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)  # Wait for the page to load

    soup = BeautifulSoup(browser.page_source, 'html.parser')
    splits_table = soup.find('tbody', {'data-test-id': 'table-body'})
    splits = []
    
    if splits_table:
        for row in splits_table.find_all('tr'):
            date = row.find('th').get_text(strip=True)
            ratio = row.find('td').get_text(strip=True)
            splits.append({'date': date, 'ratio': ratio})

    browser.quit()
    return splits

# Example usage with ticker "AAPL"
"""
splits_data = get_splits_with_selenium("AAPL")
for split in splits_data:
    print(split)
"""

{'date': 'Aug. 31, 2020', 'ratio': '4:1'}
{'date': 'Jun. 09, 2014', 'ratio': '7:1'}
{'date': 'Feb. 28, 2005', 'ratio': '2:1'}
{'date': 'Jun. 21, 2000', 'ratio': '2:1'}
{'date': 'Jun. 16, 1987', 'ratio': '2:1'}


In [74]:
import pandas as pd
import yfinance as yf

def download_market_data(ticker, start_date, end_date, file_name):
    """
    Downloads market data for a given ticker and saves it to a CSV file.

    Parameters:
    ticker (str): The ticker symbol of the stock or index.
    start_date (str): The start date for the data in YYYY-MM-DD format.
    end_date (str): The end date for the data in YYYY-MM-DD format.
    file_name (str): The name of the file to save the data to.
    
    Returns:
    DataFrame: The data for the specified ticker and date range.
    """
    data = yf.download(ticker, start=start_date, end=end_date, progress=False)
    data = data[['Adj Close', 'Volume']]  # Select adjusted close and volume
    data.to_csv(file_name)  # Save to CSV
    print(f"Data for {ticker} saved to {file_name}")
    return data
"""
# Example usage
download_market_data('MSFT', '2004-01-01', '2023-11-11', 'msft_stock_data.csv')
"""

Data for MSFT saved to msft_stock_data.csv


Unnamed: 0_level_0,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2004-01-02,17.203354,44487700
2004-01-05,17.635780,67333700
2004-01-06,17.698463,46950800
2004-01-07,17.679655,54298200
2004-01-08,17.648319,58810800
...,...,...
2023-11-06,356.529999,23828300
2023-11-07,360.529999,25833900
2023-11-08,363.200012,26767800
2023-11-09,360.690002,24847300


In [None]:
import pandas as pd

def adjust_eps_for_splits(eps_df, splits_data):
    """
    Adjusts the EPS for a stock based on its split history.

    Parameters:
    eps_df (DataFrame): A DataFrame with columns 'Date' and 'EPS' where 'Date' is the reporting date.
    splits_data (list of dicts): A list where each dict contains 'date' and 'ratio' keys for each stock split.

    Returns:
    DataFrame: The adjusted EPS DataFrame.
    """
    # Convert split ratios from strings to numerical values (e.g., '2:1' becomes 2.0)
    for split in splits_data:
        split['date'] = pd.to_datetime(split['date'])
        split_ratio = split['ratio'].split(':')
        split['ratio'] = float(split_ratio[0]) / float(split_ratio[1])
    
    # Sort the splits by date
    splits_data.sort(key=lambda x: x['date'])
    
    # Adjust the EPS
    for index, row in eps_df.iterrows():
        # Find splits that occurred after the EPS report date
        for split in splits_data:
            if split['date'] > row['Date']:
                row['EPS'] /= split['ratio']
    
    return eps_df

# Example usage:
# Assume eps_df is your dataframe with EPS data and splits_data is your list of split information.
# adjusted_eps_df = adjust_eps_for_splits(eps_df, splits_data)


In [None]:
# manufacture financial ratios as needed
# function should also get the dates for which the financial ratio is what it is

In [None]:
# get a stock ticker
# get it's cik
# get fundamental data, store in pandas dataframe
# get split dates for stock
# adjust P/E based on split
# get the rest of the financial ratios together
# merge and fill fundamental data with adjusted close and volume