# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [187]:
from datetime import datetime, date, timedelta, timezone
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import yfinance as yf
from scipy.stats import norm

# Exploration

In [3]:
# Ticker handle used for ad-hoc exploration; the commented-out attribute
# accesses that follow in this cell are a reference list of what it can be asked for.
msft = yf.Ticker("MSFT")

# get all stock info
# msft.info

# get historical market data
# hist = msft.history(period="1mo")

# show meta information about the history (requires history() to be called first)
# msft.history_metadata

# show actions (dividends, splits, capital gains)
# msft.actions
# msft.dividends
# msft.splits
# msft.capital_gains  # only for mutual funds & etfs

# show share count
# msft.get_shares_full(start="2024-01-01", end=None)

# show financials:
# # - income statement
# msft.income_stmt
# msft.quarterly_income_stmt
# # - balance sheet
# msft.balance_sheet
# msft.quarterly_balance_sheet
# # - cash flow statement
# msft.cashflow
# msft.quarterly_cashflow
# # see `Ticker.get_income_stmt()` for more options

# show holders
# msft.major_holders
# msft.institutional_holders
# msft.mutualfund_holders
# msft.insider_transactions
# msft.insider_purchases
# msft.insider_roster_holders

# show recommendations
# msft.recommendations
# msft.recommendations_summary
# msft.upgrades_downgrades

# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
# msft.earnings_dates

# show ISIN code - *experimental*
# ISIN = International Securities Identification Number
# msft.isin

# show options expirations
# msft.options

# show news
# msft.news

In [173]:
# hist

In [174]:
# # get option chain for specific expiration
# opt = msft.option_chain() #'2024-08-16')
# # data available via: opt.calls, opt.puts

In [175]:
# opt.calls

In [176]:
# # Create Ticker object
# ticker = yf.Ticker("AAPL")

# # Get available expiration dates
# expirations = ticker.options

# # Choose an expiration date
# expiration_date = expirations[0]

# # Fetch option chain
# option_chain = ticker.option_chain(expiration_date)

# # Select a specific call option
# call_option = option_chain.calls.iloc[0]
# call_symbol = call_option['contractSymbol']

# # Fetch historical data for the call option
# call_history = yf.Ticker(call_symbol).history(period="1mo")

# # Print historical prices and current open interest
# print("Historical prices for", call_symbol)
# print(call_history[['Open', 'High', 'Low', 'Close', 'Volume']])
# print("\nCurrent Open Interest:", call_option['openInterest'])

# Options

In [209]:
def weighted_average(values: List[float], weights: List[float]) -> float:
    """Return the weighted mean of ``values`` rounded to 4 decimal places.

    Args:
        values: Numbers to average.
        weights: One weight per entry in ``values``.

    Returns:
        The weighted average rounded to 4 decimals, or ``np.nan`` when the
        inputs differ in length, are empty, or the weights sum to zero.
    """
    if len(values) != len(weights) or len(values) == 0:
        return np.nan
    # np.average raises ZeroDivisionError when the weights sum to zero;
    # report "undefined" the same way as the length-mismatch case instead.
    if np.sum(weights) == 0:
        return np.nan
    return round(np.average(values, weights=weights), 4)

def get_stock_data(tickers: List[str]) -> pd.DataFrame:
    """Build a one-row-per-ticker summary of option open interest.

    For every symbol this reads ``sharesOutstanding`` and ``longName`` from
    the ticker's ``info`` mapping, aggregates option data through
    ``get_options_data`` and looks up the next earnings date through
    ``get_next_earnings_date``.

    Args:
        tickers: Ticker symbols to look up.

    Returns:
        DataFrame with the columns ``ticker``, ``name``, ``shares``,
        ``calls_oi``, ``puts_oi``, ``total_oi``, ``ratio``, ``iv`` and
        ``next_earnings``. Symbols for which ``get_options_data`` returns
        ``None``, or whose processing raises, are skipped (the error is
        printed). If no symbol succeeds the frame is empty but still carries
        these columns.
    """
    columns = [
        'ticker', 'name', 'shares', 'calls_oi', 'puts_oi',
        'total_oi', 'ratio', 'iv', 'next_earnings',
    ]
    # Collect plain dict records and build the frame once at the end instead
    # of creating and concatenating a one-row DataFrame per ticker.
    records = []
    for t in tickers:
        try:
            ticker = yf.Ticker(t)

            # Read the info mapping once; both lookups below come from it.
            info = ticker.info
            shares_outstanding = info.get('sharesOutstanding', np.nan)

            options_data = get_options_data(ticker)
            if options_data is None:
                continue

            oi_calls, oi_puts, iv = options_data
            oi_total = oi_calls + oi_puts

            # Open interest scaled by 100 relative to shares outstanding
            # (presumably 100 = shares per option contract -- TODO confirm).
            # Missing, NaN or zero share counts give NaN.
            has_shares = bool(shares_outstanding) and pd.notna(shares_outstanding)
            ratio = round((oi_total * 100) / shares_outstanding, 4) if has_shares else np.nan

            records.append({
                'ticker': t,
                'name': info.get('longName', 'N/A'),
                'shares': shares_outstanding,
                'calls_oi': oi_calls,
                'puts_oi': oi_puts,
                'total_oi': oi_total,
                'ratio': ratio,
                'iv': iv,
                'next_earnings': get_next_earnings_date(ticker),
            })
        except Exception as e:
            # Best effort: one failing symbol must not abort a long batch run.
            print(f"Error processing {t}: {str(e)}")
            continue

    return pd.DataFrame(records, columns=columns)

def _option_side_totals(side: pd.DataFrame) -> Tuple[int, float, int]:
    """Summarize one side (calls or puts) of a single option chain.

    Returns:
        ``(oi_sum, iv_times_oi_sum, iv_weight)`` where ``oi_sum`` is the total
        open interest (missing values counted as 0), and the last two values
        are accumulated only over contracts that actually have an implied
        volatility.
    """
    open_interest = side['openInterest'].fillna(0).astype(int)
    implied_vol = side['impliedVolatility']
    has_iv = implied_vol.notna()
    return (
        int(open_interest.sum()),
        float((implied_vol[has_iv] * open_interest[has_iv]).sum()),
        int(open_interest[has_iv].sum()),
    )


def get_options_data(ticker: yf.Ticker) -> Optional[Tuple[int, int, float]]:
    """Aggregate open interest and implied volatility over all expirations.

    Iterates over every expiration in ``ticker.options`` and fetches the
    chain with ``ticker.option_chain``.

    Args:
        ticker: Object exposing ``options`` and ``option_chain(expiration)``.

    Returns:
        ``(calls_oi, puts_oi, iv)``: total call open interest, total put open
        interest, and the open-interest-weighted mean implied volatility
        rounded to 4 decimals (``np.nan`` when there is no weight). Returns
        ``None`` if anything raises while fetching; the error is printed.
    """
    try:
        oi_calls, oi_puts = 0, 0
        iv_weighted_sum, iv_weight = 0.0, 0

        # Named `expiry` so the loop variable does not shadow datetime.date.
        for expiry in ticker.options:
            chain = ticker.option_chain(expiry)

            calls_oi, calls_iv_sum, calls_weight = _option_side_totals(chain.calls)
            puts_oi, puts_iv_sum, puts_weight = _option_side_totals(chain.puts)

            oi_calls += calls_oi
            oi_puts += puts_oi

            # Contracts without an IV are left out of both numerator and
            # denominator; counting them as IV=0 at full weight would drag
            # the weighted average down.
            iv_weighted_sum += calls_iv_sum + puts_iv_sum
            iv_weight += calls_weight + puts_weight

        iv = round(iv_weighted_sum / iv_weight, 4) if iv_weight > 0 else np.nan

        return oi_calls, oi_puts, iv
    except Exception as e:
        print(f"Error fetching options data: {str(e)}")
        return None

def get_next_earnings_date(ticker: yf.Ticker) -> str:
    """Return the nearest upcoming earnings date as ``YYYY-MM-DD``.

    Args:
        ticker: Object whose ``earnings_dates`` attribute is ``None`` or a
            frame indexed by earnings datetimes.

    Returns:
        The earliest index date strictly after today's UTC date, formatted
        ``%Y-%m-%d``; ``'No future earnings date available'`` when no index
        date lies in the future; ``'N/A'`` when the calendar is missing or
        empty, or when the lookup raises (the error is printed).
    """
    try:
        earnings_calendar = ticker.earnings_dates
        if earnings_calendar is None or earnings_calendar.empty:
            return 'N/A'

        today = datetime.now(timezone.utc).date()
        upcoming = earnings_calendar.index[earnings_calendar.index.date > today]
        if upcoming.empty:
            return 'No future earnings date available'

        # Take the minimum rather than the last element so the answer does
        # not depend on how the calendar index happens to be sorted.
        return upcoming.min().strftime('%Y-%m-%d')
    except Exception as e:
        print(f"Error fetching earnings date: {str(e)}")
    return 'N/A'

In [210]:
%%time

# Usage: try get_stock_data on a handful of symbols first; %%time on this cell
# reports how long the small batch takes.
tickers = ['NVDA', 'AAPL', 'MSFT']  # extend with e.g. 'GOOGL', 'AMZN' as needed
result = get_stock_data(tickers)  # one summary row per ticker that could be processed

CPU times: user 1.9 s, sys: 102 ms, total: 2.01 s
Wall time: 7.34 s


In [211]:
# Preview the first rows of the summary frame (displayed as the cell's output).
result.head()

Unnamed: 0,ticker,name,shares,calls_oi,puts_oi,total_oi,ratio,iv,next_earnings
0,NVDA,NVIDIA Corporation,24598300672,13770351,13660368,27430719,0.1115,0.7505,2024-08-21
1,AAPL,Apple Inc.,15334099968,3659201,2417840,6077041,0.0396,0.3526,2024-08-01
2,MSFT,Microsoft Corporation,7432309760,1112556,1008079,2120635,0.0285,0.341,2024-07-30


In [212]:
# Load the ticker list from a plain-text file (expected: one symbol per line).
filepath = 'tickers/nyse_tickers.txt'

# Assumed average wall time per ticker in seconds; only used for the runtime
# estimate printed at the end of this cell.
SECONDS_PER_TICKER = 2.2

with open(filepath, 'r') as file:
    # Read the lines and store them in a list
    lines = file.readlines()

# Strip surrounding whitespace/newlines and drop blank lines so that no empty
# symbol is passed on to get_stock_data.
tickers = [line.strip() for line in lines if line.strip()]

# Print a sample and the count to verify the load
print(tickers[:10])
print(len(tickers))

print("est. runtime:", round(len(tickers) * SECONDS_PER_TICKER / 60), "min")

['A', 'AA', 'AACT', 'AAN', 'AAP', 'AAT', 'AB', 'ABBV', 'ABEV', 'ABG']
3170
est. rutime: 116 min


In [None]:
%%time

# Full run over the current `tickers` list. With the file-based list this is a
# long-running cell (the estimate printed when loading the file was ~116 min).
df = get_stock_data(tickers)