In [15]:
import sqlite3
from sqlalchemy import create_engine
import pandas as pd
from pandas.io import sql
from os import listdir
import datetime
import numpy as np

## ingestion

In [7]:
hist_dir = "../earnings_calendar_history/"
hist_files = [l for l in listdir(hist_dir) if l.endswith('.csv')]
# hist_files

In [68]:
database = "../main.db"

In [106]:
def convert_hist_file(hist_file: str) -> pd.DataFrame:
    col_mapper = {
            "TICKER\nNO MATCHES":"ticker",
            "MKT CAP":"cap",
            "EPS ESTIMATE":"eps_estimate",
            "REPORTED EPS":"eps_reported",
            "SURPRISE":"surprise",
            "SURPRISE %":"surprise_pct",
            "REVENUE FORECAST":"revenue_fx",
            "REVENUE ACTUAL":"revenue",
            "DATE":"eps_date",
            "PERIOD ENDING":"period_ending"
    }
    tb = pd.read_csv(hist_dir+hist_file)\
        .rename(columns=col_mapper)

    for col in col_mapper.values():
        if col not in tb.columns:
            tb[col] = np.nan

    tb = tb[col_mapper.values()]

    tb['file_date'] = hist_file.replace('.csv','')

    tb.replace('—',np.nan, inplace=True)
    
    def multiplier(s:str):
        s = str(s)
        if 'K' in s:
            return 1000.0
        if 'M' in s:
            return 1000_000.0
        if 'B' in s:
            return 1000_000_000.0
        if 'T' in s:
            return 1000_000_000_000.0
        return 1.0

    convert_numerical = lambda s:\
        float( str(s).replace('%','')\
                .replace('USD','')\
                .replace('−','-')\
                .replace('T','')\
                .replace('M','')\
                .replace('B','')\
                .replace('K','') )\
        *multiplier(s)

    for col in [
        "cap",
        "eps_estimate",
        "eps_reported",
        "surprise",
        "surprise_pct",
        "revenue_fx",
        "revenue"
    ]:
        tb[col]=tb[col].apply(convert_numerical)

    tb['name']   = tb['ticker'].apply(lambda s: s.split(';')[1])
    tb['ticker'] = tb['ticker'].apply(lambda s: s.split(';')[0])

    for col in ['eps_date','period_ending','file_date']:
        tb[col] = pd.to_datetime(tb[col])
    
    return tb

convert_hist_file(hist_files[28]).head(5)

Unnamed: 0,ticker,cap,eps_estimate,eps_reported,surprise,surprise_pct,revenue_fx,revenue,eps_date,period_ending,file_date,name
0,AAPL,2478000000000.0,1.27,1.29,0.02,1.74,88739000000.0,,NaT,NaT,2022-10-29,APPLE INC
1,MSFT,1758000000000.0,2.31,2.35,0.04,1.66,49663000000.0,,NaT,NaT,2022-10-29,MICROSOFT CORP.
2,GOOG,1248000000000.0,1.26,1.06,-0.2,-16.1,71001000000.0,,NaT,NaT,2022-10-29,ALPHABET INC (GOOGLE) CLASS C
3,AMZN,1055000000000.0,0.22,0.28,0.06,28.41,127387000000.0,,NaT,NaT,2022-10-29,"AMAZON.COM, INC."
4,XOM,461357000000.0,3.86,4.45,0.59,15.22,104594000000.0,,NaT,NaT,2022-10-29,EXXON MOBIL CORPORATION


In [41]:
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Exception as e:
        print(e)

In [151]:
conn = sqlite3.connect(database)

In [109]:
sql_create_earnings_table = """ CREATE TABLE earnings (
                                        id integer PRIMARY KEY,
                                        ticker text NOT NULL,
                                        name text NOT NULL,
                                        cap decimal,
                                        eps_estimate decimal,
                                        eps_reported decimal,
                                        surprise decimal,
                                        surprise_pct decimal,
                                        revenue decimal,
                                        revenue_fx decimal,
                                        eps_date date,
                                        period_ending date,
                                        file_date date
                                    ); """
create_table(conn, sql_create_earnings_table)

run ```sqlite3 main.db```

In [110]:
for hist_file in hist_files[:]:
    convert_hist_file(hist_file).to_sql('earnings', conn, if_exists='append', index=False)

### checks...

In [111]:
pd.read_sql_query('SELECT * FROM earnings', conn)

Unnamed: 0,id,ticker,name,cap,eps_estimate,eps_reported,surprise,surprise_pct,revenue,revenue_fx,eps_date,period_ending,file_date
0,1,STZ,"CONSTELLATION BRANDS, INC.",4.695200e+10,2.09,2.37,-0.22,-10.39,2.103000e+09,2.017000e+09,2022-04-07 00:00:00,2022-02-28 00:00:00,2022-04-10 00:00:00
1,2,CAG,"CONAGRA BRANDS, INC.",1.655000e+10,0.58,0.58,-0.13,-21.67,2.914000e+09,2.846000e+09,2022-04-07 00:00:00,2022-02-28 00:00:00,2022-04-10 00:00:00
2,3,RPM,RPM INTERNATIONAL INC.,1.109900e+10,0.30,0.38,-0.05,-16.76,1.434000e+09,1.400000e+09,2022-04-06 00:00:00,2022-02-28 00:00:00,2022-04-10 00:00:00
3,4,LW,"LAMB WESTON HOLDINGS, INC.",9.631000e+09,0.44,0.73,0.29,65.25,9.550000e+08,9.685170e+08,2022-04-07 00:00:00,2022-02-28 00:00:00,2022-04-10 00:00:00
4,5,LEVI,LEVI STRAUSS & CO,7.488000e+09,0.42,0.46,0.06,15.19,1.592000e+09,1.547000e+09,2022-04-06 00:00:00,2022-02-28 00:00:00,2022-04-10 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,4012,ETCC,ENVIRONMENTAL TECTONICS CORP.,2.399000e+06,,,,,,,,,2023-01-14 00:00:00
4012,4013,SMIT,"SCHMITT INDUSTRIES, INC.",1.624000e+06,,,,,,,,,2023-01-14 00:00:00
4013,4014,WGNR,WEGENER CORP.,2.524680e+05,,,,,,,,,2023-01-14 00:00:00
4014,4015,PRXIQ,PREMIER EXHIBITIONS INC,7.930000e+02,,,,,,,,,2023-01-14 00:00:00


In [114]:
pd.read_sql_query('SELECT count(distinct ticker) as "unique symbols" FROM earnings', conn)

Unnamed: 0,unique symbols
0,1996


In [118]:
pd.read_sql_query("""
    SELECT sum(case when surprise>0 then 1.0 else 0.0 end)/count(1) as "positive EPS ratio" 
    FROM earnings""", conn)

Unnamed: 0,positive EPS ratio
0,0.487301


## load tickers history

In [152]:
sql_create_history_table = """ CREATE TABLE history (
                                        id integer PRIMARY KEY,
                                        ticker text NOT NULL,
                                        trade_date date,
                                        open decimal,
                                        high decimal,
                                        low decimal,
                                        close decimal,
                                        volume integer
                                    ); """
create_table(conn, sql_create_history_table)

In [121]:
pos_tickers = pd.read_sql_query('SELECT distinct ticker as "symbols" FROM earnings where surprise>0', conn)['symbols'].to_list()
len(pos_tickers)

1176

In [124]:
# !pip install yfinance

In [125]:
import yfinance as yf

In [127]:
start_date = '2022-01-01'
end_date   = datetime.datetime.now().strftime('%Y-%m-%d')
print(f"Getting tickers from {start_date} till {end_date}")

Getting tickers from 2022-01-01 till 2023-01-15


In [160]:
for i,ticker in enumerate(pos_tickers):
    print(f"{i}/{len(pos_tickers)} {ticker}...")
    try:
        tmp=yf.download(ticker, start_date, end_date)
    except Exception as e:
        print("Exception: "+str(e))
        continue
    
    tmp = tmp\
        .reset_index()\
        .drop(columns='Adj Close')\
        .rename(columns={
            "Date":"trade_date",
            "Open":"open",
            "Close":"close",
            "High":"high",
            "Low":"low",
            "Volume":"volume"
        })
    tmp['ticker'] = ticker
    
    tmp.to_sql('history', conn, if_exists='append', index=False)

0/1176 LW...
[*********************100%***********************]  1 of 1 completed
1/1176 LEVI...
[*********************100%***********************]  1 of 1 completed
2/1176 AYI...
[*********************100%***********************]  1 of 1 completed
3/1176 TRNO...
[*********************100%***********************]  1 of 1 completed
4/1176 TLRY...
[*********************100%***********************]  1 of 1 completed
5/1176 WDFC...
[*********************100%***********************]  1 of 1 completed
6/1176 SST...
[*********************100%***********************]  1 of 1 completed
7/1176 SCHN...
[*********************100%***********************]  1 of 1 completed
8/1176 GBX...
[*********************100%***********************]  1 of 1 completed
9/1176 NRIX...
[*********************100%***********************]  1 of 1 completed
10/1176 RGP...
[*********************100%***********************]  1 of 1 completed
11/1176 KRUS...
[*********************100%***********************]  1 of 1 comple

In [162]:
pd.read_sql_query('SELECT count(distinct ticker) as "unique symbols", count(1) as total_count FROM history', conn)

Unnamed: 0,unique symbols,total_count
0,1156,299589


## other yfinance data

In [164]:
import yfinance as yf

msft = yf.Ticker("MSFT")

In [165]:

# get stock info
msft.info


{'zip': '98052-6399',
 'sector': 'Technology',
 'fullTimeEmployees': 221000,
 'longBusinessSummary': 'Microsoft Corporation develops, licenses, and supports software, services, devices, and solutions worldwide. The company operates in three segments: Productivity and Business Processes, Intelligent Cloud, and More Personal Computing. The Productivity and Business Processes segment offers Office, Exchange, SharePoint, Microsoft Teams, Office 365 Security and Compliance, Microsoft Viva, and Skype for Business; Skype, Outlook.com, OneDrive, and LinkedIn; and Dynamics 365, a set of cloud-based and on-premises business solutions for organizations and enterprise divisions. The Intelligent Cloud segment licenses SQL, Windows Servers, Visual Studio, System Center, and related Client Access Licenses; GitHub that provides a collaboration platform and code hosting service for developers; Nuance provides healthcare and enterprise AI solutions; and Azure, a cloud platform. It also offers enterprise

In [167]:
# show meta information about the history (requires history() to be called first)
# msft.history_metadata

In [168]:

# show actions (dividends, splits, capital gains)
msft.actions

Unnamed: 0_level_0,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1987-09-21 00:00:00-04:00,0.00,2.0
1990-04-16 00:00:00-04:00,0.00,2.0
1991-06-27 00:00:00-04:00,0.00,1.5
1992-06-15 00:00:00-04:00,0.00,1.5
1994-05-23 00:00:00-04:00,0.00,2.0
...,...,...
2021-11-17 00:00:00-05:00,0.62,0.0
2022-02-16 00:00:00-05:00,0.62,0.0
2022-05-18 00:00:00-04:00,0.62,0.0
2022-08-17 00:00:00-04:00,0.62,0.0


In [169]:
# show dividends
msft.dividends

Date
2003-02-19 00:00:00-05:00    0.08
2003-10-15 00:00:00-04:00    0.16
2004-08-23 00:00:00-04:00    0.08
2004-11-15 00:00:00-05:00    3.08
2005-02-15 00:00:00-05:00    0.08
                             ... 
2021-11-17 00:00:00-05:00    0.62
2022-02-16 00:00:00-05:00    0.62
2022-05-18 00:00:00-04:00    0.62
2022-08-17 00:00:00-04:00    0.62
2022-11-16 00:00:00-05:00    0.68
Name: Dividends, Length: 76, dtype: float64

In [170]:
# show splits
msft.splits

Date
1987-09-21 00:00:00-04:00    2.0
1990-04-16 00:00:00-04:00    2.0
1991-06-27 00:00:00-04:00    1.5
1992-06-15 00:00:00-04:00    1.5
1994-05-23 00:00:00-04:00    2.0
1996-12-09 00:00:00-05:00    2.0
1998-02-23 00:00:00-05:00    2.0
1999-03-29 00:00:00-05:00    2.0
2003-02-18 00:00:00-05:00    2.0
Name: Stock Splits, dtype: float64

In [171]:
# show capital gains (for mutual funds & etfs)
msft.capital_gains

[]

In [172]:

# show share count
msft.shares
msft.get_shares_full()


2021-07-30 00:00:00-04:00    7519000064
2021-08-02 00:00:00-04:00    7514890240
2021-09-11 00:00:00-04:00    7554030080
2021-09-11 00:00:00-04:00    7514890240
2021-10-12 00:00:00-04:00    7530729984
2021-10-12 00:00:00-04:00    7514890240
2021-10-19 00:00:00-04:00    7507899904
2021-10-28 00:00:00-04:00    7507979776
2021-12-02 00:00:00-05:00    7519580160
2021-12-03 00:00:00-05:00    7521419776
2021-12-04 00:00:00-05:00    7507979776
2021-12-04 00:00:00-05:00    7658599936
2022-01-27 00:00:00-05:00    7496869888
2022-02-04 00:00:00-05:00    7800719872
2022-02-05 00:00:00-05:00    7496869888
2022-02-11 00:00:00-05:00    7496869888
2022-03-04 00:00:00-05:00    7605040128
2022-03-05 00:00:00-05:00    7496869888
2022-03-16 00:00:00-04:00    7595310080
2022-03-17 00:00:00-04:00    7496869888
2022-03-28 00:00:00-04:00    7507230208
2022-03-31 00:00:00-04:00    7496869888
2022-04-01 00:00:00-04:00    7631819776
2022-04-02 00:00:00-04:00    7496869888
2022-04-29 00:00:00-04:00    7479029760


In [173]:

# show financials:
# - income statement
msft.income_stmt
msft.quarterly_income_stmt


Unnamed: 0,2022-09-30,2022-06-30,2022-03-31,2021-12-31
Total Revenue,50122000000.0,51865000000.0,49360000000.0,51728000000.0
Operating Revenue,50122000000.0,51865000000.0,49360000000.0,51728000000.0
Cost Of Revenue,15452000000.0,16429000000.0,15615000000.0,16960000000.0
Gross Profit,34670000000.0,35436000000.0,33745000000.0,34768000000.0
Operating Expense,13152000000.0,14902000000.0,13381000000.0,12521000000.0
Selling General And Administration,6524000000.0,8053000000.0,7075000000.0,6763000000.0
General And Administrative Expense,1398000000.0,1749000000.0,1480000000.0,1384000000.0
Other Gand A,1398000000.0,1749000000.0,1480000000.0,1384000000.0
Selling And Marketing Expense,5126000000.0,6304000000.0,5595000000.0,5379000000.0
Research And Development,6628000000.0,6849000000.0,6306000000.0,5758000000.0


In [174]:

# - balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet


Unnamed: 0,2022-09-30,2022-06-30,2022-03-31,2021-12-31
Total Assets,359784000000.0,364840000000.0,344607000000.0,340389000000.0
Current Assets,160812000000.0,169684000000.0,153922000000.0,174188000000.0
Cash Cash Equivalents And Short Term Investments,107244000000.0,104749000000.0,104660000000.0,125348000000.0
Cash And Cash Equivalents,22884000000.0,13931000000.0,12498000000.0,20604000000.0
Cash Financial,7237000000.0,8258000000.0,7456000000.0,6255000000.0
...,...,...,...,...
Tangible Book Value,95299000000.0,87720000000.0,84205000000.0,101627000000.0
Total Debt,60282000000.0,61270000000.0,61283000000.0,64032000000.0
Net Debt,25738000000.0,35850000000.0,37428000000.0,32654000000.0
Share Issued,7457000000.0,7464000000.0,7483000000.0,7500000000.0


In [176]:

# - cash flow statement
msft.cashflow
# msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options


Unnamed: 0,2022-06-30,2021-06-30,2020-06-30,2019-06-30
Operating Cash Flow,89035000000.0,76740000000.0,60675000000.0,52185000000.0
Cash Flow From Continuing Operating Activities,89035000000.0,76740000000.0,60675000000.0,52185000000.0
Net Income From Continuing Operations,72738000000.0,61271000000.0,44281000000.0,39240000000.0
Operating Gains Losses,-409000000.0,-1249000000.0,-219000000.0,-792000000.0
Gain Loss On Investment Securities,,,-219000000.0,-792000000.0
Depreciation Amortization Depletion,14460000000.0,11686000000.0,12796000000.0,11682000000.0
Depreciation And Amortization,14460000000.0,11686000000.0,12796000000.0,11682000000.0
Depreciation,14460000000.0,11686000000.0,12796000000.0,11682000000.0
Deferred Tax,-5702000000.0,-150000000.0,-3620000000.0,-3534000000.0
Deferred Income Tax,-5702000000.0,-150000000.0,-3620000000.0,-3534000000.0


In [None]:

# show major holders
msft.major_holders

# show institutional holders
msft.institutional_holders

# show mutualfund holders
msft.mutualfund_holders


In [177]:

# show earnings
msft.earnings
msft.quarterly_earnings


Unnamed: 0_level_0,Revenue,Earnings
Quarter,Unnamed: 1_level_1,Unnamed: 2_level_1
4Q2021,51728000000,18765000000
1Q2022,49360000000,16728000000
2Q2022,51865000000,16740000000
3Q2022,50122000000,17556000000


In [178]:

# show sustainability
msft.sustainability


Unnamed: 0_level_0,Value
2022-8,Unnamed: 1_level_1
palmOil,False
controversialWeapons,False
gambling,False
socialScore,8.39
nuclear,False
furLeather,False
alcoholic,False
gmo,False
catholic,False
socialPercentile,


In [None]:

# show analysts recommendations
msft.recommendations
msft.recommendations_summary


In [179]:

# show analysts other work
msft.analyst_price_target
msft.revenue_forecasts
msft.earnings_forecasts
msft.earnings_trend


Unnamed: 0_level_0,Max Age,End Date,Growth,Earnings Estimate Avg,Earnings Estimate Low,Earnings Estimate High,Earnings Estimate Year Ago Eps,Earnings Estimate Number Of Analysts,Earnings Estimate Growth,Revenue Estimate Avg,...,Revenue Estimate Growth,Eps Trend Current,Eps Trend 7Days Ago,Eps Trend 30Days Ago,Eps Trend 60Days Ago,Eps Trend 90Days Ago,Eps Revisions Up Last7Days,Eps Revisions Up Last30Days,Eps Revisions Down Last30Days,Eps Revisions Down Last90Days
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0Q,1,2022-12-31,-0.069,2.31,2.23,2.91,2.48,35.0,-0.069,52986600000.0,...,0.024,2.31,2.31,2.29,2.29,2.57,1.0,4.0,0.0,
+1Q,1,2023-03-31,0.05,2.33,1.95,2.5,2.22,34.0,0.05,52509700000.0,...,0.064,2.33,2.33,2.33,2.33,2.5,1.0,2.0,0.0,
0Y,1,2023-06-30,0.042,9.6,9.26,11.21,9.21,46.0,0.042,212982000000.0,...,0.074,9.6,9.56,9.55,9.55,10.05,2.0,5.0,0.0,
+1Y,1,2024-06-30,0.161,11.15,10.21,12.48,9.6,45.0,0.161,239498000000.0,...,0.124,11.15,11.16,11.18,11.18,11.84,0.0,2.0,2.0,
+5Y,1,NaT,0.1288,,,,,,,,...,,,,,,,,,,
-5Y,1,NaT,0.24086,,,,,,,,...,,,,,,,,,,


In [180]:

# show next event (earnings, etc)
msft.calendar


Unnamed: 0,Value
Earnings Date,2023-01-24 21:00:00
Earnings Average,2.31
Earnings Low,2.23
Earnings High,2.91
Revenue Average,52986600000
Revenue Low,52389000000
Revenue High,53871800000


In [181]:

# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default. 
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
msft.earnings_dates


Unnamed: 0_level_0,EPS Estimate,Reported EPS,Surprise(%)
Earnings Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-10-23 06:00:00-04:00,,,
2023-07-24 06:00:00-04:00,,,
2023-04-24 06:00:00-04:00,,,
2023-01-24 17:00:00-05:00,2.31,,
2023-01-24 16:00:00-05:00,2.31,,
2022-10-25 12:00:00-04:00,2.3,2.35,0.0204
2022-07-26 12:00:00-04:00,2.29,2.23,-0.0275
2022-04-26 12:00:00-04:00,2.19,2.22,0.016
2022-01-25 11:00:00-05:00,2.31,2.48,0.0727
2021-10-26 12:00:00-04:00,2.08,2.27,0.094


In [182]:

# show ISIN code - *experimental*
# ISIN = International Securities Identification Number
msft.isin

# show options expirations
msft.options

# show news
msft.news


[{'uuid': '8ab2f529-b9e1-3aae-b2d4-25b446c91232',
  'title': 'Microsoft Slashes Workforce Amid Strategic Investments In AI, Cloud',
  'publisher': "Investor's Business Daily",
  'link': 'https://finance.yahoo.com/m/8ab2f529-b9e1-3aae-b2d4-25b446c91232/microsoft-slashes-workforce.html',
  'providerPublishTime': 1674077205,
  'type': 'STORY',
  'thumbnail': {'resolutions': [{'url': 'https://s.yimg.com/uu/api/res/1.2/Zb_pSqioWFMjO0rXN3ffLg--~B/aD01NjM7dz0xMDAwO2FwcGlkPXl0YWNoeW9u/https://media.zenfs.com/en/ibd.com/8df6f76095e6b4e0fad13afec88c2050',
     'width': 1000,
     'height': 563,
     'tag': 'original'},
    {'url': 'https://s.yimg.com/uu/api/res/1.2/9KbEqF3Dr42UtiCyal5CZQ--~B/Zmk9ZmlsbDtoPTE0MDtweW9mZj0wO3c9MTQwO2FwcGlkPXl0YWNoeW9u/https://media.zenfs.com/en/ibd.com/8df6f76095e6b4e0fad13afec88c2050',
     'width': 140,
     'height': 140,
     'tag': '140x140'}]},
  'relatedTickers': ['MSFT']},
 {'uuid': 'b706500a-0a81-3d39-8e14-3e3010fb2e51',
  'title': 'CrowdStrike Initiated At

In [None]:

# get option chain for specific expiration
opt = msft.option_chain('YYYY-MM-DD')
# data available via: opt.calls, opt.puts

### earnings calendar

In [187]:
import datetime
from yahoo_earnings_calendar import YahooEarningsCalendar

In [189]:
date_from = datetime.datetime.strptime(
    'May 5 2021  10:00AM', '%b %d %Y %I:%M%p')
date_to = datetime.datetime.strptime(
    'May 8 2021  1:00PM', '%b %d %Y %I:%M%p')
yec = YahooEarningsCalendar()
print(yec.earnings_on(date_from))
print(yec.earnings_between(date_from, date_to))

IndexError: list index out of range

In [190]:
print(yec.get_next_earnings_date('box'))

Exception: Invalid Symbol or Unavailable Earnings Date

In [163]:
conn.close()