# Retriever

Provide trading recommendations for new, previously unseen tickers.

In [1]:
import pickle
import pandas as pd
import yfinance as yf # original was 0.2.4 now 0.2.11
from datetime import datetime, timedelta
import numpy as np

yf.__version__

'0.2.11'

## load classifiers

In [2]:
# Load every pickled classifier named seed_<a>_<b>.pickle (a, b in 0..9) into one flat list.
# NOTE: unpickling executes arbitrary code; only load model files you produced yourself.
CLFS = []
for major in range(10):
    for minor in range(10):
        model_path = f'../model_cls/seed_{major}_{minor}.pickle'
        with open(model_path, 'rb') as model_file:
            CLFS.append(pickle.load(model_file))

## get data

Columns that need to be loaded: `ticker`, `eps_est`, `eps_actual`, `eps_diff`, `on_earnings_close`, `on_earnings_volume`, `market_cap`, `price_var`.

data assumptions: close>5, eps_diff > 0.0

### load tickers that recently reported earnings

In [8]:
# Read the saved earnings-calendar export and keep the tickers with a positive EPS surprise.
calendar_path = '../earnings_calendar_history/2023-02-04.csv'
ech = pd.read_csv(calendar_path)


def _surprise_to_float(text):
    """Convert one SURPRISE cell to a float.

    The unicode minus sign is normalised to '-', the 'USD' suffix is removed,
    and an em dash is turned into -1 so that such rows fail the > 0 filter below
    (presumably the em dash marks a missing value -- confirm against the export).
    """
    cleaned = text.replace('−', '-').replace('USD', '').replace('—', '-1')
    return float(cleaned)


# The ticker symbol is the part of the first column that precedes the first ';'.
ech['ticker'] = [cell.split(';')[0] for cell in ech['TICKER\nNO MATCHES']]
ech['eps_diff'] = ech['SURPRISE'].map(_surprise_to_float)

positive_surprise = ech['eps_diff'] > 0
sel_tickers = ech.loc[positive_surprise, 'ticker'].tolist()
print(f"Found {len(sel_tickers)} tickers with eps diff > 0")

Found 107 tickers with eps diff > 0


In [9]:
class TickerModel():
    """Collect the model inputs for one ticker and score it with the loaded classifiers.

    Typical call order (as used by the scoring loop in this notebook):
    ``download_earnings_dates()`` -> ``download_history()`` ->
    ``create_features()`` -> ``predict_class(CLFS)``.

    Attributes set along the way:
        ticker, model          -- the symbol and its ``yf.Ticker`` wrapper
        has_earnings           -- True once a fully populated earnings row was found
        has_history            -- True once a non-empty price history was downloaded
        ed                     -- earnings-dates frame with incomplete rows dropped
        recent_ed, recent_eps_actual, recent_eps_est, recent_eps_diff
        hist                   -- price history frame (None if the download failed)
        F                      -- one-row feature frame built by create_features
        probas, avg_pred       -- per-classifier and averaged class-1 probabilities
    """

    # Number of calendar days of price history requested by download_history.
    HISTORY_DAYS = 100

    def __init__(self,ticker):
        """Store the symbol and create the yfinance handle; nothing is downloaded yet."""
        self.ticker= ticker
        self.model = yf.Ticker(ticker)
        self.has_earnings = False
        self.has_history  = False
        self.ed = None
        self.hist = None

    def download_earnings_dates(self):
        """Fetch the earnings dates and remember the most recent fully reported one.

        Rows containing any NaN are dropped, then the first remaining row is
        taken as the latest report (this assumes the table is ordered most
        recent first, as in the NVR output shown further down the notebook).
        ``has_earnings`` is set to True only when such a row exists; otherwise
        the method returns without setting the ``recent_*`` attributes.
        """
        self.has_earnings = False
        raw = self.model.earnings_dates
        if raw is None or raw.shape[0] == 0:
            self.ed = raw
            return

        # Work on a copy instead of dropping rows in place on the object
        # handed out by yfinance.
        self.ed = raw.dropna()
        if self.ed.shape[0] == 0:
            # Only incomplete rows were available.
            return

        # Positional access: the index holds timestamps, so `series[0]` would
        # rely on the deprecated integer-position fallback.
        self.recent_ed = self.ed.index[0] # timestamp
        self.recent_eps_actual = self.ed['Reported EPS'].iloc[0]
        self.recent_eps_est = self.ed['EPS Estimate'].iloc[0]
        self.recent_eps_diff = self.recent_eps_actual - self.recent_eps_est
        self.has_earnings = True
        if self.recent_eps_diff<=0:
            print("WARNING: eps diff is not positive", self.recent_eps_diff)

    def download_history(self):
        """Download the last ``HISTORY_DAYS`` days of price history into ``self.hist``.

        A failed download is reported and leaves ``hist`` as None and
        ``has_history`` as False instead of raising.
        """
        self.has_history = False
        self.hist = None

        end_date   = datetime.today()
        start_date = end_date - timedelta(days=self.HISTORY_DAYS)
        try:
            self.hist=yf.download(self.ticker, start_date, end_date)
        except Exception as e:
            print("Exception: "+str(e))
            return

        if self.hist is not None and self.hist.shape[0]>0:
            self.has_history = True

    def download_balance_sheet(self): # NOT USED ANYMORE
        """Fetch the balance sheet and the ordinary share count.

        Kept for reference: market cap used to be computed as
        ``ord_shares * close`` from the "Ordinary Shares Number" column.
        Sets ``balance_sheet`` (a subset of columns) and ``ord_shares``.
        """
        try:
            bs = self.model.balance_sheet.transpose()
        except Exception as e:
            print("Error getting balance sheet:",e)
            # Retry once; a second failure propagates to the caller.
            bs = self.model.balance_sheet.transpose()
        wanted = ['Total Assets','Common Stock','Total Capitalization','Total Debt', 'Net Debt','Ordinary Shares Number']
        bs_cols = [c for c in bs.columns if c in wanted]
        self.balance_sheet = bs[bs_cols]
        # First row by position (the index is not an integer range).
        self.ord_shares = bs['Ordinary Shares Number'].iloc[0]

    def create_features(self) -> pd.DataFrame:
        """Build the one-row feature frame ``self.F`` indexed by the ticker.

        Columns: eps_est, eps_actual, eps_diff, on_earnings_close,
        on_earnings_volume, market_cap, price_var.  ``price_var`` is the
        standard deviation of Close divided by its mean over the history up to
        and including the earnings date.

        Raises:
            RuntimeError: if the earnings or history download has not succeeded.
            KeyError: if the history has no row for the earnings date.
        """
        if not (self.has_earnings and self.has_history):
            raise RuntimeError(
                f"{self.ticker}: earnings dates and price history must be downloaded "
                "successfully before creating features"
            )

        ed = str(self.recent_ed)[:10]  # 'YYYY-MM-DD' part of the timestamp
        on_earnings_close = self.hist.loc[ed, 'Close']
        on_earnings_vol   = self.hist.loc[ed, 'Volume']
        market_cap = self.model.fast_info.market_cap
        # market_cap = on_earnings_close * self.ord_shares
        sub_hist = self.hist.loc[:ed]
        price_var = sub_hist.Close.std()/sub_hist.Close.mean()

        self.F = pd.DataFrame({
            'eps_est':self.recent_eps_est, 
            'eps_actual':self.recent_eps_actual, 
            'eps_diff':self.recent_eps_diff, 
            'on_earnings_close':on_earnings_close, 
            'on_earnings_volume':on_earnings_vol, 
            'market_cap':market_cap,
            'price_var':price_var
        }, index=[self.ticker])

        return self.F

    def predict_class(self, CLFS) -> float:
        """Average the class-1 probability of ``self.F`` over all classifiers.

        Args:
            CLFS: iterable of fitted classifiers exposing ``predict_proba``.

        Returns:
            The mean probability; also stored as ``avg_pred`` (the individual
            values are stored as ``probas``).
        """
        probas = [clf.predict_proba(self.F)[:,1][0] for clf in CLFS]
        self.probas = probas
        self.avg_pred = np.mean(probas)

        return self.avg_pred

    def __repr__(self):
        return f"TickerModel of {self.ticker}"

# One TickerModel per selected ticker, in the same order as sel_tickers.
TickerModels = list(map(TickerModel, sel_tickers))

In [10]:
# Score every selected ticker; `results` maps ticker -> averaged class-1 probability.
results = {}

n_models = len(TickerModels)
for i, ticker_model in enumerate(TickerModels):
    print(i, n_models, ticker_model)
    try:
        ticker_model.download_earnings_dates()
        ticker_model.download_history()
        # ticker_model.download_balance_sheet()
        ticker_model.create_features()
        pred = ticker_model.predict_class(CLFS)
    except Exception as e:
        # Best effort: skip tickers whose data cannot be fetched or assembled,
        # but say why. Catching Exception (not a bare except) keeps
        # KeyboardInterrupt able to stop the loop.
        print(f"Skipping {ticker_model.ticker}: {type(e).__name__}: {e}")
        continue
    results[ticker_model.ticker] = pred
    print(pred)

0 107 TickerModel of XOM
[*********************100%***********************]  1 of 1 completed
0.31602294123330404
1 107 TickerModel of LLY
[*********************100%***********************]  1 of 1 completed
0.28216464556893184
2 107 TickerModel of MRK
[*********************100%***********************]  1 of 1 completed
0.34737455231852915
3 107 TickerModel of PFE
[*********************100%***********************]  1 of 1 completed
0.44123581602427064
4 107 TickerModel of TMO
[*********************100%***********************]  1 of 1 completed
0.4145758237055741
5 107 TickerModel of MCD
[*********************100%***********************]  1 of 1 completed
0.25954610415168966
6 107 TickerModel of TMUS
[*********************100%***********************]  1 of 1 completed
0.32159127069259974
7 107 TickerModel of UPS
[*********************100%***********************]  1 of 1 completed
0.42065532532938754
8 107 TickerModel of BMY
[*********************100%***********************]  1 of 1 comp

In [11]:
# List the tickers whose averaged prediction is above 0.6, in scoring order.
print("tickers to consider")
shortlist = {ticker: pred for ticker, pred in results.items() if pred > 0.6}
for ticker, pred in shortlist.items():
    print(ticker, pred)

tickers to consider
TEAM 0.6037426540762797
LPLA 0.6009181765231602
NVR 0.6547241458386093
PHM 0.6522419836433646
DECK 0.6331405604441609


## check ticker

In [81]:
# Pick the first TickerModel for NVR (IndexError if there is none).
TM = list(filter(lambda candidate: candidate.ticker == 'NVR', TickerModels))[0]

In [82]:
# Earnings-dates table stored on the model by download_earnings_dates (rows with NaN dropped).
TM.ed

Unnamed: 0_level_0,EPS Estimate,Reported EPS,Surprise(%)
Earnings Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-31 04:00:00-05:00,98.88,133.44,0.3495
2022-10-25 05:00:00-04:00,121.71,118.51,-0.0263
2022-07-26 05:00:00-04:00,126.77,123.65,-0.0246
2022-04-26 05:00:00-04:00,96.98,116.56,0.2019
2022-02-01 04:00:00-05:00,99.76,89.09,-0.107
2021-10-21 05:00:00-04:00,92.51,86.44,-0.0656
2021-07-21 05:00:00-04:00,72.53,82.45,0.1368
2021-04-21 05:00:00-04:00,63.01,63.21,0.0032


In [86]:
# Last ten rows of the price history stored on the model by download_history.
TM.hist.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-25 00:00:00-05:00,4990.220215,5089.0,4990.220215,5069.060059,5069.060059,10100
2023-01-26 00:00:00-05:00,5100.0,5176.189941,5037.5,5112.220215,5112.220215,9500
2023-01-27 00:00:00-05:00,5067.819824,5149.990234,5067.819824,5122.009766,5122.009766,10100
2023-01-30 00:00:00-05:00,5101.209961,5131.930176,5001.009766,5021.419922,5021.419922,18500
2023-01-31 00:00:00-05:00,5199.0,5349.990234,5151.160156,5270.0,5270.0,32700
2023-02-01 00:00:00-05:00,5276.259766,5376.0,5185.120117,5357.919922,5357.919922,24400
2023-02-02 00:00:00-05:00,5330.509766,5500.0,5304.279785,5405.669922,5405.669922,43400
2023-02-03 00:00:00-05:00,5307.310059,5359.720215,5200.0,5281.339844,5281.339844,39500
2023-02-06 00:00:00-05:00,5245.0,5245.0,5116.990234,5149.439941,5149.439941,52000
2023-02-07 00:00:00-05:00,5112.0,5124.359863,5039.02002,5101.0,5101.0,4634


## trades actual

2023-02-04 NVR 65%\
2023-02-11 RCL 75% -10%/+10%