In [141]:
import pandas as pd
import numpy as np
import yfinance as yf
import sys
import requests
import urllib
import time
import datetime
import threading
import queue

In [142]:
pd.options.mode.chained_assignment = 'raise'

In [143]:
# Use the following site to get list of all publicly traded Stocks
#http://nxcoreapi.com/symbols.php

#nyse
nyse = pd.read_html('http://nxcoreapi.com/symbols.php?search=&m_exchange=NYSE&m_type=&s_type=exact&m_symbol=on&m_name=on')
#nasdaq
nqnm = pd.read_html('http://nxcoreapi.com/symbols.php?search=&m_exchange=NQNM&m_type=&s_type=exact&m_symbol=on&m_name=on')
#amex
amex = pd.read_html('http://nxcoreapi.com/symbols.php?search=&m_exchange=AMEX&m_type=&s_type=exact&m_symbol=on&m_name=on')

In [144]:
nyse = nyse[0]
nyse.columns = nyse.iloc[0]
nyse = nyse[1:]
# nyse

In [145]:
nqnm = nqnm[0]
nqnm.columns = nqnm.iloc[0]
nqnm = nqnm[1:]
# nqnm

In [146]:
amex = amex[0]
amex.columns = amex.iloc[0]
amex = amex[1:]
# amex

In [147]:
stocks = nyse.copy()
stocks = stocks.append(nqnm)
stocks = stocks.append(amex)
# stocks.tail()
# stocks


In [148]:
stocks = stocks[['Symbol', 'Name', 'Exchange']]
stocks.reset_index(inplace=True)
stocks = stocks.drop(columns=['index'])
stocks

Unnamed: 0,Symbol,Name,Exchange
0,A,"Agilent Technologies, Inc",NYSE
1,AA,Alcoa Corp,NYSE
2,AAN,"Aaron's, Inc",NYSE
3,AAP,"Advance Auto Parts, Inc W/I",NYSE
4,AAT,"American Assets Trust, Inc",NYSE
...,...,...,...
6284,YCBD,"cbdMD, Inc.",AMEX
6285,YCBD.PR.A,"cbdMD, Inc. 8.0% Series A Cumulative Convertib...",AMEX
6286,YUMA,"Yuma Energy, Inc",AMEX
6287,ZDGE,"Zedge, Inc",AMEX


In [149]:
features = ['industry', 'sector', 'dividendRate', 'exDividendDate', 'lastSplitDate', 'lastSplitFactor']
    
for feature in features:
    stocks.loc[:, feature] = np.nan
    
stocks[features] = stocks[features].astype('object')


In [150]:
# symbol = yf.Ticker('A')
# symbol.info
# symbol.quarterly_financials

In [151]:
# new_stocks = stocks[~stocks.Symbol.str.contains('/')==True]
# # stocks = stocks[~stocks.Symbol.str.contains['/']]
# new_stocks

In [152]:
# uncomment the following when debugging
stocks = stocks[0:100]
stocks

Unnamed: 0,Symbol,Name,Exchange,industry,sector,dividendRate,exDividendDate,lastSplitDate,lastSplitFactor
0,A,"Agilent Technologies, Inc",NYSE,,,,,,
1,AA,Alcoa Corp,NYSE,,,,,,
2,AAN,"Aaron's, Inc",NYSE,,,,,,
3,AAP,"Advance Auto Parts, Inc W/I",NYSE,,,,,,
4,AAT,"American Assets Trust, Inc",NYSE,,,,,,
...,...,...,...,...,...,...,...,...,...
95,AHT.PR.I,"Ashford Hospitality Trust, Inc PR SER I",NYSE,,,,,,
96,AI,Arlington Asset Investment,NYSE,,,,,,
97,AI.PR.B,Arlington Asset Investment Corp PR,NYSE,,,,,,
98,AI.PR.C,Arlington Asset Investment Corp 8.250% Series ...,NYSE,,,,,,


In [153]:
# gets the future and previous quarterly earnings 
def getQuarterlyDates(symbol):
    j = 0
    for connection_tries in range(5):
        try:
            url_string = 'https://finance.yahoo.com/calendar/earnings/?day=2020-06-30&symbol=' + symbol
#             print (url_string)
            dt = pd.read_html(url_string)[0].copy()
            
            # check to see if there's a future quarterly earnings date posted.  Sometimes there are more than 1.
            # They are in descending order so we loop until we find the last future one.
            future_earnings_date_index = 0
            i = -1
            for row in dt.itertuples():
                if row[6] == '-':
                    i+=1
                    next_earnings_date_index = i
                else:
                    break
            
            if i != -1:
                future_date_string = dt['Earnings Date'].iloc[next_earnings_date_index]
                future_date_object = datetime.datetime.strptime(future_date_string, "%b %d, %Y, %H %p%Z")
            else:
                future_date_object = np.nat
            
            previous_date_string = dt['Earnings Date'].iloc[next_earnings_date_index+1]
            previous_date_object = datetime.datetime.strptime(previous_date_string, "%b %d, %Y, %H %p%Z")
            # yahoo returns some dates in the future, I have no clue how many per symbol, so I just drop them.

            # get the most recent date and convert it to a date object
            
            return previous_date_object, future_date_object
        
        except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e:
            
            if connection_tries < 1:
                print (e, "HTTP Error. Will try again in 10 seconds")
                time.sleep(10)
                connection_tries +=1
            elif connection_tries < 2:
                print (e, "HTTP Error. Will try again in 30 seconds")
                time.sleep(30)
                connection_tries +=1
            elif connection_tries < 4:
                print (e, "HTTP Error. Will try again in 5 minutes")
                time.sleep(300)
                connection_tries +=1
            elif connection_tries < 5:
                print (e, "HTTP Error.  Will try again in 1 hour")
                time.sleep(3600)
                connection_tries +=1
            else:
                print (e, "HTTP Error.  Giving up.")
                
        except:
            e = sys.exc_info()[0]
#             print('here: ', e)
           
    print(symbol, e)       
    return np.nan, np.nan


In [154]:
def getInfo(symbol, info=True, calendar=True, quarterly_financials=True):
    connection_tries=0
    j=0
    
    info = {}
    ticker = yf.Ticker(symbol)           
    info_bool = False
    while 1:
        
        try:
            if info_bool == False:
                info = ticker.info
                info_bool = True
                           
            return info
            
        except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e:
            
            if connection_tries < 1:
                print (e, "HTTP Error. Will try again in 10 seconds")
                time.sleep(10)
                connection_tries +=1
            elif connection_tries < 2:
                print (e, "HTTP Error. Will try again in 30 seconds")
                time.sleep(30)
                connection_tries +=1
            elif connection_tries < 4:
                print (e, "HTTP Error. Will try again in 5 minutes")
                time.sleep(300)
                connection_tries +=1
            elif connection_tries < 5:
                print (e, "HTTP Error.  Will try again in 1 hour")
                time.sleep(3600)
                connection_tries +=1
            else:
                connection_tries = 0
                print (e, "HTTP Error.  Giving up.")
                return {}, pd.DataFrame(), pd.DataFrame()
        except:
            e = sys.exc_info()[0]
#             print('here: ', e)
            if j>2:
                return 'error'
            j+=1
            

In [158]:
@storeInQueue
def quarterly_thread(stocks):
    i = 0
    initial_time = round(time.time())
            
    for row in stocks.itertuples():
        print ("Quarterly Thread: ", i)    
    
        if i % 100 == 0:
            print ("It has been: ", (round(time.time())-initial_time)/ 60.0, " minutes in quarterly thread")
        
        if not '.' in row[1] and not '/' in row[1]:
        
            symbol = row[1]    

            previous, future = getQuarterlyDates(symbol)

            #     get any upcoming earnings date
            try:                
                stocks.at[i, 'upcoming_earnings_date'] = future
            except:
                stocks.at[i, 'upcoming_earnings_date'] = np.nan

            #get the previous quarterly earnings date
            try:
                stocks.at[i, 'previous_earnings_date'] = previous
            except:
                stocks.at[i, 'previous_earnings_date'] = np.nan
        else:
            print ('Bad symbol in ', row[1])
        i += 1
        
    print ('Finished quarterly thread in ', (round(time.time())-initial_time)/ 60.0, " minutes!")
    return stocks[['previous_earnings_date', 'upcoming_earnings_date']]

In [159]:
@storeInQueue
def info_thread(stocks, features):
    i = 0
    initial_time = round(time.time())

    bad_symbols = []
    symbols_with_no_return = []

    for row in stocks.itertuples():
        print ("Info Thread: ", i)    
        if i % 100 == 0:
            print ("It has been: ", (round(time.time())-initial_time)/ 60.0, " minutes in info thread")
        if '/' in row[1]:
            print ('Bad symbol in ', row[1])
            bad_symbols.append(i)
        elif '.' in row[1]:
            print ('Bad symbol in ', row[1])
            bad_symbols.append(i)
        else:

            symbol = row[1]
    #         symbol = yf.Ticker(symbol)           

            info = getInfo(symbol)

            if info == 'error':
                symbols_with_no_return.append(row[1])
                print('No return: ', row[1])

            else:

                for feature in features:

                    #try twice in case there's a fail that shouldn't happen

                    try:
                        value = info[feature]
                        stocks.at[i, feature] = value

                    except:
                        stocks.at[i, feature] = np.nan
                        e = sys.exc_info()[0]
                        print(e, ": ", feature) 
        i += 1
    print ('Finished info thread in ', (round(time.time())-initial_time)/ 60.0, " minutes!")
    return stocks[features]

In [160]:
def storeInQueue(f):
    def wrapper(*args):
        my_queue.put(f(*args))
    return wrapper

In [161]:
my_queue = queue.Queue()

t1 = threading.Thread(target=info_thread, args=(stocks.copy(), features,))
t1.start()

t2 = threading.Thread(target=quarterly_thread, args=(stocks.copy(),))
t2.start()

t1.join()
t2.join()

Info Thread:  0
It has been:  0.0  minutes in info thread
Quarterly Thread:  0
It has been:  0.0  minutes in quarterly thread
Quarterly Thread:  1
Quarterly Thread:  2
Info Thread:  1
Quarterly Thread:  3
Info Thread:  2
Quarterly Thread:  4
Quarterly Thread:  5
Info Thread:  3
Info Thread:  4
Info Thread:  5
AB <class 'AttributeError'>
Quarterly Thread:  6
Info Thread:  6
Info Thread:  7
ABB <class 'AttributeError'>
Quarterly Thread:  7
Quarterly Thread:  8
Info Thread:  8
Quarterly Thread:  9
Info Thread:  9
Info Thread:  10
ABEV <class 'AttributeError'>
Quarterly Thread:  10
Quarterly Thread:  11
Info Thread:  11
Quarterly Thread:  12
Quarterly Thread:  13
Bad symbol in  ABR.PR.A
Quarterly Thread:  14
Bad symbol in  ABR.PR.B
Quarterly Thread:  15
Bad symbol in  ABR.PR.C
Quarterly Thread:  16
Info Thread:  12
Quarterly Thread:  17
Info Thread:  13
Bad symbol in  ABR.PR.A
Info Thread:  14
Bad symbol in  ABR.PR.B
Info Thread:  15
Bad symbol in  ABR.PR.C
Info Thread:  16
Info Thread:  1

In [162]:
my_list = []
while not my_queue.empty():
    my_list.append(my_queue.get())
    


In [163]:
stocks = stocks.copy()

for item in my_list:
    print(type(item))
    if 'previous_earnings_date' in item.columns:
        stocks[['previous_earnings_date', 'upcoming_earnings_date']] = item[['previous_earnings_date', 'upcoming_earnings_date']].copy()
    else:
        stocks[features] = item[features].copy()

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [165]:
stocks.head(20)

Unnamed: 0,Symbol,Name,Exchange,industry,sector,dividendRate,exDividendDate,lastSplitDate,lastSplitFactor,previous_earnings_date,upcoming_earnings_date
0,A,"Agilent Technologies, Inc",NYSE,Diagnostics & Research,Healthcare,0.72,1593388800.0,1414972800.0,1398:1000,2020-08-18 12:00:00,2020-11-23 05:00:00
1,AA,Alcoa Corp,NYSE,Aluminum,Basic Materials,,,1477958400.0,1000:801,2020-07-15 12:00:00,2020-10-14 06:00:00
2,AAN,"Aaron's, Inc",NYSE,Rental & Leasing Services,Industrials,0.16,1600214400.0,1271376000.0,3:2,2020-07-29 12:00:00,2020-11-02 06:00:00
3,AAP,"Advance Auto Parts, Inc W/I",NYSE,Specialty Retail,Consumer Cyclical,1.0,1600300800.0,1127692800.0,3:2,2020-08-18 12:00:00,2020-11-10 09:00:00
4,AAT,"American Assets Trust, Inc",NYSE,REIT—Diversified,Real Estate,1.0,1599609600.0,,,2020-07-28 12:00:00,2020-10-27 04:00:00
5,AB,AllianceBernstein Holding LP Units,NYSE,Asset Management,Financial Services,2.44,1596153600.0,891561600.0,2:1,NaT,NaT
6,ABB,ABB Ltd,NYSE,Electrical Equipment & Parts,Industrials,0.83,1585526400.0,985824000.0,4:1,NaT,NaT
7,ABBV,"AbbVie, Inc",NYSE,Drug Manufacturers—General,Healthcare,4.72,1594684800.0,,,2020-07-31 12:00:00,2020-10-30 08:00:00
8,ABC,AmerisourceBergen Corp,NYSE,Medical Distribution,Healthcare,1.68,1597363200.0,1245110400.0,2:1,2020-08-05 12:00:00,2020-11-05 09:00:00
9,ABEV,Ambev SA ADS,NYSE,Beverages—Brewers,Consumer Defensive,0.12,1576800000.0,1384128000.0,5:1,NaT,NaT


In [None]:
stocks.to_csv('info.csv')

In [None]:
with open('bad_symbols.txt', 'w') as f:
    for s in bad_symbols:
        f.write(str(s) + "\n")
        
with open('symbols_no_return.txt', 'w') as f:
    for line in symbols_with_no_return:
        f.write(str(line) + "\n")