In [24]:
import pandas as pd
import numpy as np
import yfinance as yf
import sys
import requests
import urllib
import time
import datetime
import threading

In [25]:
pd.options.mode.chained_assignment = 'raise'

In [26]:
# Use the following site to get list of all publicly traded Stocks
#http://nxcoreapi.com/symbols.php

#nyse
nyse = pd.read_html('http://nxcoreapi.com/symbols.php?search=&m_exchange=NYSE&m_type=&s_type=exact&m_symbol=on&m_name=on')
#nasdaq
nqnm = pd.read_html('http://nxcoreapi.com/symbols.php?search=&m_exchange=NQNM&m_type=&s_type=exact&m_symbol=on&m_name=on')
#amex
amex = pd.read_html('http://nxcoreapi.com/symbols.php?search=&m_exchange=AMEX&m_type=&s_type=exact&m_symbol=on&m_name=on')

In [27]:
nyse = nyse[0]
nyse.columns = nyse.iloc[0]
nyse = nyse[1:]
# nyse

In [28]:
nqnm = nqnm[0]
nqnm.columns = nqnm.iloc[0]
nqnm = nqnm[1:]
# nqnm

In [29]:
amex = amex[0]
amex.columns = amex.iloc[0]
amex = amex[1:]
# amex

In [30]:
stocks = nyse.copy()
stocks = stocks.append(nqnm)
stocks = stocks.append(amex)
# stocks.tail()
# stocks


In [31]:
stocks = stocks[['Symbol', 'Name', 'Exchange']]
stocks.reset_index(inplace=True)
stocks = stocks.drop(columns=['index'])
stocks

Unnamed: 0,Symbol,Name,Exchange
0,A,"Agilent Technologies, Inc",NYSE
1,AA,Alcoa Corp,NYSE
2,AAN,"Aaron's, Inc",NYSE
3,AAP,"Advance Auto Parts, Inc W/I",NYSE
4,AAT,"American Assets Trust, Inc",NYSE
...,...,...,...
6284,YCBD,"cbdMD, Inc.",AMEX
6285,YCBD.PR.A,"cbdMD, Inc. 8.0% Series A Cumulative Convertib...",AMEX
6286,YUMA,"Yuma Energy, Inc",AMEX
6287,ZDGE,"Zedge, Inc",AMEX


In [32]:
features = ['industry', 'sector', 'dividendRate', 'exDividendDate', 'lastSplitDate', 'lastSplitFactor']
    
for feature in features:
    stocks.loc[:, feature] = np.nan
    
stocks[features] = stocks[features].astype('object')


In [33]:
# symbol = yf.Ticker('A')
# symbol.info
# symbol.quarterly_financials

In [34]:
# uncomment the following when debugging
stocks = stocks[0:100]
stocks

Unnamed: 0,Symbol,Name,Exchange,industry,sector,dividendRate,exDividendDate,lastSplitDate,lastSplitFactor
0,A,"Agilent Technologies, Inc",NYSE,,,,,,
1,AA,Alcoa Corp,NYSE,,,,,,
2,AAN,"Aaron's, Inc",NYSE,,,,,,
3,AAP,"Advance Auto Parts, Inc W/I",NYSE,,,,,,
4,AAT,"American Assets Trust, Inc",NYSE,,,,,,
...,...,...,...,...,...,...,...,...,...
95,AHT.PR.I,"Ashford Hospitality Trust, Inc PR SER I",NYSE,,,,,,
96,AI,Arlington Asset Investment,NYSE,,,,,,
97,AI.PR.B,Arlington Asset Investment Corp PR,NYSE,,,,,,
98,AI.PR.C,Arlington Asset Investment Corp 8.250% Series ...,NYSE,,,,,,


In [35]:
def getQuarterlyDates(symbol):
    for i in range(5):
        try:
            url_string = 'https://finance.yahoo.com/calendar/earnings/?day=2020-06-30&symbol=' + symbol
#             print (url_string)
            dt = pd.read_html(url_string)[0].copy()
            # yahoo returns some dates in the future, I have no clue how many per symbol, so I just drop them.
            dt = dt[dt['Surprise(%)'] != '-']

            # get the most recent date and convert it to a date object
            date_string = dt['Earnings Date'].iloc[0]
            date_object = datetime.datetime.strptime(date_string, "%b %d, %Y, %H %p%Z")
            return date_object
        except:
            e = sys.exc_info()[0]
    print(symbol, e)       
    return np.nat


In [36]:
def getInfo(symbol, info=True, calendar=True, quarterly_financials=True):
    connection_tries=0
    j=0
    
    # these booleans are used so that we don't get the same data again if a connection error happens afterwards
    info_bool = False
    calendar_bool = False
    quarterly_bool = False
    
    info = {}
    calendar = {}
    quarterly_financials = {}
    ticker = yf.Ticker(symbol)           

    while 1:
        
        try:
            if info_bool == False:
                info = ticker.info
                info_bool = True
                
            if calendar_bool == False:
                calendar = ticker.calendar
                calendar_bool = True
                
            if quarterly_bool == False:
                quarterly_financials = getQuarterlyDates(symbol)
                quarterly_bool = True
                
            return info, calendar, quarterly_financials
            
        except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e:
            
            if connection_tries < 1:
                print (e, "HTTP Error. Will try again in 10 seconds")
                time.sleep(10)
                connection_tries +=1
            elif connection_tries < 2:
                print (e, "HTTP Error. Will try again in 30 seconds")
                time.sleep(30)
                connection_tries +=1
            elif connection_tries < 4:
                print (e, "HTTP Error. Will try again in 5 minutes")
                time.sleep(300)
                connection_tries +=1
            elif connection_tries < 5:
                print (e, "HTTP Error.  Will try again in 1 hour")
                time.sleep(3600)
                connection_tries +=1
            else:
                connection_tries = 0
                print (e, "HTTP Error.  Giving up.")
                return {}, pd.DataFrame(), pd.DataFrame()
        except:
            e = sys.exc_info()[0]
#             print('here: ', e)
            if j>2:
                return 'error', 'error', 'error'
            j+=1
            

In [37]:
i = 0
initial_time = round(time.time())
stocks.head()

# stocks[features] = stocks[features].astype('object')

bad_symbols = []
symbols_with_no_return = []

for row in stocks.itertuples():
    print (i)    
    if i % 100 == 0:
        print ("It has been: ", (round(time.time())-initial_time)/ 60.0, " minutes")
        
    if '/' in row[1]:
        print ('Bad symbol in ', row[1])
        bad_symbols.append(i)
    elif '.' in row[1]:
        print ('Bad symbol in ', row[1])
        bad_symbols.append(i)
    else:
        
        symbol = row[1]
#         symbol = yf.Ticker(symbol)           

        info, calendar, quarterly_financials = getInfo(symbol)
        
        if info == 'error':
            symbols_with_no_return.append(row[1])
            print('No return: ', row[1])
        
        else:
        
            for feature in features:

                #try twice in case there's a fail that shouldn't happen

                try:
                    value = info[feature]
                    stocks.at[i, feature] = value

                except:
                    stocks.at[i, feature] = np.nan
                    e = sys.exc_info()[0]
                    print(e, ": ", feature)

        #     get any upcoming earnings date
            try:       
                if 'Value' in calendar.columns:
                    column = 'Value'
                else:
                    column = 0
                value = calendar[column]['Earnings Date']
                stocks.at[i, 'upcoming_earnings_date'] = value
            except:
                stocks.at[i, 'upcoming_earnings_date'] = np.nan

            #get the previous quarterly earnings date
            try:
                value = quarterly_financials
                stocks.at[i, 'previous_earnings_date'] = value
            except:
                stocks.at[i, 'previous_earnings_date'] = np.nan
                
    i += 1
print ('Finished in ', (round(time.time())-initial_time)/ 60.0, " minutes!")
    

0
It has been:  0.0  minutes
1
2
3
4
5
6
7
8
9
10
11
12
13
Bad symbol in  ABR.PR.A
14
Bad symbol in  ABR.PR.B
15
Bad symbol in  ABR.PR.C
16
17
AC <class 'IndexError'>
AC <class 'IndexError'>
AC <class 'IndexError'>
AC <class 'IndexError'>
No return:  AC
18
19
20
21
22
23
Bad symbol in  ACEL.WS
24
25
26
27
28
Bad symbol in  ACND.U
29
No return:  ACP
30
31
No return:  ACV
32
33
34
35
36
37
38
39
ADX <class 'IndexError'>
ADX <class 'IndexError'>
ADX <class 'IndexError'>
ADX <class 'IndexError'>
No return:  ADX
40
No return:  AEB
41
42
No return:  AEFC
43
44
45
Bad symbol in  AEL.PR.A
46
Bad symbol in  AEL.PR.B
47


  return op(a, b)


48
49
50
Bad symbol in  AEP.PR.B
51
Bad symbol in  AEP.PR.C
52
53
54
AFB <class 'IndexError'>
AFB <class 'IndexError'>
AFB <class 'IndexError'>
AFB <class 'IndexError'>
No return:  AFB
55
No return:  AFC
56
57
No return:  AFGB
58
No return:  AFGC
59
60
No return:  AFGH
61
62
63
AFT <class 'ValueError'>
AFT <class 'ValueError'>
AFT <class 'ValueError'>
AFT <class 'ValueError'>
No return:  AFT
64
65
66
AGD <class 'ValueError'>
AGD <class 'ValueError'>
AGD <class 'ValueError'>
AGD <class 'ValueError'>
No return:  AGD
67
68
69
Bad symbol in  AGM.A
70
Bad symbol in  AGM.PR.A
71
Bad symbol in  AGM.PR.C
72
Bad symbol in  AGM.PR.D
73
Bad symbol in  AGM.PR.E
74
Bad symbol in  AGM.PR.F
75
No return:  AGN
76
77
Bad symbol in  AGO.PR.B
78
Bad symbol in  AGO.PR.E
79
Bad symbol in  AGO.PR.F
80
81
82
83
84
HTTP Error 503: Service Unavailable HTTP Error. Will try again in 10 seconds
85
86
Bad symbol in  AHH.PR.A
87
Bad symbol in  AHL.PR.C
88
Bad symbol in  AHL.PR.D
89
Bad symbol in  AHL.PR.E
90
91
Bad

In [38]:
stocks.head(20)

Unnamed: 0,Symbol,Name,Exchange,industry,sector,dividendRate,exDividendDate,lastSplitDate,lastSplitFactor,upcoming_earnings_date,previous_earnings_date
0,A,"Agilent Technologies, Inc",NYSE,Diagnostics & Research,Healthcare,0.72,1593388800.0,1414972800.0,1398:1000,NaT,NaT
1,AA,Alcoa Corp,NYSE,Aluminum,Basic Materials,,,1477958400.0,1000:801,2020-10-14,2020-07-15 12:00:00
2,AAN,"Aaron's, Inc",NYSE,Rental & Leasing Services,Industrials,0.16,1600214400.0,1271376000.0,3:2,2020-11-02,2020-07-29 12:00:00
3,AAP,"Advance Auto Parts, Inc W/I",NYSE,Specialty Retail,Consumer Cyclical,1.0,1600300800.0,1127692800.0,3:2,2020-11-10,2020-08-18 12:00:00
4,AAT,"American Assets Trust, Inc",NYSE,REIT—Diversified,Real Estate,1.0,1599609600.0,,,2020-10-27,2020-07-28 12:00:00
5,AB,AllianceBernstein Holding LP Units,NYSE,Asset Management,Financial Services,2.44,1596153600.0,891561600.0,2:1,2020-10-22,2020-07-23 12:00:00
6,ABB,ABB Ltd,NYSE,Electrical Equipment & Parts,Industrials,0.83,1585526400.0,985824000.0,4:1,NaT,2020-07-22 12:00:00
7,ABBV,"AbbVie, Inc",NYSE,Drug Manufacturers—General,Healthcare,4.72,1594684800.0,,,2020-10-30,2020-07-31 12:00:00
8,ABC,AmerisourceBergen Corp,NYSE,Medical Distribution,Healthcare,1.68,1597363200.0,1245110400.0,2:1,2020-11-05,2020-08-05 12:00:00
9,ABEV,Ambev SA ADS,NYSE,Beverages—Brewers,Consumer Defensive,0.12,1576800000.0,1384128000.0,5:1,NaT,2020-07-31 12:00:00


In [None]:
stocks.to_csv('info.csv')

In [None]:
with open('bad_symbols.txt', 'w') as f:
    for s in bad_symbols:
        f.write(str(s) + "\n")
        
with open('symbols_no_return.txt', 'w') as f:
    for line in symbols_with_no_return:
        f.write(str(line) + "\n")