In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import sys
import requests
import urllib
import time
import datetime

In [2]:
# Symbol listings come from http://nxcoreapi.com/symbols.php, queried once per exchange.
# The three requests differ only in the `m_exchange` query parameter, so the URL is
# kept as a single template.
SYMBOLS_URL = 'http://nxcoreapi.com/symbols.php?search=&m_exchange={exchange}&m_type=&s_type=exact&m_symbol=on&m_name=on'

# keep_default_na=False stops pandas from turning cell text such as "NA" (a legal
# ticker spelling) into NaN; the trade-off is that empty cells come back as ''
# instead of NaN.
# Each call returns a list of DataFrames (one per HTML table found on the page).

#nyse
nyse = pd.read_html(SYMBOLS_URL.format(exchange='NYSE'), keep_default_na=False)
#nasdaq
nqnm = pd.read_html(SYMBOLS_URL.format(exchange='NQNM'), keep_default_na=False)
#amex
amex = pd.read_html(SYMBOLS_URL.format(exchange='AMEX'), keep_default_na=False)

In [3]:
# read_html produced a list of tables; the listing is the first one.
nyse = nyse[0]
# The first data row carries the real column names: promote it to the header,
# then keep every row after it.
header_row = nyse.iloc[0]
nyse.columns = header_row
nyse = nyse.iloc[1:]

In [4]:
# read_html produced a list of tables; the listing is the first one.
nqnm = nqnm[0]
# The first data row carries the real column names: promote it to the header,
# then keep every row after it.
header_row = nqnm.iloc[0]
nqnm.columns = header_row
nqnm = nqnm.iloc[1:]

In [5]:
# read_html produced a list of tables; the listing is the first one.
amex = amex[0]
# The first data row carries the real column names: promote it to the header,
# then keep every row after it.
header_row = amex.iloc[0]
amex.columns = header_row
amex = amex.iloc[1:]

In [6]:
# Stack the three exchange listings (NYSE, then NASDAQ, then AMEX) into one frame.
# DataFrame.append was deprecated and later removed from pandas; pd.concat is the
# supported replacement. Like append, it returns a new frame and keeps each input's
# original row labels, so labels repeat until the index is reset.
stocks = pd.concat([nyse, nqnm, amex])


In [7]:
# Keep only the identifying columns and renumber the rows from 0, discarding the
# per-exchange row labels carried over from the combined listings.
stocks = stocks[['Symbol', 'Name', 'Exchange']].reset_index(drop=True)
stocks

Unnamed: 0,Symbol,Name,Exchange
0,A,"Agilent Technologies, Inc",NYSE
1,AA,Alcoa Corp,NYSE
2,AAN,"Aaron's, Inc",NYSE
3,AAP,"Advance Auto Parts, Inc W/I",NYSE
4,AAT,"American Assets Trust, Inc",NYSE
...,...,...,...
6276,YCBD,"cbdMD, Inc.",AMEX
6277,YCBD.PR.A,"cbdMD, Inc. 8.0% Series A Cumulative Convertib...",AMEX
6278,YUMA,"Yuma Energy, Inc",AMEX
6279,ZDGE,"Zedge, Inc",AMEX


In [8]:
# Keys looked up in each ticker's `info` mapping by the download loop.
features = ['industry', 'sector', 'dividendRate', 'exDividendDate', 'lastSplitDate', 'lastSplitFactor']

# Add one all-NaN column per feature, then cast those columns to object dtype
# (presumably so the per-cell writes later on can store arbitrary values).
stocks = (
    stocks
    .assign(**dict.fromkeys(features, np.nan))
    .astype(dict.fromkeys(features, 'object'))
)



In [9]:
# symbol = yf.Ticker('A')
# symbol.info
# symbol.quarterly_financials

In [10]:
# uncomment the following when debugging
# stocks = stocks[0:5]
# stocks

In [11]:
def getInfo(symbol, info=True, calendar=True, quarterly_financials=True):
    """Read ``info``, ``calendar`` and ``quarterly_financials`` from a ticker object, with retries.

    Each attribute is read at most once successfully: when a later attribute
    fails, the retry only re-reads what is still missing.

    Parameters
    ----------
    symbol : object
        Ticker-like object exposing ``info``, ``calendar`` and
        ``quarterly_financials`` attributes (the notebook passes ``yf.Ticker``).
    info, calendar, quarterly_financials : bool, default True
        Whether to fetch the corresponding attribute. A skipped attribute is
        returned as an empty placeholder (``{}`` for ``info``, an empty
        DataFrame for the other two).

    Returns
    -------
    tuple
        ``(info, calendar, quarterly_financials)`` on success.
        ``({}, DataFrame(), DataFrame())`` after the network retries are exhausted.
        ``('error', 'error', 'error')`` after four non-network failures.

    Notes
    -----
    Network failures sleep 10 s, 30 s, 5 min, 5 min and 1 h between attempts.
    KeyboardInterrupt / SystemExit are not caught, so the caller's loop can be
    interrupted.
    """
    # `import urllib` alone does not guarantee that the `error` submodule is loaded.
    import urllib.error

    # (seconds to sleep, message) for each successive network failure.
    backoff = (
        (10, "HTTP Error. Will try again in 10 seconds"),
        (30, "HTTP Error. Will try again in 30 seconds"),
        (300, "HTTP Error. Will try again in 5 minutes"),
        (300, "HTTP Error. Will try again in 5 minutes"),
        (3600, "HTTP Error.  Will try again in 1 hour"),
    )

    # What still has to be fetched; cleared as each read succeeds so a retry
    # does not download the same data again.
    need_info = bool(info)
    need_calendar = bool(calendar)
    need_financials = bool(quarterly_financials)

    info_data = {}
    calendar_data = pd.DataFrame()
    financials_data = pd.DataFrame()

    network_failures = 0
    other_failures = 0

    while True:
        try:
            if need_info:
                info_data = symbol.info
                need_info = False

            if need_calendar:
                calendar_data = symbol.calendar
                need_calendar = False

            if need_financials:
                financials_data = symbol.quarterly_financials
                need_financials = False

            return info_data, calendar_data, financials_data

        except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e:
            if network_failures < len(backoff):
                delay, message = backoff[network_failures]
                print (e, message)
                time.sleep(delay)
                network_failures += 1
            else:
                print (e, "HTTP Error.  Giving up.")
                return {}, pd.DataFrame(), pd.DataFrame()

        except Exception:
            # Any other failure is retried immediately; the fourth one yields
            # the 'error' sentinel that the caller checks for.
            if other_failures > 2:
                return 'error', 'error', 'error'
            other_failures += 1

In [None]:
# Download loop: for every listed symbol, query the ticker and copy the wanted
# fields into `stocks`. Symbols containing '/' or '.' are skipped without querying.
initial_time = round(time.time())

# Row labels of the symbols that were skipped.
# NOTE(review): this list holds row labels while symbols_with_no_return holds
# ticker strings -- confirm which form the consumer of bad_symbols.txt expects.
bad_symbols = []
# Ticker strings for which getInfo returned the 'error' sentinel.
symbols_with_no_return = []

# `i` is only a progress counter. Cells are addressed through the row's own index
# label (row[0]) so that writing with .at never targets a label that is not in
# the frame (which would silently append new rows).
for i, row in enumerate(stocks.itertuples()):
    label = row[0]
    ticker_name = row[1]  # first column after the index, i.e. 'Symbol'

    print (i)
    if i % 100 == 0:
        print ("It has been: ", (round(time.time())-initial_time)/ 60.0, " minutes")

    # A non-string symbol (e.g. NaN from a missing cell) cannot be queried either,
    # and would make the substring checks below raise TypeError.
    if not isinstance(ticker_name, str) or '/' in ticker_name or '.' in ticker_name:
        print ('Bad symbol in ', ticker_name)
        bad_symbols.append(label)
        continue

    symbol = yf.Ticker(ticker_name)
    info, calendar, quarterly_financials = getInfo(symbol)

    # getInfo reports an unrecoverable failure with the string sentinel 'error'.
    if isinstance(info, str) and info == 'error':
        symbols_with_no_return.append(ticker_name)
        print('No return: ', ticker_name)
        continue

    for feature in features:
        try:
            stocks.at[label, feature] = info[feature]
        except Exception as exc:
            # Missing key or a value that cannot be stored in a single cell:
            # record NaN and report the exception type.
            stocks.at[label, feature] = np.nan
            print(type(exc), ": ", feature)

    # get any upcoming earnings date
    try:
        column = 'Value' if 'Value' in calendar.columns else 0
        stocks.at[label, 'upcoming_earnings_date'] = calendar[column]['Earnings Date']
    except Exception:
        stocks.at[label, 'upcoming_earnings_date'] = np.nan

    # get the previous quarterly earnings date (first column label of the financials table)
    try:
        stocks.at[label, 'previous_earnings_date'] = quarterly_financials.columns[0]
    except Exception:
        stocks.at[label, 'previous_earnings_date'] = np.nan

    

0
It has been:  0.0  minutes
1
2
3
4
5
6
7
8
9
10
11
12
13
Bad symbol in  ABR.PR.A
14
Bad symbol in  ABR.PR.B
15
Bad symbol in  ABR.PR.C
16
17
18
19
20
No return:  ACC
21
22
23
Bad symbol in  ACEL.WS
24
25
26
27
28
Bad symbol in  ACND.U
29
No return:  ACP
30
31
No return:  ACV
32
33
No return:  ADCT
34
35
36
37
38
39
40
No return:  AEB
41
No return:  AEE
42
No return:  AEFC
43
44
No return:  AEL
45
Bad symbol in  AEL.PR.A
46
Bad symbol in  AEL.PR.B
47
48
49
50
Bad symbol in  AEP.PR.B
51
Bad symbol in  AEP.PR.C
52
53
54
55
No return:  AFC
56
57
No return:  AFGB
58
No return:  AFGC
59
60
No return:  AFGH
61
No return:  AFI
62
63
64
65
66
67
68
69
Bad symbol in  AGM.A
70
Bad symbol in  AGM.PR.A
71
Bad symbol in  AGM.PR.C
72
Bad symbol in  AGM.PR.D
73
Bad symbol in  AGM.PR.E
74
Bad symbol in  AGM.PR.F
75
No return:  AGN
76
77
Bad symbol in  AGO.PR.B
78
Bad symbol in  AGO.PR.E
79
Bad symbol in  AGO.PR.F
80
No return:  AGR
81
82
83
84
85
86
Bad symbol in  AHH.PR.A
87
Bad symbol in  AHL.PR.C


In [None]:
# Display the `stocks` table as the cell's output.
stocks

In [None]:
# Write the `stocks` table to info.csv (path is relative to the working directory).
stocks.to_csv('info.csv')

In [None]:
# Persist the skipped entries, one per line.
with open('bad_symbols.txt', 'w') as bad_file:
    bad_file.writelines(str(entry) + "\n" for entry in bad_symbols)

# Persist the symbols that produced no data, one per line.
with open('symbols_no_return.txt', 'w') as no_return_file:
    no_return_file.writelines(str(entry) + "\n" for entry in symbols_with_no_return)