In [71]:
import pandas as pd
import numpy as np
import yfinance as yf
import sys
import requests
import urllib
import time
import datetime
import threading
import queue

In [88]:
global days_to_add
days_to_add = 100

In [72]:
def storeInQueue(f):
    def wrapper(*args):
        my_queue.put(f(*args))
    return wrapper

In [73]:
pd.options.mode.chained_assignment = 'raise'

In [89]:
# Use the following site to get list of all publicly traded Stocks
#http://nxcoreapi.com/symbols.php

#nasdaq
#https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nasdaq&render=download
nqnm = pd.read_csv('nasdaq.csv')
nqnm['Exchange'] = 'Nasdaq'
#nyse
#https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nyse&render=download
nyse = pd.read_csv('nyse.csv')
nyse['Exchange'] = 'NYSE'
#amex
#https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=amex&render=download
amex = pd.read_csv('amex.csv')
amex['Exchange'] = 'AMEX'

In [90]:
amex

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Exchange
0,GOED,1847 Goedeker Inc.,7.3500,$44.92M,2020.0,Consumer Services,Home Furnishings,https://old.nasdaq.com/symbol/goed,AMEX
1,XXII,"22nd Century Group, Inc",0.6164,$85.59M,,Consumer Non-Durables,Farming/Seeds/Milling,https://old.nasdaq.com/symbol/xxii,AMEX
2,FAX,Aberdeen Asia-Pacific Income Fund Inc,4.1900,$1.04B,1986.0,,,https://old.nasdaq.com/symbol/fax,AMEX
3,IAF,Aberdeen Australia Equity Fund Inc,4.8600,$110.53M,,,,https://old.nasdaq.com/symbol/iaf,AMEX
4,AEF,"Aberdeen Emerging Markets Equity Income Fund, ...",6.7800,$344.1M,,,,https://old.nasdaq.com/symbol/aef,AMEX
...,...,...,...,...,...,...,...,...,...
286,WYY,WidePoint Corporation,0.5420,$45.75M,,Technology,EDP Services,https://old.nasdaq.com/symbol/wyy,AMEX
287,WTT,"Wireless Telecom Group, Inc.",1.2900,$27.93M,,Capital Goods,Electrical Products,https://old.nasdaq.com/symbol/wtt,AMEX
288,XTNT,"Xtant Medical Holdings, Inc.",0.8671,$11.47M,,Health Care,Biotechnology: Biological Products (No Diagnos...,https://old.nasdaq.com/symbol/xtnt,AMEX
289,ZDGE,"Zedge, Inc.",1.4100,$17.2M,2016.0,Technology,Computer Software: Prepackaged Software,https://old.nasdaq.com/symbol/zdge,AMEX


In [91]:
stocks = nyse.copy()
stocks = stocks.append(nqnm)
stocks = stocks.append(amex)
# stocks.tail()
# stocks


In [92]:
stocks = stocks[['Symbol', 'Name', 'Exchange', 'Sector', 'industry']]
stocks.reset_index(inplace=True)
stocks = stocks.drop(columns=['index'])
stocks

Unnamed: 0,Symbol,Name,Exchange,Sector,industry
0,DDD,3D Systems Corporation,NYSE,Technology,Computer Software: Prepackaged Software
1,MMM,3M Company,NYSE,Health Care,Medical/Dental Instruments
2,WBAI,500.com Limited,NYSE,Consumer Services,Services-Misc. Amusement & Recreation
3,WUBA,58.com Inc.,NYSE,Technology,"Computer Software: Programming, Data Processing"
4,EGHT,8x8 Inc,NYSE,Technology,EDP Services
...,...,...,...,...,...
7125,WYY,WidePoint Corporation,AMEX,Technology,EDP Services
7126,WTT,"Wireless Telecom Group, Inc.",AMEX,Capital Goods,Electrical Products
7127,XTNT,"Xtant Medical Holdings, Inc.",AMEX,Health Care,Biotechnology: Biological Products (No Diagnos...
7128,ZDGE,"Zedge, Inc.",AMEX,Technology,Computer Software: Prepackaged Software


In [93]:
features = ['dividendRate', 'exDividendDate', 'lastSplitDate', 'lastSplitFactor']
    
for feature in features:
    stocks.loc[:, feature] = np.nan
    
stocks[features] = stocks[features].astype('object')


In [94]:
# # uncomment the following when debugging
# stocks = stocks[0:10]
# stocks

In [95]:
# get quarterly dates for 75 days in future and 75 days in past
def makeQuarterlyDicts():
    
    #get the business days
    quarterly_df = pd.DataFrame()
    start = pd.to_datetime('today') - pd.to_timedelta(days_to_add, unit='d')
    end = pd.to_datetime('today') + pd.to_timedelta(days_to_add, unit='d')
    quarterly_df['Business Dates'] = pd.bdate_range(start=start, end=end)
    
    previous_dict = {}
    future_dict = {}
    holidays = []

    for row in quarterly_df.itertuples():
#         print (row[1])

        date = row[1]
        day = str(row[1])[0:10]
        url = 'https://finance.yahoo.com/calendar/earnings/?day='+day


        # add to previous earnings dates
        try:
            earnings = pd.read_html(url)[0]
            if row[1] < pd.to_datetime('today'):
                for earnings_row in earnings.itertuples():
                    previous_dict[earnings_row[1]] = (date, earnings_row[4], earnings_row[5], earnings_row[6])

            # add to future earnings dates
            else:
                for earnings_row in earnings.itertuples():
                    future_dict[earnings_row[1]] = (date)
        except (ValueError):
            holidays.append(date)
            e = sys.exc_info()[0]
            print(e)
    
    return previous_dict, future_dict

In [96]:
def QuarterlyInfo(stocks):
    initial_time = round(time.time())
    
    previous_dict, future_dict = makeQuarterlyDicts()
    
    previous = pd.DataFrame.from_dict(previous_dict, orient='index',
                                  columns=['Previous_Earnings_Date', 'Expected', 'Actual', 'Surprise'])
    future = pd.DataFrame.from_dict(future_dict, orient = 'index', columns=['Future Earnings Date'])
    
    previous = previous.reset_index()
    future = future.reset_index()
    
    previous.rename(columns={'index': 'Symbol'}, inplace=True)
    future.rename(columns={'index': 'Symbol'}, inplace=True)
    
    stocks = pd.merge(stocks, previous, how='inner')
    stocks = pd.merge(stocks, future, how='left')
    
    print ('Finished quarterly thread in ', (round(time.time())-initial_time)/ 60.0, " minutes!")
    
    return stocks


In [97]:
def getInfo(symbol, info=True, calendar=True, quarterly_financials=True):
    connection_tries=0
    j=0
    
    info = {}
    ticker = yf.Ticker(symbol)           
    info_bool = False
    while 1:
        
        try:
            if info_bool == False:
                info = ticker.info
                info_bool = True
                           
            return info
            
        except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e:
            
            if connection_tries < 1:
                print (e, "HTTP Error. Will try again in 10 seconds")
                time.sleep(10)
                connection_tries +=1
            elif connection_tries < 2:
                print (e, "HTTP Error. Will try again in 30 seconds")
                time.sleep(30)
                connection_tries +=1
            elif connection_tries < 4:
                print (e, "HTTP Error. Will try again in 5 minutes")
                time.sleep(300)
                connection_tries +=1
            elif connection_tries < 5:
                print (e, "HTTP Error.  Will try again in 1 hour")
                time.sleep(3600)
                connection_tries +=1
            else:
                connection_tries = 0
                print (e, "HTTP Error.  Giving up.")
                return {}, pd.DataFrame(), pd.DataFrame()
        except:
            e = sys.exc_info()[0]
#             print('here: ', e)
            if j>2:
                return 'error'
            j+=1
            

In [98]:
@storeInQueue
def info_thread(stocks, features):
    i = 0
    initial_time = round(time.time())

    bad_symbols = []
    symbols_with_no_return = []

    for row in stocks.itertuples():
        print ("Info Thread: ", i)    
        if i % 100 == 0:
            print ("It has been: ", (round(time.time())-initial_time)/ 60.0, " minutes in info thread")
        if '/' in row[1]:
            print ('Bad symbol in ', row[1])
            bad_symbols.append(i)
        elif '.' in row[1]:
            print ('Bad symbol in ', row[1])
            bad_symbols.append(i)
        elif '/' in row[1]:
            print ('Bad symbol in ', row[1])
            bad_symbols.append(i)
        else:

            symbol = row[1]
    #         symbol = yf.Ticker(symbol)           

            info = getInfo(symbol)

            if info == 'error':
                symbols_with_no_return.append(row[1])
                print('No return: ', row[1])

            else:

                recent_split = False
                for feature in features:

                    #try twice in case there's a fail that shouldn't happen

                    try:
                        value = info[feature]
                        
                        if feature = 'lastSplitDate':
                            if datetime.datetime.utcfromtimestamp(value) > pd.to_datetime('today') - pd.to_timedelta(days_to_add, unit='d'):
                                stocks.at[i, feature] = datetime.datetime.utcfromtimestamp(value)
                                recent_split = True
                            else:
                                stocks.at[i, feature] = np.nan
                        elif feature = 'lastSplitFactor":
                            if recent_split:
                                stocks.at[i, feature] = value
                            else:
                                stocks.at[i, feature] = np.nan
                        else:
                            stocks.at[i, feature] = value

                    except:
                        stocks.at[i, feature] = np.nan
                        e = sys.exc_info()[0]
                        print(e, ": ", feature) 
        i += 1
    print ('Finished info thread in ', (round(time.time())-initial_time)/ 60.0, " minutes!")
    return stocks[features]

In [99]:
stocks = QuarterlyInfo(stocks.copy())

<class 'ValueError'>
<class 'ValueError'>
<class 'ValueError'>
Finished quarterly thread in  4.1  minutes!


In [100]:
stocks

Unnamed: 0,Symbol,Name,Exchange,Sector,industry,dividendRate,exDividendDate,lastSplitDate,lastSplitFactor,Previous_Earnings_Date,Expected,Actual,Surprise,Future Earnings Date
0,DDD,3D Systems Corporation,NYSE,Technology,Computer Software: Prepackaged Software,,,,,2020-08-04,-0.1,-0.13,-30,2020-10-28
1,MMM,3M Company,NYSE,Health Care,Medical/Dental Instruments,,,,,2020-07-27,1.8,1.78,-1.17,2020-10-27
2,WUBA,58.com Inc.,NYSE,Technology,"Computer Software: Programming, Data Processing",,,,,2020-09-03,0.64,-,-,2020-11-16
3,EGHT,8x8 Inc,NYSE,Technology,EDP Services,,,,,2020-07-29,-0.12,-0.07,+43.55,2020-10-28
4,EGHT,8x8 Inc,Nasdaq,Technology,EDP Services,,,,,2020-07-29,-0.12,-0.07,+43.55,2020-10-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1755,UFAB,"Unique Fabricating, Inc.",AMEX,Capital Goods,Auto Parts:O.E.M.,,,,,2020-06-24,0.03,-0.12,-500,NaT
1756,UEC,Uranium Energy Corp.,AMEX,Basic Industries,Precious Metals,,,,,2020-06-08,-,-,-,NaT
1757,VOLT,"Volt Information Sciences, Inc.",AMEX,Technology,Professional Services,,,,,2020-06-15,-0.41,-0.25,+39.02,2020-09-10
1758,WRN,Western Copper and Gold Corporation,AMEX,Basic Industries,Precious Metals,,,,,2020-07-27,-0.01,-0.01,-,NaT


In [None]:
my_queue = queue.Queue()

t1 = threading.Thread(target=info_thread, args=(stocks.copy(), features,))
t1.start()

# t2 = threading.Thread(target=QuarterlyInfo, args=(stocks.copy(),))
# t2.start()

t1.join()
# t2.join()

Info Thread:  0
It has been:  0.0  minutes in info thread
Info Thread:  1
Info Thread:  2
Info Thread:  3
Info Thread:  4
Info Thread:  5
Info Thread:  6
Info Thread:  7
Info Thread:  8
Info Thread:  9
Info Thread:  10
Info Thread:  11
Info Thread:  12
Info Thread:  13
Info Thread:  14
Info Thread:  15
Info Thread:  16
Info Thread:  17
Info Thread:  18
Info Thread:  19
Info Thread:  20
Info Thread:  21
Info Thread:  22
Info Thread:  23
Info Thread:  24
Info Thread:  25
Info Thread:  26
Info Thread:  27
Info Thread:  28
Info Thread:  29
Info Thread:  30
Info Thread:  31
Info Thread:  32
Info Thread:  33
Info Thread:  34
Info Thread:  35
Info Thread:  36
Info Thread:  37
Info Thread:  38
Info Thread:  39
Info Thread:  40
Info Thread:  41
Info Thread:  42
Info Thread:  43
Info Thread:  44
Info Thread:  45
Info Thread:  46
Info Thread:  47
Info Thread:  48
Info Thread:  49
Info Thread:  50
Info Thread:  51
Info Thread:  52
Info Thread:  53
Info Thread:  54
Info Thread:  55
Info Thread:  56

Info Thread:  437
Info Thread:  438
Info Thread:  439
Info Thread:  440
Info Thread:  441
Info Thread:  442
Info Thread:  443
Info Thread:  444
Info Thread:  445
Info Thread:  446
Info Thread:  447
Info Thread:  448
Info Thread:  449
Info Thread:  450
Info Thread:  451
Info Thread:  452
Info Thread:  453
Info Thread:  454
Info Thread:  455
Info Thread:  456
Info Thread:  457
Info Thread:  458
Info Thread:  459
Info Thread:  460
Info Thread:  461
Info Thread:  462
Info Thread:  463
Info Thread:  464
Info Thread:  465
Info Thread:  466
Info Thread:  467
Info Thread:  468
Info Thread:  469
Info Thread:  470
Info Thread:  471
Info Thread:  472
Info Thread:  473
Info Thread:  474
Info Thread:  475
Info Thread:  476
Info Thread:  477
Info Thread:  478
Info Thread:  479
Info Thread:  480
Info Thread:  481
Info Thread:  482
Info Thread:  483
Info Thread:  484
Info Thread:  485
Info Thread:  486
Info Thread:  487
Info Thread:  488
Info Thread:  489
Info Thread:  490
Info Thread:  491
Info Threa

In [50]:
my_list = []
while not my_queue.empty():
    my_list.append(my_queue.get())
    


In [51]:
stocks = stocks.copy()

for item in my_list:
    print(type(item))
    if 'previous_earnings_date' in item.columns:
        stocks[['previous_earnings_date', 'upcoming_earnings_date']] = item[['previous_earnings_date', 'upcoming_earnings_date']].copy()
    else:
        stocks[features] = item[features].copy()

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [52]:
stocks.head(20)

Unnamed: 0,Symbol,Name,Exchange,Sector,industry,dividendRate,exDividendDate,lastSplitDate,lastSplitFactor,previous_earnings_date,upcoming_earnings_date
0,DDD,3D Systems Corporation,NYSE,Technology,Computer Software: Prepackaged Software,,,1361750400.0,3:2,2020-08-05 12:00:00,2020-10-28 06:00:00
1,MMM,3M Company,NYSE,Health Care,Medical/Dental Instruments,5.88,1597968000.0,1064880000.0,2:1,2020-07-28 12:00:00,2020-10-27 08:00:00
2,WBAI,500.com Limited,NYSE,Consumer Services,Services-Misc. Amusement & Recreation,,,,,2020-06-26 12:00:00,2020-09-03 04:00:00
3,WUBA,58.com Inc.,NYSE,Technology,"Computer Software: Programming, Data Processing",,,,,2020-07-30 12:00:00,2020-10-28 04:00:00
4,EGHT,8x8 Inc,NYSE,Technology,EDP Services,,,,,2013-02-12 12:00:00,2014-10-28 12:00:00
5,AHC,A.H. Belo Corporation,NYSE,Consumer Services,Newspapers/Magazines,0.16,1597276800.0,1422921600.0,10:1,2020-07-30 12:00:00,2020-10-27 08:00:00
6,AOS,A.O Smith Corporation,NYSE,Consumer Durables,Consumer Electronics/Appliances,0.96,1596067200.0,1475712000.0,2:1,2020-07-28 12:00:00,2020-10-27 06:00:00
7,ATEN,"A10 Networks, Inc.",NYSE,Technology,Computer Communications Equipment,,,,,2020-07-21 12:00:00,2020-09-23 06:00:00
8,AIR,AAR Corp.,NYSE,Capital Goods,Aerospace,0.3,1585267200.0,888278400.0,3:2,2020-07-29 12:00:00,2020-11-02 06:00:00
9,AAN,"Aaron&#39;s, Inc.",NYSE,Technology,Diversified Commercial Services,0.16,1600214400.0,1271376000.0,3:2,NaT,NaT


In [25]:
stocks.to_csv('info.csv')

In [26]:
with open('bad_symbols.txt', 'w') as f:
    for s in bad_symbols:
        f.write(str(s) + "\n")
        
with open('symbols_no_return.txt', 'w') as f:
    for line in symbols_with_no_return:
        f.write(str(line) + "\n")

NameError: name 'bad_symbols' is not defined