In [40]:
import pandas as pd
import numpy as np
import yfinance as yf
import sys
import requests
import urllib
import time
import datetime
import threading
import queue

In [41]:
# Half-width, in calendar days, of the date window the notebook works with:
# makeQuarterlyDicts scans this many days before and after today, and
# info_thread only keeps stock splits newer than this many days.
# (A `global` statement at module level has no effect, so it is omitted.)
days_to_add = 110

In [42]:
def storeInQueue(f):
    """Decorator that pushes the wrapped function's return value onto ``my_queue``.

    The wrapper itself returns None; the result of ``f`` is only available
    through the module-level ``my_queue`` object, which must exist (and expose
    a ``put`` method) by the time the wrapped function is called.

    Args:
        f: the callable to wrap.

    Returns:
        A wrapper with the same name/docstring as ``f`` that accepts the same
        positional and keyword arguments.
    """
    import functools

    @functools.wraps(f)  # keep f's __name__/__doc__ for debugging and thread names
    def wrapper(*args, **kwargs):
        # Keyword arguments are forwarded too, so decorated functions can be
        # called exactly like the undecorated ones.
        my_queue.put(f(*args, **kwargs))
    return wrapper

In [43]:
# Turn pandas' chained-assignment warning into a hard error so accidental
# writes to a temporary copy fail loudly instead of being silently lost.
pd.set_option('mode.chained_assignment', 'raise')

In [44]:
# Use the following site to get list of all publicly traded Stocks
#http://nxcoreapi.com/symbols.php

# Each listing is read from a local CSV (the URL above each read is where the
# file was downloaded from) and tagged with the exchange it came from.

#nasdaq
#https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nasdaq&render=download
nqnm = pd.read_csv('nasdaq.csv').assign(Exchange='Nasdaq')
#nyse
#https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nyse&render=download
nyse = pd.read_csv('nyse.csv').assign(Exchange='NYSE')
#amex
#https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=amex&render=download
amex = pd.read_csv('amex.csv').assign(Exchange='AMEX')

In [45]:
# Display the AMEX listing to eyeball the columns loaded from amex.csv.
amex

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Exchange
0,GOED,1847 Goedeker Inc.,7.3500,$44.92M,2020.0,Consumer Services,Home Furnishings,https://old.nasdaq.com/symbol/goed,AMEX
1,XXII,"22nd Century Group, Inc",0.6164,$85.59M,,Consumer Non-Durables,Farming/Seeds/Milling,https://old.nasdaq.com/symbol/xxii,AMEX
2,FAX,Aberdeen Asia-Pacific Income Fund Inc,4.1900,$1.04B,1986.0,,,https://old.nasdaq.com/symbol/fax,AMEX
3,IAF,Aberdeen Australia Equity Fund Inc,4.8600,$110.53M,,,,https://old.nasdaq.com/symbol/iaf,AMEX
4,AEF,"Aberdeen Emerging Markets Equity Income Fund, ...",6.7800,$344.1M,,,,https://old.nasdaq.com/symbol/aef,AMEX
...,...,...,...,...,...,...,...,...,...
286,WYY,WidePoint Corporation,0.5420,$45.75M,,Technology,EDP Services,https://old.nasdaq.com/symbol/wyy,AMEX
287,WTT,"Wireless Telecom Group, Inc.",1.2900,$27.93M,,Capital Goods,Electrical Products,https://old.nasdaq.com/symbol/wtt,AMEX
288,XTNT,"Xtant Medical Holdings, Inc.",0.8671,$11.47M,,Health Care,Biotechnology: Biological Products (No Diagnos...,https://old.nasdaq.com/symbol/xtnt,AMEX
289,ZDGE,"Zedge, Inc.",1.4100,$17.2M,2016.0,Technology,Computer Software: Prepackaged Software,https://old.nasdaq.com/symbol/zdge,AMEX


In [46]:
# Stack the NYSE, Nasdaq and AMEX listings (in that order) into one frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# produces the same result, keeps each listing's original row labels, and
# copies the data so the source frames are left untouched.
stocks = pd.concat([nyse, nqnm, amex])
# stocks.tail()
# stocks


In [47]:
# Keep only the descriptive listing columns and renumber the rows 0..n-1,
# discarding the per-exchange row labels left over from stacking the listings.
stocks = stocks[['Symbol', 'Name', 'Exchange', 'Sector', 'industry']].reset_index(drop=True)
stocks

Unnamed: 0,Symbol,Name,Exchange,Sector,industry
0,DDD,3D Systems Corporation,NYSE,Technology,Computer Software: Prepackaged Software
1,MMM,3M Company,NYSE,Health Care,Medical/Dental Instruments
2,WBAI,500.com Limited,NYSE,Consumer Services,Services-Misc. Amusement & Recreation
3,WUBA,58.com Inc.,NYSE,Technology,"Computer Software: Programming, Data Processing"
4,EGHT,8x8 Inc,NYSE,Technology,EDP Services
...,...,...,...,...,...
7125,WYY,WidePoint Corporation,AMEX,Technology,EDP Services
7126,WTT,"Wireless Telecom Group, Inc.",AMEX,Capital Goods,Electrical Products
7127,XTNT,"Xtant Medical Holdings, Inc.",AMEX,Health Care,Biotechnology: Biological Products (No Diagnos...
7128,ZDGE,"Zedge, Inc.",AMEX,Technology,Computer Software: Prepackaged Software


In [48]:
# Columns that info_thread fills in for each symbol: four values copied from
# the yfinance info dict plus the Buy/Sell/Hold recommendation counts.
features = ['dividendRate', 'exDividendDate', 'lastSplitDate', 'lastSplitFactor', 'Buy', 'Sell', 'Hold']

# Create each feature column up front, initialised to NaN.
for feature in features:
    stocks.loc[:, feature] = np.nan

# Store the feature columns as object dtype: they later receive a mix of
# numbers, datetimes and strings (e.g. the split factor).
stocks[features] = stocks[features].astype('object')


In [49]:
# # uncomment the following when debugging
# stocks = stocks[0:10]
# stocks

In [50]:
# Build earnings-date lookups for every business day within `days_to_add`
# calendar days before and after today.
def makeQuarterlyDicts():
    """Scrape Yahoo Finance's earnings calendar into past and future lookups.

    One calendar page is requested per business day in the window
    ``today - days_to_add`` .. ``today + days_to_add`` (``days_to_add`` is a
    module-level setting). Only the first table on each page is read.

    Returns:
        (previous_dict, future_dict) where

        * ``previous_dict`` maps symbol -> ``(date, col4, col5, col6)`` for days
          up to and including today; the last three items are the 4th-6th
          columns of the scraped table (the caller labels them Expected,
          Actual and Surprise).
        * ``future_dict`` maps symbol -> ``date`` for days after today.

        ``date`` is the pandas Timestamp of the business day (it carries the
        time of day at which the scan started). When a symbol appears on
        several days, the latest day scanned wins.
    """
    # Take "now" once so every day is classified against the same instant.
    today = pd.to_datetime('today')
    window = pd.to_timedelta(days_to_add, unit='d')
    business_days = pd.bdate_range(start=today - window, end=today + window)

    previous_dict = {}
    future_dict = {}

    for date in business_days:
        day = str(date)[0:10]  # 'YYYY-MM-DD'
        url = 'https://finance.yahoo.com/calendar/earnings/?day='+day

        try:
            earnings = pd.read_html(url)[0]
        except ValueError as e:
            # read_html raises ValueError when the page contains no table,
            # e.g. market holidays or days with no scheduled earnings.
            print(type(e))
            continue

        # `<=` keeps today's entry on the "previous" side: its timestamp equals
        # `today` exactly, and earlier revisions compared it against a later
        # "now", which always classified it as past.
        if date <= today:
            # add to previous earnings dates
            for earnings_row in earnings.itertuples():
                previous_dict[earnings_row[1]] = (date, earnings_row[4], earnings_row[5], earnings_row[6])
        else:
            # add to future earnings dates
            for earnings_row in earnings.itertuples():
                future_dict[earnings_row[1]] = date

    return previous_dict, future_dict

In [51]:
def QuarterlyInfo(stocks):
    """Attach previous and upcoming earnings information to ``stocks``.

    The lookups produced by ``makeQuarterlyDicts`` are turned into two frames
    keyed by ``Symbol`` and merged onto ``stocks``:

    * previous earnings (``Previous_Earnings_Date``, ``Expected``, ``Actual``,
      ``Surprise``) with an inner merge, so symbols without a previous
      earnings entry are dropped;
    * upcoming earnings (``Future Earnings Date``) with a left merge, so the
      column is empty where no upcoming date was found.

    Args:
        stocks: DataFrame with a ``Symbol`` column.

    Returns:
        The merged DataFrame. Elapsed time is printed as a side effect.
    """
    started_at = round(time.time())

    previous_dict, future_dict = makeQuarterlyDicts()

    def _symbol_frame(lookup, column_names):
        # One row per symbol; the dict keys become a leading 'Symbol' column.
        frame = pd.DataFrame.from_dict(lookup, orient='index', columns=column_names)
        return frame.reset_index().rename(columns={'index': 'Symbol'})

    previous = _symbol_frame(previous_dict,
                             ['Previous_Earnings_Date', 'Expected', 'Actual', 'Surprise'])
    future = _symbol_frame(future_dict, ['Future Earnings Date'])

    stocks = stocks.merge(previous, how='inner').merge(future, how='left')

    elapsed_minutes = (round(time.time())-started_at)/ 60.0
    print ('Finished quarterly thread in ', elapsed_minutes, " minutes!")

    return stocks


In [52]:
def getInfo(symbol, info=True, calendar=True, quarterly_financials=True):
    """Fetch the yfinance info dict and recommendations for one symbol, with retries.

    Connection-level failures (``urllib.error.HTTPError`` or
    ``requests.exceptions.ConnectionError``) are retried with an escalating
    back-off: 10 s, 30 s, 5 min, 5 min, 1 h; the sixth such failure gives up.
    Any other exception is retried immediately, up to three times; the fourth
    gives up. ``KeyboardInterrupt``/``SystemExit`` are not intercepted, so the
    run can be stopped by hand.

    Args:
        symbol: ticker symbol passed to ``yf.Ticker``.
        info, calendar, quarterly_financials: accepted for backward
            compatibility; currently ignored.

    Returns:
        ``(info, recs)`` -- ``ticker.info`` and ``ticker.recommendations`` --
        on success, or ``('error', 'error')`` once retries are exhausted.
    """
    # `import urllib` alone does not guarantee the `error` submodule is loaded.
    import urllib.error

    # (seconds to sleep, message) for each successive connection failure.
    backoff_schedule = (
        (10, "HTTP Error. Will try again in 10 seconds"),
        (30, "HTTP Error. Will try again in 30 seconds"),
        (300, "HTTP Error. Will try again in 5 minutes"),
        (300, "HTTP Error. Will try again in 5 minutes"),
        (3600, "HTTP Error.  Will try again in 1 hour"),
    )
    max_other_retries = 3

    ticker = yf.Ticker(symbol)
    connection_tries = 0
    other_failures = 0

    while True:
        try:
            return ticker.info, ticker.recommendations

        except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e:
            if connection_tries >= len(backoff_schedule):
                print (e, "HTTP Error.  Giving up.")
                return 'error', 'error'
            delay, message = backoff_schedule[connection_tries]
            print (e, message)
            time.sleep(delay)
            connection_tries += 1

        except Exception:
            # Unexpected failure (e.g. a parsing error inside yfinance):
            # retry right away a few times, then report the symbol as failed.
            if other_failures >= max_other_retries:
                return 'error', 'error'
            other_failures += 1
            

In [53]:
def filterRecs(recs, earnings_date):
    """Count analyst ratings issued in the 90 days before an earnings date.

    A recommendation is counted when it is strictly older than
    ``earnings_date`` and less than 90 days older. Grades that match none of
    the known buy/hold/sell labels are printed and not counted.

    Args:
        recs: DataFrame with a ``To Grade`` column whose index turns into a
            ``Date`` column on ``reset_index()``. ``None`` or an empty frame
            (no recommendations available) yields all-zero counts.
        earnings_date: timestamp of the earnings release. A missing value
            (NaN/NaT/None) yields all-zero counts.

    Returns:
        Tuple ``(buy, hold, sell)`` of integer counts.
    """
    buy_grades = ('Buy', 'Long-Term Buy', 'Overweight', 'OutPerform', 'Strong Buy', 'Outperform')
    hold_grades = ('Neutral', 'Hold', 'Sector Perform', 'Equal-Weight', 'Market Perform', 'Perform')
    sell_grades = ('Sell', 'Underperform', 'Underweight')

    # Nothing to count: avoids AttributeError on None and TypeError on
    # `nan - Timestamp` when the earnings date is missing.
    if recs is None or len(recs) == 0 or pd.isna(earnings_date):
        return 0, 0, 0

    recs = recs.reset_index()
    no_gap = pd.to_timedelta(0, unit='d')
    max_gap = pd.to_timedelta(90, unit='d')

    buy = hold = sell = 0

    # Walk from the last row to the first, as before, so any "unknown" messages
    # appear in the same order.
    for rec_date, grade in zip(recs.Date.iloc[::-1], recs['To Grade'].iloc[::-1]):
        time_difference = earnings_date - rec_date
        if not (no_gap < time_difference < max_gap):
            continue

        if grade in buy_grades:
            buy += 1
        elif grade in hold_grades:
            hold += 1
        elif grade in sell_grades:
            sell += 1
        else:
            print ('Unknown Recommendation: ', grade)

    return buy, hold, sell

In [54]:
# @storeInQueue
def info_thread(stocks, features):
    """Fill dividend, split and analyst-recommendation columns for every stock.

    For each row the symbol (first column) is looked up with ``getInfo``:

    * ``dividendRate`` and ``exDividendDate`` are copied from the info dict;
    * ``lastSplitDate``/``lastSplitFactor`` are kept only when the split is
      newer than ``days_to_add`` days, otherwise set to NaN;
    * ``Buy``/``Hold``/``Sell`` receive the counts returned by ``filterRecs``
      for the row's ``Previous_Earnings_Date``.

    Symbols that are not strings or contain ``/`` or ``.`` are skipped, as are
    symbols for which ``getInfo`` reports an error; their feature columns are
    left untouched. Progress is printed while running.

    Args:
        stocks: DataFrame whose first column is the ticker symbol and which
            contains a ``Previous_Earnings_Date`` column plus the feature
            columns. It is modified in place, so pass a copy to keep the
            original.
        features: names of the columns to return.

    Returns:
        ``stocks[features]`` after all rows have been processed.

    Raises:
        KeyError: if ``stocks`` has no ``Previous_Earnings_Date`` column.
    """
    if 'Previous_Earnings_Date' not in stocks.columns:
        raise KeyError("info_thread needs a 'Previous_Earnings_Date' column; "
                       "run QuarterlyInfo on the frame first")
    # itertuples() puts the index at position 0, so column k sits at k + 1.
    # Looking the column up by name avoids depending on column order (a fixed
    # position of 10 lands on 'Buy' when the feature columns precede the
    # earnings columns).
    earnings_pos = list(stocks.columns).index('Previous_Earnings_Date') + 1

    initial_time = round(time.time())

    # Collected for diagnostics only; not part of the return value.
    bad_symbols = []
    symbols_with_no_return = []

    # Splits older than this are treated as "no recent split".
    split_cutoff = pd.to_datetime('today') - pd.to_timedelta(days_to_add, unit='d')

    for i, row in enumerate(stocks.itertuples()):
        print ("Info Thread: ", i)
        if i % 100 == 0:
            print ("It has been: ", (round(time.time())-initial_time)/ 60.0, " minutes in info thread")

        label = row[0]   # actual index label; safe even if the index is not 0..n-1
        symbol = row[1]

        # Non-string symbols (e.g. a ticker parsed as NaN) cannot be queried.
        if not isinstance(symbol, str) or '/' in symbol or '.' in symbol:
            print ('Bad symbol in ', symbol)
            bad_symbols.append(i)
            continue

        info, recs = getInfo(symbol)

        if info == 'error':
            symbols_with_no_return.append(symbol)
            print('No return: ', symbol)
            continue

        recent_split = False
        for feature in ['dividendRate', 'exDividendDate', 'lastSplitDate', 'lastSplitFactor']:
            try:
                value = info[feature]

                if feature == 'lastSplitDate':
                    split_date = datetime.datetime.utcfromtimestamp(value)
                    if split_date > split_cutoff:
                        stocks.at[label, feature] = split_date
                        recent_split = True
                    else:
                        stocks.at[label, feature] = np.nan
                elif feature == 'lastSplitFactor':
                    # Only meaningful together with a recent split date.
                    stocks.at[label, feature] = value if recent_split else np.nan
                else:
                    stocks.at[label, feature] = value
            except Exception as e:
                # Missing key or unusable value: record NaN and move on.
                stocks.at[label, feature] = np.nan
                print(type(e), ": ", feature)

        #populate the buy, sell and hold columns
        previous_earnings_date = row[earnings_pos]
        buy, hold, sell = filterRecs(recs, previous_earnings_date)

        stocks.at[label, 'Buy'] = buy
        stocks.at[label, 'Hold'] = hold
        stocks.at[label, 'Sell'] = sell

    print ('Finished info thread in ', (round(time.time())-initial_time)/ 60.0, " minutes!")
    return stocks[features]

In [None]:
# Merge earnings dates onto the listing. The merge on previous earnings is an
# inner join, so only symbols with a previous earnings entry remain.
stocks = QuarterlyInfo(stocks.copy())

In [None]:
# Row/column count after the earnings merge.
stocks.shape

In [None]:
# Inspect the merged frame.
stocks

In [None]:
# Fill the feature columns. info_thread works on a copy and returns only the
# feature columns, which are written back onto `stocks` here.
stocks[features] = info_thread(stocks.copy(), features)

In [None]:
# Spot-check the first rows after the feature columns were filled.
stocks.head(10)

In [None]:
# my_queue = queue.Queue()

# t1 = threading.Thread(target=info_thread, args=(stocks.copy(), features,))
# t1.start()

# # t2 = threading.Thread(target=QuarterlyInfo, args=(stocks.copy(),))
# # t2.start()

# t1.join()
# # t2.join()

In [None]:
# my_list = []
# while not my_queue.empty():
#     my_list.append(my_queue.get())
    


In [None]:
# stocks = stocks.copy()

# for item in my_list:
#     print(type(item))
#     if 'previous_earnings_date' in item.columns:
#         stocks[['previous_earnings_date', 'upcoming_earnings_date']] = item[['previous_earnings_date', 'upcoming_earnings_date']].copy()
#     else:
#         stocks[features] = item[features].copy()

In [None]:
# Persist the result; the row index is written as the first (unnamed) column.
stocks.to_csv('info.csv')

In [None]:
# with open('bad_symbols.txt', 'w') as f:
#     for s in bad_symbols:
#         f.write(str(s) + "\n")
        
# with open('symbols_no_return.txt', 'w') as f:
#     for line in symbols_with_no_return:
#         f.write(str(line) + "\n")