In [1]:
import datetime
import numpy as np 
import time
from bs4 import BeautifulSoup
import requests
import re
from datetime import datetime
import pickle
import string
import pandas as pd
from copy import deepcopy

In [2]:
import logging
# logging.getLogger() with no name returns the root logger; raising its level
# to CRITICAL suppresses every log record below that severity.
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

# Auxiliary functions

In [3]:
def get_prices(soup_tag):
    """Extract the current, low and median prices from a sequence of scraped tags.

    Each price is rebuilt from the digit groups found in the string form of
    one element of ``soup_tag``: index 1 (median), index -3 (low) and
    index -1 (current).  The last digit group is taken as the fractional part
    and every preceding group as a piece of the integer part (groups that were
    separated by a thousands comma).

    Parameters
    ----------
    soup_tag : sequence
        Indexable collection whose elements render, via ``str()``, to markup
        containing a price such as ``12.34`` or ``1,234.56``.

    Returns
    -------
    tuple of float
        ``(current_price, low_price, median_price)``.

    Raises
    ------
    IndexError
        If ``soup_tag`` is too short, or an element does not contain both an
        integer and a fractional digit group.
    """

    def convert_tag_to_price(digit_groups):
        # Callers treat IndexError as "no price data", so keep raising it when
        # the integer or the fractional group is missing.
        if len(digit_groups) < 2:
            raise IndexError("price tag does not contain an integer and a fractional part")

        *whole_groups, fraction = digit_groups

        # Joining the leading groups handles any number of thousands
        # separators (1,234.56 as well as 1,234,567.89).  The fraction is
        # scaled by its real digit count so "99.5" yields 99.5 rather than 99.05.
        return int("".join(whole_groups)) + float(fraction) / 10 ** len(fraction)

    median_price = convert_tag_to_price(re.findall(r'\d+', str(soup_tag[1])))
    current_price = convert_tag_to_price(re.findall(r'\d+', str(soup_tag[-1])))
    low_price = convert_tag_to_price(re.findall(r'\d+', str(soup_tag[-3])))

    return current_price, low_price, median_price
        
def get_number_of_analysts(soup_tag):
    """Sum every third single-number value found in the scraped tags.

    Only tags whose string form contains exactly one run of digits are
    considered.  From the resulting list of integers the values at positions
    2, 5, 8, ... are added up.

    NOTE(review): the values are presumably laid out in rows of three with the
    third column holding the current analyst counts -- confirm against the
    page layout.

    Parameters
    ----------
    soup_tag : iterable
        Scraped tags (anything whose ``str()`` form can be searched for digits).

    Returns
    -------
    numpy scalar
        Result of ``np.sum`` over the selected values.
    """
    single_number_values = []
    for tag in soup_tag:
        digit_groups = re.findall(r'\d+', str(tag))
        if len(digit_groups) == 1:
            single_number_values.append(int(digit_groups[0]))

    # Positions 2, 5, 8, ... are the last entry of each complete group of three.
    every_third = np.array(single_number_values)[2::3]

    return np.sum(every_third)

def get_pe_ratio(soup_tag):
    """Parse the P/E ratio from the first scraped tag and print it.

    The value is the text located between the end of the first opening tag
    and the next ``<`` in ``str(soup_tag[0])``.

    Parameters
    ----------
    soup_tag : sequence
        Scraped tags; only the first element is read.

    Returns
    -------
    float
        The parsed P/E ratio.

    Raises
    ------
    IndexError
        If ``soup_tag`` is empty or its first element contains no ``<``.
    ValueError
        If the extracted text is not a number.
    """
    markup = str(soup_tag[0])
    pe_text = markup.split("<")[1].split(">")[-1]

    # Remove thousands separators so a value such as "1,234.56" is parsed
    # instead of being rejected by float() and reported as missing data.
    pe_ratio = float(pe_text.replace(",", ""))
    print("P/E ratio", pe_ratio)

    return pe_ratio
    
def get_short_float(soup_tag):
    """Parse the short-interest figure from the last scraped tag and print it.

    The text between the end of the first opening tag and the next ``<`` in
    ``str(soup_tag[-1])`` is taken, its final character is dropped
    (presumably a trailing percent sign -- confirm against the page), and the
    remainder is converted to ``float``.

    Raises ``IndexError`` when the sequence is empty or the element holds no
    ``<``, and ``ValueError`` when the remaining text is not a number.
    """
    last_markup = str(soup_tag[-1])
    after_first_bracket = last_markup.split("<")[1]
    value_text = after_first_bracket.split(">")[-1]

    # Drop the trailing unit character before converting.
    short_float = float(value_text[:-1])
    print("Short interest", short_float)

    return short_float

# Fetch company symbols and prices

In [4]:
def get_nyse_stocks(timeout=30):
    """Scrape NYSE ticker symbols from the eoddata.com stock listing pages.

    One listing page per letter A-Z is downloaded.  Every link whose ``href``
    contains ``/stockquote/NYSE/`` contributes the last path segment of that
    ``href`` with everything after the first dot removed.

    Parameters
    ----------
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot block the
        notebook indefinitely.  Defaults to 30 seconds.

    Returns
    -------
    list of str
        Unique symbols in order of first appearance.

    Notes
    -----
    Exceptions raised by ``requests.get`` (including timeouts) are not caught
    and propagate to the caller.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0'}

    list_stock = []
    # Set mirror of list_stock: constant-time duplicate check while the list
    # keeps the first-seen order.
    seen = set()

    for letter in string.ascii_uppercase:
        print("Fetching for letter: " + letter)

        r = requests.get("https://eoddata.com/stocklist/NYSE/" + letter + ".htm",
                         headers=headers, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')

        for link in soup.find_all("a", href=True):
            href = link.get('href')
            if "/stockquote/NYSE/" not in href:
                continue

            # ".../stockquote/NYSE/ABC.htm" -> "ABC"
            symbol = href.split("/")[-1].split(".")[0]
            if symbol not in seen:
                seen.add(symbol)
                list_stock.append(symbol)

    return list_stock

In [5]:
def get_interesting_stocks_wsj(list_stock, timeout=30):
    """Collect valuation data from wsj.com for each symbol and shortlist some of them.

    For every symbol two pages are downloaded: the research-ratings page
    (prices and analyst counts) and the quote overview page (P/E ratio and
    short interest).

    Parameters
    ----------
    list_stock : iterable of str
        Ticker symbols to look up.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot block the
        scrape indefinitely.  Defaults to 30 seconds.

    Returns
    -------
    interesting_stocks : dict
        ``symbol -> low_price_change`` for symbols whose current price is
        below the low price and that have at least 6 analysts.
    database : dict
        ``symbol -> [pe_ratio, short_float, current_price, low_price_change,
        median_price_change, number_of_analysts]`` for symbols where P/E,
        short interest and price data were all available.

    Notes
    -----
    Exceptions raised by ``requests.get`` are not caught and propagate.
    """
    # Loop-invariant request settings.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0'}
    quote_url = "https://www.wsj.com/market-data/quotes/"

    interesting_stocks = dict()
    database = dict()

    for symbol in list_stock:

        print(symbol)

        r = requests.get(quote_url + symbol + "/research-ratings", headers=headers, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')
        soup_tag_research = soup.find_all("span", attrs={"class": "data_data"})

        r = requests.get(quote_url + symbol, headers=headers, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')
        soup_tag_overview = soup.find_all("span", attrs={"class": "WSJTheme--data_data--3CZkJ3RI"})

        # full_data stays True only when both overview figures could be parsed;
        # it guards against storing a stale pe_ratio/short_float from an
        # earlier symbol.
        full_data = True
        try:
            pe_ratio = get_pe_ratio(soup_tag_overview)
        except (ValueError, IndexError):
            full_data = False
            print("No P/E data found")

        try:
            short_float = get_short_float(soup_tag_overview)
        except (ValueError, IndexError):
            full_data = False
            print("No short float data found")

        try:
            current_price, low_price, median_price = get_prices(soup_tag_research)
            # Percentage distance of the low / median price from the current price.
            low_price_change = ((low_price-current_price)/current_price)*100
            median_price_change = ((median_price-current_price)/current_price)*100
            number_of_analysts = get_number_of_analysts(soup_tag_research)

            if full_data:
                database[symbol] = [pe_ratio, short_float, current_price, low_price_change,
                                    median_price_change, number_of_analysts]
            if current_price < low_price and number_of_analysts >= 6:
                interesting_stocks[symbol] = low_price_change

        except (IndexError, ZeroDivisionError):
            # ZeroDivisionError: a current price parsed as 0 must not abort the
            # whole scrape; treat it like missing data for this symbol.
            print("No data found for symbol: " + symbol)

    return interesting_stocks, database

In [6]:
def get_interesting_stocks_zacks(interesting_stocks, database, timeout=30):
    """Augment the scraped data with score letters and rank digits from zacks.com.

    For every symbol in ``database`` the Zacks quote page is downloaded.  When
    the page holds exactly 8 ``composite_val`` spans, the upper-case letters
    found in spans 5-7 form ``score_list``.

    Both arguments are modified in place and also returned.

    Parameters
    ----------
    interesting_stocks : dict
        ``symbol -> potential upside``.  If the worst letter in ``score_list``
        is 'B' or better, the value is replaced by
        ``[potential_upside, score_list]``.
    database : dict
        ``symbol -> list``.  Each processed entry is extended with
        ``[score_list, buy_recommendation]``, where ``buy_recommendation`` is
        the list of digit strings found in the first tag of the ``rank_view``
        markup.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection cannot block the
        scrape indefinitely.  Defaults to 30 seconds.

    Returns
    -------
    tuple of dict
        ``(interesting_stocks, database)`` -- the same objects that were passed in.

    Notes
    -----
    Exceptions raised by ``requests.get`` are not caught and propagate.
    """
    # Loop-invariant request headers.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0'}

    # Only existing keys are reassigned inside the loop, so iterating the dict
    # while updating it is safe.
    for symbol in database:

        # NOTE(review): the fixed "?q=aapl" query string looks like a leftover
        # from a manual search; kept unchanged -- confirm whether it is needed.
        r = requests.get("https://www.zacks.com/stock/quote/" + symbol + "?q=aapl",
                         headers=headers, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')
        soup_tag_rank = soup.find_all("p",  attrs={"class": "rank_view"})
        soup_tag = soup.find_all("span", attrs={"class": "composite_val"})

        try:
            if len(soup_tag) == 8:
                soup_tag = soup_tag[-4:-1]
                score_list = [char for tag in soup_tag for char in str(tag) if char.isupper()]

                if score_list:
                    score_list.sort()
                    # After sorting, the last letter is the worst score.
                    if score_list[-1] <= 'B' and symbol in interesting_stocks:
                        potential_upside = interesting_stocks[symbol]
                        interesting_stocks[symbol] = [potential_upside, score_list]

                # str() of an empty result set contains no "<", so the [1]
                # lookup raises IndexError, which is handled below.
                buy_recommendation = re.findall(r'\d+', str(soup_tag_rank).split("<")[1])
                database[symbol] = database[symbol] + [score_list, buy_recommendation]
                print(database[symbol])

            else:
                print("No data found for symbol: " + symbol)

        except IndexError:
            print("No data found for symbol: " + symbol)

    return interesting_stocks, database
            

In [7]:
# Scrape the NYSE symbol list (one HTTP request per letter A-Z).
list_stock = get_nyse_stocks()

Fetching for letter: A
Fetching for letter: B
Fetching for letter: C
Fetching for letter: D
Fetching for letter: E
Fetching for letter: F
Fetching for letter: G
Fetching for letter: H
Fetching for letter: I
Fetching for letter: J
Fetching for letter: K
Fetching for letter: L
Fetching for letter: M
Fetching for letter: N
Fetching for letter: O
Fetching for letter: P
Fetching for letter: Q
Fetching for letter: R
Fetching for letter: S
Fetching for letter: T
Fetching for letter: U
Fetching for letter: V
Fetching for letter: W
Fetching for letter: X
Fetching for letter: Y
Fetching for letter: Z


In [None]:
# Query wsj.com for every symbol (two HTTP requests per symbol) and build the
# shortlist plus the per-symbol database.
interesting_stocks, database = get_interesting_stocks_wsj(list_stock)

A
P/E ratio 46.1
Short interest 0.72
AA
No P/E data found
Short interest 5.49
AAC
No P/E data found
Short interest 0.2
No data found for symbol: AAC
AAI-B
No P/E data found
No short float data found
No data found for symbol: AAI-B
AAI-C
No P/E data found
No short float data found
No data found for symbol: AAI-C
AAIC
P/E ratio 8.21
Short interest 3.7
AAN
No P/E data found
Short interest 2.33
AAP
P/E ratio 26.39
Short interest 5.04
AAQ
No P/E data found
No short float data found
No data found for symbol: AAQ
AAQC
No P/E data found
No short float data found
AAT
P/E ratio 127.93
Short interest 2.04
AB
P/E ratio 15.14
Short interest 1.11
ABB
P/E ratio 13.56
Short interest 0.27
ABBV
P/E ratio 39.66
Short interest 0.87
ABC
No P/E data found
Short interest 2.31
ABEV
P/E ratio 22.41
Short interest 0.7
ABG
P/E ratio 11.8
Short interest 6.96
ABM
P/E ratio 73.82
Short interest 2.09
ABR
P/E ratio 7.38
Short interest 3.94
ABR-A
No P/E data found
No short float data found
No data found for symbol: AB

No P/E data found
No short float data found
No data found for symbol: ALL-I
ALL-Y
No P/E data found
No short float data found
No data found for symbol: ALL-Y
ALLE
P/E ratio 30.64
Short interest 1.24
ALLY
P/E ratio 9.4
Short interest 3.91
ALP-Q
No P/E data found
No short float data found
No data found for symbol: ALP-Q
ALSN
P/E ratio 17.3
Short interest 3.31
ALT-A
No P/E data found
No short float data found
No data found for symbol: ALT-A
ALTG
No P/E data found
Short interest 1.66
ALU
No P/E data found
No short float data found
No data found for symbol: ALU
ALUS
No P/E data found
Short interest 3.45
ALV
P/E ratio 34.7
Short interest 1.73
ALX
P/E ratio 25.08
Short interest 1.98
AM
P/E ratio 13.35
Short interest 2.66
AMB
No P/E data found
No short float data found
No data found for symbol: AMB
AMBC
No P/E data found
Short interest 7.9
AMC
No P/E data found
Short interest 21.1
AMCR
P/E ratio 21.56
Short interest 3.03
AME
P/E ratio 38.37
Short interest 0.56
AMG
P/E ratio 20.68
Short interes

P/E ratio 29.09
Short interest 0.74
AVYA
No P/E data found
Short interest 14.64
AWF
No P/E data found
No short float data found
No data found for symbol: AWF
AWI
P/E ratio 28.73
Short interest 2.91
AWK
P/E ratio 38.79
Short interest 1.16
AWP
No P/E data found
No short float data found
No data found for symbol: AWP
AWR
P/E ratio 32.08
Short interest 2.01
AX
P/E ratio 13.78
Short interest 3.18
AXL
No P/E data found
Short interest 5.9
AXP
P/E ratio 26.21
Short interest 0.84
AXR
No P/E data found
Short interest 0.66
AXS
P/E ratio 29.93
Short interest 1.09
AXS-E
No P/E data found
No short float data found
No data found for symbol: AXS-E
AXTA
P/E ratio 90.36
Short interest 3.14
AYI
P/E ratio 27.62
Short interest 7.72
AYX
No P/E data found
Short interest 7.02
AZEK
No P/E data found
Short interest 1.22
AZO
P/E ratio 15.52
Short interest 1.51
AZRE
No P/E data found
Short interest 3.13
AZUL
No P/E data found
No short float data found
AZZ
P/E ratio 34.59
Short interest 1.27
B
P/E ratio 50.34
Shor

No P/E data found
No short float data found
No data found for symbol: BML-L
BMO
P/E ratio 14.94
Short interest 0.5
BMY
No P/E data found
Short interest 0.72
BNED
No P/E data found
Short interest 4.7
BNL
P/E ratio 49.64
Short interest 0.42
BNS
P/E ratio 16.75
Short interest 0.49
BNY
No P/E data found
No short float data found
No data found for symbol: BNY
BOA
No P/E data found
No short float data found
No data found for symbol: BOA
BOAC
No P/E data found
Short interest 0.11
No data found for symbol: BOAC
BOAS
No P/E data found
No short float data found
No data found for symbol: BOAS
BOE
No P/E data found
No short float data found
No data found for symbol: BOE
BOH
P/E ratio 19.86
Short interest 3.93
BOOT
P/E ratio 39.06
Short interest 6.72
BORR


In [None]:
# get_interesting_stocks_zacks assigns into both of its arguments, so deep
# copies are passed to keep the WSJ results from the previous cell unchanged.
interesting_stocks_zacks, database_zacks = get_interesting_stocks_zacks(deepcopy(interesting_stocks), deepcopy(database))

In [None]:
# Keep only the entries whose value is a list, i.e. the [upside, scores] form
# that get_interesting_stocks_zacks writes for symbols passing its score check.
# Each kept entry is stored as a (value, symbol) tuple.
interesting_stocks_filtered = [
    (entry, symbol)
    for symbol, entry in interesting_stocks_zacks.items()
    if isinstance(entry, list)
]

In [None]:
# Sanity check: number of shortlisted symbols vs. number of symbols in the
# Zacks-augmented database.
print(len(interesting_stocks_filtered), len(database_zacks))

# dump stocks into the database

In [None]:
# load the current database
# NOTE: unpickling can execute arbitrary code -- only load a file that this
# notebook produced itself.  The with-block closes the file handle, which the
# previous bare open() call never did.
with open("stocks_database.p", "rb") as database_file:
    data_list = pickle.load(database_file)

In [None]:
# append the new data for the new trading day
# NOTE: this mutates data_list in place, so re-running this cell appends the
# same snapshot a second time.
data_list.append(database_zacks)

In [None]:
# dump the new data
# The with-block flushes and closes the file as soon as the dump finishes
# instead of leaving that to garbage collection.
with open("stocks_database.p", "wb") as database_file:
    pickle.dump(data_list, database_file)

# print the interesting stocks

In [15]:
# Show the shortlist as ([upside, scores], symbol) tuples, in insertion order.
print(interesting_stocks_filtered)

[([11.33603238866397, ['A', 'A', 'B']], 'ABM'), ([10.27027027027027, ['A', 'B', 'B']], 'BBU'), ([1.3357899585444457, ['A', 'A', 'B']], 'CADE'), ([2.5519662367372695, ['A', 'A', 'B']], 'CMI'), ([3.9295258834009688, ['B', 'B', 'B']], 'CRI'), ([9.664153529814943, ['A', 'A', 'B']], 'CVA'), ([15.163147792706337, ['A', 'A', 'A']], 'CWH'), ([0.36196117143797113, ['A', 'B', 'B']], 'DCI'), ([10.100526567735756, ['B', 'B', 'B']], 'DEA'), ([16.16329239960173, ['A', 'A', 'B']], 'DOOR'), ([4.976141785957745, ['A', 'A', 'A']], 'DY'), ([6.6098081023454185, ['A', 'A', 'B']], 'EPD'), ([1.0101010101010066, ['A', 'A', 'A']], 'ET'), ([16.892725030826146, ['A', 'B', 'B']], 'PTR'), ([11.78662482839773, ['A', 'A', 'A']], 'RE'), ([0.28473804100228195, ['A', 'A', 'A']], 'SBSW'), ([16.978609625668444, ['A', 'B', 'B']], 'SPB'), ([4.311543810848395, ['A', 'B', 'B']], 'ST'), ([11.473626971179986, ['A', 'B', 'B']], 'STM')]


In [16]:
# Sort ascending by the first tuple element, the [upside, scores] list; lists
# compare element by element, so the upside value decides the order first.
interesting_stocks_filtered = sorted(interesting_stocks_filtered, key=lambda tup: tup[0])

In [17]:
# Display the sorted shortlist.
interesting_stocks_filtered

[([0.28473804100228195, ['A', 'A', 'A']], 'SBSW'),
 ([0.36196117143797113, ['A', 'B', 'B']], 'DCI'),
 ([1.0101010101010066, ['A', 'A', 'A']], 'ET'),
 ([1.3357899585444457, ['A', 'A', 'B']], 'CADE'),
 ([2.5519662367372695, ['A', 'A', 'B']], 'CMI'),
 ([3.9295258834009688, ['B', 'B', 'B']], 'CRI'),
 ([4.311543810848395, ['A', 'B', 'B']], 'ST'),
 ([4.976141785957745, ['A', 'A', 'A']], 'DY'),
 ([6.6098081023454185, ['A', 'A', 'B']], 'EPD'),
 ([9.664153529814943, ['A', 'A', 'B']], 'CVA'),
 ([10.100526567735756, ['B', 'B', 'B']], 'DEA'),
 ([10.27027027027027, ['A', 'B', 'B']], 'BBU'),
 ([11.33603238866397, ['A', 'A', 'B']], 'ABM'),
 ([11.473626971179986, ['A', 'B', 'B']], 'STM'),
 ([11.78662482839773, ['A', 'A', 'A']], 'RE'),
 ([15.163147792706337, ['A', 'A', 'A']], 'CWH'),
 ([16.16329239960173, ['A', 'A', 'B']], 'DOOR'),
 ([16.892725030826146, ['A', 'B', 'B']], 'PTR'),
 ([16.978609625668444, ['A', 'B', 'B']], 'SPB')]