In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from sys import argv
from os.path import exists
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import selenium
import re
import requests

Defining a function to scrape the data
=============

The function below scrapes stock data from gurufocus. As of now for each stock symbol it goes to one webpage and saves the Graham number and current stock price and returns the data in a list of lists.

Future goals include extending the function to take two inputs; ticker and a list. The list contains [xpath name, and base URL]. The function will then return a list of lists, with each sublist being ticker any other data assciated to n-xpaths in the list.

-------------------------------------------------------------------------------------------------------------------------------
gnumber = sqrt of (22.5	x	Tangible Book Value per Share	x	EPS without Non-Recurring Items (NRI) (TTM))

Peter Lynch fair value [P/E (NRI) Ratio	/	EBITDA Growth Rate (5-year average) ]x 5-Year EBITDA Growth Rate	x	Earnings Per Share (NRI)

In [19]:
def GuruFocusScrape(tickerName, scraped_list):

    """
    Function opens gurufocus.com for a given tickerName and returns some value.
    The function needs three inputs, tickerName, baseWebpage, XPath element list
    """
    
    baseWebpage = "https://www.gurufocus.com/stock/" + tickerName
    #print(baseWebpage)

    request = requests.get(baseWebpage)
    #print(request)
    if request.status_code == 200:
    
        driver = webdriver.Chrome() # Open chrome browser
        wait = WebDriverWait(driver, 60) #defines an object that allows an expected condition to call it

        driver.get(baseWebpage)
        #driver.get(baseWebpage)
        
        if baseWebpage == driver.current_url:
        
            elementXPath = '//*[@id="cboxClose"]' # identifier button that closes stupid pop-up

            # EC.elment_to... retuns a value of thing that does or doesn't exist, wait.until waits 60s until EC returns True
            element = wait.until(EC.element_to_be_clickable((By.XPATH,elementXPath)))
            element.click() #clicks element when it becomes clickable
       
            EPSXpath = '//*[@id="wrapper"]/font/font/div/div[4]/div/table/tbody/tr[2]/td' #earnings-per-share
            #PtTBXpath = '//*[@id="wrapper"]/font/font/div/div[3]/table/tbody/tr[2]/td[2]' #price-to-tangible-book-value
           
            PtTBXpath =  '//*[@id="p2tangible_book"]'
            StockPriceXpath = '//*[@id="stock_profile_top"]/div[3]/font[1]' #stock price
            PeterLynchRatioXpath = '//*[@id="wrapper"]/font/font/div/div[3]/table/tbody/tr[6]/td[2]' #( PL value)/(Stock Price)
            GFStarRankingXpath = '//*[@id="unit_ul1888590250"]/li' #guru focus star rating 

            try:
                EPS = driver.find_element_by_xpath(EPSXpath).text
            except:
                EPS = 'NA'
                
            try:
                PtTB = driver.find_element_by_xpath(PtTBXpath).text
            except:
                PtTB = 'NA'
                
            try:
                StockPrice = driver.find_element_by_xpath(StockPriceXpath).text
            except:
                StockPrice = 'NA'
                
            try:   
                GFStarRanking = driver.find_element_by_xpath(GFStarRankingXpath).text
            except:
                GFStarRanking = 'NA'
                
            try:   
                PeterLynchRatio = driver.find_element_by_xpath(PeterLynchRatioXpath).text
            except:
                PeterLynchRatio = 'NA'
        
        else:
            EPS = 'NA'
            PtTB = 'NA'
            StockPrice = 'NA'
            GFStarRanking = 'NA'
            PeterLynchRatio = 'NA'
    
    else:
        
        EPS = 'NA'
        PtTB = 'NA'
        StockPrice = 'NA'
        GFStarRanking = 'NA'
        PeterLynchRatio = 'NA'
    
    
    sleep(5)   
    driver.close()
    print([tickerName, StockPrice, EPS, PtTB, StockPrice, PeterLynchRatio, GFStarRanking])
    return scraped_list.append([tickerName, EPS, PtTB, StockPrice, PeterLynchRatio, GFStarRanking])

 

Stock Tickers:
========
Obtaining all the nasdaq listed stocks for a given day from the nasdaq website. The list of companies that are traded on the index is given by nasdaq as a text file with delimter '|' and updated daily.

In [17]:
# Assign url of file: url
url = 'ftp://ftp.nasdaqtrader.com/symboldirectory/nasdaqlisted.txt'

# Read file into a DataFrame: df
nasdaq_info = pd.read_csv(url, sep='|')
tickers_to_get = nasdaq_info.iloc[0:3,0]
print(nasdaq_info.shape)

(3210, 8)


Obtaining, formatting, and cleaning the data
===========
Call the function with all stock tickers of interest, return a list, then format list to be readable df. Merge original nasdaq df and our new df. Finally filter the list for interested criteria.

In [20]:

ScrapedList =[]

#[GuruFocusScrape(ticker) for ticker in df['Symbol']] #loops for entire column of stock symbols, some 10,000
GNumber_list = [GuruFocusScrape(ticker, ScrapedList) for ticker in tickers_to_get] #loops over defined number of stock symbols
##---- Might try using tickers_to_get.apply(GuruFocusScrape(ticker,Scrapelist), axis=1)


#Turn returned scraped data list into a df
ScrapedData_df = pd.DataFrame(ScrapedList)
ScrapedData_df.columns = (['Symbol', 'EPS', 'PtTB', 'StockPrice', 'PeterLynchRatio', 'GFStarRanking'])

##----Comment these
ScrapedData_df['StockPrice'] = pd.to_numeric(ScrapedData_df['StockPrice'].str[1::], errors='coerce')
ScrapedData_df['EPS'] = pd.to_numeric(ScrapedData_df['EPS'], errors='coerce')
#ScrapedData_df['PtTB'] = pd.to_numeric(ScrapedData_df['PtTB'], errors='coerce')
#ScrapedData_df['PeterLynchRatio'] = pd.to_numeric(ScrapedData_df['PeterLynchRatio'], errors='coerce')

#merge nasdaq_df and ScrapedData_df together on column named Symbol
nasdaq_scraped_data_df = pd.merge(nasdaq_info, ScrapedData_df, on='Symbol')

##----Comment this
nasdaq_scraped_data_df.to_csv('C:\\Users\\RiggsSc\\Documents\\SCR\\GWest_data\\nasdaq_scraped_data.csv')

#print(ScrapedData_df)
print(nasdaq_scraped_data_df)

['AAAP', u'$40.57', u'-0.86', u'Price-to-Tangible-Book', u'$40.57', u'-2.16', 'NA']
['AABA', u'$58.36', u'-0.02', u'Price-to-Tangible-Book', u'$58.36', u'-0.97', 'NA']
['AAL', u'$52.61', u'4.15', 'NA', u'$52.61', 'NA', 'NA']
  Symbol                                      Security Name Market Category  \
0   AAAP  Advanced Accelerator Applications S.A. - Ameri...               Q   
1   AABA                         Altaba Inc. - Common Stock               Q   
2    AAL       American Airlines Group, Inc. - Common Stock               Q   

  Test Issue Financial Status  Round Lot Size ETF NextShares   EPS  \
0          N                N           100.0   N          N -0.86   
1          N                N           100.0   N          N -0.02   
2          N                N           100.0   N          N  4.15   

                     PtTB  StockPrice PeterLynchRatio GFStarRanking  
0  Price-to-Tangible-Book       40.57           -2.16            NA  
1  Price-to-Tangible-Book       58.36

In [37]:
#Here do all analysis and filter of data
print(nasdaq_scraped_data_df)

   Symbol                                      Security Name Market Category  \
0    ACFC  Atlantic Coast Financial Corporation - Common ...               G   
1    ACGL             Arch Capital Group Ltd. - Common Stock               Q   
2   ACGLP  Arch Capital Group Ltd. - Depositary Shares Re...               Q   
3    ACHC     Acadia Healthcare Company, Inc. - Common Stock               Q   
4    ACHN     Achillion Pharmaceuticals, Inc. - Common Stock               Q   
5    ACIA         Acacia Communications, Inc. - Common Stock               Q   
6    ACIU                        AC Immune SA - Common Stock               G   
7    ACIW                 ACI Worldwide, Inc. - Common Stock               Q   
8    ACLS          Axcelis Technologies, Inc. - Common Stock               Q   
9    ACNB                    ACNB Corporation - Common Stock               S   
10   ACOR           Acorda Therapeutics, Inc. - Common Stock               Q   
11   ACRS          Aclaris Therapeutics,

In [3]:
import requests

url ='http://api.wolframalpha.com/v2/query?appid=DEMO&input=population%20of%20france&output=json'
test = requests.get(url)
json_data = test.json()
print(test)

<Response [200]>


In [None]:
#fit G. west with r^2 values
#will FB-prophet method df.make_future_forecast() help?