Keeping this notebook as a fallback in case `yahooquery` stops working.

In [67]:
from requests import Session
from datetime import datetime as dt
import time
import pandas as pd
from bs4 import BeautifulSoup

# Browser-like User-Agent header; the scraping helpers in this notebook add it
# to their requests sessions.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.80 Safari/537.36'
    )
}

In [78]:
def finviz_pull(url):
    """Scrape every row of a paginated Finviz screener.

    The total number of stocks is read from the element with class
    ``count-text`` on the first response. Pages are then requested by
    appending ``&r=<offset>`` to ``url``, starting at offset 1 and advancing
    by 20 after each page, until that many rows have been collected.

    Parameters
    ----------
    url : str
        Screener URL without an ``&r=`` offset (the offset is appended here).

    Returns
    -------
    list of list of str
        One ``[symbol, name, sector]`` entry per table row, taken from the
        text of the row's 2nd, 3rd and 4th ``<a>`` elements.

    Raises
    ------
    ValueError
        If the page has no ``count-text`` element, or its text does not
        contain an integer where the total is expected.
    """
    # The context manager closes the session (and its pooled connections)
    # even when parsing fails part-way through.
    with Session() as s:
        s.headers.update(HEADERS)

        # First request is only used to discover the total number of stocks
        screener = s.get(url)
        soup = BeautifulSoup(screener.text, 'html.parser')

        count_element = soup.find(class_='count-text')
        if count_element is None:
            raise ValueError("Could not find the 'count-text' element on the screener page")

        # Skip the 7-character prefix (presumably "Total: " - verify against
        # the live page) and keep everything up to the first space. Using
        # split() instead of find() avoids chopping the last digit when the
        # number is not followed by a space.
        total_stocks = int(count_element.text[7:].split(' ', 1)[0])

        my_stocks = []
        page = 1
        stocks_imported = 0

        while stocks_imported < total_stocks:

            new_url = url + '&r=' + str(page)

            stock_data = s.get(new_url)
            soup = BeautifulSoup(stock_data.text, 'html.parser')

            # Rows alternate between two CSS classes; dark rows are listed
            # first, then light rows (same ordering as before).
            table_element = (soup.find_all(class_='table-dark-row-cp')
                             + soup.find_all(class_='table-light-row-cp'))

            # A page without rows would never advance the counter, so stop
            # here instead of requesting pages forever.
            if not table_element:
                print(f"No rows found at offset {page}; "
                      f"stopping after {stocks_imported} of {total_stocks} stocks")
                break

            for row in table_element:
                symbol_table = row.find_all('a')

                symbol = symbol_table[1].text
                symbol_name = symbol_table[2].text
                symbol_sector = symbol_table[3].text

                my_stocks.append([symbol, symbol_name, symbol_sector])
                stocks_imported += 1

            if stocks_imported == total_stocks:
                print(f"Total of {stocks_imported} stocks imported")
                print('Done loading')
            else:
                print(f"{stocks_imported} stocks imported")
                page += 20

    return my_stocks

In [79]:
def _to_unix_timestamp(date_string):
    """Convert 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS' to integer epoch seconds (local time)."""
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
        try:
            parsed = time.strptime(date_string, fmt)
        except ValueError:
            continue
        # time.mktime interprets the struct_time as local time
        return int(time.mktime(parsed))
    raise ValueError(
        f"Date {date_string!r} must look like 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'"
    )


def yahoo_finance_query(symbol, period1_date, period2_date, interval='1d', pre_post = 'false'):
    """Request chart data for one symbol from the Yahoo Finance v8 chart endpoint.

    Parameters
    ----------
    symbol : str
        Ticker; used both in the URL path and as the ``symbol`` query parameter.
    period1_date, period2_date : str
        Start and end of the range, each either ``'YYYY-MM-DD'`` (midnight is
        assumed) or ``'YYYY-MM-DD HH:MM:SS'``. Both are interpreted in the
        machine's local time zone.
    interval : str, default '1d'
        Sent unchanged as the ``interval`` query parameter.
    pre_post : str, default 'false'
        Sent unchanged as the ``includePrePost`` query parameter.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If either date string matches neither accepted format.
    """
    # Convert dates to timestamps (both arguments accept either format)
    period1 = _to_unix_timestamp(period1_date)
    period2 = _to_unix_timestamp(period2_date)

    params = {"symbol": symbol,
              "period1": period1,
              "period2": period2,
              "interval": interval,
              "includePrePost": pre_post}

    url = f'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}'

    # The context manager closes the session once the response has been read
    with Session() as s:
        s.headers.update(HEADERS)
        stock_price = s.get(url, params=params)
        return stock_price.json()

In [80]:
def create_dataframe(price_json):
    """Turn a chart JSON payload into a price DataFrame.

    Parameters
    ----------
    price_json : dict
        Payload with the layout ``price_json['chart']['result'][0]`` holding
        ``'timestamp'``, ``'indicators' -> 'quote'[0]`` (with ``high``,
        ``low``, ``open``, ``close``, ``volume`` lists) and ``'meta' -> 'symbol'``.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns ``timestamp, high_price, low_price, open_price,
        close_price, volume, symbol``; or the sentinel string ``'Nothing'``
        when the payload has no result entry or the entry has no timestamps.
    """
    results = price_json['chart']['result']

    # `result` may be None or an empty list (presumably what the API sends for
    # an unknown symbol - TODO confirm); treat that like any other "no data" case.
    if not results:
        return 'Nothing'

    base_data = results[0]
    if 'timestamp' not in base_data:
        return 'Nothing'

    quote = base_data['indicators']['quote'][0]

    # zip() keeps the rows aligned and truncates to the shortest series
    rows = list(zip(base_data['timestamp'],
                    quote['high'],
                    quote['low'],
                    quote['open'],
                    quote['close'],
                    quote['volume']))

    column_names = ['timestamp', 'high_price', 'low_price', 'open_price', 'close_price', 'volume']

    df = pd.DataFrame(rows, columns=column_names)
    df['symbol'] = base_data['meta']['symbol']

    return df

In [81]:
def get_prices(symbols_list,
               result_list,
               period1,
               interval='1d',
               period2=None,
               pre_post='false'):
    """Download prices for several symbols and append one combined frame to ``result_list``.

    Parameters
    ----------
    symbols_list : sequence
        Each item is indexed as ``item[0]`` (symbol passed to
        ``yahoo_finance_query``), ``item[1]`` (stored in column ``name``) and
        ``item[2]`` (stored in column ``industry``).
    result_list : list
        The combined DataFrame is appended to this list; nothing is returned.
    period1 : str
        Start date, forwarded to ``yahoo_finance_query``.
    interval : str, default '1d'
        Forwarded to ``yahoo_finance_query`` and stored in column ``interval``.
    period2 : str, optional
        End date/time, forwarded to ``yahoo_finance_query``. Defaults to the
        current local time *at call time*, formatted ``'%Y-%m-%d %H:%M:%S'``.
    pre_post : str, default 'false'
        Forwarded to ``yahoo_finance_query``.

    Notes
    -----
    Symbols for which ``create_dataframe`` does not return a DataFrame are
    skipped. If no symbol yields data, a message is printed and
    ``result_list`` is left unchanged.
    """
    # Resolve the default here rather than in the signature: a default
    # expression is evaluated once, when the function is defined, and would
    # go stale in a long-running kernel.
    if period2 is None:
        period2 = dt.today().strftime("%Y-%m-%d %H:%M:%S")

    length = len(symbols_list)

    # Collect per-symbol frames and concatenate once at the end
    # (DataFrame.append was removed in pandas 2.0 and was quadratic in a loop).
    frames = []

    for idx, symbol in enumerate(symbols_list):

        price_json = yahoo_finance_query(symbol[0], period1, period2, interval, pre_post)
        df_symbol = create_dataframe(price_json)

        # create_dataframe returns a sentinel string when there is no data
        if not isinstance(df_symbol, pd.DataFrame):
            continue

        df_symbol['name'] = symbol[1]
        df_symbol['industry'] = symbol[2]
        frames.append(df_symbol)

        if idx % 50 == 0:
            print('Running Yahoo Finance Query: ' + str(idx + 1) + ' of ' + str(length) + ' - ' + str(round((((idx + 1)/length) * 100),2)) + '%')

    if not frames:
        print('No price data returned for any symbol')
        return

    # Each frame keeps its own index, as it did with append()
    df_master = pd.concat(frames)

    # Convert epoch seconds to datetimes
    df_master['timestamp'] = pd.to_datetime(df_master['timestamp'], unit='s')

    df_master['interval'] = interval

    # Date part only
    df_master['just_date'] = df_master['timestamp'].dt.date

    print('Done')

    result_list.append(df_master)

Extract the list of stocks from Finviz

In [72]:
# Screener URL split into its query parameters for readability;
# the pieces concatenate to a single string.
my_stocks = finviz_pull(
    'https://finviz.com/screener.ashx'
    '?v=111'
    '&f=fa_pe_o10,sh_avgvol_o300,ta_highlow52w_a50h,ta_sma200_pa'
    '&ft=4'
    '&o=volume'
)


20 stocks imported
40 stocks imported
60 stocks imported
80 stocks imported
100 stocks imported
120 stocks imported
140 stocks imported
160 stocks imported
180 stocks imported
200 stocks imported
220 stocks imported
Total of 223 stocks imported
Done loading


In [74]:
# Preview the first ten [symbol, name, sector] entries returned by finviz_pull
print(my_stocks[:10])

[['CPSH', 'CPS Technologies Corporation', 'Technology'], ['UTI', 'Universal Technical Institute, Inc.', 'Consumer Defensive'], ['SAIA', 'Saia, Inc.', 'Industrials'], ['KRMD', 'Repro Med Systems, Inc.', 'Healthcare'], ['TECD', 'Tech Data Corporation', 'Technology'], ['MEDP', 'Medpace Holdings, Inc.', 'Healthcare'], ['AXE', 'Anixter International Inc.', 'Industrials'], ['MITK', 'Mitek Systems, Inc.', 'Technology'], ['ABMD', 'Abiomed, Inc.', 'Healthcare'], ['CNSL', 'Consolidated Communications Holdings, Inc.', 'Communication Services']]


Extract prices from Yahoo Finance

In [82]:
# get_prices returns nothing; it appends the combined frame to this list
lst = list()
get_prices(symbols_list=my_stocks[:10], result_list=lst, period1='2017-01-01')

Running Yahoo Finance Query: 1 of 10 - 10.0%
Done


In [83]:
# Concatenate the frames collected in `lst` and display the first rows
pd.concat(lst).head()

Unnamed: 0,timestamp,high_price,low_price,open_price,close_price,volume,symbol,name,industry,interval,just_date
0,2017-01-03 14:30:00,1.89,1.6,1.89,1.64,30100,CPSH,CPS Technologies Corporation,Technology,1d,2017-01-03
1,2017-01-04 14:30:00,1.81,1.6,1.8,1.6,12300,CPSH,CPS Technologies Corporation,Technology,1d,2017-01-04
2,2017-01-05 14:30:00,1.64,1.63,1.63,1.64,1400,CPSH,CPS Technologies Corporation,Technology,1d,2017-01-05
3,2017-01-06 14:30:00,1.61,1.61,1.61,1.61,100,CPSH,CPS Technologies Corporation,Technology,1d,2017-01-06
4,2017-01-09 14:30:00,1.61,1.52,1.61,1.53,40600,CPSH,CPS Technologies Corporation,Technology,1d,2017-01-09
