# Utility methods to get stock prices

In [1]:
!pip install bs4 pandas fastcore seaborn sqlalchemy psycopg2

import datetime
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings() # disable ssl verification warnings
from fastcore.parallel import parallel
from functools import reduce

import sqlite3
from sqlalchemy import create_engine

from fastcore import *
from pathlib import Path

Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
Collecting seaborn
  Downloading seaborn-0.11.1-py3-none-any.whl (285 kB)
[K     |████████████████████████████████| 285 kB 24.0 MB/s eta 0:00:01
[?25hCollecting sqlalchemy
  Downloading SQLAlchemy-1.3.22-cp38-cp38-manylinux2010_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 51.7 MB/s eta 0:00:01
[?25hCollecting psycopg2
  Downloading psycopg2-2.8.6.tar.gz (383 kB)
[K     |████████████████████████████████| 383 kB 30.1 MB/s eta 0:00:01
[?25hCollecting beautifulsoup4
  Downloading beautifulsoup4-4.9.3-py3-none-any.whl (115 kB)
[K     |████████████████████████████████| 115 kB 78.4 MB/s eta 0:00:01
Collecting soupsieve>1.2; python_version >= "3.0"
  Downloading soupsieve-2.1-py3-none-any.whl (32 kB)
Building wheels for collected packages: bs4, psycopg2
  Building wheel for bs4 (setup.py) ... [?25ldone
[?25h  Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1273 sha256=524509ed3629c66a592d71f

In [2]:
DATA_DIR=Path("/storage/stooq_data/")
engine = create_engine(f"sqlite:///{DATA_DIR}/stocks.sqlite")

REMOVE_COLUMNS_HISTORICAL=['openint', 'per', 'file', 'time']
TRANSLATE_HISTORICAL_COLUMNS={'vol': 'volume', 'close': 'last', 'ticker': 'symbol'}
REMOVE_COLUMNS_TODAY=['turnover', 'name', 'date', 'change']

# Load prices from stooq.com

In [133]:
def remove_columns_if_exist(df, columns):
    columns_to_remove = set(columns).intersection(set(df.columns))
    return df.drop(columns=columns_to_remove)

def get_stock_prices_from(url):
    def _get_stock_prices_from(url, page=1):
        page_appended_url = f"{url}&l={page}"
        res = requests.get(page_appended_url)
        
        soup = BeautifulSoup(res.text)

        quotes_table = soup.find('table', {'id': 'fth1'})
        columns = [th.text.lower() for th in quotes_table.thead.tr]
        columns

        df = pd.DataFrame()
        records = []
        for tr in quotes_table.tbody:
            records.append([
                td.text for i, td in enumerate(tr.children)
            ])

        if len(records) > 0:
            _, next_page_records = _get_stock_prices_from(url, page=page+1)
            records += next_page_records

        return columns, records
    
    
    columns, records = _get_stock_prices_from(url)
    
    df = pd.DataFrame(records, columns=columns)
    df = fix_numbers(df, ['volume'])
    df = remove_columns_if_exist(df, REMOVE_COLUMNS_TODAY)
    
    return df

In [4]:
def dates_since(start_date):
    d = start_date
    while d < datetime.date.today():
        yield d
        d += datetime.timedelta(days=1)

In [5]:
def get_historical_stock_prices_from(url, date_since):
    df = pd.DataFrame()
    
    for d in dates_since(date_since):
        print(".", end="")
        dated_url = f"{url}&d={d.strftime('%Y%m%d')}"
        _df = get_stock_prices_from(dated_url)
        _df['date'] = d
        df = df.append(_df)
        
    return df

In [6]:
def columns_to_append(df):
    columns = set(['symbol', 'name', 'open', 'high', 'low', 'last', 'volume', 'turnover', 'date', 'market'])
    
    return set(df.columns).intersection(columns)

def fix_numbers(df, columns):
    df = df.copy()
    for column in columns:
        df[column] = df[column].fillna("0").apply(lambda x: '0' if x == '' else x )
        df[column] = (df[column].replace(r'[kmb]+$', '', regex=True).astype(float) * df[column].str.extract(r'[\d\.]+([kmb]+)', expand=False).fillna(1).replace(['k','m', 'b'], [10**3, 10**6, 10**9]).astype(int))
        
    return df

Get historical data from https://stooq.com/db/h/

In [89]:
urls_to_fetch = [
    {'url':'https://stooq.com/t/?i=513&v=1&g=1&u=1&n=1&b=0', 'name':"GPW"},
    {'url':'https://stooq.com/t/?i=534&v=1&g=1&u=1&n=1&b=0', 'name':"Crypto"},
    {'url': 'https://stooq.com/t/?i=514&v=1&g=1&u=1&n=1&b=0', 'name': 'NewConnect'},
    {'url': 'https://stooq.com/t/?i=510&v=1&g=1&u=1&n=1&b=0', 'name': 'Main Indexes'},
    {'url': 'https://stooq.com/t/?i=512&v=1&g=1&u=1&n=1&b=0', 'name': 'Main Commodities'},
#     {'url': 'https://stooq.com/t/?i=515&v=1', 'name': 'NYSE'},
#     {'url': 'https://stooq.com/t/?i=516&v=1', 'name': 'NASDAQ'},
#     {'url': 'https://stooq.com/t/?i=517&v=1', 'name': 'NYSE MKT'},
#     {'url': '', 'name': ''}
]

df = pd.DataFrame()
prices_from = datetime.datetime.strptime('2021-01-01', '%Y-%m-%d').date()

for conf in urls_to_fetch:
    print(f"Fetching {conf['name']}:", end="")
#     _df = get_stock_prices_from(conf['url'])
    _df = get_historical_stock_prices_from(conf['url'], prices_from)
    _df['market'] = conf['name']
#     _df = fix_numbers(_df)
    _df[columns_to_append(_df)].to_sql('stocks', engine, if_exists='append', index=False)
    print(" Done")

Fetching GPW:.

AttributeError: 'NoneType' object has no attribute 'thead'

In [90]:
bs = BeautifulSoup(requests.get('https://stooq.com/t/?i=513&v=1&g=1&u=1&n=1&b=0').text)

### Load historical data

In [6]:
df = pd.read_sql("stocks", engine)

### Initialize historical data
Process files downloaded from https://stooq.com/db/h/

In [135]:
def get_data_from_historical_file(file):
    df = pd.read_csv(file, delimiter=',')
    df.columns = [column.lower().strip("<>") for column in df.columns]
    
    df['date'] = df['date'].astype(str)
    df['date'] = pd.to_datetime(df.date)
    df['file'] = str(file)
    df = df.drop(columns=REMOVE_COLUMNS_HISTORICAL).rename(columns=TRANSLATE_HISTORICAL_COLUMNS)
    
    return df


In [138]:

def process_data_from(folder, suffix=''):
    for file in folder.ls():
        print(file)
        try:
            df = get_data_from_historical_file(file)
            df['symbol'] = df['symbol'] + suffix
            df.to_sql('stocks', engine, if_exists='append', index=False)
        except Exception as e:
            print(e)

In [139]:
folders_to_process = [
    {'folder': 'data/daily/pl/wse stocks', 'suffix':''},
    {'folder': 'data/daily/pl/nc stocks', 'suffix':''},
]
for config in  folders_to_process: 
    process_data_from(DATA_DIR/config['folder'], suffix=config['suffix'])

/storage/stooq_data/data/daily/pl/wse stocks/sen.txt
/storage/stooq_data/data/daily/pl/wse stocks/itm.txt
/storage/stooq_data/data/daily/pl/wse stocks/zst.txt
/storage/stooq_data/data/daily/pl/wse stocks/tsg.txt
/storage/stooq_data/data/daily/pl/wse stocks/atc.txt
/storage/stooq_data/data/daily/pl/wse stocks/inv.txt
/storage/stooq_data/data/daily/pl/wse stocks/opl.txt
/storage/stooq_data/data/daily/pl/wse stocks/r22.txt
/storage/stooq_data/data/daily/pl/wse stocks/wpl.txt
/storage/stooq_data/data/daily/pl/wse stocks/kci.txt
/storage/stooq_data/data/daily/pl/wse stocks/fgt.txt
/storage/stooq_data/data/daily/pl/wse stocks/moj.txt
/storage/stooq_data/data/daily/pl/wse stocks/ntt.txt
/storage/stooq_data/data/daily/pl/wse stocks/mil.txt
/storage/stooq_data/data/daily/pl/wse stocks/ksg.txt
/storage/stooq_data/data/daily/pl/wse stocks/hdr.txt
/storage/stooq_data/data/daily/pl/wse stocks/obl.txt
/storage/stooq_data/data/daily/pl/wse stocks/pps.txt
/storage/stooq_data/data/daily/pl/wse stocks/d

/storage/stooq_data/data/daily/pl/wse stocks/trn.txt
/storage/stooq_data/data/daily/pl/wse stocks/ekp.txt
/storage/stooq_data/data/daily/pl/wse stocks/bow.txt
/storage/stooq_data/data/daily/pl/wse stocks/iag.txt
/storage/stooq_data/data/daily/pl/wse stocks/bcm.txt
/storage/stooq_data/data/daily/pl/wse stocks/cnt.txt
/storage/stooq_data/data/daily/pl/wse stocks/msw.txt
/storage/stooq_data/data/daily/pl/wse stocks/mlg.txt
/storage/stooq_data/data/daily/pl/wse stocks/skl.txt
/storage/stooq_data/data/daily/pl/wse stocks/mci.txt
/storage/stooq_data/data/daily/pl/wse stocks/rbw.txt
/storage/stooq_data/data/daily/pl/wse stocks/erb.txt
/storage/stooq_data/data/daily/pl/wse stocks/mgt.txt
/storage/stooq_data/data/daily/pl/wse stocks/psw.txt
/storage/stooq_data/data/daily/pl/wse stocks/ats.txt
/storage/stooq_data/data/daily/pl/wse stocks/aml.txt
/storage/stooq_data/data/daily/pl/wse stocks/swg.txt
/storage/stooq_data/data/daily/pl/wse stocks/atl.txt
/storage/stooq_data/data/daily/pl/wse stocks/a

/storage/stooq_data/data/daily/pl/wse stocks/inl.txt
/storage/stooq_data/data/daily/pl/wse stocks/4fm.txt
/storage/stooq_data/data/daily/pl/wse stocks/ape.txt
/storage/stooq_data/data/daily/pl/wse stocks/amc.txt
/storage/stooq_data/data/daily/pl/wse stocks/cpa.txt
/storage/stooq_data/data/daily/pl/wse stocks/meg.txt
/storage/stooq_data/data/daily/pl/wse stocks/gpw.txt
/storage/stooq_data/data/daily/pl/wse stocks/kru.txt
/storage/stooq_data/data/daily/pl/wse stocks/pnd.txt
/storage/stooq_data/data/daily/pl/wse stocks/pura.txt
/storage/stooq_data/data/daily/pl/wse stocks/uni.txt
/storage/stooq_data/data/daily/pl/wse stocks/atd.txt
/storage/stooq_data/data/daily/pl/wse stocks/pri.txt
/storage/stooq_data/data/daily/pl/wse stocks/nvt.txt
/storage/stooq_data/data/daily/pl/wse stocks/cig.txt
/storage/stooq_data/data/daily/pl/wse stocks/pma.txt
/storage/stooq_data/data/daily/pl/wse stocks/nxg.txt
/storage/stooq_data/data/daily/pl/wse stocks/snk.txt
/storage/stooq_data/data/daily/pl/wse stocks/

/storage/stooq_data/data/daily/pl/nc stocks/gtp.txt
/storage/stooq_data/data/daily/pl/nc stocks/din.txt
/storage/stooq_data/data/daily/pl/nc stocks/sok.txt
/storage/stooq_data/data/daily/pl/nc stocks/fiv.txt
/storage/stooq_data/data/daily/pl/nc stocks/etx.txt
/storage/stooq_data/data/daily/pl/nc stocks/drf.txt
/storage/stooq_data/data/daily/pl/nc stocks/fld.txt
/storage/stooq_data/data/daily/pl/nc stocks/ltg.txt
/storage/stooq_data/data/daily/pl/nc stocks/mpy.txt
/storage/stooq_data/data/daily/pl/nc stocks/dns.txt
/storage/stooq_data/data/daily/pl/nc stocks/ecl.txt
/storage/stooq_data/data/daily/pl/nc stocks/kln.txt
/storage/stooq_data/data/daily/pl/nc stocks/fig.txt
/storage/stooq_data/data/daily/pl/nc stocks/epr.txt
/storage/stooq_data/data/daily/pl/nc stocks/sfd.txt
/storage/stooq_data/data/daily/pl/nc stocks/deg.txt
/storage/stooq_data/data/daily/pl/nc stocks/dua.txt
/storage/stooq_data/data/daily/pl/nc stocks/gng.txt
/storage/stooq_data/data/daily/pl/nc stocks/gre.txt
/storage/sto

/storage/stooq_data/data/daily/pl/nc stocks/ibc.txt
/storage/stooq_data/data/daily/pl/nc stocks/ec2.txt
/storage/stooq_data/data/daily/pl/nc stocks/vcp.txt
No columns to parse from file
/storage/stooq_data/data/daily/pl/nc stocks/boa.txt
No columns to parse from file
/storage/stooq_data/data/daily/pl/nc stocks/alu.txt
/storage/stooq_data/data/daily/pl/nc stocks/mrs.txt
/storage/stooq_data/data/daily/pl/nc stocks/sim.txt
/storage/stooq_data/data/daily/pl/nc stocks/cfg.txt
/storage/stooq_data/data/daily/pl/nc stocks/dam.txt
/storage/stooq_data/data/daily/pl/nc stocks/nxb.txt
/storage/stooq_data/data/daily/pl/nc stocks/egh.txt
/storage/stooq_data/data/daily/pl/nc stocks/scs.txt
/storage/stooq_data/data/daily/pl/nc stocks/7lv.txt
/storage/stooq_data/data/daily/pl/nc stocks/via.txt
/storage/stooq_data/data/daily/pl/nc stocks/pmt.txt
No columns to parse from file
/storage/stooq_data/data/daily/pl/nc stocks/ifm.txt
No columns to parse from file
/storage/stooq_data/data/daily/pl/nc stocks/emm.

/storage/stooq_data/data/daily/pl/nc stocks/apa.txt
/storage/stooq_data/data/daily/pl/nc stocks/wby.txt
/storage/stooq_data/data/daily/pl/nc stocks/aux.txt
/storage/stooq_data/data/daily/pl/nc stocks/gnr.txt
/storage/stooq_data/data/daily/pl/nc stocks/slk.txt
No columns to parse from file
/storage/stooq_data/data/daily/pl/nc stocks/mrh.txt
/storage/stooq_data/data/daily/pl/nc stocks/cdt.txt
/storage/stooq_data/data/daily/pl/nc stocks/sp1.txt
/storage/stooq_data/data/daily/pl/nc stocks/eon.txt
/storage/stooq_data/data/daily/pl/nc stocks/dcd.txt
/storage/stooq_data/data/daily/pl/nc stocks/edn.txt
/storage/stooq_data/data/daily/pl/nc stocks/stk.txt
No columns to parse from file
/storage/stooq_data/data/daily/pl/nc stocks/mnd.txt
/storage/stooq_data/data/daily/pl/nc stocks/dlk.txt
/storage/stooq_data/data/daily/pl/nc stocks/org.txt
/storage/stooq_data/data/daily/pl/nc stocks/igs.txt
/storage/stooq_data/data/daily/pl/nc stocks/ftn.txt
/storage/stooq_data/data/daily/pl/nc stocks/viv.txt
/sto

In [134]:
get_stock_prices_from("https://stooq.com/t/?i=513&v=1&g=1&u=1&n=1&b=0")

Unnamed: 0,symbol,open,high,low,last,volume,Unnamed: 7
0,06N,1.74,1.74,1.65,1.71,63200.0,
1,08N,0.860,0.865,0.860,0.865,209.0,
2,11B,468.0,473.0,456.0,460.0,6040.0,
3,1AT,36.9,37.5,36.2,37.2,3900.0,
4,4FM,4.80,4.80,4.60,4.80,524.0,
...,...,...,...,...,...,...,...
437,ZRE,0.750,0.775,0.740,0.765,19300.0,
438,ZST,34.0,39.0,34.0,37.2,18500.0,
439,ZUE,4.24,4.24,4.16,4.18,214.0,
440,ZUK,2.80,2.80,2.80,2.80,1050.0,


In [129]:
#TODO

get_data_from_historical_file(DATA_DIR/'data'/'daily'/'pl'/'nc stocks'/'bru.txt')

Unnamed: 0,symbol,date,open,high,low,last,volume
0,BRU,2014-07-02,0.6964,1.0586,0.6964,1.0586,557564
1,BRU,2014-07-03,1.1143,1.5414,1.1143,1.4857,374054
2,BRU,2014-07-04,1.6157,1.7736,1.3371,1.3371,262561
3,BRU,2014-07-07,1.3371,1.5507,1.2164,1.4764,125112
4,BRU,2014-07-08,1.5229,1.7457,1.4486,1.6250,59629
...,...,...,...,...,...,...,...
1624,BRU,2021-01-05,0.8400,0.8680,0.8140,0.8400,4423586
1625,BRU,2021-01-07,0.8420,1.0800,0.8420,1.0450,17145793
1626,BRU,2021-01-08,1.0600,1.0750,0.9380,0.9800,10424743
1627,BRU,2021-01-11,0.9800,1.0450,0.9260,1.0000,5048568
