# Utility methods to get stock prices

In [30]:
!pip install bs4 pandas fastcore seaborn sqlalchemy psycopg2

import datetime
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings() # disable ssl verification warnings
from fastcore.parallel import parallel
from functools import reduce

import sqlite3
from sqlalchemy import create_engine


Collecting psycopg2
  Downloading psycopg2-2.8.6.tar.gz (383 kB)
[K     |████████████████████████████████| 383 kB 15.5 MB/s eta 0:00:01
Building wheels for collected packages: psycopg2
  Building wheel for psycopg2 (setup.py) ... [?25ldone
[?25h  Created wheel for psycopg2: filename=psycopg2-2.8.6-cp38-cp38-linux_x86_64.whl size=413905 sha256=8890c8e098af77f3937554bc2316650c605398f8b44f9a14151b85be0482ccff
  Stored in directory: /root/.cache/pip/wheels/70/5e/69/8a020d78c09043156a7df0b64529e460fbd922ca065c4b795c
Successfully built psycopg2
Installing collected packages: psycopg2
Successfully installed psycopg2-2.8.6


In [2]:
def get_todays_quotes(symbol):
    """Download the current stooq quote line for ``symbol``.

    The symbol is interpolated as-is into stooq's CSV quote endpoint and the
    response is parsed with ``pandas.read_csv``.

    Returns the parsed CSV as a DataFrame.
    """
    quote_url = f'https://stooq.com/q/l/?s={symbol}&f=sd2t2ohlcv&h&e=csv'
    return pd.read_csv(quote_url)

def get_last_quote(symbol):
    """Return date, time, price and volume of the latest quote for ``symbol``.

    Takes the frame produced by ``get_todays_quotes``, keeps only the
    ``Date``, ``Time``, ``Close`` and ``Volume`` columns, and exposes
    ``Close`` under the name ``Price``.
    """
    quotes = get_todays_quotes(symbol)
    selected = quotes[['Date', 'Time', 'Close', 'Volume']]
    return selected.rename(columns={'Close': 'Price'})

In [3]:
def get_stock_prices_from(url, timeout=30, max_pages=10_000):
    """Scrape every page of a stooq quotes listing into one DataFrame.

    Pages are requested as ``{url}&l=1``, ``{url}&l=2``, ... and the rows of
    the ``<table id="fth1">`` element on each page are collected until a page
    comes back with no rows.

    Parameters
    ----------
    url : str
        Listing URL without the ``l`` (page) query parameter.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot hang
        the notebook forever.
    max_pages : int, optional
        Upper bound on the number of pages requested; guards against a
        listing that never returns an empty page.

    Returns
    -------
    pandas.DataFrame
        One row per scraped table row. Column names are the lower-cased header
        texts of the first page; every value is the raw cell text (a string).

    Raises
    ------
    requests.HTTPError
        If a page responds with an HTTP error status.
    ValueError
        If a page does not contain the expected quotes table.
    RuntimeError
        If ``max_pages`` pages were fetched without reaching an empty page.
    """
    columns = None
    records = []

    for page in range(1, max_pages + 1):
        page_appended_url = f"{url}&l={page}"
        res = requests.get(page_appended_url, timeout=timeout)
        res.raise_for_status()

        # NOTE(review): no parser is pinned, so bs4 picks whichever one is
        # installed (and warns about it). Pin one once it has been checked
        # against stooq's markup.
        soup = BeautifulSoup(res.text)

        quotes_table = soup.find('table', {'id': 'fth1'})
        if quotes_table is None:
            raise ValueError(
                f"No quotes table (id 'fth1') found at {page_appended_url}"
            )

        # The header is taken from the first page only.
        if columns is None:
            columns = [th.text.lower() for th in quotes_table.thead.tr]

        page_records = [
            [td.text for td in tr.children]
            for tr in quotes_table.tbody
        ]

        # An empty table body marks the end of the listing.
        if not page_records:
            return pd.DataFrame(records, columns=columns)

        records.extend(page_records)

    raise RuntimeError(
        f"Gave up after {max_pages} pages without reaching the end of {url}"
    )

In [4]:
def dates_since(start_date):
    """Yield each calendar day from ``start_date`` up to, but excluding, today.

    Nothing is yielded when ``start_date`` is today or later.
    """
    one_day = datetime.timedelta(days=1)
    current = start_date
    while True:
        # Today's date is re-read on every step, exactly like the loop condition
        # of a plain ``while`` would do.
        if not current < datetime.date.today():
            return
        yield current
        current = current + one_day

In [62]:
def get_historical_stock_prices_from(url, date_since):
    """Scrape one listing per day, starting at ``date_since``, into one DataFrame.

    For every day produced by ``dates_since(date_since)`` the listing is
    fetched from ``{url}&d=YYYYMMDD`` via ``get_stock_prices_from`` and tagged
    with a ``date`` column holding that day. A dot is printed per fetched day
    as a progress indicator.

    Parameters
    ----------
    url : str
        Listing URL without the ``d`` (date) query parameter.
    date_since : datetime.date
        First day to fetch.

    Returns
    -------
    pandas.DataFrame
        All daily frames stacked in date order (each keeps its own row index).
        An empty DataFrame when there are no days to fetch.
    """
    daily_frames = []

    for d in dates_since(date_since):
        print(".", end="")
        dated_url = f"{url}&d={d.strftime('%Y%m%d')}"
        _df = get_stock_prices_from(dated_url)
        _df['date'] = d
        daily_frames.append(_df)

    # pd.concat raises on an empty list, so the empty range is handled explicitly.
    if not daily_frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop:
    # DataFrame.append copied all rows each time and no longer exists in pandas 2.
    return pd.concat(daily_frames)

In [63]:
def columns_to_append(df):
    """Return the set of ``df`` column names that are among the known quote fields.

    The result is an unordered ``set``; columns of ``df`` outside the known
    fields are left out.
    """
    known_fields = {
        'symbol', 'name', 'open', 'high', 'low', 'last',
        'change', 'volume', 'turnover', 'date', 'market',
    }
    return {column for column in df.columns if column in known_fields}

In [64]:
# Connection URL of the database that receives the scraped quotes.
# A local SQLite file is the active target; the PostgreSQL URL below is kept
# as a commented-out alternative.
# DB_CONN = 'postgresql://stocks:stocks@127.0.0.1:5432/stocks?'
DB_CONN = "sqlite:///stocks.sqlite"

engine = create_engine(DB_CONN)

In [65]:
# Listings to scrape: each entry pairs a stooq listing URL with the label that
# is stored in the 'market' column.
urls_to_fetch = [
    {'url': 'https://stooq.com/t/?i=513&v=1', 'name': "GPW"},
    {'url': 'https://stooq.com/t/?i=534&v=1', 'name': "Crypto"},
    {'url': 'https://stooq.com/t/?i=514&v=1', 'name': 'NewConnect'},
    {'url': 'https://stooq.com/t/?i=510&v=1', 'name': 'Main Indexes'},
    {'url': 'https://stooq.com/t/?i=512&v=1', 'name': 'Main Commodities'},
    # Additional listings, currently disabled:
    # {'url': 'https://stooq.com/t/?i=515&v=1', 'name': 'NYSE'},
    # {'url': 'https://stooq.com/t/?i=516&v=1', 'name': 'NASDAQ'},
    # {'url': 'https://stooq.com/t/?i=517&v=1', 'name': 'NYSE MKT'},
]

# Placeholder only; `df` is reassigned when the 'stocks' table is read back.
df = pd.DataFrame()

# First day of history to fetch.
prices_from = datetime.date(2021, 1, 1)

for conf in urls_to_fetch:
    print(f"Fetching {conf['name']}:", end="")
    # For the current listing only, call get_stock_prices_from(conf['url']) instead.
    _df = get_historical_stock_prices_from(conf['url'], prices_from)
    _df['market'] = conf['name']

    # Select the wanted columns as a list in the frame's own column order:
    # indexing a DataFrame with a set is rejected by pandas 2 and gives an
    # arbitrary column order on older versions.
    wanted = columns_to_append(_df)
    ordered_columns = [column for column in _df.columns if column in wanted]

    # NOTE: if_exists='append' means re-running this cell stores the same rows again.
    _df[ordered_columns].to_sql('stocks', engine, if_exists='append', index=False)
    print(" Done")

Fetching GPW:............ Done
Fetching Crypto:............ Done
Fetching NewConnect:............ Done
Fetching Main Indexes:............ Done
Fetching Main Commodities:............ Done
Fetching :.

MissingSchema: Invalid URL '&d=20210101&l=1': No schema supplied. Perhaps you meant http://&d=20210101&l=1?

In [66]:
# Read the whole 'stocks' table back from the database for inspection.
df = pd.read_sql(sql='stocks', con=engine)

In [67]:
# Display the quotes that were read back from the 'stocks' table.
df

Unnamed: 0,last,date,volume,change,low,open,high,market,name,symbol,turnover
0,1.69,2021-01-07,160k,+2.42%,1.58,1.61,1.75,GPW,06MAGNA,06N,266k
1,0.860,2021-01-07,579,-2.82%,0.855,0.855,0.860,GPW,08OCTAVA,08N,498
2,472.5,2021-01-07,5.8k,-1.25%,472.0,480.0,492.5,GPW,11BIT,11B,2.72m
3,34.0,2021-01-07,43.6k,+3.34%,33.5,33.5,35.8,GPW,ATAL,1AT,1.52m
4,4.70,2021-01-07,2.67k,+1.08%,4.65,4.65,4.74,GPW,4FUNMEDIA,4FM,12.6k
...,...,...,...,...,...,...,...,...,...,...,...
6440,133.975,2021-01-12,,-2.14%,133.725,136.975,136.975,Main Commodities,FEEDER CATTLE,GF.F,
6441,68.500,2021-01-12,,+0.04%,68.200,68.900,69.525,Main Commodities,LEAN HOGS,HE.F,
6442,117.650,2021-01-12,,+3.75%,117.275,118.125,118.275,Main Commodities,LIVE CATTLE,LE.F,
6443,,2021-01-12,,,,,,Main Commodities,,Other,
