## Data Tools to Extract and Build a Historical Data Base of TSX Daily Prices

In [None]:
# Load required python packages
# https://www.selenium.dev/selenium/docs/api/py/api.html
import sqlite3
import pandas as pd
import time
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

In [None]:
# Open the legacy Yahoo-populated price database. It holds one table per
# ticker symbol (not practical); its contents get merged into the new
# TSX_Prices.sqlite database for future use.
con1 = sqlite3.connect("TSX_Data.sqlite")

# Read the whole ticker-symbol table, then discard its 'level_0' and 'index' columns.
table = "tsx_symbols"
sql_request = f"SELECT * FROM '{table}'"
symbols_df = pd.read_sql_query(sql_request, con1).drop(columns=["level_0", "index"])

In [None]:
# Connect to the new daily prices database.
# First run only: uncomment the to_sql line below to copy the scraped TSX
# ticker symbols (symbols_df) into a "symbols" table of this new database.
con2 = sqlite3.connect("TSX_Prices.sqlite")
#symbols_df.to_sql("symbols", con2) 

## DON'T RUN THIS CELL

In [None]:
# DO NOT RUN BY DEFAULT.
# Only needed after TSX_Prices.sqlite has been deleted.
# Copies every per-ticker price table of the legacy database into the shared
# prices_daily table, tagging each row with its ticker symbol.
for symbol, ticker in zip(symbols_df["yahoo"], symbols_df["ticker"]):
    # The legacy table is named after the Yahoo symbol
    table_name = symbol
    sql1 = f"SELECT * FROM '{table_name}'"
    prices_df = pd.read_sql_query(sql1, con1).drop(columns=["index"])

    # Skip any table that has a missing Volume value
    if prices_df["Volume"].isnull().any():
        continue

    prices_df["Ticker"] = ticker
    prices_df = prices_df.reindex(columns=["Date", "Ticker", "Open", "High", "Low", "Close", "Volume"])
    prices_df.to_sql("prices_daily", con2, if_exists="append")


In [None]:
# Utility function to load CSV files scraped from the TMX trade-history web page into the daily prices database
def add_csv_data_to_daily_prices(symbol_to_load, csv_dir="CSV", database="TSX_Prices.sqlite"):
    """Append the not-yet-stored daily prices of one ticker to ``prices_daily``.

    Steps:
      1. Read ``<csv_dir>/<symbol_to_load>.csv``.
      2. Keep the first row of every date and reshape the frame to the
         table layout (Date, Ticker, Open, High, Low, Close, Volume).
      3. Read the dates already stored for the ticker.
      4. Append only the rows whose date is not stored yet.

    Args:
        symbol_to_load: Ticker symbol; also the CSV file's base name.
        csv_dir: Directory holding the scraped CSV files.
        database: Path of the SQLite database containing ``prices_daily``.

    Returns:
        The number of rows appended to ``prices_daily``.
    """
    from pathlib import Path  # local import: not part of the notebook's import cell

    price_columns = ["Date", "Ticker", "Open", "High", "Low", "Close", "Volume"]

    # Build the path portably; the former "CSV\{filename}" literal relied on an
    # invalid escape sequence and on Windows path separators.
    data = pd.read_csv(Path(csv_dir) / f"{symbol_to_load}.csv")

    # Every step below returns a new frame, so nothing mutates a filtered view
    # of `data` (which used to trigger SettingWithCopyWarning).
    data_cleaned = (
        data.drop_duplicates(subset="Date")
        .rename(columns={'Open ($)': 'Open', 'High ($)': 'High', 'Low ($)': 'Low', 'Close ($)': 'Close'})
        .assign(Ticker=symbol_to_load)
    )
    data_cleaned["Date"] = pd.to_datetime(data_cleaned["Date"])
    # reindex keeps only the table columns, which also discards the extra
    # scraped columns (VWAP, Change, Trade Value, # Trades, ...).
    data_cleaned = data_cleaned.reindex(columns=price_columns)

    con3 = sqlite3.connect(database)
    try:
        # On a brand-new database the table does not exist yet; to_sql creates it below.
        table_exists = con3.execute(
            "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'prices_daily'"
        ).fetchone() is not None

        if table_exists:
            existing_prices_df = pd.read_sql_query(
                "SELECT Date FROM prices_daily WHERE Ticker = ?",
                con3,
                params=(symbol_to_load,),
            )
            # SQLite hands dates back as text: parse them so the comparison is
            # timestamp-to-timestamp rather than timestamp-to-string.
            existing_dates = pd.to_datetime(existing_prices_df["Date"], errors="coerce")
            is_new = ~data_cleaned["Date"].isin(existing_dates)
            data_to_add = data_cleaned[is_new].reset_index(drop=True)
        else:
            data_to_add = data_cleaned.reset_index(drop=True)

        # Append only when there is something new. The default index column is
        # still written, as before, so the table layout stays unchanged.
        if not data_to_add.empty:
            data_to_add.to_sql("prices_daily", con3, if_exists="append")
            con3.commit()
    finally:
        con3.close()

    return len(data_to_add)


## Trade history data reader

In [None]:
# Set up the Selenium-driven Chrome browser used by the scraper
CHROME_DRIVER_LOCATION = "chromedriver.exe"
OPTIONS = webdriver.ChromeOptions()
# Fixed spelling: the switch was written '--ignore-certicate-errors', which Chrome does not recognise
OPTIONS.add_argument('--ignore-certificate-errors')
OPTIONS.add_argument('--incognito')
#OPTIONS.add_argument('--headless')
OPTIONS.add_experimental_option('excludeSwitches', ['enable-logging'])
# NOTE(review): newer Selenium releases expect a Service object instead of
# executable_path - confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=CHROME_DRIVER_LOCATION,options=OPTIONS)
#driver.implicitly_wait(10)
# Shared explicit wait with a 10 second timeout
wait = WebDriverWait(driver, 10)

In [None]:
# Silence DeprecationWarning messages for the rest of the session
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [None]:
def extract_trading_history(symbol, pages_to_read, msg1):
    """Scrape TMX Money price-history pages of ``symbol`` into ``CSV/<symbol>.csv``.

    Uses the module-level Selenium ``driver``. The price-history page is
    opened and the ad anchored at the bottom of the screen is closed. Then,
    page by page, the first HTML table is appended to the CSV file and the
    "next" button is clicked. Nothing is extracted when the ad cannot be
    closed. Extraction stops at the first page that fails.

    Args:
        symbol: Ticker symbol as used in the TMX Money URL.
        pages_to_read: Number of price-history pages to read.
        msg1: Prefix for the progress messages (e.g. "[3/120 : ABC]").
    """
    import io  # local imports: not part of the notebook's import cell
    import os

    tmx_url = f"https://money.tmx.com/en/quote/{symbol}/trade-history?selectedTab=price-history"
    driver.get(tmx_url)
    # Fixed pause to give the page time to load before looking for the ad
    time.sleep(5)

    # Close the ad at the bottom of the screen to expose the next button
    ad_closed = False
    try:
        close_ad = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'ssrt-close-anchor-button'))
        )
        close_ad.click()
        ad_closed = True
    except Exception as e:
        print(f"Unable to clase_ads : {e}")

    if ad_closed:
        os.makedirs("CSV", exist_ok=True)
        filename = os.path.join("CSV", f"{symbol}.csv")
        # The file is opened in append mode. Write the header only when the
        # file is new or empty, so a re-run for the same symbol does not put
        # a second header row in the middle of the data.
        header_flag = not os.path.exists(filename) or os.path.getsize(filename) == 0
        prev_date = "None"

        # range(1, pages_to_read) used to read one page fewer than requested
        for i in range(1, pages_to_read + 1):
            try:
                # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
                data = pd.read_html(io.StringIO(driver.page_source))
                prices_df = data[0]
                last_date = prices_df['Date'].iloc[-1]
                print(f"{msg1} => {symbol} [{i}/{pages_to_read}] :Extracted date: {last_date}, Previous Date Extracted: {prev_date}")
                prices_df.to_csv(filename, mode='a', header=header_flag)
                header_flag = False
                prev_date = last_date
                # Move on to the next page, except after the last requested one
                if i < pages_to_read:
                    btn_next = driver.find_element(By.XPATH, "//button[@data-testid='next-button']")
                    btn_next.click()
            except Exception as exc:
                # Assigning to the loop variable never stopped the loop; break
                # does, and the reason is reported instead of being swallowed.
                print(f"{msg1} => {symbol} [{i}/{pages_to_read}] :Extraction stopped: {exc}")
                break

    print("End of extraction")

In [None]:
# Collect the tickers (starting with `letter`) whose legacy Yahoo table has
# at least one missing Volume value, i.e. the ones still to be scraped.
letter = "A"
some_symbols_df = pd.read_sql_query(
    f"SELECT * FROM 'symbols' WHERE ticker LIKE '{letter}%' ORDER BY ticker ASC",
    con2,
)
tickers_to_scrap = [
    symbol_to_analyse
    for symbol_to_analyse, yahoo in zip(some_symbols_df['ticker'], some_symbols_df['yahoo'])
    if pd.read_sql_query(f"SELECT * FROM '{yahoo}'", con1)['Volume'].isnull().any()
]
print(len(tickers_to_scrap))

In [None]:
# Reset the string that accumulates the quoted ticker list
str2 = ""

In [None]:
# Get all tickers already present in prices_daily that we want to update:
# every distinct Ticker starting with the prefix in sym4, in alphabetical order.
sym4 = "Z"
con4 = sqlite3.connect("TSX_Prices.sqlite")
cursor4 = con4.cursor()
#sql4 = "SELECT Ticker, min(Date) as start_date, max(Date) as end_date, count(Ticker) as price_rows FROM prices_daily WHERE ticker LIKE 'A%' ORDER BY ticker ASC"
# GROUP BY Ticker collapses the price rows to one row per ticker
sql4 = f"SELECT Ticker FROM prices_daily WHERE ticker LIKE '{sym4}%' GROUP BY Ticker ORDER BY ticker ASC"
prices_data4 = pd.read_sql_query(sql4, con4)
# Replaces any previously built list of tickers to scrape
tickers_to_scrap =prices_data4["Ticker"].to_list()

In [None]:
# Print the tickers as a quoted, comma-separated list (trailing comma
# trimmed), followed by how many there are.
str2 = "".join(f"'{sym1}'," for sym1 in tickers_to_scrap)
print(str2[:-1])
print(len(tickers_to_scrap))

In [None]:
# Manually launch a daily price extraction.
# To scrape a hand-picked set instead of the list built by the cells above,
# assign an explicit list here, e.g. tickers = ['QAH', 'QBB', 'QBR.A'].
# The cell that prints str2 emits the current list in this same quoted,
# comma-separated form. The long commented-out list that used to sit here
# had wrapped onto a second, uncommented line and was a syntax error.
tickers = tickers_to_scrap
pages = 2  # Number of TMX trade-history pages to scrape back
total = len(tickers)
for i, ticker_to_extract in enumerate(tickers, start=1):
    msg1 = f"[{i}/{total} : {ticker_to_extract}]"
    try:
        extract_trading_history(ticker_to_extract, pages, msg1)
        add_csv_data_to_daily_prices(ticker_to_extract)
    except Exception as exc:
        # Best effort: one failing ticker must not stop the batch, but the
        # failure is reported instead of being silently dropped.
        print(f"{msg1} failed: {exc}")


In [None]:
# Shut down the Selenium browser session once scraping is finished
driver.quit()

In [None]:
# List every ticker of the symbols table in alphabetical order
con5 = sqlite3.connect("TSX_Prices.sqlite")
cursor5 = con5.cursor()
sql5 = "SELECT ticker FROM symbols ORDER BY ticker ASC"
prices_data5 = pd.read_sql_query(sql5, con5)
prices_data5

In [None]:
# Count the stored price rows per ticker and display the first 150 tickers
con4 = sqlite3.connect("TSX_Prices.sqlite")
cursor4 = con4.cursor()
# Earlier variants of the query, kept for reference:
#sql4 = "SELECT Ticker, min(Date) as start_date, max(Date) as end_date, count(Ticker) as price_rows FROM prices_daily WHERE ticker LIKE 'A%' ORDER BY ticker ASC"
#sql4 = "SELECT Ticker, count(Ticker), min(Date) as start_date, max(Date) as end_date FROM prices_daily WHERE ticker LIKE 'A%' GROUP BY Ticker ORDER BY ticker ASC"
sql4 = "SELECT Ticker, count(Ticker) FROM prices_daily GROUP BY Ticker ORDER BY Ticker ASC"
prices_data4 = pd.read_sql_query(sql4, con4)
prices_data4[0:150]

In [None]:
# Price statistics: number of distinct tickers that have rows in prices_daily
cursor = con2.cursor()
# Alternative statistics, kept for reference:
# cursor.execute("SELECT count(*) FROM 'prices_daily'")                                             # Count all rows of data in prices_daily
# cursor.execute("SELECT DISTINCT Ticker, count() FROM 'prices_daily' GROUP BY Ticker")             # Count rows of prices for all distinct tickers from prices_daily
cursor.execute("SELECT COUNT (DISTINCT Ticker) FROM 'prices_daily'")                                # Count unique ticker symbols with prices from prices_daily
# The query yields a single row with a single column: the count
cursor.fetchone()[0]

In [None]:
# Scratch cell: split the database file's absolute path into its parts.
# Original class-based lines this was derived from:
#self.full_path = os.path.abspath(self.db)
#drivepath, self.filename = os.path.split(self.db)
import os.path
n1 = "TSX_Prices.sqlite"
# Absolute path of the file, resolved against the current working directory
full_path = os.path.abspath(n1)
# Directory part and file name
drivepath, f1 = os.path.split(full_path)
# Drive specification (may be empty) and the remaining directory path
drive, p1 = os.path.splitdrive(drivepath)
(drivepath, f1, drive, p1)


In [None]:
# Extract every ticker that has prices, with its company name, the number of
# price rows available and the first and last price dates. The result is
# sorted by price_rows in ascending order (the query has no DESC).
con3 = sqlite3.connect("TSX_Prices.sqlite")
cursor2 = con3.cursor()
sql3 = "SELECT DISTINCT prices_daily.Ticker, symbols.name, min(prices_daily.Date) as start_date, max(prices_daily.Date) as end_date, count(prices_daily.Ticker) as price_rows FROM prices_daily INNER JOIN symbols ON prices_daily.Ticker = symbols.ticker GROUP BY prices_daily.ticker ORDER BY price_rows"
prices_data1 = pd.read_sql_query(sql3, con3)
# Parse the date columns and set their time of day to midnight
prices_data1['start_date'] = pd.to_datetime(prices_data1['start_date']).dt.normalize()
prices_data1['end_date'] = pd.to_datetime(prices_data1['end_date']).dt.normalize()
prices_data1

In [3]:
# Show today's date (the module is imported under its own name, so the alias changes nothing)
import datetime as datetime
datetime.date.today()

datetime.date(2021, 12, 1)

In [12]:
import sqlite3
import pandas as pd
import time
# Look up the most recent stored price date of one ticker; None when the
# ticker has no rows in prices_daily.
con6 = sqlite3.connect("TSX_Prices.sqlite")
cursor6 = con6.cursor()
symbol_to_load = "BBQW"
sql6 = f"SELECT * FROM 'prices_daily' WHERE Ticker='{symbol_to_load}' ORDER BY Date DESC LIMIT 1"
prices_data6 = pd.read_sql_query(sql6, con6)

# The query returns at most one row (the newest); no row means no data yet
last_date = None if prices_data6.empty else prices_data6["Date"].iloc[0]
print(last_date)


None


# Technical Indicators

In [7]:
def get_prices_data(ticker, database="TSX_Prices.sqlite"):
    """Return all daily price rows of ``ticker``, newest date first.

    Args:
        ticker: Ticker symbol to look up in the ``prices_daily`` table.
        database: Path of the SQLite database to read.

    Returns:
        A DataFrame with the rows of ``prices_daily`` for the ticker, ordered
        by Date descending, without the stored ``index`` column. The frame is
        empty when the ticker has no rows.
    """
    con7 = sqlite3.connect(database)
    try:
        # Bind the ticker as a parameter so a quote in the symbol cannot break the SQL
        prices_data7 = pd.read_sql_query(
            "SELECT * FROM prices_daily WHERE Ticker = ? ORDER BY Date DESC",
            con7,
            params=(ticker,),
        )
    finally:
        # Close the connection even when the query raises
        con7.close()
    # Drop the positional index column written by to_sql, when present
    return prices_data7.drop(columns="index", errors="ignore")

In [4]:
# Export SQLite3 Symbols to CSV file for github push
import pandas as pd
import sqlite3

# Dump the symbols table, sorted by ticker, to symbols.csv
conn = sqlite3.connect("TSX_Prices.sqlite")
sql = "SELECT * FROM 'symbols' ORDER BY ticker ASC"
data = pd.read_sql_query(sql, conn)
# Remove the positional index column that was stored with the table
data.drop(labels="index", axis=1, inplace=True)
# index must be the boolean False: the string "False" is truthy, so the
# DataFrame index was still being written as an extra first column.
data.to_csv("symbols.csv", index=False)


In [8]:
import numpy as np
import pandas as pd
import sqlite3


In [None]:
# Load the stored prices of ticker ABCT via get_prices_data and display them
data_df_7 = get_prices_data("ABCT")
data_df_7