In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import requests
from bs4 import BeautifulSoup as bs
from string import ascii_uppercase

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
input_paths = (
    os.path.join(folder, leaf)
    for folder, _subdirs, leaves in os.walk('/kaggle/input')
    for leaf in leaves
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
def get_symbols_from_alphabet(alpha):
    """Return the link texts listed on eoddata.com's TSX stock page for one letter.

    Parameters
    ----------
    alpha : str
        Value substituted into the listing URL
        ``http://www.eoddata.com/stocklist/TSX/{alpha}.htm``.

    Returns
    -------
    list of str
        Text of every non-empty ``<a>`` inside the ``table.quotes`` element of
        ``div#col1w`` (presumably the ticker symbols -- verify against the
        live page). An empty list is returned when either element is missing,
        e.g. because the site served an error page or changed its layout.

    Raises
    ------
    requests.exceptions.RequestException
        If the HTTP request fails or exceeds the timeout.
    """
    url = 'http://www.eoddata.com/stocklist/TSX/{}.htm'.format(alpha)
    # Without a timeout a stalled connection would block the scrape forever.
    r = requests.get(url, timeout=30)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = bs(r.content, 'html.parser')

    div = soup.find('div', attrs={'id': 'col1w'})
    if div is None:
        return []
    table = div.find('table', attrs={'class': 'quotes'})
    if table is None:
        return []

    return [quote.text for quote in table.find_all('a') if quote.text != '']

In [3]:
# URL template and CSS class strings used to locate data on Yahoo Finance's
# key-statistics page. The class strings are matched verbatim against the markup.
_STATS_URL = 'https://finance.yahoo.com/quote/{}/key-statistics'
_NAME_CLASS = 'D(ib) Fz(18px)'
_VM_TABLE_CLASS = 'W(100%) Bdcl(c) M(0) Whs(n) BdEnd Bdc($seperatorColor) D(itb)'
_VM_VALUE_CLASS = 'Ta(c) Pstart(10px) Miw(60px) Miw(80px)--pnclg Bgc($lv1BgColor) fi-row:h_Bgc($hoverBgColor)'
_KEY_CELL_PREFIX = 'Pos(st) Start(0) Bgc($lv2BgColor) fi-row:h_Bgc($hoverBgColor) Pend(10px)'
_SECTION_VALUE_CLASS = 'Fw(500) Ta(end) Pstart(10px) Miw(60px)'
_SECTION_DIV_CLASSES = (
    'Mb(10px) Pend(20px) smartphone_Pend(0px)',  # financial highlights
    'Pstart(20px) smartphone_Pstart(0px)',       # trading information
)


def _is_key_cell(css_class):
    """Return True when a class string starts with the label-cell prefix."""
    return bool(css_class) and css_class.startswith(_KEY_CELL_PREFIX)


def _collect_stats(container, value_class):
    """Pair each label cell in ``container`` with the value cell at the same position."""
    keys = []
    for cell in container.find_all('td', class_=_is_key_cell):
        span = cell.find('span')
        # The label normally sits in a <span>; fall back to the cell's own
        # text rather than failing when the span is absent.
        keys.append(span.text if span is not None else cell.text)

    values = [cell.text for cell in container.find_all('td', class_=value_class)]

    # zip() stops at the shorter list, so a mismatch between the number of
    # labels and values yields a partial result instead of an IndexError.
    return dict(zip(keys, values))


def get_stats_from_symbol(symbol):
    """Scrape the Yahoo Finance key-statistics page for one ticker symbol.

    Parameters
    ----------
    symbol : str
        Value substituted into
        ``https://finance.yahoo.com/quote/{symbol}/key-statistics``.

    Returns
    -------
    dict
        Maps ``'name'`` (text of the page's ``<h1>`` heading, when present)
        and each statistic label to its raw display string. Values are not
        converted to numbers. Entries are added in page order: name, the
        valuation-measures table, the financial-highlights section, then the
        trading-information section. Later sections overwrite earlier ones on
        duplicate labels. An empty dict is returned when the
        valuation-measures table is not found.

    Raises
    ------
    requests.exceptions.RequestException
        If the HTTP request fails or exceeds the timeout.
    """
    # Without a timeout a stalled connection would block the scrape forever.
    r = requests.get(_STATS_URL.format(symbol), timeout=30)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = bs(r.content, 'html.parser')

    stats = {}

    # The valuation-measures table doubles as the "does this page carry
    # statistics at all?" check, so look it up once and reuse it.
    vm_table = soup.find('table', class_=_VM_TABLE_CLASS)
    if vm_table is None:
        return stats

    name_tag = soup.find('h1', class_=_NAME_CLASS)
    if name_tag is not None:
        stats['name'] = name_tag.text

    stats.update(_collect_stats(vm_table, _VM_VALUE_CLASS))

    # Financial highlights and trading information share the same cell
    # markup; a section missing from the page is skipped.
    for section_class in _SECTION_DIV_CLASSES:
        section = soup.find('div', class_=section_class)
        if section is not None:
            stats.update(_collect_stats(section, _SECTION_VALUE_CLASS))

    return stats

In [4]:
# Scrape key statistics for every symbol listed under the first ten letters
# (A-J), keeping only the non-empty results and reporting the running total.
infos = []

for alpha in ascii_uppercase[:10]:
    # Fetch the whole letter before extending `infos`, so a failure mid-letter
    # leaves the accumulated list untouched for that letter.
    letter_stats = [
        get_stats_from_symbol(ticker)
        for ticker in get_symbols_from_alphabet(alpha)
    ]
    infos.extend(filter(None, letter_stats))
    print(alpha, len(infos))
infos

A 30
B 45
C 52
D 52
E 52
F 52
G 52
H 52
I 52
J 52


[{'name': 'Abbott Laboratories (ABT)',
  'Market Cap (intraday)': '178.68B',
  'Enterprise Value': '193.32B',
  'Trailing P/E': '58.67',
  'Forward P/E': '23.15',
  'PEG Ratio (5 yr expected)': '3.21',
  'Price/Sales': '5.72',
  'Price/Book': '5.84',
  'Enterprise Value/Revenue': '6.15',
  'Enterprise Value/EBITDA': '26.60',
  'Fiscal Year Ends': 'Dec 30, 2019',
  'Most Recent Quarter': 'Jun 29, 2020',
  'Profit Margin': '9.89%',
  'Operating Margin': '14.00%',
  'Return on Assets': '4.01%',
  'Return on Equity': '9.86%',
  'Revenue': '31.44B',
  'Revenue Per Share': '17.76',
  'Quarterly Revenue Growth': '-8.20%',
  'Gross Profit': '18.71B',
  'EBITDA': '7.57B',
  'Net Income Avi to Common': '3.07B',
  'Diluted EPS': '1.73',
  'Quarterly Earnings Growth': '-46.60%',
  'Total Cash': '5.04B',
  'Total Cash Per Share': '2.85',
  'Total Debt': '19.68B',
  'Total Debt/Equity': '63.90',
  'Current Ratio': '1.57',
  'Book Value Per Share': '17.27',
  'Operating Cash Flow': '6.4B',
  'Levered

In [5]:
# One row per scraped company; keep only the first 60 columns.
stats_frame = pd.DataFrame(infos)
tsx = stats_frame.iloc[:, :60]
tsx.head()

Unnamed: 0,name,Market Cap (intraday),Enterprise Value,Trailing P/E,Forward P/E,PEG Ratio (5 yr expected),Price/Sales,Price/Book,Enterprise Value/Revenue,Enterprise Value/EBITDA,...,Forward Annual Dividend Rate,Forward Annual Dividend Yield,Trailing Annual Dividend Rate,Trailing Annual Dividend Yield,5 Year Average Dividend Yield,Payout Ratio,Dividend Date,Ex-Dividend Date,Last Split Factor,Last Split Date
0,Abbott Laboratories (ABT),178.68B,193.32B,58.67,23.15,3.21,5.72,5.84,6.15,26.6,...,1.44,1.35%,1.4,1.31%,1.9,79.07%,"Nov 15, 2020","Oct 13, 2020",10000:4798,"Jan 01, 2013"
1,"Associated Capital Group, Inc. (AC)",778.64M,704.23M,20.78,,,26.69,0.91,24.04,-25.21,...,0.2,0.57%,0.2,0.55%,,,"Jun 29, 2020","Jun 14, 2020",1005:1000,"Jul 28, 2020"
2,Aurora Cannabis Inc. (ACB),562.90M,833.34M,,,,2.15,0.35,2.99,-0.26,...,,,,,,0.00%,,,1:12,"May 10, 2020"
3,Agnico Eagle Mines Limited (AEM),19.15B,20.56B,38.46,20.75,,7.2,3.67,7.71,14.11,...,0.8,1.01%,0.7,0.88%,0.95,33.82%,"Sep 14, 2020","Nov 23, 2020",,
4,Aeterna Zentaris Inc. (AEZS),21.54M,15.02M,,,,4.79,22.96,10.29,-5.14,...,,,,,,0.00%,,,1:100,"Nov 19, 2015"


In [6]:
# Summarise the scraped table: column names, non-null counts and dtypes.
tsx.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 60 columns):
 #   Column                                        Non-Null Count  Dtype 
---  ------                                        --------------  ----- 
 0   name                                          52 non-null     object
 1   Market Cap (intraday)                         52 non-null     object
 2   Enterprise Value                              52 non-null     object
 3   Trailing P/E                                  52 non-null     object
 4   Forward P/E                                   52 non-null     object
 5   PEG Ratio (5 yr expected)                     52 non-null     object
 6   Price/Sales                                   52 non-null     object
 7   Price/Book                                    52 non-null     object
 8   Enterprise Value/Revenue                      52 non-null     object
 9   Enterprise Value/EBITDA                       52 non-null     object
 10  Fisc

In [7]:
# Write the scraped table to 'tsx_stocks.csv' in the current working directory.
# With pandas' default settings the row index is written as an unnamed first column.
tsx.to_csv('tsx_stocks.csv')