# Use pandas-datareader Package to Get Financial Information

In [None]:
#install the package
!pip install pandas_datareader

In [1]:
#import packages
import pandas as pd
import pandas_datareader as pdr

from pandas_datareader import data, wb

## 1. Get the list of stocks in Toronto Stock Exchange
The TSX/TSXV listings were downloaded from the following link:  
https://www.tsx.com/resource/en/571

In [99]:
# Load the TSX/TSXV listings export; the listing-date column is parsed into
# datetimes at read time. Its header contains embedded newlines.
listing_date_col = 'Date of \nTSX Listing\nYYYYMMDD'
listings = pd.read_csv(
    "tsx-andamp-tsxv-listed-companies-2019-09-13-en.csv",
    parse_dates=[listing_date_col],
)

In [100]:
# Preview the first five rows of the raw listings table.
listings.head(n=5)

Unnamed: 0,Co_ID,Exchange,Name,Root Ticker,QMV(C$) 31-August-2019,O/S Shares 31-August-2019,Sector,Sub Sector,Listing Type,Date of TSX Listing YYYYMMDD,...,Zinc,Rare Earths,Potash,Lithium,Uranium,Coal,Tungsten,Base & Precious Metals,Mineral Properties (General),Other Properties
0,FIV0003,TSX,5N Plus Inc.,VNP,195022756,83343058,Clean Technology,,IPO,2007-12-19,...,,,,,,,,,,
1,AAW0001,TSX,A&W Revenue Royalties Income Fund,AW,571728957,14064673,Consumer Products & Services,,,2002-02-14,...,,,,,,,,,,
2,FIR0005,TSX,Aberdeen Asia-Pacific Income Investment Compan...,FAP,177851156,50814616,Closed-End Funds,,,1986-06-11,...,,,,,,,,,,
3,ABE0005,TSX,Aberdeen International Inc.,AAB,4802614,96052282,Financial Services,,TSXV Grad,2008-01-30,...,,,,,,,,,,
4,ABS0003,TSX,Absolute Software Corporation,ABT,328576276,41697497,Technology,,TSXV Grad,2005-12-02,...,,,,,,,,,,


In [101]:
# Show the raw column labels. Several of them contain embedded newlines and
# leading/trailing spaces, which must be reproduced exactly when selecting columns.
listings.columns

Index(['Co_ID', 'Exchange', 'Name', 'Root\nTicker',
       ' QMV(C$)\n31-August-2019 ', ' O/S Shares\n31-August-2019 ', 'Sector',
       'Sub\nSector', 'Listing Type', 'Date of \nTSX Listing\nYYYYMMDD',
       'HQ\nLocation', 'HQ\nRegion', 'Interlisted\nCode_I',
       'Interlisted\nCode_II', 'Listed on OTC', 'TSX \nVenture \nGrad',
       'Former\nCPC', 'Index', 'Clean Technology Primary Industry',
       'Clean Technology Sub-Sector', 'Technology Sub-Sector ',
       'Consumer Products & Services\nSub-Sector', 'Real Estate Sub-Sector ',
       'Life Sciences Sub-Sector', 'USA_City', 'USA_State', 'Asia Region',
       'Israel Related', 'Place of Incorporation\nC=Canada\nU=USA\nF=Foreign',
       'Fund Family', 'SP Grouping', 'SP_Type', 'SP_Sub',
       'Income Trust \nConversion', ' Volume YTD\n31-August-2019 ',
       ' Value (C$) YTD\n31-August-2019 ',
       ' Number of \nTrades YTD\n31-August-2019 ',
       ' Number of\nMonths of \nTrading Data ', 'AFRICA', 'AUS/NZ/PNG',
       'C

  
    
The ticker of National Bank of Canada is "NA", which pandas' `read_csv` interprets as a missing value (NaN) by default. We fix it manually here.

In [102]:
# Show the rows whose ticker was loaded as a missing value (NaN).
# `.isna()` tests for missingness directly instead of inferring it from the
# Python type of each element.
listings[listings['Root\nTicker'].isna()]

Unnamed: 0,Co_ID,Exchange,Name,Root Ticker,QMV(C$) 31-August-2019,O/S Shares 31-August-2019,Sector,Sub Sector,Listing Type,Date of TSX Listing YYYYMMDD,...,Zinc,Rare Earths,Potash,Lithium,Uranium,Coal,Tungsten,Base & Precious Metals,Mineral Properties (General),Other Properties
1086,NAT0002,TSX,National Bank of Canada,,22993740736,433577427,Financial Services,,,1979-11-01,...,,,,,,,,,,


In [103]:
# Restore the literal ticker "NA" for the company with Co_ID 'NAT0002'
# (National Bank of Canada).
is_national_bank = listings['Co_ID'] == 'NAT0002'
listings.loc[is_national_bank, 'Root\nTicker'] = 'NA'

Keep only a few key columns, rename them, and convert their data types accordingly.

In [104]:
# Map each raw CSV header we need (embedded newlines and spaces included) to a
# clean snake_case name. An explicit mapping keeps every rename tied to its
# source column instead of relying on list positions lining up.
column_names = {
    'Exchange': 'exchange',
    'Name': 'name',
    'Root\nTicker': 'ticker',
    ' QMV(C$)\n31-August-2019 ': 'market_value',
    'Sector': 'sector',
    'Date of \nTSX Listing\nYYYYMMDD': 'date_of_listing',
}
keep_cols = list(column_names)

# Take an explicit copy of the column subset so that later column assignments
# modify an independent frame and do not raise SettingWithCopyWarning.
listings = listings[keep_cols].copy().rename(columns=column_names)

In [108]:
# Strip surrounding whitespace from the text columns with the vectorized
# `.str` accessor. Unlike a per-element `x.strip()`, missing values pass
# through as NaN instead of raising AttributeError.
for text_col in ['exchange', 'name', 'ticker', 'sector']:
    listings[text_col] = listings[text_col].str.strip()

# market_value is still text at this point: drop surrounding whitespace and
# ',' characters, then convert to a numeric dtype.
listings['market_value'] = pd.to_numeric(
    listings['market_value'].str.strip().str.replace(',', '', regex=False)
)

# Index the table by ticker (reassignment instead of inplace=True).
listings = listings.set_index('ticker')

Remove the ETPs (Exchange-Traded Products) because their data are structured differently from those of regular company stocks.

In [109]:
# Count how many listings fall into each sector.
listings['sector'].value_counts()

ETP                               680
Mining                            213
Industrial Products & Services    123
Closed-End Funds                   92
Financial Services                 73
Oil & Gas                          70
Consumer Products & Services       70
Real Estate                        61
Life Sciences                      56
Technology                         51
Clean Technology                   31
Comm & Media                       23
Utilities & Pipelines              21
SPAC                                2
Name: sector, dtype: int64

Save the non-ETP companies listed before 2004 to a CSV file for future use.

In [120]:
# Keep the non-ETP rows whose listing date is earlier than '2004', then write
# them to disk.
not_etp = listings['sector'] != 'ETP'
listed_before_2004 = listings['date_of_listing'] < '2004'
listings.loc[not_etp & listed_before_2004].to_csv('TSX_listing_2004.csv')