In [1]:
import pickle
from datetime import datetime

import pandas as pd
import pandas_datareader.data as web
import requests
from bs4 import BeautifulSoup

In [2]:
# Record the pandas version in use, so the outputs below can be tied to a library version.
print(pd.__version__)

0.20.3


In [3]:
# Page whose first HTML table is parsed in the following cells.
url = 'https://www.sashares.co.za/shares-list/#gs.dct9y1'

# requests applies no timeout unless one is given, so bound the wait explicitly;
# raise on a 4xx/5xx status rather than passing an error page on to the parser.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [4]:
# Parse the downloaded HTML and pick out the first <table> element.
website = response.text
soup = BeautifulSoup(website, features="lxml")
table = soup.find('table')

# find() returns None when nothing matches; fail here with a clear message
# instead of an AttributeError in a later cell.
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

In [5]:
# Number of data rows: every <tr> in the table minus the first one, which
# holds the <th> header cells read in the next cell.
rows = len(table.find_all('tr')) - 1

# Only the first two columns are consumed by the cells below, so the width is
# fixed rather than derived from the markup.
columns = 2
print((rows, columns))

(485, 2)


In [6]:
# Look up the header cells once, then take the text of the first `columns` of them.
header_cells = table.find_all('tr')[0].find_all('th')
col_names = [header_cells[i].text for i in range(columns)]
col_names

['JSE Code', 'Share']

In [7]:
# One list per header name; filled row by row below.
companies_dict = {col: [] for col in col_names}

# Collect the data rows once. The previous version called find_all('tr') for
# every (row, column) pair, rescanning the whole table each time.
data_rows = table.find_all('tr')[1:rows + 1]  # index 0 is the header row

for tr in data_rows:
    cells = tr.find_all('td')

    # First column: keep the text before the first '.', then append '.JO',
    # the form later passed as the symbol to the 'yahoo' data source.
    code = cells[0].text.split('.')[0]
    companies_dict[col_names[0]].append(code + '.JO')

    # Second column: share name, stored exactly as scraped.
    companies_dict[col_names[1]].append(cells[1].text)

In [8]:
# Tabulate the scraped lists: each key of companies_dict becomes a column.
Companies = pd.DataFrame(companies_dict)
Companies

Unnamed: 0,JSE Code,Share
0,4ANWK.JO,NWK
1,4SI.JO,4SIGHT
2,ABG.JO,ABSA
3,ABSP.JO,ABSABANK-P
4,ACE.JO,ACCENT
5,ACG.JO,ANCHOR
6,ACL.JO,ARCMITTAL
7,ACS.JO,ACSION
8,ACT.JO,AFRO-C
9,ACZ.JO,ARDENCAP


In [9]:
# Download price history from the 'yahoo' data source for every scraped symbol.
Prices = {}  # symbol -> DataFrame with renamed price columns
symbols = Companies['JSE Code']
start = datetime(2015, 1, 1).date()
end = datetime.now().date()  # previously written as `end = end = ...`

# Loop-invariant mapping from the source's column names to snake_case.
column_renames = {'Open': 'open',
                  'High': 'high',
                  'Low': 'low',
                  'Close': 'close',
                  'Adj Close': 'adj_close',
                  'Volume': 'volume'}

symbols_not_found = []  # symbols whose download raised
download_errors = {}    # symbol -> repr of the exception, kept for diagnosis

for i, symbol in enumerate(symbols):
    try:
        data = web.DataReader(
            name=symbol,
            data_source='yahoo',
            start=start,
            end=end,
        ).rename(columns=column_renames)
    except Exception as exc:
        # Best-effort download: one failing symbol must not stop the run.
        # Any exception lands here (not only "unknown symbol"), so the actual
        # error is retained alongside the unchanged progress message.
        symbols_not_found.append(symbol)
        download_errors[symbol] = repr(exc)
        print(f'{i}: {symbol} not available in database')
    else:
        Prices[symbol] = data

0: 4ANWK.JO not available in database
103: CVW.JO not available in database
118: ECSD.JO not available in database
119: ECSD7.JO not available in database
120: ECSG.JO not available in database
121: ECSP1.JO not available in database
122: ECSP21.JO not available in database
123: ECSP24.JO not available in database
136: ERN.JO not available in database
155: FDP.JO not available in database
161: FSE.JO not available in database
162: FSEO1.JO not available in database
199: IBRP2.JO not available in database
203: IMCB22.JO not available in database
245: MED.JO not available in database
255: MRI.JO not available in database
286: NFSH40.JO not available in database
292: NPKP.JO not available in database
294: NPP1.JO not available in database
301: NVE.JO not available in database
302: NVS.JO not available in database
335: RBPCB.JO not available in database
339: RDI.JO not available in database
347: RLO.JO not available in database
352: RPL.JO not available in database
357: S32.JO not availabl

In [10]:
# How many symbols were downloaded successfully.
len(Prices)

440

In [11]:
# Persist the company table next to the notebook.
Companies.to_pickle(path='Companies.pkl')

In [12]:
# Persist the symbol -> DataFrame mapping. The context manager closes the file
# handle once the dump finishes; passing a bare open(...) left it unclosed.
with open('Prices.pkl', 'wb') as prices_file:
    pickle.dump(Prices, prices_file)