In [1]:
import pandas as pd
import pandas_datareader.data as web
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import yfinance as yf

In [2]:
print(pd.__version__)

1.0.4


In [3]:
url = 'https://www.sashares.co.za/shares-list/#gs.dct9y1'
response = requests.get(url)

In [4]:
website = response.text
soup = BeautifulSoup(website, features = "lxml")
table = soup.find('table')

In [5]:
rows = len(table.findAll('tr')) - 1
columns = 2 #len(table.findAll('tr')[0])
print((rows, columns))

(485, 2)


In [6]:
col_names = [table.findAll('tr')[0].find_all('th')[i].text for i in range(columns)]
col_names

['JSE Code', 'Share']

In [12]:
companies_dict = {col: [] for col in col_names}

for col in range(columns):
    for row in range(1, rows):
        
        if col == 1:
            value = table.findAll('tr')[row].find_all('td')[col].text
            companies_dict[col_names[col]].append(value)
        elif col == 0:
            value = table.findAll('tr')[row].find_all('td')[col].text.split('.')[0]
            companies_dict[col_names[col]].append(value + '.JO')

In [13]:
Companies = pd.DataFrame.from_dict(companies_dict).rename(columns = {'JSE Code':'symbol', 'Share':'company_name'})
Companies

Unnamed: 0,symbol,company_name
0,4ANWK.JO,NWK
1,4SI.JO,4SIGHT
2,ABG.JO,ABSA
3,ABSP.JO,ABSABANK-P
4,ACE.JO,ACCENT
...,...,...
479,YRK.JO,YORK
480,YYLBEE.JO,YEBOYETHU
481,ZCL.JO,ZARCLEAR
482,ZED.JO,ZEDER


In [32]:
Sector = {}
for i, symbol in enumerate(Companies['symbol']):
    try:
        
        instance = yf.Ticker(symbol)
        Sector[symbol] = instance.info['sector']
        
    except Exception:
        print(f'{i}: {symbol} not available in database')

0: 4ANWK.JO not available in database
1: 4SI.JO not available in database
2: ABG.JO not available in database
4: ACE.JO not available in database
5: ACG.JO not available in database
6: ACL.JO not available in database
7: ACS.JO not available in database
8: ACT.JO not available in database
9: ACZ.JO not available in database
13: ADW.JO not available in database
14: AEE.JO not available in database
15: AEG.JO not available in database
18: AFEP.JO not available in database
21: AFX.JO not available in database
22: AGL.JO not available in database
23: AHA.JO not available in database
24: AHB.JO not available in database
25: AHL.JO not available in database
26: AIL.JO not available in database
28: ALH.JO not available in database
29: AME.JO not available in database
30: AMIB50.JO not available in database
31: AMIRE.JO not available in database
32: AMS.JO not available in database
33: ANG.JO not available in database
34: ANH.JO not available in database
35: AON.JO not available in database
36

284: NFNAMB.JO not available in database
285: NFP.JO not available in database
286: NFSH40.JO not available in database
287: NFTRCI.JO not available in database
288: NGPLD.JO not available in database
289: NGPLT.JO not available in database
291: NPK.JO not available in database
292: NPKP.JO not available in database
293: NPN.JO not available in database
294: NPP1.JO not available in database
295: NRL.JO not available in database
297: NT1.JO not available in database
299: NTCP.JO not available in database
300: NUT.JO not available in database
301: NVE.JO not available in database
302: NVS.JO not available in database
303: NWL.JO not available in database
304: NY1.JO not available in database
305: OAO.JO not available in database
306: OAS.JO not available in database
312: ORN.JO not available in database
314: PBG.JO not available in database
315: PEM.JO not available in database
316: PFB.JO not available in database
317: PGFP.JO not available in database
319: PHM.JO not available in data

In [9]:
Prices = {}
symbols = Companies['JSE Code']
start = datetime(2015, 1, 1).date()
end = end = datetime.now().date()


symbols_not_found = []

for i, symbol in enumerate(symbols):
    try:

        data = web.DataReader(
            name = symbol, 
            data_source = 'yahoo',
            start = start,
            end = end)\
            .rename(columns = {'Open': 'open',
                               'High': 'high',
                               'Low': 'low',
                               'Close': 'close',
                               'Adj Close': 'adj_close',
                               'Volume': 'volume'})

        Prices[symbol] = data
    except Exception:
        symbols_not_found.append(symbol)
        print(f'{i}: {symbol} not available in database')

0: 4ANWK.JO not available in database
103: CVW.JO not available in database
118: ECSD.JO not available in database
119: ECSD7.JO not available in database
120: ECSG.JO not available in database
121: ECSP1.JO not available in database
122: ECSP21.JO not available in database
123: ECSP24.JO not available in database
136: ERN.JO not available in database
155: FDP.JO not available in database
161: FSE.JO not available in database
162: FSEO1.JO not available in database
199: IBRP2.JO not available in database
203: IMCB22.JO not available in database
245: MED.JO not available in database
255: MRI.JO not available in database
286: NFSH40.JO not available in database
292: NPKP.JO not available in database
294: NPP1.JO not available in database
301: NVE.JO not available in database
302: NVS.JO not available in database
335: RBPCB.JO not available in database
339: RDI.JO not available in database
347: RLO.JO not available in database
352: RPL.JO not available in database
357: S32.JO not availabl

In [10]:
len(Prices.keys())

440

In [11]:
Companies.to_pickle('Companies.pkl')

In [12]:
import pickle
pickle.dump(Prices, open('Prices.pkl', 'wb'))