In [11]:
import pandas as pd
import pandas_datareader.data as web
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pickle

In [2]:
print(pd.__version__)

1.0.4


In [3]:
url = 'https://www.sashares.co.za/shares-list/#gs.dct9y1'
response = requests.get(url)

In [4]:
website = response.text
soup = BeautifulSoup(website, features = "lxml")
table = soup.find('table')

In [5]:
rows = len(table.findAll('tr')) - 1
columns = 2 #len(table.findAll('tr')[0])
print((rows, columns))

(506, 2)


In [6]:
col_names = [table.findAll('tr')[0].find_all('th')[i].text for i in range(columns)]
col_names

['JSE Code', 'Share']

In [7]:
companies_dict = {col: [] for col in col_names}

for col in range(columns):
    for row in range(1, rows):
        
        if col == 1:
            value = table.findAll('tr')[row].find_all('td')[col].text
            companies_dict[col_names[col]].append(value)
        elif col == 0:
            value = table.findAll('tr')[row].find_all('td')[col].text.split('.')[0]
            companies_dict[col_names[col]].append(value + '.JO')

In [8]:
Companies = pd.DataFrame.from_dict(companies_dict).rename(columns = {'JSE Code':'symbol', 'Share':'company_name'})
Companies

Unnamed: 0,symbol,company_name
0,4SI.JO,4SIGHT
1,ABG.JO,ABSA
2,ABSP.JO,ABSABANK-P
3,ACE.JO,ACCENT
4,ACG.JO,ANCHOR
...,...,...
500,YRK.JO,YORK
501,YYLBEE.JO,YEBOYETHU
502,ZCL.JO,ZARCLEAR
503,ZED.JO,ZEDER


In [10]:
Prices = {}
symbols = Companies['symbol']
start = datetime(2010, 1, 1).date()
end = end = datetime.now().date()


symbols_not_found = []

for i, symbol in enumerate(symbols):
    try:

        data = web.DataReader(
            name = symbol, 
            data_source = 'yahoo',
            start = start,
            end = end)\
            .rename(columns = {'Open': 'open',
                               'High': 'high',
                               'Low': 'low',
                               'Close': 'close',
                               'Adj Close': 'adj_close',
                               'Volume': 'volume'})

        Prices[symbol] = data
    except Exception:
        symbols_not_found.append(symbol)
        print(f'{i}: {symbol} not available in database')

31: AMETNC.JO not available in database
41: APETNC.JO not available in database
96: COETNC.JO not available in database
110: CVW.JO not available in database
125: ECSD.JO not available in database
126: ECSD7.JO not available in database
127: ECSG.JO not available in database
128: ECSP1.JO not available in database
129: ECSP21.JO not available in database
130: ECSP24.JO not available in database
143: ERN.JO not available in database
161: FAETNC.JO not available in database
162: FAETNQ.JO not available in database
164: FDP.JO not available in database
170: FSE.JO not available in database
171: FSEO1.JO not available in database
201: HWA.JO not available in database
205: IBLVR2.JO not available in database
208: IBRP2.JO not available in database
212: IMCB22.JO not available in database
251: MCETNC.JO not available in database
256: MED.JO not available in database
266: MRI.JO not available in database
268: MSETNC.JO not available in database
269: MSETNQ.JO not available in database
279: MW

In [12]:
len(Prices.keys())

467

In [13]:
Companies.to_pickle('/home/ubuntu/projects/trading/data/Companies.pkl')

In [14]:
pickle.dump(Prices, open('/home/ubuntu/projects/trading/data/Prices.pkl', 'wb'))