In [145]:
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import re
import json
import jmespath
from copy import deepcopy
import pandas as pd
from time import sleep
from random import randint

In [134]:
# Gather each company's name, price, and code from the main-page API endpoint.
main_api = "https://www.marketindex.com.au/api/v1/market/indicies/all-ordinaries/CHIA"
session = HTMLSession()
# Without a timeout the request can block forever if the server never
# responds, which hangs the kernel; 30 seconds is a generous upper bound.
response = session.get(main_api, timeout=30)
# Shown as the cell output so a failed fetch is visible before parsing.
response.status_code

200

In [106]:
# Count the records returned by the endpoint (the JSON payload is a list).
len(response.json())

500

In [107]:
# Peek at the first two records to see which fields each company entry carries.
response.json()[:2]

[{'company_title': '3P Learning Ltd',
  'arrow': 'plain-dash',
  'change': 0,
  'change_class': '',
  'change_formatted': '0.00',
  'code': '3PL',
  'high': 0,
  'high_formatted': '$0.000',
  'last_price': 1.22,
  'last_price_formatted': '$1.22',
  'low': 0,
  'low_formatted': '$0.000',
  'one_year': -2.4,
  'one_year_class': 'neg',
  'one_year_formatted': '-2.40%',
  'percent_change': '0.00',
  'percent_change_class': '',
  'percent_change_formatted': '0.00%',
  'volume': 0,
  'volume_formatted': '0',
  'market_cap': 334979955.48,
  'market_cap_formatted': '$335M'},
 {'company_title': '29METALS Ltd',
  'arrow': 'green-up-arrow',
  'change': 0.02,
  'change_class': 'pos',
  'change_formatted': '+0.02',
  'code': '29M',
  'high': 0.465,
  'high_formatted': '$0.465',
  'last_price': 0.46,
  'last_price_formatted': '$0.460',
  'low': 0.45,
  'low_formatted': '$0.450',
  'one_year': -58.45,
  'one_year_class': 'neg',
  'one_year_formatted': '-58.45%',
  'percent_change': '4.55',
  'percent

In [184]:
# Project each API record down to the three fields used downstream:
# Company (company_title), Code (code) and Price (last_price).
company_fields_query = '''
    [].{Company: company_title,
    Code: code,
    Price: last_price
    }
'''
# Only the first 100 records of the payload are processed.
first_hundred_records = response.json()[:100]
result = jmespath.search(company_fields_query, first_hundred_records)

# Work on an independent copy so `result` keeps the untouched projection.
final_output = deepcopy(result)

In [None]:
# Get extra data for every company in `final_output`: Sector, Book Value and
# Dividend (DPS) scraped from the company pages, plus fields derived from them.
REQUEST_TIMEOUT_S = 30  # fail instead of hanging the kernel on an unresponsive server


def _text_two_siblings_after(tag):
    """Return the text of the node two siblings after `tag`, or None if it is missing."""
    try:
        return tag.next_sibling.next_sibling.text
    except AttributeError:
        # `tag` or one of its siblings is None: the label is not on the page.
        return None


def _parse_float(text, default=0):
    """Convert scraped cell text to a float; return `default` for missing or non-numeric text."""
    try:
        return float(text.strip())
    except (AttributeError, ValueError):
        # AttributeError: `text` is None. ValueError: a placeholder such as '-'.
        return default


# One session serves the whole crawl and is closed when the block exits,
# instead of creating (and never closing) two sessions per company.
with HTMLSession() as detail_session:
    for i, comp in enumerate(final_output):
        code = comp['Code']
        price = comp['Price']

        # Sector: try the table-cell layout first, then the span layout.
        # A page that has neither yields '' rather than aborting the crawl.
        url = f'https://www.marketindex.com.au/asx/{code}'
        r1 = detail_session.get(url, timeout=REQUEST_TIMEOUT_S)
        soup = BeautifulSoup(r1.text, 'html.parser')
        sector = _text_two_siblings_after(soup.find('td', string='Sector'))
        if sector is None:
            sector = _text_two_siblings_after(soup.find('span', class_='px-1'))
        comp['Sector'] = sector if sector is not None else ''

        # Book value (0 when the row is missing or not numeric)
        financial_url = f'https://www.marketindex.com.au/asx/{code}/financials'
        r2 = detail_session.get(financial_url, timeout=REQUEST_TIMEOUT_S)
        soup = BeautifulSoup(r2.text, 'html.parser')
        book_value = _parse_float(
            _text_two_siblings_after(soup.find('td', string=re.compile("Book Value")))
        )
        comp['Book Value'] = book_value

        # Dividend (DPS); a missing row or a '-' placeholder counts as 0.
        dividend = _parse_float(
            _text_two_siblings_after(soup.find('td', string=re.compile(" DPS ")))
        )
        # NOTE(review): the /100 presumably converts cents to dollars - confirm
        # against the units shown on the financials page.
        comp['Dividend'] = dividend / 100

        ##  Custom fields
        if price:
            # Dividend Yield: = Dividend / Price
            # The undivided DPS value is used, so the quotient is labelled as a percentage.
            dividend_yield = dividend / price
            comp['Dividend Yield'] = f'{round(dividend_yield, 2)}%'

            # +/- Book Value $: = Book Value - Price
            book_value_plus_slash_minus = book_value - price
            comp['+/- Book Value'] = round(book_value_plus_slash_minus, 3)

            # +/- Book Value Percentage %: = +/- Book Value $ / Price * 100
            book_value_percentage = book_value_plus_slash_minus / price * 100
            comp['+/- Book Value Percentage %'] = f'{round(book_value_percentage, 3)}%'
        else:
            # A zero or missing price makes the price-relative fields undefined;
            # record them as empty instead of raising and losing the rows
            # already scraped.
            comp['Dividend Yield'] = None
            comp['+/- Book Value'] = None
            comp['+/- Book Value Percentage %'] = None

        # Throttle: pause 5-10 seconds on every third company.
        if i % 3 == 0:
            sleep(randint(5, 10))

final_output

In [None]:
# Build a DataFrame from the enriched records and write it to CSV
# without the index column.
market_index_frame = pd.DataFrame(final_output)
market_index_frame.to_csv('market_index.csv', index=False)