<a href="https://colab.research.google.com/github/silvererudite/forex-forecaster/blob/main/data-scripts/get_forex_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install requests_html bs4

In [None]:
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from pprint import pprint
from urllib.parse import urljoin

In [None]:
# Initialize one HTTP session; get_form and the form-submission cell both
# send their requests through it
session = HTMLSession()

In [None]:
def get_form(url):
    """Fetch `url` and return every ``<form>`` tag found in its HTML.

    The page is requested through the notebook-level `session`, and the
    result is whatever BeautifulSoup's ``find_all("form")`` yields for the
    downloaded markup (all forms on the page, not just one).
    """
    response = session.get(url)
    # For JavaScript-driven sites, render the page before parsing:
    # response.html.render()
    page_html = response.html.html
    return BeautifulSoup(page_html, "html.parser").find_all("form")

In [None]:
def get_form_details(form):
    """Summarize an HTML form as a plain dictionary.

    Args:
        form: A parsed ``<form>`` tag exposing ``attrs`` and ``find_all``
            (for example one element of the list returned by ``get_form``).

    Returns:
        A dict with three keys:

        * ``"action"`` - the form's ``action`` attribute exactly as written,
          or ``""`` when the form has none.
        * ``"method"`` - the lower-cased ``method`` attribute, ``"get"`` when
          it is not specified (the HTML default).
        * ``"inputs"`` - a list with one dict per form control, in the order
          inputs, selects, textareas. Every dict has ``"type"``, ``"name"``
          and ``"value"``; select entries also carry ``"values"``, the list
          of non-empty option values.
    """
    # A form without an action submits to the page it lives on; "" resolves
    # to that page under urljoin. The URL is kept verbatim because paths and
    # query strings are case-sensitive.
    action = form.attrs.get("action") or ""
    # if not specified, GET is the default in HTML
    method = form.attrs.get("method", "get").lower()

    inputs = []
    for input_tag in form.find_all("input"):
        inputs.append({
            "type": input_tag.attrs.get("type", "text"),
            "name": input_tag.attrs.get("name"),
            "value": input_tag.attrs.get("value", ""),
        })

    for select in form.find_all("select"):
        select_options = []
        select_default_value = ""
        for select_option in select.find_all("option"):
            option_value = select_option.attrs.get("value")
            if not option_value:
                # placeholder options without a value cannot be submitted
                continue
            select_options.append(option_value)
            # `selected` is a boolean attribute whose parsed value is
            # usually an empty string, so test for presence, not truthiness
            if "selected" in select_option.attrs:
                select_default_value = option_value
        if not select_default_value and select_options:
            # no explicit default: fall back to the first option
            select_default_value = select_options[0]
        inputs.append({
            "type": "select",
            "name": select.attrs.get("name"),
            "values": select_options,
            "value": select_default_value,
        })

    # The HTML tag is <textarea>, and its value is the element's text
    # content rather than a `value` attribute.
    for textarea in form.find_all("textarea"):
        inputs.append({
            "type": "textarea",
            "name": textarea.attrs.get("name"),
            "value": textarea.get_text(),
        })

    return {"action": action, "method": method, "inputs": inputs}

In [None]:
# Exchange-rate page to scrape (path: econdata/exchangerate)
url = "https://www.bb.org.bd/en/index.php/econdata/exchangerate"
# Despite the singular name, `form` holds all <form> tags found on the page
form = get_form(url)

In [None]:
# Display the forms found on the page so the one to submit can be identified
form

[<form action="/en/index.php/home/search" class="search" method="get">
 <input class="search_input" name="search_key" placeholder="Search" type="text"/>
 <button class="search_button">
 <i class="icon-search search_icon"></i>
 </button>
 </form>,
 <form action="/en/index.php/econdata/exchangerate" id="search-form" method="post">
 <div class="input-group mb-3">
 <div class="input-group-prepend">
 <label class="input-group-text" for="inputGroupSelect01"><i aria-hidden="true" class="fa fa-caret-down"></i></label>
 </div>
 <select class="custom-select" id="currencies" name="currencies">
 <option disabled="" selected="" value="">Select Currency</option>
 <option value="all">All Currency</option>
 <option value="usd">USD</option>
 <option value="others">Others</option>
 </select>
 </div>
 <div class="input-group mb-3">
 <div class="input-group-prepend">
 <label class="input-group-text">
 <i aria-hidden="true" class="fa fa-calendar"></i>
 </label>
 </div>
 <input class="datepicker-here form-c

In [None]:
# Summarize every form found on the page. The loop variable is deliberately
# not named `form`: reusing that name would replace the list of forms with a
# single tag and make this cell fail when it is run a second time.
# After the loop, `form_details` describes the last form on the page.
for i, form_tag in enumerate(form, start=1):
    form_details = get_form_details(form_tag)
    print("="*50, f"form #{i}", "="*50)
    print(form_details)
      

{'action': '/en/index.php/home/search', 'method': 'get', 'inputs': [{'type': 'text', 'name': 'search_key', 'value': ''}]}
{'action': '/en/index.php/econdata/exchangerate', 'method': 'post', 'inputs': [{'type': 'text', 'name': 'date_picker', 'value': ''}, {'type': 'select', 'name': 'currencies', 'values': ['all', 'usd', 'others'], 'value': 'all'}]}


In [None]:
# Build the data body to submit, prompting for every user-editable field
# described in `form_details`.
data = {}
for input_tag in form_details["inputs"]:
    field_name = input_tag["name"]
    if not field_name:
        # controls without a name are not part of a form submission
        continue
    if input_tag["type"] == "hidden":
        # hidden fields are submitted with their default value
        data[field_name] = input_tag["value"]
    elif input_tag["type"] == "select":
        options = input_tag["values"]
        # list the available options, marking the default one
        for i, option in enumerate(options, start=1):
            if option == input_tag["value"]:
                print(f"{i} # {option} (default)")
            else:
                print(f"{i} # {option}")
        # use len(options) rather than the loop index, which is undefined
        # (or stale from an earlier cell) when the select has no options
        choice = input(f"Enter the option for the select field '{field_name}' (1-{len(options)}): ")
        try:
            index = int(choice) - 1
        except ValueError:
            # not a number: fall back to the default below
            index = -1
        # anything outside 1..len(options) also falls back to the default
        # instead of raising IndexError or silently picking the last option
        if 0 <= index < len(options):
            value = options[index]
        else:
            value = input_tag["value"]
        data[field_name] = value
    elif input_tag["type"] != "submit":
        # all others except submit, prompt the user to set it
        value = input(f"Enter the value of the field '{field_name}' (type: {input_tag['type']}): ")
        data[field_name] = value

Enter the value of the field 'date_picker' (type: text): January, 2022
1 # all (default)
2 # usd
3 # others
Enter the option for the select field 'currencies' (1-3): 2


In [None]:
# Resolve the form's action against the page URL to get the submission URL
url = urljoin(url, form_details["action"])
# Send the collected fields the way the form declares:
# request body for POST, query string for GET
if form_details["method"] == "post":
    res = session.post(url, data=data)
elif form_details["method"] == "get":
    res = session.get(url, params=data)
else:
    # fail loudly instead of leaving `res` undefined or stale from an earlier run
    raise ValueError(f"Unsupported form method: {form_details['method']!r}")

In [None]:
# Show the raw body of the response returned by the form submission
res.content

In [None]:
# Parse the response body; BeautifulSoup is already imported in the
# notebook's import cell, so it is not imported again here
soup = BeautifulSoup(res.content, 'html.parser')

In [None]:
# prettify() returns one already-indented string; print() shows it as
# formatted HTML, whereas pprint() would show the escaped repr of the string
print(soup.prettify())