In [2]:
from bs4 import BeautifulSoup
from requests_html import HTMLSession

In [4]:
# Single HTMLSession created at module level; get_all_forms() issues its GET through it.
session = HTMLSession()

## Getting Form Details

In [5]:
def get_all_forms(url):
    """Fetch ``url`` and return every ``<form>`` element found in the page.

    The page is requested through the module-level ``session`` and its HTML
    is parsed with BeautifulSoup's built-in ``html.parser``.

    Args:
        url: Address of the page to download.

    Returns:
        The result of ``find_all("form")`` on the parsed document.
    """
    response = session.get(url)
    page_markup = response.html.html
    document = BeautifulSoup(page_markup, "html.parser")
    return document.find_all("form")

In [12]:
def get_form_details(form):
    """Summarise an HTML ``<form>`` element as a plain dictionary.

    Args:
        form: A parsed form element exposing ``attrs`` (dict of HTML
            attributes) and ``find_all(tag_name)``; nested ``<textarea>``
            elements must also provide ``get_text()``. A BeautifulSoup
            ``Tag`` satisfies this.

    Returns:
        A dict with the keys:

        * ``"action"``: the lower-cased ``action`` attribute, or ``""`` when
          the form has none.
        * ``"method"``: the lower-cased ``method`` attribute (``"get"`` when
          absent).
        * ``"inputs"``: a list with one dict per ``<input>``, then one per
          ``<select>``, then one per ``<textarea>``. Every entry has
          ``"type"``, ``"name"`` and ``"value"``; select entries also carry
          ``"values"`` (all non-empty option values).
    """
    # A form may omit `action`; default to "" instead of calling .lower() on None.
    # NOTE(review): lower-casing is kept for backward compatibility, but it can
    # alter case-sensitive URL paths -- confirm callers really want this.
    action = form.attrs.get("action", "").lower()
    method = form.attrs.get("method", "get").lower()

    inputs = []

    for input_tag in form.find_all("input"):
        inputs.append({
            "type": input_tag.attrs.get("type", "text"),
            "name": input_tag.attrs.get("name"),
            "value": input_tag.attrs.get("value", ""),
        })

    for select in form.find_all("select"):
        select_options = []
        select_default_value = ""
        for select_option in select.find_all("option"):
            # Only options that submit a non-empty value are recorded.
            option_value = select_option.attrs.get("value")
            if not option_value:
                continue
            select_options.append(option_value)
            # `selected` is a boolean attribute: a bare `selected` is parsed
            # with an empty value, so test for presence rather than truthiness.
            if "selected" in select_option.attrs:
                select_default_value = option_value
        if not select_default_value and select_options:
            # No option explicitly selected: fall back to the first one.
            select_default_value = select_options[0]
        inputs.append({
            "type": "select",
            "name": select.attrs.get("name"),
            "values": select_options,
            "value": select_default_value,
        })

    # This loop sits at function level (not inside the select loop) so that
    # textareas are collected once, regardless of how many selects exist.
    for textarea in form.find_all("textarea"):
        # A textarea's initial value is its text content; an explicit `value`
        # attribute is still honoured first for backward compatibility.
        textarea_value = textarea.attrs.get("value")
        if textarea_value is None:
            textarea_value = textarea.get_text()
        inputs.append({
            "type": "textarea",
            "name": textarea.attrs.get("name"),
            "value": textarea_value,
        })

    # Always return the summary, including for forms without any <select>.
    return {"action": action, "method": method, "inputs": inputs}

In [13]:
# Demo driver: list the forms found on one page and print a summary of each.
if __name__ == "__main__":
    # NOTE(review): `sys` is imported but not used anywhere in this cell.
    import sys
    # target URL is hard-coded here (it is not read from the command line)
    url = 'https://wikipedia.org'
    # get all form tags
    forms = get_all_forms(url)
    # iterate over the forms, numbering them from 1 for display
    for i, form in enumerate(forms, start=1):
        form_details = get_form_details(form)
        # banner line identifying the form, followed by its details dict
        print("="*50, f"form #{i}", "="*50)
        print(form_details)

{'action': '//www.wikipedia.org/search-redirect.php', 'method': 'get', 'inputs': [{'type': 'hidden', 'name': 'family', 'value': 'Wikipedia'}, {'type': 'hidden', 'name': 'language', 'value': 'en'}, {'type': 'search', 'name': 'search', 'value': ''}, {'type': 'hidden', 'name': 'go', 'value': 'Go'}, {'type': 'select', 'name': 'language', 'values': ['af', 'pl', 'sk', 'ar', 'ast', 'az', 'bg', 'nan', 'bn', 'be', 'ca', 'cs', 'cy', 'da', 'de', 'et', 'el', 'en', 'es', 'eo', 'eu', 'fa', 'fr', 'gl', 'ko', 'hy', 'hi', 'hr', 'id', 'it', 'he', 'ka', 'la', 'lv', 'lt', 'hu', 'mk', 'arz', 'ms', 'min', 'nl', 'ja', 'no', 'nn', 'ce', 'uz', 'pt', 'kk', 'ro', 'ru', 'simple', 'ceb', 'sl', 'sr', 'sh', 'fi', 'sv', 'ta', 'tt', 'th', 'tg', 'azb', 'tr', 'uk', 'ur', 'vi', 'vo', 'war', 'zh-yue', 'zh', 'my'], 'value': 'en'}]}
