In [2]:
from bs4 import BeautifulSoup
from requests_html import HTMLSession
# One module-level session, reused by get_all_forms() for the initial page
# fetch and by the form-submission cell for the follow-up GET/POST.
# NOTE(review): presumably shared so that cookies set by the first request
# are sent with the submission -- confirm against the requests_html docs.
session = HTMLSession()

def get_all_forms(url):
    """Fetch `url` with the shared session and return every <form> tag found.

    The response's raw HTML is parsed with BeautifulSoup's "html.parser"
    backend; the result is whatever `find_all("form")` yields for that
    document (empty when the page contains no forms).
    """
    response = session.get(url)
    markup = response.html.html
    return BeautifulSoup(markup, "html.parser").find_all("form")

def get_form_details(form):
    """Returns the HTML details of a form,
    including action, method and list of form controls (inputs, etc).

    `form` must expose `attrs` (a dict of HTML attributes) and
    `find_all(name)`; in this notebook it is a BeautifulSoup <form> tag.

    The returned dictionary has three keys:
      * "action": the form's action attribute with surrounding whitespace
        removed and its case preserved, or "" when the attribute is absent.
      * "method": the lowercased HTTP method, "get" when absent or empty.
      * "inputs": one {"type", "name", "value"} dict per <input> tag, where
        type defaults to "text", value defaults to "" and name is None for
        an input that has no name attribute.
    """
    # The action is a URL: paths and tokens such as ";jsessionid=..." can be
    # case-sensitive, so it must not be lowercased. A missing action is legal
    # HTML, hence the fallback to "" instead of calling a method on None.
    action = (form.attrs.get("action") or "").strip()
    # Method names are case-insensitive; GET is the HTML default when the
    # attribute is missing or empty.
    method = (form.attrs.get("method") or "get").strip().lower()
    # Collect every <input> control together with its default value.
    inputs = [
        {
            "type": input_tag.attrs.get("type", "text"),
            "name": input_tag.attrs.get("name"),
            "value": input_tag.attrs.get("value", ""),
        }
        for input_tag in form.find_all("input")
    ]
    return {"action": action, "method": method, "inputs": inputs}


In [3]:
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from pprint import pprint
from urllib.parse import urljoin
import webbrowser
#from form_extractor import get_all_forms, get_form_details, session

url = "https://parivahan.gov.in/rcdlstatus/?pur_cd=101"
# get the first form, failing with a clear message when the page has none
forms = get_all_forms(url)
if not forms:
    raise ValueError(f"No <form> element found at {url}")
first_form = forms[0]
# extract all form details
form_details = get_form_details(first_form)
pprint(form_details)

# the data body we want to submit
data = {}
for input_tag in form_details["inputs"]:
    name = input_tag["name"]
    if name is None:
        # an input without a name would otherwise be stored under the key None
        continue
    if input_tag["type"] == "hidden":
        # if it's hidden, use the default value
        data[name] = input_tag["value"]
    elif input_tag["type"] != "submit":
        # all others except submit, prompt the user to set it
        data[name] = input(f"Enter the value of the field '{name}' (type: {input_tag['type']}): ")

# join the url with the action (form request URL); from here on `url` is the
# submission target and the base for resolving relative links in the response
url = urljoin(url, form_details["action"])

if form_details["method"] == "post":
    res = session.post(url, data=data)
elif form_details["method"] == "get":
    res = session.get(url, params=data)
else:
    # without this branch `res` would be undefined and fail later as a NameError
    raise ValueError(f"Unsupported form method: {form_details['method']!r}")

# the below code is only for replacing relative URLs to absolute ones, so the
# saved copy still loads its stylesheets, scripts, images and links
soup = BeautifulSoup(res.content, "html.parser")
for tag_name, attr in (("link", "href"), ("script", "src"), ("img", "src"), ("a", "href")):
    for tag in soup.find_all(tag_name):
        target = tag.attrs.get(attr)
        if target is None:
            # tag has no such attribute (e.g. an inline <script>): nothing to fix
            continue
        try:
            tag.attrs[attr] = urljoin(url, target)
        except ValueError:
            # best effort: leave a malformed URL untouched rather than abort
            continue

# write the page content to a file; the context manager closes the handle and
# the explicit encoding avoids depending on the platform default
with open("page.html", "w", encoding="utf-8") as page_file:
    page_file.write(str(soup))
# open the page on the default browser
webbrowser.open("page.html")

{'action': '/rcdlstatus/vahan/rcdlhome.xhtml;jsessionid=6bbba68e7ba8c2fe4ae66fa85479037f',
 'inputs': [{'name': 'form_rcdl', 'type': 'hidden', 'value': 'form_rcdl'},
            {'name': 'form_rcdl:tf_dlNO', 'type': 'text', 'value': ''},
            {'name': 'form_rcdl:tf_dob_input', 'type': 'text', 'value': ''},
            {'name': 'form_rcdl:j_idt34:CaptchaID',
             'type': 'text',
             'value': ''},
            {'name': 'javax.faces.ViewState',
             'type': 'hidden',
             'value': 'DXxN2GkhW3Q7ORAlNCoNRho9W9FrnnRPxrVePk+WJwSuL4DUE0G8OEqoEKOgFkZFzebzKg0SnasPaT/Y7H87jAj0Ru+AJRHvgBXZO3ILFI9bClh+vF8vn8jCYQ86nwNqifemppWPhZ5xAAa2d1RX+HeDvLCzQyvHopV523g+7Igo9WeMnvfogEXrpAYBTjI9Ysc7oDO13a59sC5eeHuuLLHR/+eXP47g3SGIF++N5IsHt3yRSopDYtJzDdWjH2HXDVooD+fE1B5qOBJymYi0nwVej04Mrt6r2kuO2JMBxg5QnXS2JagbzJWejXjU3oz3PXQf+SHgqMygeVSPglpJMWwrSvy91x43RDkBvNVLjeGu/BlKmHdsG89TRrIMP5qbYe0oidKGmYMh3bS/ylIZxPEMWQSF9mz9SdZ2L3Lx5aCU12f1tXbTJACTcTvachQuBiGVZlRkhGUKPArJQoWqlrTgXFsj/

True