In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Archive page that serves the JoSAA opening/closing rank form.
url = 'https://josaa.admissions.nic.in/applicant/seatmatrix/openingclosingrankarchieve.aspx'

# Form fields submitted one at a time, in this order, after the year and
# round have been chosen; the last entry presses the Submit button.
params = {
    "ctl00$ContentPlaceHolder1$ddlInstype": "ALL",
    "ctl00$ContentPlaceHolder1$ddlInstitute": "ALL",
    "ctl00$ContentPlaceHolder1$ddlBranch": "ALL",
    "ctl00$ContentPlaceHolder1$btnSubmit": "Submit",
}

# Year and round values (as strings) for the bulk-scrape loop, which is
# currently commented out further down.
years = ["2018", "2017"]
rounds = [str(number) for number in range(1, 6)]

def josaa_scrape(year, Round, *, timeout=30):
    """
    Scrape the opening/closing rank table for one year and round.

    The form is driven step by step inside a single session: an initial GET,
    a POST selecting the year, a POST selecting the round, then one POST per
    entry of the module-level ``params`` dict. Before every POST the hidden
    ``__*`` inputs (e.g. ``__VIEWSTATE``) of the previous response are merged
    into the form data so the server receives its own state back.

    Args:
        year: Value posted for the year drop-down, e.g. ``"2018"``.
        Round: Value posted for the round drop-down, e.g. ``"1"``.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        A DataFrame of the result grid with added ``Year`` and ``Round``
        columns and integer ``Opening Rank`` / ``Closing Rank`` columns, or
        ``None`` when the result grid is missing from the final response.

    Raises:
        ValueError: If a rank column holds a value that cannot be converted
            to ``int``.

    Sample usage: df = josaa_scrape("2018", "1")
    df.info()
    """
    # Imported here so this function does not depend on a file-level import.
    from io import StringIO

    with requests.Session() as s:
        R = s.get(url, timeout=timeout)
        print(f"Initial GET request status: {R.status_code}")
        data = _hidden_fields(R.content)
        data["ctl00$ContentPlaceHolder1$ddlYear"] = year
        print("Data after initial GET and year selection:", data)
        R = s.post(url, data=data, timeout=timeout)
        print(f"POST year selection status: {R.status_code}")

        data.update(_hidden_fields(R.content))
        data["ctl00$ContentPlaceHolder1$ddlroundno"] = Round
        print("Data after year and round selection:", data)
        R = s.post(url, data=data, timeout=timeout)
        print(f"POST round selection status: {R.status_code}")

        # Each remaining field is posted on its own, refreshing the hidden
        # state from the latest response before every request.
        for key, value in params.items():
            data.update(_hidden_fields(R.content))
            data[key] = value
            print(f"Data before POST with param {key}:", data)
            R = s.post(url, data=data, timeout=timeout)
            print(f"POST params {key} status: {R.status_code}")

    # Print out a portion of the final HTML to help with debugging
    print("Final HTML content snippet:")
    print(R.text[:1000])

    table = BeautifulSoup(R.text, 'lxml').find(id='ctl00_ContentPlaceHolder1_GridView1')

    if table is None:
        print(f"Failed to retrieve data for year {year}, round {Round}. The table was not found.")
        return None

    # read_html is given a file-like object: passing the HTML as a bare
    # string is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(table.prettify()))[0]
    df.dropna(inplace=True, how="all")

    df["Year"] = year
    df["Round"] = Round
    df['Opening Rank'] = df['Opening Rank'].astype(int)
    df['Closing Rank'] = df['Closing Rank'].astype(int)

    return df


def _hidden_fields(html):
    """Return {name: value} for every <input> whose name starts with ``__``."""
    soup = BeautifulSoup(html, 'lxml')
    # .get() tolerates an input that carries no value attribute.
    return {tag['name']: tag.get('value', '') for tag in soup.select('input[name^=__]')}

# Smoke test: fetch a single year/round and summarise the result.
df = josaa_scrape("2021", "1")
if df is None:
    print("No data returned.")
else:
    df.info()

# Uncomment the following lines to scrape all data and save to CSV
# for year in years:
#     for Round in rounds:
#         df = josaa_scrape(year, Round)
#         if df is not None:
#             df.to_csv(path_or_buf=f"{year}-{Round}.csv", index=False)


Initial GET request status: 200
Data after initial GET and year selection: {'__VIEWSTATE': 'u0INoNdMjE14tHu9myZPZSCz1cCFcQSNr1ZXv/W7oaIr+ij85rAS9kmZAup9zvLZrjzQ43yn0yfAi8iTvMbRrsGWkQ6iaLqJRL68wLYTsvn/JrIMxRgYooKbmUt1j7vKJ5yUs9TgzJehyAAUEYvIEfksu2IwLKs25M2azCrrHALGRrXaOgjD9b03BAOt7Nv3oOxtXt++0WXm8WgXjcckm6K+NX0J1VsPd+4+9XjdzpsaBtH0NIrwzW07TQDCEafEteQB9piTZWUUguql7Jg1h9IuMbNJxt6OOTMl5tiCO1v9ip7Uxl2+jvKL6ktqENIAM4SMOpQor9tbbeJGaDlRr2h0ODGFOoSWc170MNvuuRF0vB+EVRDq4vRlUtTrDE6hTNCctOTtNXOv5/qjA/+MSaeHLp5RUmjYG7jVWg1ZLWykbKwsswgtbWOjUUTJSzj0a7v9177lLiSLGrqv90oTHP1xXshVdh67a1xuhiQXBpRZCHEDjnWi8nQ5SlfdkJq12w7QqfiZWtLFHUwGYoErig5G1QO94q0nlVlJBa7w9tacQnqWplFM1V3WG/OyRv7TezaivOhYg1AHBw2PjtMQUor01Z61uTW6R6kZj5jzJPnDGrve/FxOpSQ65HBw7WjBmUh9oQ==', '__VIEWSTATEGENERATOR': 'AD19A6D0', '__EVENTVALIDATION': 'HsYdWflbWcxJAA4rYuKFwD7yDTzpbS/AJRyn0KdpIbt/bbU+AA5C3vDkNDkI5XW23SYIOwD4Zy9UZz0Rin0OkwXfDESw4NuMUSGG5tmTgwIbGBChSX2PNhkTptj3OTGOM26O4J/McX9KtQniskDsXyNZWOHRf+Vmd32x2QYzZ0gVXmB8DmjwADAR3GSz1KBGuntmLPz