In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

# URL of the ECI party-wise results landing page
url = 'https://results.eci.gov.in/PcResultGenJune2024/index.htm'

# Send a GET request to the URL. The timeout stops the cell from hanging
# indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Fail right here with an HTTPError on a 4xx/5xx response. Previously
# `html_content` was only assigned on status 200, so a failed request surfaced
# later as an unrelated NameError when the HTML was parsed.
response.raise_for_status()

# Get the HTML content
html_content = response.text

# Base URL that the relative links found in the table cells are appended to
prefix = "https://results.eci.gov.in/PcResultGenJune2024"
# Parse the HTML held in `html_content` (fetched earlier in this cell)
soup = BeautifulSoup(html_content, 'html.parser')

# Find all <table> elements
tables = soup.find_all('table')

# Build one DataFrame per table: the stripped text of every <td> in a row,
# followed by one extra trailing column holding that row's link.
dfs = []
for table in tables:
    data = []
    for row in table.find_all('tr'):
        cols = row.find_all('td')

        # Resolve the link for THIS row only. The variable is reset on every
        # row so that a row without a link (for instance one with no <td>
        # cells at all) gets None instead of silently inheriting the previous
        # row's URL or a mangled copy of the page URL. The module-level `url`
        # is also no longer overwritten.
        row_url = None
        for col in cols:
            # Read the href attribute from the parsed tag rather than
            # splitting the cell's HTML string, which depends on quoting and
            # attribute order. When several cells carry a link, the last one
            # wins, as before.
            anchor = col.find('a', href=True)
            if anchor is not None:
                row_url = prefix + "/" + anchor['href'].strip()

        row_data = [col.text.strip() for col in cols]
        row_data.append(row_url)
        # Rows are kept even when they have no <td> cells; pandas pads them
        # with missing values, so they can be filtered out downstream.
        data.append(row_data)
    dfs.append(pd.DataFrame(data))

# Combine all DataFrames into one
combined_df = pd.concat(dfs, ignore_index=True)

In [2]:
# Labels for the five scraped columns, in table order (the URL column is last)
col_name = [
    "Party",
    "Won",
    "Leading",
    "Total",
    "URL",
]

In [3]:
# Label the scraped columns
combined_df.columns = col_name

# Drop rows that have no "Total" value, then convert the three count columns
# to integers. Doing the conversion with a single chained `.astype` returns a
# new DataFrame, which avoids assigning into a filtered slice (the pattern
# that triggers pandas' SettingWithCopyWarning).
combined_df = (
    combined_df[combined_df["Total"].notna()]
    .astype({"Won": int, "Leading": int, "Total": int})
)
combined_df

Unnamed: 0,Party,Won,Leading,Total,URL
1,Bharatiya Janata Party - BJP,1,237,238,https://results.eci.gov.in/PcResultGenJune2024...
2,Indian National Congress - INC,0,99,99,https://results.eci.gov.in/PcResultGenJune2024...
3,Samajwadi Party - SP,0,36,36,https://results.eci.gov.in/PcResultGenJune2024...
4,All India Trinamool Congress - AITC,0,30,30,https://results.eci.gov.in/PcResultGenJune2024...
5,Dravida Munnetra Kazhagam - DMK,0,21,21,https://results.eci.gov.in/PcResultGenJune2024...
6,Telugu Desam - TDP,0,16,16,https://results.eci.gov.in/PcResultGenJune2024...
7,Janata Dal (United) - JD(U),0,15,15,https://results.eci.gov.in/PcResultGenJune2024...
8,Shiv Sena (Uddhav Balasaheb Thackrey) - SHSUBT,0,11,11,https://results.eci.gov.in/PcResultGenJune2024...
9,Nationalist Congress Party – Sharadchandra Paw...,0,8,8,https://results.eci.gov.in/PcResultGenJune2024...
10,Lok Janshakti Party(Ram Vilas) - LJPRV,0,5,5,https://results.eci.gov.in/PcResultGenJune2024...


In [4]:
# Map each party name to the URL scraped for it

party_url = {
    party: link
    for party, link in zip(combined_df["Party"], combined_df["URL"])
}

In [5]:
# Display the party -> URL mapping so the scraped links can be inspected
party_url

{'Bharatiya Janata Party - BJP': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-369.htm',
 'Indian National Congress - INC': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-742.htm',
 'Samajwadi Party - SP': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-1680.htm',
 'All India Trinamool Congress - AITC': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-140.htm',
 'Dravida Munnetra Kazhagam - DMK': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-582.htm',
 'Telugu Desam - TDP': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-1745.htm',
 'Janata Dal  (United) - JD(U)': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-805.htm',
 'Shiv Sena (Uddhav Balasaheb Thackrey) - SHSUBT': 'https://results.eci.gov.in/PcResultGenJune2024/partywiseleadresultState-3369.htm',
 'Nationalist Congress Party – Sharadchandra Pawar - NCPS

In [6]:
# Export the dict as a JSON file (disabled: uncomment the lines below to write it)
# import json
# with open("../data/party_url.json", "w") as outfile:
#     json.dump(party_url, outfile)