In [3]:
import ast
import re

import pandas as pd
from bs4 import BeautifulSoup

# Load the saved results page; 'election_results.html' is resolved relative
# to the current working directory and decoded as UTF-8.
with open('election_results.html', 'r', encoding='utf-8') as html_file:
    html_content = html_file.read()

soup = BeautifulSoup(html_content, 'html.parser')

# Party-wise tally: take the first <table class="table"> on the page and
# drop its first and last rows (treated as header and footer).
results_table = soup.find('table', class_='table')
rows = results_table.find_all('tr')[1:-1]

data = []
for row in rows:
    cols = row.find_all('td')
    # Only rows with exactly four <td> cells are party records.
    if len(cols) != 4:
        continue
    party = cols[0].text.strip()
    # The remaining three cells are integer counts, in column order.
    won, leading, total = (int(cell.text.strip()) for cell in cols[1:])
    data.append({'Party': party, 'Won': won, 'Leading': leading, 'Total': total})

# One row per party with columns Party / Won / Leading / Total.
df = pd.DataFrame(data)

# Extract vote share data from the inline chart script(s).
#
# Each matching <script> is expected to declare two parallel JavaScript
# arrays: `xValues`, whose entries look like "PARTY{12.34%}", and `yValues`,
# the corresponding vote counts.
#
# SECURITY: the array bodies come straight from the downloaded HTML, so they
# are parsed with ast.literal_eval (literals only) instead of eval(), which
# would execute any expression embedded in the page. A body that is not a
# plain list literal raises ValueError/SyntaxError rather than being run.
X_VALUES_RE = re.compile(r'var xValues = \[(.*?)\];', re.DOTALL)
Y_VALUES_RE = re.compile(r'var yValues = \[(.*?)\];', re.DOTALL)
# Label format: party name, optionally followed by "{<share>}".
LABEL_RE = re.compile(r'([^{]+)(?:\{([^}]+)\})?')

vote_share_data = []
script_tags = soup.find_all('script')
for script in script_tags:
    script_text = script.string
    if not script_text:
        # External or empty script tag: nothing to parse.
        continue

    # Both patterns begin with the literal "var xValues"/"var yValues"
    # prefix, so a separate substring pre-check is unnecessary.
    x_match = X_VALUES_RE.search(script_text)
    y_match = Y_VALUES_RE.search(script_text)
    if not (x_match and y_match):
        continue

    x_values = ast.literal_eval('[' + x_match.group(1) + ']')
    y_values = ast.literal_eval('[' + y_match.group(1) + ']')

    # NOTE: zip() stops at the shorter list, so any unpaired trailing
    # label or count is ignored.
    for x, y in zip(x_values, y_values):
        match = LABEL_RE.match(x)
        if match:
            party = match.group(1).strip()
            share = match.group(2)
            # Share is None when the label carries no "{...}" suffix.
            share = float(share.strip('%')) if share else None
            vote_share_data.append({'Party': party, 'Vote Share': share, 'Votes': y})

vote_share_df = pd.DataFrame(vote_share_data)

# Show both tables on stdout, each preceded by its heading.
for heading, frame in (("Party-wise Results:", df), ("\nVote Share:", vote_share_df)):
    print(heading)
    print(frame)

# Persist each table as a CSV file (relative path, index column omitted).
for frame, csv_path in ((df, 'party_wise_results.csv'), (vote_share_df, 'vote_share.csv')):
    frame.to_csv(csv_path, index=False)

Party-wise Results:
                                                Party  Won  Leading  Total
0                        Bharatiya Janata Party - BJP  240        0    240
1                      Indian National Congress - INC   99        0     99
2                                Samajwadi Party - SP   37        0     37
3                 All India Trinamool Congress - AITC   29        0     29
4                     Dravida Munnetra Kazhagam - DMK   22        0     22
5                                  Telugu Desam - TDP   16        0     16
6                        Janata Dal  (United) - JD(U)   12        0     12
7      Shiv Sena (Uddhav Balasaheb Thackrey) - SHSUBT    9        0      9
8   Nationalist Congress Party – Sharadchandra Paw...    8        0      8
9                                     Shiv Sena - SHS    7        0      7
10             Lok Janshakti Party(Ram Vilas) - LJPRV    5        0      5
11      Yuvajana Sramika Rythu Congress Party - YSRCP    4        0      4
12   