In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_webpage(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.content
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

def extract_table(soup):
    table = soup.find('table')
    if table is None:
        print("Table not found on the page.")
    return table

def parse_table(table):
    headers = [header.text.strip() for header in table.find_all('th')]
    rows = table.find_all('tr')
    data = [
        [column.text.strip() for column in row.find_all('td')]
        for row in rows if row.find_all('td')
    ]
    return headers, data

def filter_relevant_headers(headers, data):
    # Find the correct headers for the data
    relevant_headers = [header for header in headers if header]
    return relevant_headers[:len(data[0])]

def save_to_csv(headers, data, filename):
    relevant_headers = filter_relevant_headers(headers, data)
    df = pd.DataFrame(data, columns=relevant_headers)
    df.to_csv(filename, index=False)
    print(f"Scraping completed and data saved to '{filename}'")

def main(url):
    content = fetch_webpage(url)
    if content:
        soup = BeautifulSoup(content, 'html.parser')
        table = extract_table(soup)
        if table:
            headers, data = parse_table(table)
            print("Headers:", headers)
            if data:
                print("First row of data:", data[0])
            save_to_csv(headers, data, 'lok_sabha_election_results_2024.csv')
        else:
            print("Failed to scrape the data as the table was not found.")

if __name__ == "__main__":
    url = 'https://results.eci.gov.in/PcResultGenJune2024/index.htm'
    main(url)


Headers: ['Party', 'Won', 'Leading', 'Total', 'Total', '543', '0', '543']
First row of data: ['Bharatiya Janata Party - BJP', '240', '0', '240']
Scraping completed and data saved to 'lok_sabha_election_results_2024.csv'


In [3]:
import pandas as pd

def load_data(file_path):
    return pd.read_csv(file_path)

def calculate_statistics(data):
    total_seats_by_party = data[['Party', 'Won']].sort_values(by='Won', ascending=False)

    party_with_most_seats = total_seats_by_party.iloc[0]
    party_with_least_seats = total_seats_by_party.iloc[-1]

    total_seats = total_seats_by_party['Won'].sum()
    total_seats_by_party['Percentage'] = (total_seats_by_party['Won'] / total_seats) * 100

    total_parties = total_seats_by_party.shape[0]

    top_5_parties = total_seats_by_party.head(5)

    sum_of_seats = total_seats_by_party['Won'].sum()
    average_seats = total_seats_by_party['Won'].mean()
    median_seats = total_seats_by_party['Won'].median()
    std_dev_seats = total_seats_by_party['Won'].std()

    return {
        "Total seats won by each party": total_seats_by_party.to_dict('records'),
        "Party with the highest number of seats won": party_with_most_seats.to_dict(),
        "Party with the lowest number of seats won": party_with_least_seats.to_dict(),
        "Percentage of total seats won by each party": total_seats_by_party[['Party', 'Percentage']].to_dict('records'),
        "Total number of parties participating": total_parties,
        "Top 5 parties by number of seats won": top_5_parties.to_dict('records'),
        "Sum of seats won by all parties": sum_of_seats,
        "Average number of seats won by parties": average_seats,
        "Median number of seats won by parties": median_seats,
        "Standard deviation of seats won by parties": std_dev_seats
    }

def create_adjusted_report(report):
    return {
        "Insight": [
            "Total seats won by each party",
            "Party with the highest number of seats won",
            "Party with the lowest number of seats won",
            "Percentage of total seats won by each party",
            "Total number of parties participating",
            "Top 5 parties by number of seats won",
            "Sum of seats won by all parties",
            "Average number of seats won by parties",
            "Median number of seats won by parties",
            "Standard deviation of seats won by parties"
        ],
        "Details": [
            report["Total seats won by each party"],
            report["Party with the highest number of seats won"],
            report["Party with the lowest number of seats won"],
            report["Percentage of total seats won by each party"],
            report["Total number of parties participating"],
            report["Top 5 parties by number of seats won"],
            report["Sum of seats won by all parties"],
            report["Average number of seats won by parties"],
            report["Median number of seats won by parties"],
            report["Standard deviation of seats won by parties"]
        ]
    }

def save_report(adjusted_report, file_path):
    report_df = pd.DataFrame(adjusted_report)
    report_df.to_csv(file_path, index=False)
    print(f"Report saved to {file_path}")

def main(file_path, report_path):
    data = load_data(file_path)
    report = calculate_statistics(data)
    adjusted_report = create_adjusted_report(report)
    save_report(adjusted_report, report_path)

if __name__ == "__main__":
    file_path = 'lok_sabha_election_results_2024.csv'
    report_path = 'election_report.csv'
    main(file_path, report_path)


Report saved to election_report.csv
