<a href="https://colab.research.google.com/github/kushalshah0/colab_tools/blob/main/voters_info.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Standard-library modules: os for file paths, sys to locate the running interpreter for pip
import os
import sys

# Install required libraries silently
!{sys.executable} -m pip install requests pandas beautifulsoup4 > /dev/null 2>&1

import requests
import pandas as pd
from bs4 import BeautifulSoup

# Function to extract information (HTML table) from Election Commission of Nepal's official website
def extract_voters(state, district, vdc_mun, ward, reg_centre, timeout=30):
    """Fetch the raw voter-list page for one registration centre.

    Sends a form-encoded POST to the Election Commission of Nepal's
    ``view_ward.php`` endpoint and returns the response body.

    Args:
        state: Value sent as the ``state`` form field.
        district: Value sent as the ``district`` form field.
        vdc_mun: Value sent as the ``vdc_mun`` form field.
        ward: Value sent as the ``ward`` form field.
        reg_centre: Value sent as the ``reg_centre`` form field.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    data_endpoint = 'https://voterlist.election.gov.np/view_ward.php'
    form_data = {
        'state': state,
        'district': district,
        'vdc_mun': vdc_mun,
        'ward': ward,
        'reg_centre': reg_centre,
    }
    response = requests.post(data_endpoint, data=form_data, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the parser.
    response.raise_for_status()
    return response.content

# Function to save the extracted voter records to a CSV file
def save_csv(table_name, headers, rows, path='/content/warehouse'):
    """Write ``rows`` to ``<path>/<table_name>.csv``.

    Args:
        table_name: File name without the ``.csv`` extension.
        headers: Column names for the CSV header line.
        rows: Sequence of records, one list of cell values per row.
        path: Directory to write into; created if it does not exist.
            Defaults to the original hard-coded ``/content/warehouse``.
    """
    os.makedirs(path, exist_ok=True)
    csv_path = os.path.join(path, f"{table_name}.csv")
    # index=False keeps the DataFrame's positional index out of the file.
    pd.DataFrame(rows, columns=headers).to_csv(csv_path, index=False)

# Returns voter's information inside a Python list
def get_tables_rows(table):
    """Collect the cell text of every data row in a parsed HTML table.

    Args:
        table: A parsed ``<table>`` element exposing ``find_all``.

    Returns:
        A list of records; each record is a list with the stripped text of
        the row's ``<td>`` cells, excluding the last cell. Rows that yield
        no cells at all are omitted.
    """
    voters_record = []
    # Slice off the first <tr>, which is the header row.
    for tr in table.find_all("tr")[1:]:
        # Slice off the last <td>, which is not required in the output.
        voter_record = [td.text.strip() for td in tr.find_all("td")[:-1]]
        # A row with zero or one <td> leaves nothing to record; keeping it
        # would add an empty record that does not fit the column layout.
        if voter_record:
            voters_record.append(voter_record)
    return voters_record

# Function to obtain only the name of the election center
def create_file_name(table):
    """Build a file-name stem from the last ``<th>`` of the table's first row.

    The header text is stripped, spaces become ``-``, and commas and periods
    are removed. Path separators (``/`` and ``\\``) are also turned into
    ``-`` so the result is always a single path component.

    Args:
        table: A parsed ``<table>`` element exposing ``find``.

    Returns:
        The sanitised election-centre name as a string.

    Raises:
        ValueError: If the table has no ``<tr>`` or its first row has no
            ``<th>`` cell to take the name from.
    """
    first_row = table.find("tr")
    header_cells = first_row.find_all("th") if first_row is not None else []
    if not header_cells:
        raise ValueError(
            "cannot derive a file name: table's first row has no <th> cell")
    election_center_name = header_cells[-1].text.strip()
    # One pass over the string: map separators to '-', drop punctuation.
    replacements = str.maketrans(
        {" ": "-", ",": None, ".": None, "/": "-", "\\": "-"})
    return election_center_name.translate(replacements)

# Main execution
# Main execution: fetch one registration centre's voter list and save it as CSV
if __name__ == "__main__":
    voters = extract_voters(state="2", district="33", vdc_mun="5353", ward="21", reg_centre="7451")

    soup = BeautifulSoup(voters, "html.parser")
    tables = soup.find_all("table")

    # Without this guard an empty result surfaces as a bare IndexError below.
    if not tables:
        raise RuntimeError(
            "No <table> found in the response; check the state/district/"
            "vdc_mun/ward/reg_centre values.")

    # The last table on the page is the one holding the voter rows
    voter_table = tables[-1]

    # Column names for the CSV header
    column_names = ['sn', 'voter_num', 'voter_name', 'age', 'gender', 'spouse_name', 'parents_name']

    voters_record = get_tables_rows(voter_table)

    # The election-centre name gives each centre's records a unique file name
    file_name = create_file_name(voter_table)

    # Saves to the warehouse directory
    save_csv(file_name, column_names, voters_record)

    # Print success message
    print(f"Data successfully saved to {file_name}.csv in the '/content/warehouse' directory.")