# Class for Scraping Last Saturday's Registrations

In [2]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime, timedelta

class WebDataScraper:
    def __init__(self):
        self.most_recent_saturday = self.get_most_recent_saturday()
        self.url = f'https://vt.ncsbe.gov/RegStat/Results/?date={self.most_recent_saturday}'
        self.json_data = ""

    @staticmethod
    def get_most_recent_saturday():
        today = datetime.now()
        offset = (today.weekday() + 2) % 7
        recent_saturday = today - timedelta(days=offset)
        return recent_saturday.strftime('%m/%d/%Y')

    def fetch_data(self):
        response = requests.get(self.url)
        soup = BeautifulSoup(response.content, 'html.parser')
        scripts = soup.find_all('script')

        for script in scripts:
            if script.string and 'SetupGrid' in script.string:
                script_content = script.string
                start_index = script_content.find("var data = ")
                end_index = script_content.find("// initialize the igGrid control")
                if start_index != -1 and end_index != -1:
                    self.json_data = script_content[start_index + len("var data = "):end_index].strip()
                    break

        if self.json_data:
            self.json_data = self.json_data.rstrip(',')
        else:
            raise ValueError("JSON data not found.")

    def parse_json(self):
        if not self.json_data:
            raise ValueError("No JSON data to parse.")

        data = json.loads(self.json_data)
        df = pd.DataFrame(data)

        # Drop the 'AppVersion' column
        if 'AppVersion' in df.columns:
            df = df.drop(columns=['AppVersion'])

        # Add the 'Date' column with the most recent Saturday's date
        df['Week Ending'] = self.most_recent_saturday

        # Capitalize the first letter of each county name
        df['CountyName'] = df['CountyName'].str.capitalize()
        
        return df

    def get_dataframe(self):
        self.fetch_data()
        return self.parse_json()

In [12]:
# Create an instance of the WebDataScraper class
scraper = WebDataScraper()

# Fetch and process the data, returning a DataFrame
df = scraper.get_dataframe()
df

Unnamed: 0,CountyName,Democrats,Republicans,Libertarians,Green,NoLabels,Constitution,Unaffiliated,White,Black,...,Asian,Multiracial,Undesignated,Other,Hispanic,Male,Female,UnDisclosedGender,Total,Week Ending
0,Alamance,36834,36985,692,31,114,0,40421,74355,23855,...,1119,671,9812,4988,6503,48001,58026,9050,115077,08/03/2024
1,Alexander,4033,12272,118,2,29,0,8884,22182,960,...,153,37,1568,401,533,11445,12256,1637,25338,08/03/2024
2,Alleghany,1705,3424,40,0,11,0,2908,7173,76,...,6,17,659,149,252,3611,3871,606,8088,08/03/2024
3,Anson,8501,3192,58,2,27,0,4606,6872,6507,...,103,22,2636,210,156,6192,7470,2724,16386,08/03/2024
4,Ashe,3647,9342,106,2,19,0,7312,18796,119,...,55,17,1178,237,347,9196,10137,1095,20428,08/03/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Wayne,26457,25791,451,19,314,0,22397,41659,23435,...,534,355,5524,3784,3394,31629,37938,5862,75429,08/03/2024
96,Wilkes,6582,24429,210,6,63,0,13293,38948,1611,...,135,116,2860,868,1044,19725,21778,3080,44583,08/03/2024
97,Wilson,24513,14091,252,11,143,0,16615,27188,21922,...,286,164,3815,2131,2051,23183,29193,3249,55625,08/03/2024
98,Yadkin,2865,14337,133,6,27,0,7829,21858,735,...,65,39,1657,819,1061,11244,12339,1614,25197,08/03/2024


# Merging Shape File with Dataframe and Mapping Registrations

In [21]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import geopandas as gpd
import pandas as pd

# Shapefile path including the .shp extension
shapefile_path = 'NC_State_County_Boundary_NCGS_2017.shp'

# Load the shapefile
gdf = gpd.read_file(shapefile_path)

# Check and set the CRS
if gdf.crs is None:
    gdf.crs = 'EPSG:4326'  # Set to a default CRS if unknown

# Convert to Web Mercator if necessary
if gdf.crs != 'EPSG:3857':
    gdf = gdf.to_crs('EPSG:3857')

# Merge shapefile with DataFrame
merged_gdf = gdf.merge(df, left_on='County', right_on='CountyName')
merged_gdf

0     POLYGON ((Infinity Infinity, Infinity Infinity...
1     POLYGON ((Infinity Infinity, Infinity Infinity...
2     POLYGON ((Infinity Infinity, Infinity Infinity...
3     POLYGON ((Infinity Infinity, Infinity Infinity...
4     POLYGON ((Infinity Infinity, Infinity Infinity...
                            ...                        
94    POLYGON ((Infinity Infinity, Infinity Infinity...
95    POLYGON ((Infinity Infinity, Infinity Infinity...
96    POLYGON ((Infinity Infinity, Infinity Infinity...
97    POLYGON ((Infinity Infinity, Infinity Infinity...
98    POLYGON ((Infinity Infinity, Infinity Infinity...
Name: geometry, Length: 99, dtype: geometry