# Class for Scraping Last Saturday's Registrations

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime, timedelta

class WebDataScraper:
    def __init__(self):
        self.most_recent_saturday = self.get_most_recent_saturday()
        self.url = f'https://vt.ncsbe.gov/RegStat/Results/?date={self.most_recent_saturday}'
        self.json_data = ""

    @staticmethod
    def get_most_recent_saturday():
        today = datetime.now()
        offset = (today.weekday() + 2) % 7
        recent_saturday = today - timedelta(days=offset)
        return recent_saturday.strftime('%m/%d/%Y')

    def fetch_data(self):
        response = requests.get(self.url)
        soup = BeautifulSoup(response.content, 'html.parser')
        scripts = soup.find_all('script')

        for script in scripts:
            if script.string and 'SetupGrid' in script.string:
                script_content = script.string
                start_index = script_content.find("var data = ")
                end_index = script_content.find("// initialize the igGrid control")
                if start_index != -1 and end_index != -1:
                    self.json_data = script_content[start_index + len("var data = "):end_index].strip()
                    break

        if self.json_data:
            self.json_data = self.json_data.rstrip(',')
        else:
            raise ValueError("JSON data not found.")

    def parse_json(self):
        if not self.json_data:
            raise ValueError("No JSON data to parse.")

        data = json.loads(self.json_data)
        df = pd.DataFrame(data)

        # Drop the 'AppVersion' column
        if 'AppVersion' in df.columns:
            df = df.drop(columns=['AppVersion'])

        # Add the 'Date' column with the most recent Saturday's date
        df['Week Ending'] = self.most_recent_saturday

        # Capitalize the first letter of each county name
        df['CountyName'] = df['CountyName'].str.capitalize()
        
        return df

    def get_dataframe(self):
        self.fetch_data()
        return self.parse_json()

In [2]:
# Create an instance of the WebDataScraper class
scraper = WebDataScraper()

# Fetch and process the data, returning a DataFrame
df = scraper.get_dataframe()
df

Unnamed: 0,CountyName,Democrats,Republicans,Libertarians,Green,NoLabels,Constitution,Unaffiliated,White,Black,...,Asian,Multiracial,Undesignated,Other,Hispanic,Male,Female,UnDisclosedGender,Total,Week Ending
0,Alamance,36834,36985,692,31,114,0,40421,74355,23855,...,1119,671,9812,4988,6503,48001,58026,9050,115077,08/03/2024
1,Alexander,4033,12272,118,2,29,0,8884,22182,960,...,153,37,1568,401,533,11445,12256,1637,25338,08/03/2024
2,Alleghany,1705,3424,40,0,11,0,2908,7173,76,...,6,17,659,149,252,3611,3871,606,8088,08/03/2024
3,Anson,8501,3192,58,2,27,0,4606,6872,6507,...,103,22,2636,210,156,6192,7470,2724,16386,08/03/2024
4,Ashe,3647,9342,106,2,19,0,7312,18796,119,...,55,17,1178,237,347,9196,10137,1095,20428,08/03/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Wayne,26457,25791,451,19,314,0,22397,41659,23435,...,534,355,5524,3784,3394,31629,37938,5862,75429,08/03/2024
96,Wilkes,6582,24429,210,6,63,0,13293,38948,1611,...,135,116,2860,868,1044,19725,21778,3080,44583,08/03/2024
97,Wilson,24513,14091,252,11,143,0,16615,27188,21922,...,286,164,3815,2131,2051,23183,29193,3249,55625,08/03/2024
98,Yadkin,2865,14337,133,6,27,0,7829,21858,735,...,65,39,1657,819,1061,11244,12339,1614,25197,08/03/2024


# Merging Shape File with Dataframe and Mapping Registrations

In [3]:
import geopandas as gpd

# Shapefile path including the .shp extension
shapefile_path = 'NC_State_County_Boundary_NCGS_2017.shp'

# Load the shapefile
gdf = gpd.read_file(shapefile_path)

# Set the CRS (adjust EPSG code if needed)
gdf = gdf.set_crs(epsg=4326)

# Merge shapefile with DataFrame
merged_gdf = gdf.merge(df, left_on='County', right_on='CountyName')
merged_gdf

Unnamed: 0,County,FIPS,Rec_Survey,NCGS_url,ck_date,Area_mi_sq,Shape_Area,County_ID,geometry,CountyName,...,Asian,Multiracial,Undesignated,Other,Hispanic,Male,Female,UnDisclosedGender,Total,Week Ending
0,Camden,29,Recorded survey data is available. Visit North...,http://portal.ncdenr.org/web/lr/geodetic/bound...,2011-11-29,310.290035,8.650426e+09,15,"POLYGON ((2913995.55318 864401.7075, 2913895.0...",Camden,...,35,18,412,160,69,3944,4215,465,8624,08/03/2024
1,Gates,73,No recent survey data available,,2011-11-29,345.811854,9.640705e+09,37,"POLYGON ((2678789.04268 934597.31348, 2678741....",Gates,...,11,42,558,88,57,3633,4149,614,8396,08/03/2024
2,Iredell,97,Recorded survey data is available. Visit North...,http://portal.ncdenr.org/web/lr/geodetic/bound...,2011-11-29,597.974606,1.667062e+10,49,"POLYGON ((1420151.56849 639374.88728, 1420082....",Iredell,...,1597,434,9627,4636,5243,62637,69219,9928,141784,08/03/2024
3,Wilkes,193,Recorded survey data is available. Visit North...,http://portal.ncdenr.org/web/lr/geodetic/bound...,2011-11-29,757.273702,2.111175e+10,97,"POLYGON ((1319004.88269 828579.51571, 1318921....",Wilkes,...,135,116,2860,868,1044,19725,21778,3080,44583,08/03/2024
4,Union,179,Recorded survey data is available. Visit North...,http://portal.ncdenr.org/web/lr/geodetic/bound...,2011-11-29,639.465114,1.782795e+10,90,"POLYGON ((1536924.56983 534391.62581, 1537009....",Union,...,4345,795,21489,8508,9429,76906,85318,18537,180761,08/03/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Surry,171,No recent survey data available,,2011-11-29,537.736732,1.499126e+10,86,"POLYGON ((1571704.80374 909917.76902, 1571628....",Surry,...,132,96,3773,1562,2248,21239,23816,3143,48198,08/03/2024
95,Hertford,91,No recent survey data available,,2011-11-29,360.439690,1.004846e+10,46,"POLYGON ((2676280.82526 915462.4574, 2676179.7...",Hertford,...,42,57,731,148,102,5734,7442,679,13855,08/03/2024
96,Stokes,169,No recent survey data available,,2011-11-29,455.872890,1.270900e+10,85,"POLYGON ((1694794.0385 914115.54081, 1694692.6...",Stokes,...,72,77,2745,436,493,15645,17160,82,32887,08/03/2024
97,Ashe,9,No recent survey data available,,2011-11-29,427.999676,1.193919e+10,5,"POLYGON ((1269410.48878 915476.45902, 1266824....",Ashe,...,55,17,1178,237,347,9196,10137,1095,20428,08/03/2024


# Showing Voter Registration Density on a Map by County

In [5]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import geopandas as gpd
import pandas as pd

# Load GeoDataFrame (replace with your actual file path)
gdf = gpd.read_file('NC_State_County_Boundary_NCGS_2017.shp')

# Ensure CRS is set to EPSG:4326 for compatibility with Plotly
if gdf.crs is None:
    gdf.crs = "EPSG:4326"
else:
    gdf = gdf.to_crs(epsg=4326)

# Merge the GeoDataFrame with voter data
merged_gdf = gdf.merge(df, left_on='County', right_on='CountyName')

# Create a choropleth map using Plotly Express
fig = px.choropleth_mapbox(
    merged_gdf,
    geojson=merged_gdf.geometry.__geo_interface__,
    locations=merged_gdf.index,
    color='Total',  # Column to color by
    hover_name='County',  # Column to show on hover
    hover_data={'County': True, 'Total': True},  # Show county name and total voters
    color_continuous_scale="Viridis",
    mapbox_style="carto-positron",
    center={"lat": 35.7, "lon": -80.8},
    zoom=7,
    title="Total Voter Registration by County"
)

# Create Dash app
app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Graph(figure=fig)
])

if __name__ == '__main__':
    app.run_server(debug=True)
