# Class for Scraping Voter Registrations for Every Saturday of the Year

In [2]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime, timedelta

class WebDataScraper:
    """Scrape weekly registration statistics from vt.ncsbe.gov.

    For each date returned by :meth:`saturdays`, the results page is
    downloaded, the JavaScript ``var data = ...`` literal embedded in the
    page is extracted, and the JSON is converted to a pandas DataFrame.

    Attributes:
        year: Year whose dates are scraped.
        saturdays_in_year: Date strings (``MM/DD/YYYY``) to fetch.
        saturdays_in_2024: Legacy alias of ``saturdays_in_year``, kept so
            existing callers keep working.
        json_data: Raw JSON text extracted by the last ``fetch_data`` call.
    """

    # Seconds to wait for the server before giving up on a request, so a
    # stalled connection cannot hang the whole scrape.
    REQUEST_TIMEOUT = 30

    def __init__(self, year: int = 2024):
        """Prepare the list of dates to scrape for ``year`` (default 2024)."""
        self.year = year
        self.saturdays_in_year = self.saturdays(year)
        self.saturdays_in_2024 = self.saturdays_in_year  # legacy name
        self.json_data = ""

    @staticmethod
    def saturdays(year: int):
        """Return Jan 1 plus every Saturday of ``year`` that is already past.

        The list always starts with January 1 (the data starts there), then
        contains each Saturday of ``year`` strictly before the current
        moment. Dates never spill over into the following year, and Jan 1 is
        not repeated when it is itself a Saturday.

        Args:
            year: Calendar year to enumerate.

        Returns:
            List of date strings formatted ``MM/DD/YYYY``.
        """
        SAT = 5  # datetime.weekday(): Monday == 0 ... Saturday == 5
        date_format = '%m/%d/%Y'

        start_date = datetime(year, 1, 1)
        # Modulo keeps the offset in 0..6 regardless of the starting weekday.
        days_until_saturday = (SAT - start_date.weekday()) % 7
        current_sat = start_date + timedelta(days=days_until_saturday)

        saturdays = [start_date.strftime(date_format)]  # data starts at Jan 1
        if current_sat == start_date:
            # Jan 1 is already listed; do not add it a second time.
            current_sat += timedelta(weeks=1)

        now = datetime.now()
        # Stop at "now" or at the end of the requested year, whichever is first.
        while current_sat < now and current_sat.year == year:
            saturdays.append(current_sat.strftime(date_format))
            current_sat += timedelta(weeks=1)

        return saturdays

    def fetch_data(self, saturday_date: str):
        """Download the results page for a date and store its embedded JSON.

        The JSON is taken from the ``<script>`` containing ``SetupGrid``: the
        text between ``var data = `` and the
        ``// initialize the igGrid control`` comment, with any trailing
        commas removed. The result is stored in ``self.json_data``.

        Args:
            saturday_date: Date string passed as the ``date`` query value.

        Raises:
            ValueError: If no matching script/data section is found.
        """
        start_marker = "var data = "
        end_marker = "// initialize the igGrid control"

        url = f'https://vt.ncsbe.gov/RegStat/Results/?date={saturday_date}'
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        self.json_data = ""  # Clear previous JSON data

        for script in soup.find_all('script'):
            script_content = script.string
            if not script_content or 'SetupGrid' not in script_content:
                continue
            start_index = script_content.find(start_marker)
            if start_index == -1:
                continue
            payload_start = start_index + len(start_marker)
            # Look for the end marker only after the payload begins.
            end_index = script_content.find(end_marker, payload_start)
            if end_index == -1:
                continue
            self.json_data = script_content[payload_start:end_index].strip()
            break

        if not self.json_data:
            raise ValueError("JSON data not found.")
        self.json_data = self.json_data.rstrip(',')

    def parse_json(self, saturday_date: str):
        """Convert the stored JSON into a DataFrame tagged with its date.

        Drops the ``AppVersion`` column when present, adds a ``Week Ending``
        column holding ``saturday_date``, and applies ``str.capitalize`` to
        ``CountyName`` (first character upper-case, the rest lower-case).

        Args:
            saturday_date: Value written to the ``Week Ending`` column.

        Returns:
            pandas DataFrame built from ``self.json_data``.

        Raises:
            ValueError: If ``self.json_data`` is empty.
        """
        if not self.json_data:
            raise ValueError("No JSON data to parse.")

        df = pd.DataFrame(json.loads(self.json_data))

        if 'AppVersion' in df.columns:
            df = df.drop(columns=['AppVersion'])

        df['Week Ending'] = saturday_date
        df['CountyName'] = df['CountyName'].str.capitalize()

        return df

    def sat_dataframes(self):
        """Fetch and parse every scheduled date.

        Returns:
            List of DataFrames, one per entry of ``self.saturdays_in_2024``
            and in the same order.
        """
        dfs = []
        for sat in self.saturdays_in_2024:
            self.fetch_data(sat)
            dfs.append(self.parse_json(sat))
        return dfs

### Use the scraper to get the data for all Saturdays

In [3]:
# Create an instance of the WebDataScraper class
scraper = WebDataScraper()

# Fetch and parse the data: one DataFrame per scraped date
sat_dfs = scraper.sat_dataframes()

# Load the county -> FIPS lookup; zero-pad codes to three characters
df_fips = pd.read_csv('FIPS.csv')
df_fips['FIPS'] = df_fips['FIPS'].astype(str).str.zfill(3)

# Normalize the join key exactly like the scraper normalizes 'CountyName'
# (str.capitalize). Without this, a differently cased name in FIPS.csv
# would be dropped silently by the inner merge below.
df_fips['County'] = df_fips['County'].str.capitalize()

voter_registration_by_fips_dfs = []

for sat_df in sat_dfs:
    sat_and_fips_df = pd.merge(sat_df, df_fips, left_on='CountyName', right_on='County', how='inner')

    # # Drop the 'CountyName' column
    # sat_and_fips_df = sat_and_fips_df.drop(columns=['CountyName'])

    # Reorder columns to put 'County' and 'FIPS' at the front
    leading_columns = ['County', 'FIPS']
    columns_order = leading_columns + [col for col in sat_and_fips_df.columns if col not in leading_columns]
    sat_and_fips_df = sat_and_fips_df[columns_order]

    voter_registration_by_fips_dfs.append(sat_and_fips_df)

# Concatenate all dataframes into one and persist it
voter_registration_by_fips_all_sats_dfs = pd.concat(voter_registration_by_fips_dfs, ignore_index=True)
voter_registration_by_fips_all_sats_dfs.to_csv('combined_data.csv', index=False)




### Weekly change! (ignore for now!)

In [123]:
# import pandas as pd
# from datetime import datetime
# import plotly.express as px

# # Assuming 'voter_registration_by_fips_dfs' is a list of DataFrames with registration data
# def calculate_weekly_changes(dfs):
#     all_dfs = pd.concat(dfs)
#     all_dfs['Week Ending'] = pd.to_datetime(all_dfs['Week Ending'], format='%m/%d/%Y')

#     # Ensure data is sorted by 'County' and 'Week Ending'
#     all_dfs = all_dfs.sort_values(by=['County', 'Week Ending'])

#     # Calculate the weekly change for each county
#     all_dfs['Weekly Change'] = all_dfs.groupby('County')['Total'].diff().fillna(0)
    
#     return all_dfs

# def plot_weekly_changes(df):
#     fig = px.line(df,
#                   x='Week Ending',
#                   y='Weekly Change',
#                   color='County',
#                   title='Weekly Changes in Voter Registrations',
#                   labels={'Weekly Change': 'Change in Registrations'},
#                   line_shape='linear')

#     fig.update_layout(xaxis_title='Date', yaxis_title='Change in Registrations')
#     fig.update_traces(mode='lines+markers')

#     fig.show()

# df_with_changes = calculate_weekly_changes(voter_registration_by_fips_dfs)

# plot_weekly_changes(df_with_changes)


### Dashboard map!

In [159]:
def create_advanced_slider_plot(df):
    """Show a bar chart of county totals with a slider over 'Week Ending'.

    One bar trace is added per distinct 'Week Ending' value; only the first
    is visible initially. Each slider step makes exactly one trace visible
    and updates the figure title. The y-axis range is fixed to 110% of the
    largest 'Total' so bars stay comparable while sliding.

    Args:
        df: DataFrame with 'County', 'Total' and 'Week Ending' columns.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Imported here so the function does not depend on another cell having
    # imported plotly.graph_objects as ``go``.
    import plotly.graph_objects as go

    fig = go.Figure()

    max_total = df['Total'].max()
    saturdays = df['Week Ending'].unique()

    for index, sat in enumerate(saturdays):
        data_for_saturday = df[df['Week Ending'] == sat]
        fig.add_trace(go.Bar(
            x=data_for_saturday['County'],
            y=data_for_saturday['Total'],
            name=sat,
            visible=(index == 0)
        ))

    trace_count = len(saturdays)
    steps = [
        dict(
            method="update",
            # Visibility mask with only this step's trace switched on.
            args=[{"visible": [position == index for position in range(trace_count)]},
                  {"title": f"Total Voter Registrations on {saturday}"}],
            label=saturday,  # Set the slider label
        )
        for index, saturday in enumerate(saturdays)
    ]

    sliders = [dict(
        active=0,
        currentvalue={"prefix": "Saturday: ", "visible": True},
        pad={"t": 100},
        steps=steps
    )]

    fig.update_layout(
        sliders=sliders,
        xaxis_title="County",
        yaxis_title="Total Voter Registrations",
        yaxis=dict(range=[0, max_total * 1.1]),
        showlegend=False,
        title="Voter Registrations Over Time"
    )

    fig.show()


# Display the slider bar chart for the combined all-Saturdays DataFrame.
create_advanced_slider_plot(voter_registration_by_fips_all_sats_dfs)


In [None]:
import json
import pandas as pd
import plotly.express as px

# Load GeoJSON data (GeoJSON is UTF-8 by specification)
with open('north_carolina.geojson', encoding='utf-8') as f:
    geojson_data = json.load(f)

# Only grab necessary columns from df
all_weeks_df = voter_registration_by_fips_all_sats_dfs[['County', 'FIPS', 'Total', 'Week Ending']]

# A choropleth needs one row per county, so keep a single Saturday.
# Defaults to the most recent 'Week Ending'; change selected_saturday to
# map another week.
week_ending_dates = pd.to_datetime(all_weeks_df['Week Ending'], format='%m/%d/%Y')
selected_saturday = week_ending_dates.max()
voter_df = all_weeks_df[week_ending_dates == selected_saturday]

# Plot the choropleth map
fig = px.choropleth(
    voter_df,
    geojson=geojson_data,
    locations='FIPS',
    featureidkey="properties.FIPS",  # Adjust this key according to your GeoJSON properties
    color='Total',
    color_continuous_scale="Viridis",
    # Colour range spans all weeks so the scale is comparable between dates.
    range_color=(all_weeks_df['Total'].min(), all_weeks_df['Total'].max()),
    scope="usa",  # Limit the map to the USA
    labels={'Total': 'Total Value'},
    hover_name='County'  # This will add the County name to the hover label
)

# Update layout to zoom in on the plotted locations
fig.update_geos(fitbounds="locations", visible=False)

# Show the map
fig.show()
