In [1]:
import json
from typing import List, Optional


import pandas as pd
from pydantic import BaseModel, Field
import pydantic_core

In [2]:
class GroupResult(BaseModel):
    """Vote count for one vote-type group of a single ballot option.

    Fields are populated by alias from the camelCase keys of the source JSON.
    """
    # Group label, read from the 'groupName' key (e.g. 'Election Day Votes').
    vote_type: str = Field(alias='groupName')
    # Number of votes in this group, read from the 'voteCount' key.
    votes: int = Field(alias='voteCount')

# class PrecinctResult(BaseModel):
#     pass

class BallotOption(BaseModel):
    """One selectable option (a candidate in the sample data) of a ballot item.

    Fields are populated by alias from the camelCase keys of the source JSON.
    """
    # Option name, read from the 'name' key.
    candidate: str = Field(alias='name')
    # Vote count for the option, read from 'voteCount'; in the sample output
    # it equals the sum of the per-group counts in `group_results`.
    total_votes: int = Field(alias='voteCount')
    # Party label, read from 'politicalParty' (required: validation fails if absent).
    party: str = Field(alias='politicalParty')
    # Per-group breakdown, read from the 'groupResults' key.
    group_results: List[GroupResult] = Field(alias='groupResults')
    # NOTE(review): precinct-level results are not modelled. The alias in the
    # disabled line below looks misspelled ('precintResults') -- confirm the
    # key name against the source JSON before enabling it.
    # precinct_results: Optional[List[PrecinctResult]] = Field(alias='precintResults')
    
class BallotItem(BaseModel):
    """A single contest on the ballot together with its options."""
    # Contest name, read from the 'name' key.
    office: str = Field(alias='name')
    # Options for this contest, read from the 'ballotOptions' key.
    ballot_options: List[BallotOption] = Field(alias='ballotOptions')

class LocalResult(BaseModel):
    """Results reported for one locality (a county in the sample data)."""
    # Locality name, read from the 'name' key.
    county: str = Field(alias='name')
    # Contests reported for this locality, read from the 'ballotItems' key.
    ballot_items: List[BallotItem] = Field(alias='ballotItems')

class ElectionResults(BaseModel):
    """Top-level document: election metadata plus per-locality results."""
    # Election title, read from the 'electionName' key.
    election_name: str = Field(alias='electionName')
    # Election date, kept as the raw string from 'electionDate' (not parsed to a date).
    election_date: str = Field(alias='electionDate')
    # One entry per locality, read from the 'localResults' key.
    local_results: List[LocalResult] = Field(alias='localResults')

In [3]:
# Read the raw election-results JSON as text. The encoding is pinned to UTF-8
# so the result does not depend on the platform's default locale encoding.
with open('./20221220__ga__special__state__house__129.json', 'r', encoding='utf-8') as f:
    data = f.read()

In [4]:
# Parse the raw JSON text into Python objects, then validate them against the
# ElectionResults schema (fields are matched by their camelCase aliases).
# NOTE(review): a later cell rebinds `data` to the model's dumped JSON, which is
# keyed by field names rather than aliases, so re-running this cell after that
# one will presumably fail validation -- re-run the file-loading cell first.
m = ElectionResults.model_validate(
    pydantic_core.from_json(data)
)

In [5]:
# Echo the validated model to inspect the parsed structure.
m

ElectionResults(election_name='December 20, 2022 GA House District 129 - Special Election', election_date='2022-12-20', local_results=[LocalResult(county='Richmond County', ballot_items=[BallotItem(office='State House of Representatives - District 129 - Dem', ballot_options=[BallotOption(candidate='Scott Cambers', total_votes=244, party='DEM', group_results=[GroupResult(vote_type='Election Day Votes', votes=89), GroupResult(vote_type='Absentee by Mail Votes', votes=2), GroupResult(vote_type='Advance Voting Votes', votes=153), GroupResult(vote_type='Provisional Votes', votes=0)]), BallotOption(candidate='Davis Green', total_votes=158, party='DEM', group_results=[GroupResult(vote_type='Election Day Votes', votes=50), GroupResult(vote_type='Absentee by Mail Votes', votes=2), GroupResult(vote_type='Advance Voting Votes', votes=106), GroupResult(vote_type='Provisional Votes', votes=0)]), BallotOption(candidate='Karlton L. Howard', total_votes=1810, party='DEM', group_results=[GroupResult(vo

In [6]:
# Serialise the validated model back to a JSON string. Keys are the snake_case
# field names (not the aliases), which is what flatten_election_data reads.
# NOTE: this rebinds `data`, replacing the raw file text loaded earlier.
data = m.model_dump_json()

In [7]:
# Echo the dumped JSON string to check its key names.
data

'{"election_name":"December 20, 2022 GA House District 129 - Special Election","election_date":"2022-12-20","local_results":[{"county":"Richmond County","ballot_items":[{"office":"State House of Representatives - District 129 - Dem","ballot_options":[{"candidate":"Scott Cambers","total_votes":244,"party":"DEM","group_results":[{"vote_type":"Election Day Votes","votes":89},{"vote_type":"Absentee by Mail Votes","votes":2},{"vote_type":"Advance Voting Votes","votes":153},{"vote_type":"Provisional Votes","votes":0}]},{"candidate":"Davis Green","total_votes":158,"party":"DEM","group_results":[{"vote_type":"Election Day Votes","votes":50},{"vote_type":"Absentee by Mail Votes","votes":2},{"vote_type":"Advance Voting Votes","votes":106},{"vote_type":"Provisional Votes","votes":0}]},{"candidate":"Karlton L. Howard","total_votes":1810,"party":"DEM","group_results":[{"vote_type":"Election Day Votes","votes":709},{"vote_type":"Absentee by Mail Votes","votes":31},{"vote_type":"Advance Voting Votes"

In [8]:
def flatten_election_data(data):
    """Flatten nested election results into one record per group result.

    Args:
        data: JSON text (``str``, ``bytes`` or ``bytearray``) or an
            already-parsed ``dict``. The expected nesting, using snake_case
            keys, is ``local_results`` -> ``ballot_items`` ->
            ``ballot_options`` -> ``group_results``, with ``election_name``
            and ``election_date`` at the top level.

    Returns:
        list[dict]: one record per group result, with the keys
        ``election_name``, ``election_date``, ``county``, ``office``,
        ``candidate``, ``party``, ``vote_type``, ``votes`` and
        ``total_votes``. Scalar keys that are absent come back as ``None``.
        A nested list that is absent, null or empty contributes no records.

    Raises:
        json.JSONDecodeError: if ``data`` is text that is not valid JSON.
    """
    # Accept parsed input too, so a caller holding a dict need not re-serialise it.
    if isinstance(data, (str, bytes, bytearray)):
        json_data = json.loads(data)
    else:
        json_data = data

    # Election-level values are repeated on every flattened record.
    election_name = json_data.get('election_name')
    election_date = json_data.get('election_date')

    flattened_data = []

    # `or []` treats a missing or null level as empty rather than raising
    # TypeError when iterating over None.
    for result in json_data.get('local_results') or []:
        for ballot_item in result.get('ballot_items') or []:
            for ballot_option in ballot_item.get('ballot_options') or []:
                for group_result in ballot_option.get('group_results') or []:
                    flattened_data.append(
                        {
                            'election_name': election_name,
                            'election_date': election_date,
                            'county': result.get('county'),
                            'office': ballot_item.get('office'),
                            'candidate': ballot_option.get('candidate'),
                            'party': ballot_option.get('party'),
                            'vote_type': group_result.get('vote_type'),
                            'votes': group_result.get('votes'),
                            'total_votes': ballot_option.get('total_votes')
                        }
                    )

    return flattened_data

In [9]:
# Build one flat record per group result from the dumped JSON string in `data`.
flatten_data = flatten_election_data(data)

In [10]:
# Persist the flattened records as a JSON array in the working directory.
with open('data.json', mode='w') as out_file:
    json.dump(flatten_data, fp=out_file)