In [120]:
# Data handling and processing
import os
import re
import time
import pandas as pd
import numpy as np
import statistics
import json
import csv
import sys
from datetime import datetime
from typing import List, Tuple, NamedTuple, Set, Dict, Any, Union, Optional
from pathlib import Path

# Scraping
import requests

# Plotting
import matplotlib as plt

In [121]:
# FEC API configuration and the election years to collect.
# The key is read from the FEC_API_KEY environment variable so the secret is
# never stored in (or committed with) the notebook. When the variable is unset
# or empty, the public "DEMO_KEY" is used, which the API limits to 40 calls/hour.
API_KEY = os.environ.get("FEC_API_KEY") or "DEMO_KEY"
BASE_URL = 'https://api.open.fec.gov/v1'
election_years = [2018, 2020, 2022, 2024]

In [122]:
# helper functions
def get_candidates(api_key, election_year, offices=('P',), per_page=100, timeout=30):
    """Fetch candidate records for one election year from the ``/candidates`` endpoint.

    Only the candidate records themselves are returned; financial totals are
    not part of this call and must be fetched separately.

    Args:
        api_key: FEC API key sent with every request.
        election_year: Election year to query.
        offices: Iterable of office codes to query, one paginated crawl per
            code. Defaults to ``('P',)`` (President); ``'S'`` (Senate) and
            ``'H'`` (House) can be added.
        per_page: Page size requested from the API; a page shorter than this
            is treated as the last one.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of candidate dicts accumulated over all requested offices. If a
        request returns a non-200 status, the crawl for that office stops and
        whatever was collected so far is kept.

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            a request exceeds ``timeout``.
    """
    candidates = []

    for office in offices:
        url = f'{BASE_URL}/candidates'
        page = 1
        while True:
            params = {
                'api_key': api_key,
                'election_year': election_year,
                'office': office,
                'party': ['DEM', 'REP'],
                'candidate_status': ['C', 'F'],
                'per_page': per_page,
                'page': page
            }

            print(f"Fetching page {page} for {election_year} {office} candidates...")

            # Without a timeout a stalled connection would block the cell forever.
            response = requests.get(url, params=params, timeout=timeout)

            if response.status_code != 200:
                print(f"Error fetching candidates for {election_year}, office {office}: {response.status_code}")
                print(f"Response content: {response.text}")
                break

            # Tolerate a payload without a 'results' key instead of raising KeyError.
            results = response.json().get('results') or []
            candidates.extend(results)

            if len(results) < per_page:  # Last page
                break

            page += 1
            time.sleep(0.25)  # Rate limiting

    return candidates

def get_finances(api_key, candidate_id, election_year, timeout=30):
    """Get the financial totals record for one candidate in one election year.

    Args:
        api_key: FEC API key sent with the request.
        candidate_id: Candidate identifier to look up.
        election_year: Election year the totals should cover.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        the request returns a non-200 status or no results are present (a
        message is printed in both cases).

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            the request exceeds ``timeout``.
    """
    params = {
        'api_key': api_key,
        'candidate_id': candidate_id,
        'election_year': election_year,
        'per_page': 100,  # Should only be one record but just in case
    }

    url = f'{BASE_URL}/candidates/totals'
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Error fetching finances for candidate {candidate_id}: {response.status_code}")
        print(f"Response content: {response.text}")
        return None

    # A payload without a 'results' key is treated like an empty result set.
    results = response.json().get('results')

    if not results:
        print(f"No financial data found for candidate {candidate_id}")
        return None

    # Return the first (and usually only) result
    return results[0]

def get_committees(api_key, candidate_id, timeout=30):
    """Get committees for a candidate, restricted to designations P and A.
    - P type -- candidate's principal committee. Can have only one.
    - A type -- other authorized committees.

    Note: These are directly controlled by the candidate, have contribution limits, and must directly report to the FEC

    Args:
        api_key: FEC API key sent with the request.
        candidate_id: Candidate identifier used to filter committees.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The response's ``results`` list (an empty list if the key is absent),
        or ``None`` when the request returns a non-200 status.

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            the request exceeds ``timeout``.
    """
    params = {
        'api_key': api_key,
        'candidate_id': candidate_id,  # This will match if candidate_id is in the committee's candidate_ids array
        'designation': ['P', 'A'],
        'per_page': 100
    }

    url = f'{BASE_URL}/committees'
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Error fetching committees for candidate {candidate_id}: {response.status_code}")
        print(f"Response content: {response.text}")
        return None

    # Tolerate a payload without a 'results' key instead of raising KeyError.
    return response.json().get('results', [])

def to_dataframe(candidates):
    """Convert a list of candidate dicts to a DataFrame with the relevant columns.

    Args:
        candidates: List of candidate dicts. Records may lack some of the
            relevant keys (for example when no finance or committee data was
            attached); the list may also be empty.

    Returns:
        A DataFrame containing exactly the relevant columns, in a fixed order.
        Columns that no record provides are present and filled with NaN;
        keys outside the relevant set are dropped.
    """
    relevant_columns = ['candidate_id', 'name', 'party', 'office', 'state', 'district', 'incumbent_challenge_full',
                        'total_receipts', 'total_disbursements', 'committee_ids', 'committee_names', 'committee_types', 'principal_committee']

    df = pd.DataFrame(candidates)
    # reindex rather than df[relevant_columns]: plain selection raises KeyError
    # when a column is missing from every record or the input list is empty.
    return df.reindex(columns=relevant_columns)

In [123]:
# Pull the 2024 candidate records into `cs`.
cs = get_candidates(api_key=API_KEY, election_year=2024)

Fetching page 1 for 2024 P candidates...


In [130]:
# Keep only candidates that have a receipts figure of at least 100,000.
filtered = [
    c
    for c in cs
    if c.get('total_receipts') is not None and float(c['total_receipts']) >= 100_000
]

In [132]:
# Display how many candidates passed the receipts filter.
len(filtered)

17

In [136]:
# Tabulate the filtered candidates and write them to fec.csv without the index column.
df = to_dataframe(candidates=filtered)
df.to_csv(path_or_buf="fec.csv", index=False)


Unnamed: 0,candidate_id,name,party,office,state,district,incumbent_challenge_full,total_receipts,total_disbursements,committee_ids,committee_names,committee_types,principal_committee
0,P80000722,"BIDEN, JOSEPH R JR",DEM,P,US,0.0,Incumbent,1169399000.0,1167837000.0,"[C00213652, C00431916, C00703975]","[BIDEN FOR PRESIDENT, BIDEN FOR PRESIDENT, INC...","[P, P, P]",C00213652
1,P40011868,"BINKLEY, RYAN",REP,P,US,0.0,Challenger,11884130.0,11881470.0,[C00836544],[BINKLEY FOR PRESIDENT 2024],[P],C00836544
2,P40013518,"BURGUM, DOUG",REP,P,US,0.0,Challenger,18007930.0,18005230.0,[C00842302],"[DOUG BURGUM FOR AMERICA, INC.]",[P],C00842302
3,P60008521,"CHRISTIE, CHRIS",REP,P,US,0.0,Challenger,7628635.0,7592972.0,"[C00580399, C00842237]","[CHRIS CHRISTIE FOR PRESIDENT INC, CHRIS CHRIS...","[P, P]",C00580399
4,P40013039,"DESANTIS, RON",REP,P,US,0.0,Challenger,375129.1,375129.1,[C00834853],[DRAFT DESANTIS 2024 FUND],[A],
5,P40012593,"ELDER, LARRY",REP,P,US,0.0,Challenger,1376325.0,1375553.0,[C00839365],[ELDER FOR PRESIDENT 24],[P],C00839365
6,P40010977,"HALEY, NIKKI",REP,P,US,0.0,Challenger,58378350.0,56690370.0,[C00833392],[NIKKI HALEY FOR PRESIDENT INC.],[P],C00833392
7,P00009423,"HARRIS, KAMALA",DEM,P,US,0.0,Challenger,1158006000.0,1158116000.0,"[C00703975, C00694455]","[HARRIS FOR PRESIDENT, KAMALA HARRIS FOR THE P...","[P, A]",C00703975
8,P40014011,"HURD, WILLIAM",REP,P,US,0.0,Challenger,1451894.0,1451894.0,[C00843540],"[HURD FOR AMERICA, INC.]",[P],C00843540
9,P40011850,"HUTCHINSON, W. ASA",REP,P,US,0.0,Challenger,1587082.0,1587082.0,[C00837104],"[ASA FOR AMERICA, INC.]",[P],C00837104


In [None]:
# Attach 2024 finance totals and committee details to every record in `cs`,
# pausing after each API call.
for c in cs:
    finances = get_finances(API_KEY, c['candidate_id'], 2024)
    if finances:
        c.update(
            total_receipts=finances.get('receipts'),
            total_disbursements=finances.get('disbursements'),
        )
    time.sleep(0.25)  # rate limiting

    committees = get_committees(API_KEY, c['candidate_id'])
    if committees:
        c.update(
            committee_ids=[committee.get('committee_id') for committee in committees],
            committee_names=[committee.get('name') for committee in committees],
            # the committee's designation is stored under 'committee_types'
            committee_types=[committee.get('designation') for committee in committees],
            # first committee whose designation is 'P', or None if there is none
            principal_committee=next(
                (committee.get('committee_id') for committee in committees if committee.get('designation') == 'P'),
                None,
            ),
        )
    time.sleep(0.25)  # rate limiting

In [95]:
# Collect candidates for every configured election year, attach finance totals
# and committee details, keep those with at least 100,000 in receipts, and
# export the combined table to fec.csv.
candidates = []
for year in election_years:
    print(f"Processing election year {year}...")
    cs = get_candidates(API_KEY, year)

    for c in cs:
        finances = get_finances(API_KEY, c['candidate_id'], year)
        if finances:
            c.update({
                'total_receipts': finances.get('receipts'),
                'total_disbursements': finances.get('disbursements')
            })
        time.sleep(0.25)  # rate limiting: pause between the two per-candidate API calls

        committees = get_committees(API_KEY, c['candidate_id'])
        if committees:
            # get_committees filters on 'designation' (P = principal, A = authorized),
            # so the designation field, not 'committee_type', identifies the
            # principal committee.
            c.update({
                'committee_ids': [committee.get('committee_id') for committee in committees],
                'committee_names': [committee.get('name') for committee in committees],
                'committee_types': [committee.get('designation') for committee in committees],
                'principal_committee': next(
                    (committee.get('committee_id') for committee in committees if committee.get('designation') == 'P'),
                    None,
                )
            })
        time.sleep(0.25)  # rate limiting

    # Candidates without a receipts figure (e.g. the finance lookup failed) are dropped here.
    filtered = [c for c in cs if c.get('total_receipts') is not None and float(c.get('total_receipts', 0)) >= 100000]
    candidates += filtered

df = to_dataframe(candidates)

df.to_csv("fec.csv", index=False)

Processing election year 2024...
Fetching page 1 for 2024 P candidates...
Fetching page 2 for 2024 P candidates...
Fetching page 3 for 2024 P candidates...
Fetching page 4 for 2024 P candidates...
Fetching page 5 for 2024 P candidates...
Fetching page 6 for 2024 P candidates...
Fetching page 7 for 2024 P candidates...


In [97]:
# Display the first record in `candidates` to inspect its fields.
candidates[0]

{'active_through': 2024,
 'candidate_id': 'P40013401',
 'candidate_inactive': False,
 'candidate_status': 'N',
 'cycles': [2024],
 'district': '00',
 'district_number': 0,
 'election_districts': ['00'],
 'election_years': [2024],
 'federal_funds_flag': False,
 'first_file_date': '2023-06-04',
 'has_raised_funds': False,
 'inactive_election_years': None,
 'incumbent_challenge': 'C',
 'incumbent_challenge_full': 'Challenger',
 'last_f2_date': '2023-06-04',
 'last_file_date': '2023-06-04',
 'load_date': '2023-06-08T21:05:05',
 'name': '375 ROB ROY DR, DAVID J SR SR',
 'office': 'P',
 'office_full': 'President',
 'party': 'DEM',
 'party_full': 'DEMOCRATIC PARTY',
 'state': 'US',
 'total_receipts': 0.0,
 'total_disbursements': 0.0}

In [96]:
# Re-fetch the 2024 finance totals for every record in `candidates`,
# pausing after each request; records with no finance data are left untouched.
for candidate in candidates:
    finances = get_finances(API_KEY, candidate['candidate_id'], 2024)
    time.sleep(0.25)  # Rate limiting
    if not finances:
        continue
    candidate.update(
        total_receipts=finances.get('receipts'),
        total_disbursements=finances.get('disbursements'),
    )

Error fetching finances for candidate P00011429: 429
Response content: {
  "error": {
    "code": "OVER_RATE_LIMIT",
    "message": "You have exceeded your rate limit of 40 calls per hour for the DEMO_KEY, 1000 calls per hour for a personal key, or 120 calls per minute for an upgraded key. You can either try again later, sign up for a personal key at https://api.data.gov/signup/, or email apiinfo@fec.gov to upgrade your key."
  }
}
Error fetching finances for candidate P40014938: 429
Response content: {
  "error": {
    "code": "OVER_RATE_LIMIT",
    "message": "You have exceeded your rate limit of 40 calls per hour for the DEMO_KEY, 1000 calls per hour for a personal key, or 120 calls per minute for an upgraded key. You can either try again later, sign up for a personal key at https://api.data.gov/signup/, or email apiinfo@fec.gov to upgrade your key."
  }
}
Error fetching finances for candidate P40007726: 429
Response content: {
  "error": {
    "code": "OVER_RATE_LIMIT",
    "messag

KeyboardInterrupt: 

In [92]:
# Keep only candidates that have a receipts figure of at least 10,000,000.
filtered_candidates = [
    c
    for c in candidates
    if c.get('total_receipts') is not None and float(c['total_receipts']) >= 10_000_000
]

In [93]:
# Print a count header followed by one "name - party - status" line per
# candidate; nothing is printed when the list is empty.
if len(filtered_candidates) > 0:
    print(f"\nFound {len(filtered_candidates)} candidates")
    for candidate in filtered_candidates:
        print(f"{candidate['name']} - {candidate['party']} - {candidate['candidate_status']}")


Found 8 candidates
BIDEN, JOSEPH R JR - DEM - C
BINKLEY, RYAN - REP - C
BURGUM, DOUG - REP - C
HALEY, NIKKI - REP - C
HARRIS, KAMALA - DEM - C
JOHNSON, PERRY - REP - C
NORRIS, JIM ALEXANDER SR - REP - C
RAMASWAMY, VIVEK - REP - C
