In [1]:
# Data handling and processing
import os
import re
import time
import pandas as pd
import numpy as np
import statistics
import json
import csv
import sys
from datetime import datetime
from typing import List, Tuple, NamedTuple, Set, Dict, Any, Union, Optional
from pathlib import Path

# Scraping
import requests

# Plotting
import matplotlib as plt

In [40]:
# constants and Facebook API
# The Graph API token is a credential, so it is read from the environment rather
# than stored in the notebook (set FB_ACCESS_TOKEN before starting Jupyter).
# Any token that was previously committed here should be treated as compromised
# and revoked.
ACCESS_TOKEN = os.environ.get("FB_ACCESS_TOKEN", "")
if not ACCESS_TOKEN:
    # Warn instead of raising so the remaining cells can still be defined.
    print("FB_ACCESS_TOKEN is not set; Graph API calls will be sent without a token.")
BASE_URL = "https://graph.facebook.com/v21.0/ads_archive/"

In [29]:
def _redact_access_token(text):
    """Mask the value of any ``access_token`` query parameter found in ``text``."""
    return re.sub(r"([?&]access_token=)[^&#\s]*", r"\1<REDACTED>", text)


def get_profiles_test(access_token, name):
    """Search the Graph API ``pages/search`` endpoint for pages matching ``name``.

    Diagnostic information (request URL, status code and, on a non-200
    response, the API error message) is printed. The access token is masked
    in everything that is printed because notebook output is saved with the
    file.

    Args:
        access_token: Graph API access token sent with the request.
        name: Free-text query passed as the ``q`` parameter.

    Returns:
        The parsed JSON body of the response (also for non-200 responses),
        or ``None`` if the request or the JSON decoding raised.
    """
    search_url = "https://graph.facebook.com/v21.0/pages/search"

    params = {
        'access_token': access_token,
        'q': name,  # Search query parameter for Pages API
        'fields': 'id,name,verification_status,followers_count,category,fan_count',
        'limit': 10  # Get up to 10 potential matches
    }

    try:
        response = requests.get(search_url, params=params, timeout=10)
        # response.url contains the token as a query parameter; never echo it.
        print(f"Full URL being called: {_redact_access_token(response.url)}")
        print(f"Response status code: {response.status_code}")

        # Parse the body once and reuse it for both error reporting and the result.
        response_data = response.json()

        if response.status_code != 200 and 'error' in response_data:
            print(f"Error message: {response_data['error'].get('message')}")

        return response_data

    except Exception as e:
        # Deliberately best-effort: report and return None rather than raise.
        # Exception text from HTTP libraries can include the request URL, so
        # mask the token here as well.
        print(f"Error: {_redact_access_token(str(e))}")
        return None
        

In [30]:
# Smoke-test the page search with one candidate name and show the raw reply.
print(get_profiles_test(access_token=ACCESS_TOKEN, name="Jamie Raskin"))

Full URL being called: https://graph.facebook.com/v21.0/pages/search?access_token=<REDACTED>&q=Jamie+Raskin&fields=id%2Cname%2Cverification_status%2Cfollowers_count%2Ccategory%2Cfan_count&limit=10
Response status code: 400
Error message: (#10) This endpoint requires the 'pages_read_engagement' permission or the 'Page Public Content Access' feature or the 'Page Public Metadata Access' feature. Refer to https://developers.facebook.com/docs/apps/review/login-permissions#manage-pages, https://developers.facebook.com/docs/apps/review/feature#reference-PAGES_ACCESS and https://developers.facebook.com/docs/apps/review/feature#page-public-metadata-access for details.
{'error': {'message': "(#10) This endpoint requires the 'pages_read_engagement' permission or the

In [31]:
def check_token_permissions(access_token):
    """Print the Graph API ``debug_token`` report for ``access_token``.

    The same token is sent both as ``input_token`` and as ``access_token``.
    The parsed JSON response is printed; nothing is returned.

    Args:
        access_token: Graph API access token to look up.

    Raises:
        Any exception raised by ``requests.get`` or by JSON decoding is
        propagated to the caller.
    """
    debug_url = "https://graph.facebook.com/debug_token"
    params = {
        'input_token': access_token,
        'access_token': access_token
    }
    # Bound the wait so a stalled connection cannot hang the notebook cell
    # (same 10-second limit as get_profiles_test).
    response = requests.get(debug_url, params=params, timeout=10)
    print(response.json())

In [32]:
# Print the scopes and validity the API reports for the configured token.
check_token_permissions(access_token=ACCESS_TOKEN)

{'data': {'app_id': '894341919203273', 'type': 'USER', 'application': 'test', 'data_access_expires_at': 1744920063, 'expires_at': 1737151200, 'is_valid': True, 'scopes': ['pages_show_list', 'ads_management', 'ads_read', 'pages_read_engagement', 'pages_manage_ads', 'public_profile'], 'granular_scopes': [{'scope': 'pages_show_list'}, {'scope': 'ads_management'}, {'scope': 'ads_read'}, {'scope': 'pages_read_engagement'}, {'scope': 'pages_manage_ads'}], 'user_id': '612975991243256'}}


In [50]:
def get_ads(page_id, access_token):
    """Fetch political/issue ads for one page from the Ad Library endpoint.

    A single request is sent to ``BASE_URL`` with ``limit`` set to 100; paging
    links in the response are not followed.

    Args:
        page_id: Page ID to search for (sent as a one-element
            ``search_page_ids`` list).
        access_token: Graph API access token sent with the request.

    Returns:
        The parsed JSON body of the response, or ``None`` if the request or
        the JSON decoding raised.
    """
    params = {
        'access_token': access_token,
        'search_page_ids': [page_id],
        'ad_type': 'POLITICAL_AND_ISSUE_ADS',
        'fields': 'id,page_id,page_name,ad_creative_bodies,ad_creation_time,impressions,spend,demographic_distribution',
        'ad_reached_countries': 'US',
        'limit': 100
    }

    try:
        # Use the shared endpoint constant and bound the wait so a stalled
        # connection cannot hang the notebook cell.
        response = requests.get(BASE_URL, params=params, timeout=10)
        return response.json()
    except Exception as e:
        # Deliberately best-effort: report and return None rather than raise.
        # Exception text from HTTP libraries can include the request URL, so
        # mask the token before printing.
        message = re.sub(r"([?&]access_token=)[^&#\s]*", r"\1<REDACTED>", str(e))
        print(f"Error: {message}")
        return None

In [51]:
# Page ID to query; the captured output below lists it as the 'Abby Broyles' page.
# NOTE(review): `id` shadows the built-in id() for the rest of the session;
# consider renaming once no other cell depends on this name.
id = 582_293_558_518_300
results = get_ads(page_id=id, access_token=ACCESS_TOKEN)

In [49]:
# Show the raw response returned by get_ads as this cell's output.
results

{'data': [{'id': '1260218001151559',
   'page_id': '582293558518300',
   'page_name': 'Abby Broyles',
   'ad_creative_bodies': ["Oklahoma Republicans are trying to steal the 5th congressional district, but we're putting up a fight. Chip in before our big Dec 31 deadline."],
   'ad_creative_link_titles': ["Flip Oklahoma's 5th District"],
   'ad_creation_time': '2021-12-29',
   'spend': {'lower_bound': '0', 'upper_bound': '99'}},
  {'id': '2205934999556007',
   'page_id': '582293558518300',
   'page_name': 'Abby Broyles',
   'ad_creative_bodies': ["Oklahoma Republicans are trying to steal the 5th congressional district, but we're putting up a fight. Chip in before our big Dec 31 deadline."],
   'ad_creative_link_titles': ["Flip Oklahoma's 5th District"],
   'ad_creation_time': '2021-12-29',
   'spend': {'lower_bound': '0', 'upper_bound': '99'}},
  {'id': '1616438455369830',
   'page_id': '582293558518300',
   'page_name': 'Abby Broyles',
   'ad_creative_bodies': ["Oklahoma Republicans ar

In [33]:
# Load candidate information and collapse it to one row per candidate name.
# Written as a single chain so the year-filtered frame is never mutated in
# place: assigning a new column onto a boolean-mask slice can trigger pandas'
# SettingWithCopyWarning.
candidates = (
    pd.read_csv("data_clean/candidates.csv")
    # filter to candidates running in 2018 or later
    .loc[lambda df: df["year"] >= 2018]
    # create query column: "<first> <last>"
    .assign(name_first_last=lambda df: df["name_first"] + " " + df["name_last"])
    # collapse to the candidate level; "count" is the number of rows per name
    .groupby("name_first_last")
    .size()
    .reset_index(name="count")
)

  candidates = pd.read_csv("data_clean/candidates.csv")


In [36]:
# Persist the per-candidate name table.
# NOTE(review): the frame's index is written as an unnamed first column; pass
# index=False if downstream readers do not expect it.
candidates.to_csv(path_or_buf="data_raw/facebook_profiles.csv")