# Basic setup for fetching data from the Facebook API

### Remember to add an active access token below and adjust the parameters and fields if necessary

In [1]:
# must be specified to be able to use methods of package
import sys
sys.path.append('../src') 
import numpy as np
import pandas as pd
from political_ads.api_request import API_request
from political_ads.preprocessor import Preprocessor

# Generates a dataset (in the data directory)
requestor = API_request()
# SECURITY: never paste a real access token into the notebook. A token that was committed
# here is readable by anyone with access to the file and should be revoked.
# Load the token at run time instead, e.g.:
# import os
# requestor.generate_dataset(500, "Joe Biden", os.environ["FB_ACCESS_TOKEN"])


In [2]:
# Load the generated text file into a DataFrame; the Preprocessor applies its transformations.
preprocess = Preprocessor()
# Forward slashes resolve on Windows, macOS and Linux alike (the previous "..\\data\\..."
# form only worked on Windows) and match the '../src' style used for sys.path.
data = preprocess.file_to_df("../data/generated_dataset.txt")

In [3]:
# Preview the first two rows of the loaded frame
data.head(2)

Unnamed: 0,ad_creation_time,ad_creative_body,spend,impressions,delivery_by_region,demographic_distribution,page_id,page_name,bylines,id,spend_lo,spend_hi,impressions_lo,impressions_hi
0,2021-07-01,It's a vibe. We’re excited to announce that Dr...,2749.5,849999.5,"[{'percentage': '3.0E-6', 'region': 'Yangon'},...","[{'percentage': '1.8E-5', 'age': '55-64', 'gen...",653079394725698,Aspiration,Aspiration,2883391761877521,2500,2999,800000,899999
1,2021-08-27,🌎 Sierra Club's City Hike-A-Thon is a national...,49.5,1499.5,"[{'percentage': '0.789011', 'region': 'Massach...","[{'percentage': '0.006508', 'age': '45-54', 'g...",6204742571,Sierra Club,SIERRA CLUB,161524832762330,0,99,1000,1999


In [None]:
'''
Visualization attempts
https://pandas.pydata.org/pandas-docs/dev/getting_started/intro_tutorials/09_timeseries.html
'''
import matplotlib.pyplot as plt
import seaborn as sns

# Line plots connect points in row order, so order the rows chronologically first;
# otherwise the line jumps back and forth along the time axis.
data_by_time = data.sort_values("ad_creation_time")

# Plotting spending over time and impressions over time
fig, axes = plt.subplots(2, 1, figsize=(8, 4))
data_by_time.plot(x="ad_creation_time", y="impressions", ax=axes[0])
axes[0].set(title="Impressions over time", ylabel="impressions")
data_by_time.plot(x="ad_creation_time", y="spend", ax=axes[1])
axes[1].set(title="Spend over time", xlabel="ad creation time", ylabel="spend")
# Keep titles and tick labels of the two stacked panels from overlapping
fig.tight_layout()

In [None]:
# Plot range of impressions
# fill_between and plot both draw in row order, so sort chronologically to get a
# well-formed band instead of a polygon that folds back over itself.
impressions_by_time = data.sort_values("ad_creation_time")

fig, ax = plt.subplots()
ax.fill_between(
    x=impressions_by_time["ad_creation_time"],
    y1=impressions_by_time["impressions_lo"],
    y2=impressions_by_time["impressions_hi"],
    alpha=1,
    color="green",
)
# The point estimate is drawn on top of the lower/upper bound band
ax.plot(impressions_by_time["ad_creation_time"], impressions_by_time["impressions"])
ax.set(title="Impressions range over time", xlabel="ad creation time", ylabel="impressions")
plt.show()

In [4]:
'''
Amount spent by Facebook page
'''
# Named aggregations: output column -> (source column, aggregation function)
page_aggregations = {
    # Number of ads per page
    "no_ads": ("id", "count"),
    # Summed lower / upper bounds of spend and impressions
    "spend_lo": ("spend_lo", "sum"),
    "spend_hi": ("spend_hi", "sum"),
    "impressions_lo": ("impressions_lo", "sum"),
    "impressions_hi": ("impressions_hi", "sum"),
    # Mean impressions and mean spend per ad
    "avg_impressions": ("impressions", "mean"),
    "avg_spend": ("spend", "mean"),
}

# reset_index turns the page_name group key back into a regular column
by_page = data.groupby("page_name").agg(**page_aggregations).reset_index()

by_page

Unnamed: 0,page_name,no_ads,spend_lo,spend_hi,impressions_lo,impressions_hi,avg_impressions,avg_spend
0,100 Percent Campaign,1,100,199,7000,7999,7499.500000,149.5
1,198 Methods,2,0,198,1000,2998,999.500000,49.5
2,2020Vision4Election2022,1,0,99,0,999,499.500000,49.5
3,350 Seattle,5,0,495,7000,11995,1899.500000,49.5
4,350.org,125,1400,13775,132000,268875,1603.500000,60.7
...,...,...,...,...,...,...,...,...
412,World Bank,1,0,99,15000,19999,17499.500000,49.5
413,Yale Climate Connections,11,0,1089,29000,39989,3135.863636,49.5
414,ZF Group,1,400,499,20000,24999,22499.500000,449.5
415,Zac Bears for Medford City Council,5,0,495,0,4995,499.500000,49.5


In [None]:
# Display range of spend

# Maybe normalize it
# by_page has no "spend" column (only spend_lo, spend_hi and avg_spend), so indexing it
# raised a KeyError. Plot the midpoint of the summed bounds as the point estimate of the
# total spend per page; it lies inside the filled band by construction.
spend_mid = (by_page["spend_lo"] + by_page["spend_hi"]) / 2
plt.fill_between(x=by_page["page_name"], y1=by_page["spend_lo"], y2=by_page["spend_hi"], alpha=1, color="green")
plt.plot(by_page["page_name"], spend_mid)
plt.show()

In [7]:
'''
Congress member filtering
'''
# Get page-ids:
# https://commentpicker.com/find-facebook-id.php


# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
# Forward slashes keep the relative path valid on every operating system.
congress_members = pd.read_csv("../src/data_sets/legislators-current.csv")

In [8]:
# Facebook handle per legislator (NaN where none is listed)
congress_members["facebook"]

0      SenatorSherrodBrown
1          senatorcantwell
2         senatorbencardin
3                tomcarper
4          SenatorBobCasey
              ...         
533                    NaN
534                    NaN
535                    NaN
536                    NaN
537                    NaN
Name: facebook, Length: 538, dtype: object

In [None]:
# Count the legislators that have no Facebook handle
congress_members["facebook"].isna().sum()

In [22]:
# Scraper experiment
# https://github.com/kevinzg/facebook-scraper

from facebook_scraper import get_page_info

# Fetch the page info for a single known handle to see what the scraper returns
page = get_page_info(account="SenatorBobCasey")

# Display the returned dict
page

{'name': 'U.S. Senator Bob Casey',
 'identifier': 100044143479624,
 'url': 'https://www.facebook.com/SenatorBobCasey',
 'image': 'https://scontent-cph2-1.xx.fbcdn.net/v/t1.6435-1/fr/cp0/e15/q65/243091011_403907001090746_7562234370387016551_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=a12de3&_nc_ohc=sGg9pTwPXxIAX-axsKI&_nc_ht=scontent-cph2-1.xx&oh=a70436a16135d8317ec63ccc1bccd162&oe=619DC418',
 'sameAs': 'https://www.casey.senate.gov/',
 'type': 'Person',
 'likes': 75059}

In [23]:
import time

# Collected (facebook handle, page name, page identifier) tuples, one per resolved page
members_fbNames_test = []

# Drop missing handles up front so only real handles are requested
for handle in congress_members["facebook"].dropna():
    page = get_page_info(account=handle)
    print(page)
    # Both keys must be tested explicitly: `"name" and "identifier" in page` only checked
    # for "identifier", because the non-empty string "name" is always truthy.
    if "name" in page and "identifier" in page:
        members_fbNames_test.append((handle, page["name"], page["identifier"]))
        print(page["name"] + " id:" + str(page["identifier"]))
    # Throttle between requests; rows without a handle no longer cost a pause
    time.sleep(0.5)

{}
{'name': 'Senator Maria Cantwell', 'identifier': 100044609161538, 'url': 'https://www.facebook.com/senatorcantwell', 'image': 'https://scontent-cph2-1.xx.fbcdn.net/v/t1.6435-1/fr/cp0/e15/q65/242936558_394951258668510_3818018345285898742_n.jpg?_nc_cat=109&ccb=1-5&_nc_sid=a12de3&_nc_ohc=EDKO0lCaw-8AX88dJde&_nc_ht=scontent-cph2-1.xx&oh=4172325daa93059f3120d7bc28b56e63&oe=61A03464', 'sameAs': 'https://www.cantwell.senate.gov/', 'type': 'Person', 'likes': 57065}
Senator Maria Cantwell id:100044609161538
{'name': 'Senator Ben Cardin', 'identifier': 100044326544838, 'url': 'https://www.facebook.com/senatorbencardin', 'image': 'https://scontent-cph2-1.xx.fbcdn.net/v/t39.30808-1/fr/cp0/e15/q65/243253063_411301657024049_865601913552816902_n.jpg?_nc_cat=105&ccb=1-5&_nc_sid=a12de3&_nc_ohc=NovI4byjhEgAX9imsaB&_nc_ht=scontent-cph2-1.xx&oh=47b28b846b00a649d1fa6e80345d09d5&oe=617EFFBE', 'sameAs': 'www.cardin.senate.gov', 'type': 'Person', 'likes': 33264}
Senator Ben Cardin id:100044326544838
{'name

In [25]:
# One row per resolved page: (facebook handle, page name, page identifier)
members_names = pd.DataFrame(members_fbNames_test, columns=["facebook", "page_name", "identifier"])
# Save data as csv (forward slashes keep the relative path portable across operating systems)
members_names.to_csv("../src/data_sets/legislators_page_ids.csv", index=False, header=True)

# Show (rows, columns) as a quick sanity check of what was written
members_names.shape

(185, 3)

In [115]:
# Keep a separate copy of the scraped names frame
members_names_cp = members_names.copy()

In [127]:
# Legislator attributes to carry over into the merged table
legislator_columns = ["first_name", "last_name", "full_name", "type", "state", "district", "party", "facebook"]
congress_members_fb = congress_members[legislator_columns]

# Left join on the handle: every legislator row is kept, and page_name / identifier are
# filled in only where a scraped page with the same handle exists
merged = pd.merge(congress_members_fb, members_names, on="facebook", how="left")

In [128]:
# Missing identifiers become 0 so the column can be stored as a plain 64-bit integer
identifier_filled = merged["identifier"].fillna(0)
merged["identifier"] = identifier_filled.astype(np.int64)

In [125]:
# NOTE(review): this cell's execution count (125) is lower than the preceding cell's (128),
# so it looks like a leftover from an earlier run. Run after the fillna(0) cast it only
# re-casts an int64 column; run without it, astype(np.int64) cannot convert NaN values.
# Confirm whether this cell can be removed.
merged["identifier"] = merged["identifier"].astype(np.int64)

In [130]:
# Persist the merged legislator / page table; forward slashes keep the relative path
# valid on every operating system (the "..\\src\\..." form only resolved on Windows).
merged.to_csv("../src/data_sets/legislators_fb_info.csv", index=False, header=True)

In [107]:
# Display the legislator columns selected for the merge
congress_members_fb

Unnamed: 0,first_name,last_name,full_name,type,state,district,party,facebook
0,Sherrod,Brown,Sherrod Brown,sen,OH,,Democrat,SenatorSherrodBrown
1,Maria,Cantwell,Maria Cantwell,sen,WA,,Democrat,senatorcantwell
2,Benjamin,Cardin,Benjamin L. Cardin,sen,MD,,Democrat,senatorbencardin
3,Thomas,Carper,Thomas R. Carper,sen,DE,,Democrat,tomcarper
4,Robert,Casey,"Robert P. Casey, Jr.",sen,PA,,Democrat,SenatorBobCasey
...,...,...,...,...,...,...,...,...
533,Claudia,Tenney,Claudia Tenney,rep,NY,22.0,Republican,
534,Julia,Letlow,Julia Letlow,rep,LA,5.0,Republican,
535,Troy,Carter,Troy A. Carter,rep,LA,2.0,Democrat,
536,Melanie,Stansbury,Melanie A. Stansbury,rep,NM,1.0,Democrat,
