In [93]:
# FauxFoe is being produced by myself and Megan Marelli, a colleague and fellow CJS student. 

# The premise is quite simple. We will leverage Google's new ClaimReview to monitor Twitter. First and foremost, 
# if a Twitter user publishes a link that has been subjected to Google's ClaimReview, FauxFoe will comment on that 
# tweet or DM the user responsible for the tweet notifying them that the claim has been reviewed. 

# As FauxFoe continues to grow, we plan to parse each tweet so that we can address users that rely on 'fake news' 
# for information rather than for direct posts. For instance, if somebody uses a ClaimReviewed fact in a Twitter 
# argument, but does not post the exact link, FauxFoe will be able to address that claim. 

# Alas, for now, FauxFoe is in its infancy. Below is some of the base code that will be used to operate FauxFoe. 

In [95]:
# The guidelines for the metadata can be found here... 
# http://schema.org/ClaimReview

In [96]:
import requests
# Requests documentation: http://docs.python-requests.org/en/master/

# As an example of FauxFoe's leveraging ClaimReview, we will pull the rss feed from the Washington Post's Fact 
# Checker, which includes articles that utilize Google's ClaimReview. 
url = 'http://feeds.washingtonpost.com/rss/rss_fact-checker'

# requests applies no timeout unless one is given, so an unresponsive server would otherwise
# block this cell indefinitely.
r = requests.get(url, timeout=30)

# Stop here on an HTTP error status instead of letting later cells parse an error page as
# if it were the feed.
r.raise_for_status()

In [97]:
from bs4 import BeautifulSoup
# BeautifulSoup documentation: https://www.crummy.com/software/BeautifulSoup/bs4/doc/

# Turn the text of the feed response into a searchable tree.
# NOTE(review): no parser is named here, so BeautifulSoup chooses one itself; which one it
# picks depends on what is installed -- confirm the intended parser.
soup = BeautifulSoup(markup=r.text)

In [98]:
# Gather the link target of every anchor tag in the parsed feed that has an href attribute.
# These are the URL's of the ClaimReviewed articles featured on the WaPo Fact Checker site.
url_list = [a['href'] for a in soup.find_all('a', href=True)]

In [99]:
# Request and parse every collected URL in turn. x and soup are overwritten on each pass, so
# once the loop finishes only the last URL's response and parse tree remain, and i is left
# bound to that last URL (the following cell reads i).
for i in url_list: 
# For every URL...
    x = requests.get(i)
    soup = BeautifulSoup(x.text)

In [100]:
# Look up the ClaimReview microdata fields (documented at schema.org) for one article, as a
# preview of the lookups that are later used to fill a dataframe. The article is whichever
# URL `i` is currently bound to.
x = requests.get(i)
soup = BeautifulSoup(markup=x.text)

# Each lookup yields every tag whose itemprop attribute has the given value.
l = soup.find_all(itemprop="claimReviewed")
m = soup.find_all(itemprop="ratingValue")
n = soup.find_all(itemprop="datePublished")

# Keep the article URL as a plain string alongside the three fields.
o = str(i)

In [101]:
# To explore the rating scale, visit: https://developers.google.com/search/docs/data-types/factcheck

In [102]:
import pandas as pd
from pandas import DataFrame

# Pandas.DataFrame documentation: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html

In [103]:
column_names = ["claimReviewed","datePublished","ratingValue","url"]
# Column names for the data frame built at the end of this cell.

rows = []
# One [claimReviewed, datePublished, ratingValue, url] entry per article that carries all
# three ClaimReview fields.

no_claimreview = []
# URLs of the articles for which at least one of those fields could not be found.

for i in url_list:
    # requests applies no timeout unless one is given; without it a single stalled article
    # would hang the whole scrape.
    x = requests.get(i, timeout=30)
    soup = BeautifulSoup(x.text)

    # Calling the soup directly is shorthand for find_all(): each result is a (possibly
    # empty) list of the tags whose itemprop attribute matches.
    l = soup(itemprop="claimReviewed")
    m = soup(itemprop="ratingValue")
    n = soup(itemprop="datePublished")
    o = str(i)

    if l and m and n and o:
        # Only the first match of each field is used; the order follows column_names.
        rows.append([l[0].text,n[0].text,m[0].text,o])
    else:
        no_claimreview.append(i)

df = DataFrame(rows,columns=column_names)

# The parenthesised form prints the same thing under Python 2 and is also valid Python 3,
# where the bare `print x` statement is a syntax error.
print(no_claimreview)

[]


In [104]:
# Display the data frame of scraped ClaimReview fields as this cell's output.
df
# So we can see what we're getting - looks good! 

Unnamed: 0,claimReviewed,datePublished,ratingValue,url
0,"""I nevertheless concur with the judgment of th...",May 11 at 3:00 AM,-1,http://www.washingtonpost.com/blogs/fact-check...
1,"""I greatly appreciate you informing me, on thr...",May 10 at 6:32 PM,-1,http://www.washingtonpost.com/blogs/fact-check...
2,"""The abuses were all over. As just one example...",May 10,4,http://www.washingtonpost.com/blogs/fact-check...
3,"“Once again, 129M people with pre-existing con...",May 10 at 3:00 AM,5,http://www.washingtonpost.com/blogs/fact-check...
4,“General Flynn was given the highest security ...,May 9,4,http://www.washingtonpost.com/blogs/fact-check...
5,“The fact of the matter is that Medicaid spend...,May 8,5,http://www.washingtonpost.com/blogs/fact-check...
6,"American Health Care Act ""made being a rape su...",May 6,5,http://www.washingtonpost.com/blogs/fact-check...
7,“We’re not taking a benefit away. Nobody on Me...,May 5,4,http://www.washingtonpost.com/blogs/fact-check...
8,"""VERIFIED: MacArthur Amendment strengthens AHC...",May 4,-1,http://www.washingtonpost.com/blogs/fact-check...
9,"Obama ""simply imposed regulation without prope...",May 3,4,http://www.washingtonpost.com/blogs/fact-check...


In [105]:
# Hand-written short summaries of the claims, one per row of df and in the same row order.
edited_claimReviewed = [
    "James Comey not able to lead the Bureau",
    "Trump not being under investigation",
    "People being forbidden from getting religious items",
    "129M people being denied coverage",
    "General Flynn and his security clearance",
    "Medicaid spending",
    "AHCA and pre-existing conditions",
    "Medicaid benefits being taken away",
    "The MacArthur Amendment",
    "Obama's imposed regulations",
]

In [None]:
# Note that we manually created an "edited_claimReviewed" column so that our tweets stay within the 140 character Twitter limit. 

In [106]:
# Attach the hand-written summaries to df as a new "edited_claimReviewed" column.
df = df.assign(edited_claimReviewed=edited_claimReviewed)

In [107]:
# Display df again, now that the edited_claimReviewed column has been added.
df

Unnamed: 0,claimReviewed,datePublished,ratingValue,url,edited_claimReviewed
0,"""I nevertheless concur with the judgment of th...",May 11 at 3:00 AM,-1,http://www.washingtonpost.com/blogs/fact-check...,James Comey not able to lead the Bureau
1,"""I greatly appreciate you informing me, on thr...",May 10 at 6:32 PM,-1,http://www.washingtonpost.com/blogs/fact-check...,Trump not being under investigation
2,"""The abuses were all over. As just one example...",May 10,4,http://www.washingtonpost.com/blogs/fact-check...,People being forbidden from getting religious ...
3,"“Once again, 129M people with pre-existing con...",May 10 at 3:00 AM,5,http://www.washingtonpost.com/blogs/fact-check...,129M people being denied coverage
4,“General Flynn was given the highest security ...,May 9,4,http://www.washingtonpost.com/blogs/fact-check...,General Flynn and his security clearance
5,“The fact of the matter is that Medicaid spend...,May 8,5,http://www.washingtonpost.com/blogs/fact-check...,Medicaid spending
6,"American Health Care Act ""made being a rape su...",May 6,5,http://www.washingtonpost.com/blogs/fact-check...,AHCA and pre-existing conditions
7,“We’re not taking a benefit away. Nobody on Me...,May 5,4,http://www.washingtonpost.com/blogs/fact-check...,Medicaid benefits being taken away
8,"""VERIFIED: MacArthur Amendment strengthens AHC...",May 4,-1,http://www.washingtonpost.com/blogs/fact-check...,The MacArthur Amendment
9,"Obama ""simply imposed regulation without prope...",May 3,4,http://www.washingtonpost.com/blogs/fact-check...,Obama's imposed regulations


In [108]:
import os

# Twitter API credentials. Each value is read from an environment variable so that real
# secrets never have to be typed into (or saved with) the notebook; when a variable is not
# set, the value falls back to the "***" placeholder.
CONSUMER_KEY = os.environ.get("TWITTER_CONSUMER_KEY", "***")
CONSUMER_SECRET = os.environ.get("TWITTER_CONSUMER_SECRET", "***")
ACCESS_TOKEN = os.environ.get("TWITTER_ACCESS_TOKEN", "***")
ACCESS_TOKEN_SECRET = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "***")

# For more on the Twitter API, visit https://dev.twitter.com/overview/api 

In [109]:
from tweepy import OAuthHandler, API
# Tweepy documentation: http://docs.tweepy.org/en/v3.5.0/

# Setup the authentication
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Create an object we will use to communicate with the Twitter API.
# wait_on_rate_limit makes the client sleep until the rate-limit window resets instead of
# raising an error part-way through a long run of requests.
api = API(auth, wait_on_rate_limit=True)

In [132]:
import time 

In [126]:
from tweepy import Cursor
# A brief introduction to Tweepy's Cursor: http://docs.tweepy.org/en/v3.5.0/cursor_tutorial.html#introduction

# Collects one data frame of search hits for every query issued below.
claim_DataFrame = []

# NOTE(review): the whole set of claims is searched ten times over, so this list ends up
# holding ten frames per claim, while the reply-building cell only indexes the first
# df.shape[0] entries -- confirm the repetition is intentional.
for i in range(0,10):

    for query in df["claimReviewed"]:
        # Search for the claim text: at most 10 pages of up to 100 recent tweets each.
        result_pages = Cursor(api.search, q=query, result_type='recent', count=100, until="2017-05-11").pages(10)

        # Flatten the pages into a single list of tweets, in the order they arrive.
        hits = [tweet for page in result_pages for tweet in page]

        # One column per field of interest, one row per tweet found for this query.
        claim_DataFrame.append(DataFrame({
            "id": [hit.id for hit in hits],
            "text": [hit.text for hit in hits],
            "time": [hit.created_at for hit in hits],
            "retweet": [hit.retweet_count for hit in hits],
            "screen_name": [hit.user.screen_name for hit in hits],
            "friends_count": [hit.user.friends_count for hit in hits],
            "followers_count": [hit.user.followers_count for hit in hits],
        }))

        # Pause for 30 seconds before issuing the next query.
        time.sleep(30)

In [127]:
# Build the replies to send: a list of [message, id of the tweet to reply to] pairs.
tweet_text = []

for i in range(df.shape[0]):
    # Only claims with a negative ratingValue get a reply.
    # NOTE(review): confirm that a negative rating really means "refuted" on the scale the
    # publisher uses before telling users their claim was refuted.
    if int(df["ratingValue"][i]) < 0:
        # claim_DataFrame[i] holds the tweets found for the claim in row i of df.
        for j in range(claim_DataFrame[i].shape[0]):

            sn = claim_DataFrame[i]["screen_name"][j]
            fid = claim_DataFrame[i]["id"][j]
            # A space separates the claim summary from the link; without it the URL is glued
            # onto the last word of the summary.
            tweet = " Hello! @"+sn+ ' ' + "WaPo has refuted your claim about " +df["edited_claimReviewed"][i] + " " + df["url"][i]
            tweet_text.append([tweet,fid])

In [None]:
# Post every prepared message as a reply to the tweet it was written for, waiting 100
# seconds after each post.
# NOTE(review): Twitter's statuses/update documentation says in_reply_to_status_id is ignored
# unless the status text mentions the author of the referenced tweet -- worth checking
# whenever a reply fails to attach to its target.
for status_text, reply_to_id in tweet_text:
    api.update_status(str(status_text), in_reply_to_status_id=int(reply_to_id))
    time.sleep(100)

In [1]:
# Also note that in some cases, the 'in_reply_to_status_id' does not work. If anyone can figure out why, please let us know!

In [1]:
# Also also, we keep getting kicked off of Twitter/being reported. Any solutions to that one are equally welcome!