In [1]:
import tweepy
import time
from datetime import datetime
from newsapi import NewsApiClient
from geopy.geocoders import Nominatim
import pandas as pd

# Format of config.py:
# twitter_public_key = "XXXXXXXXXXXXXXXXXXXXXXXX"
# twitter_private_key = "XXXXXXXXXXXXXXXXXXXXXXXX"
# twitter_public_token = "XXXXXXXXXXXXXXXXXXXXXXXX"
# twitter_private_token = "XXXXXXXXXXXXXXXXXXXXXXXX"
# google_news_key = "XXXXXXXXXXXXXXXXXXXXXXXX"

exec(open("config.py").read())
newsapi = NewsApiClient(api_key = google_news_key)

## Twitter Authentication

In [2]:
def authenticate(api_key, secret_key, access_token, secret_token):

    auth = tweepy.OAuthHandler(api_key, secret_key)
    auth.set_access_token(access_token, secret_token)
    twitter = tweepy.API(auth, 
                     wait_on_rate_limit=True, 
                     wait_on_rate_limit_notify=True)
  
    return twitter

twitter = authenticate(twitter_public_key, twitter_private_key, twitter_public_token, twitter_private_token)

rateLimitStatus = twitter.rate_limit_status()
print("Trends Remaining: ", rateLimitStatus['resources']['trends']['/trends/place']['remaining'])
print("Tweets Remaining: ", rateLimitStatus['resources']['tweets']['/tweets/search/:product/:label']['remaining'])
timestamp = rateLimitStatus['resources']['tweets']['/tweets/search/:product/:label']['reset']
print("Will reset at: ", datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S'))

Trends Remaining:  75
Tweets Remaining:  1800
Will reset at:  2019-09-15 13:08:47


In [3]:
def limit_handled(cursor):
    
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            time.sleep(2)
            continue

## Lookup Yahoo WOEID for Desired Location

In [4]:
LOC = "Philadephia, Pa"
geolocator = Nominatim(user_agent="my-application")
coordinates = geolocator.geocode(LOC)
trend_node = twitter.trends_closest(coordinates.latitude, coordinates.longitude)[0]
woeid = trend_node['woeid']
print(trend_node)

{'name': 'Philadelphia', 'placeType': {'code': 7, 'name': 'Town'}, 'url': 'http://where.yahooapis.com/v1/place/2471217', 'parentid': 23424977, 'country': 'United States', 'woeid': 2471217, 'countryCode': 'US'}


## Compile a list of hashtags from trending tweets

In [5]:
trends = twitter.trends_place(woeid)
tags = set()
for trend in trends[0]['trends']:
    tweets = twitter.search(trend['query'], lang="en", rpp=10)
    for tweet in tweets:
        for tag in tweet._json['entities']['hashtags']:
            tags.add(tag['text'].lower())
print (tags)

{'lufc', 'swfclive', 'finsup', 'sunday', 'tb12', 'mom', 'tysonfury', 'maxparole', 'newsnight', 'cowboysnation', 'fightforolddc', 'toonami', 'cheerleaders', 'motogp', 'pontida2019', 'sundaymotivation', 'defensewingames', 'trump', 'bolivia', 'vamosargentina', 'restaurant', 'redskinsnation', 'saturdaythoughts', 'thereturnofdashberlin', 'naturelovers', 'introvertlessons101', 'gameday', 'wehatedallas', 'thankyouday', 'leadright', 'fleece', 'アニポケ', 'squishthefish', 'hacklearning', 'garrymonksbarmyarmy', 'kavanaughlied', 'doyourjob', 'twinning', 'thehelpmovie', 'templetuff', 'cheer', 'videogame', 'seductivesunday', 'metoo', 'gaming', 'youwillneverhearme', '𝑺𝒆𝒅𝒖𝒄𝒕𝒊𝒗𝒆𝑺𝒖𝒏𝒅𝒂𝒚', 'democracyday', 'watars', 'beatdallas', 'cornercam', 'lupinthe3rd', 'food', 'rock', 'people', 'njdest', 'photoediting', 'boliviagmaer', 'insteadofbirthdaycards', 'lymphomaawarenessday', 'pontida19', 'corruptgop', 'scotus', 'argesp', 'hogfarmers', 'sanmarinogp', '4littlegirls', 'pontida', 'cheerleading', 'shakespearesunday'

## Search Google News with compiled tags; then dump results in a dataframe

In [None]:
df = pd.DataFrame(columns = ['date', 'source', 'author', 'title', 'description', 'url'])
for tag in tags:
    results = newsapi.get_everything(q = tag, language = 'en', sort_by = 'relevancy')
    for result in results['articles']:
        df = df.append(pd.Series(
            [result['publishedAt'], result['source']['name'], result['author'], result['title'], result['description'], result['url']],
            index = df.columns ), ignore_index=True)

## Remove duplicate search results from dataframe and export to CSV

In [43]:
wdir = '/home/jason/documents/'
date = datetime.now().strftime("%Y%m%d_%H%M%S")

df.drop_duplicates(subset = "url", keep = False, inplace = True)
df.to_csv(wdir + '/' + 'news_' + date + '.csv')
df

Unnamed: 0,date,source,author,title,description,url
0,2019-08-21T22:24:41Z,Arseblog.news,Andrew Allen,VIDEO: Nketiah scores off the bench to send Wr...,Eddie Nketiah came off the bench to score the ...,https://arseblog.news/2019/08/video-nketiah-sc...
1,2019-09-07T13:09:12Z,101greatgoals.com,Conor Laird,Eddie Nketiah confirms Unai Emery wanted him t...,Arsenal youngster Eddie Nketiah has revealed t...,https://www.101greatgoals.com/news/eddie-nketi...
2,2019-09-13T10:39:57Z,Indiatoday.in,Sopan Joshi,Documentary | Waiting for a miracle,A new documentary details what happens when a ...,https://www.indiatoday.in/magazine/leisure/sto...
3,2019-08-27T12:00:00Z,Sbnation.com,Jocelyn Taub,Putting Arsenal’s loss to Liverpool in perspec...,"Despite the dropped points, there are still ma...",https://theshortfuse.sbnation.com/2019/8/27/20...
4,2019-08-26T11:00:00Z,Sbnation.com,davemc_exile,Huddersfield Town 0-2 Reading: The Alternative...,"Hang on, we’ve had an away win...",https://thetilehurstend.sbnation.com/2019/8/26...
...,...,...,...,...,...,...
970,2019-09-08T17:11:20Z,Rawstory.com,David Edwards,‘Whiteness is a helluva drug’: NFL team hammer...,An NFL team came under fire over the weekend a...,https://www.rawstory.com/2019/09/whiteness-is-...
971,2019-08-26T06:28:26Z,Daily Mail,By Dailymail.com Reporter,Tom Arnold seen with kids following report his...,"The Ottumwa, Iowa-born actor and comedian wore...",https://www.dailymail.co.uk/tvshowbiz/article-...
972,2019-09-08T20:50:35Z,Alternet.org,David Edwards,NFL team slammed for giving black kid lawnmowe...,An NFL team came under fire over the weekend a...,https://www.alternet.org/2019/09/nfl-team-slam...
974,2019-09-09T02:43:30Z,Propublica.org,Derek Willis,@JoshForNY -- Deleted after 3 minutes 11 minut...,If you were given a choice between a society t...,https://projects.propublica.org/politwoops/twe...
