### Imports ###

In [1]:
import tweepy
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import json

### GeoPy Functions / Code ###

In [2]:
# Shared Nominatim geocoder client used by latlong; user_agent is the
# application identifier handed to the geocoder.
geolocator = Nominatim(user_agent = "DSI_US_8_LA")

In [3]:
# Function to take a user-entered address and return its coordinates
def latlong(point):
    """Geocode an address and return its coordinates.

    Parameters
    ----------
    point : address or place description handed to ``geolocator.geocode``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the geocoder's match.

    Raises
    ------
    ValueError
        If the geocoder returns no match for ``point``.
    """
    coord = geolocator.geocode(point)
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on the attribute access below.
    if coord is None:
        raise ValueError("Could not geocode address: {!r}".format(point))
    return (coord.latitude, coord.longitude)

In [4]:
# Geocode the street address used as the center point of the tweet search below.
cord = latlong('360 East 2nd Street, Los Angeles, CA')

### Twitter Authentication / Tweepy Initialization ###

In [2]:
# Load the Twitter API credentials from a local JSON file.
# The with-block closes the file handle as soon as its contents have been read.
with open('twitter_creds.json') as json_file:
    json_str = json_file.read()
json_data = json.loads(json_str)

In [5]:
#Code taken from http://www.dealingdata.net/2016/07/23/PoGo-Series-Tweepy/
#Build the OAuth handler from the API key / API secret key loaded into json_data,
#then attach the access token and access token secret.
auth = tweepy.OAuthHandler(json_data['API key'],json_data['API secret key'])
auth.set_access_token(json_data['Access token'], json_data['Access token secret'])

#Creating a twitter API wrapper using tweepy
api = tweepy.API(auth)

#Error handling
#NOTE(review): this only tests the truthiness of the wrapper object; it presumably
#does not contact Twitter, so invalid credentials may go unnoticed here - confirm.
if (not api):
    print ("Problem connecting to API")

In [6]:
#Function to return the hashtags in a tweet
def hashtag_process(raw, exclude=('LAtraffic',)):
    """Return the hashtag texts of a tweet, leaving out boilerplate tags.

    Parameters
    ----------
    raw : list of dict
        Hashtag entities of a tweet; each entry's ``'text'`` value is read
        with ``.get('text')``.
    exclude : collection of str, optional
        Hashtag texts to drop from the result (exact, case-sensitive match).
        Pass a tuple/list/set rather than a bare string. Defaults to
        ``('LAtraffic',)``.

    Returns
    -------
    list or None
        ``None`` when ``raw`` is empty or ``None``; otherwise the list of
        remaining hashtag texts, which may be empty if every tag was excluded.
    """
    # Covers both "no hashtags" ([]) and a missing entities list (None).
    if not raw:
        return None
    return [tag.get('text') for tag in raw if tag.get('text') not in exclude]

TweetSearch Function Parameters

- location - exact latitude and longitude to start search from, should be the output of latlong function
- distance - distance in miles to search for (default is 25)
- user - Twitter account (screen name) whose timeline Tweets are pulled from (default is TotalTrafficLA)
- limit - number of tweets to initially pull from the user before applying location filtering (default is 100)

In [7]:
def TweetSearch(location,distance = 25,user = 'TotalTrafficLA',limit = 100):
    """Collect a user's geotagged tweets that lie within a radius of a point.

    Parameters
    ----------
    location : coordinates to measure from, e.g. the output of ``latlong``.
    distance : search radius in miles (default 25).
    user : account whose timeline is read (default 'TotalTrafficLA').
    limit : maximum number of tweets pulled from the timeline before the
        location filter is applied (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per matching tweet with the columns 'Full Text', 'Author',
        'Coordinates', 'Hashtags' and 'Creation Time (UTC)'. The frame has
        these columns and no rows when nothing matches.
    """
    columns = ['Full Text','Author','Coordinates','Hashtags','Creation Time (UTC)']
    rows = []

    timeline = tweepy.Cursor(api.user_timeline,id=user,tweet_mode='extended').items(limit)
    for tweet in timeline:
        # Tweets without a geotag cannot be distance-filtered, so skip them.
        if tweet.geo is None:
            continue
        coordinates = tweet.geo['coordinates']
        if geodesic(location,coordinates).miles <= distance:
            rows.append({
                'Full Text': tweet.full_text,
                'Author': tweet.author.screen_name,
                'Coordinates': coordinates,
                'Hashtags': hashtag_process(tweet.entities['hashtags']),
                'Creation Time (UTC)': tweet.created_at,
            })

    # One record per tweet instead of parallel lists plus a transpose;
    # dtype=object keeps every column untyped, as the transposed frame was.
    return pd.DataFrame(rows, columns=columns, dtype=object)

In [8]:
# Pull tweets from the default account that are geotagged within 25 miles of `cord`.
df = TweetSearch(cord,25)

In [9]:
# Save the results to CSV; index=False leaves the DataFrame's row index out of the file.
df.to_csv('first_scrape.csv',index=False)